/* Source listing version: 1.0 */
/*
  tidy.c - HTML TidyLib command line driver

  Copyright (c) 1998-2005 World Wide Web Consortium
  (Massachusetts Institute of Technology, European Research
  Consortium for Informatics and Mathematics, Keio University).
  All Rights Reserved.

  CVS Info :

    $Author: arnaud02 $
    $Date: 2005/10/21 09:53:23 $
    $Revision: 1.41 $
*/
15
#include "tidy.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
17
/* Stream that receives this driver's own diagnostics.  main() points it
   at stderr before first use and may later redirect it to an error file. */
static FILE* errout = NULL;
/* static FILE* txtout = NULL; */  /* set to stdout */
20
21 static Bool samefile( ctmbstr filename1, ctmbstr filename2 )
22 {
23 #if FILENAMES_CASE_SENSITIVE
24 return ( strcmp( filename1, filename2 ) == 0 );
25 #else
26 return ( strcasecmp( filename1, filename2 ) == 0 );
27 #endif
28 }
29
/* Copy the next column-sized piece of `s` into `sbuf`.

   s      - text still to be printed; may be NULL (nothing left).
   offset - maximum number of characters to copy; callers must pass a
            value > 0, otherwise no progress is made.
   sbuf   - destination buffer of at least offset+1 bytes.

   The piece is cut at the last space found at or before `offset`; when
   there is no such space the text is hard-cut after `offset` characters.

   Returns a pointer to the remaining text, or NULL once all of `s` has
   been consumed.  NULL is also returned when only the separating space
   was left over, so callers do not emit a spurious empty row. */
static const char *cutToWhiteSpace(const char *s, uint offset, char *sbuf)
{
    size_t len;
    uint brk, keep, next;

    if (!s)
    {
        sbuf[0] = '\0';
        return NULL;
    }

    len = strlen(s);
    if (len <= offset)
    {
        /* Everything fits: copy including the terminator. */
        memcpy(sbuf, s, len + 1);
        return NULL;
    }

    /* Walk back from the column limit to the nearest space. */
    brk = offset;
    while (brk && s[brk] != ' ')
        --brk;

    if (brk == 0)
    {
        /* no white space: hard cut at the column limit */
        keep = offset;
        next = offset;
    }
    else
    {
        /* cut at the space and skip over it */
        keep = brk;
        next = brk + 1;
    }

    memcpy(sbuf, s, keep);
    sbuf[keep] = '\0';

    return s[next] != '\0' ? s + next : NULL;
}
62
63 static void print2Columns( const char* fmt, uint l1, uint l2,
64 const char *c1, const char *c2 )
65 {
66 const char *pc1=c1, *pc2=c2;
67 char *c1buf = (char *)malloc(l1+1);
68 char *c2buf = (char *)malloc(l2+1);
69
70 do
71 {
72 pc1 = cutToWhiteSpace(pc1, l1, c1buf);
73 pc2 = cutToWhiteSpace(pc2, l2, c2buf);
74 printf(fmt,
75 c1buf[0]!='\0'?c1buf:"",
76 c2buf[0]!='\0'?c2buf:"");
77 } while (pc1 || pc2);
78 free(c1buf);
79 free(c2buf);
80 }
81
82 static void print3Columns( const char* fmt, uint l1, uint l2, uint l3,
83 const char *c1, const char *c2, const char *c3 )
84 {
85 const char *pc1=c1, *pc2=c2, *pc3=c3;
86 char *c1buf = (char *)malloc(l1+1);
87 char *c2buf = (char *)malloc(l2+1);
88 char *c3buf = (char *)malloc(l3+1);
89
90 do
91 {
92 pc1 = cutToWhiteSpace(pc1, l1, c1buf);
93 pc2 = cutToWhiteSpace(pc2, l2, c2buf);
94 pc3 = cutToWhiteSpace(pc3, l3, c3buf);
95 printf(fmt,
96 c1buf[0]!='\0'?c1buf:"",
97 c2buf[0]!='\0'?c2buf:"",
98 c3buf[0]!='\0'?c3buf:"");
99 } while (pc1 || pc2 || pc3);
100 free(c1buf);
101 free(c2buf);
102 free(c3buf);
103 }
104
/* Row format of the command line help: option names, description. */
static const char helpfmt[] = " %-19.19s %-58.58s\n";

/* Underline material for the category titles of the command line help. */
static const char helpul[] =
    "-----------------------------------------------------------------";

/* Row format of the configuration tables: name, type, value(s). */
static const char fmt[] = "%-27.27s %-9.9s %-40.40s\n";

/* Same as fmt, with a one-character marker in front of the value. */
static const char valfmt[] = "%-27.27s %-9.9s %-1.1s%-39.39s\n";

/* Underline material for the header row of the configuration tables. */
static const char ul[] =
    "=================================================================";
112
/* Categories of command line options.  The values index cmdopt_catname;
   CmdOptCatFIRST/CmdOptCatLAST delimit the range for iteration
   (CmdOptCatLAST is one past the last real category). */
typedef enum
{
    CmdOptFileManip,
    CmdOptCatFIRST = CmdOptFileManip,
    CmdOptProcDir,
    CmdOptCharEnc,
    CmdOptMisc,
    CmdOptCatLAST
} CmdOptCategory;
122
123 static const struct {
124 ctmbstr mnemonic;
125 ctmbstr name;
126 } cmdopt_catname[] = {
127 { "file-manip", "File manipulation" },
128 { "process-directives", "Processing directives" },
129 { "char-encoding", "Character encodings" },
130 { "misc", "Miscellaneous" }
131 };
132
133 typedef struct {
134 ctmbstr name1; /**< Name */
135 ctmbstr desc; /**< Description */
136 ctmbstr eqconfig; /**< Equivalent configuration option */
137 CmdOptCategory cat; /**< Category */
138 ctmbstr name2; /**< Name */
139 ctmbstr name3; /**< Name */
140 } CmdOptDesc;
141
142 static const CmdOptDesc cmdopt_defs[] = {
143 { "-output <file>",
144 "write output to the specified <file>",
145 "output-file: <file>", CmdOptFileManip, "-o <file>" },
146 { "-config <file>",
147 "set configuration options from the specified <file>",
148 NULL, CmdOptFileManip },
149 { "-file <file>",
150 "write errors to the specified <file>",
151 "error-file: <file>", CmdOptFileManip, "-f <file>" },
152 { "-modify",
153 "modify the original input files",
154 "write-back: yes", CmdOptFileManip, "-m" },
155 { "-indent",
156 "indent element content",
157 "indent: auto", CmdOptProcDir, "-i" },
158 { "-wrap <column>",
159 "wrap text at the specified <column>"
160 ". 0 is assumed if <column> is missing. "
161 "When this option is omitted, the default of the configuration option "
162 "\"wrap\" applies.",
163 "wrap: <column>", CmdOptProcDir, "-w <column>" },
164 { "-upper",
165 "force tags to upper case",
166 "uppercase-tags: yes", CmdOptProcDir, "-u" },
167 { "-clean",
168 "replace FONT, NOBR and CENTER tags by CSS",
169 "clean: yes", CmdOptProcDir, "-c" },
170 { "-bare",
171 "strip out smart quotes and em dashes, etc.",
172 "bare: yes", CmdOptProcDir, "-b" },
173 { "-numeric",
174 "output numeric rather than named entities",
175 "numeric-entities: yes", CmdOptProcDir, "-n" },
176 { "-errors",
177 "only show errors",
178 "markup: no", CmdOptProcDir, "-e" },
179 { "-quiet",
180 "suppress nonessential output",
181 "quiet: yes", CmdOptProcDir, "-q" },
182 { "-omit",
183 "omit optional end tags",
184 "hide-endtags: yes", CmdOptProcDir },
185 { "-xml",
186 "specify the input is well formed XML",
187 "input-xml: yes", CmdOptProcDir },
188 { "-asxml",
189 "convert HTML to well formed XHTML",
190 "output-xhtml: yes", CmdOptProcDir, "-asxhtml" },
191 { "-ashtml",
192 "force XHTML to well formed HTML",
193 "output-html: yes", CmdOptProcDir },
194 #if SUPPORT_ACCESSIBILITY_CHECKS
195 { "-access <level>",
196 "do additional accessibility checks (<level> = 0, 1, 2, 3)"
197 ". 0 is assumed if <level> is missing.",
198 "accessibility-check: <level>", CmdOptProcDir },
199 #endif
200 { "-raw",
201 "output values above 127 without conversion to entities",
202 NULL, CmdOptCharEnc },
203 { "-ascii",
204 "use ISO-8859-1 for input, US-ASCII for output",
205 NULL, CmdOptCharEnc },
206 { "-latin0",
207 "use ISO-8859-15 for input, US-ASCII for output",
208 NULL, CmdOptCharEnc },
209 { "-latin1",
210 "use ISO-8859-1 for both input and output",
211 NULL, CmdOptCharEnc },
212 #ifndef NO_NATIVE_ISO2022_SUPPORT
213 { "-iso2022",
214 "use ISO-2022 for both input and output",
215 NULL, CmdOptCharEnc },
216 #endif
217 { "-utf8",
218 "use UTF-8 for both input and output",
219 NULL, CmdOptCharEnc },
220 { "-mac",
221 "use MacRoman for input, US-ASCII for output",
222 NULL, CmdOptCharEnc },
223 { "-win1252",
224 "use Windows-1252 for input, US-ASCII for output",
225 NULL, CmdOptCharEnc },
226 { "-ibm858",
227 "use IBM-858 (CP850+Euro) for input, US-ASCII for output",
228 NULL, CmdOptCharEnc },
229 #if SUPPORT_UTF16_ENCODINGS
230 { "-utf16le",
231 "use UTF-16LE for both input and output",
232 NULL, CmdOptCharEnc },
233 { "-utf16be",
234 "use UTF-16BE for both input and output",
235 NULL, CmdOptCharEnc },
236 { "-utf16",
237 "use UTF-16 for both input and output",
238 NULL, CmdOptCharEnc },
239 #endif
240 #if SUPPORT_ASIAN_ENCODINGS /* #431953 - RJ */
241 { "-big5",
242 "use Big5 for both input and output",
243 NULL, CmdOptCharEnc },
244 { "-shiftjis",
245 "use Shift_JIS for both input and output",
246 NULL, CmdOptCharEnc },
247 { "-language <lang>",
248 "set the two-letter language code <lang> (for future use)",
249 "language: <lang>", CmdOptCharEnc },
250 #endif
251 { "-version",
252 "show the version of Tidy",
253 NULL, CmdOptMisc, "-v" },
254 { "-help",
255 "list the command line options",
256 NULL, CmdOptMisc, "-h", "-?" },
257 { "-xml-help",
258 "list the command line options in XML format",
259 NULL, CmdOptMisc },
260 { "-help-config",
261 "list all configuration options",
262 NULL, CmdOptMisc },
263 { "-xml-config",
264 "list all configuration options in XML format",
265 NULL, CmdOptMisc },
266 { "-show-config",
267 "list the current configuration settings",
268 NULL, CmdOptMisc },
269 { NULL, NULL, NULL, CmdOptMisc }
270 };
271
272 static tmbstr get_option_names( const CmdOptDesc* pos )
273 {
274 tmbstr name;
275 uint len = strlen(pos->name1);
276 if (pos->name2)
277 len += 2+strlen(pos->name2);
278 if (pos->name3)
279 len += 2+strlen(pos->name3);
280
281 name = (tmbstr)malloc(len+1);
282 strcpy(name, pos->name1);
283 if (pos->name2)
284 {
285 strcat(name, ", ");
286 strcat(name, pos->name2);
287 }
288 if (pos->name3)
289 {
290 strcat(name, ", ");
291 strcat(name, pos->name3);
292 }
293 return name;
294 }
295
/* Return a copy of `name` that is safe to emit as XML character data:
   '<', '>', '&' and '"' are replaced by "&lt;", "&gt;", "&amp;" and
   "&quot;" respectively; every other character is copied unchanged.

   name - NUL-terminated text; must not be NULL.

   Returns a newly allocated string that the caller must free().
   If the allocation fails, a message is written to stderr and the
   program exits. */
static tmbstr get_escaped_name( ctmbstr name )
{
    tmbstr escpName;
    tmbstr out;
    size_t len = 0;
    ctmbstr c;

    /* First pass: size of the escaped text. */
    for (c = name; *c != '\0'; ++c)
    {
        switch (*c)
        {
        case '<':
        case '>':
            len += 4;   /* "&lt;" or "&gt;" */
            break;
        case '&':
            len += 5;   /* "&amp;" */
            break;
        case '"':
            len += 6;   /* "&quot;" */
            break;
        default:
            len += 1;
            break;
        }
    }

    escpName = (tmbstr)malloc(len + 1);
    if (!escpName)
    {
        fprintf(stderr, "Fatal error: out of memory.\n");
        exit(EXIT_FAILURE);
    }

    /* Second pass: append at a moving cursor (linear time). */
    out = escpName;
    for (c = name; *c != '\0'; ++c)
    {
        const char *entity = NULL;

        switch (*c)
        {
        case '<':
            entity = "&lt;";
            break;
        case '>':
            entity = "&gt;";
            break;
        case '&':
            entity = "&amp;";
            break;
        case '"':
            entity = "&quot;";
            break;
        default:
            break;
        }

        if (entity)
        {
            size_t n = strlen(entity);
            memcpy(out, entity, n);
            out += n;
        }
        else
        {
            *out++ = *c;
        }
    }
    *out = '\0';

    return escpName;
}
341
342 static void print_help_option( void )
343 {
344 CmdOptCategory cat = CmdOptCatFIRST;
345 const CmdOptDesc* pos = cmdopt_defs;
346
347 for( cat=CmdOptCatFIRST; cat!=CmdOptCatLAST; ++cat)
348 {
349 size_t len = strlen(cmdopt_catname[cat].name);
350 printf("%s\n", cmdopt_catname[cat].name );
351 printf("%*.*s\n", (int)len, (int)len, helpul );
352 for( pos=cmdopt_defs; pos->name1; ++pos)
353 {
354 tmbstr name;
355 if (pos->cat != cat)
356 continue;
357 name = get_option_names( pos );
358 print2Columns( helpfmt, 19, 58, name, pos->desc );
359 free(name);
360 }
361 printf("\n");
362 }
363 }
364
365 static void print_xml_help_option_element( ctmbstr element, ctmbstr name )
366 {
367 tmbstr escpName;
368 if (!name)
369 return;
370 printf(" <%s>%s</%s>\n", element, escpName = get_escaped_name(name),
371 element);
372 free(escpName);
373 }
374
375 static void print_xml_help_option( void )
376 {
377 const CmdOptDesc* pos = cmdopt_defs;
378
379 for( pos=cmdopt_defs; pos->name1; ++pos)
380 {
381 printf(" <option class=\"%s\">\n", cmdopt_catname[pos->cat].mnemonic );
382 print_xml_help_option_element("name", pos->name1);
383 print_xml_help_option_element("name", pos->name2);
384 print_xml_help_option_element("name", pos->name3);
385 print_xml_help_option_element("description", pos->desc);
386 if (pos->eqconfig)
387 print_xml_help_option_element("eqconfig", pos->eqconfig);
388 else
389 printf(" <eqconfig />\n");
390 printf(" </option>\n");
391 }
392 }
393
394 static void xml_help( void )
395 {
396 printf( "<?xml version=\"1.0\"?>\n"
397 "<cmdline version=\"%s\">\n", tidyReleaseDate());
398 print_xml_help_option();
399 printf( "</cmdline>\n" );
400 }
401
402 static void help( ctmbstr prog )
403 {
404 printf( "%s [option...] [file...] [option...] [file...]\n", prog );
405 printf( "Utility to clean up and pretty print HTML/XHTML/XML\n");
406 printf( "see http://tidy.sourceforge.net/\n");
407 printf( "\n");
408
409 #ifdef PLATFORM_NAME
410 printf( "Options for HTML Tidy for %s released on %s:\n",
411 PLATFORM_NAME, tidyReleaseDate() );
412 #else
413 printf( "Options for HTML Tidy released on %s:\n", tidyReleaseDate() );
414 #endif
415 printf( "\n");
416
417 print_help_option();
418
419 printf( "Use --blah blarg for any configuration option \"blah\" with argument \"blarg\"\n");
420 printf( "\n");
421
422 printf( "Input/Output default to stdin/stdout respectively\n");
423 printf( "Single letter options apart from -f may be combined\n");
424 printf( "as in: tidy -f errs.txt -imu foo.html\n");
425 printf( "For further info on HTML see http://www.w3.org/MarkUp\n");
426 printf( "\n");
427 }
428
429 static Bool isAutoBool( TidyOption topt )
430 {
431 TidyIterator pos;
432 ctmbstr def;
433
434 if ( tidyOptGetType( topt ) != TidyInteger)
435 return no;
436
437 pos = tidyOptGetPickList( topt );
438 while ( pos )
439 {
440 def = tidyOptGetNextPick( topt, &pos );
441 if (0==strcmp(def,"yes"))
442 return yes;
443 }
444 return no;
445 }
446
447 static
448 ctmbstr ConfigCategoryName( TidyConfigCategory id )
449 {
450 switch( id )
451 {
452 case TidyMarkup:
453 return "markup";
454 case TidyDiagnostics:
455 return "diagnostics";
456 case TidyPrettyPrint:
457 return "print";
458 case TidyEncoding:
459 return "encoding";
460 case TidyMiscellaneous:
461 return "misc";
462 }
463 fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", id);
464 assert(0);
465 abort();
466 }
467
468 /* Description of an option */
469 typedef struct {
470 ctmbstr name; /**< Name */
471 ctmbstr cat; /**< Category */
472 ctmbstr type; /**< "String, ... */
473 ctmbstr vals; /**< Potential values. If NULL, use an external function */
474 ctmbstr def; /**< default */
475 tmbchar tempdefs[80]; /**< storage for default such as integer */
476 Bool haveVals; /**< if yes, vals is valid */
477 } OptionDesc;
478
479 typedef void (*OptionFunc)( TidyDoc, TidyOption, OptionDesc * );
480
481
482 /* Create description "d" related to "opt" */
483 static
484 void GetOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
485 {
486 TidyOptionId optId = tidyOptGetId( topt );
487 TidyOptionType optTyp = tidyOptGetType( topt );
488
489 d->name = tidyOptGetName( topt );
490 d->cat = ConfigCategoryName( tidyOptGetCategory( topt ) );
491 d->vals = NULL;
492 d->def = NULL;
493 d->haveVals = yes;
494
495 /* Handle special cases first.
496 */
497 switch ( optId )
498 {
499 case TidyDuplicateAttrs:
500 case TidyNewline:
501 case TidyAccessibilityCheckLevel:
502 d->type = "enum";
503 d->vals = NULL;
504 d->def =
505 optId==TidyNewline ?
506 "<em>Platform dependent</em>"
507 :tidyOptGetCurrPick( tdoc, optId );
508 break;
509
510 case TidyDoctype:
511 d->type = "DocType";
512 d->vals = NULL;
513 {
514 ctmbstr sdef = NULL;
515 sdef = tidyOptGetCurrPick( tdoc, TidyDoctypeMode );
516 if ( !sdef || *sdef == '*' )
517 sdef = tidyOptGetValue( tdoc, TidyDoctype );
518 d->def = sdef;
519 }
520 break;
521
522 case TidyInlineTags:
523 case TidyBlockTags:
524 case TidyEmptyTags:
525 case TidyPreTags:
526 d->type = "Tag names";
527 d->vals = "tagX, tagY, ...";
528 d->def = NULL;
529 break;
530
531 case TidyCharEncoding:
532 case TidyInCharEncoding:
533 case TidyOutCharEncoding:
534 d->type = "Encoding";
535 d->def = tidyOptGetEncName( tdoc, optId );
536 if (!d->def)
537 d->def = "?";
538 d->vals = NULL;
539 break;
540
541 /* General case will handle remaining */
542 default:
543 switch ( optTyp )
544 {
545 case TidyBoolean:
546 d->type = "Boolean";
547 d->vals = "y/n, yes/no, t/f, true/false, 1/0";
548 d->def = tidyOptGetCurrPick( tdoc, optId );
549 break;
550
551 case TidyInteger:
552 if (isAutoBool(topt))
553 {
554 d->type = "AutoBool";
555 d->vals = "auto, y/n, yes/no, t/f, true/false, 1/0";
556 d->def = tidyOptGetCurrPick( tdoc, optId );
557 }
558 else
559 {
560 uint idef;
561 d->type = "Integer";
562 if ( optId == TidyWrapLen )
563 d->vals = "0 (no wrapping), 1, 2, ...";
564 else
565 d->vals = "0, 1, 2, ...";
566
567 idef = tidyOptGetInt( tdoc, optId );
568 sprintf(d->tempdefs, "%u", idef);
569 d->def = d->tempdefs;
570 }
571 break;
572
573 case TidyString:
574 d->type = "String";
575 d->vals = NULL;
576 d->haveVals = no;
577 d->def = tidyOptGetValue( tdoc, optId );
578 break;
579 }
580 }
581 }
582
583 /* Array holding all options. Contains a trailing sentinel. */
584 typedef struct {
585 TidyOption topt[N_TIDY_OPTIONS];
586 } AllOption_t;
587
588 static
589 int cmpOpt(const void* e1_, const void *e2_)
590 {
591 const TidyOption* e1 = (const TidyOption*)e1_;
592 const TidyOption* e2 = (const TidyOption*)e2_;
593 return strcmp(tidyOptGetName(*e1), tidyOptGetName(*e2));
594 }
595
596 static
597 void getSortedOption( TidyDoc tdoc, AllOption_t *tOption )
598 {
599 TidyIterator pos = tidyGetOptionList( tdoc );
600 uint i = 0;
601
602 while ( pos )
603 {
604 TidyOption topt = tidyGetNextOption( tdoc, &pos );
605 tOption->topt[i] = topt;
606 ++i;
607 }
608 tOption->topt[i] = NULL; /* sentinel */
609
610 qsort(tOption->topt,
611 /* Do not sort the sentinel: hence `-1' */
612 sizeof(tOption->topt)/sizeof(tOption->topt[0])-1,
613 sizeof(tOption->topt[0]),
614 cmpOpt);
615 }
616
617 static void ForEachSortedOption( TidyDoc tdoc, OptionFunc OptionPrint )
618 {
619 AllOption_t tOption;
620 const TidyOption *topt;
621
622 getSortedOption( tdoc, &tOption );
623 for( topt = tOption.topt; *topt; ++topt)
624 {
625 OptionDesc d;
626
627 GetOption( tdoc, *topt, &d );
628 (*OptionPrint)( tdoc, *topt, &d );
629 }
630 }
631
632 static void ForEachOption( TidyDoc tdoc, OptionFunc OptionPrint )
633 {
634 TidyIterator pos = tidyGetOptionList( tdoc );
635
636 while ( pos )
637 {
638 TidyOption topt = tidyGetNextOption( tdoc, &pos );
639 OptionDesc d;
640
641 GetOption( tdoc, topt, &d );
642 (*OptionPrint)( tdoc, topt, &d );
643 }
644 }
645
646 static
647 void PrintAllowedValuesFromPick( TidyOption topt )
648 {
649 TidyIterator pos = tidyOptGetPickList( topt );
650 Bool first = yes;
651 ctmbstr def;
652 while ( pos )
653 {
654 if (first)
655 first = no;
656 else
657 printf(", ");
658 def = tidyOptGetNextPick( topt, &pos );
659 printf("%s", def);
660 }
661 }
662
663 static
664 void PrintAllowedValues( TidyOption topt, const OptionDesc *d )
665 {
666 if (d->vals)
667 printf( "%s", d->vals );
668 else
669 PrintAllowedValuesFromPick( topt );
670 }
671
672 static
673 void printXMLDescription( TidyDoc tdoc, TidyOption topt )
674 {
675 ctmbstr doc = tidyOptGetDoc( tdoc, topt );
676
677 if (doc)
678 printf(" <description>%s</description>\n", doc);
679 else
680 {
681 printf(" <description />\n");
682 fprintf(stderr, "Warning: option `%s' is not documented.\n",
683 tidyOptGetName( topt ));
684 }
685 }
686
687 static
688 void printXMLCrossRef( TidyDoc tdoc, TidyOption topt )
689 {
690 TidyOption optLinked;
691 TidyIterator pos = tidyOptGetDocLinksList(tdoc, topt);
692 while( pos )
693 {
694 optLinked = tidyOptGetNextDocLinks(tdoc, &pos );
695 printf(" <seealso>%s</seealso>\n",tidyOptGetName(optLinked));
696 }
697 }
698
699 static
700 void printXMLOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
701 {
702 if ( tidyOptIsReadOnly(topt) )
703 return;
704
705 printf( " <option class=\"%s\">\n", d->cat );
706 printf (" <name>%s</name>\n",d->name);
707 printf (" <type>%s</type>\n",d->type);
708 if (d->def)
709 printf(" <default>%s</default>\n",d->def);
710 else
711 printf(" <default />\n");
712 if (d->haveVals)
713 {
714 printf(" <example>");
715 PrintAllowedValues( topt, d );
716 printf("</example>\n");
717 }
718 else
719 {
720 printf(" <example />\n");
721 }
722 printXMLDescription( tdoc, topt );
723 printXMLCrossRef( tdoc, topt );
724 printf( " </option>\n" );
725 }
726
727 static void XMLoptionhelp( TidyDoc tdoc )
728 {
729 printf( "<?xml version=\"1.0\"?>\n"
730 "<config version=\"%s\">\n", tidyReleaseDate());
731 ForEachOption( tdoc, printXMLOption );
732 printf( "</config>\n" );
733 }
734
735 static
736 tmbstr GetAllowedValuesFromPick( TidyOption topt )
737 {
738 TidyIterator pos;
739 Bool first;
740 ctmbstr def;
741 uint len = 0;
742 tmbstr val;
743
744 pos = tidyOptGetPickList( topt );
745 first = yes;
746 while ( pos )
747 {
748 if (first)
749 first = no;
750 else
751 len += 2;
752 def = tidyOptGetNextPick( topt, &pos );
753 len += strlen(def);
754 }
755 val = (tmbstr)malloc(len+1);
756 val[0] = '\0';
757 pos = tidyOptGetPickList( topt );
758 first = yes;
759 while ( pos )
760 {
761 if (first)
762 first = no;
763 else
764 strcat(val, ", ");
765 def = tidyOptGetNextPick( topt, &pos );
766 strcat(val, def);
767 }
768 return val;
769 }
770
771 static
772 tmbstr GetAllowedValues( TidyOption topt, const OptionDesc *d )
773 {
774 if (d->vals)
775 {
776 tmbstr val = (tmbstr)malloc(1+strlen(d->vals));
777 strcpy(val, d->vals);
778 return val;
779 }
780 else
781 return GetAllowedValuesFromPick( topt );
782 }
783
784 static
785 void printOption( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
786 OptionDesc *d )
787 {
788 if ( tidyOptIsReadOnly(topt) )
789 return;
790
791 if ( *d->name || *d->type )
792 {
793 ctmbstr pval = d->vals;
794 tmbstr val = NULL;
795 if (!d->haveVals)
796 {
797 pval = "-";
798 }
799 else if (pval == NULL)
800 {
801 val = GetAllowedValues( topt, d);
802 pval = val;
803 }
804 print3Columns( fmt, 27, 9, 40, d->name, d->type, pval );
805 if (val)
806 free(val);
807 }
808 }
809
810 static void optionhelp( TidyDoc tdoc )
811 {
812 printf( "\nHTML Tidy Configuration Settings\n\n" );
813 printf( "Within a file, use the form:\n\n" );
814 printf( "wrap: 72\n" );
815 printf( "indent: no\n\n" );
816 printf( "When specified on the command line, use the form:\n\n" );
817 printf( "--wrap 72 --indent no\n\n");
818
819 printf( fmt, "Name", "Type", "Allowable values" );
820 printf( fmt, ul, ul, ul );
821
822 ForEachSortedOption( tdoc, printOption );
823 }
824
825 static
826 void printOptionValues( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
827 OptionDesc *d )
828 {
829 TidyOptionId optId = tidyOptGetId( topt );
830 ctmbstr ro = tidyOptIsReadOnly( topt ) ? "*" : "" ;
831
832 switch ( optId )
833 {
834 case TidyInlineTags:
835 case TidyBlockTags:
836 case TidyEmptyTags:
837 case TidyPreTags:
838 {
839 TidyIterator pos = tidyOptGetDeclTagList( tdoc );
840 while ( pos )
841 {
842 d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos);
843 if ( pos )
844 {
845 if ( *d->name )
846 printf( valfmt, d->name, d->type, ro, d->def );
847 else
848 printf( fmt, d->name, d->type, d->def );
849 d->name = "";
850 d->type = "";
851 }
852 }
853 }
854 break;
855 case TidyNewline:
856 d->def = tidyOptGetCurrPick( tdoc, optId );
857 break;
858 }
859
860 /* fix for http://tidy.sf.net/bug/873921 */
861 if ( *d->name || *d->type || (d->def && *d->def) )
862 {
863 if ( ! d->def )
864 d->def = "";
865 if ( *d->name )
866 printf( valfmt, d->name, d->type, ro, d->def );
867 else
868 printf( fmt, d->name, d->type, d->def );
869 }
870 }
871
872 static void optionvalues( TidyDoc tdoc )
873 {
874 printf( "\nConfiguration File Settings:\n\n" );
875 printf( fmt, "Name", "Type", "Current Value" );
876 printf( fmt, ul, ul, ul );
877
878 ForEachSortedOption( tdoc, printOptionValues );
879
880 printf( "\n\nValues marked with an *asterisk are calculated \n"
881 "internally by HTML Tidy\n\n" );
882 }
883
884 static void version( void )
885 {
886 #ifdef PLATFORM_NAME
887 printf( "HTML Tidy for %s released on %s\n",
888 PLATFORM_NAME, tidyReleaseDate() );
889 #else
890 printf( "HTML Tidy released on %s\n", tidyReleaseDate() );
891 #endif
892 }
893
894 static void unknownOption( uint c )
895 {
896 fprintf( errout, "HTML Tidy: unknown option: %c\n", (char)c );
897 }
898
899 int main( int argc, char** argv )
900 {
901 ctmbstr prog = argv[0];
902 ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
903 TidyDoc tdoc = tidyCreate();
904 int status = 0;
905
906 uint contentErrors = 0;
907 uint contentWarnings = 0;
908 uint accessWarnings = 0;
909
910 errout = stderr; /* initialize to stderr */
911 status = 0;
912
913 #ifdef CONFIG_FILE
914 if ( tidyFileExists(CONFIG_FILE) )
915 {
916 status = tidyLoadConfig( tdoc, CONFIG_FILE );
917 if ( status != 0 )
918 fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", CONFIG_FILE, status);
919 }
920 #endif /* CONFIG_FILE */
921
922 /* look for env var "HTML_TIDY" */
923 /* then for ~/.tidyrc (on platforms defining $HOME) */
924
925 if ( cfgfil = getenv("HTML_TIDY") )
926 {
927 status = tidyLoadConfig( tdoc, cfgfil );
928 if ( status != 0 )
929 fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status);
930 }
931 #ifdef USER_CONFIG_FILE
932 else if ( tidyFileExists(USER_CONFIG_FILE) )
933 {
934 status = tidyLoadConfig( tdoc, USER_CONFIG_FILE );
935 if ( status != 0 )
936 fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", USER_CONFIG_FILE, status);
937 }
938 #endif /* USER_CONFIG_FILE */
939
940 /* read command line */
941 while ( argc > 0 )
942 {
943 if (argc > 1 && argv[1][0] == '-')
944 {
945 /* support -foo and --foo */
946 ctmbstr arg = argv[1] + 1;
947
948 if ( strcasecmp(arg, "xml") == 0)
949 tidyOptSetBool( tdoc, TidyXmlTags, yes );
950
951 else if ( strcasecmp(arg, "asxml") == 0 ||
952 strcasecmp(arg, "asxhtml") == 0 )
953 {
954 tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
955 }
956 else if ( strcasecmp(arg, "ashtml") == 0 )
957 tidyOptSetBool( tdoc, TidyHtmlOut, yes );
958
959 else if ( strcasecmp(arg, "indent") == 0 )
960 {
961 tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
962 if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
963 tidyOptResetToDefault( tdoc, TidyIndentSpaces );
964 }
965 else if ( strcasecmp(arg, "omit") == 0 )
966 tidyOptSetBool( tdoc, TidyHideEndTags, yes );
967
968 else if ( strcasecmp(arg, "upper") == 0 )
969 tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
970
971 else if ( strcasecmp(arg, "clean") == 0 )
972 tidyOptSetBool( tdoc, TidyMakeClean, yes );
973
974 else if ( strcasecmp(arg, "bare") == 0 )
975 tidyOptSetBool( tdoc, TidyMakeBare, yes );
976
977 else if ( strcasecmp(arg, "raw") == 0 ||
978 strcasecmp(arg, "ascii") == 0 ||
979 strcasecmp(arg, "latin0") == 0 ||
980 strcasecmp(arg, "latin1") == 0 ||
981 strcasecmp(arg, "utf8") == 0 ||
982 #ifndef NO_NATIVE_ISO2022_SUPPORT
983 strcasecmp(arg, "iso2022") == 0 ||
984 #endif
985 #if SUPPORT_UTF16_ENCODINGS
986 strcasecmp(arg, "utf16le") == 0 ||
987 strcasecmp(arg, "utf16be") == 0 ||
988 strcasecmp(arg, "utf16") == 0 ||
989 #endif
990 #if SUPPORT_ASIAN_ENCODINGS
991 strcasecmp(arg, "shiftjis") == 0 ||
992 strcasecmp(arg, "big5") == 0 ||
993 #endif
994 strcasecmp(arg, "mac") == 0 ||
995 strcasecmp(arg, "win1252") == 0 ||
996 strcasecmp(arg, "ibm858") == 0 )
997 {
998 tidySetCharEncoding( tdoc, arg );
999 }
1000 else if ( strcasecmp(arg, "numeric") == 0 )
1001 tidyOptSetBool( tdoc, TidyNumEntities, yes );
1002
1003 else if ( strcasecmp(arg, "modify") == 0 ||
1004 strcasecmp(arg, "change") == 0 || /* obsolete */
1005 strcasecmp(arg, "update") == 0 ) /* obsolete */
1006 {
1007 tidyOptSetBool( tdoc, TidyWriteBack, yes );
1008 }
1009 else if ( strcasecmp(arg, "errors") == 0 )
1010 tidyOptSetBool( tdoc, TidyShowMarkup, no );
1011
1012 else if ( strcasecmp(arg, "quiet") == 0 )
1013 tidyOptSetBool( tdoc, TidyQuiet, yes );
1014
1015 else if ( strcasecmp(arg, "help") == 0 ||
1016 strcasecmp(arg, "h") == 0 || *arg == '?' )
1017 {
1018 help( prog );
1019 tidyRelease( tdoc );
1020 return 0; /* success */
1021 }
1022 else if ( strcasecmp(arg, "xml-help") == 0)
1023 {
1024 xml_help( );
1025 tidyRelease( tdoc );
1026 return 0; /* success */
1027 }
1028 else if ( strcasecmp(arg, "help-config") == 0 )
1029 {
1030 optionhelp( tdoc );
1031 tidyRelease( tdoc );
1032 return 0; /* success */
1033 }
1034 else if ( strcasecmp(arg, "xml-config") == 0 )
1035 {
1036 XMLoptionhelp( tdoc );
1037 tidyRelease( tdoc );
1038 return 0; /* success */
1039 }
1040 else if ( strcasecmp(arg, "show-config") == 0 )
1041 {
1042 optionvalues( tdoc );
1043 tidyRelease( tdoc );
1044 return 0; /* success */
1045 }
1046 else if ( strcasecmp(arg, "config") == 0 )
1047 {
1048 if ( argc >= 3 )
1049 {
1050 ctmbstr post;
1051
1052 tidyLoadConfig( tdoc, argv[2] );
1053
1054 /* Set new error output stream if setting changed */
1055 post = tidyOptGetValue( tdoc, TidyErrFile );
1056 if ( post && (!errfil || !samefile(errfil, post)) )
1057 {
1058 errfil = post;
1059 errout = tidySetErrorFile( tdoc, post );
1060 }
1061
1062 --argc;
1063 ++argv;
1064 }
1065 }
1066
1067 #if SUPPORT_ASIAN_ENCODINGS
1068 else if ( strcasecmp(arg, "language") == 0 ||
1069 strcasecmp(arg, "lang") == 0 )
1070 {
1071 if ( argc >= 3 )
1072 {
1073 tidyOptSetValue( tdoc, TidyLanguage, argv[2] );
1074 --argc;
1075 ++argv;
1076 }
1077 }
1078 #endif
1079
1080 else if ( strcasecmp(arg, "output") == 0 ||
1081 strcasecmp(arg, "-output-file") == 0 ||
1082 strcasecmp(arg, "o") == 0 )
1083 {
1084 if ( argc >= 3 )
1085 {
1086 tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
1087 --argc;
1088 ++argv;
1089 }
1090 }
1091 else if ( strcasecmp(arg, "file") == 0 ||
1092 strcasecmp(arg, "-file") == 0 ||
1093 strcasecmp(arg, "f") == 0 )
1094 {
1095 if ( argc >= 3 )
1096 {
1097 errfil = argv[2];
1098 errout = tidySetErrorFile( tdoc, errfil );
1099 --argc;
1100 ++argv;
1101 }
1102 }
1103 else if ( strcasecmp(arg, "wrap") == 0 ||
1104 strcasecmp(arg, "-wrap") == 0 ||
1105 strcasecmp(arg, "w") == 0 )
1106 {
1107 if ( argc >= 3 )
1108 {
1109 uint wraplen = 0;
1110 int nfields = sscanf( argv[2], "%u", &wraplen );
1111 tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
1112 if (nfields > 0)
1113 {
1114 --argc;
1115 ++argv;
1116 }
1117 }
1118 }
1119 else if ( strcasecmp(arg, "version") == 0 ||
1120 strcasecmp(arg, "-version") == 0 ||
1121 strcasecmp(arg, "v") == 0 )
1122 {
1123 version();
1124 tidyRelease( tdoc );
1125 return 0; /* success */
1126
1127 }
1128 else if ( strncmp(argv[1], "--", 2 ) == 0)
1129 {
1130 if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
1131 {
1132 /* Set new error output stream if setting changed */
1133 ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
1134 if ( post && (!errfil || !samefile(errfil, post)) )
1135 {
1136 errfil = post;
1137 errout = tidySetErrorFile( tdoc, post );
1138 }
1139
1140 ++argv;
1141 --argc;
1142 }
1143 }
1144
1145 #if SUPPORT_ACCESSIBILITY_CHECKS
1146 else if ( strcasecmp(arg, "access") == 0 )
1147 {
1148 if ( argc >= 3 )
1149 {
1150 uint acclvl = 0;
1151 int nfields = sscanf( argv[2], "%u", &acclvl );
1152 tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
1153 if (nfields > 0)
1154 {
1155 --argc;
1156 ++argv;
1157 }
1158 }
1159 }
1160 #endif
1161
1162 else
1163 {
1164 uint c;
1165 ctmbstr s = argv[1];
1166
1167 while ( c = *++s )
1168 {
1169 switch ( c )
1170 {
1171 case 'i':
1172 tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
1173 if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
1174 tidyOptResetToDefault( tdoc, TidyIndentSpaces );
1175 break;
1176
1177 /* Usurp -o for output file. Anyone hiding end tags?
1178 case 'o':
1179 tidyOptSetBool( tdoc, TidyHideEndTags, yes );
1180 break;
1181 */
1182
1183 case 'u':
1184 tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
1185 break;
1186
1187 case 'c':
1188 tidyOptSetBool( tdoc, TidyMakeClean, yes );
1189 break;
1190
1191 case 'b':
1192 tidyOptSetBool( tdoc, TidyMakeBare, yes );
1193 break;
1194
1195 case 'n':
1196 tidyOptSetBool( tdoc, TidyNumEntities, yes );
1197 break;
1198
1199 case 'm':
1200 tidyOptSetBool( tdoc, TidyWriteBack, yes );
1201 break;
1202
1203 case 'e':
1204 tidyOptSetBool( tdoc, TidyShowMarkup, no );
1205 break;
1206
1207 case 'q':
1208 tidyOptSetBool( tdoc, TidyQuiet, yes );
1209 break;
1210
1211 default:
1212 unknownOption( c );
1213 break;
1214 }
1215 }
1216 }
1217
1218 --argc;
1219 ++argv;
1220 continue;
1221 }
1222
1223 if ( argc > 1 )
1224 {
1225 htmlfil = argv[1];
1226 if ( tidyOptGetBool(tdoc, TidyEmacs) )
1227 tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
1228 status = tidyParseFile( tdoc, htmlfil );
1229 }
1230 else
1231 {
1232 htmlfil = "stdin";
1233 status = tidyParseStdin( tdoc );
1234 }
1235
1236 if ( status >= 0 )
1237 status = tidyCleanAndRepair( tdoc );
1238
1239 if ( status >= 0 )
1240 status = tidyRunDiagnostics( tdoc );
1241
1242 if ( status > 1 ) /* If errors, do we want to force output? */
1243 status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );
1244
1245 if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
1246 {
1247 if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
1248 status = tidySaveFile( tdoc, htmlfil );
1249 else
1250 {
1251 ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
1252 if ( outfil )
1253 status = tidySaveFile( tdoc, outfil );
1254 else
1255 status = tidySaveStdout( tdoc );
1256 }
1257 }
1258
1259 contentErrors += tidyErrorCount( tdoc );
1260 contentWarnings += tidyWarningCount( tdoc );
1261 accessWarnings += tidyAccessWarningCount( tdoc );
1262
1263 --argc;
1264 ++argv;
1265
1266 if ( argc <= 1 )
1267 break;
1268 }
1269
1270 if (!tidyOptGetBool(tdoc, TidyQuiet) &&
1271 errout == stderr && !contentErrors)
1272 fprintf(errout, "\n");
1273
1274 if (contentErrors + contentWarnings > 0 &&
1275 !tidyOptGetBool(tdoc, TidyQuiet))
1276 tidyErrorSummary(tdoc);
1277
1278 if (!tidyOptGetBool(tdoc, TidyQuiet))
1279 tidyGeneralInfo(tdoc);
1280
1281 /* called to free hash tables etc. */
1282 tidyRelease( tdoc );
1283
1284 /* return status can be used by scripts */
1285 if ( contentErrors > 0 )
1286 return 2;
1287
1288 if ( contentWarnings > 0 )
1289 return 1;
1290
1291 /* 0 signifies all is ok */
1292 return 0;
1293 }
1294
/*
 * local variables:
 * mode: c
 * indent-tabs-mode: nil
 * c-basic-offset: 4
 * eval: (c-set-offset 'substatement-open 0)
 * end:
 */
1303
/* This page was automatically generated by the LXR engine.
   Visit the LXR main site for more information. */