~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/console/tidy.c

Version: ~ [ 1.0 ] ~

  1 /*
  2   tidy.c - HTML TidyLib command line driver
  3 
  4   Copyright (c) 1998-2005 World Wide Web Consortium
  5   (Massachusetts Institute of Technology, European Research 
  6   Consortium for Informatics and Mathematics, Keio University).
  7   All Rights Reserved.
  8 
  9   CVS Info :
 10 
 11     $Author: arnaud02 $ 
 12     $Date: 2005/10/21 09:53:23 $ 
 13     $Revision: 1.41 $ 
 14 */
 15 
 16 #include "tidy.h"
 17 
 18 static FILE* errout = NULL;  /* set to stderr */
 19 /* static FILE* txtout = NULL; */  /* set to stdout */
 20 
 21 static Bool samefile( ctmbstr filename1, ctmbstr filename2 )
 22 {
 23 #if FILENAMES_CASE_SENSITIVE
 24     return ( strcmp( filename1, filename2 ) == 0 );
 25 #else
 26     return ( strcasecmp( filename1, filename2 ) == 0 );
 27 #endif
 28 }
 29 
 30 static const char *cutToWhiteSpace(const char *s, uint offset, char *sbuf)
 31 {
 32     if (!s)
 33     {
 34         sbuf[0] = '\0';
 35         return NULL;
 36     }
 37     else if (strlen(s) <= offset)
 38     {
 39         strcpy(sbuf,s);
 40         sbuf[offset] = '\0';
 41         return NULL;
 42     }
 43     else
 44     {
 45         uint j, l, n;
 46         j = offset;
 47         while(j && s[j] != ' ')
 48             --j;
 49         l = j;
 50         n = j+1;
 51         /* no white space */
 52         if (j==0)
 53         {
 54             l = offset;
 55             n = offset;
 56         }
 57         strncpy(sbuf,s,l);
 58         sbuf[l] = '\0';
 59         return s+n;
 60     }
 61 }
 62 
 63 static void print2Columns( const char* fmt, uint l1, uint l2,
 64                            const char *c1, const char *c2 )
 65 {
 66     const char *pc1=c1, *pc2=c2;
 67     char *c1buf = (char *)malloc(l1+1);
 68     char *c2buf = (char *)malloc(l2+1);
 69 
 70     do
 71     {
 72         pc1 = cutToWhiteSpace(pc1, l1, c1buf);
 73         pc2 = cutToWhiteSpace(pc2, l2, c2buf);
 74         printf(fmt,
 75                c1buf[0]!='\0'?c1buf:"",
 76                c2buf[0]!='\0'?c2buf:"");
 77     } while (pc1 || pc2);
 78     free(c1buf);
 79     free(c2buf);
 80 }
 81 
 82 static void print3Columns( const char* fmt, uint l1, uint l2, uint l3,
 83                            const char *c1, const char *c2, const char *c3 )
 84 {
 85     const char *pc1=c1, *pc2=c2, *pc3=c3;
 86     char *c1buf = (char *)malloc(l1+1);
 87     char *c2buf = (char *)malloc(l2+1);
 88     char *c3buf = (char *)malloc(l3+1);
 89 
 90     do
 91     {
 92         pc1 = cutToWhiteSpace(pc1, l1, c1buf);
 93         pc2 = cutToWhiteSpace(pc2, l2, c2buf);
 94         pc3 = cutToWhiteSpace(pc3, l3, c3buf);
 95         printf(fmt,
 96                c1buf[0]!='\0'?c1buf:"",
 97                c2buf[0]!='\0'?c2buf:"",
 98                c3buf[0]!='\0'?c3buf:"");
 99     } while (pc1 || pc2 || pc3);
100     free(c1buf);
101     free(c2buf);
102     free(c3buf);
103 }
104 
/* Row layout of the command line help: option names, then description.
   The widths match the 19/58 passed to print2Columns. */
static const char helpfmt[] = " %-19.19s %-58.58s\n";

/* Dashes used to underline a category heading in the command line help. */
static const char helpul[]
        = "-----------------------------------------------------------------";

/* Row layout of the configuration listings: name, type, third column.
   The widths match the 27/9/40 passed to print3Columns. */
static const char fmt[] = "%-27.27s %-9.9s  %-40.40s\n";

/* Same as fmt, except that one character in front of the value column is
   taken by a marker string (see printOptionValues). */
static const char valfmt[] = "%-27.27s %-9.9s %-1.1s%-39.39s\n";

/* Rule printed under the column titles of the configuration listings. */
static const char ul[]
        = "=================================================================";
112 
/* Categories used to group the command line options in the help output.
 *
 * CmdOptCatFIRST aliases the first real category and CmdOptCatLAST is one
 * past the last, so that
 *     for (cat = CmdOptCatFIRST; cat != CmdOptCatLAST; ++cat)
 * visits every category.  The values also index cmdopt_catname.
 *
 * No comma follows the last enumerator: a trailing comma is only accepted
 * from C99 onwards.
 */
typedef enum
{
  CmdOptFileManip,
  CmdOptCatFIRST = CmdOptFileManip,
  CmdOptProcDir,
  CmdOptCharEnc,
  CmdOptMisc,
  CmdOptCatLAST
} CmdOptCategory;
122 
123 static const struct {
124     ctmbstr mnemonic;
125     ctmbstr name;
126 } cmdopt_catname[] = {
127     { "file-manip", "File manipulation" },
128     { "process-directives", "Processing directives" },
129     { "char-encoding", "Character encodings" },
130     { "misc", "Miscellaneous" }
131 };
132 
133 typedef struct {
134     ctmbstr name1;      /**< Name */
135     ctmbstr desc;       /**< Description */
136     ctmbstr eqconfig;   /**< Equivalent configuration option */
137     CmdOptCategory cat; /**< Category */
138     ctmbstr name2;      /**< Name */
139     ctmbstr name3;      /**< Name */
140 } CmdOptDesc;
141 
142 static const CmdOptDesc cmdopt_defs[] =  {
143     { "-output <file>",
144       "write output to the specified <file>",
145       "output-file: <file>", CmdOptFileManip, "-o <file>" },
146     { "-config <file>",
147       "set configuration options from the specified <file>",
148       NULL, CmdOptFileManip },
149     { "-file <file>",
150       "write errors to the specified <file>",
151       "error-file: <file>", CmdOptFileManip, "-f <file>" },
152     { "-modify",
153       "modify the original input files",
154       "write-back: yes", CmdOptFileManip, "-m" },
155     { "-indent",
156       "indent element content",
157       "indent: auto", CmdOptProcDir, "-i" },
158     { "-wrap <column>",
159       "wrap text at the specified <column>"
160       ". 0 is assumed if <column> is missing. "
161       "When this option is omitted, the default of the configuration option "
162       "\"wrap\" applies.",
163       "wrap: <column>", CmdOptProcDir, "-w <column>" },
164     { "-upper",
165       "force tags to upper case",
166       "uppercase-tags: yes", CmdOptProcDir, "-u" },
167     { "-clean",
168       "replace FONT, NOBR and CENTER tags by CSS",
169       "clean: yes", CmdOptProcDir, "-c" },
170     { "-bare",
171       "strip out smart quotes and em dashes, etc.",
172       "bare: yes", CmdOptProcDir, "-b" },
173     { "-numeric",
174       "output numeric rather than named entities",
175       "numeric-entities: yes", CmdOptProcDir, "-n" },
176     { "-errors",
177       "only show errors",
178       "markup: no", CmdOptProcDir, "-e" },
179     { "-quiet",
180       "suppress nonessential output",
181       "quiet: yes", CmdOptProcDir, "-q" },
182     { "-omit",
183       "omit optional end tags",
184       "hide-endtags: yes", CmdOptProcDir },
185     { "-xml",
186       "specify the input is well formed XML",
187       "input-xml: yes", CmdOptProcDir },
188     { "-asxml",
189       "convert HTML to well formed XHTML",
190       "output-xhtml: yes", CmdOptProcDir, "-asxhtml" },
191     { "-ashtml",
192       "force XHTML to well formed HTML",
193       "output-html: yes", CmdOptProcDir },
194 #if SUPPORT_ACCESSIBILITY_CHECKS
195     { "-access <level>",
196       "do additional accessibility checks (<level> = 0, 1, 2, 3)"
197       ". 0 is assumed if <level> is missing.",
198       "accessibility-check: <level>", CmdOptProcDir },
199 #endif
200     { "-raw",
201       "output values above 127 without conversion to entities",
202       NULL, CmdOptCharEnc },
203     { "-ascii",
204       "use ISO-8859-1 for input, US-ASCII for output",
205       NULL, CmdOptCharEnc },
206     { "-latin0",
207       "use ISO-8859-15 for input, US-ASCII for output",
208       NULL, CmdOptCharEnc },
209     { "-latin1",
210       "use ISO-8859-1 for both input and output",
211       NULL, CmdOptCharEnc },
212 #ifndef NO_NATIVE_ISO2022_SUPPORT
213     { "-iso2022",
214       "use ISO-2022 for both input and output",
215       NULL, CmdOptCharEnc },
216 #endif
217     { "-utf8",
218       "use UTF-8 for both input and output",
219       NULL, CmdOptCharEnc },
220     { "-mac",
221       "use MacRoman for input, US-ASCII for output",
222       NULL, CmdOptCharEnc },
223     { "-win1252",
224       "use Windows-1252 for input, US-ASCII for output",
225       NULL, CmdOptCharEnc },
226     { "-ibm858",
227       "use IBM-858 (CP850+Euro) for input, US-ASCII for output",
228       NULL, CmdOptCharEnc },
229 #if SUPPORT_UTF16_ENCODINGS
230     { "-utf16le",
231       "use UTF-16LE for both input and output",
232       NULL, CmdOptCharEnc },
233     { "-utf16be",
234       "use UTF-16BE for both input and output",
235       NULL, CmdOptCharEnc },
236     { "-utf16",
237       "use UTF-16 for both input and output",
238       NULL, CmdOptCharEnc },
239 #endif
240 #if SUPPORT_ASIAN_ENCODINGS /* #431953 - RJ */
241     { "-big5",
242       "use Big5 for both input and output",
243       NULL, CmdOptCharEnc },
244     { "-shiftjis",
245       "use Shift_JIS for both input and output",
246       NULL, CmdOptCharEnc },
247     { "-language <lang>",
248       "set the two-letter language code <lang> (for future use)",
249       "language: <lang>", CmdOptCharEnc },
250 #endif
251     { "-version",
252       "show the version of Tidy",
253       NULL, CmdOptMisc, "-v" },
254     { "-help",
255       "list the command line options",
256       NULL, CmdOptMisc, "-h", "-?" },
257     { "-xml-help",
258       "list the command line options in XML format",
259       NULL, CmdOptMisc },
260     { "-help-config",
261       "list all configuration options",
262       NULL, CmdOptMisc },
263     { "-xml-config",
264       "list all configuration options in XML format",
265       NULL, CmdOptMisc },
266     { "-show-config",
267       "list the current configuration settings",
268       NULL, CmdOptMisc },
269     { NULL, NULL, NULL, CmdOptMisc }
270 };
271 
272 static tmbstr get_option_names( const CmdOptDesc* pos )
273 {
274     tmbstr name;
275     uint len = strlen(pos->name1);
276     if (pos->name2)
277         len += 2+strlen(pos->name2);
278     if (pos->name3)
279         len += 2+strlen(pos->name3);
280 
281     name = (tmbstr)malloc(len+1);
282     strcpy(name, pos->name1);
283     if (pos->name2)
284     {
285         strcat(name, ", ");
286         strcat(name, pos->name2);
287     }
288     if (pos->name3)
289     {
290         strcat(name, ", ");
291         strcat(name, pos->name3);
292     }
293     return name;
294 }
295 
296 static tmbstr get_escaped_name( ctmbstr name )
297 {
298     tmbstr escpName;
299     char aux[2];
300     uint len = 0;
301     ctmbstr c;
302     for(c=name; *c!='\0'; ++c)
303         switch(*c)
304         {
305         case '<':
306         case '>':
307             len += 4;
308             break;
309         case '"':
310             len += 6;
311             break;
312         default:
313             len += 1;
314             break;
315         }
316 
317     escpName = (tmbstr)malloc(len+1);
318     escpName[0] = '\0';
319 
320     aux[1] = '\0';
321     for(c=name; *c!='\0'; ++c)
322         switch(*c)
323         {
324         case '<':
325             strcat(escpName, "&lt;");
326             break;
327         case '>':
328             strcat(escpName, "&gt;");
329             break;
330         case '"':
331             strcat(escpName, "&quot;");
332             break;
333         default:
334             aux[0] = *c;
335             strcat(escpName, aux);
336             break;
337         }
338 
339     return escpName;
340 }
341 
342 static void print_help_option( void )
343 {
344     CmdOptCategory cat = CmdOptCatFIRST;
345     const CmdOptDesc* pos = cmdopt_defs;
346 
347     for( cat=CmdOptCatFIRST; cat!=CmdOptCatLAST; ++cat)
348     {
349         size_t len =  strlen(cmdopt_catname[cat].name);
350         printf("%s\n", cmdopt_catname[cat].name );
351         printf("%*.*s\n", (int)len, (int)len, helpul );
352         for( pos=cmdopt_defs; pos->name1; ++pos)
353         {
354             tmbstr name;
355             if (pos->cat != cat)
356                 continue;
357             name = get_option_names( pos );
358             print2Columns( helpfmt, 19, 58, name, pos->desc );
359             free(name);
360         }
361         printf("\n");
362     }
363 }
364 
365 static void print_xml_help_option_element( ctmbstr element, ctmbstr name )
366 {
367     tmbstr escpName;
368     if (!name)
369         return;
370     printf("  <%s>%s</%s>\n", element, escpName = get_escaped_name(name),
371            element);
372     free(escpName);
373 }
374 
375 static void print_xml_help_option( void )
376 {
377     const CmdOptDesc* pos = cmdopt_defs;
378 
379     for( pos=cmdopt_defs; pos->name1; ++pos)
380     {
381         printf(" <option class=\"%s\">\n", cmdopt_catname[pos->cat].mnemonic );
382         print_xml_help_option_element("name", pos->name1);
383         print_xml_help_option_element("name", pos->name2);
384         print_xml_help_option_element("name", pos->name3);
385         print_xml_help_option_element("description", pos->desc);
386         if (pos->eqconfig)
387             print_xml_help_option_element("eqconfig", pos->eqconfig);
388         else
389             printf("  <eqconfig />\n");
390         printf(" </option>\n");
391     }
392 }
393 
/* Print the command line options as an XML document: the XML declaration,
 * a <cmdline> root element whose version attribute is the Tidy release
 * date, and one <option> element per option.
 */
static void xml_help( void )
{
    printf( "<?xml version=\"1.0\"?>\n" );
    printf( "<cmdline version=\"%s\">\n", tidyReleaseDate() );

    print_xml_help_option();

    printf( "</cmdline>\n" );
}
401 
402 static void help( ctmbstr prog )
403 {
404     printf( "%s [option...] [file...] [option...] [file...]\n", prog );
405     printf( "Utility to clean up and pretty print HTML/XHTML/XML\n");
406     printf( "see http://tidy.sourceforge.net/\n");
407     printf( "\n");
408 
409 #ifdef PLATFORM_NAME
410     printf( "Options for HTML Tidy for %s released on %s:\n",
411              PLATFORM_NAME, tidyReleaseDate() );
412 #else
413     printf( "Options for HTML Tidy released on %s:\n", tidyReleaseDate() );
414 #endif
415     printf( "\n");
416 
417     print_help_option();
418 
419     printf( "Use --blah blarg for any configuration option \"blah\" with argument \"blarg\"\n");
420     printf( "\n");
421 
422     printf( "Input/Output default to stdin/stdout respectively\n");
423     printf( "Single letter options apart from -f may be combined\n");
424     printf( "as in:  tidy -f errs.txt -imu foo.html\n");
425     printf( "For further info on HTML see http://www.w3.org/MarkUp\n");
426     printf( "\n");
427 }
428 
429 static Bool isAutoBool( TidyOption topt )
430 {
431     TidyIterator pos;
432     ctmbstr def;
433 
434     if ( tidyOptGetType( topt ) != TidyInteger)
435         return no;
436 
437     pos = tidyOptGetPickList( topt );
438     while ( pos )
439     {
440         def = tidyOptGetNextPick( topt, &pos );
441         if (0==strcmp(def,"yes"))
442            return yes;
443     }
444     return no;
445 }
446 
447 static
448 ctmbstr ConfigCategoryName( TidyConfigCategory id )
449 {
450     switch( id )
451     {
452     case TidyMarkup:
453         return "markup";
454     case TidyDiagnostics:
455         return "diagnostics";
456     case TidyPrettyPrint:
457         return "print";
458     case TidyEncoding:
459         return "encoding";
460     case TidyMiscellaneous:
461         return "misc";
462     }
463     fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", id);
464     assert(0);
465     abort();
466 }
467 
468 /* Description of an option */
469 typedef struct {
470     ctmbstr name;  /**< Name */
471     ctmbstr cat;   /**< Category */
472     ctmbstr type;  /**< "String, ... */
473     ctmbstr vals;  /**< Potential values. If NULL, use an external function */
474     ctmbstr def;   /**< default */
475     tmbchar tempdefs[80]; /**< storage for default such as integer */
476     Bool haveVals; /**< if yes, vals is valid */
477 } OptionDesc;
478 
479 typedef void (*OptionFunc)( TidyDoc, TidyOption, OptionDesc * );
480 
481 
482 /* Create description "d" related to "opt" */
483 static
484 void GetOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
485 {
486     TidyOptionId optId = tidyOptGetId( topt );
487     TidyOptionType optTyp = tidyOptGetType( topt );
488 
489     d->name = tidyOptGetName( topt );
490     d->cat = ConfigCategoryName( tidyOptGetCategory( topt ) );
491     d->vals = NULL;
492     d->def = NULL;
493     d->haveVals = yes;
494 
495     /* Handle special cases first.
496      */
497     switch ( optId )
498     {
499     case TidyDuplicateAttrs:
500     case TidyNewline:
501     case TidyAccessibilityCheckLevel:
502         d->type = "enum";
503         d->vals = NULL;
504         d->def =
505             optId==TidyNewline ?
506             "<em>Platform dependent</em>"
507             :tidyOptGetCurrPick( tdoc, optId );
508         break;
509 
510     case TidyDoctype:
511         d->type = "DocType";
512         d->vals = NULL;
513         {
514             ctmbstr sdef = NULL;
515             sdef = tidyOptGetCurrPick( tdoc, TidyDoctypeMode );
516             if ( !sdef || *sdef == '*' )
517                 sdef = tidyOptGetValue( tdoc, TidyDoctype );
518             d->def = sdef;
519         }
520         break;
521 
522     case TidyInlineTags:
523     case TidyBlockTags:
524     case TidyEmptyTags:
525     case TidyPreTags:
526         d->type = "Tag names";
527         d->vals = "tagX, tagY, ...";
528         d->def = NULL;
529         break;
530 
531     case TidyCharEncoding:
532     case TidyInCharEncoding:
533     case TidyOutCharEncoding:
534         d->type = "Encoding";
535         d->def = tidyOptGetEncName( tdoc, optId );
536         if (!d->def)
537             d->def = "?";
538         d->vals = NULL;
539         break;
540 
541         /* General case will handle remaining */
542     default:
543         switch ( optTyp )
544         {
545         case TidyBoolean:
546             d->type = "Boolean";
547             d->vals = "y/n, yes/no, t/f, true/false, 1/0";
548             d->def = tidyOptGetCurrPick( tdoc, optId );
549             break;
550 
551         case TidyInteger:
552             if (isAutoBool(topt))
553             {
554                 d->type = "AutoBool";
555                 d->vals = "auto, y/n, yes/no, t/f, true/false, 1/0";
556                 d->def = tidyOptGetCurrPick( tdoc, optId );
557             }
558             else
559             {
560                 uint idef;
561                 d->type = "Integer";
562                 if ( optId == TidyWrapLen )
563                     d->vals = "0 (no wrapping), 1, 2, ...";
564                 else
565                     d->vals = "0, 1, 2, ...";
566 
567                 idef = tidyOptGetInt( tdoc, optId );
568                 sprintf(d->tempdefs, "%u", idef);
569                 d->def = d->tempdefs;
570             }
571             break;
572 
573         case TidyString:
574             d->type = "String";
575             d->vals = NULL;
576             d->haveVals = no;
577             d->def = tidyOptGetValue( tdoc, optId );
578             break;
579         }
580     }
581 }
582 
583 /* Array holding all options. Contains a trailing sentinel. */
584 typedef struct {
585     TidyOption topt[N_TIDY_OPTIONS];
586 } AllOption_t;
587 
588 static
589 int cmpOpt(const void* e1_, const void *e2_)
590 {
591     const TidyOption* e1 = (const TidyOption*)e1_;
592     const TidyOption* e2 = (const TidyOption*)e2_;
593     return strcmp(tidyOptGetName(*e1), tidyOptGetName(*e2));
594 }
595 
596 static
597 void getSortedOption( TidyDoc tdoc, AllOption_t *tOption )
598 {
599     TidyIterator pos = tidyGetOptionList( tdoc );
600     uint i = 0;
601 
602     while ( pos )
603     {
604         TidyOption topt = tidyGetNextOption( tdoc, &pos );
605         tOption->topt[i] = topt;
606         ++i;
607     }
608     tOption->topt[i] = NULL; /* sentinel */
609 
610     qsort(tOption->topt,
611           /* Do not sort the sentinel: hence `-1' */
612           sizeof(tOption->topt)/sizeof(tOption->topt[0])-1,
613           sizeof(tOption->topt[0]),
614           cmpOpt);
615 }
616 
617 static void ForEachSortedOption( TidyDoc tdoc, OptionFunc OptionPrint )
618 {
619     AllOption_t tOption;
620     const TidyOption *topt;
621 
622     getSortedOption( tdoc, &tOption );
623     for( topt = tOption.topt; *topt; ++topt)
624     {
625         OptionDesc d;
626 
627         GetOption( tdoc, *topt, &d );
628         (*OptionPrint)( tdoc, *topt, &d );
629     }
630 }
631 
632 static void ForEachOption( TidyDoc tdoc, OptionFunc OptionPrint )
633 {
634     TidyIterator pos = tidyGetOptionList( tdoc );
635 
636     while ( pos )
637     {
638         TidyOption topt = tidyGetNextOption( tdoc, &pos );
639         OptionDesc d;
640 
641         GetOption( tdoc, topt, &d );
642         (*OptionPrint)( tdoc, topt, &d );
643     }
644 }
645 
646 static
647 void PrintAllowedValuesFromPick( TidyOption topt )
648 {
649     TidyIterator pos = tidyOptGetPickList( topt );
650     Bool first = yes;
651     ctmbstr def;
652     while ( pos )
653     {
654         if (first)
655             first = no;
656         else
657             printf(", ");
658         def = tidyOptGetNextPick( topt, &pos );
659         printf("%s", def);
660     }
661 }
662 
663 static
664 void PrintAllowedValues( TidyOption topt, const OptionDesc *d )
665 {
666     if (d->vals)
667         printf( "%s", d->vals );
668     else
669         PrintAllowedValuesFromPick( topt );
670 }
671 
672 static
673 void printXMLDescription( TidyDoc tdoc, TidyOption topt )
674 {
675     ctmbstr doc = tidyOptGetDoc( tdoc, topt );
676 
677     if (doc)
678         printf("  <description>%s</description>\n", doc);
679     else
680     {
681         printf("  <description />\n");
682         fprintf(stderr, "Warning: option `%s' is not documented.\n",
683                 tidyOptGetName( topt ));
684     }
685 }
686 
687 static
688 void printXMLCrossRef( TidyDoc tdoc, TidyOption topt )
689 {
690     TidyOption optLinked;
691     TidyIterator pos = tidyOptGetDocLinksList(tdoc, topt);
692     while( pos )
693     {
694         optLinked = tidyOptGetNextDocLinks(tdoc, &pos );
695         printf("  <seealso>%s</seealso>\n",tidyOptGetName(optLinked));
696     }
697 }
698 
699 static
700 void printXMLOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
701 {
702     if ( tidyOptIsReadOnly(topt) )
703         return;
704 
705     printf( " <option class=\"%s\">\n", d->cat );
706     printf  ("  <name>%s</name>\n",d->name);
707     printf  ("  <type>%s</type>\n",d->type);
708     if (d->def)
709         printf("  <default>%s</default>\n",d->def);
710     else
711         printf("  <default />\n");
712     if (d->haveVals)
713     {
714         printf("  <example>");
715         PrintAllowedValues( topt, d );
716         printf("</example>\n");
717     }
718     else
719     {
720         printf("  <example />\n");
721     }
722     printXMLDescription( tdoc, topt );
723     printXMLCrossRef( tdoc, topt );
724     printf( " </option>\n" );
725 }
726 
727 static void XMLoptionhelp( TidyDoc tdoc )
728 {
729     printf( "<?xml version=\"1.0\"?>\n"
730             "<config version=\"%s\">\n", tidyReleaseDate());
731     ForEachOption( tdoc, printXMLOption );
732     printf( "</config>\n" );
733 }
734 
735 static
736 tmbstr GetAllowedValuesFromPick( TidyOption topt )
737 {
738     TidyIterator pos;
739     Bool first;
740     ctmbstr def;
741     uint len = 0;
742     tmbstr val;
743 
744     pos = tidyOptGetPickList( topt );
745     first = yes;
746     while ( pos )
747     {
748         if (first)
749             first = no;
750         else
751             len += 2;
752         def = tidyOptGetNextPick( topt, &pos );
753         len += strlen(def);
754     }
755     val = (tmbstr)malloc(len+1);
756     val[0] = '\0';
757     pos = tidyOptGetPickList( topt );
758     first = yes;
759     while ( pos )
760     {
761         if (first)
762             first = no;
763         else
764             strcat(val, ", ");
765         def = tidyOptGetNextPick( topt, &pos );
766         strcat(val, def);
767     }
768     return val;
769 }
770 
771 static
772 tmbstr GetAllowedValues( TidyOption topt, const OptionDesc *d )
773 {
774     if (d->vals)
775     {
776         tmbstr val = (tmbstr)malloc(1+strlen(d->vals));
777         strcpy(val, d->vals);
778         return val;
779     }
780     else
781         return GetAllowedValuesFromPick( topt );
782 }
783 
784 static
785 void printOption( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
786                   OptionDesc *d )
787 {
788     if ( tidyOptIsReadOnly(topt) )
789         return;
790 
791     if ( *d->name || *d->type )
792     {
793         ctmbstr pval = d->vals;
794         tmbstr val = NULL;
795         if (!d->haveVals)
796         {
797             pval = "-";
798         }
799         else if (pval == NULL)
800         {
801             val = GetAllowedValues( topt, d);
802             pval = val;
803         }
804         print3Columns( fmt, 27, 9, 40, d->name, d->type, pval );
805         if (val)
806             free(val);
807     }
808 }
809 
810 static void optionhelp( TidyDoc tdoc )
811 {
812     printf( "\nHTML Tidy Configuration Settings\n\n" );
813     printf( "Within a file, use the form:\n\n" );
814     printf( "wrap: 72\n" );
815     printf( "indent: no\n\n" );
816     printf( "When specified on the command line, use the form:\n\n" );
817     printf( "--wrap 72 --indent no\n\n");
818 
819     printf( fmt, "Name", "Type", "Allowable values" );
820     printf( fmt, ul, ul, ul );
821 
822     ForEachSortedOption( tdoc, printOption );
823 }
824 
825 static
826 void printOptionValues( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
827                         OptionDesc *d )
828 {
829     TidyOptionId optId = tidyOptGetId( topt );
830     ctmbstr ro = tidyOptIsReadOnly( topt ) ? "*" : "" ;
831 
832     switch ( optId )
833     {
834     case TidyInlineTags:
835     case TidyBlockTags:
836     case TidyEmptyTags:
837     case TidyPreTags:
838         {
839             TidyIterator pos = tidyOptGetDeclTagList( tdoc );
840             while ( pos )
841             {
842                 d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos);
843                 if ( pos )
844                 {
845                     if ( *d->name )
846                         printf( valfmt, d->name, d->type, ro, d->def );
847                     else
848                         printf( fmt, d->name, d->type, d->def );
849                     d->name = "";
850                     d->type = "";
851                 }
852             }
853         }
854         break;
855     case TidyNewline:
856         d->def = tidyOptGetCurrPick( tdoc, optId );
857         break;
858     }
859 
860     /* fix for http://tidy.sf.net/bug/873921 */
861     if ( *d->name || *d->type || (d->def && *d->def) )
862     {
863         if ( ! d->def )
864             d->def = "";
865         if ( *d->name )
866             printf( valfmt, d->name, d->type, ro, d->def );
867         else
868             printf( fmt, d->name, d->type, d->def );
869     }
870 }
871 
872 static void optionvalues( TidyDoc tdoc )
873 {
874     printf( "\nConfiguration File Settings:\n\n" );
875     printf( fmt, "Name", "Type", "Current Value" );
876     printf( fmt, ul, ul, ul );
877 
878     ForEachSortedOption( tdoc, printOptionValues );
879 
880     printf( "\n\nValues marked with an *asterisk are calculated \n"
881             "internally by HTML Tidy\n\n" );
882 }
883 
/* Print the one line version banner with the release date; the platform
   is named as well when PLATFORM_NAME is defined. */
static void version( void )
{
#ifndef PLATFORM_NAME
    printf( "HTML Tidy released on %s\n", tidyReleaseDate() );
#else
    printf( "HTML Tidy for %s released on %s\n",
             PLATFORM_NAME, tidyReleaseDate() );
#endif
}
893 
894 static void unknownOption( uint c )
895 {
896     fprintf( errout, "HTML Tidy: unknown option: %c\n", (char)c );
897 }
898 
899 int main( int argc, char** argv )
900 {
901     ctmbstr prog = argv[0];
902     ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
903     TidyDoc tdoc = tidyCreate();
904     int status = 0;
905 
906     uint contentErrors = 0;
907     uint contentWarnings = 0;
908     uint accessWarnings = 0;
909 
910     errout = stderr;  /* initialize to stderr */
911     status = 0;
912     
913 #ifdef CONFIG_FILE
914     if ( tidyFileExists(CONFIG_FILE) )
915     {
916         status = tidyLoadConfig( tdoc, CONFIG_FILE );
917         if ( status != 0 )
918             fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", CONFIG_FILE, status);
919     }
920 #endif /* CONFIG_FILE */
921 
922     /* look for env var "HTML_TIDY" */
923     /* then for ~/.tidyrc (on platforms defining $HOME) */
924 
925     if ( cfgfil = getenv("HTML_TIDY") )
926     {
927         status = tidyLoadConfig( tdoc, cfgfil );
928         if ( status != 0 )
929             fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status);
930     }
931 #ifdef USER_CONFIG_FILE
932     else if ( tidyFileExists(USER_CONFIG_FILE) )
933     {
934         status = tidyLoadConfig( tdoc, USER_CONFIG_FILE );
935         if ( status != 0 )
936             fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", USER_CONFIG_FILE, status);
937     }
938 #endif /* USER_CONFIG_FILE */
939 
940     /* read command line */
941     while ( argc > 0 )
942     {
943         if (argc > 1 && argv[1][0] == '-')
944         {
945             /* support -foo and --foo */
946             ctmbstr arg = argv[1] + 1;
947 
948             if ( strcasecmp(arg, "xml") == 0)
949                 tidyOptSetBool( tdoc, TidyXmlTags, yes );
950 
951             else if ( strcasecmp(arg,   "asxml") == 0 ||
952                       strcasecmp(arg, "asxhtml") == 0 )
953             {
954                 tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
955             }
956             else if ( strcasecmp(arg,   "ashtml") == 0 )
957                 tidyOptSetBool( tdoc, TidyHtmlOut, yes );
958 
959             else if ( strcasecmp(arg, "indent") == 0 )
960             {
961                 tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
962                 if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
963                     tidyOptResetToDefault( tdoc, TidyIndentSpaces );
964             }
965             else if ( strcasecmp(arg, "omit") == 0 )
966                 tidyOptSetBool( tdoc, TidyHideEndTags, yes );
967 
968             else if ( strcasecmp(arg, "upper") == 0 )
969                 tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
970 
971             else if ( strcasecmp(arg, "clean") == 0 )
972                 tidyOptSetBool( tdoc, TidyMakeClean, yes );
973 
974             else if ( strcasecmp(arg, "bare") == 0 )
975                 tidyOptSetBool( tdoc, TidyMakeBare, yes );
976 
977             else if ( strcasecmp(arg, "raw") == 0      ||
978                       strcasecmp(arg, "ascii") == 0    ||
979                       strcasecmp(arg, "latin0") == 0   ||
980                       strcasecmp(arg, "latin1") == 0   ||
981                       strcasecmp(arg, "utf8") == 0     ||
982 #ifndef NO_NATIVE_ISO2022_SUPPORT
983                       strcasecmp(arg, "iso2022") == 0  ||
984 #endif
985 #if SUPPORT_UTF16_ENCODINGS
986                       strcasecmp(arg, "utf16le") == 0  ||
987                       strcasecmp(arg, "utf16be") == 0  ||
988                       strcasecmp(arg, "utf16") == 0    ||
989 #endif
990 #if SUPPORT_ASIAN_ENCODINGS
991                       strcasecmp(arg, "shiftjis") == 0 ||
992                       strcasecmp(arg, "big5") == 0     ||
993 #endif
994                       strcasecmp(arg, "mac") == 0      ||
995                       strcasecmp(arg, "win1252") == 0  ||
996                       strcasecmp(arg, "ibm858") == 0 )
997             {
998                 tidySetCharEncoding( tdoc, arg );
999             }
1000             else if ( strcasecmp(arg, "numeric") == 0 )
1001                 tidyOptSetBool( tdoc, TidyNumEntities, yes );
1002 
1003             else if ( strcasecmp(arg, "modify") == 0 ||
1004                       strcasecmp(arg, "change") == 0 ||  /* obsolete */
1005                       strcasecmp(arg, "update") == 0 )   /* obsolete */
1006             {
1007                 tidyOptSetBool( tdoc, TidyWriteBack, yes );
1008             }
1009             else if ( strcasecmp(arg, "errors") == 0 )
1010                 tidyOptSetBool( tdoc, TidyShowMarkup, no );
1011 
1012             else if ( strcasecmp(arg, "quiet") == 0 )
1013                 tidyOptSetBool( tdoc, TidyQuiet, yes );
1014 
1015             else if ( strcasecmp(arg, "help") == 0 ||
1016                       strcasecmp(arg,    "h") == 0 || *arg == '?' )
1017             {
1018                 help( prog );
1019                 tidyRelease( tdoc );
1020                 return 0; /* success */
1021             }
1022             else if ( strcasecmp(arg, "xml-help") == 0)
1023             {
1024                 xml_help( );
1025                 tidyRelease( tdoc );
1026                 return 0; /* success */
1027             }
1028             else if ( strcasecmp(arg, "help-config") == 0 )
1029             {
1030                 optionhelp( tdoc );
1031                 tidyRelease( tdoc );
1032                 return 0; /* success */
1033             }
1034             else if ( strcasecmp(arg, "xml-config") == 0 )
1035             {
1036                 XMLoptionhelp( tdoc );
1037                 tidyRelease( tdoc );
1038                 return 0; /* success */
1039             }
1040             else if ( strcasecmp(arg, "show-config") == 0 )
1041             {
1042                 optionvalues( tdoc );
1043                 tidyRelease( tdoc );
1044                 return 0; /* success */
1045             }
1046             else if ( strcasecmp(arg, "config") == 0 )
1047             {
1048                 if ( argc >= 3 )
1049                 {
1050                     ctmbstr post;
1051 
1052                     tidyLoadConfig( tdoc, argv[2] );
1053 
1054                     /* Set new error output stream if setting changed */
1055                     post = tidyOptGetValue( tdoc, TidyErrFile );
1056                     if ( post && (!errfil || !samefile(errfil, post)) )
1057                     {
1058                         errfil = post;
1059                         errout = tidySetErrorFile( tdoc, post );
1060                     }
1061 
1062                     --argc;
1063                     ++argv;
1064                 }
1065             }
1066 
1067 #if SUPPORT_ASIAN_ENCODINGS
1068             else if ( strcasecmp(arg, "language") == 0 ||
1069                       strcasecmp(arg,     "lang") == 0 )
1070             {
1071                 if ( argc >= 3 )
1072                 {
1073                     tidyOptSetValue( tdoc, TidyLanguage, argv[2] );
1074                     --argc;
1075                     ++argv;
1076                 }
1077             }
1078 #endif
1079 
1080             else if ( strcasecmp(arg, "output") == 0 ||
1081                       strcasecmp(arg, "-output-file") == 0 ||
1082                       strcasecmp(arg, "o") == 0 )
1083             {
1084                 if ( argc >= 3 )
1085                 {
1086                     tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
1087                     --argc;
1088                     ++argv;
1089                 }
1090             }
1091             else if ( strcasecmp(arg,  "file") == 0 ||
1092                       strcasecmp(arg, "-file") == 0 ||
1093                       strcasecmp(arg,     "f") == 0 )
1094             {
1095                 if ( argc >= 3 )
1096                 {
1097                     errfil = argv[2];
1098                     errout = tidySetErrorFile( tdoc, errfil );
1099                     --argc;
1100                     ++argv;
1101                 }
1102             }
1103             else if ( strcasecmp(arg,  "wrap") == 0 ||
1104                       strcasecmp(arg, "-wrap") == 0 ||
1105                       strcasecmp(arg,     "w") == 0 )
1106             {
1107                 if ( argc >= 3 )
1108                 {
1109                     uint wraplen = 0;
1110                     int nfields = sscanf( argv[2], "%u", &wraplen );
1111                     tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
1112                     if (nfields > 0)
1113                     {
1114                         --argc;
1115                         ++argv;
1116                     }
1117                 }
1118             }
1119             else if ( strcasecmp(arg,  "version") == 0 ||
1120                       strcasecmp(arg, "-version") == 0 ||
1121                       strcasecmp(arg,        "v") == 0 )
1122             {
1123                 version();
1124                 tidyRelease( tdoc );
1125                 return 0;  /* success */
1126 
1127             }
1128             else if ( strncmp(argv[1], "--", 2 ) == 0)
1129             {
1130                 if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
1131                 {
1132                     /* Set new error output stream if setting changed */
1133                     ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
1134                     if ( post && (!errfil || !samefile(errfil, post)) )
1135                     {
1136                         errfil = post;
1137                         errout = tidySetErrorFile( tdoc, post );
1138                     }
1139 
1140                     ++argv;
1141                     --argc;
1142                 }
1143             }
1144 
1145 #if SUPPORT_ACCESSIBILITY_CHECKS
1146             else if ( strcasecmp(arg, "access") == 0 )
1147             {
1148                 if ( argc >= 3 )
1149                 {
1150                     uint acclvl = 0;
1151                     int nfields = sscanf( argv[2], "%u", &acclvl );
1152                     tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
1153                     if (nfields > 0)
1154                     {
1155                         --argc;
1156                         ++argv;
1157                     }
1158                 }
1159             }
1160 #endif
1161 
1162             else
1163             {
1164                 uint c;
1165                 ctmbstr s = argv[1];
1166 
1167                 while ( c = *++s )
1168                 {
1169                     switch ( c )
1170                     {
1171                     case 'i':
1172                         tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
1173                         if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
1174                             tidyOptResetToDefault( tdoc, TidyIndentSpaces );
1175                         break;
1176 
1177                     /* Usurp -o for output file.  Anyone hiding end tags?
1178                     case 'o':
1179                         tidyOptSetBool( tdoc, TidyHideEndTags, yes );
1180                         break;
1181                     */
1182 
1183                     case 'u':
1184                         tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
1185                         break;
1186 
1187                     case 'c':
1188                         tidyOptSetBool( tdoc, TidyMakeClean, yes );
1189                         break;
1190 
1191                     case 'b':
1192                         tidyOptSetBool( tdoc, TidyMakeBare, yes );
1193                         break;
1194 
1195                     case 'n':
1196                         tidyOptSetBool( tdoc, TidyNumEntities, yes );
1197                         break;
1198 
1199                     case 'm':
1200                         tidyOptSetBool( tdoc, TidyWriteBack, yes );
1201                         break;
1202 
1203                     case 'e':
1204                         tidyOptSetBool( tdoc, TidyShowMarkup, no );
1205                         break;
1206 
1207                     case 'q':
1208                         tidyOptSetBool( tdoc, TidyQuiet, yes );
1209                         break;
1210 
1211                     default:
1212                         unknownOption( c );
1213                         break;
1214                     }
1215                 }
1216             }
1217 
1218             --argc;
1219             ++argv;
1220             continue;
1221         }
1222 
1223         if ( argc > 1 )
1224         {
1225             htmlfil = argv[1];
1226             if ( tidyOptGetBool(tdoc, TidyEmacs) )
1227                 tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
1228             status = tidyParseFile( tdoc, htmlfil );
1229         }
1230         else
1231         {
1232             htmlfil = "stdin";
1233             status = tidyParseStdin( tdoc );
1234         }
1235 
1236         if ( status >= 0 )
1237             status = tidyCleanAndRepair( tdoc );
1238 
1239         if ( status >= 0 )
1240             status = tidyRunDiagnostics( tdoc );
1241 
1242         if ( status > 1 ) /* If errors, do we want to force output? */
1243             status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );
1244 
1245         if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
1246         {
1247             if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
1248                 status = tidySaveFile( tdoc, htmlfil );
1249             else
1250             {
1251                 ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
1252                 if ( outfil )
1253                     status = tidySaveFile( tdoc, outfil );
1254                 else
1255                     status = tidySaveStdout( tdoc );
1256             }
1257         }
1258 
1259         contentErrors   += tidyErrorCount( tdoc );
1260         contentWarnings += tidyWarningCount( tdoc );
1261         accessWarnings  += tidyAccessWarningCount( tdoc );
1262 
1263         --argc;
1264         ++argv;
1265 
1266         if ( argc <= 1 )
1267             break;
1268     }
1269 
1270     if (!tidyOptGetBool(tdoc, TidyQuiet) &&
1271         errout == stderr && !contentErrors)
1272         fprintf(errout, "\n");
1273 
1274     if (contentErrors + contentWarnings > 0 && 
1275          !tidyOptGetBool(tdoc, TidyQuiet))
1276         tidyErrorSummary(tdoc);
1277 
1278     if (!tidyOptGetBool(tdoc, TidyQuiet))
1279         tidyGeneralInfo(tdoc);
1280 
1281     /* called to free hash tables etc. */
1282     tidyRelease( tdoc );
1283 
1284     /* return status can be used by scripts */
1285     if ( contentErrors > 0 )
1286         return 2;
1287 
1288     if ( contentWarnings > 0 )
1289         return 1;
1290 
1291     /* 0 signifies all is ok */
1292     return 0;
1293 }
1294 
1295 /*
1296  * local variables:
1297  * mode: c
1298  * indent-tabs-mode: nil
1299  * c-basic-offset: 4
1300  * eval: (c-set-offset 'substatement-open 0)
1301  * end:
1302  */
1303 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.