~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/config.c

Version: ~ [ 1.0 ] ~

  1 /*
  2   config.c -- read config file and manage config properties
  3   
  4   (c) 1998-2005 (W3C) MIT, ERCIM, Keio University
  5   See tidy.h for the copyright notice.
  6 
  7   CVS Info :
  8 
  9     $Author: arnaud02 $ 
 10     $Date: 2005/08/16 17:02:18 $ 
 11     $Revision: 1.92 $ 
 12 
 13 */
 14 
 15 /*
 16   config files associate a property name with a value.
 17 
 18   // comments can start at the beginning of a line
 19   # comments can start at the beginning of a line
 20   name: short values fit onto one line
 21   name: a really long value that
 22    continues on the next line
 23 
 24   property names are case insensitive and should be less than
 25   60 characters in length and must start at the beginning of
 26   the line, as whitespace at the start of a line signifies a
 27   line continuation.
 28 */
 29 
 30 #include "config.h"
 31 #include "tidy-int.h"
 32 #include "message.h"
 33 #include "tmbstr.h"
 34 #include "tags.h"
 35 
 36 #ifdef WINDOWS_OS
 37 #include <io.h>
 38 #else
 39 #ifdef DMALLOC
 40 /*
 41    macro for valloc() in dmalloc.h may conflict with declaration for valloc() in unistd.h -
 42    we don't need (debugging for) valloc() here. dmalloc.h should come last but it doesn't.
 43 */
 44 #ifdef valloc
 45 #undef valloc
 46 #endif
 47 #endif
 48 #include <unistd.h>
 49 #endif
 50 
 51 #ifdef TIDY_WIN32_MLANG_SUPPORT
 52 #include "win32tc.h"
 53 #endif
 54 
 55 void InitConfig( TidyDocImpl* doc )
 56 {
 57     ClearMemory( &doc->config, sizeof(TidyConfigImpl) );
 58     ResetConfigToDefault( doc );
 59 }
 60 
 61 void FreeConfig( TidyDocImpl* doc )
 62 {
 63     ResetConfigToDefault( doc );
 64     TakeConfigSnapshot( doc );
 65 }
 66 
 67 
 68 /* Arrange so index can be cast to enum
 69 */
 70 static const ctmbstr boolPicks[] = 
 71 {
 72   "no",
 73   "yes",
 74   NULL
 75 };
 76 
 77 static const ctmbstr autoBoolPicks[] = 
 78 {
 79   "no",
 80   "yes",
 81   "auto",
 82   NULL
 83 };
 84 
 85 static const ctmbstr repeatAttrPicks[] = 
 86 {
 87   "keep-first",
 88   "keep-last",
 89   NULL
 90 };
 91 
 92 static const ctmbstr accessPicks[] = 
 93 {
 94   "0 (Tidy Classic)",
 95   "1 (Priority 1 Checks)",
 96   "2 (Priority 2 Checks)",
 97   "3 (Priority 3 Checks)",
 98   NULL
 99 };
100 
101 static const ctmbstr charEncPicks[] = 
102 {
103   "raw",
104   "ascii",
105   "latin0",
106   "latin1",
107   "utf8",
108 #ifndef NO_NATIVE_ISO2022_SUPPORT
109   "iso2022",
110 #endif
111   "mac",
112   "win1252",
113   "ibm858",
114 
115 #if SUPPORT_UTF16_ENCODINGS
116   "utf16le",
117   "utf16be",
118   "utf16",
119 #endif
120 
121 #if SUPPORT_ASIAN_ENCODINGS
122   "big5",
123   "shiftjis",
124 #endif
125 
126   NULL
127 };
128 
129 static const ctmbstr newlinePicks[] = 
130 {
131   "LF",
132   "CRLF",
133   "CR",
134   NULL
135 };
136 
137 static const ctmbstr doctypePicks[] = 
138 {
139   "omit",
140   "auto",
141   "strict",
142   "transitional",
143   "user",
144   NULL 
145 };
146 
/* Short aliases that keep the rows of the option table narrow. */

/* option categories */
#define MU TidyMarkup
#define DG TidyDiagnostics
#define PP TidyPrettyPrint
#define CE TidyEncoding
#define MS TidyMiscellaneous

/* option value types */
#define IN TidyInteger
#define BL TidyBoolean
#define ST TidyString

/* "no category" / "no type" markers.  The replacement lists are fully
** parenthesized so the cast cannot regroup with neighbouring operators
** wherever the macro is expanded.
*/
#define XX ((TidyConfigCategory)-1)
#define XY ((TidyOptionType)-1)

/* default value of the "newline" option */
#define DLF DEFAULT_NL_CONFIG

/* If Accessibility checks not supported, make config setting read-only */
#if SUPPORT_ACCESSIBILITY_CHECKS
#define ParseAcc ParseInt
#else
#define ParseAcc NULL
#endif
168 
169 static const TidyOptionImpl option_defs[] =
170 {
171   { TidyUnknownOption,           MS, "unknown!",                    IN, 0,               NULL,              NULL            },
172   { TidyIndentSpaces,            PP, "indent-spaces",               IN, 2,               ParseInt,          NULL            },
173   { TidyWrapLen,                 PP, "wrap",                        IN, 68,              ParseInt,          NULL            },
174   { TidyTabSize,                 PP, "tab-size",                    IN, 8,               ParseInt,          NULL            },
175   { TidyCharEncoding,            CE, "char-encoding",               IN, ASCII,           ParseCharEnc,      charEncPicks    },
176   { TidyInCharEncoding,          CE, "input-encoding",              IN, LATIN1,          ParseCharEnc,      charEncPicks    },
177   { TidyOutCharEncoding,         CE, "output-encoding",             IN, ASCII,           ParseCharEnc,      charEncPicks    },
178   { TidyNewline,                 CE, "newline",                     IN, DLF,             ParseNewline,      newlinePicks    },
179   { TidyDoctypeMode,             MU, "doctype-mode",                IN, TidyDoctypeAuto, NULL,              doctypePicks    },
180   { TidyDoctype,                 MU, "doctype",                     ST, 0,               ParseDocType,      doctypePicks    },
181   { TidyDuplicateAttrs,          MU, "repeated-attributes",         IN, TidyKeepLast,    ParseRepeatAttr,   repeatAttrPicks },
182   { TidyAltText,                 MU, "alt-text",                    ST, 0,               ParseString,       NULL            },
183 
184   /* obsolete */
185   { TidySlideStyle,              MS, "slide-style",                 ST, 0,               ParseName,         NULL            },
186 
187   { TidyErrFile,                 MS, "error-file",                  ST, 0,               ParseString,       NULL            },
188   { TidyOutFile,                 MS, "output-file",                 ST, 0,               ParseString,       NULL            },
189   { TidyWriteBack,               MS, "write-back",                  BL, no,              ParseBool,         boolPicks       },
190   { TidyShowMarkup,              PP, "markup",                      BL, yes,             ParseBool,         boolPicks       },
191   { TidyShowWarnings,            DG, "show-warnings",               BL, yes,             ParseBool,         boolPicks       },
192   { TidyQuiet,                   MS, "quiet",                       BL, no,              ParseBool,         boolPicks       },
193   { TidyIndentContent,           PP, "indent",                      IN, TidyNoState,     ParseAutoBool,     autoBoolPicks   },
194   { TidyHideEndTags,             MU, "hide-endtags",                BL, no,              ParseBool,         boolPicks       },
195   { TidyXmlTags,                 MU, "input-xml",                   BL, no,              ParseBool,         boolPicks       },
196   { TidyXmlOut,                  MU, "output-xml",                  BL, no,              ParseBool,         boolPicks       },
197   { TidyXhtmlOut,                MU, "output-xhtml",                BL, no,              ParseBool,         boolPicks       },
198   { TidyHtmlOut,                 MU, "output-html",                 BL, no,              ParseBool,         boolPicks       },
199   { TidyXmlDecl,                 MU, "add-xml-decl",                BL, no,              ParseBool,         boolPicks       },
200   { TidyUpperCaseTags,           MU, "uppercase-tags",              BL, no,              ParseBool,         boolPicks       },
201   { TidyUpperCaseAttrs,          MU, "uppercase-attributes",        BL, no,              ParseBool,         boolPicks       },
202   { TidyMakeBare,                MU, "bare",                        BL, no,              ParseBool,         boolPicks       },
203   { TidyMakeClean,               MU, "clean",                       BL, no,              ParseBool,         boolPicks       },
204   { TidyLogicalEmphasis,         MU, "logical-emphasis",            BL, no,              ParseBool,         boolPicks       },
205   { TidyDropPropAttrs,           MU, "drop-proprietary-attributes", BL, no,              ParseBool,         boolPicks       },
206   { TidyDropFontTags,            MU, "drop-font-tags",              BL, no,              ParseBool,         boolPicks       },
207   { TidyDropEmptyParas,          MU, "drop-empty-paras",            BL, yes,             ParseBool,         boolPicks       },
208   { TidyFixComments,             MU, "fix-bad-comments",            BL, yes,             ParseBool,         boolPicks       },
209   { TidyBreakBeforeBR,           PP, "break-before-br",             BL, no,              ParseBool,         boolPicks       },
210 
211   /* obsolete */
212   { TidyBurstSlides,             PP, "split",                       BL, no,              ParseBool,         boolPicks       },
213 
214   { TidyNumEntities,             MU, "numeric-entities",            BL, no,              ParseBool,         boolPicks       },
215   { TidyQuoteMarks,              MU, "quote-marks",                 BL, no,              ParseBool,         boolPicks       },
216   { TidyQuoteNbsp,               MU, "quote-nbsp",                  BL, yes,             ParseBool,         boolPicks       },
217   { TidyQuoteAmpersand,          MU, "quote-ampersand",             BL, yes,             ParseBool,         boolPicks       },
218   { TidyWrapAttVals,             PP, "wrap-attributes",             BL, no,              ParseBool,         boolPicks       },
219   { TidyWrapScriptlets,          PP, "wrap-script-literals",        BL, no,              ParseBool,         boolPicks       },
220   { TidyWrapSection,             PP, "wrap-sections",               BL, yes,             ParseBool,         boolPicks       },
221   { TidyWrapAsp,                 PP, "wrap-asp",                    BL, yes,             ParseBool,         boolPicks       },
222   { TidyWrapJste,                PP, "wrap-jste",                   BL, yes,             ParseBool,         boolPicks       },
223   { TidyWrapPhp,                 PP, "wrap-php",                    BL, yes,             ParseBool,         boolPicks       },
224   { TidyFixBackslash,            MU, "fix-backslash",               BL, yes,             ParseBool,         boolPicks       },
225   { TidyIndentAttributes,        PP, "indent-attributes",           BL, no,              ParseBool,         boolPicks       },
226   { TidyXmlPIs,                  MU, "assume-xml-procins",          BL, no,              ParseBool,         boolPicks       },
227   { TidyXmlSpace,                MU, "add-xml-space",               BL, no,              ParseBool,         boolPicks       },
228   { TidyEncloseBodyText,         MU, "enclose-text",                BL, no,              ParseBool,         boolPicks       },
229   { TidyEncloseBlockText,        MU, "enclose-block-text",          BL, no,              ParseBool,         boolPicks       },
230   { TidyKeepFileTimes,           MS, "keep-time",                   BL, no,              ParseBool,         boolPicks       },
231   { TidyWord2000,                MU, "word-2000",                   BL, no,              ParseBool,         boolPicks       },
232   { TidyMark,                    MS, "tidy-mark",                   BL, yes,             ParseBool,         boolPicks       },
233   { TidyEmacs,                   MS, "gnu-emacs",                   BL, no,              ParseBool,         boolPicks       },
234   { TidyEmacsFile,               MS, "gnu-emacs-file",              ST, 0,               ParseString,       NULL            },
235   { TidyLiteralAttribs,          MU, "literal-attributes",          BL, no,              ParseBool,         boolPicks       },
236   { TidyBodyOnly,                MU, "show-body-only",              BL, no,              ParseBool,         boolPicks       },
237   { TidyFixUri,                  MU, "fix-uri",                     BL, yes,             ParseBool,         boolPicks       },
238   { TidyLowerLiterals,           MU, "lower-literals",              BL, yes,             ParseBool,         boolPicks       },
239   { TidyHideComments,            MU, "hide-comments",               BL, no,              ParseBool,         boolPicks       },
240   { TidyIndentCdata,             MU, "indent-cdata",                BL, no,              ParseBool,         boolPicks       },
241   { TidyForceOutput,             MS, "force-output",                BL, no,              ParseBool,         boolPicks       },
242   { TidyShowErrors,              DG, "show-errors",                 IN, 6,               ParseInt,          NULL            },
243   { TidyAsciiChars,              CE, "ascii-chars",                 BL, no,              ParseBool,         boolPicks       },
244   { TidyJoinClasses,             MU, "join-classes",                BL, no,              ParseBool,         boolPicks       },
245   { TidyJoinStyles,              MU, "join-styles",                 BL, yes,             ParseBool,         boolPicks       },
246   { TidyEscapeCdata,             MU, "escape-cdata",                BL, no,              ParseBool,         boolPicks       },
247 #if SUPPORT_ASIAN_ENCODINGS
248   { TidyLanguage,                CE, "language",                    ST, 0,               ParseName,         NULL            },
249   { TidyNCR,                     MU, "ncr",                         BL, yes,             ParseBool,         boolPicks       },
250 #endif
251 #if SUPPORT_UTF16_ENCODINGS
252   { TidyOutputBOM,               CE, "output-bom",                  IN, TidyAutoState,   ParseAutoBool,     autoBoolPicks   },
253 #endif
254   { TidyReplaceColor,            MU, "replace-color",               BL, no,              ParseBool,         boolPicks       },
255   { TidyCSSPrefix,               MU, "css-prefix",                  ST, 0,               ParseCSS1Selector, NULL            },
256   { TidyInlineTags,              MU, "new-inline-tags",             ST, 0,               ParseTagNames,     NULL            },
257   { TidyBlockTags,               MU, "new-blocklevel-tags",         ST, 0,               ParseTagNames,     NULL            },
258   { TidyEmptyTags,               MU, "new-empty-tags",              ST, 0,               ParseTagNames,     NULL            },
259   { TidyPreTags,                 MU, "new-pre-tags",                ST, 0,               ParseTagNames,     NULL            },
260   { TidyAccessibilityCheckLevel, DG, "accessibility-check",         IN, 0,               ParseAcc,          accessPicks     },
261   { TidyVertSpace,               PP, "vertical-space",              BL, no,              ParseBool,         boolPicks       },
262 #if SUPPORT_ASIAN_ENCODINGS
263   { TidyPunctWrap,               PP, "punctuation-wrap",            BL, no,              ParseBool,         boolPicks       },
264 #endif
265   { TidyMergeDivs,               MU, "merge-divs",                  IN, TidyAutoState,   ParseAutoBool,     autoBoolPicks   },
266   { N_TIDY_OPTIONS,              XX, NULL,                          XY, 0,               NULL,              NULL            }
267 };
268 
269 /* Should only be called by options set by name
270 ** thus, it is cheaper to do a few scans than set
271 ** up every option in a hash table.
272 */
273 const TidyOptionImpl* lookupOption( ctmbstr s )
274 {
275     const TidyOptionImpl* np = option_defs;
276     for ( /**/; np < option_defs + N_TIDY_OPTIONS; ++np )
277     {
278         if ( tmbstrcasecmp(s, np->name) == 0 )
279             return np;
280     }
281     return NULL;
282 }
283 
284 const TidyOptionImpl* getOption( TidyOptionId optId )
285 {
286   if ( optId < N_TIDY_OPTIONS )
287       return option_defs + optId;
288   return NULL;
289 }
290 
291 
292 static void FreeOptionValue( const TidyOptionImpl* option, ulong value )
293 {
294     if ( value && option->type == TidyString && value != option->dflt )
295     {
296         MemFree( (void*) value );
297     }
298 }
299 
300 static void CopyOptionValue( const TidyOptionImpl* option,
301                              ulong* oldval, ulong newval )
302 {
303     assert( oldval != NULL );
304     FreeOptionValue( option, *oldval );
305 
306     if ( newval && option->type == TidyString && newval != option->dflt )
307         *oldval = (ulong) tmbstrdup( (ctmbstr) newval );
308     else
309         *oldval = newval;
310 }
311 
312 
313 Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val )
314 {
315    const TidyOptionImpl* option = &option_defs[ optId ];
316    Bool status = ( optId < N_TIDY_OPTIONS );
317    if ( status )
318    {
319       assert( option->id == optId && option->type == TidyString );
320       FreeOptionValue( option, doc->config.value[ optId ] );
321       doc->config.value[ optId ] = (ulong) tmbstrdup( val );
322    }
323    return status;
324 }
325 
326 Bool SetOptionInt( TidyDocImpl* doc, TidyOptionId optId, ulong val )
327 {
328    Bool status = ( optId < N_TIDY_OPTIONS );
329    if ( status )
330    {
331        assert( option_defs[ optId ].type == TidyInteger );
332        doc->config.value[ optId ] = val;
333    }
334    return status;
335 }
336 
337 Bool SetOptionBool( TidyDocImpl* doc, TidyOptionId optId, Bool val )
338 {
339    Bool status = ( optId < N_TIDY_OPTIONS );
340    if ( status )
341    {
342        assert( option_defs[ optId ].type == TidyBoolean );
343        doc->config.value[ optId ] = val;
344    }
345    return status;
346 }
347 
348 Bool ResetOptionToDefault( TidyDocImpl* doc, TidyOptionId optId )
349 {
350     Bool status = ( optId > 0 && optId < N_TIDY_OPTIONS );
351     if ( status )
352     {
353         const TidyOptionImpl* option = option_defs + optId;
354         ulong* value = &doc->config.value[ optId ];
355         assert( optId == option->id );
356         CopyOptionValue( option, value, option->dflt );
357     }
358     return status;
359 }
360 
361 static void ReparseTagType( TidyDocImpl* doc, TidyOptionId optId )
362 {
363     ctmbstr tagdecl = cfgStr( doc, optId );
364     tmbstr dupdecl = tmbstrdup( tagdecl );
365     ParseConfigValue( doc, optId, dupdecl );
366     MemFree( dupdecl );
367 }
368 
369 /* Not efficient, but effective */
370 static void ReparseTagDecls( TidyDocImpl* doc )
371 {
372     FreeDeclaredTags( doc, tagtype_null );
373     if ( cfg(doc, TidyInlineTags) )
374         ReparseTagType( doc, TidyInlineTags );
375     if ( cfg(doc, TidyBlockTags) )
376         ReparseTagType( doc, TidyBlockTags );
377     if ( cfg(doc, TidyEmptyTags) )
378         ReparseTagType( doc, TidyEmptyTags );
379     if ( cfg(doc, TidyPreTags) )
380         ReparseTagType( doc, TidyPreTags );
381 }
382 
383 void ResetConfigToDefault( TidyDocImpl* doc )
384 {
385     uint ixVal;
386     const TidyOptionImpl* option = option_defs;
387     ulong* value = &doc->config.value[ 0 ];
388     for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
389     {
390         assert( ixVal == (uint) option->id );
391         CopyOptionValue( option, &value[ixVal], option->dflt );
392     }
393     FreeDeclaredTags( doc, tagtype_null );
394 }
395 
396 void TakeConfigSnapshot( TidyDocImpl* doc )
397 {
398     uint ixVal;
399     const TidyOptionImpl* option = option_defs;
400     ulong* value = &doc->config.value[ 0 ];
401     ulong* snap  = &doc->config.snapshot[ 0 ];
402 
403     AdjustConfig( doc );  /* Make sure it's consistent */
404     for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
405     {
406         assert( ixVal == (uint) option->id );
407         CopyOptionValue( option, &snap[ixVal], value[ixVal] );
408     }
409 }
410 
411 void ResetConfigToSnapshot( TidyDocImpl* doc )
412 {
413     uint ixVal;
414     const TidyOptionImpl* option = option_defs;
415     ulong* value = &doc->config.value[ 0 ];
416     ulong* snap  = &doc->config.snapshot[ 0 ];
417 
418     for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
419     {
420         assert( ixVal == (uint) option->id );
421         CopyOptionValue( option, &value[ixVal], snap[ixVal] );
422     }
423     FreeDeclaredTags( doc, tagtype_null );
424     ReparseTagDecls( doc );
425 }
426 
427 void CopyConfig( TidyDocImpl* docTo, TidyDocImpl* docFrom )
428 {
429     if ( docTo != docFrom )
430     {
431         uint ixVal;
432         const TidyOptionImpl* option = option_defs;
433         ulong* from = &docFrom->config.value[ 0 ];
434         ulong* to   = &docTo->config.value[ 0 ];
435 
436         TakeConfigSnapshot( docTo );
437         for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
438         {
439             assert( ixVal == (uint) option->id );
440             CopyOptionValue( option, &to[ixVal], from[ixVal] );
441         }
442         ReparseTagDecls( docTo );
443         AdjustConfig( docTo );  /* Make sure it's consistent */
444     }
445 }
446 
447 
448 #ifdef _DEBUG
449 
450 /* Debug accessor functions will be type-safe and assert option type match */
451 ulong   _cfgGet( TidyDocImpl* doc, TidyOptionId optId )
452 {
453   assert( optId < N_TIDY_OPTIONS );
454   return doc->config.value[ optId ];
455 }
456 
457 Bool    _cfgGetBool( TidyDocImpl* doc, TidyOptionId optId )
458 {
459   ulong val = _cfgGet( doc, optId );
460   const TidyOptionImpl* opt = &option_defs[ optId ];
461   assert( opt && opt->type == TidyBoolean );
462   return (Bool) val;
463 }
464 
465 TidyTriState    _cfgGetAutoBool( TidyDocImpl* doc, TidyOptionId optId )
466 {
467   ulong val = _cfgGet( doc, optId );
468   const TidyOptionImpl* opt = &option_defs[ optId ];
469   assert( opt && opt->type == TidyInteger );
470   return (TidyTriState) val;
471 }
472 
473 ctmbstr _cfgGetString( TidyDocImpl* doc, TidyOptionId optId )
474 {
475   ulong val = _cfgGet( doc, optId );
476   const TidyOptionImpl* opt = &option_defs[ optId ];
477   assert( opt && opt->type == TidyString );
478   return (ctmbstr) val;
479 }
480 #endif
481 
482 
483 /* for use with Gnu Emacs */
484 void SetEmacsFilename( TidyDocImpl* doc, ctmbstr filename )
485 {
486     SetOptionValue( doc, TidyEmacsFile, filename );
487 }
488 
489 
490 static tchar GetC( TidyConfigImpl* config )
491 {
492     if ( config->cfgIn )
493         return ReadChar( config->cfgIn );
494     return EndOfStream;
495 }
496 
497 static tchar FirstChar( TidyConfigImpl* config )
498 {
499     config->c = GetC( config );
500     return config->c;
501 }
502 
503 static tchar AdvanceChar( TidyConfigImpl* config )
504 {
505     if ( config->c != EndOfStream )
506         config->c = GetC( config );
507     return config->c;
508 }
509 
510 static tchar SkipWhite( TidyConfigImpl* config )
511 {
512     while ( IsWhite(config->c) && !IsNewline(config->c) )
513         config->c = GetC( config );
514     return config->c;
515 }
516 
517 /* skip until end of line
518 static tchar SkipToEndofLine( TidyConfigImpl* config )
519 {
520     while ( config->c != EndOfStream )
521     {
522         config->c = GetC( config );
523         if ( config->c == '\n' || config->c == '\r' )
524             break;
525     }
526     return config->c;
527 }
528 */
529 
530 /*
531  skip over line continuations
532  to start of next property
533 */
534 static uint NextProperty( TidyConfigImpl* config )
535 {
536     do
537     {
538         /* skip to end of line */
539         while ( config->c != '\n' &&  config->c != '\r' &&  config->c != EndOfStream )
540              config->c = GetC( config );
541 
542         /* treat  \r\n   \r  or  \n as line ends */
543         if ( config->c == '\r' )
544              config->c = GetC( config );
545 
546         if ( config->c == '\n' )
547             config->c = GetC( config );
548     }
549     while ( IsWhite(config->c) );  /* line continuation? */
550 
551     return config->c;
552 }
553 
554 /*
555  Todd Lewis contributed this code for expanding
556  ~/foo or ~your/foo according to $HOME and your
557  user name. This will work partially on any system 
558  which defines $HOME.  Support for ~user/foo will
559  work on systems that support getpwnam(userid), 
560  namely Unix/Linux.
561 */
562 ctmbstr ExpandTilde( ctmbstr filename )
563 {
564     char *home_dir = NULL;
565 
566     if ( !filename )
567         return NULL;
568 
569     if ( filename[0] != '~' )
570         return filename;
571 
572     if (filename[1] == '/')
573     {
574         home_dir = getenv("HOME");
575         if ( home_dir )
576             ++filename;
577     }
578 #ifdef SUPPORT_GETPWNAM
579     else
580     {
581         struct passwd *passwd = NULL;
582         ctmbstr s = filename + 1;
583         tmbstr t;
584 
585         while ( *s && *s != '/' )
586             s++;
587 
588         if ( t = MemAlloc(s - filename) )
589         {
590             memcpy(t, filename+1, s-filename-1);
591             t[s-filename-1] = 0;
592 
593             passwd = getpwnam(t);
594 
595             MemFree(t);
596         }
597 
598         if ( passwd )
599         {
600             filename = s;
601             home_dir = passwd->pw_dir;
602         }
603     }
604 #endif /* SUPPORT_GETPWNAM */
605 
606     if ( home_dir )
607     {
608         uint len = tmbstrlen(filename) + tmbstrlen(home_dir) + 1;
609         tmbstr p = (tmbstr)MemAlloc( len );
610         tmbstrcpy( p, home_dir );
611         tmbstrcat( p, filename );
612         return (ctmbstr) p;
613     }
614     return (ctmbstr) filename;
615 }
616 
617 Bool TIDY_CALL tidyFileExists( ctmbstr filename )
618 {
619   ctmbstr fname = (tmbstr) ExpandTilde( filename );
620 #ifndef NO_ACCESS_SUPPORT
621   Bool exists = ( access(fname, 0) == 0 );
622 #else
623   Bool exists;
624   /* at present */
625   FILE* fin = fopen(fname, "r");
626   if (fin != NULL)
627       fclose(fin);
628   exists = ( fin != NULL );
629 #endif
630   if ( fname != filename )
631       MemFree( (tmbstr) fname );
632   return exists;
633 }
634 
635 
/* Size of the buffer that receives an option name read from a config
** file, including the terminating NUL.  May be predefined by the build.
*/
#if !defined(TIDY_MAX_NAME)
#define TIDY_MAX_NAME 64
#endif
639 
640 int ParseConfigFile( TidyDocImpl* doc, ctmbstr file )
641 {
642     return ParseConfigFileEnc( doc, file, "ascii" );
643 }
644 
645 /* open the file and parse its contents
646 */
647 int ParseConfigFileEnc( TidyDocImpl* doc, ctmbstr file, ctmbstr charenc )
648 {
649     uint opterrs = doc->optionErrors;
650     tmbstr fname = (tmbstr) ExpandTilde( file );
651     TidyConfigImpl* cfg = &doc->config;
652     FILE* fin = fopen( fname, "r" );
653     int enc = CharEncodingId( charenc );
654 
655     if ( fin == NULL || enc < 0 )
656     {
657         FileError( doc, fname, TidyConfig );
658         return -1;
659     }
660     else
661     {
662         tchar c;
663         cfg->cfgIn = FileInput( doc, fin, enc );
664         c = FirstChar( cfg );
665        
666         for ( c = SkipWhite(cfg); c != EndOfStream; c = NextProperty(cfg) )
667         {
668             uint ix = 0;
669             tmbchar name[ TIDY_MAX_NAME ] = {0};
670 
671             /* // or # start a comment */
672             if ( c == '/' || c == '#' )
673                 continue;
674 
675             while ( ix < sizeof(name)-1 && c != '\n' && c != EndOfStream && c != ':' )
676             {
677                 name[ ix++ ] = (tmbchar) c;  /* Option names all ASCII */
678                 c = AdvanceChar( cfg );
679             }
680 
681             if ( c == ':' )
682             {
683                 const TidyOptionImpl* option = lookupOption( name );
684                 c = AdvanceChar( cfg );
685                 if ( option )
686                     option->parser( doc, option );
687                 else
688                 {
689                     if (NULL != doc->pOptCallback)
690                     {
691                         TidyConfigImpl* cfg = &doc->config;
692                         tmbchar buf[8192];
693                         uint i = 0;
694                         tchar delim = 0;
695                         Bool waswhite = yes;
696 
697                         tchar c = SkipWhite( cfg );
698 
699                         if ( c == '"' || c == '\'' )
700                         {
701                             delim = c;
702                             c = AdvanceChar( cfg );
703                         }
704 
705                         while ( i < sizeof(buf)-2 && c != EndOfStream && c != '\r' && c != '\n' )
706                         {
707                             if ( delim && c == delim )
708                                 break;
709 
710                             if ( IsWhite(c) )
711                             {
712                                 if ( waswhite )
713                                 {
714                                     c = AdvanceChar( cfg );
715                                     continue;
716                                 }
717                                 c = ' ';
718                             }
719                             else
720                                 waswhite = no;
721 
722                             buf[i++] = (tmbchar) c;
723                             c = AdvanceChar( cfg );
724                         }
725                         buf[i] = '\0';
726                         if (no == (*doc->pOptCallback)( name, buf ))
727                             ReportUnknownOption( doc, name );
728                     }
729                     else
730                         ReportUnknownOption( doc, name );
731                 }
732             }
733         }
734 
735         fclose( fin );
736         MemFree( (void *)cfg->cfgIn->source.sourceData ); /* fix for bug #810259 */
737         freeStreamIn( cfg->cfgIn );
738         cfg->cfgIn = NULL;
739     }
740 
741     if ( fname != (tmbstr) file )
742         MemFree( fname );
743 
744     AdjustConfig( doc );
745 
746     /* any new config errors? If so, return warning status. */
747     return (doc->optionErrors > opterrs ? 1 : 0); 
748 }
749 
750 /* returns false if unknown option, missing parameter,
751 ** or option doesn't use parameter
752 */
753 Bool ParseConfigOption( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optval )
754 {
755     const TidyOptionImpl* option = lookupOption( optnam );
756     Bool status = ( option != NULL );
757     if ( !status )
758     {
759         /* Not a standard tidy option.  Check to see if the user application 
760            recognizes it  */
761         if (NULL != doc->pOptCallback)
762             status = (*doc->pOptCallback)( optnam, optval );
763         if (!status)
764             ReportUnknownOption( doc, optnam );
765     }
766     else 
767         status = ParseConfigValue( doc, option->id, optval );
768     return status;
769 }
770 
771 /* returns false if unknown option, missing parameter,
772 ** or option doesn't use parameter
773 */
774 Bool ParseConfigValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optval )
775 {
776     const TidyOptionImpl* option = option_defs + optId;
777     Bool status = ( optId < N_TIDY_OPTIONS && optval != NULL );
778 
779     if ( !status )
780         ReportBadArgument( doc, option->name );
781     else
782     {
783         TidyBuffer inbuf = {0};            /* Set up input source */
784         tidyBufAttach( &inbuf, (byte*)optval, tmbstrlen(optval)+1 );
785         doc->config.cfgIn = BufferInput( doc, &inbuf, ASCII );
786         doc->config.c = GetC( &doc->config );
787 
788         status = option->parser( doc, option );
789 
790         freeStreamIn(doc->config.cfgIn);  /* Release input source */
791         doc->config.cfgIn  = NULL;
792         tidyBufDetach( &inbuf );
793     }
794     return status;
795 }
796 
797 
798 /* ensure that char encodings are self consistent */
799 Bool  AdjustCharEncoding( TidyDocImpl* doc, int encoding )
800 {
801     int outenc = -1;
802     int inenc = -1;
803     
804     switch( encoding )
805     {
806     case MACROMAN:
807         inenc = MACROMAN;
808         outenc = ASCII;
809         break;
810 
811     case WIN1252:
812         inenc = WIN1252;
813         outenc = ASCII;
814         break;
815 
816     case IBM858:
817         inenc = IBM858;
818         outenc = ASCII;
819         break;
820 
821     case ASCII:
822         inenc = LATIN1;
823         outenc = ASCII;
824         break;
825 
826     case LATIN0:
827         inenc = LATIN0;
828         outenc = ASCII;
829         break;
830 
831     case RAW:
832     case LATIN1:
833     case UTF8:
834 #ifndef NO_NATIVE_ISO2022_SUPPORT
835     case ISO2022:
836 #endif
837 
838 #if SUPPORT_UTF16_ENCODINGS
839     case UTF16LE:
840     case UTF16BE:
841     case UTF16:
842 #endif
843 #if SUPPORT_ASIAN_ENCODINGS
844     case SHIFTJIS:
845     case BIG5:
846 #endif
847         inenc = outenc = encoding;
848         break;
849     }
850 
851     if ( inenc >= 0 )
852     {
853         SetOptionInt( doc, TidyCharEncoding, encoding );
854         SetOptionInt( doc, TidyInCharEncoding, inenc );
855         SetOptionInt( doc, TidyOutCharEncoding, outenc );
856         return yes;
857     }
858     return no;
859 }
860 
861 /* ensure that config is self consistent */
862 void AdjustConfig( TidyDocImpl* doc )
863 {
864     if ( cfgBool(doc, TidyEncloseBlockText) )
865         SetOptionBool( doc, TidyEncloseBodyText, yes );
866 
867     if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState )
868         SetOptionInt( doc, TidyIndentSpaces, 0 );
869 
870     /* disable wrapping */
871     if ( cfg(doc, TidyWrapLen) == 0 )
872         SetOptionInt( doc, TidyWrapLen, 0x7FFFFFFF );
873 
874     /* Word 2000 needs o:p to be declared as inline */
875     if ( cfgBool(doc, TidyWord2000) )
876     {
877         doc->config.defined_tags |= tagtype_inline;
878         DefineTag( doc, tagtype_inline, "o:p" );
879     }
880 
881     /* #480701 disable XHTML output flag if both output-xhtml and xml input are set */
882     if ( cfgBool(doc, TidyXmlTags) )
883         SetOptionBool( doc, TidyXhtmlOut, no );
884 
885     /* XHTML is written in lower case */
886     if ( cfgBool(doc, TidyXhtmlOut) )
887     {
888         SetOptionBool( doc, TidyXmlOut, yes );
889         SetOptionBool( doc, TidyUpperCaseTags, no );
890         SetOptionBool( doc, TidyUpperCaseAttrs, no );
891         /* SetOptionBool( doc, TidyXmlPIs, yes ); */
892     }
893 
894     /* if XML in, then XML out */
895     if ( cfgBool(doc, TidyXmlTags) )
896     {
897         SetOptionBool( doc, TidyXmlOut, yes );
898         SetOptionBool( doc, TidyXmlPIs, yes );
899     }
900 
901     /* #427837 - fix by Dave Raggett 02 Jun 01
902     ** generate <?xml version="1.0" encoding="iso-8859-1"?>
903     ** if the output character encoding is Latin-1 etc.
904     */
905     if ( cfg(doc, TidyOutCharEncoding) != ASCII &&
906          cfg(doc, TidyOutCharEncoding) != UTF8 &&
907 #if SUPPORT_UTF16_ENCODINGS
908          cfg(doc, TidyOutCharEncoding) != UTF16 &&
909          cfg(doc, TidyOutCharEncoding) != UTF16BE &&
910          cfg(doc, TidyOutCharEncoding) != UTF16LE &&
911 #endif
912          cfg(doc, TidyOutCharEncoding) != RAW &&
913          cfgBool(doc, TidyXmlOut) )
914     {
915         SetOptionBool( doc, TidyXmlDecl, yes );
916     }
917 
918     /* XML requires end tags */
919     if ( cfgBool(doc, TidyXmlOut) )
920     {
921 #if SUPPORT_UTF16_ENCODINGS
922         /* XML requires a BOM on output if using UTF-16 encoding */
923         ulong enc = cfg( doc, TidyOutCharEncoding );
924         if ( enc == UTF16LE || enc == UTF16BE || enc == UTF16 )
925             SetOptionInt( doc, TidyOutputBOM, yes );
926 #endif
927         SetOptionBool( doc, TidyQuoteAmpersand, yes );
928         SetOptionBool( doc, TidyHideEndTags, no );
929     }
930 }
931 
932 /* unsigned integers */
933 Bool ParseInt( TidyDocImpl* doc, const TidyOptionImpl* entry )
934 {
935     ulong number = 0;
936     Bool digits = no;
937     TidyConfigImpl* cfg = &doc->config;
938     tchar c = SkipWhite( cfg );
939 
940     while ( IsDigit(c) )
941     {
942         number = c - '' + (10 * number);
943         digits = yes;
944         c = AdvanceChar( cfg );
945     }
946 
947     if ( !digits )
948         ReportBadArgument( doc, entry->name );
949     else
950         SetOptionInt( doc, entry->id, number );
951     return digits;
952 }
953 
954 /* true/false or yes/no or 0/1 or "auto" only looks at 1st char */
955 static Bool ParseTriState( TidyTriState theState, TidyDocImpl* doc,
956                     const TidyOptionImpl* entry, ulong* flag )
957 {
958     TidyConfigImpl* cfg = &doc->config;
959     tchar c = SkipWhite( cfg );
960 
961     if (c == 't' || c == 'T' || c == 'y' || c == 'Y' || c == '1')
962         *flag = yes;
963     else if (c == 'f' || c == 'F' || c == 'n' || c == 'N' || c == '')
964         *flag = no;
965     else if (theState == TidyAutoState && (c == 'a' || c =='A'))
966         *flag = TidyAutoState;
967     else
968     {
969         ReportBadArgument( doc, entry->name );
970         return no;
971     }
972 
973     return yes;
974 }
975 
976 /* cr, lf or crlf */
977 Bool ParseNewline( TidyDocImpl* doc, const TidyOptionImpl* entry )
978 {
979     int nl = -1;
980     tmbchar work[ 16 ] = {0};
981     tmbstr cp = work, end = work + sizeof(work);
982     TidyConfigImpl* cfg = &doc->config;
983     tchar c = SkipWhite( cfg );
984 
985     while ( c!=EndOfStream && cp < end && !IsWhite(c) && c != '\r' && c != '\n' )
986     {
987         *cp++ = (tmbchar) c;
988         c = AdvanceChar( cfg );
989     }
990     *cp = 0;
991 
992     if ( tmbstrcasecmp(work, "lf") == 0 )
993         nl = TidyLF;
994     else if ( tmbstrcasecmp(work, "crlf") == 0 )
995         nl = TidyCRLF;
996     else if ( tmbstrcasecmp(work, "cr") == 0 )
997         nl = TidyCR;
998 
999     if ( nl < TidyLF || nl > TidyCR )
1000         ReportBadArgument( doc, entry->name );
1001     else
1002         SetOptionInt( doc, entry->id, nl );
1003     return ( nl >= TidyLF && nl <= TidyCR );
1004 }
1005 
1006 Bool ParseBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1007 {
1008     ulong flag = 0;
1009     Bool status = ParseTriState( TidyNoState, doc, entry, &flag );
1010     if ( status )
1011         SetOptionBool( doc, entry->id, flag != 0 );
1012     return status;
1013 }
1014 
1015 Bool ParseAutoBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1016 {
1017     ulong flag = 0;
1018     Bool status = ParseTriState( TidyAutoState, doc, entry, &flag );
1019     if ( status )
1020         SetOptionInt( doc, entry->id, flag );
1021     return status;
1022 }
1023 
1024 /* a string excluding whitespace */
1025 Bool ParseName( TidyDocImpl* doc, const TidyOptionImpl* option )
1026 {
1027     tmbchar buf[ 1024 ] = {0};
1028     uint i = 0;
1029     uint c = SkipWhite( &doc->config );
1030 
1031     while ( i < sizeof(buf)-2 && c != EndOfStream && !IsWhite(c) )
1032     {
1033         buf[i++] = (tmbchar) c;
1034         c = AdvanceChar( &doc->config );
1035     }
1036     buf[i] = 0;
1037 
1038     if ( i == 0 )
1039         ReportBadArgument( doc, option->name );
1040     else
1041         SetOptionValue( doc, option->id, buf );
1042     return ( i > 0 );
1043 }
1044 
1045 /* #508936 - CSS class naming for -clean option */
1046 Bool ParseCSS1Selector( TidyDocImpl* doc, const TidyOptionImpl* option )
1047 {
1048     char buf[256] = {0};
1049     uint i = 0;
1050     uint c = SkipWhite( &doc->config );
1051 
1052     while ( i < sizeof(buf)-2 && c != EndOfStream && !IsWhite(c) )
1053     {
1054         buf[i++] = (tmbchar) c;
1055         c = AdvanceChar( &doc->config );
1056     }
1057     buf[i] = '\0';
1058 
1059     if ( i == 0 || !IsCSS1Selector(buf) ) {
1060         ReportBadArgument( doc, option->name );
1061         return no;
1062     }
1063 
1064     buf[i++] = '-';  /* Make sure any escaped Unicode is terminated */
1065     buf[i] = 0;      /* so valid class names are generated after */
1066                      /* Tidy appends last digits. */
1067 
1068     SetOptionValue( doc, option->id, buf );
1069     return yes;
1070 }
1071 
1072 /* Coordinates Config update and Tags data */
1073 static void DeclareUserTag( TidyDocImpl* doc, TidyOptionId optId,
1074                             UserTagType tagType, ctmbstr name )
1075 {
1076   ctmbstr prvval = cfgStr( doc, optId );
1077   tmbstr catval = NULL;
1078   ctmbstr theval = name;
1079   if ( prvval )
1080   {
1081     uint len = tmbstrlen(name) + tmbstrlen(prvval) + 3;
1082     catval = tmbstrndup( prvval, len );
1083     tmbstrcat( catval, ", " );
1084     tmbstrcat( catval, name );
1085     theval = catval;
1086   }
1087   DefineTag( doc, tagType, name );
1088   SetOptionValue( doc, optId, theval );
1089   if ( catval )
1090     MemFree( catval );
1091 }
1092 
1093 /* a space or comma separated list of tag names */
1094 Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
1095 {
1096     TidyConfigImpl* cfg = &doc->config;
1097     tmbchar buf[1024];
1098     uint i = 0, nTags = 0;
1099     uint c = SkipWhite( cfg );
1100     UserTagType ttyp = tagtype_null;
1101 
1102     switch ( option->id )
1103     {
1104     case TidyInlineTags:  ttyp = tagtype_inline;    break;
1105     case TidyBlockTags:   ttyp = tagtype_block;     break;
1106     case TidyEmptyTags:   ttyp = tagtype_empty;     break;
1107     case TidyPreTags:     ttyp = tagtype_pre;       break;
1108     default:
1109        ReportUnknownOption( doc, option->name );
1110        return no;
1111     }
1112 
1113     SetOptionValue( doc, option->id, NULL );
1114     FreeDeclaredTags( doc, ttyp );
1115     cfg->defined_tags |= ttyp;
1116 
1117     do
1118     {
1119         if (c == ' ' || c == '\t' || c == ',')
1120         {
1121             c = AdvanceChar( cfg );
1122             continue;
1123         }
1124 
1125         if ( c == '\r' || c == '\n' )
1126         {
1127             uint c2 = AdvanceChar( cfg );
1128             if ( c == '\r' && c2 == '\n' )
1129                 c = AdvanceChar( cfg );
1130             else
1131                 c = c2;
1132 
1133             if ( !IsWhite(c) )
1134             {
1135                 buf[i] = 0;
1136                 UngetChar( c, cfg->cfgIn );
1137                 UngetChar( '\n', cfg->cfgIn );
1138                 break;
1139             }
1140         }
1141 
1142         /*
1143         if ( c == '\n' )
1144         {
1145             c = AdvanceChar( cfg );
1146             if ( !IsWhite(c) )
1147             {
1148                 buf[i] = 0;
1149                 UngetChar( c, cfg->cfgIn );
1150                 UngetChar( '\n', cfg->cfgIn );
1151                 break;
1152             }
1153         }
1154         */
1155 
1156         while ( i < sizeof(buf)-2 && c != EndOfStream && !IsWhite(c) && c != ',' )
1157         {
1158             buf[i++] = (tmbchar) c;
1159             c = AdvanceChar( cfg );
1160         }
1161 
1162         buf[i] = '\0';
1163         if (i == 0)          /* Skip empty tag definition.  Possible when */
1164             continue;        /* there is a trailing space on the line. */
1165             
1166         /* add tag to dictionary */
1167         DeclareUserTag( doc, option->id, ttyp, buf );
1168         i = 0;
1169         ++nTags;
1170     }
1171     while ( c != EndOfStream );
1172 
1173     if ( i > 0 )
1174       DeclareUserTag( doc, option->id, ttyp, buf );
1175     return ( nTags > 0 );
1176 }
1177 
1178 /* a string including whitespace */
1179 /* munges whitespace sequences */
1180 
1181 Bool ParseString( TidyDocImpl* doc, const TidyOptionImpl* option )
1182 {
1183     TidyConfigImpl* cfg = &doc->config;
1184     tmbchar buf[8192];
1185     uint i = 0;
1186     tchar delim = 0;
1187     Bool waswhite = yes;
1188 
1189     tchar c = SkipWhite( cfg );
1190