Version:
~ [ 1.0 ] ~
1 /*
2 config.c -- read config file and manage config properties
3
4 (c) 1998-2005 (W3C) MIT, ERCIM, Keio University
5 See tidy.h for the copyright notice.
6
7 CVS Info :
8
9 $Author: arnaud02 $
10 $Date: 2005/08/16 17:02:18 $
11 $Revision: 1.92 $
12
13 */
14
15 /*
16 config files associate a property name with a value.
17
18 // comments can start at the beginning of a line
19 # comments can start at the beginning of a line
20 name: short values fit onto one line
21 name: a really long value that
22 continues on the next line
23
24 property names are case insensitive and should be less than
25 60 characters in length and must start at the beginning of
26 the line, as whitespace at the start of a line signifies a
27 line continuation.
28 */
29
30 #include "config.h"
31 #include "tidy-int.h"
32 #include "message.h"
33 #include "tmbstr.h"
34 #include "tags.h"
35
36 #ifdef WINDOWS_OS
37 #include <io.h>
38 #else
39 #ifdef DMALLOC
40 /*
41 macro for valloc() in dmalloc.h may conflict with declaration for valloc() in unistd.h -
42 we don't need (debugging for) valloc() here. dmalloc.h should come last but it doesn't.
43 */
44 #ifdef valloc
45 #undef valloc
46 #endif
47 #endif
48 #include <unistd.h>
49 #endif
50
51 #ifdef TIDY_WIN32_MLANG_SUPPORT
52 #include "win32tc.h"
53 #endif
54
55 void InitConfig( TidyDocImpl* doc )
56 {
57 ClearMemory( &doc->config, sizeof(TidyConfigImpl) );
58 ResetConfigToDefault( doc );
59 }
60
61 void FreeConfig( TidyDocImpl* doc )
62 {
63 ResetConfigToDefault( doc );
64 TakeConfigSnapshot( doc );
65 }
66
67
68 /* Arrange so index can be cast to enum
69 */
70 static const ctmbstr boolPicks[] =
71 {
72 "no",
73 "yes",
74 NULL
75 };
76
77 static const ctmbstr autoBoolPicks[] =
78 {
79 "no",
80 "yes",
81 "auto",
82 NULL
83 };
84
85 static const ctmbstr repeatAttrPicks[] =
86 {
87 "keep-first",
88 "keep-last",
89 NULL
90 };
91
92 static const ctmbstr accessPicks[] =
93 {
94 "0 (Tidy Classic)",
95 "1 (Priority 1 Checks)",
96 "2 (Priority 2 Checks)",
97 "3 (Priority 3 Checks)",
98 NULL
99 };
100
101 static const ctmbstr charEncPicks[] =
102 {
103 "raw",
104 "ascii",
105 "latin0",
106 "latin1",
107 "utf8",
108 #ifndef NO_NATIVE_ISO2022_SUPPORT
109 "iso2022",
110 #endif
111 "mac",
112 "win1252",
113 "ibm858",
114
115 #if SUPPORT_UTF16_ENCODINGS
116 "utf16le",
117 "utf16be",
118 "utf16",
119 #endif
120
121 #if SUPPORT_ASIAN_ENCODINGS
122 "big5",
123 "shiftjis",
124 #endif
125
126 NULL
127 };
128
129 static const ctmbstr newlinePicks[] =
130 {
131 "LF",
132 "CRLF",
133 "CR",
134 NULL
135 };
136
137 static const ctmbstr doctypePicks[] =
138 {
139 "omit",
140 "auto",
141 "strict",
142 "transitional",
143 "user",
144 NULL
145 };
146
147 #define MU TidyMarkup
148 #define DG TidyDiagnostics
149 #define PP TidyPrettyPrint
150 #define CE TidyEncoding
151 #define MS TidyMiscellaneous
152
153 #define IN TidyInteger
154 #define BL TidyBoolean
155 #define ST TidyString
156
157 #define XX (TidyConfigCategory)-1
158 #define XY (TidyOptionType)-1
159
160 #define DLF DEFAULT_NL_CONFIG
161
162 /* If Accessibility checks not supported, make config setting read-only */
163 #if SUPPORT_ACCESSIBILITY_CHECKS
164 #define ParseAcc ParseInt
165 #else
166 #define ParseAcc NULL
167 #endif
168
169 static const TidyOptionImpl option_defs[] =
170 {
171 { TidyUnknownOption, MS, "unknown!", IN, 0, NULL, NULL },
172 { TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, NULL },
173 { TidyWrapLen, PP, "wrap", IN, 68, ParseInt, NULL },
174 { TidyTabSize, PP, "tab-size", IN, 8, ParseInt, NULL },
175 { TidyCharEncoding, CE, "char-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
176 { TidyInCharEncoding, CE, "input-encoding", IN, LATIN1, ParseCharEnc, charEncPicks },
177 { TidyOutCharEncoding, CE, "output-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
178 { TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks },
179 { TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, NULL, doctypePicks },
180 { TidyDoctype, MU, "doctype", ST, 0, ParseDocType, doctypePicks },
181 { TidyDuplicateAttrs, MU, "repeated-attributes", IN, TidyKeepLast, ParseRepeatAttr, repeatAttrPicks },
182 { TidyAltText, MU, "alt-text", ST, 0, ParseString, NULL },
183
184 /* obsolete */
185 { TidySlideStyle, MS, "slide-style", ST, 0, ParseName, NULL },
186
187 { TidyErrFile, MS, "error-file", ST, 0, ParseString, NULL },
188 { TidyOutFile, MS, "output-file", ST, 0, ParseString, NULL },
189 { TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks },
190 { TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks },
191 { TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
192 { TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
193 { TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks },
194 { TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
195 { TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
196 { TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
197 { TidyXhtmlOut, MU, "output-xhtml", BL, no, ParseBool, boolPicks },
198 { TidyHtmlOut, MU, "output-html", BL, no, ParseBool, boolPicks },
199 { TidyXmlDecl, MU, "add-xml-decl", BL, no, ParseBool, boolPicks },
200 { TidyUpperCaseTags, MU, "uppercase-tags", BL, no, ParseBool, boolPicks },
201 { TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
202 { TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
203 { TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
204 { TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
205 { TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
206 { TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
207 { TidyDropEmptyParas, MU, "drop-empty-paras", BL, yes, ParseBool, boolPicks },
208 { TidyFixComments, MU, "fix-bad-comments", BL, yes, ParseBool, boolPicks },
209 { TidyBreakBeforeBR, PP, "break-before-br", BL, no, ParseBool, boolPicks },
210
211 /* obsolete */
212 { TidyBurstSlides, PP, "split", BL, no, ParseBool, boolPicks },
213
214 { TidyNumEntities, MU, "numeric-entities", BL, no, ParseBool, boolPicks },
215 { TidyQuoteMarks, MU, "quote-marks", BL, no, ParseBool, boolPicks },
216 { TidyQuoteNbsp, MU, "quote-nbsp", BL, yes, ParseBool, boolPicks },
217 { TidyQuoteAmpersand, MU, "quote-ampersand", BL, yes, ParseBool, boolPicks },
218 { TidyWrapAttVals, PP, "wrap-attributes", BL, no, ParseBool, boolPicks },
219 { TidyWrapScriptlets, PP, "wrap-script-literals", BL, no, ParseBool, boolPicks },
220 { TidyWrapSection, PP, "wrap-sections", BL, yes, ParseBool, boolPicks },
221 { TidyWrapAsp, PP, "wrap-asp", BL, yes, ParseBool, boolPicks },
222 { TidyWrapJste, PP, "wrap-jste", BL, yes, ParseBool, boolPicks },
223 { TidyWrapPhp, PP, "wrap-php", BL, yes, ParseBool, boolPicks },
224 { TidyFixBackslash, MU, "fix-backslash", BL, yes, ParseBool, boolPicks },
225 { TidyIndentAttributes, PP, "indent-attributes", BL, no, ParseBool, boolPicks },
226 { TidyXmlPIs, MU, "assume-xml-procins", BL, no, ParseBool, boolPicks },
227 { TidyXmlSpace, MU, "add-xml-space", BL, no, ParseBool, boolPicks },
228 { TidyEncloseBodyText, MU, "enclose-text", BL, no, ParseBool, boolPicks },
229 { TidyEncloseBlockText, MU, "enclose-block-text", BL, no, ParseBool, boolPicks },
230 { TidyKeepFileTimes, MS, "keep-time", BL, no, ParseBool, boolPicks },
231 { TidyWord2000, MU, "word-2000", BL, no, ParseBool, boolPicks },
232 { TidyMark, MS, "tidy-mark", BL, yes, ParseBool, boolPicks },
233 { TidyEmacs, MS, "gnu-emacs", BL, no, ParseBool, boolPicks },
234 { TidyEmacsFile, MS, "gnu-emacs-file", ST, 0, ParseString, NULL },
235 { TidyLiteralAttribs, MU, "literal-attributes", BL, no, ParseBool, boolPicks },
236 { TidyBodyOnly, MU, "show-body-only", BL, no, ParseBool, boolPicks },
237 { TidyFixUri, MU, "fix-uri", BL, yes, ParseBool, boolPicks },
238 { TidyLowerLiterals, MU, "lower-literals", BL, yes, ParseBool, boolPicks },
239 { TidyHideComments, MU, "hide-comments", BL, no, ParseBool, boolPicks },
240 { TidyIndentCdata, MU, "indent-cdata", BL, no, ParseBool, boolPicks },
241 { TidyForceOutput, MS, "force-output", BL, no, ParseBool, boolPicks },
242 { TidyShowErrors, DG, "show-errors", IN, 6, ParseInt, NULL },
243 { TidyAsciiChars, CE, "ascii-chars", BL, no, ParseBool, boolPicks },
244 { TidyJoinClasses, MU, "join-classes", BL, no, ParseBool, boolPicks },
245 { TidyJoinStyles, MU, "join-styles", BL, yes, ParseBool, boolPicks },
246 { TidyEscapeCdata, MU, "escape-cdata", BL, no, ParseBool, boolPicks },
247 #if SUPPORT_ASIAN_ENCODINGS
248 { TidyLanguage, CE, "language", ST, 0, ParseName, NULL },
249 { TidyNCR, MU, "ncr", BL, yes, ParseBool, boolPicks },
250 #endif
251 #if SUPPORT_UTF16_ENCODINGS
252 { TidyOutputBOM, CE, "output-bom", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
253 #endif
254 { TidyReplaceColor, MU, "replace-color", BL, no, ParseBool, boolPicks },
255 { TidyCSSPrefix, MU, "css-prefix", ST, 0, ParseCSS1Selector, NULL },
256 { TidyInlineTags, MU, "new-inline-tags", ST, 0, ParseTagNames, NULL },
257 { TidyBlockTags, MU, "new-blocklevel-tags", ST, 0, ParseTagNames, NULL },
258 { TidyEmptyTags, MU, "new-empty-tags", ST, 0, ParseTagNames, NULL },
259 { TidyPreTags, MU, "new-pre-tags", ST, 0, ParseTagNames, NULL },
260 { TidyAccessibilityCheckLevel, DG, "accessibility-check", IN, 0, ParseAcc, accessPicks },
261 { TidyVertSpace, PP, "vertical-space", BL, no, ParseBool, boolPicks },
262 #if SUPPORT_ASIAN_ENCODINGS
263 { TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks },
264 #endif
265 { TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
266 { N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL }
267 };
268
269 /* Should only be called by options set by name
270 ** thus, it is cheaper to do a few scans than set
271 ** up every option in a hash table.
272 */
273 const TidyOptionImpl* lookupOption( ctmbstr s )
274 {
275 const TidyOptionImpl* np = option_defs;
276 for ( /**/; np < option_defs + N_TIDY_OPTIONS; ++np )
277 {
278 if ( tmbstrcasecmp(s, np->name) == 0 )
279 return np;
280 }
281 return NULL;
282 }
283
284 const TidyOptionImpl* getOption( TidyOptionId optId )
285 {
286 if ( optId < N_TIDY_OPTIONS )
287 return option_defs + optId;
288 return NULL;
289 }
290
291
292 static void FreeOptionValue( const TidyOptionImpl* option, ulong value )
293 {
294 if ( value && option->type == TidyString && value != option->dflt )
295 {
296 MemFree( (void*) value );
297 }
298 }
299
300 static void CopyOptionValue( const TidyOptionImpl* option,
301 ulong* oldval, ulong newval )
302 {
303 assert( oldval != NULL );
304 FreeOptionValue( option, *oldval );
305
306 if ( newval && option->type == TidyString && newval != option->dflt )
307 *oldval = (ulong) tmbstrdup( (ctmbstr) newval );
308 else
309 *oldval = newval;
310 }
311
312
313 Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val )
314 {
315 const TidyOptionImpl* option = &option_defs[ optId ];
316 Bool status = ( optId < N_TIDY_OPTIONS );
317 if ( status )
318 {
319 assert( option->id == optId && option->type == TidyString );
320 FreeOptionValue( option, doc->config.value[ optId ] );
321 doc->config.value[ optId ] = (ulong) tmbstrdup( val );
322 }
323 return status;
324 }
325
326 Bool SetOptionInt( TidyDocImpl* doc, TidyOptionId optId, ulong val )
327 {
328 Bool status = ( optId < N_TIDY_OPTIONS );
329 if ( status )
330 {
331 assert( option_defs[ optId ].type == TidyInteger );
332 doc->config.value[ optId ] = val;
333 }
334 return status;
335 }
336
337 Bool SetOptionBool( TidyDocImpl* doc, TidyOptionId optId, Bool val )
338 {
339 Bool status = ( optId < N_TIDY_OPTIONS );
340 if ( status )
341 {
342 assert( option_defs[ optId ].type == TidyBoolean );
343 doc->config.value[ optId ] = val;
344 }
345 return status;
346 }
347
348 Bool ResetOptionToDefault( TidyDocImpl* doc, TidyOptionId optId )
349 {
350 Bool status = ( optId > 0 && optId < N_TIDY_OPTIONS );
351 if ( status )
352 {
353 const TidyOptionImpl* option = option_defs + optId;
354 ulong* value = &doc->config.value[ optId ];
355 assert( optId == option->id );
356 CopyOptionValue( option, value, option->dflt );
357 }
358 return status;
359 }
360
361 static void ReparseTagType( TidyDocImpl* doc, TidyOptionId optId )
362 {
363 ctmbstr tagdecl = cfgStr( doc, optId );
364 tmbstr dupdecl = tmbstrdup( tagdecl );
365 ParseConfigValue( doc, optId, dupdecl );
366 MemFree( dupdecl );
367 }
368
369 /* Not efficient, but effective */
370 static void ReparseTagDecls( TidyDocImpl* doc )
371 {
372 FreeDeclaredTags( doc, tagtype_null );
373 if ( cfg(doc, TidyInlineTags) )
374 ReparseTagType( doc, TidyInlineTags );
375 if ( cfg(doc, TidyBlockTags) )
376 ReparseTagType( doc, TidyBlockTags );
377 if ( cfg(doc, TidyEmptyTags) )
378 ReparseTagType( doc, TidyEmptyTags );
379 if ( cfg(doc, TidyPreTags) )
380 ReparseTagType( doc, TidyPreTags );
381 }
382
383 void ResetConfigToDefault( TidyDocImpl* doc )
384 {
385 uint ixVal;
386 const TidyOptionImpl* option = option_defs;
387 ulong* value = &doc->config.value[ 0 ];
388 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
389 {
390 assert( ixVal == (uint) option->id );
391 CopyOptionValue( option, &value[ixVal], option->dflt );
392 }
393 FreeDeclaredTags( doc, tagtype_null );
394 }
395
396 void TakeConfigSnapshot( TidyDocImpl* doc )
397 {
398 uint ixVal;
399 const TidyOptionImpl* option = option_defs;
400 ulong* value = &doc->config.value[ 0 ];
401 ulong* snap = &doc->config.snapshot[ 0 ];
402
403 AdjustConfig( doc ); /* Make sure it's consistent */
404 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
405 {
406 assert( ixVal == (uint) option->id );
407 CopyOptionValue( option, &snap[ixVal], value[ixVal] );
408 }
409 }
410
411 void ResetConfigToSnapshot( TidyDocImpl* doc )
412 {
413 uint ixVal;
414 const TidyOptionImpl* option = option_defs;
415 ulong* value = &doc->config.value[ 0 ];
416 ulong* snap = &doc->config.snapshot[ 0 ];
417
418 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
419 {
420 assert( ixVal == (uint) option->id );
421 CopyOptionValue( option, &value[ixVal], snap[ixVal] );
422 }
423 FreeDeclaredTags( doc, tagtype_null );
424 ReparseTagDecls( doc );
425 }
426
427 void CopyConfig( TidyDocImpl* docTo, TidyDocImpl* docFrom )
428 {
429 if ( docTo != docFrom )
430 {
431 uint ixVal;
432 const TidyOptionImpl* option = option_defs;
433 ulong* from = &docFrom->config.value[ 0 ];
434 ulong* to = &docTo->config.value[ 0 ];
435
436 TakeConfigSnapshot( docTo );
437 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
438 {
439 assert( ixVal == (uint) option->id );
440 CopyOptionValue( option, &to[ixVal], from[ixVal] );
441 }
442 ReparseTagDecls( docTo );
443 AdjustConfig( docTo ); /* Make sure it's consistent */
444 }
445 }
446
447
448 #ifdef _DEBUG
449
450 /* Debug accessor functions will be type-safe and assert option type match */
451 ulong _cfgGet( TidyDocImpl* doc, TidyOptionId optId )
452 {
453 assert( optId < N_TIDY_OPTIONS );
454 return doc->config.value[ optId ];
455 }
456
457 Bool _cfgGetBool( TidyDocImpl* doc, TidyOptionId optId )
458 {
459 ulong val = _cfgGet( doc, optId );
460 const TidyOptionImpl* opt = &option_defs[ optId ];
461 assert( opt && opt->type == TidyBoolean );
462 return (Bool) val;
463 }
464
465 TidyTriState _cfgGetAutoBool( TidyDocImpl* doc, TidyOptionId optId )
466 {
467 ulong val = _cfgGet( doc, optId );
468 const TidyOptionImpl* opt = &option_defs[ optId ];
469 assert( opt && opt->type == TidyInteger );
470 return (TidyTriState) val;
471 }
472
473 ctmbstr _cfgGetString( TidyDocImpl* doc, TidyOptionId optId )
474 {
475 ulong val = _cfgGet( doc, optId );
476 const TidyOptionImpl* opt = &option_defs[ optId ];
477 assert( opt && opt->type == TidyString );
478 return (ctmbstr) val;
479 }
480 #endif
481
482
483 /* for use with Gnu Emacs */
484 void SetEmacsFilename( TidyDocImpl* doc, ctmbstr filename )
485 {
486 SetOptionValue( doc, TidyEmacsFile, filename );
487 }
488
489
490 static tchar GetC( TidyConfigImpl* config )
491 {
492 if ( config->cfgIn )
493 return ReadChar( config->cfgIn );
494 return EndOfStream;
495 }
496
497 static tchar FirstChar( TidyConfigImpl* config )
498 {
499 config->c = GetC( config );
500 return config->c;
501 }
502
503 static tchar AdvanceChar( TidyConfigImpl* config )
504 {
505 if ( config->c != EndOfStream )
506 config->c = GetC( config );
507 return config->c;
508 }
509
510 static tchar SkipWhite( TidyConfigImpl* config )
511 {
512 while ( IsWhite(config->c) && !IsNewline(config->c) )
513 config->c = GetC( config );
514 return config->c;
515 }
516
517 /* skip until end of line
518 static tchar SkipToEndofLine( TidyConfigImpl* config )
519 {
520 while ( config->c != EndOfStream )
521 {
522 config->c = GetC( config );
523 if ( config->c == '\n' || config->c == '\r' )
524 break;
525 }
526 return config->c;
527 }
528 */
529
530 /*
531 skip over line continuations
532 to start of next property
533 */
534 static uint NextProperty( TidyConfigImpl* config )
535 {
536 do
537 {
538 /* skip to end of line */
539 while ( config->c != '\n' && config->c != '\r' && config->c != EndOfStream )
540 config->c = GetC( config );
541
542 /* treat \r\n \r or \n as line ends */
543 if ( config->c == '\r' )
544 config->c = GetC( config );
545
546 if ( config->c == '\n' )
547 config->c = GetC( config );
548 }
549 while ( IsWhite(config->c) ); /* line continuation? */
550
551 return config->c;
552 }
553
554 /*
555 Todd Lewis contributed this code for expanding
556 ~/foo or ~your/foo according to $HOME and your
557 user name. This will work partially on any system
558 which defines $HOME. Support for ~user/foo will
559 work on systems that support getpwnam(userid),
560 namely Unix/Linux.
561 */
562 ctmbstr ExpandTilde( ctmbstr filename )
563 {
564 char *home_dir = NULL;
565
566 if ( !filename )
567 return NULL;
568
569 if ( filename[0] != '~' )
570 return filename;
571
572 if (filename[1] == '/')
573 {
574 home_dir = getenv("HOME");
575 if ( home_dir )
576 ++filename;
577 }
578 #ifdef SUPPORT_GETPWNAM
579 else
580 {
581 struct passwd *passwd = NULL;
582 ctmbstr s = filename + 1;
583 tmbstr t;
584
585 while ( *s && *s != '/' )
586 s++;
587
588 if ( t = MemAlloc(s - filename) )
589 {
590 memcpy(t, filename+1, s-filename-1);
591 t[s-filename-1] = 0;
592
593 passwd = getpwnam(t);
594
595 MemFree(t);
596 }
597
598 if ( passwd )
599 {
600 filename = s;
601 home_dir = passwd->pw_dir;
602 }
603 }
604 #endif /* SUPPORT_GETPWNAM */
605
606 if ( home_dir )
607 {
608 uint len = tmbstrlen(filename) + tmbstrlen(home_dir) + 1;
609 tmbstr p = (tmbstr)MemAlloc( len );
610 tmbstrcpy( p, home_dir );
611 tmbstrcat( p, filename );
612 return (ctmbstr) p;
613 }
614 return (ctmbstr) filename;
615 }
616
617 Bool TIDY_CALL tidyFileExists( ctmbstr filename )
618 {
619 ctmbstr fname = (tmbstr) ExpandTilde( filename );
620 #ifndef NO_ACCESS_SUPPORT
621 Bool exists = ( access(fname, 0) == 0 );
622 #else
623 Bool exists;
624 /* at present */
625 FILE* fin = fopen(fname, "r");
626 if (fin != NULL)
627 fclose(fin);
628 exists = ( fin != NULL );
629 #endif
630 if ( fname != filename )
631 MemFree( (tmbstr) fname );
632 return exists;
633 }
634
635
636 #ifndef TIDY_MAX_NAME
637 #define TIDY_MAX_NAME 64
638 #endif
639
640 int ParseConfigFile( TidyDocImpl* doc, ctmbstr file )
641 {
642 return ParseConfigFileEnc( doc, file, "ascii" );
643 }
644
645 /* open the file and parse its contents
646 */
647 int ParseConfigFileEnc( TidyDocImpl* doc, ctmbstr file, ctmbstr charenc )
648 {
649 uint opterrs = doc->optionErrors;
650 tmbstr fname = (tmbstr) ExpandTilde( file );
651 TidyConfigImpl* cfg = &doc->config;
652 FILE* fin = fopen( fname, "r" );
653 int enc = CharEncodingId( charenc );
654
655 if ( fin == NULL || enc < 0 )
656 {
657 FileError( doc, fname, TidyConfig );
658 return -1;
659 }
660 else
661 {
662 tchar c;
663 cfg->cfgIn = FileInput( doc, fin, enc );
664 c = FirstChar( cfg );
665
666 for ( c = SkipWhite(cfg); c != EndOfStream; c = NextProperty(cfg) )
667 {
668 uint ix = 0;
669 tmbchar name[ TIDY_MAX_NAME ] = {0};
670
671 /* // or # start a comment */
672 if ( c == '/' || c == '#' )
673 continue;
674
675 while ( ix < sizeof(name)-1 && c != '\n' && c != EndOfStream && c != ':' )
676 {
677 name[ ix++ ] = (tmbchar) c; /* Option names all ASCII */
678 c = AdvanceChar( cfg );
679 }
680
681 if ( c == ':' )
682 {
683 const TidyOptionImpl* option = lookupOption( name );
684 c = AdvanceChar( cfg );
685 if ( option )
686 option->parser( doc, option );
687 else
688 {
689 if (NULL != doc->pOptCallback)
690 {
691 TidyConfigImpl* cfg = &doc->config;
692 tmbchar buf[8192];
693 uint i = 0;
694 tchar delim = 0;
695 Bool waswhite = yes;
696
697 tchar c = SkipWhite( cfg );
698
699 if ( c == '"' || c == '\'' )
700 {
701 delim = c;
702 c = AdvanceChar( cfg );
703 }
704
705 while ( i < sizeof(buf)-2 && c != EndOfStream && c != '\r' && c != '\n' )
706 {
707 if ( delim && c == delim )
708 break;
709
710 if ( IsWhite(c) )
711 {
712 if ( waswhite )
713 {
714 c = AdvanceChar( cfg );
715 continue;
716 }
717 c = ' ';
718 }
719 else
720 waswhite = no;
721
722 buf[i++] = (tmbchar) c;
723 c = AdvanceChar( cfg );
724 }
725 buf[i] = '\0';
726 if (no == (*doc->pOptCallback)( name, buf ))
727 ReportUnknownOption( doc, name );
728 }
729 else
730 ReportUnknownOption( doc, name );
731 }
732 }
733 }
734
735 fclose( fin );
736 MemFree( (void *)cfg->cfgIn->source.sourceData ); /* fix for bug #810259 */
737 freeStreamIn( cfg->cfgIn );
738 cfg->cfgIn = NULL;
739 }
740
741 if ( fname != (tmbstr) file )
742 MemFree( fname );
743
744 AdjustConfig( doc );
745
746 /* any new config errors? If so, return warning status. */
747 return (doc->optionErrors > opterrs ? 1 : 0);
748 }
749
750 /* returns false if unknown option, missing parameter,
751 ** or option doesn't use parameter
752 */
753 Bool ParseConfigOption( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optval )
754 {
755 const TidyOptionImpl* option = lookupOption( optnam );
756 Bool status = ( option != NULL );
757 if ( !status )
758 {
759 /* Not a standard tidy option. Check to see if the user application
760 recognizes it */
761 if (NULL != doc->pOptCallback)
762 status = (*doc->pOptCallback)( optnam, optval );
763 if (!status)
764 ReportUnknownOption( doc, optnam );
765 }
766 else
767 status = ParseConfigValue( doc, option->id, optval );
768 return status;
769 }
770
771 /* returns false if unknown option, missing parameter,
772 ** or option doesn't use parameter
773 */
774 Bool ParseConfigValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optval )
775 {
776 const TidyOptionImpl* option = option_defs + optId;
777 Bool status = ( optId < N_TIDY_OPTIONS && optval != NULL );
778
779 if ( !status )
780 ReportBadArgument( doc, option->name );
781 else
782 {
783 TidyBuffer inbuf = {0}; /* Set up input source */
784 tidyBufAttach( &inbuf, (byte*)optval, tmbstrlen(optval)+1 );
785 doc->config.cfgIn = BufferInput( doc, &inbuf, ASCII );
786 doc->config.c = GetC( &doc->config );
787
788 status = option->parser( doc, option );
789
790 freeStreamIn(doc->config.cfgIn); /* Release input source */
791 doc->config.cfgIn = NULL;
792 tidyBufDetach( &inbuf );
793 }
794 return status;
795 }
796
797
798 /* ensure that char encodings are self consistent */
799 Bool AdjustCharEncoding( TidyDocImpl* doc, int encoding )
800 {
801 int outenc = -1;
802 int inenc = -1;
803
804 switch( encoding )
805 {
806 case MACROMAN:
807 inenc = MACROMAN;
808 outenc = ASCII;
809 break;
810
811 case WIN1252:
812 inenc = WIN1252;
813 outenc = ASCII;
814 break;
815
816 case IBM858:
817 inenc = IBM858;
818 outenc = ASCII;
819 break;
820
821 case ASCII:
822 inenc = LATIN1;
823 outenc = ASCII;
824 break;
825
826 case LATIN0:
827 inenc = LATIN0;
828 outenc = ASCII;
829 break;
830
831 case RAW:
832 case LATIN1:
833 case UTF8:
834 #ifndef NO_NATIVE_ISO2022_SUPPORT
835 case ISO2022:
836 #endif
837
838 #if SUPPORT_UTF16_ENCODINGS
839 case UTF16LE:
840 case UTF16BE:
841 case UTF16:
842 #endif
843 #if SUPPORT_ASIAN_ENCODINGS
844 case SHIFTJIS:
845 case BIG5:
846 #endif
847 inenc = outenc = encoding;
848 break;
849 }
850
851 if ( inenc >= 0 )
852 {
853 SetOptionInt( doc, TidyCharEncoding, encoding );
854 SetOptionInt( doc, TidyInCharEncoding, inenc );
855 SetOptionInt( doc, TidyOutCharEncoding, outenc );
856 return yes;
857 }
858 return no;
859 }
860
861 /* ensure that config is self consistent */
862 void AdjustConfig( TidyDocImpl* doc )
863 {
864 if ( cfgBool(doc, TidyEncloseBlockText) )
865 SetOptionBool( doc, TidyEncloseBodyText, yes );
866
867 if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState )
868 SetOptionInt( doc, TidyIndentSpaces, 0 );
869
870 /* disable wrapping */
871 if ( cfg(doc, TidyWrapLen) == 0 )
872 SetOptionInt( doc, TidyWrapLen, 0x7FFFFFFF );
873
874 /* Word 2000 needs o:p to be declared as inline */
875 if ( cfgBool(doc, TidyWord2000) )
876 {
877 doc->config.defined_tags |= tagtype_inline;
878 DefineTag( doc, tagtype_inline, "o:p" );
879 }
880
881 /* #480701 disable XHTML output flag if both output-xhtml and xml input are set */
882 if ( cfgBool(doc, TidyXmlTags) )
883 SetOptionBool( doc, TidyXhtmlOut, no );
884
885 /* XHTML is written in lower case */
886 if ( cfgBool(doc, TidyXhtmlOut) )
887 {
888 SetOptionBool( doc, TidyXmlOut, yes );
889 SetOptionBool( doc, TidyUpperCaseTags, no );
890 SetOptionBool( doc, TidyUpperCaseAttrs, no );
891 /* SetOptionBool( doc, TidyXmlPIs, yes ); */
892 }
893
894 /* if XML in, then XML out */
895 if ( cfgBool(doc, TidyXmlTags) )
896 {
897 SetOptionBool( doc, TidyXmlOut, yes );
898 SetOptionBool( doc, TidyXmlPIs, yes );
899 }
900
901 /* #427837 - fix by Dave Raggett 02 Jun 01
902 ** generate <?xml version="1.0" encoding="iso-8859-1"?>
903 ** if the output character encoding is Latin-1 etc.
904 */
905 if ( cfg(doc, TidyOutCharEncoding) != ASCII &&
906 cfg(doc, TidyOutCharEncoding) != UTF8 &&
907 #if SUPPORT_UTF16_ENCODINGS
908 cfg(doc, TidyOutCharEncoding) != UTF16 &&
909 cfg(doc, TidyOutCharEncoding) != UTF16BE &&
910 cfg(doc, TidyOutCharEncoding) != UTF16LE &&
911 #endif
912 cfg(doc, TidyOutCharEncoding) != RAW &&
913 cfgBool(doc, TidyXmlOut) )
914 {
915 SetOptionBool( doc, TidyXmlDecl, yes );
916 }
917
918 /* XML requires end tags */
919 if ( cfgBool(doc, TidyXmlOut) )
920 {
921 #if SUPPORT_UTF16_ENCODINGS
922 /* XML requires a BOM on output if using UTF-16 encoding */
923 ulong enc = cfg( doc, TidyOutCharEncoding );
924 if ( enc == UTF16LE || enc == UTF16BE || enc == UTF16 )
925 SetOptionInt( doc, TidyOutputBOM, yes );
926 #endif
927 SetOptionBool( doc, TidyQuoteAmpersand, yes );
928 SetOptionBool( doc, TidyHideEndTags, no );
929 }
930 }
931
932 /* unsigned integers */
933 Bool ParseInt( TidyDocImpl* doc, const TidyOptionImpl* entry )
934 {
935 ulong number = 0;
936 Bool digits = no;
937 TidyConfigImpl* cfg = &doc->config;
938 tchar c = SkipWhite( cfg );
939
940 while ( IsDigit(c) )
941 {
942 number = c - '' + (10 * number);
943 digits = yes;
944 c = AdvanceChar( cfg );
945 }
946
947 if ( !digits )
948 ReportBadArgument( doc, entry->name );
949 else
950 SetOptionInt( doc, entry->id, number );
951 return digits;
952 }
953
954 /* true/false or yes/no or 0/1 or "auto" only looks at 1st char */
955 static Bool ParseTriState( TidyTriState theState, TidyDocImpl* doc,
956 const TidyOptionImpl* entry, ulong* flag )
957 {
958 TidyConfigImpl* cfg = &doc->config;
959 tchar c = SkipWhite( cfg );
960
961 if (c == 't' || c == 'T' || c == 'y' || c == 'Y' || c == '1')
962 *flag = yes;
963 else if (c == 'f' || c == 'F' || c == 'n' || c == 'N' || c == '')
964 *flag = no;
965 else if (theState == TidyAutoState && (c == 'a' || c =='A'))
966 *flag = TidyAutoState;
967 else
968 {
969 ReportBadArgument( doc, entry->name );
970 return no;
971 }
972
973 return yes;
974 }
975
976 /* cr, lf or crlf */
977 Bool ParseNewline( TidyDocImpl* doc, const TidyOptionImpl* entry )
978 {
979 int nl = -1;
980 tmbchar work[ 16 ] = {0};
981 tmbstr cp = work, end = work + sizeof(work);
982 TidyConfigImpl* cfg = &doc->config;
983 tchar c = SkipWhite( cfg );
984
985 while ( c!=EndOfStream && cp < end && !IsWhite(c) && c != '\r' && c != '\n' )
986 {
987 *cp++ = (tmbchar) c;
988 c = AdvanceChar( cfg );
989 }
990 *cp = 0;
991
992 if ( tmbstrcasecmp(work, "lf") == 0 )
993 nl = TidyLF;
994 else if ( tmbstrcasecmp(work, "crlf") == 0 )
995 nl = TidyCRLF;
996 else if ( tmbstrcasecmp(work, "cr") == 0 )
997 nl = TidyCR;
998
999 if ( nl < TidyLF || nl > TidyCR )
1000 ReportBadArgument( doc, entry->name );
1001 else
1002 SetOptionInt( doc, entry->id, nl );
1003 return ( nl >= TidyLF && nl <= TidyCR );
1004 }
1005
1006 Bool ParseBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1007 {
1008 ulong flag = 0;
1009 Bool status = ParseTriState( TidyNoState, doc, entry, &flag );
1010 if ( status )
1011 SetOptionBool( doc, entry->id, flag != 0 );
1012 return status;
1013 }
1014
1015 Bool ParseAutoBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1016 {
1017 ulong flag = 0;
1018 Bool status = ParseTriState( TidyAutoState, doc, entry, &flag );
1019 if ( status )
1020 SetOptionInt( doc, entry->id, flag );
1021 return status;
1022 }
1023
1024 /* a string excluding whitespace */
1025 Bool ParseName( TidyDocImpl* doc, const TidyOptionImpl* option )
1026 {
1027 tmbchar buf[ 1024 ] = {0};
1028 uint i = 0;
1029 uint c = SkipWhite( &doc->config );
1030
1031 while ( i < sizeof(buf)-2 && c != EndOfStream && !IsWhite(c) )
1032 {
1033 buf[i++] = (tmbchar) c;
1034 c = AdvanceChar( &doc->config );
1035 }
1036 buf[i] = 0;
1037
1038 if ( i == 0 )
1039 ReportBadArgument( doc, option->name );
1040 else
1041 SetOptionValue( doc, option->id, buf );
1042 return ( i > 0 );
1043 }
1044
1045 /* #508936 - CSS class naming for -clean option */
1046 Bool ParseCSS1Selector( TidyDocImpl* doc, const TidyOptionImpl* option )
1047 {
1048 char buf[256] = {0};
1049 uint i = 0;
1050 uint c = SkipWhite( &doc->config );
1051
1052 while ( i < sizeof(buf)-2 && c != EndOfStream && !IsWhite(c) )
1053 {
1054 buf[i++] = (tmbchar) c;
1055 c = AdvanceChar( &doc->config );
1056 }
1057 buf[i] = '\0';
1058
1059 if ( i == 0 || !IsCSS1Selector(buf) ) {
1060 ReportBadArgument( doc, option->name );
1061 return no;
1062 }
1063
1064 buf[i++] = '-'; /* Make sure any escaped Unicode is terminated */
1065 buf[i] = 0; /* so valid class names are generated after */
1066 /* Tidy appends last digits. */
1067
1068 SetOptionValue( doc, option->id, buf );
1069 return yes;
1070 }
1071
1072 /* Coordinates Config update and Tags data */
1073 static void DeclareUserTag( TidyDocImpl* doc, TidyOptionId optId,
1074 UserTagType tagType, ctmbstr name )
1075 {
1076 ctmbstr prvval = cfgStr( doc, optId );
1077 tmbstr catval = NULL;
1078 ctmbstr theval = name;
1079 if ( prvval )
1080 {
1081 uint len = tmbstrlen(name) + tmbstrlen(prvval) + 3;
1082 catval = tmbstrndup( prvval, len );
1083 tmbstrcat( catval, ", " );
1084 tmbstrcat( catval, name );
1085 theval = catval;
1086 }
1087 DefineTag( doc, tagType, name );
1088 SetOptionValue( doc, optId, theval );
1089 if ( catval )
1090 MemFree( catval );
1091 }
1092
1093 /* a space or comma separated list of tag names */
1094 Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
1095 {
1096 TidyConfigImpl* cfg = &doc->config;
1097 tmbchar buf[1024];
1098 uint i = 0, nTags = 0;
1099 uint c = SkipWhite( cfg );
1100 UserTagType ttyp = tagtype_null;
1101
1102 switch ( option->id )
1103 {
1104 case TidyInlineTags: ttyp = tagtype_inline; break;
1105 case TidyBlockTags: ttyp = tagtype_block; break;
1106 case TidyEmptyTags: ttyp = tagtype_empty; break;
1107 case TidyPreTags: ttyp = tagtype_pre; break;
1108 default:
1109 ReportUnknownOption( doc, option->name );
1110 return no;
1111 }
1112
1113 SetOptionValue( doc, option->id, NULL );
1114 FreeDeclaredTags( doc, ttyp );
1115 cfg->defined_tags |= ttyp;
1116
1117 do
1118 {
1119 if (c == ' ' || c == '\t' || c == ',')
1120 {
1121 c = AdvanceChar( cfg );
1122 continue;
1123 }
1124
1125 if ( c == '\r' || c == '\n' )
1126 {
1127 uint c2 = AdvanceChar( cfg );
1128 if ( c == '\r' && c2 == '\n' )
1129 c = AdvanceChar( cfg );
1130 else
1131 c = c2;
1132
1133 if ( !IsWhite(c) )
1134 {
1135 buf[i] = 0;
1136 UngetChar( c, cfg->cfgIn );
1137 UngetChar( '\n', cfg->cfgIn );
1138 break;
1139 }
1140 }
1141
1142 /*
1143 if ( c == '\n' )
1144 {
1145 c = AdvanceChar( cfg );
1146 if ( !IsWhite(c) )
1147 {
1148 buf[i] = 0;
1149 UngetChar( c, cfg->cfgIn );
1150 UngetChar( '\n', cfg->cfgIn );
1151 break;
1152 }
1153 }
1154 */
1155
1156 while ( i < sizeof(buf)-2 && c != EndOfStream && !IsWhite(c) && c != ',' )
1157 {
1158 buf[i++] = (tmbchar) c;
1159 c = AdvanceChar( cfg );
1160 }
1161
1162 buf[i] = '\0';
1163 if (i == 0) /* Skip empty tag definition. Possible when */
1164 continue; /* there is a trailing space on the line. */
1165
1166 /* add tag to dictionary */
1167 DeclareUserTag( doc, option->id, ttyp, buf );
1168 i = 0;
1169 ++nTags;
1170 }
1171 while ( c != EndOfStream );
1172
1173 if ( i > 0 )
1174 DeclareUserTag( doc, option->id, ttyp, buf );
1175 return ( nTags > 0 );
1176 }
1177
1178 /* a string including whitespace */
1179 /* munges whitespace sequences */
1180
1181 Bool ParseString( TidyDocImpl* doc, const TidyOptionImpl* option )
1182 {
1183 TidyConfigImpl* cfg = &doc->config;
1184 tmbchar buf[8192];
1185 uint i = 0;
1186 tchar delim = 0;
1187 Bool waswhite = yes;
1188
1189 tchar c = SkipWhite( cfg );
1190