Version:
~ [ 1.0 ] ~
1 /* tidylib.c -- internal library definitions
2
3 (c) 1998-2005 (W3C) MIT, ERCIM, Keio University
4 See tidy.h for the copyright notice.
5
6 CVS Info :
7
8 $Author: arnaud02 $
9 $Date: 2005/08/03 18:07:01 $
10 $Revision: 1.59 $
11
12 Defines HTML Tidy API implemented by tidy library.
13
14 Very rough initial cut for discussion purposes.
15
16 Public interface is const-correct and doesn't explicitly depend
17 on any globals. Thus, thread-safety may be introduced w/out
18 changing the interface.
19
20 Looking ahead to a C++ wrapper, C functions always pass
21 this-equivalent as 1st arg.
22
23 Created 2001-05-20 by Charles Reitzel
24
25 */
26
27 #include <errno.h>
28
29 #include "tidy-int.h"
30 #include "parser.h"
31 #include "clean.h"
32 #include "config.h"
33 #include "message.h"
34 #include "pprint.h"
35 #include "entities.h"
36 #include "tmbstr.h"
37 #include "utf8.h"
38
39 #ifdef TIDY_WIN32_MLANG_SUPPORT
40 #include "win32tc.h"
41 #endif
42
#ifdef NEVER
/* Function forms of the handle <-> implementation conversions.
** Compiled only when NEVER is defined; each one is a plain pointer cast.
*/
TidyDocImpl* tidyDocToImpl( TidyDoc tdoc )
{
    TidyDocImpl* impl = (TidyDocImpl*) tdoc;
    return impl;
}
TidyDoc tidyImplToDoc( TidyDocImpl* impl )
{
    TidyDoc tdoc = (TidyDoc) impl;
    return tdoc;
}

Node* tidyNodeToImpl( TidyNode tnod )
{
    Node* node = (Node*) tnod;
    return node;
}
TidyNode tidyImplToNode( Node* node )
{
    TidyNode tnod = (TidyNode) node;
    return tnod;
}

AttVal* tidyAttrToImpl( TidyAttr tattr )
{
    AttVal* attval = (AttVal*) tattr;
    return attval;
}
TidyAttr tidyImplToAttr( AttVal* attval )
{
    TidyAttr tattr = (TidyAttr) attval;
    return tattr;
}

const TidyOptionImpl* tidyOptionToImpl( TidyOption topt )
{
    const TidyOptionImpl* option = (const TidyOptionImpl*) topt;
    return option;
}
TidyOption tidyImplToOption( const TidyOptionImpl* option )
{
    TidyOption topt = (TidyOption) option;
    return topt;
}
#endif
80
81 /* Tidy public interface
82 **
83 ** Most functions return an integer:
84 **
85 ** 0 -> SUCCESS
86 ** >0 -> WARNING
87 ** <0 -> ERROR
88 **
89 */
90
91 TidyDoc TIDY_CALL tidyCreate(void)
92 {
93 TidyDocImpl* impl = tidyDocCreate();
94 return tidyImplToDoc( impl );
95 }
96
97 void TIDY_CALL tidyRelease( TidyDoc tdoc )
98 {
99 TidyDocImpl* impl = tidyDocToImpl( tdoc );
100 tidyDocRelease( impl );
101 }
102
103 TidyDocImpl* tidyDocCreate(void)
104 {
105 TidyDocImpl* doc = (TidyDocImpl*)MemAlloc( sizeof(TidyDocImpl) );
106 ClearMemory( doc, sizeof(*doc) );
107
108 InitMap();
109 InitTags( doc );
110 InitAttrs( doc );
111 InitConfig( doc );
112 InitPrintBuf( doc );
113
114 /* By default, wire tidy messages to standard error.
115 ** Document input will be set by parsing routines.
116 ** Document output will be set by pretty print routines.
117 ** Config input will be set by config parsing routines.
118 ** But we need to start off with a way to report errors.
119 */
120 doc->errout = StdErrOutput();
121 return doc;
122 }
123
124 void tidyDocRelease( TidyDocImpl* doc )
125 {
126 /* doc in/out opened and closed by parse/print routines */
127 if ( doc )
128 {
129 assert( doc->docIn == NULL );
130 assert( doc->docOut == NULL );
131
132 ReleaseStreamOut( doc->errout );
133 doc->errout = NULL;
134
135 FreePrintBuf( doc );
136 FreeLexer( doc );
137 FreeNode(doc, &doc->root);
138 ClearMemory(&doc->root, sizeof(Node));
139
140 if (doc->givenDoctype)
141 MemFree(doc->givenDoctype);
142
143 FreeConfig( doc );
144 FreeAttrTable( doc );
145 FreeTags( doc );
146 MemFree( doc );
147 }
148 }
149
/* Let application store a chunk of data w/ each Tidy instance.
** Useful for callbacks.
*/
153 void TIDY_CALL tidySetAppData( TidyDoc tdoc, ulong appData )
154 {
155 TidyDocImpl* impl = tidyDocToImpl( tdoc );
156 if ( impl )
157 impl->appData = appData;
158 }
159 ulong TIDY_CALL tidyGetAppData( TidyDoc tdoc )
160 {
161 TidyDocImpl* impl = tidyDocToImpl( tdoc );
162 if ( impl )
163 return impl->appData;
164 return 0;
165 }
166
167 ctmbstr TIDY_CALL tidyReleaseDate(void)
168 {
169 return ReleaseDate();
170 }
171
172
173 /* Get/set configuration options
174 */
175 Bool TIDY_CALL tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
176 {
177 TidyDocImpl* impl = tidyDocToImpl( tdoc );
178 if ( impl )
179 {
180 impl->pOptCallback = pOptCallback;
181 return yes;
182 }
183 return no;
184 }
185
186
187 int TIDY_CALL tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
188 {
189 TidyDocImpl* impl = tidyDocToImpl( tdoc );
190 if ( impl )
191 return ParseConfigFile( impl, cfgfil );
192 return -EINVAL;
193 }
194
195 int TIDY_CALL tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc )
196 {
197 TidyDocImpl* impl = tidyDocToImpl( tdoc );
198 if ( impl )
199 return ParseConfigFileEnc( impl, cfgfil, charenc );
200 return -EINVAL;
201 }
202
203 int TIDY_CALL tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam )
204 {
205 TidyDocImpl* impl = tidyDocToImpl( tdoc );
206 if ( impl )
207 {
208 int enc = CharEncodingId( encnam );
209 if ( enc >= 0 && AdjustCharEncoding(impl, enc) )
210 return 0;
211
212 ReportBadArgument( impl, "char-encoding" );
213 }
214 return -EINVAL;
215 }
216
217 int TIDY_CALL tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam )
218 {
219 TidyDocImpl* impl = tidyDocToImpl( tdoc );
220 if ( impl )
221 {
222 int enc = CharEncodingId( encnam );
223 if ( enc >= 0 && SetOptionInt( impl, TidyInCharEncoding, enc ) )
224 return 0;
225
226 ReportBadArgument( impl, "in-char-encoding" );
227 }
228 return -EINVAL;
229 }
230
231 int TIDY_CALL tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam )
232 {
233 TidyDocImpl* impl = tidyDocToImpl( tdoc );
234 if ( impl )
235 {
236 int enc = CharEncodingId( encnam );
237 if ( enc >= 0 && SetOptionInt( impl, TidyOutCharEncoding, enc ) )
238 return 0;
239
240 ReportBadArgument( impl, "out-char-encoding" );
241 }
242 return -EINVAL;
243 }
244
245 TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam )
246 {
247 const TidyOptionImpl* option = lookupOption( optnam );
248 if ( option )
249 return option->id;
250 return N_TIDY_OPTIONS; /* Error */
251 }
252
253 TidyIterator TIDY_CALL tidyGetOptionList( TidyDoc tdoc )
254 {
255 TidyDocImpl* impl = tidyDocToImpl( tdoc );
256 if ( impl )
257 return getOptionList( impl );
258 return (TidyIterator) -1;
259 }
260
261 TidyOption TIDY_CALL tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos )
262 {
263 TidyDocImpl* impl = tidyDocToImpl( tdoc );
264 const TidyOptionImpl* option = NULL;
265 if ( impl )
266 option = getNextOption( impl, pos );
267 else if ( pos )
268 *pos = 0;
269 return tidyImplToOption( option );
270 }
271
272
273 TidyOption TIDY_CALL tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId )
274 {
275 const TidyOptionImpl* option = getOption( optId );
276 return tidyImplToOption( option );
277 }
278 TidyOption TIDY_CALL tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam )
279 {
280 const TidyOptionImpl* option = lookupOption( optnam );
281 return tidyImplToOption( option );
282 }
283
284 TidyOptionId TIDY_CALL tidyOptGetId( TidyOption topt )
285 {
286 const TidyOptionImpl* option = tidyOptionToImpl( topt );
287 if ( option )
288 return option->id;
289 return N_TIDY_OPTIONS;
290 }
291 ctmbstr TIDY_CALL tidyOptGetName( TidyOption topt )
292 {
293 const TidyOptionImpl* option = tidyOptionToImpl( topt );
294 if ( option )
295 return option->name;
296 return NULL;
297 }
298 TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt )
299 {
300 const TidyOptionImpl* option = tidyOptionToImpl( topt );
301 if ( option )
302 return option->type;
303 return (TidyOptionType) -1;
304 }
305 TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt )
306 {
307 const TidyOptionImpl* option = tidyOptionToImpl( topt );
308 if ( option )
309 return option->category;
310 return (TidyConfigCategory) -1;
311 }
312 ctmbstr TIDY_CALL tidyOptGetDefault( TidyOption topt )
313 {
314 const TidyOptionImpl* option = tidyOptionToImpl( topt );
315 if ( option && option->type == TidyString )
316 return (ctmbstr) option->dflt;
317 return NULL;
318 }
319 ulong TIDY_CALL tidyOptGetDefaultInt( TidyOption topt )
320 {
321 const TidyOptionImpl* option = tidyOptionToImpl( topt );
322 if ( option && option->type != TidyString )
323 return option->dflt;
324 return ~0U;
325 }
326 Bool TIDY_CALL tidyOptGetDefaultBool( TidyOption topt )
327 {
328 const TidyOptionImpl* option = tidyOptionToImpl( topt );
329 if ( option && option->type != TidyString )
330 return ( option->dflt ? yes : no );
331 return no;
332 }
333 Bool TIDY_CALL tidyOptIsReadOnly( TidyOption topt )
334 {
335 const TidyOptionImpl* option = tidyOptionToImpl( topt );
336 if ( option )
337 return ( option->parser == NULL );
338 return yes;
339 }
340
341
342 TidyIterator TIDY_CALL tidyOptGetPickList( TidyOption topt )
343 {
344 const TidyOptionImpl* option = tidyOptionToImpl( topt );
345 if ( option )
346 return getOptionPickList( option );
347 return (TidyIterator) -1;
348 }
349 ctmbstr TIDY_CALL tidyOptGetNextPick( TidyOption topt, TidyIterator* pos )
350 {
351 const TidyOptionImpl* option = tidyOptionToImpl( topt );
352 if ( option )
353 return getNextOptionPick( option, pos );
354 return NULL;
355 }
356
357
358 ctmbstr TIDY_CALL tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId )
359 {
360 TidyDocImpl* impl = tidyDocToImpl( tdoc );
361 ctmbstr optval = NULL;
362 if ( impl )
363 optval = cfgStr( impl, optId );
364 return optval;
365 }
366 Bool TIDY_CALL tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val )
367 {
368 TidyDocImpl* impl = tidyDocToImpl( tdoc );
369 if ( impl )
370 return ParseConfigValue( impl, optId, val );
371 return no;
372 }
373 Bool TIDY_CALL tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val )
374 {
375 TidyDocImpl* impl = tidyDocToImpl( tdoc );
376 if ( impl )
377 return ParseConfigOption( impl, optnam, val );
378 return no;
379 }
380
381 ulong TIDY_CALL tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId )
382 {
383 TidyDocImpl* impl = tidyDocToImpl( tdoc );
384 ulong opti = 0;
385 if ( impl )
386 opti = cfg( impl, optId );
387 return opti;
388 }
389
390 Bool TIDY_CALL tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val )
391 {
392 TidyDocImpl* impl = tidyDocToImpl( tdoc );
393 if ( impl )
394 return SetOptionInt( impl, optId, val );
395 return no;
396 }
397
398 Bool TIDY_CALL tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId )
399 {
400 TidyDocImpl* impl = tidyDocToImpl( tdoc );
401 Bool optb = no;
402 if ( impl )
403 {
404 const TidyOptionImpl* option = getOption( optId );
405 if ( option )
406 {
407 optb = cfgBool( impl, optId );
408 }
409 }
410 return optb;
411 }
412
413 Bool TIDY_CALL tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val )
414 {
415 TidyDocImpl* impl = tidyDocToImpl( tdoc );
416 if ( impl )
417 return SetOptionBool( impl, optId, val );
418 return no;
419 }
420
421 ctmbstr TIDY_CALL tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId )
422 {
423 uint enc = tidyOptGetInt( tdoc, optId );
424 return CharEncodingOptName( enc );
425 }
426
427 ctmbstr TIDY_CALL tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
428 {
429 const TidyOptionImpl* option = getOption( optId );
430 if ( option && option->pickList )
431 {
432 uint ix, pick = tidyOptGetInt( tdoc, optId );
433 const ctmbstr* pL = option->pickList;
434 for ( ix=0; *pL && ix < pick; ++ix )
435 ++pL;
436 if ( *pL )
437 return *pL;
438 }
439 return NULL;
440 }
441
442
443 TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc )
444 {
445 TidyDocImpl* impl = tidyDocToImpl( tdoc );
446 TidyIterator declIter = 0;
447 if ( impl )
448 declIter = GetDeclaredTagList( impl );
449 return declIter;
450 }
451
452 ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
453 TidyIterator* iter )
454 {
455 TidyDocImpl* impl = tidyDocToImpl( tdoc );
456 ctmbstr tagnam = NULL;
457 if ( impl )
458 {
459 UserTagType tagtyp = tagtype_null;
460 if ( optId == TidyInlineTags )
461 tagtyp = tagtype_inline;
462 else if ( optId == TidyBlockTags )
463 tagtyp = tagtype_block;
464 else if ( optId == TidyEmptyTags )
465 tagtyp = tagtype_empty;
466 else if ( optId == TidyPreTags )
467 tagtyp = tagtype_pre;
468 if ( tagtyp != tagtype_null )
469 tagnam = GetNextDeclaredTag( impl, tagtyp, iter );
470 }
471 return tagnam;
472 }
473
474 ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
475 {
476 const TidyOptionId optId = tidyOptGetId( opt );
477 const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId );
478 return docDesc ? docDesc->doc : NULL;
479 }
480
481 TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
482 {
483 const TidyOptionId optId = tidyOptGetId( opt );
484 const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId );
485 if (docDesc && docDesc->links)
486 return (TidyIterator)docDesc->links;
487 return (TidyIterator)NULL;
488 }
489
490 TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos )
491 {
492 const TidyOptionId* curr = (TidyOptionId *)*pos;
493 TidyOption opt;
494
495 if (*curr == TidyUnknownOption)
496 {
497 *pos = (TidyIterator)NULL;
498 return (TidyOption)0;
499 }
500 opt = tidyGetOption(tdoc, *curr);
501 curr++;
502 *pos = (*curr == TidyUnknownOption ) ?
503 (TidyIterator)NULL:(TidyIterator)curr;
504 return opt;
505 }
506
507 int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil )
508 {
509 TidyDocImpl* impl = tidyDocToImpl( tdoc );
510 if ( impl )
511 return SaveConfigFile( impl, cfgfil );
512 return -EINVAL;
513 }
514
515 int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink )
516 {
517 TidyDocImpl* impl = tidyDocToImpl( tdoc );
518 if ( impl )
519 return SaveConfigSink( impl, sink );
520 return -EINVAL;
521 }
522
523 Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc )
524 {
525 TidyDocImpl* impl = tidyDocToImpl( tdoc );
526 if ( impl )
527 {
528 TakeConfigSnapshot( impl );
529 return yes;
530 }
531 return no;
532 }
533 Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc )
534 {
535 TidyDocImpl* impl = tidyDocToImpl( tdoc );
536 if ( impl )
537 {
538 ResetConfigToSnapshot( impl );
539 return yes;
540 }
541 return no;
542 }
543 Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc )
544 {
545 TidyDocImpl* impl = tidyDocToImpl( tdoc );
546 if ( impl )
547 {
548 ResetConfigToDefault( impl );
549 return yes;
550 }
551 return no;
552 }
553
554 Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId )
555 {
556 TidyDocImpl* impl = tidyDocToImpl( tdoc );
557 if ( impl )
558 return ResetOptionToDefault( impl, optId );
559 return no;
560 }
561
562 Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc )
563 {
564 TidyDocImpl* impl = tidyDocToImpl( tdoc );
565 if ( impl )
566 return ConfigDiffThanDefault( impl );
567 return no;
568 }
569 Bool TIDY_CALL tidyOptDiffThanSnapshot( TidyDoc tdoc )
570 {
571 TidyDocImpl* impl = tidyDocToImpl( tdoc );
572 if ( impl )
573 return ConfigDiffThanSnapshot( impl );
574 return no;
575 }
576
577 Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
578 {
579 TidyDocImpl* docTo = tidyDocToImpl( to );
580 TidyDocImpl* docFrom = tidyDocToImpl( from );
581 if ( docTo && docFrom )
582 {
583 CopyConfig( docTo, docFrom );
584 return yes;
585 }
586 return no;
587 }
588
589
/* I/O and Message handling interface
**
** By default, Tidy will define, create and use
** instances of input and output handlers for
** standard C buffered I/O (i.e. FILE* stdin,
** FILE* stdout and FILE* stderr for content
** input, content output and diagnostic output,
** respectively). A FILE* cfgFile input handler
** will be used for config files. Command line
** options will just be set directly.
*/
601
602 /* Use TidyReportFilter to filter messages by diagnostic level:
603 ** info, warning, etc. Just set diagnostic output
604 ** handler to redirect all diagnostics output. Return true
605 ** to proceed with output, false to cancel.
606 */
607 Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
608 {
609 TidyDocImpl* impl = tidyDocToImpl( tdoc );
610 if ( impl )
611 {
612 impl->mssgFilt = filt;
613 return yes;
614 }
615 return no;
616 }
617
#if 0 /* Not yet */
/* Disabled: set the document output sink. */
int tidySetContentOutputSink( TidyDoc tdoc, TidyOutputSink* outp )
{
    TidyDocImpl* impl = tidyDocToImpl( tdoc );
    if ( !impl )
        return -EINVAL;

    impl->docOut = outp;
    return 0;
}
/* Disabled: set the diagnostic output sink. */
int tidySetDiagnosticOutputSink( TidyDoc tdoc, TidyOutputSink* outp )
{
    TidyDocImpl* impl = tidyDocToImpl( tdoc );
    if ( !impl )
        return -EINVAL;

    impl->msgOut = outp;
    return 0;
}


/* Library helpers
*/
/* Disabled: look up the message text for errorNo. */
cmbstr tidyLookupMessage( TidyDoc tdoc, int errorNo )
{
    TidyDocImpl* impl = tidyDocToImpl( tdoc );
    if ( !impl )
        return NULL;
    return tidyMessage_Lookup( impl->messages, errorNo );
}
#endif
652
653
654 FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
655 {
656 TidyDocImpl* impl = tidyDocToImpl( tdoc );
657 if ( impl )
658 {
659 FILE* errout = fopen( errfilnam, "wb" );
660 if ( errout )
661 {
662 uint outenc = cfg( impl, TidyOutCharEncoding );
663 uint nl = cfg( impl, TidyNewline );
664 ReleaseStreamOut( impl->errout );
665 impl->errout = FileOutput( errout, outenc, nl );
666 return errout;
667 }
668 else /* Emit message to current error sink */
669 FileError( impl, errfilnam, TidyError );
670 }
671 return NULL;
672 }
673
674 int TIDY_CALL tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
675 {
676 TidyDocImpl* impl = tidyDocToImpl( tdoc );
677 if ( impl )
678 {
679 uint outenc = cfg( impl, TidyOutCharEncoding );
680 uint nl = cfg( impl, TidyNewline );
681 ReleaseStreamOut( impl->errout );
682 impl->errout = BufferOutput( errbuf, outenc, nl );
683 return ( impl->errout ? 0 : -ENOMEM );
684 }
685 return -EINVAL;
686 }
687
688 int TIDY_CALL tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
689 {
690 TidyDocImpl* impl = tidyDocToImpl( tdoc );
691 if ( impl )
692 {
693 uint outenc = cfg( impl, TidyOutCharEncoding );
694 uint nl = cfg( impl, TidyNewline );
695 ReleaseStreamOut( impl->errout );
696 impl->errout = UserOutput( sink, outenc, nl );
697 return ( impl->errout ? 0 : -ENOMEM );
698 }
699 return -EINVAL;
700 }
701
702
703 /* Document info */
704 int TIDY_CALL tidyStatus( TidyDoc tdoc )
705 {
706 TidyDocImpl* impl = tidyDocToImpl( tdoc );
707 int tidyStat = -EINVAL;
708 if ( impl )
709 tidyStat = tidyDocStatus( impl );
710 return tidyStat;
711 }
712 int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
713 {
714 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
715 return 0;
716 }
717 Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
718 {
719 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
720 return no;
721 }
722 Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
723 {
724 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
725 return no;
726 }
727
728 uint TIDY_CALL tidyErrorCount( TidyDoc tdoc )
729 {
730 TidyDocImpl* impl = tidyDocToImpl( tdoc );
731 uint count = 0xFFFFFFFF;
732 if ( impl )
733 count = impl->errors;
734 return count;
735 }
736 uint TIDY_CALL tidyWarningCount( TidyDoc tdoc )
737 {
738 TidyDocImpl* impl = tidyDocToImpl( tdoc );
739 uint count = 0xFFFFFFFF;
740 if ( impl )
741 count = impl->warnings;
742 return count;
743 }
744 uint TIDY_CALL tidyAccessWarningCount( TidyDoc tdoc )
745 {
746 TidyDocImpl* impl = tidyDocToImpl( tdoc );
747 uint count = 0xFFFFFFFF;
748 if ( impl )
749 count = impl->accessErrors;
750 return count;
751 }
752 uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc )
753 {
754 TidyDocImpl* impl = tidyDocToImpl( tdoc );
755 uint count = 0xFFFFFFFF;
756 if ( impl )
757 count = impl->optionErrors;
758 return count;
759 }
760
761
762 /* Error reporting functions
763 */
764 void TIDY_CALL tidyErrorSummary( TidyDoc tdoc )
765 {
766 TidyDocImpl* impl = tidyDocToImpl( tdoc );
767 if ( impl )
768 ErrorSummary( impl );
769 }
770 void TIDY_CALL tidyGeneralInfo( TidyDoc tdoc )
771 {
772 TidyDocImpl* impl = tidyDocToImpl( tdoc );
773 if ( impl )
774 GeneralInfo( impl );
775 }
776
777
778 /* I/O Functions
779 **
780 ** Initial version supports only whole-file operations.
781 ** Do not expose Tidy StreamIn or Out data structures - yet.
782 */
783
784 /* Parse/load Functions
785 **
786 ** HTML/XHTML version determined from input.
787 */
788 int TIDY_CALL tidyParseFile( TidyDoc tdoc, ctmbstr filnam )
789 {
790 TidyDocImpl* doc = tidyDocToImpl( tdoc );
791 return tidyDocParseFile( doc, filnam );
792 }
793 int TIDY_CALL tidyParseStdin( TidyDoc tdoc )
794 {
795 TidyDocImpl* doc = tidyDocToImpl( tdoc );
796 return tidyDocParseStdin( doc );
797 }
798 int TIDY_CALL tidyParseString( TidyDoc tdoc, ctmbstr content )
799 {
800 TidyDocImpl* doc = tidyDocToImpl( tdoc );
801 return tidyDocParseString( doc, content );
802 }
803 int TIDY_CALL tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf )
804 {
805 TidyDocImpl* doc = tidyDocToImpl( tdoc );
806 return tidyDocParseBuffer( doc, inbuf );
807 }
808 int TIDY_CALL tidyParseSource( TidyDoc tdoc, TidyInputSource* source )
809 {
810 TidyDocImpl* doc = tidyDocToImpl( tdoc );
811 return tidyDocParseSource( doc, source );
812 }
813
814
815 int tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
816 {
817 int status = -ENOENT;
818 FILE* fin = fopen( filnam, "rb" );
819
820 #if PRESERVE_FILE_TIMES
821 struct stat sbuf = {0};
822 /* get last modified time */
823 ClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
824 if ( fin && cfgBool(doc,TidyKeepFileTimes) &&
825 fstat(fileno(fin), &sbuf) != -1 )
826 {
827 doc->filetimes.actime = sbuf.st_atime;
828 doc->filetimes.modtime = sbuf.st_mtime;
829 }
830 #endif
831
832 if ( fin )
833 {
834 StreamIn* in = FileInput( doc, fin, cfg( doc, TidyInCharEncoding ));
835 status = tidyDocParseStream( doc, in );
836 freeFileSource(&in->source, yes);
837 freeStreamIn(in);
838 }
839 else /* Error message! */
840 FileError( doc, filnam, TidyError );
841 return status;
842 }
843
844 int tidyDocParseStdin( TidyDocImpl* doc )
845 {
846 StreamIn* in = FileInput( doc, stdin, cfg( doc, TidyInCharEncoding ));
847 int status = tidyDocParseStream( doc, in );
848 freeStreamIn(in);
849 return status;
850 }
851
852 int tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf )
853 {
854 int status = -EINVAL;
855 if ( inbuf )
856 {
857 StreamIn* in = BufferInput( doc, inbuf, cfg( doc, TidyInCharEncoding ));
858 status = tidyDocParseStream( doc, in );
859 freeStreamIn(in);
860 }
861 return status;
862 }
863
864 int tidyDocParseString( TidyDocImpl* doc, ctmbstr content )
865 {
866 int status = -EINVAL;
867 TidyBuffer inbuf = {0};
868 StreamIn* in = NULL;
869
870 if ( content )
871 {
872 tidyBufAttach( &inbuf, (byte*)content, tmbstrlen(content)+1 );
873 in = BufferInput( doc, &inbuf, cfg( doc, TidyInCharEncoding ));
874 status = tidyDocParseStream( doc, in );
875 tidyBufDetach( &inbuf );
876 freeStreamIn(in);
877 }
878 return status;
879 }
880
881 int tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source )
882 {
883 StreamIn* in = UserInput( doc, source, cfg( doc, TidyInCharEncoding ));
884 int status = tidyDocParseStream( doc, in );
885 freeStreamIn(in);
886 return status;
887 }
888
889
890 /* Print/save Functions
891 **
892 */
893 int TIDY_CALL tidySaveFile( TidyDoc tdoc, ctmbstr filnam )
894 {
895 TidyDocImpl* doc = tidyDocToImpl( tdoc );
896 return tidyDocSaveFile( doc, filnam );
897 }
898 int TIDY_CALL tidySaveStdout( TidyDoc tdoc )
899 {
900 TidyDocImpl* doc = tidyDocToImpl( tdoc );
901 return tidyDocSaveStdout( doc );
902 }
903 int TIDY_CALL tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen )
904 {
905 TidyDocImpl* doc = tidyDocToImpl( tdoc );
906 return tidyDocSaveString( doc, buffer, buflen );
907 }
908 int TIDY_CALL tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf )
909 {
910 TidyDocImpl* doc = tidyDocToImpl( tdoc );
911 return tidyDocSaveBuffer( doc, outbuf );
912 }
913 int TIDY_CALL tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
914 {
915 TidyDocImpl* doc = tidyDocToImpl( tdoc );
916 return tidyDocSaveSink( doc, sink );
917 }
918
919 int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
920 {
921 int status = -ENOENT;
922 FILE* fout = NULL;
923
924 /* Don't zap input file if no output */
925 if ( doc->errors > 0 &&
926 cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
927 status = tidyDocStatus( doc );
928 else
929 fout = fopen( filnam, "wb" );
930
931 if ( fout )
932 {
933 uint outenc = cfg( doc, TidyOutCharEncoding );
934 uint nl = cfg( doc, TidyNewline );
935 StreamOut* out = FileOutput( fout, outenc, nl );
936
937 status = tidyDocSaveStream( doc, out );
938
939 fclose( fout );
940 MemFree( out );
941
942 #if PRESERVE_FILE_TIMES
943 if ( doc->filetimes.actime )
944 {
945 /* set file last accessed/modified times to original values */
946 utime( filnam, &doc->filetimes );
947 ClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
948 }
949 #endif /* PRESERVFILETIMES */
950 }
951 if ( status < 0 ) /* Error message! */
952 FileError( doc, filnam, TidyError );
953 return status;
954 }
955
956
957
958 /* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
959 ** The code has been left in in case it works w/ other compilers
960 ** or operating systems. If stdout is in Text mode, be aware that
961 ** it will garble UTF16 documents. In text mode, when it encounters
962 ** a single byte of value 10 (0xA), it will insert a single byte
963 ** value 13 (0xD) just before it. This has the effect of garbling
964 ** the entire document.
965 */
966
967 #if !defined(NO_SETMODE_SUPPORT)
968
969 #if defined(_WIN32) || defined(OS2_OS)
970 #include <fcntl.h>
971 #include <io.h>
972 #endif
973
974 #endif
975
976 int tidyDocSaveStdout( TidyDocImpl* doc )
977 {
978 #if !defined(NO_SETMODE_SUPPORT)
979
980 #if defined(_WIN32) || defined(OS2_OS)
981 int oldstdoutmode = -1, oldstderrmode = -1;
982 #endif
983
984 #endif
985 int status = 0;
986 uint outenc = cfg( doc, TidyOutCharEncoding );
987 uint nl = cfg( doc, TidyNewline );
988 StreamOut* out = FileOutput( stdout, outenc, nl );
989
990 #if !defined(NO_SETMODE_SUPPORT)
991
992 #if defined(_WIN32) || defined(OS2_OS)
993 oldstdoutmode = setmode( fileno(stdout), _O_BINARY );
994 oldstderrmode = setmode( fileno(stderr), _O_BINARY );
995 #endif
996
997 #endif
998
999 if ( 0 == status )
1000 status = tidyDocSaveStream( doc, out );
1001
1002 fflush(stdout);
1003 fflush(stderr);
1004
1005 #if !defined(NO_SETMODE_SUPPORT)
1006
1007 #if defined(_WIN32) || defined(OS2_OS)
1008 if ( oldstdoutmode != -1 )
1009 oldstdoutmode = setmode( fileno(stdout), oldstdoutmode );
1010 if ( oldstderrmode != -1 )
1011 oldstderrmode = setmode( fileno(stderr), oldstderrmode );
1012 #endif
1013
1014 #endif
1015
1016 MemFree( out );
1017 return status;
1018 }
1019
1020 int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
1021 {
1022 uint outenc = cfg( doc, TidyOutCharEncoding );
1023 uint nl = cfg( doc, TidyNewline );
1024 TidyBuffer outbuf = {0};
1025
1026 StreamOut* out = BufferOutput( &outbuf, outenc, nl );
1027 int status = tidyDocSaveStream( doc, out );
1028
1029 if ( outbuf.size > *buflen )
1030 status = -ENOMEM;
1031 else
1032 memcpy( buffer, outbuf.bp, outbuf.size );
1033
1034 *buflen = outbuf.size;
1035 tidyBufFree( &outbuf );
1036 MemFree( out );
1037 return status;
1038 }
1039
1040 int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
1041 {
1042 int status = -EINVAL;
1043 if ( outbuf )
1044 {
1045 uint outenc = cfg( doc, TidyOutCharEncoding );
1046 uint nl = cfg( doc, TidyNewline );
1047 StreamOut* out = BufferOutput( outbuf, outenc, nl );
1048
1049 status = tidyDocSaveStream( doc, out );
1050 MemFree( out );
1051 }
1052 return status;
1053 }
1054
1055 int tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
1056 {
1057 uint outenc = cfg( doc, TidyOutCharEncoding );
1058 uint nl = cfg( doc, TidyNewline );
1059 StreamOut* out = UserOutput( sink, outenc, nl );
1060 int status = tidyDocSaveStream( doc, out );
1061 MemFree( out );
1062 return status;
1063 }
1064
1065 int tidyDocStatus( TidyDocImpl* doc )
1066 {
1067 if ( doc->errors > 0 )
1068 return 2;
1069 if ( doc->warnings > 0 || doc->accessErrors > 0 )
1070 return 1;
1071 return 0;
1072 }
1073
1074
1075
1076 int TIDY_CALL tidyCleanAndRepair( TidyDoc tdoc )
1077 {
1078 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1079 if ( impl )
1080 return tidyDocCleanAndRepair( impl );
1081 return -EINVAL;
1082 }
1083
1084 int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc )
1085 {
1086 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1087 if ( impl )
1088 return tidyDocRunDiagnostics( impl );
1089 return -EINVAL;
1090 }
1091
1092
1093 /* Workhorse functions.
1094 **
1095 ** Parse requires input source, all input config items
1096 ** and diagnostic sink to have all been set before calling.
1097 **
1098 ** Emit likewise requires that document sink and all
1099 ** pretty printing options have been set.
1100 */
1101 static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";
1102
1103 int tidyDocParseStream( TidyDocImpl* doc, StreamIn* in )
1104 {
1105 Bool xmlIn = cfgBool( doc, TidyXmlTags );
1106 int bomEnc;
1107
1108 assert( doc != NULL && in != NULL );
1109 assert( doc->docIn == NULL );
1110 doc->docIn = in;
1111
1112 TakeConfigSnapshot( doc ); /* Save config state */
1113 FreeLexer( doc );
1114 FreeAnchors( doc );
1115
1116 FreeNode(doc, &doc->root);
1117 ClearMemory(&doc->root, sizeof(Node));
1118
1119 if (doc->givenDoctype)
1120 MemFree(doc->givenDoctype);
1121
1122 doc->givenDoctype = NULL;
1123
1124 doc->lexer = NewLexer( doc );
1125 /* doc->lexer->root = &doc->root; */
1126 doc->root.line = doc->lexer->lines;
1127 doc->root.column = doc->lexer->columns;
1128 doc->inputHadBOM = no;
1129
1130 bomEnc = ReadBOMEncoding(in);
1131
1132 if (bomEnc != -1)
1133 {
1134 in->encoding = bomEnc;
1135 SetOptionInt(doc, TidyInCharEncoding, bomEnc);
1136 }
1137
1138 #ifdef TIDY_WIN32_MLANG_SUPPORT
1139 if (in->encoding > WIN32MLANG)
1140 Win32MLangInitInputTranscoder(in, in->encoding);
1141 #endif /* TIDY_WIN32_MLANG_SUPPORT */
1142
1143 /* Tidy doesn't alter the doctype for generic XML docs */
1144 if ( xmlIn )
1145 {
1146 ParseXMLDocument( doc );
1147 if ( !CheckNodeIntegrity( &doc->root ) )
1148 FatalError( integrity );
1149 }
1150 else
1151 {
1152 doc->warnings = 0;
1153 ParseDocument( doc );
1154 if ( !CheckNodeIntegrity( &doc->root ) )
1155 FatalError( integrity );
1156 }
1157
1158 #ifdef TIDY_WIN32_MLANG_SUPPORT
1159 Win32MLangUninitInputTranscoder(in);
1160 #endif /* TIDY_WIN32_MLANG_SUPPORT */
1161
1162 doc->docIn = NULL;
1163 return tidyDocStatus( doc );
1164 }
1165
1166 int tidyDocRunDiagnostics( TidyDocImpl* doc )
1167 {
1168 uint acclvl = cfg( doc, TidyAccessibilityCheckLevel );
1169 Bool quiet = cfgBool( doc, TidyQuiet );
1170 Bool force = cfgBool( doc, TidyForceOutput );
1171
1172 if ( !quiet )
1173 {
1174
1175 ReportMarkupVersion( doc );
1176 ReportNumWarnings( doc );
1177 }
1178
1179 if ( doc->errors > 0 && !force )
1180 NeedsAuthorIntervention( doc );
1181
1182 #if SUPPORT_ACCESSIBILITY_CHECKS
1183 if ( acclvl > 0 )
1184 AccessibilityChecks( doc );
1185 #endif
1186
1187 return tidyDocStatus( doc );
1188 }
1189
1190 int tidyDocCleanAndRepair( TidyDocImpl* doc )
1191 {
1192 Bool word2K = cfgBool( doc, TidyWord2000 );
1193 Bool logical = cfgBool( doc, TidyLogicalEmphasis );
1194 Bool clean = cfgBool( doc, TidyMakeClean );
1195 Bool dropFont = cfgBool( doc, TidyDropFontTags );
1196 Bool htmlOut = cfgBool( doc, TidyHtmlOut );
1197 Bool xmlOut = cfgBool( doc, TidyXmlOut );
1198 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
1199 Bool xmlDecl = cfgBool( doc, TidyXmlDecl );
1200 Bool tidyMark = cfgBool( doc, TidyMark );
1201 Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
1202 Node* node;
1203
1204 if (tidyXmlTags)
1205 return tidyDocStatus( doc );
1206
1207 /* simplifies <b><b> ... </b> ...</b> etc. */
1208 NestedEmphasis( doc, &doc->root );
1209
1210 /* cleans up <dir>indented text</dir> etc. */
1211 List2BQ( doc, &doc->root );
1212 BQ2Div( doc, &doc->root );
1213
1214 /* replaces i by em and b by strong */
1215 if ( logical )
1216 EmFromI( doc, &doc->root );
1217
1218 if ( word2K && IsWord2000(doc) )
1219 {
1220 /* prune Word2000's <![if ...]> ... <![endif]> */
1221 DropSections( doc, &doc->root );
1222
1223 /* drop style & class attributes and empty p, span elements */
1224 CleanWord2000( doc, &doc->root );
1225 DropEmptyElements(doc, &doc->root);
1226 }
1227
1228 /* replaces presentational markup by style rules */
1229 if ( clean || dropFont )
1230 CleanDocument( doc );
1231
1232 /* Move terminating <br /> tags from out of paragraphs */
1233 /*! Do we want to do this for all block-level elements? */
1234
1235 /* This is disabled due to http://tidy.sf.net/bug/681116 */
1236 #if 0
1237 FixBrakes( doc, FindBody( doc ));
1238 #endif
1239
1240 /* Reconcile http-equiv meta element with output encoding */
1241 if (cfg( doc, TidyOutCharEncoding) != RAW
1242 #ifndef NO_NATIVE_ISO2022_SUPPORT
1243 && cfg( doc, TidyOutCharEncoding) != ISO2022
1244 #endif
1245 )
1246 VerifyHTTPEquiv( doc, FindHEAD( doc ));
1247
1248 if ( !CheckNodeIntegrity( &doc->root ) )
1249 FatalError( integrity );
1250
1251 /* remember given doctype for reporting */
1252 node = FindDocType(doc);
1253 if (node)
1254 {
1255 AttVal* fpi = GetAttrByName(node, "PUBLIC");
1256 if (AttrHasValue(fpi))
1257 doc->givenDoctype = tmbstrdup(fpi->value);
1258 }
1259
1260 if ( doc->root.content )
1261 {
1262 /* If we had XHTML input but want HTML output */
1263 if ( htmlOut && doc->lexer->isvoyager )
1264 {
1265 Node* node = FindDocType(doc);
1266 /* Remove reference, but do not free */
1267 if (node)
1268 RemoveNode(node);
1269 }
1270
1271 if (xhtmlOut && !htmlOut)
1272 {
1273 SetXHTMLDocType(doc);
1274 FixAnchors(doc, &doc->root, yes, yes);
1275 FixXhtmlNamespace(doc, yes);
1276 FixLanguageInformation(doc, &doc->root, yes, yes);
1277 }
1278 else
1279 {
1280 FixDocType(doc);
1281 FixAnchors(doc, &doc->root, yes, yes);
1282 FixXhtmlNamespace(doc, no);
1283 FixLanguageInformation(doc, &doc->root, no, yes);
1284 }
1285
1286 if (tidyMark )
1287 AddGenerator(doc);
1288 }
1289
1290 /* ensure presence of initial <?xml version="1.0"?> */
1291 if ( xmlOut && xmlDecl )
1292 FixXmlDecl( doc );
1293
1294 return tidyDocStatus( doc );
1295 }
1296
1297 int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )