~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/tidylib.c

Version: ~ [ 1.0 ] ~

  1 /* tidylib.c -- internal library definitions
  2 
  3   (c) 1998-2005 (W3C) MIT, ERCIM, Keio University
  4   See tidy.h for the copyright notice.
  5 
  6   CVS Info :
  7 
  8     $Author: arnaud02 $ 
  9     $Date: 2005/08/03 18:07:01 $ 
 10     $Revision: 1.59 $ 
 11 
 12   Defines HTML Tidy API implemented by tidy library.
 13   
 14   Very rough initial cut for discussion purposes.
 15 
 16   Public interface is const-correct and doesn't explicitly depend
 17   on any globals.  Thus, thread-safety may be introduced w/out
 18   changing the interface.
 19 
 20   Looking ahead to a C++ wrapper, C functions always pass 
 21   this-equivalent as 1st arg.
 22 
 23   Created 2001-05-20 by Charles Reitzel
 24 
 25 */
 26 
 27 #include <errno.h>
 28 
 29 #include "tidy-int.h"
 30 #include "parser.h"
 31 #include "clean.h"
 32 #include "config.h"
 33 #include "message.h"
 34 #include "pprint.h"
 35 #include "entities.h"
 36 #include "tmbstr.h"
 37 #include "utf8.h"
 38 
 39 #ifdef TIDY_WIN32_MLANG_SUPPORT
 40 #include "win32tc.h"
 41 #endif
 42 
 43 #ifdef NEVER
 44 TidyDocImpl* tidyDocToImpl( TidyDoc tdoc )
 45 {
 46   return (TidyDocImpl*) tdoc;
 47 }
 48 TidyDoc      tidyImplToDoc( TidyDocImpl* impl )
 49 {
 50   return (TidyDoc) impl;
 51 }
 52 
 53 Node*        tidyNodeToImpl( TidyNode tnod )
 54 {
 55   return (Node*) tnod;
 56 }
 57 TidyNode     tidyImplToNode( Node* node )
 58 {
 59   return (TidyNode) node;
 60 }
 61 
 62 AttVal*      tidyAttrToImpl( TidyAttr tattr )
 63 {
 64   return (AttVal*) tattr;
 65 }
 66 TidyAttr     tidyImplToAttr( AttVal* attval )
 67 {
 68   return (TidyAttr) attval;
 69 }
 70 
 71 const TidyOptionImpl* tidyOptionToImpl( TidyOption topt )
 72 {
 73   return (const TidyOptionImpl*) topt;
 74 }
 75 TidyOption   tidyImplToOption( const TidyOptionImpl* option )
 76 {
 77   return (TidyOption) option;
 78 }
 79 #endif
 80 
 81 /* Tidy public interface
 82 **
 83 ** Most functions return an integer:
 84 **
 85 ** 0    -> SUCCESS
 86 ** >0   -> WARNING
 87 ** <0   -> ERROR
 88 ** 
 89 */
 90 
 91 TidyDoc TIDY_CALL       tidyCreate(void)
 92 {
 93   TidyDocImpl* impl = tidyDocCreate();
 94   return tidyImplToDoc( impl );
 95 }
 96 
 97 void TIDY_CALL          tidyRelease( TidyDoc tdoc )
 98 {
 99   TidyDocImpl* impl = tidyDocToImpl( tdoc );
100   tidyDocRelease( impl );
101 }
102 
103 TidyDocImpl* tidyDocCreate(void)
104 {
105     TidyDocImpl* doc = (TidyDocImpl*)MemAlloc( sizeof(TidyDocImpl) );
106     ClearMemory( doc, sizeof(*doc) );
107 
108     InitMap();
109     InitTags( doc );
110     InitAttrs( doc );
111     InitConfig( doc );
112     InitPrintBuf( doc );
113 
114     /* By default, wire tidy messages to standard error.
115     ** Document input will be set by parsing routines.
116     ** Document output will be set by pretty print routines.
117     ** Config input will be set by config parsing routines.
118     ** But we need to start off with a way to report errors.
119     */
120     doc->errout = StdErrOutput();
121     return doc;
122 }
123 
124 void          tidyDocRelease( TidyDocImpl* doc )
125 {
126     /* doc in/out opened and closed by parse/print routines */
127     if ( doc )
128     {
129         assert( doc->docIn == NULL );
130         assert( doc->docOut == NULL );
131 
132         ReleaseStreamOut( doc->errout );
133         doc->errout = NULL;
134 
135         FreePrintBuf( doc );
136         FreeLexer( doc );
137         FreeNode(doc, &doc->root);
138         ClearMemory(&doc->root, sizeof(Node));
139 
140         if (doc->givenDoctype)
141             MemFree(doc->givenDoctype);
142 
143         FreeConfig( doc );
144         FreeAttrTable( doc );
145         FreeTags( doc );
146         MemFree( doc );
147     }
148 }
149 
 150 /* Let application store a chunk of data w/ each Tidy instance.
 151 ** Useful for callbacks.
 152 */
153 void TIDY_CALL        tidySetAppData( TidyDoc tdoc, ulong appData )
154 {
155   TidyDocImpl* impl = tidyDocToImpl( tdoc );
156   if ( impl )
157     impl->appData = appData;
158 }
159 ulong TIDY_CALL       tidyGetAppData( TidyDoc tdoc )
160 {
161   TidyDocImpl* impl = tidyDocToImpl( tdoc );
162   if ( impl )
163     return impl->appData;
164   return 0;
165 }
166 
167 ctmbstr TIDY_CALL     tidyReleaseDate(void)
168 {
169     return ReleaseDate();
170 }
171 
172 
173 /* Get/set configuration options
174 */
175 Bool TIDY_CALL        tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
176 {
177   TidyDocImpl* impl = tidyDocToImpl( tdoc );
178   if ( impl )
179   {
180     impl->pOptCallback = pOptCallback;
181     return yes;
182   }
183   return no;
184 }
185 
186 
187 int TIDY_CALL     tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
188 {
189     TidyDocImpl* impl = tidyDocToImpl( tdoc );
190     if ( impl )
191         return ParseConfigFile( impl, cfgfil );
192     return -EINVAL;
193 }
194 
195 int TIDY_CALL     tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc )
196 {
197     TidyDocImpl* impl = tidyDocToImpl( tdoc );
198     if ( impl )
199         return ParseConfigFileEnc( impl, cfgfil, charenc );
200     return -EINVAL;
201 }
202 
203 int TIDY_CALL         tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam )
204 {
205     TidyDocImpl* impl = tidyDocToImpl( tdoc );
206     if ( impl )
207     {
208         int enc = CharEncodingId( encnam );
209         if ( enc >= 0 && AdjustCharEncoding(impl, enc) )
210             return 0;
211 
212         ReportBadArgument( impl, "char-encoding" );
213     }
214     return -EINVAL;
215 }
216 
217 int TIDY_CALL           tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam )
218 {
219     TidyDocImpl* impl = tidyDocToImpl( tdoc );
220     if ( impl )
221     {
222         int enc = CharEncodingId( encnam );
223         if ( enc >= 0 && SetOptionInt( impl, TidyInCharEncoding, enc ) )
224             return 0;
225 
226         ReportBadArgument( impl, "in-char-encoding" );
227     }
228     return -EINVAL;
229 }
230 
231 int TIDY_CALL           tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam )
232 {
233     TidyDocImpl* impl = tidyDocToImpl( tdoc );
234     if ( impl )
235     {
236         int enc = CharEncodingId( encnam );
237         if ( enc >= 0 && SetOptionInt( impl, TidyOutCharEncoding, enc ) )
238             return 0;
239 
240         ReportBadArgument( impl, "out-char-encoding" );
241     }
242     return -EINVAL;
243 }
244 
245 TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam )
246 {
247     const TidyOptionImpl* option = lookupOption( optnam );
248     if ( option )
249         return option->id;
250     return N_TIDY_OPTIONS;  /* Error */
251 }
252 
253 TidyIterator TIDY_CALL  tidyGetOptionList( TidyDoc tdoc )
254 {
255     TidyDocImpl* impl = tidyDocToImpl( tdoc );
256     if ( impl )
257         return getOptionList( impl );
258     return (TidyIterator) -1;
259 }
260 
261 TidyOption TIDY_CALL    tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos )
262 {
263     TidyDocImpl* impl = tidyDocToImpl( tdoc );
264     const TidyOptionImpl* option = NULL;
265     if ( impl )
266         option = getNextOption( impl, pos );
267     else if ( pos )
268         *pos = 0;
269     return tidyImplToOption( option );
270 }
271 
272 
273 TidyOption TIDY_CALL    tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId )
274 {
275     const TidyOptionImpl* option = getOption( optId );
276     return tidyImplToOption( option );
277 }
278 TidyOption TIDY_CALL    tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam )
279 {
280     const TidyOptionImpl* option = lookupOption( optnam );
281     return tidyImplToOption( option );
282 }
283 
284 TidyOptionId TIDY_CALL  tidyOptGetId( TidyOption topt )
285 {
286     const TidyOptionImpl* option = tidyOptionToImpl( topt );
287     if ( option )
288         return option->id;
289     return N_TIDY_OPTIONS;
290 }
291 ctmbstr TIDY_CALL       tidyOptGetName( TidyOption topt )
292 {
293     const TidyOptionImpl* option = tidyOptionToImpl( topt );
294     if ( option )
295         return option->name;
296     return NULL;
297 }
298 TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt )
299 {
300     const TidyOptionImpl* option = tidyOptionToImpl( topt );
301     if ( option )
302         return option->type;
303     return (TidyOptionType) -1;
304 }
305 TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt )
306 {
307     const TidyOptionImpl* option = tidyOptionToImpl( topt );
308     if ( option )
309         return option->category;
310     return (TidyConfigCategory) -1;
311 }
312 ctmbstr TIDY_CALL       tidyOptGetDefault( TidyOption topt )
313 {
314     const TidyOptionImpl* option = tidyOptionToImpl( topt );
315     if ( option && option->type == TidyString )
316         return (ctmbstr) option->dflt;
317     return NULL;
318 }
319 ulong TIDY_CALL          tidyOptGetDefaultInt( TidyOption topt )
320 {
321     const TidyOptionImpl* option = tidyOptionToImpl( topt );
322     if ( option && option->type != TidyString )
323         return option->dflt;
324     return ~0U;
325 }
326 Bool TIDY_CALL          tidyOptGetDefaultBool( TidyOption topt )
327 {
328     const TidyOptionImpl* option = tidyOptionToImpl( topt );
329     if ( option && option->type != TidyString )
330         return ( option->dflt ? yes : no );
331     return no;
332 }
333 Bool TIDY_CALL          tidyOptIsReadOnly( TidyOption topt )
334 {
335     const TidyOptionImpl* option = tidyOptionToImpl( topt );
336     if ( option  )
337         return ( option->parser == NULL );
338     return yes;
339 }
340 
341 
342 TidyIterator TIDY_CALL  tidyOptGetPickList( TidyOption topt )
343 {
344     const TidyOptionImpl* option = tidyOptionToImpl( topt );
345     if ( option )
346       return getOptionPickList( option );
347     return (TidyIterator) -1;
348 }
349 ctmbstr TIDY_CALL       tidyOptGetNextPick( TidyOption topt, TidyIterator* pos )
350 {
351     const TidyOptionImpl* option = tidyOptionToImpl( topt );
352     if ( option )
353         return getNextOptionPick( option, pos );
354     return NULL;
355 }
356 
357 
358 ctmbstr TIDY_CALL       tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId )
359 {
360   TidyDocImpl* impl = tidyDocToImpl( tdoc );
361   ctmbstr optval = NULL;
362   if ( impl )
363     optval = cfgStr( impl, optId );
364   return optval;
365 }
366 Bool TIDY_CALL        tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val )
367 {
368   TidyDocImpl* impl = tidyDocToImpl( tdoc );
369   if ( impl )
370     return ParseConfigValue( impl, optId, val );
371   return no;
372 }
373 Bool TIDY_CALL        tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val )
374 {
375   TidyDocImpl* impl = tidyDocToImpl( tdoc );
376   if ( impl )
377     return ParseConfigOption( impl, optnam, val );
378   return no;
379 }
380 
381 ulong TIDY_CALL        tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId )
382 {
383     TidyDocImpl* impl = tidyDocToImpl( tdoc );
384     ulong opti = 0;
385     if ( impl )
386         opti = cfg( impl, optId );
387     return opti;
388 }
389 
390 Bool TIDY_CALL        tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val )
391 {
392     TidyDocImpl* impl = tidyDocToImpl( tdoc );
393     if ( impl )
394         return SetOptionInt( impl, optId, val );
395     return no;
396 }
397 
398 Bool TIDY_CALL         tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId )
399 {
400     TidyDocImpl* impl = tidyDocToImpl( tdoc );
401     Bool optb = no;
402     if ( impl )
403     {
404         const TidyOptionImpl* option = getOption( optId );
405         if ( option )
406         {
407             optb = cfgBool( impl, optId );
408         }
409     }
410     return optb;
411 }
412 
413 Bool TIDY_CALL        tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val )
414 {
415     TidyDocImpl* impl = tidyDocToImpl( tdoc );
416     if ( impl )
417         return SetOptionBool( impl, optId, val );
418     return no;
419 }
420 
421 ctmbstr TIDY_CALL       tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId )
422 {
423   uint enc = tidyOptGetInt( tdoc, optId );
424   return CharEncodingOptName( enc );
425 }
426 
427 ctmbstr TIDY_CALL       tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
428 {
429     const TidyOptionImpl* option = getOption( optId );
430     if ( option && option->pickList )
431     {
432         uint ix, pick = tidyOptGetInt( tdoc, optId );
433         const ctmbstr* pL = option->pickList;
434         for ( ix=0; *pL && ix < pick; ++ix )
435             ++pL;
436         if ( *pL )
437             return *pL;
438     }
439     return NULL;
440 }
441 
442 
443 TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc )
444 {
445     TidyDocImpl* impl = tidyDocToImpl( tdoc );
446     TidyIterator declIter = 0;
447     if ( impl )
448         declIter = GetDeclaredTagList( impl );
449     return declIter;
450 }
451 
452 ctmbstr TIDY_CALL       tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
453                                      TidyIterator* iter )
454 {
455     TidyDocImpl* impl = tidyDocToImpl( tdoc );
456     ctmbstr tagnam = NULL;
457     if ( impl )
458     {
459         UserTagType tagtyp = tagtype_null;
460         if ( optId == TidyInlineTags )
461             tagtyp = tagtype_inline;
462         else if ( optId == TidyBlockTags )
463             tagtyp = tagtype_block;
464         else if ( optId == TidyEmptyTags )
465             tagtyp = tagtype_empty;
466         else if ( optId == TidyPreTags )
467             tagtyp = tagtype_pre;
468         if ( tagtyp != tagtype_null )
469             tagnam = GetNextDeclaredTag( impl, tagtyp, iter );
470     }
471     return tagnam;
472 }
473 
474 ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
475 {
476     const TidyOptionId optId = tidyOptGetId( opt );
477     const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId );
478     return docDesc ? docDesc->doc : NULL;
479 }
480 
481 TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
482 {
483     const TidyOptionId optId = tidyOptGetId( opt );
484     const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId );
485     if (docDesc && docDesc->links)
486         return (TidyIterator)docDesc->links;
487     return (TidyIterator)NULL;
488 }
489 
490 TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos )
491 {
492     const TidyOptionId* curr = (TidyOptionId *)*pos;
493     TidyOption opt;
494 
495     if (*curr == TidyUnknownOption)
496     {
497         *pos = (TidyIterator)NULL;
498         return (TidyOption)0;
499     }
500     opt = tidyGetOption(tdoc, *curr);
501     curr++;
502     *pos = (*curr == TidyUnknownOption ) ?
503         (TidyIterator)NULL:(TidyIterator)curr;
504     return opt;
505 }
506 
507 int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil )
508 {
509     TidyDocImpl* impl = tidyDocToImpl( tdoc );
510     if ( impl )
511         return SaveConfigFile( impl, cfgfil );
512     return -EINVAL;
513 }
514 
515 int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink )
516 {
517     TidyDocImpl* impl = tidyDocToImpl( tdoc );
518     if ( impl )
519         return SaveConfigSink( impl, sink );
520     return -EINVAL;
521 }
522 
523 Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc )
524 {
525     TidyDocImpl* impl = tidyDocToImpl( tdoc );
526     if ( impl )
527     {
528         TakeConfigSnapshot( impl );
529         return yes;
530     }
531     return no;
532 }
533 Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc )
534 {
535     TidyDocImpl* impl = tidyDocToImpl( tdoc );
536     if ( impl )
537     {
538         ResetConfigToSnapshot( impl );
539         return yes;
540     }
541     return no;
542 }
543 Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc )
544 {
545     TidyDocImpl* impl = tidyDocToImpl( tdoc );
546     if ( impl )
547     {
548         ResetConfigToDefault( impl );
549         return yes;
550     }
551     return no;
552 }
553 
554 Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId )
555 {
556     TidyDocImpl* impl = tidyDocToImpl( tdoc );
557     if ( impl )
558         return ResetOptionToDefault( impl, optId );
559     return no;
560 }
561 
562 Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc )
563 {
564     TidyDocImpl* impl = tidyDocToImpl( tdoc );
565     if ( impl )
566         return ConfigDiffThanDefault( impl );
567     return no;
568 }
569 Bool TIDY_CALL          tidyOptDiffThanSnapshot( TidyDoc tdoc )
570 {
571     TidyDocImpl* impl = tidyDocToImpl( tdoc );
572     if ( impl )
573         return ConfigDiffThanSnapshot( impl );
574     return no;
575 }
576 
577 Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
578 {
579     TidyDocImpl* docTo = tidyDocToImpl( to );
580     TidyDocImpl* docFrom = tidyDocToImpl( from );
581     if ( docTo && docFrom )
582     {
583         CopyConfig( docTo, docFrom );
584         return yes;
585     }
586     return no;
587 }
588 
589 
 590 /* I/O and Message handling interface
 591 **
 592 ** By default, Tidy will define, create and use 
 593 ** instances of input and output handlers for 
 594 ** standard C buffered I/O (i.e. FILE* stdin,
 595 ** FILE* stdout and FILE* stderr for content
 596 ** input, content output and diagnostic output,
 597 ** respectively).  A FILE* cfgFile input handler
 598 ** will be used for config files.  Command line
 599 ** options will just be set directly.
 600 */
601 
602 /* Use TidyReportFilter to filter messages by diagnostic level:
603 ** info, warning, etc.  Just set diagnostic output 
604 ** handler to redirect all diagnostics output.  Return true
605 ** to proceed with output, false to cancel.
606 */
607 Bool TIDY_CALL        tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
608 {
609   TidyDocImpl* impl = tidyDocToImpl( tdoc );
610   if ( impl )
611   {
612     impl->mssgFilt = filt;
613     return yes;
614   }
615   return no;
616 }
617 
#if 0   /* Not yet */
/* Disabled: attach an application sink for document output.
** Returns 0, or -EINVAL when the handle is null.
*/
int         tidySetContentOutputSink( TidyDoc tdoc, TidyOutputSink* outp )
{
  TidyDocImpl* doc = tidyDocToImpl( tdoc );

  if ( !doc )
    return -EINVAL;
  doc->docOut = outp;
  return 0;
}

/* Disabled: attach an application sink for diagnostic output.
** Returns 0, or -EINVAL when the handle is null.
*/
int         tidySetDiagnosticOutputSink( TidyDoc tdoc, TidyOutputSink* outp )
{
  TidyDocImpl* doc = tidyDocToImpl( tdoc );

  if ( !doc )
    return -EINVAL;
  doc->msgOut = outp;
  return 0;
}


/* Library helpers
*/

/* Disabled: look up the text of a message by error number.
** Returns NULL when the handle is null.
*/
cmbstr       tidyLookupMessage( TidyDoc tdoc, int errorNo )
{
  TidyDocImpl* doc = tidyDocToImpl( tdoc );

  if ( !doc )
    return NULL;
  return tidyMessage_Lookup( doc->messages, errorNo );
}
#endif
652 
653 
654 FILE* TIDY_CALL   tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
655 {
656     TidyDocImpl* impl = tidyDocToImpl( tdoc );
657     if ( impl )
658     {
659         FILE* errout = fopen( errfilnam, "wb" );
660         if ( errout )
661         {
662             uint outenc = cfg( impl, TidyOutCharEncoding );
663             uint nl = cfg( impl, TidyNewline );
664             ReleaseStreamOut( impl->errout );
665             impl->errout = FileOutput( errout, outenc, nl );
666             return errout;
667         }
668         else /* Emit message to current error sink */
669             FileError( impl, errfilnam, TidyError );
670     }
671     return NULL;
672 }
673 
674 int TIDY_CALL    tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
675 {
676     TidyDocImpl* impl = tidyDocToImpl( tdoc );
677     if ( impl )
678     {
679         uint outenc = cfg( impl, TidyOutCharEncoding );
680         uint nl = cfg( impl, TidyNewline );
681         ReleaseStreamOut( impl->errout );
682         impl->errout = BufferOutput( errbuf, outenc, nl );
683         return ( impl->errout ? 0 : -ENOMEM );
684     }
685     return -EINVAL;
686 }
687 
688 int TIDY_CALL    tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
689 {
690     TidyDocImpl* impl = tidyDocToImpl( tdoc );
691     if ( impl )
692     {
693         uint outenc = cfg( impl, TidyOutCharEncoding );
694         uint nl = cfg( impl, TidyNewline );
695         ReleaseStreamOut( impl->errout );
696         impl->errout = UserOutput( sink, outenc, nl );
697         return ( impl->errout ? 0 : -ENOMEM );
698     }
699     return -EINVAL;
700 }
701 
702 
703 /* Document info */
704 int TIDY_CALL        tidyStatus( TidyDoc tdoc )
705 {
706     TidyDocImpl* impl = tidyDocToImpl( tdoc );
707     int tidyStat = -EINVAL;
708     if ( impl )
709         tidyStat = tidyDocStatus( impl );
710     return tidyStat;
711 }
712 int TIDY_CALL        tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
713 {
714 /*    TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
715     return 0;
716 }
717 Bool TIDY_CALL        tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
718 {
719 /*    TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
720     return no;
721 }
722 Bool TIDY_CALL        tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
723 {
724 /*    TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
725     return no;
726 }
727 
728 uint TIDY_CALL       tidyErrorCount( TidyDoc tdoc )
729 {
730     TidyDocImpl* impl = tidyDocToImpl( tdoc );
731     uint count = 0xFFFFFFFF;
732     if ( impl )
733         count = impl->errors;
734     return count;
735 }
736 uint TIDY_CALL       tidyWarningCount( TidyDoc tdoc )
737 {
738     TidyDocImpl* impl = tidyDocToImpl( tdoc );
739     uint count = 0xFFFFFFFF;
740     if ( impl )
741         count = impl->warnings;
742     return count;
743 }
744 uint TIDY_CALL       tidyAccessWarningCount( TidyDoc tdoc )
745 {
746     TidyDocImpl* impl = tidyDocToImpl( tdoc );
747     uint count = 0xFFFFFFFF;
748     if ( impl )
749         count = impl->accessErrors;
750     return count;
751 }
752 uint TIDY_CALL       tidyConfigErrorCount( TidyDoc tdoc )
753 {
754     TidyDocImpl* impl = tidyDocToImpl( tdoc );
755     uint count = 0xFFFFFFFF;
756     if ( impl )
757         count = impl->optionErrors;
758     return count;
759 }
760 
761 
762 /* Error reporting functions 
763 */
764 void TIDY_CALL         tidyErrorSummary( TidyDoc tdoc )
765 {
766     TidyDocImpl* impl = tidyDocToImpl( tdoc );
767     if ( impl )
768         ErrorSummary( impl );
769 }
770 void TIDY_CALL         tidyGeneralInfo( TidyDoc tdoc )
771 {
772     TidyDocImpl* impl = tidyDocToImpl( tdoc );
773     if ( impl )
774         GeneralInfo( impl );
775 }
776 
777 
778 /* I/O Functions
779 **
780 ** Initial version supports only whole-file operations.
781 ** Do not expose Tidy StreamIn or Out data structures - yet.
782 */
783 
784 /* Parse/load Functions
785 **
786 ** HTML/XHTML version determined from input.
787 */
788 int TIDY_CALL  tidyParseFile( TidyDoc tdoc, ctmbstr filnam )
789 {
790     TidyDocImpl* doc = tidyDocToImpl( tdoc );
791     return tidyDocParseFile( doc, filnam );
792 }
793 int TIDY_CALL  tidyParseStdin( TidyDoc tdoc )
794 {
795     TidyDocImpl* doc = tidyDocToImpl( tdoc );
796     return tidyDocParseStdin( doc );
797 }
798 int TIDY_CALL  tidyParseString( TidyDoc tdoc, ctmbstr content )
799 {
800     TidyDocImpl* doc = tidyDocToImpl( tdoc );
801     return tidyDocParseString( doc, content );
802 }
803 int TIDY_CALL  tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf )
804 {
805     TidyDocImpl* doc = tidyDocToImpl( tdoc );
806     return tidyDocParseBuffer( doc, inbuf );
807 }
808 int TIDY_CALL  tidyParseSource( TidyDoc tdoc, TidyInputSource* source )
809 {
810     TidyDocImpl* doc = tidyDocToImpl( tdoc );
811     return tidyDocParseSource( doc, source );
812 }
813 
814 
815 int   tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
816 {
817     int status = -ENOENT;
818     FILE* fin = fopen( filnam, "rb" );
819 
820 #if PRESERVE_FILE_TIMES
821     struct stat sbuf = {0};
822     /* get last modified time */
823     ClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
824     if ( fin && cfgBool(doc,TidyKeepFileTimes) &&
825          fstat(fileno(fin), &sbuf) != -1 )
826     {
827           doc->filetimes.actime  = sbuf.st_atime;
828           doc->filetimes.modtime = sbuf.st_mtime;
829     }
830 #endif
831 
832     if ( fin )
833     {
834         StreamIn* in = FileInput( doc, fin, cfg( doc, TidyInCharEncoding ));
835         status = tidyDocParseStream( doc, in );
836         freeFileSource(&in->source, yes);
837         freeStreamIn(in);
838     }
839     else /* Error message! */
840         FileError( doc, filnam, TidyError );
841     return status;
842 }
843 
844 int   tidyDocParseStdin( TidyDocImpl* doc )
845 {
846     StreamIn* in = FileInput( doc, stdin, cfg( doc, TidyInCharEncoding ));
847     int status = tidyDocParseStream( doc, in );
848     freeStreamIn(in);
849     return status;
850 }
851 
852 int   tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf )
853 {
854     int status = -EINVAL;
855     if ( inbuf )
856     {
857         StreamIn* in = BufferInput( doc, inbuf, cfg( doc, TidyInCharEncoding ));
858         status = tidyDocParseStream( doc, in );
859         freeStreamIn(in);
860     }
861     return status;
862 }
863 
864 int   tidyDocParseString( TidyDocImpl* doc, ctmbstr content )
865 {
866     int status = -EINVAL;
867     TidyBuffer inbuf = {0};
868     StreamIn* in = NULL;
869 
870     if ( content )
871     {
872         tidyBufAttach( &inbuf, (byte*)content, tmbstrlen(content)+1 );
873         in = BufferInput( doc, &inbuf, cfg( doc, TidyInCharEncoding ));
874         status = tidyDocParseStream( doc, in );
875         tidyBufDetach( &inbuf );
876         freeStreamIn(in);
877     }
878     return status;
879 }
880 
881 int   tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source )
882 {
883     StreamIn* in = UserInput( doc, source, cfg( doc, TidyInCharEncoding ));
884     int status = tidyDocParseStream( doc, in );
885     freeStreamIn(in);
886     return status;
887 }
888 
889 
890 /* Print/save Functions
891 **
892 */
893 int TIDY_CALL        tidySaveFile( TidyDoc tdoc, ctmbstr filnam )
894 {
895     TidyDocImpl* doc = tidyDocToImpl( tdoc );
896     return tidyDocSaveFile( doc, filnam );
897 }
898 int TIDY_CALL        tidySaveStdout( TidyDoc tdoc )
899 {
900     TidyDocImpl* doc = tidyDocToImpl( tdoc );
901     return tidyDocSaveStdout( doc );
902 }
903 int TIDY_CALL        tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen )
904 {
905     TidyDocImpl* doc = tidyDocToImpl( tdoc );
906     return tidyDocSaveString( doc, buffer, buflen );
907 }
908 int TIDY_CALL        tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf )
909 {
910     TidyDocImpl* doc = tidyDocToImpl( tdoc );
911     return tidyDocSaveBuffer( doc, outbuf );
912 }
913 int TIDY_CALL        tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
914 {
915     TidyDocImpl* doc = tidyDocToImpl( tdoc );
916     return tidyDocSaveSink( doc, sink );
917 }
918 
919 int         tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
920 {
921     int status = -ENOENT;
922     FILE* fout = NULL;
923 
924     /* Don't zap input file if no output */
925     if ( doc->errors > 0 &&
926          cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
927         status = tidyDocStatus( doc );
928     else 
929         fout = fopen( filnam, "wb" );
930 
931     if ( fout )
932     {
933         uint outenc = cfg( doc, TidyOutCharEncoding );
934         uint nl = cfg( doc, TidyNewline );
935         StreamOut* out = FileOutput( fout, outenc, nl );
936 
937         status = tidyDocSaveStream( doc, out );
938 
939         fclose( fout );
940         MemFree( out );
941 
942 #if PRESERVE_FILE_TIMES
943         if ( doc->filetimes.actime )
944         {
945             /* set file last accessed/modified times to original values */
946             utime( filnam, &doc->filetimes );
947             ClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
948         }
949 #endif /* PRESERVFILETIMES */
950     }
951     if ( status < 0 ) /* Error message! */
952         FileError( doc, filnam, TidyError );
953     return status;
954 }
955 
956 
957 
958 /* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
959 ** The code has been left in in case it works w/ other compilers
960 ** or operating systems.  If stdout is in Text mode, be aware that
961 ** it will garble UTF16 documents.  In text mode, when it encounters
962 ** a single byte of value 10 (0xA), it will insert a single byte 
963 ** value 13 (0xD) just before it.  This has the effect of garbling
964 ** the entire document.
965 */
966 
967 #if !defined(NO_SETMODE_SUPPORT)
968 
969 #if defined(_WIN32) || defined(OS2_OS)
970 #include <fcntl.h>
971 #include <io.h>
972 #endif
973 
974 #endif
975 
976 int         tidyDocSaveStdout( TidyDocImpl* doc )
977 {
978 #if !defined(NO_SETMODE_SUPPORT)
979 
980 #if defined(_WIN32) || defined(OS2_OS)
981     int oldstdoutmode = -1, oldstderrmode = -1;
982 #endif
983 
984 #endif
985     int status = 0;
986     uint outenc = cfg( doc, TidyOutCharEncoding );
987     uint nl = cfg( doc, TidyNewline );
988     StreamOut* out = FileOutput( stdout, outenc, nl );
989 
990 #if !defined(NO_SETMODE_SUPPORT)
991 
992 #if defined(_WIN32) || defined(OS2_OS)
993     oldstdoutmode = setmode( fileno(stdout), _O_BINARY );
994     oldstderrmode = setmode( fileno(stderr), _O_BINARY );
995 #endif
996 
997 #endif
998 
999     if ( 0 == status )
1000       status = tidyDocSaveStream( doc, out );
1001 
1002     fflush(stdout);
1003     fflush(stderr);
1004 
1005 #if !defined(NO_SETMODE_SUPPORT)
1006 
1007 #if defined(_WIN32) || defined(OS2_OS)
1008     if ( oldstdoutmode != -1 )
1009         oldstdoutmode = setmode( fileno(stdout), oldstdoutmode );
1010     if ( oldstderrmode != -1 )
1011         oldstderrmode = setmode( fileno(stderr), oldstderrmode );
1012 #endif
1013 
1014 #endif
1015 
1016     MemFree( out );
1017     return status;
1018 }
1019 
1020 int         tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
1021 {
1022     uint outenc = cfg( doc, TidyOutCharEncoding );
1023     uint nl = cfg( doc, TidyNewline );
1024     TidyBuffer outbuf = {0};
1025 
1026     StreamOut* out = BufferOutput( &outbuf, outenc, nl );
1027     int status = tidyDocSaveStream( doc, out );
1028 
1029     if ( outbuf.size > *buflen )
1030         status = -ENOMEM;
1031     else
1032         memcpy( buffer, outbuf.bp, outbuf.size );
1033 
1034     *buflen = outbuf.size;
1035     tidyBufFree( &outbuf );
1036     MemFree( out );
1037     return status;
1038 }
1039 
1040 int         tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
1041 {
1042     int status = -EINVAL;
1043     if ( outbuf )
1044     {
1045         uint outenc = cfg( doc, TidyOutCharEncoding );
1046         uint nl = cfg( doc, TidyNewline );
1047         StreamOut* out = BufferOutput( outbuf, outenc, nl );
1048     
1049         status = tidyDocSaveStream( doc, out );
1050         MemFree( out );
1051     }
1052     return status;
1053 }
1054 
1055 int         tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
1056 {
1057     uint outenc = cfg( doc, TidyOutCharEncoding );
1058     uint nl = cfg( doc, TidyNewline );
1059     StreamOut* out = UserOutput( sink, outenc, nl );
1060     int status = tidyDocSaveStream( doc, out );
1061     MemFree( out );
1062     return status;
1063 }
1064 
1065 int         tidyDocStatus( TidyDocImpl* doc )
1066 {
1067     if ( doc->errors > 0 )
1068         return 2;
1069     if ( doc->warnings > 0 || doc->accessErrors > 0 )
1070         return 1;
1071     return 0;
1072 }
1073 
1074 
1075 
1076 int TIDY_CALL        tidyCleanAndRepair( TidyDoc tdoc )
1077 {
1078     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1079     if ( impl )
1080       return tidyDocCleanAndRepair( impl );
1081     return -EINVAL;
1082 }
1083 
1084 int TIDY_CALL        tidyRunDiagnostics( TidyDoc tdoc )
1085 {
1086     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1087     if ( impl )
1088       return tidyDocRunDiagnostics( impl );
1089     return -EINVAL;
1090 }
1091 
1092 
1093 /* Workhorse functions.
1094 **
1095 ** Parse requires input source, all input config items 
1096 ** and diagnostic sink to have all been set before calling.
1097 **
1098 ** Emit likewise requires that document sink and all
1099 ** pretty printing options have been set.
1100 */
/* Message handed to FatalError() when CheckNodeIntegrity() fails */
static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";
1102 
1103 int         tidyDocParseStream( TidyDocImpl* doc, StreamIn* in )
1104 {
1105     Bool xmlIn = cfgBool( doc, TidyXmlTags );
1106     int bomEnc;
1107 
1108     assert( doc != NULL && in != NULL );
1109     assert( doc->docIn == NULL );
1110     doc->docIn = in;
1111 
1112     TakeConfigSnapshot( doc );    /* Save config state */
1113     FreeLexer( doc );
1114     FreeAnchors( doc );
1115 
1116     FreeNode(doc, &doc->root);
1117     ClearMemory(&doc->root, sizeof(Node));
1118 
1119     if (doc->givenDoctype)
1120         MemFree(doc->givenDoctype);
1121 
1122     doc->givenDoctype = NULL;
1123 
1124     doc->lexer = NewLexer( doc );
1125     /* doc->lexer->root = &doc->root; */
1126     doc->root.line = doc->lexer->lines;
1127     doc->root.column = doc->lexer->columns;
1128     doc->inputHadBOM = no;
1129 
1130     bomEnc = ReadBOMEncoding(in);
1131 
1132     if (bomEnc != -1)
1133     {
1134         in->encoding = bomEnc;
1135         SetOptionInt(doc, TidyInCharEncoding, bomEnc);
1136     }
1137 
1138 #ifdef TIDY_WIN32_MLANG_SUPPORT
1139     if (in->encoding > WIN32MLANG)
1140         Win32MLangInitInputTranscoder(in, in->encoding);
1141 #endif /* TIDY_WIN32_MLANG_SUPPORT */
1142 
1143     /* Tidy doesn't alter the doctype for generic XML docs */
1144     if ( xmlIn )
1145     {
1146         ParseXMLDocument( doc );
1147         if ( !CheckNodeIntegrity( &doc->root ) )
1148             FatalError( integrity );
1149     }
1150     else
1151     {
1152         doc->warnings = 0;
1153         ParseDocument( doc );
1154         if ( !CheckNodeIntegrity( &doc->root ) )
1155             FatalError( integrity );
1156     }
1157 
1158 #ifdef TIDY_WIN32_MLANG_SUPPORT
1159     Win32MLangUninitInputTranscoder(in);
1160 #endif /* TIDY_WIN32_MLANG_SUPPORT */
1161 
1162     doc->docIn = NULL;
1163     return tidyDocStatus( doc );
1164 }
1165 
1166 int         tidyDocRunDiagnostics( TidyDocImpl* doc )
1167 {
1168     uint acclvl = cfg( doc, TidyAccessibilityCheckLevel );
1169     Bool quiet = cfgBool( doc, TidyQuiet );
1170     Bool force = cfgBool( doc, TidyForceOutput );
1171 
1172     if ( !quiet )
1173     {
1174 
1175         ReportMarkupVersion( doc );
1176         ReportNumWarnings( doc );
1177     }
1178     
1179     if ( doc->errors > 0 && !force )
1180         NeedsAuthorIntervention( doc );
1181 
1182 #if SUPPORT_ACCESSIBILITY_CHECKS
1183      if ( acclvl > 0 )
1184          AccessibilityChecks( doc );
1185 #endif
1186 
1187      return tidyDocStatus( doc );
1188 }
1189 
1190 int         tidyDocCleanAndRepair( TidyDocImpl* doc )
1191 {
1192     Bool word2K   = cfgBool( doc, TidyWord2000 );
1193     Bool logical  = cfgBool( doc, TidyLogicalEmphasis );
1194     Bool clean    = cfgBool( doc, TidyMakeClean );
1195     Bool dropFont = cfgBool( doc, TidyDropFontTags );
1196     Bool htmlOut  = cfgBool( doc, TidyHtmlOut );
1197     Bool xmlOut   = cfgBool( doc, TidyXmlOut );
1198     Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
1199     Bool xmlDecl  = cfgBool( doc, TidyXmlDecl );
1200     Bool tidyMark = cfgBool( doc, TidyMark );
1201     Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
1202     Node* node;
1203 
1204     if (tidyXmlTags)
1205        return tidyDocStatus( doc );
1206 
1207     /* simplifies <b><b> ... </b> ...</b> etc. */
1208     NestedEmphasis( doc, &doc->root );
1209 
1210     /* cleans up <dir>indented text</dir> etc. */
1211     List2BQ( doc, &doc->root );
1212     BQ2Div( doc, &doc->root );
1213 
1214     /* replaces i by em and b by strong */
1215     if ( logical )
1216         EmFromI( doc, &doc->root );
1217 
1218     if ( word2K && IsWord2000(doc) )
1219     {
1220         /* prune Word2000's <![if ...]> ... <![endif]> */
1221         DropSections( doc, &doc->root );
1222 
1223         /* drop style & class attributes and empty p, span elements */
1224         CleanWord2000( doc, &doc->root );
1225         DropEmptyElements(doc, &doc->root);
1226     }
1227 
1228     /* replaces presentational markup by style rules */
1229     if ( clean || dropFont )
1230         CleanDocument( doc );
1231 
1232     /*  Move terminating <br /> tags from out of paragraphs  */
1233     /*!  Do we want to do this for all block-level elements?  */
1234 
1235     /* This is disabled due to http://tidy.sf.net/bug/681116 */
1236 #if 0
1237     FixBrakes( doc, FindBody( doc ));
1238 #endif
1239 
1240     /*  Reconcile http-equiv meta element with output encoding  */
1241     if (cfg( doc, TidyOutCharEncoding) != RAW
1242 #ifndef NO_NATIVE_ISO2022_SUPPORT
1243         && cfg( doc, TidyOutCharEncoding) != ISO2022
1244 #endif
1245         )
1246         VerifyHTTPEquiv( doc, FindHEAD( doc ));
1247 
1248     if ( !CheckNodeIntegrity( &doc->root ) )
1249         FatalError( integrity );
1250 
1251     /* remember given doctype for reporting */
1252     node = FindDocType(doc);
1253     if (node)
1254     {
1255         AttVal* fpi = GetAttrByName(node, "PUBLIC");
1256         if (AttrHasValue(fpi))
1257             doc->givenDoctype = tmbstrdup(fpi->value);
1258     }
1259 
1260     if ( doc->root.content )
1261     {
1262         /* If we had XHTML input but want HTML output */
1263         if ( htmlOut && doc->lexer->isvoyager )
1264         {
1265             Node* node = FindDocType(doc);
1266             /* Remove reference, but do not free */
1267             if (node)
1268               RemoveNode(node);
1269         }
1270 
1271         if (xhtmlOut && !htmlOut)
1272         {
1273             SetXHTMLDocType(doc);
1274             FixAnchors(doc, &doc->root, yes, yes);
1275             FixXhtmlNamespace(doc, yes);
1276             FixLanguageInformation(doc, &doc->root, yes, yes);
1277         }
1278         else
1279         {
1280             FixDocType(doc);
1281             FixAnchors(doc, &doc->root, yes, yes);
1282             FixXhtmlNamespace(doc, no);
1283             FixLanguageInformation(doc, &doc->root, no, yes);
1284         }
1285 
1286         if (tidyMark )
1287             AddGenerator(doc);
1288     }
1289 
1290     /* ensure presence of initial <?xml version="1.0"?> */
1291     if ( xmlOut && xmlDecl )
1292         FixXmlDecl( doc );
1293 
1294     return tidyDocStatus( doc );
1295 }
1296 
1297 int         tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )