~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/tidylib.c

Version: ~ [ 1.0 ] ~

** Warning: Cannot open xref database.

1 /* tidylib.c -- internal library definitions 2 3 (c) 1998-2005 (W3C) MIT, ERCIM, Keio University 4 See tidy.h for the copyright notice. 5 6 CVS Info : 7 8 $Author: arnaud02 $ 9 $Date: 2005/08/03 18:07:01 $ 10 $Revision: 1.59 $ 11 12 Defines HTML Tidy API implemented by tidy library. 13 14 Very rough initial cut for discussion purposes. 15 16 Public interface is const-correct and doesn't explicitly depend 17 on any globals. Thus, thread-safety may be introduced w/out 18 changing the interface. 19 20 Looking ahead to a C++ wrapper, C functions always pass 21 this-equivalent as 1st arg. 22 23 Created 2001-05-20 by Charles Reitzel 24 25 */ 26 27 #include <errno.h> 28 29 #include "tidy-int.h" 30 #include "parser.h" 31 #include "clean.h" 32 #include "config.h" 33 #include "message.h" 34 #include "pprint.h" 35 #include "entities.h" 36 #include "tmbstr.h" 37 #include "utf8.h" 38 39 #ifdef TIDY_WIN32_MLANG_SUPPORT 40 #include "win32tc.h" 41 #endif 42 43 #ifdef NEVER 44 TidyDocImpl* tidyDocToImpl( TidyDoc tdoc ) 45 { 46 return (TidyDocImpl*) tdoc; 47 } 48 TidyDoc tidyImplToDoc( TidyDocImpl* impl ) 49 { 50 return (TidyDoc) impl; 51 } 52 53 Node* tidyNodeToImpl( TidyNode tnod ) 54 { 55 return (Node*) tnod; 56 } 57 TidyNode tidyImplToNode( Node* node ) 58 { 59 return (TidyNode) node; 60 } 61 62 AttVal* tidyAttrToImpl( TidyAttr tattr ) 63 { 64 return (AttVal*) tattr; 65 } 66 TidyAttr tidyImplToAttr( AttVal* attval ) 67 { 68 return (TidyAttr) attval; 69 } 70 71 const TidyOptionImpl* tidyOptionToImpl( TidyOption topt ) 72 { 73 return (const TidyOptionImpl*) topt; 74 } 75 TidyOption tidyImplToOption( const TidyOptionImpl* option ) 76 { 77 return (TidyOption) option; 78 } 79 #endif 80 81 /* Tidy public interface 82 ** 83 ** Most functions return an integer: 84 ** 85 ** 0 -> SUCCESS 86 ** >0 -> WARNING 87 ** <0 -> ERROR 88 ** 89 */ 90 91 TidyDoc TIDY_CALL tidyCreate(void) 92 { 93 TidyDocImpl* impl = tidyDocCreate(); 94 return tidyImplToDoc( impl ); 95 } 96 97 void TIDY_CALL tidyRelease( TidyDoc tdoc ) 98 { 99 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 100 tidyDocRelease( impl ); 101 } 102 103 TidyDocImpl* tidyDocCreate(void) 104 { 105 TidyDocImpl* doc = (TidyDocImpl*)MemAlloc( sizeof(TidyDocImpl) ); 106 ClearMemory( doc, sizeof(*doc) ); 107 108 InitMap(); 109 InitTags( doc ); 110 InitAttrs( doc ); 111 InitConfig( doc ); 112 InitPrintBuf( doc ); 113 114 /* By default, wire tidy messages to standard error. 115 ** Document input will be set by parsing routines. 116 ** Document output will be set by pretty print routines. 117 ** Config input will be set by config parsing routines. 118 ** But we need to start off with a way to report errors. 119 */ 120 doc->errout = StdErrOutput(); 121 return doc; 122 } 123 124 void tidyDocRelease( TidyDocImpl* doc ) 125 { 126 /* doc in/out opened and closed by parse/print routines */ 127 if ( doc ) 128 { 129 assert( doc->docIn == NULL ); 130 assert( doc->docOut == NULL ); 131 132 ReleaseStreamOut( doc->errout ); 133 doc->errout = NULL; 134 135 FreePrintBuf( doc ); 136 FreeLexer( doc ); 137 FreeNode(doc, &doc->root); 138 ClearMemory(&doc->root, sizeof(Node)); 139 140 if (doc->givenDoctype) 141 MemFree(doc->givenDoctype); 142 143 FreeConfig( doc ); 144 FreeAttrTable( doc ); 145 FreeTags( doc ); 146 MemFree( doc ); 147 } 148 } 149 150 /* Let application store a chunk of data w/ each Tidy tdocance. 151 ** Useful for callbacks. 152 */ 153 void TIDY_CALL tidySetAppData( TidyDoc tdoc, ulong appData ) 154 { 155 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 156 if ( impl ) 157 impl->appData = appData; 158 } 159 ulong TIDY_CALL tidyGetAppData( TidyDoc tdoc ) 160 { 161 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 162 if ( impl ) 163 return impl->appData; 164 return 0; 165 } 166 167 ctmbstr TIDY_CALL tidyReleaseDate(void) 168 { 169 return ReleaseDate(); 170 } 171 172 173 /* Get/set configuration options 174 */ 175 Bool TIDY_CALL tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback ) 176 { 177 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 178 if ( impl ) 179 { 180 impl->pOptCallback = pOptCallback; 181 return yes; 182 } 183 return no; 184 } 185 186 187 int TIDY_CALL tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil ) 188 { 189 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 190 if ( impl ) 191 return ParseConfigFile( impl, cfgfil ); 192 return -EINVAL; 193 } 194 195 int TIDY_CALL tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc ) 196 { 197 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 198 if ( impl ) 199 return ParseConfigFileEnc( impl, cfgfil, charenc ); 200 return -EINVAL; 201 } 202 203 int TIDY_CALL tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam ) 204 { 205 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 206 if ( impl ) 207 { 208 int enc = CharEncodingId( encnam ); 209 if ( enc >= 0 && AdjustCharEncoding(impl, enc) ) 210 return 0; 211 212 ReportBadArgument( impl, "char-encoding" ); 213 } 214 return -EINVAL; 215 } 216 217 int TIDY_CALL tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam ) 218 { 219 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 220 if ( impl ) 221 { 222 int enc = CharEncodingId( encnam ); 223 if ( enc >= 0 && SetOptionInt( impl, TidyInCharEncoding, enc ) ) 224 return 0; 225 226 ReportBadArgument( impl, "in-char-encoding" ); 227 } 228 return -EINVAL; 229 } 230 231 int TIDY_CALL tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam ) 232 { 233 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 234 if ( impl ) 235 { 236 int enc = CharEncodingId( encnam ); 237 if ( enc >= 0 && SetOptionInt( impl, TidyOutCharEncoding, enc ) ) 238 return 0; 239 240 ReportBadArgument( impl, "out-char-encoding" ); 241 } 242 return -EINVAL; 243 } 244 245 TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam ) 246 { 247 const TidyOptionImpl* option = lookupOption( optnam ); 248 if ( option ) 249 return option->id; 250 return N_TIDY_OPTIONS; /* Error */ 251 } 252 253 TidyIterator TIDY_CALL tidyGetOptionList( TidyDoc tdoc ) 254 { 255 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 256 if ( impl ) 257 return getOptionList( impl ); 258 return (TidyIterator) -1; 259 } 260 261 TidyOption TIDY_CALL tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos ) 262 { 263 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 264 const TidyOptionImpl* option = NULL; 265 if ( impl ) 266 option = getNextOption( impl, pos ); 267 else if ( pos ) 268 *pos = 0; 269 return tidyImplToOption( option ); 270 } 271 272 273 TidyOption TIDY_CALL tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId ) 274 { 275 const TidyOptionImpl* option = getOption( optId ); 276 return tidyImplToOption( option ); 277 } 278 TidyOption TIDY_CALL tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam ) 279 { 280 const TidyOptionImpl* option = lookupOption( optnam ); 281 return tidyImplToOption( option ); 282 } 283 284 TidyOptionId TIDY_CALL tidyOptGetId( TidyOption topt ) 285 { 286 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 287 if ( option ) 288 return option->id; 289 return N_TIDY_OPTIONS; 290 } 291 ctmbstr TIDY_CALL tidyOptGetName( TidyOption topt ) 292 { 293 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 294 if ( option ) 295 return option->name; 296 return NULL; 297 } 298 TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt ) 299 { 300 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 301 if ( option ) 302 return option->type; 303 return (TidyOptionType) -1; 304 } 305 TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt ) 306 { 307 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 308 if ( option ) 309 return option->category; 310 return (TidyConfigCategory) -1; 311 } 312 ctmbstr TIDY_CALL tidyOptGetDefault( TidyOption topt ) 313 { 314 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 315 if ( option && option->type == TidyString ) 316 return (ctmbstr) option->dflt; 317 return NULL; 318 } 319 ulong TIDY_CALL tidyOptGetDefaultInt( TidyOption topt ) 320 { 321 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 322 if ( option && option->type != TidyString ) 323 return option->dflt; 324 return ~0U; 325 } 326 Bool TIDY_CALL tidyOptGetDefaultBool( TidyOption topt ) 327 { 328 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 329 if ( option && option->type != TidyString ) 330 return ( option->dflt ? yes : no ); 331 return no; 332 } 333 Bool TIDY_CALL tidyOptIsReadOnly( TidyOption topt ) 334 { 335 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 336 if ( option ) 337 return ( option->parser == NULL ); 338 return yes; 339 } 340 341 342 TidyIterator TIDY_CALL tidyOptGetPickList( TidyOption topt ) 343 { 344 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 345 if ( option ) 346 return getOptionPickList( option ); 347 return (TidyIterator) -1; 348 } 349 ctmbstr TIDY_CALL tidyOptGetNextPick( TidyOption topt, TidyIterator* pos ) 350 { 351 const TidyOptionImpl* option = tidyOptionToImpl( topt ); 352 if ( option ) 353 return getNextOptionPick( option, pos ); 354 return NULL; 355 } 356 357 358 ctmbstr TIDY_CALL tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId ) 359 { 360 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 361 ctmbstr optval = NULL; 362 if ( impl ) 363 optval = cfgStr( impl, optId ); 364 return optval; 365 } 366 Bool TIDY_CALL tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val ) 367 { 368 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 369 if ( impl ) 370 return ParseConfigValue( impl, optId, val ); 371 return no; 372 } 373 Bool TIDY_CALL tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val ) 374 { 375 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 376 if ( impl ) 377 return ParseConfigOption( impl, optnam, val ); 378 return no; 379 } 380 381 ulong TIDY_CALL tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId ) 382 { 383 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 384 ulong opti = 0; 385 if ( impl ) 386 opti = cfg( impl, optId ); 387 return opti; 388 } 389 390 Bool TIDY_CALL tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val ) 391 { 392 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 393 if ( impl ) 394 return SetOptionInt( impl, optId, val ); 395 return no; 396 } 397 398 Bool TIDY_CALL tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId ) 399 { 400 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 401 Bool optb = no; 402 if ( impl ) 403 { 404 const TidyOptionImpl* option = getOption( optId ); 405 if ( option ) 406 { 407 optb = cfgBool( impl, optId ); 408 } 409 } 410 return optb; 411 } 412 413 Bool TIDY_CALL tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val ) 414 { 415 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 416 if ( impl ) 417 return SetOptionBool( impl, optId, val ); 418 return no; 419 } 420 421 ctmbstr TIDY_CALL tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId ) 422 { 423 uint enc = tidyOptGetInt( tdoc, optId ); 424 return CharEncodingOptName( enc ); 425 } 426 427 ctmbstr TIDY_CALL tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId ) 428 { 429 const TidyOptionImpl* option = getOption( optId ); 430 if ( option && option->pickList ) 431 { 432 uint ix, pick = tidyOptGetInt( tdoc, optId ); 433 const ctmbstr* pL = option->pickList; 434 for ( ix=0; *pL && ix < pick; ++ix ) 435 ++pL; 436 if ( *pL ) 437 return *pL; 438 } 439 return NULL; 440 } 441 442 443 TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc ) 444 { 445 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 446 TidyIterator declIter = 0; 447 if ( impl ) 448 declIter = GetDeclaredTagList( impl ); 449 return declIter; 450 } 451 452 ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId, 453 TidyIterator* iter ) 454 { 455 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 456 ctmbstr tagnam = NULL; 457 if ( impl ) 458 { 459 UserTagType tagtyp = tagtype_null; 460 if ( optId == TidyInlineTags ) 461 tagtyp = tagtype_inline; 462 else if ( optId == TidyBlockTags ) 463 tagtyp = tagtype_block; 464 else if ( optId == TidyEmptyTags ) 465 tagtyp = tagtype_empty; 466 else if ( optId == TidyPreTags ) 467 tagtyp = tagtype_pre; 468 if ( tagtyp != tagtype_null ) 469 tagnam = GetNextDeclaredTag( impl, tagtyp, iter ); 470 } 471 return tagnam; 472 } 473 474 ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt ) 475 { 476 const TidyOptionId optId = tidyOptGetId( opt ); 477 const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId ); 478 return docDesc ? docDesc->doc : NULL; 479 } 480 481 TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt ) 482 { 483 const TidyOptionId optId = tidyOptGetId( opt ); 484 const TidyOptionDoc* docDesc = tidyOptGetDocDesc( optId ); 485 if (docDesc && docDesc->links) 486 return (TidyIterator)docDesc->links; 487 return (TidyIterator)NULL; 488 } 489 490 TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos ) 491 { 492 const TidyOptionId* curr = (TidyOptionId *)*pos; 493 TidyOption opt; 494 495 if (*curr == TidyUnknownOption) 496 { 497 *pos = (TidyIterator)NULL; 498 return (TidyOption)0; 499 } 500 opt = tidyGetOption(tdoc, *curr); 501 curr++; 502 *pos = (*curr == TidyUnknownOption ) ? 503 (TidyIterator)NULL:(TidyIterator)curr; 504 return opt; 505 } 506 507 int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil ) 508 { 509 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 510 if ( impl ) 511 return SaveConfigFile( impl, cfgfil ); 512 return -EINVAL; 513 } 514 515 int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink ) 516 { 517 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 518 if ( impl ) 519 return SaveConfigSink( impl, sink ); 520 return -EINVAL; 521 } 522 523 Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc ) 524 { 525 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 526 if ( impl ) 527 { 528 TakeConfigSnapshot( impl ); 529 return yes; 530 } 531 return no; 532 } 533 Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc ) 534 { 535 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 536 if ( impl ) 537 { 538 ResetConfigToSnapshot( impl ); 539 return yes; 540 } 541 return no; 542 } 543 Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc ) 544 { 545 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 546 if ( impl ) 547 { 548 ResetConfigToDefault( impl ); 549 return yes; 550 } 551 return no; 552 } 553 554 Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId ) 555 { 556 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 557 if ( impl ) 558 return ResetOptionToDefault( impl, optId ); 559 return no; 560 } 561 562 Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc ) 563 { 564 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 565 if ( impl ) 566 return ConfigDiffThanDefault( impl ); 567 return no; 568 } 569 Bool TIDY_CALL tidyOptDiffThanSnapshot( TidyDoc tdoc ) 570 { 571 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 572 if ( impl ) 573 return ConfigDiffThanSnapshot( impl ); 574 return no; 575 } 576 577 Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from ) 578 { 579 TidyDocImpl* docTo = tidyDocToImpl( to ); 580 TidyDocImpl* docFrom = tidyDocToImpl( from ); 581 if ( docTo && docFrom ) 582 { 583 CopyConfig( docTo, docFrom ); 584 return yes; 585 } 586 return no; 587 } 588 589 590 /* I/O and Message handling interface 591 ** 592 ** By default, Tidy will define, create and use 593 ** tdocances of input and output handlers for 594 ** standard C buffered I/O (i.e. FILE* stdin, 595 ** FILE* stdout and FILE* stderr for content 596 ** input, content output and diagnostic output, 597 ** respectively. A FILE* cfgFile input handler 598 ** will be used for config files. Command line 599 ** options will just be set directly. 600 */ 601 602 /* Use TidyReportFilter to filter messages by diagnostic level: 603 ** info, warning, etc. Just set diagnostic output 604 ** handler to redirect all diagnostics output. Return true 605 ** to proceed with output, false to cancel. 606 */ 607 Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt ) 608 { 609 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 610 if ( impl ) 611 { 612 impl->mssgFilt = filt; 613 return yes; 614 } 615 return no; 616 } 617 618 #if 0 /* Not yet */ 619 int tidySetContentOutputSink( TidyDoc tdoc, TidyOutputSink* outp ) 620 { 621 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 622 if ( impl ) 623 { 624 impl->docOut = outp; 625 return 0; 626 } 627 return -EINVAL; 628 } 629 int tidySetDiagnosticOutputSink( TidyDoc tdoc, TidyOutputSink* outp ) 630 { 631 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 632 if ( impl ) 633 { 634 impl->msgOut = outp; 635 return 0; 636 } 637 return -EINVAL; 638 } 639 640 641 /* Library helpers 642 */ 643 cmbstr tidyLookupMessage( TidyDoc tdoc, int errorNo ) 644 { 645 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 646 cmbstr mssg = NULL; 647 if ( impl ) 648 mssg = tidyMessage_Lookup( impl->messages, errorNo ); 649 return mssg; 650 } 651 #endif 652 653 654 FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam ) 655 { 656 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 657 if ( impl ) 658 { 659 FILE* errout = fopen( errfilnam, "wb" ); 660 if ( errout ) 661 { 662 uint outenc = cfg( impl, TidyOutCharEncoding ); 663 uint nl = cfg( impl, TidyNewline ); 664 ReleaseStreamOut( impl->errout ); 665 impl->errout = FileOutput( errout, outenc, nl ); 666 return errout; 667 } 668 else /* Emit message to current error sink */ 669 FileError( impl, errfilnam, TidyError ); 670 } 671 return NULL; 672 } 673 674 int TIDY_CALL tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf ) 675 { 676 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 677 if ( impl ) 678 { 679 uint outenc = cfg( impl, TidyOutCharEncoding ); 680 uint nl = cfg( impl, TidyNewline ); 681 ReleaseStreamOut( impl->errout ); 682 impl->errout = BufferOutput( errbuf, outenc, nl ); 683 return ( impl->errout ? 0 : -ENOMEM ); 684 } 685 return -EINVAL; 686 } 687 688 int TIDY_CALL tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink ) 689 { 690 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 691 if ( impl ) 692 { 693 uint outenc = cfg( impl, TidyOutCharEncoding ); 694 uint nl = cfg( impl, TidyNewline ); 695 ReleaseStreamOut( impl->errout ); 696 impl->errout = UserOutput( sink, outenc, nl ); 697 return ( impl->errout ? 0 : -ENOMEM ); 698 } 699 return -EINVAL; 700 } 701 702 703 /* Document info */ 704 int TIDY_CALL tidyStatus( TidyDoc tdoc ) 705 { 706 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 707 int tidyStat = -EINVAL; 708 if ( impl ) 709 tidyStat = tidyDocStatus( impl ); 710 return tidyStat; 711 } 712 int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) ) 713 { 714 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ 715 return 0; 716 } 717 Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) ) 718 { 719 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ 720 return no; 721 } 722 Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) ) 723 { 724 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */ 725 return no; 726 } 727 728 uint TIDY_CALL tidyErrorCount( TidyDoc tdoc ) 729 { 730 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 731 uint count = 0xFFFFFFFF; 732 if ( impl ) 733 count = impl->errors; 734 return count; 735 } 736 uint TIDY_CALL tidyWarningCount( TidyDoc tdoc ) 737 { 738 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 739 uint count = 0xFFFFFFFF; 740 if ( impl ) 741 count = impl->warnings; 742 return count; 743 } 744 uint TIDY_CALL tidyAccessWarningCount( TidyDoc tdoc ) 745 { 746 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 747 uint count = 0xFFFFFFFF; 748 if ( impl ) 749 count = impl->accessErrors; 750 return count; 751 } 752 uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc ) 753 { 754 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 755 uint count = 0xFFFFFFFF; 756 if ( impl ) 757 count = impl->optionErrors; 758 return count; 759 } 760 761 762 /* Error reporting functions 763 */ 764 void TIDY_CALL tidyErrorSummary( TidyDoc tdoc ) 765 { 766 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 767 if ( impl ) 768 ErrorSummary( impl ); 769 } 770 void TIDY_CALL tidyGeneralInfo( TidyDoc tdoc ) 771 { 772 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 773 if ( impl ) 774 GeneralInfo( impl ); 775 } 776 777 778 /* I/O Functions 779 ** 780 ** Initial version supports only whole-file operations. 781 ** Do not expose Tidy StreamIn or Out data structures - yet. 782 */ 783 784 /* Parse/load Functions 785 ** 786 ** HTML/XHTML version determined from input. 787 */ 788 int TIDY_CALL tidyParseFile( TidyDoc tdoc, ctmbstr filnam ) 789 { 790 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 791 return tidyDocParseFile( doc, filnam ); 792 } 793 int TIDY_CALL tidyParseStdin( TidyDoc tdoc ) 794 { 795 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 796 return tidyDocParseStdin( doc ); 797 } 798 int TIDY_CALL tidyParseString( TidyDoc tdoc, ctmbstr content ) 799 { 800 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 801 return tidyDocParseString( doc, content ); 802 } 803 int TIDY_CALL tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf ) 804 { 805 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 806 return tidyDocParseBuffer( doc, inbuf ); 807 } 808 int TIDY_CALL tidyParseSource( TidyDoc tdoc, TidyInputSource* source ) 809 { 810 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 811 return tidyDocParseSource( doc, source ); 812 } 813 814 815 int tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam ) 816 { 817 int status = -ENOENT; 818 FILE* fin = fopen( filnam, "rb" ); 819 820 #if PRESERVE_FILE_TIMES 821 struct stat sbuf = {0}; 822 /* get last modified time */ 823 ClearMemory( &doc->filetimes, sizeof(doc->filetimes) ); 824 if ( fin && cfgBool(doc,TidyKeepFileTimes) && 825 fstat(fileno(fin), &sbuf) != -1 ) 826 { 827 doc->filetimes.actime = sbuf.st_atime; 828 doc->filetimes.modtime = sbuf.st_mtime; 829 } 830 #endif 831 832 if ( fin ) 833 { 834 StreamIn* in = FileInput( doc, fin, cfg( doc, TidyInCharEncoding )); 835 status = tidyDocParseStream( doc, in ); 836 freeFileSource(&in->source, yes); 837 freeStreamIn(in); 838 } 839 else /* Error message! */ 840 FileError( doc, filnam, TidyError ); 841 return status; 842 } 843 844 int tidyDocParseStdin( TidyDocImpl* doc ) 845 { 846 StreamIn* in = FileInput( doc, stdin, cfg( doc, TidyInCharEncoding )); 847 int status = tidyDocParseStream( doc, in ); 848 freeStreamIn(in); 849 return status; 850 } 851 852 int tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf ) 853 { 854 int status = -EINVAL; 855 if ( inbuf ) 856 { 857 StreamIn* in = BufferInput( doc, inbuf, cfg( doc, TidyInCharEncoding )); 858 status = tidyDocParseStream( doc, in ); 859 freeStreamIn(in); 860 } 861 return status; 862 } 863 864 int tidyDocParseString( TidyDocImpl* doc, ctmbstr content ) 865 { 866 int status = -EINVAL; 867 TidyBuffer inbuf = {0}; 868 StreamIn* in = NULL; 869 870 if ( content ) 871 { 872 tidyBufAttach( &inbuf, (byte*)content, tmbstrlen(content)+1 ); 873 in = BufferInput( doc, &inbuf, cfg( doc, TidyInCharEncoding )); 874 status = tidyDocParseStream( doc, in ); 875 tidyBufDetach( &inbuf ); 876 freeStreamIn(in); 877 } 878 return status; 879 } 880 881 int tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source ) 882 { 883 StreamIn* in = UserInput( doc, source, cfg( doc, TidyInCharEncoding )); 884 int status = tidyDocParseStream( doc, in ); 885 freeStreamIn(in); 886 return status; 887 } 888 889 890 /* Print/save Functions 891 ** 892 */ 893 int TIDY_CALL tidySaveFile( TidyDoc tdoc, ctmbstr filnam ) 894 { 895 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 896 return tidyDocSaveFile( doc, filnam ); 897 } 898 int TIDY_CALL tidySaveStdout( TidyDoc tdoc ) 899 { 900 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 901 return tidyDocSaveStdout( doc ); 902 } 903 int TIDY_CALL tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen ) 904 { 905 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 906 return tidyDocSaveString( doc, buffer, buflen ); 907 } 908 int TIDY_CALL tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf ) 909 { 910 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 911 return tidyDocSaveBuffer( doc, outbuf ); 912 } 913 int TIDY_CALL tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink ) 914 { 915 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 916 return tidyDocSaveSink( doc, sink ); 917 } 918 919 int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam ) 920 { 921 int status = -ENOENT; 922 FILE* fout = NULL; 923 924 /* Don't zap input file if no output */ 925 if ( doc->errors > 0 && 926 cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) ) 927 status = tidyDocStatus( doc ); 928 else 929 fout = fopen( filnam, "wb" ); 930 931 if ( fout ) 932 { 933 uint outenc = cfg( doc, TidyOutCharEncoding ); 934 uint nl = cfg( doc, TidyNewline ); 935 StreamOut* out = FileOutput( fout, outenc, nl ); 936 937 status = tidyDocSaveStream( doc, out ); 938 939 fclose( fout ); 940 MemFree( out ); 941 942 #if PRESERVE_FILE_TIMES 943 if ( doc->filetimes.actime ) 944 { 945 /* set file last accessed/modified times to original values */ 946 utime( filnam, &doc->filetimes ); 947 ClearMemory( &doc->filetimes, sizeof(doc->filetimes) ); 948 } 949 #endif /* PRESERVFILETIMES */ 950 } 951 if ( status < 0 ) /* Error message! */ 952 FileError( doc, filnam, TidyError ); 953 return status; 954 } 955 956 957 958 /* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3. 959 ** The code has been left in in case it works w/ other compilers 960 ** or operating systems. If stdout is in Text mode, be aware that 961 ** it will garble UTF16 documents. In text mode, when it encounters 962 ** a single byte of value 10 (0xA), it will insert a single byte 963 ** value 13 (0xD) just before it. This has the effect of garbling 964 ** the entire document. 965 */ 966 967 #if !defined(NO_SETMODE_SUPPORT) 968 969 #if defined(_WIN32) || defined(OS2_OS) 970 #include <fcntl.h> 971 #include <io.h> 972 #endif 973 974 #endif 975 976 int tidyDocSaveStdout( TidyDocImpl* doc ) 977 { 978 #if !defined(NO_SETMODE_SUPPORT) 979 980 #if defined(_WIN32) || defined(OS2_OS) 981 int oldstdoutmode = -1, oldstderrmode = -1; 982 #endif 983 984 #endif 985 int status = 0; 986 uint outenc = cfg( doc, TidyOutCharEncoding ); 987 uint nl = cfg( doc, TidyNewline ); 988 StreamOut* out = FileOutput( stdout, outenc, nl ); 989 990 #if !defined(NO_SETMODE_SUPPORT) 991 992 #if defined(_WIN32) || defined(OS2_OS) 993 oldstdoutmode = setmode( fileno(stdout), _O_BINARY ); 994 oldstderrmode = setmode( fileno(stderr), _O_BINARY ); 995 #endif 996 997 #endif 998 999 if ( 0 == status ) 1000 status = tidyDocSaveStream( doc, out ); 1001 1002 fflush(stdout); 1003 fflush(stderr); 1004 1005 #if !defined(NO_SETMODE_SUPPORT) 1006 1007 #if defined(_WIN32) || defined(OS2_OS) 1008 if ( oldstdoutmode != -1 ) 1009 oldstdoutmode = setmode( fileno(stdout), oldstdoutmode ); 1010 if ( oldstderrmode != -1 ) 1011 oldstderrmode = setmode( fileno(stderr), oldstderrmode ); 1012 #endif 1013 1014 #endif 1015 1016 MemFree( out ); 1017 return status; 1018 } 1019 1020 int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen ) 1021 { 1022 uint outenc = cfg( doc, TidyOutCharEncoding ); 1023 uint nl = cfg( doc, TidyNewline ); 1024 TidyBuffer outbuf = {0}; 1025 1026 StreamOut* out = BufferOutput( &outbuf, outenc, nl ); 1027 int status = tidyDocSaveStream( doc, out ); 1028 1029 if ( outbuf.size > *buflen ) 1030 status = -ENOMEM; 1031 else 1032 memcpy( buffer, outbuf.bp, outbuf.size ); 1033 1034 *buflen = outbuf.size; 1035 tidyBufFree( &outbuf ); 1036 MemFree( out ); 1037 return status; 1038 } 1039 1040 int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf ) 1041 { 1042 int status = -EINVAL; 1043 if ( outbuf ) 1044 { 1045 uint outenc = cfg( doc, TidyOutCharEncoding ); 1046 uint nl = cfg( doc, TidyNewline ); 1047 StreamOut* out = BufferOutput( outbuf, outenc, nl ); 1048 1049 status = tidyDocSaveStream( doc, out ); 1050 MemFree( out ); 1051 } 1052 return status; 1053 } 1054 1055 int tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink ) 1056 { 1057 uint outenc = cfg( doc, TidyOutCharEncoding ); 1058 uint nl = cfg( doc, TidyNewline ); 1059 StreamOut* out = UserOutput( sink, outenc, nl ); 1060 int status = tidyDocSaveStream( doc, out ); 1061 MemFree( out ); 1062 return status; 1063 } 1064 1065 int tidyDocStatus( TidyDocImpl* doc ) 1066 { 1067 if ( doc->errors > 0 ) 1068 return 2; 1069 if ( doc->warnings > 0 || doc->accessErrors > 0 ) 1070 return 1; 1071 return 0; 1072 } 1073 1074 1075 1076 int TIDY_CALL tidyCleanAndRepair( TidyDoc tdoc ) 1077 { 1078 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1079 if ( impl ) 1080 return tidyDocCleanAndRepair( impl ); 1081 return -EINVAL; 1082 } 1083 1084 int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc ) 1085 { 1086 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1087 if ( impl ) 1088 return tidyDocRunDiagnostics( impl ); 1089 return -EINVAL; 1090 } 1091 1092 1093 /* Workhorse functions. 1094 ** 1095 ** Parse requires input source, all input config items 1096 ** and diagnostic sink to have all been set before calling. 1097 ** 1098 ** Emit likewise requires that document sink and all 1099 ** pretty printing options have been set. 1100 */ 1101 static ctmbstr integrity = "\nPanic - tree has lost its integrity\n"; 1102 1103 int tidyDocParseStream( TidyDocImpl* doc, StreamIn* in ) 1104 { 1105 Bool xmlIn = cfgBool( doc, TidyXmlTags ); 1106 int bomEnc; 1107 1108 assert( doc != NULL && in != NULL ); 1109 assert( doc->docIn == NULL ); 1110 doc->docIn = in; 1111 1112 TakeConfigSnapshot( doc ); /* Save config state */ 1113 FreeLexer( doc ); 1114 FreeAnchors( doc ); 1115 1116 FreeNode(doc, &doc->root); 1117 ClearMemory(&doc->root, sizeof(Node)); 1118 1119 if (doc->givenDoctype) 1120 MemFree(doc->givenDoctype); 1121 1122 doc->givenDoctype = NULL; 1123 1124 doc->lexer = NewLexer( doc ); 1125 /* doc->lexer->root = &doc->root; */ 1126 doc->root.line = doc->lexer->lines; 1127 doc->root.column = doc->lexer->columns; 1128 doc->inputHadBOM = no; 1129 1130 bomEnc = ReadBOMEncoding(in); 1131 1132 if (bomEnc != -1) 1133 { 1134 in->encoding = bomEnc; 1135 SetOptionInt(doc, TidyInCharEncoding, bomEnc); 1136 } 1137 1138 #ifdef TIDY_WIN32_MLANG_SUPPORT 1139 if (in->encoding > WIN32MLANG) 1140 Win32MLangInitInputTranscoder(in, in->encoding); 1141 #endif /* TIDY_WIN32_MLANG_SUPPORT */ 1142 1143 /* Tidy doesn't alter the doctype for generic XML docs */ 1144 if ( xmlIn ) 1145 { 1146 ParseXMLDocument( doc ); 1147 if ( !CheckNodeIntegrity( &doc->root ) ) 1148 FatalError( integrity ); 1149 } 1150 else 1151 { 1152 doc->warnings = 0; 1153 ParseDocument( doc ); 1154 if ( !CheckNodeIntegrity( &doc->root ) ) 1155 FatalError( integrity ); 1156 } 1157 1158 #ifdef TIDY_WIN32_MLANG_SUPPORT 1159 Win32MLangUninitInputTranscoder(in); 1160 #endif /* TIDY_WIN32_MLANG_SUPPORT */ 1161 1162 doc->docIn = NULL; 1163 return tidyDocStatus( doc ); 1164 } 1165 1166 int tidyDocRunDiagnostics( TidyDocImpl* doc ) 1167 { 1168 uint acclvl = cfg( doc, TidyAccessibilityCheckLevel ); 1169 Bool quiet = cfgBool( doc, TidyQuiet ); 1170 Bool force = cfgBool( doc, TidyForceOutput ); 1171 1172 if ( !quiet ) 1173 { 1174 1175 ReportMarkupVersion( doc ); 1176 ReportNumWarnings( doc ); 1177 } 1178 1179 if ( doc->errors > 0 && !force ) 1180 NeedsAuthorIntervention( doc ); 1181 1182 #if SUPPORT_ACCESSIBILITY_CHECKS 1183 if ( acclvl > 0 ) 1184 AccessibilityChecks( doc ); 1185 #endif 1186 1187 return tidyDocStatus( doc ); 1188 } 1189 1190 int tidyDocCleanAndRepair( TidyDocImpl* doc ) 1191 { 1192 Bool word2K = cfgBool( doc, TidyWord2000 ); 1193 Bool logical = cfgBool( doc, TidyLogicalEmphasis ); 1194 Bool clean = cfgBool( doc, TidyMakeClean ); 1195 Bool dropFont = cfgBool( doc, TidyDropFontTags ); 1196 Bool htmlOut = cfgBool( doc, TidyHtmlOut ); 1197 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1198 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1199 Bool xmlDecl = cfgBool( doc, TidyXmlDecl ); 1200 Bool tidyMark = cfgBool( doc, TidyMark ); 1201 Bool tidyXmlTags = cfgBool( doc, TidyXmlTags ); 1202 Node* node; 1203 1204 if (tidyXmlTags) 1205 return tidyDocStatus( doc ); 1206 1207 /* simplifies <b><b> ... </b> ...</b> etc. */ 1208 NestedEmphasis( doc, &doc->root ); 1209 1210 /* cleans up <dir>indented text</dir> etc. */ 1211 List2BQ( doc, &doc->root ); 1212 BQ2Div( doc, &doc->root ); 1213 1214 /* replaces i by em and b by strong */ 1215 if ( logical ) 1216 EmFromI( doc, &doc->root ); 1217 1218 if ( word2K && IsWord2000(doc) ) 1219 { 1220 /* prune Word2000's <![if ...]> ... <![endif]> */ 1221 DropSections( doc, &doc->root ); 1222 1223 /* drop style & class attributes and empty p, span elements */ 1224 CleanWord2000( doc, &doc->root ); 1225 DropEmptyElements(doc, &doc->root); 1226 } 1227 1228 /* replaces presentational markup by style rules */ 1229 if ( clean || dropFont ) 1230 CleanDocument( doc ); 1231 1232 /* Move terminating <br /> tags from out of paragraphs */ 1233 /*! Do we want to do this for all block-level elements? */ 1234 1235 /* This is disabled due to http://tidy.sf.net/bug/681116 */ 1236 #if 0 1237 FixBrakes( doc, FindBody( doc )); 1238 #endif 1239 1240 /* Reconcile http-equiv meta element with output encoding */ 1241 if (cfg( doc, TidyOutCharEncoding) != RAW 1242 #ifndef NO_NATIVE_ISO2022_SUPPORT 1243 && cfg( doc, TidyOutCharEncoding) != ISO2022 1244 #endif 1245 ) 1246 VerifyHTTPEquiv( doc, FindHEAD( doc )); 1247 1248 if ( !CheckNodeIntegrity( &doc->root ) ) 1249 FatalError( integrity ); 1250 1251 /* remember given doctype for reporting */ 1252 node = FindDocType(doc); 1253 if (node) 1254 { 1255 AttVal* fpi = GetAttrByName(node, "PUBLIC"); 1256 if (AttrHasValue(fpi)) 1257 doc->givenDoctype = tmbstrdup(fpi->value); 1258 } 1259 1260 if ( doc->root.content ) 1261 { 1262 /* If we had XHTML input but want HTML output */ 1263 if ( htmlOut && doc->lexer->isvoyager ) 1264 { 1265 Node* node = FindDocType(doc); 1266 /* Remove reference, but do not free */ 1267 if (node) 1268 RemoveNode(node); 1269 } 1270 1271 if (xhtmlOut && !htmlOut) 1272 { 1273 SetXHTMLDocType(doc); 1274 FixAnchors(doc, &doc->root, yes, yes); 1275 FixXhtmlNamespace(doc, yes); 1276 FixLanguageInformation(doc, &doc->root, yes, yes); 1277 } 1278 else 1279 { 1280 FixDocType(doc); 1281 FixAnchors(doc, &doc->root, yes, yes); 1282 FixXhtmlNamespace(doc, no); 1283 FixLanguageInformation(doc, &doc->root, no, yes); 1284 } 1285 1286 if (tidyMark ) 1287 AddGenerator(doc); 1288 } 1289 1290 /* ensure presence of initial <?xml version="1.0"?> */ 1291 if ( xmlOut && xmlDecl ) 1292 FixXmlDecl( doc ); 1293 1294 return tidyDocStatus( doc ); 1295 } 1296 1297 int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out ) 1298 { 1299 Bool showMarkup = cfgBool( doc, TidyShowMarkup ); 1300 Bool forceOutput = cfgBool( doc, TidyForceOutput ); 1301 #if SUPPORT_UTF16_ENCODINGS 1302 Bool outputBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyYesState ); 1303 Bool smartBOM = ( cfgAutoBool(doc, TidyOutputBOM) == TidyAutoState ); 1304 #endif 1305 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1306 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1307 Bool bodyOnly = cfgBool( doc, TidyBodyOnly ); 1308 1309 Bool dropComments = cfgBool(doc, TidyHideComments); 1310 Bool makeClean = cfgBool(doc, TidyMakeClean); 1311 Bool asciiChars = cfgBool(doc, TidyAsciiChars); 1312 Bool makeBare = cfgBool(doc, TidyMakeBare); 1313 Bool escapeCDATA = cfgBool(doc, TidyEscapeCdata); 1314 1315 if (escapeCDATA) 1316 ConvertCDATANodes(doc, &doc->root); 1317 1318 if (dropComments) 1319 DropComments(doc, &doc->root); 1320 1321 if (makeClean) 1322 { 1323 /* noop */ 1324 DropFontElements(doc, &doc->root, NULL); 1325 WbrToSpace(doc, &doc->root); 1326 } 1327 1328 if ((makeClean && asciiChars) || makeBare) 1329 DowngradeTypography(doc, &doc->root); 1330 1331 if (makeBare) 1332 /* Note: no longer replaces &nbsp; in */ 1333 /* attribute values / non-text tokens */ 1334 NormalizeSpaces(doc->lexer, &doc->root); 1335 else 1336 ReplacePreformattedSpaces(doc, &doc->root); 1337 1338 if ( showMarkup && (doc->errors == 0 || forceOutput) ) 1339 { 1340 #if SUPPORT_UTF16_ENCODINGS 1341 /* Output a Byte Order Mark if required */ 1342 if ( outputBOM || (doc->inputHadBOM && smartBOM) ) 1343 outBOM( out ); 1344 #endif 1345 1346 /* No longer necessary. No DOCTYPE == HTML 3.2, 1347 ** which gives you only the basic character entities, 1348 ** which are safe in any browser. 1349 ** if ( !FindDocType(doc) ) 1350 ** SetOptionBool( doc, TidyNumEntities, yes ); 1351 */ 1352 1353 doc->docOut = out; 1354 if ( xmlOut && !xhtmlOut ) 1355 PPrintXMLTree( doc, NORMAL, 0, &doc->root ); 1356 else if ( bodyOnly ) 1357 PrintBody( doc ); 1358 else 1359 PPrintTree( doc, NORMAL, 0, &doc->root ); 1360 1361 PFlushLine( doc, 0 ); 1362 doc->docOut = NULL; 1363 } 1364 1365 ResetConfigToSnapshot( doc ); 1366 return tidyDocStatus( doc ); 1367 } 1368 1369 /* Tree traversal functions 1370 ** 1371 ** The big issue here is the degree to which we should mimic 1372 ** a DOM and/or SAX nodes. 1373 ** 1374 ** Is it 100% possible (and, if so, how difficult is it) to 1375 ** emit SAX events from this API? If SAX events are possible, 1376 ** is that 100% of data needed to build a DOM? 1377 */ 1378 1379 TidyNode TIDY_CALL tidyGetRoot( TidyDoc tdoc ) 1380 { 1381 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1382 return tidyImplToNode( &impl->root ); 1383 } 1384 1385 TidyNode TIDY_CALL tidyGetHtml( TidyDoc tdoc ) 1386 { 1387 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1388 Node* node = NULL; 1389 if ( impl ) 1390 node = FindHTML( impl ); 1391 return tidyImplToNode( node ); 1392 } 1393 1394 TidyNode TIDY_CALL tidyGetHead( TidyDoc tdoc ) 1395 { 1396 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1397 Node* node = NULL; 1398 if ( impl ) 1399 node = FindHEAD( impl ); 1400 return tidyImplToNode( node ); 1401 } 1402 1403 TidyNode TIDY_CALL tidyGetBody( TidyDoc tdoc ) 1404 { 1405 TidyDocImpl* impl = tidyDocToImpl( tdoc ); 1406 Node* node = NULL; 1407 if ( impl ) 1408 node = FindBody( impl ); 1409 return tidyImplToNode( node ); 1410 } 1411 1412 /* parent / child */ 1413 TidyNode TIDY_CALL tidyGetParent( TidyNode tnod ) 1414 { 1415 Node* nimp = tidyNodeToImpl( tnod ); 1416 return tidyImplToNode( nimp->parent ); 1417 } 1418 TidyNode TIDY_CALL tidyGetChild( TidyNode tnod ) 1419 { 1420 Node* nimp = tidyNodeToImpl( tnod ); 1421 return tidyImplToNode( nimp->content ); 1422 } 1423 1424 /* siblings */ 1425 TidyNode TIDY_CALL tidyGetNext( TidyNode tnod ) 1426 { 1427 Node* nimp = tidyNodeToImpl( tnod ); 1428 return tidyImplToNode( nimp->next ); 1429 } 1430 TidyNode TIDY_CALL tidyGetPrev( TidyNode tnod ) 1431 { 1432 Node* nimp = tidyNodeToImpl( tnod ); 1433 return tidyImplToNode( nimp->prev ); 1434 } 1435 1436 /* Node info */ 1437 TidyNodeType TIDY_CALL tidyNodeGetType( TidyNode tnod ) 1438 { 1439 Node* nimp = tidyNodeToImpl( tnod ); 1440 TidyNodeType ntyp = TidyNode_Root; 1441 if ( nimp ) 1442 ntyp = (TidyNodeType) nimp->type; 1443 return ntyp; 1444 } 1445 1446 uint TIDY_CALL tidyNodeLine( TidyNode tnod ) 1447 { 1448 Node* nimp = tidyNodeToImpl( tnod ); 1449 uint line = 0; 1450 if ( nimp ) 1451 line = nimp->line; 1452 return line; 1453 } 1454 uint TIDY_CALL tidyNodeColumn( TidyNode tnod ) 1455 { 1456 Node* nimp = tidyNodeToImpl( tnod ); 1457 uint col = 0; 1458 if ( nimp ) 1459 col = nimp->column; 1460 return col; 1461 } 1462 1463 ctmbstr TIDY_CALL tidyNodeGetName( TidyNode tnod ) 1464 { 1465 Node* nimp = tidyNodeToImpl( tnod ); 1466 ctmbstr nnam = NULL; 1467 if ( nimp ) 1468 nnam = nimp->element; 1469 return nnam; 1470 } 1471 1472 1473 Bool TIDY_CALL tidyNodeHasText( TidyDoc tdoc, TidyNode tnod ) 1474 { 1475 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 1476 if ( doc ) 1477 return nodeHasText( doc, tidyNodeToImpl(tnod) ); 1478 return no; 1479 } 1480 1481 1482 Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf ) 1483 { 1484 TidyDocImpl* doc = tidyDocToImpl( tdoc ); 1485 Node* nimp = tidyNodeToImpl( tnod ); 1486 if ( doc && nimp && outbuf ) 1487 { 1488 uint outenc = cfg( doc, TidyOutCharEncoding ); 1489 uint nl = cfg( doc, TidyNewline ); 1490 StreamOut* out = BufferOutput( outbuf, outenc, nl ); 1491 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1492 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1493 1494 doc->docOut = out; 1495 if ( xmlOut && !xhtmlOut ) 1496 PPrintXMLTree( doc, NORMAL, 0, nimp ); 1497 else 1498 PPrintTree( doc, NORMAL, 0, nimp ); 1499 1500 PFlushLine( doc, 0 ); 1501 doc->docOut = NULL; 1502 1503 MemFree( out ); 1504 return yes; 1505 } 1506 return no; 1507 } 1508 1509 1510 Bool TIDY_CALL tidyNodeIsProp( TidyDoc ARG_UNUSED(tdoc), TidyNode tnod ) 1511 { 1512 Node* nimp = tidyNodeToImpl( tnod ); 1513 Bool isProprietary = yes; 1514 if ( nimp ) 1515 { 1516 switch ( nimp->type ) 1517 { 1518 case RootNode: 1519 case DocTypeTag: 1520 case CommentTag: 1521 case XmlDecl: 1522 case ProcInsTag: 1523 case TextNode: 1524 case CDATATag: 1525 isProprietary = no; 1526 break; 1527 1528 case SectionTag: 1529 case AspTag: 1530 case JsteTag: 1531 case PhpTag: 1532 isProprietary = yes; 1533 break; 1534 1535 case StartTag: 1536 case EndTag: 1537 case StartEndTag: 1538 isProprietary = ( nimp->tag 1539 ? (nimp->tag->versions&VERS_PROPRIETARY)!=0 1540 : yes ); 1541 break; 1542 } 1543 } 1544 return isProprietary; 1545 } 1546 1547 TidyTagId TIDY_CALL tidyNodeGetId(TidyNode tnod) 1548 { 1549 Node* nimp = tidyNodeToImpl(tnod); 1550 1551 TidyTagId tagId = TidyTag_UNKNOWN; 1552 if (nimp && nimp->tag) 1553 tagId = nimp->tag->id; 1554 1555 return tagId; 1556 } 1557 1558 1559 /* Null for non-element nodes and all pure HTML 1560 cmbstr tidyNodeNsLocal( TidyNode tnod ) 1561 { 1562 } 1563 cmbstr tidyNodeNsPrefix( TidyNode tnod ) 1564 { 1565 } 1566 cmbstr tidyNodeNsUri( TidyNode tnod ) 1567 { 1568 } 1569 */ 1570 1571 /* Iterate over attribute values */ 1572 TidyAttr TIDY_CALL tidyAttrFirst( TidyNode tnod ) 1573 { 1574 Node* nimp = tidyNodeToImpl( tnod ); 1575 AttVal* attval = NULL; 1576 if ( nimp ) 1577 attval = nimp->attributes; 1578 return tidyImplToAttr( attval ); 1579 } 1580 TidyAttr TIDY_CALL tidyAttrNext( TidyAttr tattr ) 1581 { 1582 AttVal* attval = tidyAttrToImpl( tattr ); 1583 AttVal* nxtval = NULL; 1584 if ( attval ) 1585 nxtval = attval->next; 1586 return tidyImplToAttr( nxtval ); 1587 } 1588 1589 ctmbstr TIDY_CALL tidyAttrName( TidyAttr tattr ) 1590 { 1591 AttVal* attval = tidyAttrToImpl( tattr ); 1592 ctmbstr anam = NULL; 1593 if ( attval ) 1594 anam = attval->attribute; 1595 return anam; 1596 } 1597 ctmbstr TIDY_CALL tidyAttrValue( TidyAttr tattr ) 1598 { 1599 AttVal* attval = tidyAttrToImpl( tattr ); 1600 ctmbstr aval = NULL; 1601 if ( attval ) 1602 aval = attval->value; 1603 return aval; 1604 } 1605 1606 /* Null for pure HTML 1607 ctmbstr tidyAttrNsLocal( TidyAttr tattr ) 1608 { 1609 } 1610 ctmbstr tidyAttrNsPrefix( TidyAttr tattr ) 1611 { 1612 } 1613 ctmbstr tidyAttrNsUri( TidyAttr tattr ) 1614 { 1615 } 1616 */ 1617 1618 TidyAttrId TIDY_CALL tidyAttrGetId( TidyAttr tattr ) 1619 { 1620 AttVal* attval = tidyAttrToImpl( tattr ); 1621 TidyAttrId attrId = TidyAttr_UNKNOWN; 1622 if ( attval && attval->dict ) 1623 attrId = attval->dict->id; 1624 return attrId; 1625 } 1626 Bool TIDY_CALL tidyAttrIsProp( TidyAttr tattr ) 1627 { 1628 AttVal* attval = tidyAttrToImpl( tattr ); 1629 Bool isProprietary = yes; 1630 if ( attval ) 1631 isProprietary = ( attval->dict 1632 ? (attval->dict->versions & VERS_PROPRIETARY) != 0 1633 : yes ); 1634 return isProprietary; 1635 } 1636

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.