~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/clean.c

Version: ~ [ 1.0 ] ~

** Warning: Cannot open xref database.

/*
  clean.c -- clean up misuse of presentation markup

  (c) 1998-2005 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

  CVS Info :

    $Author: arnaud02 $
    $Date: 2005/08/03 18:06:59 $
    $Revision: 1.98 $

  Filters from other formats such as Microsoft Word
  often make excessive use of presentation markup such
  as font tags, B, I, and the align attribute. By applying
  a set of production rules, it is straightforward to
  transform this to use CSS.

  Some rules replace some of the children of an element by
  style properties on the element, e.g.

  <p><b>...</b></p> -> <p style="font-weight: bold">...</p>

  Such rules are applied to the element's content and then
  to the element itself until no more rules apply.
  Having applied all the rules to an element, it will have
  a style attribute with one or more properties.

  Other rules strip the element they apply to, replacing
  it by style properties on the contents, e.g.

  <dir><li><p>...</li></dir> -> <p style="margin-left: 1em">...

  These rules are applied to an element before processing
  its content and replace the current element by the first
  element in the exposed content.

  After applying both sets of rules, you can replace the
  style attribute by a class value and style rule in the
  document head. To support this, an association of styles
  and class names is built.

  A naive approach is to rely on string matching to test
  when two property lists are the same. A better approach
  would be to first sort the properties before matching.
46 47 */ 48 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 53 #include "tidy-int.h" 54 #include "clean.h" 55 #include "lexer.h" 56 #include "parser.h" 57 #include "attrs.h" 58 #include "message.h" 59 #include "tmbstr.h" 60 #include "utf8.h" 61 62 void RenameElem( Node* node, TidyTagId tid ) 63 { 64 const Dict* dict = LookupTagDef( tid ); 65 MemFree( node->element ); 66 node->element = tmbstrdup( dict->name ); 67 node->tag = dict; 68 } 69 70 static void FreeStyleProps(StyleProp *props) 71 { 72 StyleProp *next; 73 74 while (props) 75 { 76 next = props->next; 77 MemFree(props->name); 78 MemFree(props->value); 79 MemFree(props); 80 props = next; 81 } 82 } 83 84 static StyleProp *InsertProperty( StyleProp* props, ctmbstr name, ctmbstr value ) 85 { 86 StyleProp *first, *prev, *prop; 87 int cmp; 88 89 prev = NULL; 90 first = props; 91 92 while (props) 93 { 94 cmp = tmbstrcmp(props->name, name); 95 96 if (cmp == 0) 97 { 98 /* this property is already defined, ignore new value */ 99 return first; 100 } 101 102 if (cmp > 0) 103 { 104 /* insert before this */ 105 106 prop = (StyleProp *)MemAlloc(sizeof(StyleProp)); 107 prop->name = tmbstrdup(name); 108 prop->value = tmbstrdup(value); 109 prop->next = props; 110 111 if (prev) 112 prev->next = prop; 113 else 114 first = prop; 115 116 return first; 117 } 118 119 prev = props; 120 props = props->next; 121 } 122 123 prop = (StyleProp *)MemAlloc(sizeof(StyleProp)); 124 prop->name = tmbstrdup(name); 125 prop->value = tmbstrdup(value); 126 prop->next = NULL; 127 128 if (prev) 129 prev->next = prop; 130 else 131 first = prop; 132 133 return first; 134 } 135 136 /* 137 Create sorted linked list of properties from style string 138 It temporarily places nulls in place of ':' and ';' to 139 delimit the strings for the property name and value. 140 Some systems don't allow you to NULL literal strings, 141 so to avoid this, a copy is made first. 
142 */ 143 static StyleProp* CreateProps( StyleProp* prop, ctmbstr style ) 144 { 145 tmbstr name, value = NULL, name_end, value_end, line; 146 Bool more; 147 148 line = tmbstrdup(style); 149 name = line; 150 151 while (*name) 152 { 153 while (*name == ' ') 154 ++name; 155 156 name_end = name; 157 158 while (*name_end) 159 { 160 if (*name_end == ':') 161 { 162 value = name_end + 1; 163 break; 164 } 165 166 ++name_end; 167 } 168 169 if (*name_end != ':') 170 break; 171 172 while ( value && *value == ' ') 173 ++value; 174 175 value_end = value; 176 more = no; 177 178 while (*value_end) 179 { 180 if (*value_end == ';') 181 { 182 more = yes; 183 break; 184 } 185 186 ++value_end; 187 } 188 189 *name_end = '\0'; 190 *value_end = '\0'; 191 192 prop = InsertProperty(prop, name, value); 193 *name_end = ':'; 194 195 if (more) 196 { 197 *value_end = ';'; 198 name = value_end + 1; 199 continue; 200 } 201 202 break; 203 } 204 205 MemFree(line); /* free temporary copy */ 206 return prop; 207 } 208 209 static tmbstr CreatePropString(StyleProp *props) 210 { 211 tmbstr style, p, s; 212 uint len; 213 StyleProp *prop; 214 215 /* compute length */ 216 217 for (len = 0, prop = props; prop; prop = prop->next) 218 { 219 len += tmbstrlen(prop->name) + 2; 220 if (prop->value) 221 len += tmbstrlen(prop->value) + 2; 222 } 223 224 style = (tmbstr) MemAlloc(len+1); 225 style[0] = '\0'; 226 227 for (p = style, prop = props; prop; prop = prop->next) 228 { 229 s = prop->name; 230 231 while((*p++ = *s++)) 232 continue; 233 234 if (prop->value) 235 { 236 *--p = ':'; 237 *++p = ' '; 238 ++p; 239 240 s = prop->value; 241 while((*p++ = *s++)) 242 continue; 243 } 244 if (prop->next == NULL) 245 break; 246 247 *--p = ';'; 248 *++p = ' '; 249 ++p; 250 } 251 252 return style; 253 } 254 255 /* 256 create string with merged properties 257 static tmbstr AddProperty( ctmbstr style, ctmbstr property ) 258 { 259 tmbstr line; 260 StyleProp *prop; 261 262 prop = CreateProps(NULL, style); 263 prop = 
CreateProps(prop, property); 264 line = CreatePropString(prop); 265 FreeStyleProps(prop); 266 return line; 267 } 268 */ 269 270 void FreeStyles( TidyDocImpl* doc ) 271 { 272 Lexer* lexer = doc->lexer; 273 if ( lexer ) 274 { 275 TagStyle *style, *next; 276 for ( style = lexer->styles; style; style = next ) 277 { 278 next = style->next; 279 MemFree( style->tag ); 280 MemFree( style->tag_class ); 281 MemFree( style->properties ); 282 MemFree( style ); 283 } 284 } 285 } 286 287 static tmbstr GensymClass( TidyDocImpl* doc ) 288 { 289 tmbchar buf[512]; /* CSSPrefix is limited to 256 characters */ 290 ctmbstr pfx = cfgStr(doc, TidyCSSPrefix); 291 if ( pfx == NULL || *pfx == 0 ) 292 pfx = "c"; 293 294 tmbsnprintf(buf, sizeof(buf), "%s%u", pfx, ++doc->nClassId ); 295 return tmbstrdup(buf); 296 } 297 298 static ctmbstr FindStyle( TidyDocImpl* doc, ctmbstr tag, ctmbstr properties ) 299 { 300 Lexer* lexer = doc->lexer; 301 TagStyle* style; 302 303 for (style = lexer->styles; style; style=style->next) 304 { 305 if (tmbstrcmp(style->tag, tag) == 0 && 306 tmbstrcmp(style->properties, properties) == 0) 307 return style->tag_class; 308 } 309 310 style = (TagStyle *)MemAlloc( sizeof(TagStyle) ); 311 style->tag = tmbstrdup(tag); 312 style->tag_class = GensymClass( doc ); 313 style->properties = tmbstrdup( properties ); 314 style->next = lexer->styles; 315 lexer->styles = style; 316 return style->tag_class; 317 } 318 319 /* 320 Add class="foo" to node 321 */ 322 void AddClass( TidyDocImpl* doc, Node* node, ctmbstr classname ) 323 { 324 AttVal *classattr = AttrGetById(node, TidyAttr_CLASS);; 325 326 /* 327 if there already is a class attribute 328 then append class name after a space. 
329 */ 330 if (classattr) 331 { 332 uint len = tmbstrlen(classattr->value) + 333 tmbstrlen(classname) + 2; 334 tmbstr s = (tmbstr) MemAlloc( len ); 335 tmbstrcpy( s, classattr->value ); 336 tmbstrcat( s, " " ); 337 tmbstrcat( s, classname ); 338 MemFree( classattr->value ); 339 classattr->value = s; 340 } 341 else /* create new class attribute */ 342 AddAttribute( doc, node, "class", classname ); 343 } 344 345 346 /* 347 Find style attribute in node, and replace it 348 by corresponding class attribute. Search for 349 class in style dictionary otherwise gensym 350 new class and add to dictionary. 351 352 Assumes that node doesn't have a class attribute 353 */ 354 static void Style2Rule( TidyDocImpl* doc, Node *node) 355 { 356 AttVal *styleattr, *classattr; 357 ctmbstr classname; 358 359 styleattr = AttrGetById(node, TidyAttr_STYLE); 360 361 if (styleattr) 362 { 363 /* fix for http://tidy.sf.net/bug/850215 */ 364 if (!styleattr->value) 365 { 366 RemoveAttribute(doc, node, styleattr); 367 return; 368 } 369 370 classname = FindStyle( doc, node->element, styleattr->value ); 371 classattr = AttrGetById(node, TidyAttr_CLASS); 372 373 /* 374 if there already is a class attribute 375 then append class name after an underscore 376 */ 377 if (classattr) 378 { 379 uint len = tmbstrlen(classattr->value) + 380 tmbstrlen(classname) + 2; 381 tmbstr s = (tmbstr) MemAlloc( len ); 382 s[0] = '\0'; 383 if (classattr->value) 384 { 385 tmbstrcpy(s, classattr->value); 386 tmbstrcat(s, " "); 387 } 388 tmbstrcat(s, classname); 389 if (classattr->value) 390 MemFree(classattr->value); 391 classattr->value = s; 392 RemoveAttribute( doc, node, styleattr ); 393 } 394 else /* reuse style attribute for class attribute */ 395 { 396 MemFree(styleattr->attribute); 397 MemFree(styleattr->value); 398 styleattr->attribute = tmbstrdup("class"); 399 styleattr->value = tmbstrdup(classname); 400 } 401 } 402 } 403 404 static void AddColorRule( Lexer* lexer, ctmbstr selector, ctmbstr color ) 405 { 406 if ( 
selector && color ) 407 { 408 AddStringLiteral(lexer, selector); 409 AddStringLiteral(lexer, " { color: "); 410 AddStringLiteral(lexer, color); 411 AddStringLiteral(lexer, " }\n"); 412 } 413 } 414 415 /* 416 move presentation attribs from body to style element 417 418 background="foo" -> body { background-image: url(foo) } 419 bgcolor="foo" -> body { background-color: foo } 420 text="foo" -> body { color: foo } 421 link="foo" -> :link { color: foo } 422 vlink="foo" -> :visited { color: foo } 423 alink="foo" -> :active { color: foo } 424 */ 425 static void CleanBodyAttrs( TidyDocImpl* doc, Node* body ) 426 { 427 Lexer* lexer = doc->lexer; 428 tmbstr bgurl = NULL; 429 tmbstr bgcolor = NULL; 430 tmbstr color = NULL; 431 AttVal* attr; 432 433 if (NULL != (attr = AttrGetById(body, TidyAttr_BACKGROUND))) 434 { 435 bgurl = attr->value; 436 attr->value = NULL; 437 RemoveAttribute( doc, body, attr ); 438 } 439 440 if (NULL != (attr = AttrGetById(body, TidyAttr_BGCOLOR))) 441 { 442 bgcolor = attr->value; 443 attr->value = NULL; 444 RemoveAttribute( doc, body, attr ); 445 } 446 447 if (NULL != (attr = AttrGetById(body, TidyAttr_TEXT))) 448 { 449 color = attr->value; 450 attr->value = NULL; 451 RemoveAttribute( doc, body, attr ); 452 } 453 454 if ( bgurl || bgcolor || color ) 455 { 456 AddStringLiteral(lexer, " body {\n"); 457 if (bgurl) 458 { 459 AddStringLiteral(lexer, " background-image: url("); 460 AddStringLiteral(lexer, bgurl); 461 AddStringLiteral(lexer, ");\n"); 462 MemFree(bgurl); 463 } 464 if (bgcolor) 465 { 466 AddStringLiteral(lexer, " background-color: "); 467 AddStringLiteral(lexer, bgcolor); 468 AddStringLiteral(lexer, ";\n"); 469 MemFree(bgcolor); 470 } 471 if (color) 472 { 473 AddStringLiteral(lexer, " color: "); 474 AddStringLiteral(lexer, color); 475 AddStringLiteral(lexer, ";\n"); 476 MemFree(color); 477 } 478 479 AddStringLiteral(lexer, " }\n"); 480 } 481 482 if (NULL != (attr = AttrGetById(body, TidyAttr_LINK))) 483 { 484 AddColorRule(lexer, " :link", 
attr->value); 485 RemoveAttribute( doc, body, attr ); 486 } 487 488 if (NULL != (attr = AttrGetById(body, TidyAttr_VLINK))) 489 { 490 AddColorRule(lexer, " :visited", attr->value); 491 RemoveAttribute( doc, body, attr ); 492 } 493 494 if (NULL != (attr = AttrGetById(body, TidyAttr_ALINK))) 495 { 496 AddColorRule(lexer, " :active", attr->value); 497 RemoveAttribute( doc, body, attr ); 498 } 499 } 500 501 static Bool NiceBody( TidyDocImpl* doc ) 502 { 503 Node* node = FindBody(doc); 504 if (node) 505 { 506 if (AttrGetById(node, TidyAttr_BACKGROUND) || 507 AttrGetById(node, TidyAttr_BGCOLOR) || 508 AttrGetById(node, TidyAttr_TEXT) || 509 AttrGetById(node, TidyAttr_LINK) || 510 AttrGetById(node, TidyAttr_VLINK) || 511 AttrGetById(node, TidyAttr_ALINK)) 512 { 513 doc->badLayout |= USING_BODY; 514 return no; 515 } 516 } 517 518 return yes; 519 } 520 521 /* create style element using rules from dictionary */ 522 static void CreateStyleElement( TidyDocImpl* doc ) 523 { 524 Lexer* lexer = doc->lexer; 525 Node *node, *head, *body; 526 TagStyle *style; 527 AttVal *av; 528 529 if ( lexer->styles == NULL && NiceBody(doc) ) 530 return; 531 532 node = NewNode( lexer ); 533 node->type = StartTag; 534 node->implicit = yes; 535 node->element = tmbstrdup("style"); 536 FindTag( doc, node ); 537 538 /* insert type attribute */ 539 av = NewAttributeEx( doc, "type", "text/css", '"' ); 540 InsertAttributeAtStart( node, av ); 541 542 body = FindBody( doc ); 543 lexer->txtstart = lexer->lexsize; 544 if ( body ) 545 CleanBodyAttrs( doc, body ); 546 547 for (style = lexer->styles; style; style = style->next) 548 { 549 AddCharToLexer(lexer, ' '); 550 AddStringLiteral(lexer, style->tag); 551 AddCharToLexer(lexer, '.'); 552 AddStringLiteral(lexer, style->tag_class); 553 AddCharToLexer(lexer, ' '); 554 AddCharToLexer(lexer, '{'); 555 AddStringLiteral(lexer, style->properties); 556 AddCharToLexer(lexer, '}'); 557 AddCharToLexer(lexer, '\n'); 558 } 559 560 lexer->txtend = lexer->lexsize; 561 562 
InsertNodeAtEnd( node, TextToken(lexer) ); 563 564 /* 565 now insert style element into document head 566 567 doc is root node. search its children for html node 568 the head node should be first child of html node 569 */ 570 if ( NULL != (head = FindHEAD( doc )) ) 571 InsertNodeAtEnd( head, node ); 572 } 573 574 575 /* ensure bidirectional links are consistent */ 576 void FixNodeLinks(Node *node) 577 { 578 Node *child; 579 580 if (node->prev) 581 node->prev->next = node; 582 else 583 node->parent->content = node; 584 585 if (node->next) 586 node->next->prev = node; 587 else 588 node->parent->last = node; 589 590 for (child = node->content; child; child = child->next) 591 child->parent = node; 592 } 593 594 /* 595 used to strip child of node when 596 the node has one and only one child 597 */ 598 static void StripOnlyChild(TidyDocImpl* doc, Node *node) 599 { 600 Node *child; 601 602 child = node->content; 603 node->content = child->content; 604 node->last = child->last; 605 child->content = NULL; 606 FreeNode(doc, child); 607 608 for (child = node->content; child; child = child->next) 609 child->parent = node; 610 } 611 612 /* 613 used to strip font start and end tags. 614 Extricate "element", replace it by its content and delete it. 
615 */ 616 static void DiscardContainer( TidyDocImpl* doc, Node *element, Node **pnode) 617 { 618 if (element->content) 619 { 620 Node *node, *parent = element->parent; 621 622 element->last->next = element->next; 623 624 if (element->next) 625 { 626 element->next->prev = element->last; 627 } 628 else 629 parent->last = element->last; 630 631 if (element->prev) 632 { 633 element->content->prev = element->prev; 634 element->prev->next = element->content; 635 } 636 else 637 parent->content = element->content; 638 639 for (node = element->content; node; node = node->next) 640 node->parent = parent; 641 642 *pnode = element->content; 643 644 element->next = element->content = NULL; 645 FreeNode(doc, element); 646 } 647 else 648 { 649 *pnode = DiscardElement(doc, element); 650 } 651 } 652 653 /* 654 Create new string that consists of the 655 combined style properties in s1 and s2 656 657 To merge property lists, we build a linked 658 list of property/values and insert properties 659 into the list in order, merging values for 660 the same property name. 
661 */ 662 static tmbstr MergeProperties( ctmbstr s1, ctmbstr s2 ) 663 { 664 tmbstr s; 665 StyleProp *prop; 666 667 prop = CreateProps(NULL, s1); 668 prop = CreateProps(prop, s2); 669 s = CreatePropString(prop); 670 FreeStyleProps(prop); 671 return s; 672 } 673 674 /* 675 Add style property to element, creating style 676 attribute as needed and adding ; delimiter 677 */ 678 static void AddStyleProperty(TidyDocImpl* doc, Node *node, ctmbstr property ) 679 { 680 AttVal *av = AttrGetById(node, TidyAttr_STYLE); 681 682 /* if style attribute already exists then insert property */ 683 684 if ( av ) 685 { 686 if (av->value != NULL) 687 { 688 tmbstr s = MergeProperties( av->value, property ); 689 MemFree( av->value ); 690 av->value = s; 691 } 692 else 693 { 694 av->value = tmbstrdup( property ); 695 } 696 } 697 else /* else create new style attribute */ 698 { 699 av = NewAttributeEx( doc, "style", property, '"' ); 700 InsertAttributeAtStart( node, av ); 701 } 702 } 703 704 static void MergeClasses(TidyDocImpl* doc, Node *node, Node *child) 705 { 706 AttVal *av; 707 tmbstr s1, s2, names; 708 709 for (s2 = NULL, av = child->attributes; av; av = av->next) 710 { 711 if (attrIsCLASS(av)) 712 { 713 s2 = av->value; 714 break; 715 } 716 } 717 718 for (s1 = NULL, av = node->attributes; av; av = av->next) 719 { 720 if (attrIsCLASS(av)) 721 { 722 s1 = av->value; 723 break; 724 } 725 } 726 727 if (s1) 728 { 729 if (s2) /* merge class names from both */ 730 { 731 uint l1, l2; 732 l1 = tmbstrlen(s1); 733 l2 = tmbstrlen(s2); 734 names = (tmbstr) MemAlloc(l1 + l2 + 2); 735 tmbstrcpy(names, s1); 736 names[l1] = ' '; 737 tmbstrcpy(names+l1+1, s2); 738 MemFree(av->value); 739 av->value = names; 740 } 741 } 742 else if (s2) /* copy class names from child */ 743 { 744 av = NewAttributeEx( doc, "class", s2, '"' ); 745 InsertAttributeAtStart( node, av ); 746 } 747 } 748 749 static void MergeStyles(TidyDocImpl* doc, Node *node, Node *child) 750 { 751 AttVal *av; 752 tmbstr s1, s2, style; 753 754 
/* 755 the child may have a class attribute used 756 for attaching styles, if so the class name 757 needs to be copied to node's class 758 */ 759 MergeClasses(doc, node, child); 760 761 for (s2 = NULL, av = child->attributes; av; av = av->next) 762 { 763 if (attrIsSTYLE(av)) 764 { 765 s2 = av->value; 766 break; 767 } 768 } 769 770 for (s1 = NULL, av = node->attributes; av; av = av->next) 771 { 772 if (attrIsSTYLE(av)) 773 { 774 s1 = av->value; 775 break; 776 } 777 } 778 779 if (s1) 780 { 781 if (s2) /* merge styles from both */ 782 { 783 style = MergeProperties(s1, s2); 784 MemFree(av->value); 785 av->value = style; 786 } 787 } 788 else if (s2) /* copy style of child */ 789 { 790 av = NewAttributeEx( doc, "style", s2, '"' ); 791 InsertAttributeAtStart( node, av ); 792 } 793 } 794 795 static ctmbstr FontSize2Name(ctmbstr size) 796 { 797 static const ctmbstr sizes[7] = 798 { 799 "60%", "70%", "80%", NULL, 800 "120%", "150%", "200%" 801 }; 802 803 /* increment of 0.8 */ 804 static const ctmbstr minussizes[] = 805 { 806 "100%", "80%", "64%", "51%", 807 "40%", "32%", "26%" 808 }; 809 810 /* increment of 1.2 */ 811 static const ctmbstr plussizes[] = 812 { 813 "100%", "120%", "144%", "172%", 814 "207%", "248%", "298%" 815 }; 816 817 if (size[0] == '\0') 818 return NULL; 819 820 if ('' <= size[0] && size[0] <= '6') 821 { 822 int n = size[0] - ''; 823 return sizes[n]; 824 } 825 826 if (size[0] == '-') 827 { 828 if ('' <= size[1] && size[1] <= '6') 829 { 830 int n = size[1] - ''; 831 return minussizes[n]; 832 } 833 return "smaller"; /*"70%"; */ 834 } 835 836 if ('' <= size[1] && size[1] <= '6') 837 { 838 int n = size[1] - ''; 839 return plussizes[n]; 840 } 841 842 return "larger"; /* "140%" */ 843 } 844 845 static void AddFontFace( TidyDocImpl* doc, Node *node, ctmbstr face ) 846 { 847 tmbchar buf[256]; 848 tmbsnprintf(buf, sizeof(buf), "font-family: %s", face ); 849 AddStyleProperty( doc, node, buf ); 850 } 851 852 static void AddFontSize( TidyDocImpl* doc, Node* node, 
ctmbstr size ) 853 { 854 ctmbstr value = NULL; 855 856 if (nodeIsP(node)) 857 { 858 if (tmbstrcmp(size, "6") == 0) 859 value = "h1"; 860 else if (tmbstrcmp(size, "5") == 0) 861 value = "h2"; 862 else if (tmbstrcmp(size, "4") == 0) 863 value = "h3"; 864 865 if (value) 866 { 867 MemFree(node->element); 868 node->element = tmbstrdup(value); 869 FindTag(doc, node); 870 return; 871 } 872 } 873 874 value = FontSize2Name(size); 875 876 if (value) 877 { 878 tmbchar buf[64]; 879 tmbsnprintf(buf, sizeof(buf), "font-size: %s", value); 880 AddStyleProperty( doc, node, buf ); 881 } 882 } 883 884 static void AddFontColor( TidyDocImpl* doc, Node *node, ctmbstr color) 885 { 886 tmbchar buf[128]; 887 tmbsnprintf(buf, sizeof(buf), "color: %s", color); 888 AddStyleProperty( doc, node, buf ); 889 } 890 891 /* force alignment value to lower case */ 892 static void AddAlign( TidyDocImpl* doc, Node *node, ctmbstr align ) 893 { 894 uint i; 895 tmbchar buf[128]; 896 897 tmbstrcpy( buf, "text-align: " ); 898 for ( i = 12; i < sizeof(buf)/sizeof(buf[0])-1; ++i ) 899 { 900 if ( (buf[i] = (tmbchar)ToLower(*align++)) == '\0' ) 901 break; 902 } 903 buf[i] = '\0'; 904 AddStyleProperty( doc, node, buf ); 905 } 906 907 /* 908 add style properties to node corresponding to 909 the font face, size and color attributes 910 */ 911 static void AddFontStyles( TidyDocImpl* doc, Node *node, AttVal *av) 912 { 913 while (av) 914 { 915 if (AttrHasValue(av)) 916 { 917 if (attrIsFACE(av)) 918 AddFontFace( doc, node, av->value ); 919 else if (attrIsSIZE(av)) 920 AddFontSize( doc, node, av->value ); 921 else if (attrIsCOLOR(av)) 922 AddFontColor( doc, node, av->value ); 923 } 924 av = av->next; 925 } 926 } 927 928 /* 929 Symptom: <p align=center> 930 Action: <p style="text-align: center"> 931 */ 932 static void TextAlign( TidyDocImpl* doc, Node* node ) 933 { 934 AttVal *av, *prev; 935 936 prev = NULL; 937 938 for (av = node->attributes; av; av = av->next) 939 { 940 if (attrIsALIGN(av)) 941 { 942 if (prev) 943 
prev->next = av->next; 944 else 945 node->attributes = av->next; 946 947 if (av->value) 948 AddAlign( doc, node, av->value ); 949 950 FreeAttribute(doc, av); 951 break; 952 } 953 954 prev = av; 955 } 956 } 957 958 /* 959 The clean up rules use the pnode argument to return the 960 next node when the original node has been deleted 961 */ 962 963 /* 964 Symptom: <dir> <li> where <li> is only child 965 Action: coerce <dir> <li> to <div> with indent. 966 */ 967 968 static Bool Dir2Div( TidyDocImpl* doc, Node *node, Node **ARG_UNUSED(pnode)) 969 { 970 Node *child; 971 972 if ( nodeIsDIR(node) || nodeIsUL(node) || nodeIsOL(node) ) 973 { 974 child = node->content; 975 976 if (child == NULL) 977 return no; 978 979 /* check child has no peers */ 980 981 if (child->next) 982 return no; 983 984 if ( !nodeIsLI(child) ) 985 return no; 986 987 if ( !child->implicit ) 988 return no; 989 990 /* coerce dir to div */ 991 node->tag = LookupTagDef( TidyTag_DIV ); 992 MemFree( node->element ); 993 node->element = tmbstrdup("div"); 994 AddStyleProperty( doc, node, "margin-left: 2em" ); 995 StripOnlyChild( doc, node ); 996 return yes; 997 } 998 999 return no; 1000 } 1001 1002 /* 1003 Symptom: <center> 1004 Action: replace <center> by <div style="text-align: center"> 1005 */ 1006 1007 static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode) 1008 { 1009 if ( nodeIsCENTER(node) ) 1010 { 1011 if ( cfgBool(doc, TidyDropFontTags) ) 1012 { 1013 if (node->content) 1014 { 1015 Node *last = node->last; 1016 DiscardContainer( doc, node, pnode ); 1017 1018 node = InferredTag(doc, TidyTag_BR); 1019 InsertNodeAfterElement(last, node); 1020 } 1021 else 1022 { 1023 Node *prev = node->prev, *next = node->next, 1024 *parent = node->parent; 1025 DiscardContainer( doc, node, pnode ); 1026 1027 node = InferredTag(doc, TidyTag_BR); 1028 if (next) 1029 InsertNodeBeforeElement(next, node); 1030 else if (prev) 1031 InsertNodeAfterElement(prev, node); 1032 else 1033 InsertNodeAtStart(parent, node); 1034 
} 1035 1036 return yes; 1037 } 1038 1039 RenameElem( node, TidyTag_DIV ); 1040 AddStyleProperty( doc, node, "text-align: center" ); 1041 return yes; 1042 } 1043 1044 return no; 1045 } 1046 1047 /* Copy child attributes to node. Duplicate attributes are overwritten. 1048 Unique attributes (such as ID) disable the action. 1049 Attributes style and class are not dealt with. A call to MergeStyles 1050 will do that. 1051 */ 1052 static Bool CopyAttrs( TidyDocImpl* doc, Node *node, Node *child) 1053 { 1054 AttVal *av1, *av2; 1055 TidyAttrId id; 1056 1057 /* Detect attributes that cannot be merged or overwritten. */ 1058 if (AttrGetById(child, TidyAttr_ID) != NULL 1059 && AttrGetById(node, TidyAttr_ID) != NULL) 1060 return no; 1061 1062 /* Move child attributes to node. Attributes in node 1063 can be overwritten or merged. */ 1064 for (av2 = child->attributes; av2; ) 1065 { 1066 /* Dealt by MergeStyles. */ 1067 if (attrIsSTYLE(av2) || attrIsCLASS(av2)) 1068 { 1069 av2 = av2->next; 1070 continue; 1071 } 1072 /* Avoid duplicates in node */ 1073 if ((id=AttrId(av2)) != TidyAttr_UNKNOWN 1074 && (av1=AttrGetById(node, id))!= NULL) 1075 RemoveAttribute( doc, node, av1 ); 1076 1077 /* Move attribute from child to node */ 1078 DetachAttribute( child, av2 ); 1079 av1 = av2; 1080 av2 = av2->next; 1081 av1->next = NULL; 1082 InsertAttributeAtEnd( node, av1 ); 1083 } 1084 1085 return yes; 1086 } 1087 1088 /* 1089 Symptom <XX><XX>...</XX></XX> 1090 Action: merge the two XXs 1091 1092 For instance, this is useful after nested <dir>s used by Word 1093 for indenting have been converted to <div>s 1094 1095 If state is "no", no merging. 1096 If state is "yes", inner element is discarded. Only Style and Class 1097 attributes are merged using MergeStyles(). 1098 If state is "auto", atttibutes are merged as described in CopyAttrs(). 1099 Style and Class attributes are merged using MergeStyles(). 
1100 */ 1101 static Bool MergeNestedElements( TidyDocImpl* doc, 1102 TidyTagId Id, TidyTriState state, Node *node, 1103 Node **ARG_UNUSED(pnode)) 1104 { 1105 Node *child; 1106 1107 if ( state == TidyNoState 1108 || !TagIsId(node, Id) ) 1109 return no; 1110 1111 child = node->content; 1112 1113 if ( child == NULL 1114 || child->next != NULL 1115 || !TagIsId(child, Id) ) 1116 return no; 1117 1118 if ( state == TidyAutoState 1119 && CopyAttrs(doc, node, child) == no ) 1120 return no; 1121 1122 MergeStyles( doc, node, child ); 1123 StripOnlyChild( doc, node ); 1124 return yes; 1125 } 1126 1127 /* 1128 Symptom: <ul><li><ul>...</ul></li></ul> 1129 Action: discard outer list 1130 */ 1131 1132 static Bool NestedList( TidyDocImpl* doc, Node *node, Node **pnode ) 1133 { 1134 Node *child, *list; 1135 1136 if ( nodeIsUL(node) || nodeIsOL(node) ) 1137 { 1138 child = node->content; 1139 1140 if (child == NULL) 1141 return no; 1142 1143 /* check child has no peers */ 1144 1145 if (child->next) 1146 return no; 1147 1148 list = child->content; 1149 1150 if (!list) 1151 return no; 1152 1153 if (list->tag != node->tag) 1154 return no; 1155 1156 /* check list has no peers */ 1157 if (list->next) 1158 return no; 1159 1160 *pnode = list; /* Set node to resume iteration */ 1161 1162 /* move inner list node into position of outer node */ 1163 list->prev = node->prev; 1164 list->next = node->next; 1165 list->parent = node->parent; 1166 FixNodeLinks(list); 1167 1168 /* get rid of outer ul and its li */ 1169 child->content = NULL; 1170 FreeNode( doc, child ); /* See test #427841. */ 1171 child = NULL; 1172 node->content = NULL; 1173 node->next = NULL; 1174 FreeNode( doc, node ); 1175 node = NULL; 1176 1177 /* 1178 If prev node was a list the chances are this node 1179 should be appended to that list. 
Word has no way of 1180 recognizing nested lists and just uses indents 1181 */ 1182 1183 if (list->prev) 1184 { 1185 if ( (nodeIsUL(list->prev) || nodeIsOL(list->prev)) 1186 && list->prev->last ) 1187 { 1188 node = list; 1189 list = node->prev; 1190 1191 child = list->last; /* <li> */ 1192 1193 list->next = node->next; 1194 FixNodeLinks(list); 1195 1196 node->parent = child; 1197 node->next = NULL; 1198 node->prev = child->last; 1199 FixNodeLinks(node); 1200 CleanNode( doc, node ); 1201 } 1202 } 1203 1204 return yes; 1205 } 1206 1207 return no; 1208 } 1209 1210 /* 1211 Some necessary conditions to apply BlockStyle(). 1212 */ 1213 1214 static Bool CanApplyBlockStyle( Node *node ) 1215 { 1216 if (node->tag->model & (CM_BLOCK | CM_LIST | CM_DEFLIST | CM_TABLE) 1217 && !nodeIsTABLE(node) && !nodeIsTR(node) && !nodeIsLI(node) ) 1218 { 1219 return yes; 1220 } 1221 return no; 1222 } 1223 1224 /* 1225 Symptom: the only child of a block-level element is a 1226 presentation element such as B, I or FONT 1227 1228 Action: add style "font-weight: bold" to the block and 1229 strip the <b> element, leaving its children. 1230 1231 example: 1232 1233 <p> 1234 <b><font face="Arial" size="6">Draft Recommended Practice</font></b> 1235 </p> 1236 1237 becomes: 1238 1239 <p style="font-weight: bold; font-family: Arial; font-size: 6"> 1240 Draft Recommended Practice 1241 </p> 1242 1243 This code also replaces the align attribute by a style attribute. 
1244 However, to avoid CSS problems with Navigator 4, this isn't done 1245 for the elements: caption, tr and table 1246 */ 1247 static Bool BlockStyle( TidyDocImpl* doc, Node *node, Node **ARG_UNUSED(pnode) ) 1248 { 1249 Node *child; 1250 1251 if (CanApplyBlockStyle(node)) 1252 { 1253 /* check for align attribute */ 1254 if ( !nodeIsCAPTION(node) ) 1255 TextAlign( doc, node ); 1256 1257 child = node->content; 1258 if (child == NULL) 1259 return no; 1260 1261 /* check child has no peers */ 1262 if (child->next) 1263 return no; 1264 1265 if ( nodeIsB(child) ) 1266 { 1267 MergeStyles( doc, node, child ); 1268 AddStyleProperty( doc, node, "font-weight: bold" ); 1269 StripOnlyChild( doc, node ); 1270 return yes; 1271 } 1272 1273 if ( nodeIsI(child) ) 1274 { 1275 MergeStyles( doc, node, child ); 1276 AddStyleProperty( doc, node, "font-style: italic" ); 1277 StripOnlyChild( doc, node ); 1278 return yes; 1279 } 1280 1281 if ( nodeIsFONT(child) ) 1282 { 1283 MergeStyles( doc, node, child ); 1284 AddFontStyles( doc, node, child->attributes ); 1285 StripOnlyChild( doc, node ); 1286 return yes; 1287 } 1288 } 1289 1290 return no; 1291 } 1292 1293 /* the only child of table cell or an inline element such as em */ 1294 static Bool InlineStyle( TidyDocImpl* doc, Node *node, Node **ARG_UNUSED(pnode) ) 1295 { 1296 Node *child; 1297 1298 if ( !nodeIsFONT(node) && nodeHasCM(node, CM_INLINE|CM_ROW) ) 1299 { 1300 child = node->content; 1301 1302 if (child == NULL) 1303 return no; 1304 1305 /* check child has no peers */ 1306 1307 if (child->next) 1308 return no; 1309 1310 if ( nodeIsB(child) && cfgBool(doc, TidyLogicalEmphasis) ) 1311 { 1312 MergeStyles( doc, node, child ); 1313 AddStyleProperty( doc, node, "font-weight: bold" ); 1314 StripOnlyChild( doc, node ); 1315 return yes; 1316 } 1317 1318 if ( nodeIsI(child) && cfgBool(doc, TidyLogicalEmphasis) ) 1319 { 1320 MergeStyles( doc, node, child ); 1321 AddStyleProperty( doc, node, "font-style: italic" ); 1322 StripOnlyChild( doc, node 
); 1323 return yes; 1324 } 1325 1326 if ( nodeIsFONT(child) ) 1327 { 1328 MergeStyles( doc, node, child ); 1329 AddFontStyles( doc, node, child->attributes ); 1330 StripOnlyChild( doc, node ); 1331 return yes; 1332 } 1333 } 1334 1335 return no; 1336 } 1337 1338 /* 1339 Replace font elements by span elements, deleting 1340 the font element's attributes and replacing them 1341 by a single style attribute. 1342 */ 1343 static Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) 1344 { 1345 AttVal *av, *style, *next; 1346 1347 if ( nodeIsFONT(node) ) 1348 { 1349 if ( cfgBool(doc, TidyDropFontTags) ) 1350 { 1351 DiscardContainer( doc, node, pnode ); 1352 return yes; 1353 } 1354 1355 /* if FONT is only child of parent element then leave alone 1356 Do so only if BlockStyle may be succesful. */ 1357 if ( node->parent->content == node && node->next == NULL && 1358 CanApplyBlockStyle(node->parent) ) 1359 return no; 1360 1361 AddFontStyles( doc, node, node->attributes ); 1362 1363 /* extract style attribute and free the rest */ 1364 av = node->attributes; 1365 style = NULL; 1366 1367 while (av) 1368 { 1369 next = av->next; 1370 1371 if (attrIsSTYLE(av)) 1372 { 1373 av->next = NULL; 1374 style = av; 1375 } 1376 else 1377 { 1378 FreeAttribute( doc, av ); 1379 } 1380 av = next; 1381 } 1382 1383 node->attributes = style; 1384 RenameElem( node, TidyTag_SPAN ); 1385 return yes; 1386 } 1387 1388 return no; 1389 } 1390 1391 /* 1392 Applies all matching rules to a node. 1393 */ 1394 Node* CleanNode( TidyDocImpl* doc, Node *node ) 1395 { 1396 Node *next = NULL; 1397 TidyTriState mergeDivs = cfgAutoBool(doc, TidyMergeDivs); 1398 1399 for (next = node; nodeIsElement(node); node = next) 1400 { 1401 if ( Dir2Div(doc, node, &next) ) 1402 continue; 1403 1404 /* Special case: true result means 1405 ** that arg node and its parent no longer exist. 1406 ** So we must jump back up the CreateStyleProperties() 1407 ** call stack until we have a valid node reference. 
1408 */ 1409 if ( NestedList(doc, node, &next) ) 1410 return next; 1411 1412 if ( Center2Div(doc, node, &next) ) 1413 continue; 1414 1415 if ( MergeNestedElements(doc, TidyTag_DIV, mergeDivs, node, &next) ) 1416 continue; 1417 1418 if ( BlockStyle(doc, node, &next) ) 1419 continue; 1420 1421 if ( InlineStyle(doc, node, &next) ) 1422 continue; 1423 1424 if ( Font2Span(doc, node, &next) ) 1425 continue; 1426 1427 break; 1428 } 1429 1430 return next; 1431 } 1432 1433 /* Special case: if the current node is destroyed by 1434 ** CleanNode() lower in the tree, this node and its parent 1435 ** no longer exist. So we must jump back up the CleanTree() 1436 ** call stack until we have a valid node reference. 1437 */ 1438 1439 static Node* CleanTree( TidyDocImpl* doc, Node *node ) 1440 { 1441 if (node->content) 1442 { 1443 Node *child; 1444 for (child = node->content; child != NULL; child = child->next) 1445 { 1446 child = CleanTree( doc, child ); 1447 if ( !child ) 1448 break; 1449 } 1450 } 1451 1452 return CleanNode( doc, node ); 1453 } 1454 1455 static void DefineStyleRules( TidyDocImpl* doc, Node *node ) 1456 { 1457 Node *child; 1458 1459 if (node->content) 1460 { 1461 for (child = node->content; 1462 child != NULL; child = child->next) 1463 { 1464 DefineStyleRules( doc, child ); 1465 } 1466 } 1467 1468 Style2Rule( doc, node ); 1469 } 1470 1471 void CleanDocument( TidyDocImpl* doc ) 1472 { 1473 /* placeholder. CleanTree()/CleanNode() will not 1474 ** zap root element 1475 */ 1476 CleanTree( doc, &doc->root ); 1477 1478 if ( cfgBool(doc, TidyMakeClean) ) 1479 { 1480 DefineStyleRules( doc, &doc->root ); 1481 CreateStyleElement( doc ); 1482 } 1483 } 1484 1485 /* simplifies <b><b> ... </b> ...</b> etc. 
*/ 1486 void NestedEmphasis( TidyDocImpl* doc, Node* node ) 1487 { 1488 Node *next; 1489 1490 while (node) 1491 { 1492 next = node->next; 1493 1494 if ( (nodeIsB(node) || nodeIsI(node)) 1495 && node->parent && node->parent->tag == node->tag) 1496 { 1497 /* strip redundant inner element */ 1498 DiscardContainer( doc, node, &next ); 1499 node = next; 1500 continue; 1501 } 1502 1503 if ( node->content ) 1504 NestedEmphasis( doc, node->content ); 1505 1506 node = next; 1507 } 1508 } 1509 1510 1511 1512 /* replace i by em and b by strong */ 1513 void EmFromI( TidyDocImpl* doc, Node* node ) 1514 { 1515 while (node) 1516 { 1517 if ( nodeIsI(node) ) 1518 RenameElem( node, TidyTag_EM ); 1519 else if ( nodeIsB(node) ) 1520 RenameElem( node, TidyTag_STRONG ); 1521 1522 if ( node->content ) 1523 EmFromI( doc, node->content ); 1524 1525 node = node->next; 1526 } 1527 } 1528 1529 static Bool HasOneChild(Node *node) 1530 { 1531 return (node->content && node->content->next == NULL); 1532 } 1533 1534 /* 1535 Some people use dir or ul without an li 1536 to indent the content. The pattern to 1537 look for is a list with a single implicit 1538 li. This is recursively replaced by an 1539 implicit blockquote. 
1540 */ 1541 void List2BQ( TidyDocImpl* doc, Node* node ) 1542 { 1543 while (node) 1544 { 1545 if (node->content) 1546 List2BQ( doc, node->content ); 1547 1548 if ( node->tag && node->tag->parser == ParseList && 1549 HasOneChild(node) && node->content->implicit ) 1550 { 1551 StripOnlyChild( doc, node ); 1552 RenameElem( node, TidyTag_BLOCKQUOTE ); 1553 node->implicit = yes; 1554 } 1555 1556 node = node->next; 1557 } 1558 } 1559 1560 1561 /* 1562 Replace implicit blockquote by div with an indent 1563 taking care to reduce nested blockquotes to a single 1564 div with the indent set to match the nesting depth 1565 */ 1566 void BQ2Div( TidyDocImpl* doc, Node *node ) 1567 { 1568 tmbchar indent_buf[ 32 ]; 1569 uint indent; 1570 1571 while (node) 1572 { 1573 if ( nodeIsBLOCKQUOTE(node) && node->implicit ) 1574 { 1575 indent = 1; 1576 1577 while( HasOneChild(node) && 1578 nodeIsBLOCKQUOTE(node->content) && 1579 node->implicit) 1580 { 1581 ++indent; 1582 StripOnlyChild( doc, node ); 1583 } 1584 1585 if (node->content) 1586 BQ2Div( doc, node->content ); 1587 1588 tmbsnprintf(indent_buf, sizeof(indent_buf), "margin-left: %dem", 1589 2*indent); 1590 1591 RenameElem( node, TidyTag_DIV ); 1592 AddStyleProperty(doc, node, indent_buf ); 1593 } 1594 else if (node->content) 1595 BQ2Div( doc, node->content ); 1596 1597 node = node->next; 1598 } 1599 } 1600 1601 1602 Node* FindEnclosingCell( TidyDocImpl* ARG_UNUSED(doc), Node *node) 1603 { 1604 Node *check; 1605 1606 for ( check=node; check; check = check->parent ) 1607 { 1608 if ( nodeIsTD(check) ) 1609 return check; 1610 } 1611 return NULL; 1612 } 1613 1614 /* node is <![if ...]> prune up to <![endif]> */ 1615 static Node* PruneSection( TidyDocImpl* doc, Node *node ) 1616 { 1617 Lexer* lexer = doc->lexer; 1618 1619 for (;;) 1620 { 1621 ctmbstr lexbuf = lexer->lexbuf + node->start; 1622 if ( tmbstrncmp(lexbuf, "if !supportEmptyParas", 21) == 0 ) 1623 { 1624 Node* cell = FindEnclosingCell( doc, node ); 1625 if ( cell ) 1626 { 1627 /* 
Need to put &nbsp; into cell so it doesn't look weird 1628 */ 1629 Node* nbsp = NewLiteralTextNode( lexer, "\240" ); 1630 assert( (byte)'\240' == (byte)160 ); 1631 InsertNodeBeforeElement( node, nbsp ); 1632 } 1633 } 1634 1635 /* discard node and returns next */ 1636 node = DiscardElement( doc, node ); 1637 1638 if (node == NULL) 1639 return NULL; 1640 1641 if (node->type == SectionTag) 1642 { 1643 if (tmbstrncmp(lexer->lexbuf + node->start, "if", 2) == 0) 1644 { 1645 node = PruneSection( doc, node ); 1646 continue; 1647 } 1648 1649 if (tmbstrncmp(lexer->lexbuf + node->start, "endif", 5) == 0) 1650 { 1651 node = DiscardElement( doc, node ); 1652 break; 1653 } 1654 } 1655 } 1656 1657 return node; 1658 } 1659 1660 void DropSections( TidyDocImpl* doc, Node* node ) 1661 { 1662 Lexer* lexer = doc->lexer; 1663 while (node) 1664 { 1665 if (node->type == SectionTag) 1666 { 1667 /* prune up to matching endif */ 1668 if ((tmbstrncmp(lexer->lexbuf + node->start, "if", 2) == 0) && 1669 (tmbstrncmp(lexer->lexbuf + node->start, "if !vml", 7) != 0)) /* #444394 - fix 13 Sep 01 */ 1670 { 1671 node = PruneSection( doc, node ); 1672 continue; 1673 } 1674 1675 /* discard others as well */ 1676 node = DiscardElement( doc, node ); 1677 continue; 1678 } 1679 1680 if (node->content) 1681 DropSections( doc, node->content ); 1682 1683 node = node->next; 1684 } 1685 } 1686 1687 static void PurgeWord2000Attributes( TidyDocImpl* ARG_UNUSED(doc), Node* node ) 1688 { 1689 AttVal *attr, *next, *prev = NULL; 1690 1691 for ( attr = node->attributes; attr; attr = next ) 1692 { 1693 next = attr->next; 1694 1695 /* special check for class="Code" denoting pre text */ 1696 /* Pass thru user defined styles as HTML class names */ 1697 if (attrIsCLASS(attr)) 1698 { 1699 if (AttrValueIs(attr, "Code") || 1700 tmbstrncmp(attr->value, "Mso", 3) != 0 ) 1701 { 1702 prev = attr; 1703 continue; 1704 } 1705 } 1706 1707 if (attrIsCLASS(attr) || 1708 attrIsSTYLE(attr) || 1709 attrIsLANG(attr) || 1710 ( 
(attrIsHEIGHT(attr) || attrIsWIDTH(attr)) && 1711 (nodeIsTD(node) || nodeIsTR(node) || nodeIsTH(node)) ) || 1712 (attr->attribute && tmbstrncmp(attr->attribute, "x:", 2) == 0) ) 1713 { 1714 if (prev) 1715 prev->next = next; 1716 else 1717 node->attributes = next; 1718 1719 FreeAttribute( doc, attr ); 1720 } 1721 else 1722 prev = attr; 1723 } 1724 } 1725 1726 /* Word2000 uses span excessively, so we strip span out */ 1727 static Node* StripSpan( TidyDocImpl* doc, Node* span ) 1728 { 1729 Node *node, *prev = NULL, *content; 1730 1731 /* 1732 deal with span elements that have content 1733 by splicing the content in place of the span 1734 after having processed it 1735 */ 1736 1737 CleanWord2000( doc, span->content ); 1738 content = span->content; 1739 1740 if (span->prev) 1741 prev = span->prev; 1742 else if (content) 1743 { 1744 node = content; 1745 content = content->next; 1746 RemoveNode(node); 1747 InsertNodeBeforeElement(span, node); 1748 prev = node; 1749 } 1750 1751 while (content) 1752 { 1753 node = content; 1754 content = content->next; 1755 RemoveNode(node); 1756 InsertNodeAfterElement(prev, node); 1757 prev = node; 1758 } 1759 1760 if (span->next == NULL) 1761 span->parent->last = prev; 1762 1763 node = span->next; 1764 span->content = NULL; 1765 DiscardElement( doc, span ); 1766 return node; 1767 } 1768 1769 /* map non-breaking spaces to regular spaces */ 1770 void NormalizeSpaces(Lexer *lexer, Node *node) 1771 { 1772 while ( node ) 1773 { 1774 if ( node->content ) 1775 NormalizeSpaces( lexer, node->content ); 1776 1777 if (nodeIsText(node)) 1778 { 1779 uint i, c; 1780 tmbstr p = lexer->lexbuf + node->start; 1781 1782 for (i = node->start; i < node->end; ++i) 1783 { 1784 c = (byte) lexer->lexbuf[i]; 1785 1786 /* look for UTF-8 multibyte character */ 1787 if ( c > 0x7F ) 1788 i += GetUTF8( lexer->lexbuf + i, &c ); 1789 1790 if ( c == 160 ) 1791 c = ' '; 1792 1793 p = PutUTF8(p, c); 1794 } 1795 node->end = p - lexer->lexbuf; 1796 } 1797 1798 node = 
node->next; 1799 } 1800 } 1801 1802 /* used to hunt for hidden preformatted sections */ 1803 Bool NoMargins(Node *node) 1804 { 1805 AttVal *attval = AttrGetById(node, TidyAttr_STYLE); 1806 1807 if ( !AttrHasValue(attval) ) 1808 return no; 1809 1810 /* search for substring "margin-top: 0" */ 1811 if (!tmbsubstr(attval->value, "margin-top: 0")) 1812 return no; 1813 1814 /* search for substring "margin-bottom: 0" */ 1815 if (!tmbsubstr(attval->value, "margin-bottom: 0")) 1816 return no; 1817 1818 return yes; 1819 } 1820 1821 /* does element have a single space as its content? */ 1822 static Bool SingleSpace( Lexer* lexer, Node* node ) 1823 { 1824 if ( node->content ) 1825 { 1826 node = node->content; 1827 1828 if ( node->next != NULL ) 1829 return no; 1830 1831 if ( node->type != TextNode ) 1832 return no; 1833 1834 if ( (node->end - node->start) == 1 && 1835 lexer->lexbuf[node->start] == ' ' ) 1836 return yes; 1837 1838 if ( (node->end - node->start) == 2 ) 1839 { 1840 uint c = 0; 1841 GetUTF8( lexer->lexbuf + node->start, &c ); 1842 if ( c == 160 ) 1843 return yes; 1844 } 1845 } 1846 1847 return no; 1848 } 1849 1850 /* 1851 This is a major clean up to strip out all the extra stuff you get 1852 when you save as web page from Word 2000. It doesn't yet know what 1853 to do with VML tags, but these will appear as errors unless you 1854 declare them as new tags, such as o:p which needs to be declared 1855 as inline. 
1856 */ 1857 void CleanWord2000( TidyDocImpl* doc, Node *node) 1858 { 1859 /* used to a list from a sequence of bulletted p's */ 1860 Lexer* lexer = doc->lexer; 1861 Node* list = NULL; 1862 1863 while ( node ) 1864 { 1865 /* get rid of Word's xmlns attributes */ 1866 if ( nodeIsHTML(node) ) 1867 { 1868 /* check that it's a Word 2000 document */ 1869 if ( !GetAttrByName(node, "xmlns:o") && 1870 !cfgBool(doc, TidyMakeBare) ) 1871 return; 1872 1873 FreeAttrs( doc, node ); 1874 } 1875 1876 /* fix up preformatted sections by looking for a 1877 ** sequence of paragraphs with zero top/bottom margin 1878 */ 1879 if ( nodeIsP(node) ) 1880 { 1881 if (NoMargins(node)) 1882 { 1883 Node *pre, *next; 1884 CoerceNode(doc, node, TidyTag_PRE, no, yes); 1885 1886 PurgeWord2000Attributes( doc, node ); 1887 1888 if (node->content) 1889 CleanWord2000( doc, node->content ); 1890 1891 pre = node; 1892 node = node->next; 1893 1894 /* continue to strip p's */ 1895 1896 while ( nodeIsP(node) && NoMargins(node) ) 1897 { 1898 next = node->next; 1899 RemoveNode(node); 1900 InsertNodeAtEnd(pre, NewLineNode(lexer)); 1901 InsertNodeAtEnd(pre, node); 1902 StripSpan( doc, node ); 1903 node = next; 1904 } 1905 1906 if (node == NULL) 1907 break; 1908 } 1909 } 1910 1911 if (node->tag && (node->tag->model & CM_BLOCK) 1912 && SingleSpace(lexer, node)) 1913 { 1914 node = StripSpan( doc, node ); 1915 continue; 1916 } 1917 /* discard Word's style verbiage */ 1918 if ( nodeIsSTYLE(node) || nodeIsMETA(node) || 1919 node->type == CommentTag ) 1920 { 1921 node = DiscardElement( doc, node ); 1922 continue; 1923 } 1924 1925 /* strip out all span and font tags Word scatters so liberally! 
*/ 1926 if ( nodeIsSPAN(node) || nodeIsFONT(node) ) 1927 { 1928 node = StripSpan( doc, node ); 1929 continue; 1930 } 1931 1932 if ( nodeIsLINK(node) ) 1933 { 1934 AttVal *attr = AttrGetById(node, TidyAttr_REL); 1935 1936 if (AttrValueIs(attr, "File-List")) 1937 { 1938 node = DiscardElement( doc, node ); 1939 continue; 1940 } 1941 } 1942 1943 /* discards <o:p> which encodes the paragraph mark */ 1944 if ( node->tag && tmbstrcmp(node->tag->name,"o:p")==0) 1945 { 1946 Node* next; 1947 DiscardContainer( doc, node, &next ); 1948 node = next; 1949 continue; 1950 } 1951 1952 /* discard empty paragraphs */ 1953 1954 if ( node->content == NULL && nodeIsP(node) ) 1955 { 1956 /* Use the existing function to ensure consistency */ 1957 node = TrimEmptyElement( doc, node ); 1958 continue; 1959 } 1960 1961 if ( nodeIsP(node) ) 1962 { 1963 AttVal *attr, *atrStyle; 1964 1965 attr = AttrGetById(node, TidyAttr_CLASS); 1966 atrStyle = AttrGetById(node, TidyAttr_STYLE); 1967 /* 1968 (JES) Sometimes Word marks a list item with the following hokie syntax 1969 <p class="MsoNormal" style="...;mso-list:l1 level1 lfo1; 1970 translate these into <li> 1971 */ 1972 /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */ 1973 /* map <p class="MsoListNumber"> to <ol>...</ol> */ 1974 if ( AttrValueIs(attr, "MsoListBullet") || 1975 AttrValueIs(attr, "MsoListNumber") || 1976 AttrContains(atrStyle, "mso-list:") ) 1977 { 1978 TidyTagId listType = TidyTag_UL; 1979 if (AttrValueIs(attr, "MsoListNumber")) 1980 listType = TidyTag_OL; 1981 1982 CoerceNode(doc, node, TidyTag_LI, no, yes); 1983 1984 if ( !list || TagId(list) != listType ) 1985 { 1986 const Dict* tag = LookupTagDef( listType ); 1987 list = InferredTag(doc, tag->id); 1988 InsertNodeBeforeElement(node, list); 1989 } 1990 1991 PurgeWord2000Attributes( doc, node ); 1992 1993 if ( node->content ) 1994 CleanWord2000( doc, node->content ); 1995 1996 /* remove node and append to contents of list */ 1997 RemoveNode(node); 1998 
InsertNodeAtEnd(list, node); 1999 node = list; 2000 } 2001 /* map sequence of <p class="Code"> to <pre>...</pre> */ 2002 else if (AttrValueIs(attr, "Code")) 2003 { 2004 Node *br = NewLineNode(lexer); 2005 NormalizeSpaces(lexer, node->content); 2006 2007 if ( !list || TagId(list) != TidyTag_PRE ) 2008 { 2009 list = InferredTag(doc, TidyTag_PRE); 2010 InsertNodeBeforeElement(node, list); 2011 } 2012 2013 /* remove node and append to contents of list */ 2014 RemoveNode(node); 2015 InsertNodeAtEnd(list, node); 2016 StripSpan( doc, node ); 2017 InsertNodeAtEnd(list, br); 2018 node = list->next; 2019 } 2020 else 2021 list = NULL; 2022 } 2023 else 2024 list = NULL; 2025 2026 if (!node) 2027 return; 2028 2029 /* strip out style and class attributes */ 2030 if (nodeIsElement(node)) 2031 PurgeWord2000Attributes( doc, node ); 2032 2033 if (node->content) 2034 CleanWord2000( doc, node->content ); 2035 2036 node = node->next; 2037 } 2038 } 2039 2040 Bool IsWord2000( TidyDocImpl* doc ) 2041 { 2042 AttVal *attval; 2043 Node *node, *head; 2044 Node *html = FindHTML( doc ); 2045 2046 if (html && GetAttrByName(html, "xmlns:o")) 2047 return yes; 2048 2049 /* search for <meta name="GENERATOR" content="Microsoft ..."> */ 2050 head = FindHEAD( doc ); 2051 2052 if (head) 2053 { 2054 for (node = head->content; node; node = node->next) 2055 { 2056 if ( !nodeIsMETA(node) ) 2057 continue; 2058 2059 attval = AttrGetById( node, TidyAttr_NAME ); 2060 2061 if ( !AttrValueIs(attval, "generator") ) 2062 continue; 2063 2064 attval = AttrGetById( node, TidyAttr_CONTENT ); 2065 2066 if ( AttrContains(attval, "Microsoft") ) 2067 return yes; 2068 } 2069 } 2070 2071 return no; 2072 } 2073 2074 /* where appropriate move object elements from head to body */ 2075 void BumpObject( TidyDocImpl* doc, Node *html ) 2076 { 2077 Node *node, *next, *head = NULL, *body = NULL; 2078 2079 if (!html) 2080 return; 2081 2082 for ( node = html->content; node != NULL; node = node->next ) 2083 { 2084 if ( nodeIsHEAD(node) 
) 2085 head = node; 2086 2087 if ( nodeIsBODY(node) ) 2088 body = node; 2089 } 2090 2091 if ( head != NULL && body != NULL ) 2092 { 2093 for (node = head->content; node != NULL; node = next) 2094 { 2095 next = node->next; 2096 2097 if ( nodeIsOBJECT(node) ) 2098 { 2099 Node *child; 2100 Bool bump = no; 2101 2102 for (child = node->content; child != NULL; child = child->next) 2103 { 2104 /* bump to body unless content is param */ 2105 if ( (nodeIsText(child) && !IsBlank(doc->lexer, node)) 2106 || !nodeIsPARAM(child) ) 2107 { 2108 bump = yes; 2109 break; 2110 } 2111 } 2112 2113 if ( bump ) 2114 { 2115 RemoveNode( node ); 2116 InsertNodeAtStart( body, node ); 2117 } 2118 } 2119 } 2120 } 2121 } 2122 2123 /* This is disabled due to http://tidy.sf.net/bug/681116 */ 2124 #if 0 2125 void FixBrakes( TidyDocImpl* pDoc, Node *pParent ) 2126 { 2127 Node *pNode; 2128 Bool bBRDeleted = no; 2129 2130 if (NULL == pParent) 2131 return; 2132 2133 /* First, check the status of All My Children */ 2134 pNode = pParent->content; 2135 while (NULL != pNode ) 2136 { 2137 /* The node may get trimmed, so save the next pointer, if any */ 2138 Node *pNext = pNode->next; 2139 FixBrakes( pDoc, pNode ); 2140 pNode = pNext; 2141 } 2142 2143 2144 /* As long as my last child is a <br />, move it to my last peer */ 2145 if ( nodeCMIsBlock( pParent )) 2146 { 2147 for ( pNode = pParent->last; 2148 NULL != pNode && nodeIsBR( pNode ); 2149 pNode = pParent->last ) 2150 { 2151 if ( NULL == pNode->attributes && no == bBRDeleted ) 2152 { 2153 DiscardElement( pDoc, pNode ); 2154 bBRDeleted = yes; 2155 } 2156 else 2157 { 2158 RemoveNode( pNode ); 2159 InsertNodeAfterElement( pParent, pNode ); 2160 } 2161 } 2162 TrimEmptyElement( pDoc, pParent ); 2163 } 2164 } 2165 #endif 2166 2167 void VerifyHTTPEquiv(TidyDocImpl* pDoc, Node *head) 2168 { 2169 Node *pNode; 2170 StyleProp *pFirstProp = NULL, *pLastProp = NULL, *prop = NULL; 2171 tmbstr s, pszBegin, pszEnd; 2172 ctmbstr enc = GetEncodingNameFromTidyId(cfg(pDoc, 
TidyOutCharEncoding)); 2173 2174 if (!enc) 2175 return; 2176 2177 if (!nodeIsHEAD(head)) 2178 head = FindHEAD(pDoc); 2179 2180 if (!head) 2181 return; 2182 2183 /* Find any <meta http-equiv='Content-Type' content='...' /> */ 2184 for (pNode = head->content; NULL != pNode; pNode = pNode->next) 2185 { 2186 AttVal* httpEquiv = AttrGetById(pNode, TidyAttr_HTTP_EQUIV); 2187 AttVal* metaContent = AttrGetById(pNode, TidyAttr_CONTENT); 2188 2189 if ( !nodeIsMETA(pNode) || !metaContent || 2190 !AttrValueIs(httpEquiv, "Content-Type") ) 2191 continue; 2192 2193 pszBegin = s = tmbstrdup( metaContent->value ); 2194 while (pszBegin && *pszBegin) 2195 { 2196 while (isspace( *pszBegin )) 2197 pszBegin++; 2198 pszEnd = pszBegin; 2199 while ('\0' != *pszEnd && ';' != *pszEnd) 2200 pszEnd++; 2201 if (';' == *pszEnd ) 2202 *(pszEnd++) = '\0'; 2203 if (pszEnd > pszBegin) 2204 { 2205 prop = (StyleProp *)MemAlloc(sizeof(StyleProp)); 2206 prop->name = tmbstrdup( pszBegin ); 2207 prop->value = NULL; 2208 prop->next = NULL; 2209 2210 if (NULL != pLastProp) 2211 pLastProp->next = prop; 2212 else 2213 pFirstProp = prop; 2214 2215 pLastProp = prop; 2216 pszBegin = pszEnd; 2217 } 2218 } 2219 MemFree( s ); 2220 2221 /* find the charset property */ 2222 for (prop = pFirstProp; NULL != prop; prop = prop->next) 2223 { 2224 if (0 != tmbstrncasecmp( prop->name, "charset", 7 )) 2225 continue; 2226 2227 MemFree( prop->name ); 2228 prop->name = (tmbstr)MemAlloc( 8 + tmbstrlen(enc) + 1 ); 2229 tmbstrcpy(prop->name, "charset="); 2230 tmbstrcpy(prop->name+8, enc); 2231 s = CreatePropString( pFirstProp ); 2232 MemFree( metaContent->value ); 2233 metaContent->value = s; 2234 break; 2235 } 2236 /* #718127, prevent memory leakage */ 2237 FreeStyleProps(pFirstProp); 2238 pFirstProp = NULL; 2239 pLastProp = NULL; 2240 } 2241 } 2242 2243 void DropComments(TidyDocImpl* doc, Node* node) 2244 { 2245 Node* next; 2246 2247 while (node) 2248 { 2249 next = node->next; 2250 2251 if (node->type == CommentTag) 2252 { 2253 
RemoveNode(node); 2254 FreeNode(doc, node); 2255 node = next; 2256 continue; 2257 } 2258 2259 if (node->content) 2260 DropComments(doc, node->content); 2261 2262 node = next; 2263 } 2264 } 2265 2266 void DropFontElements(TidyDocImpl* doc, Node* node, Node **ARG_UNUSED(pnode)) 2267 { 2268 Node* next; 2269 2270 while (node) 2271 { 2272 next = node->next; 2273 2274 if (nodeIsFONT(node)) 2275 { 2276 DiscardContainer(doc, node, &next); 2277 node = next; 2278 continue; 2279 } 2280 2281 if (node->content) 2282 DropFontElements(doc, node->content, &next); 2283 2284 node = next; 2285 } 2286 } 2287 2288 void WbrToSpace(TidyDocImpl* doc, Node* node) 2289 { 2290 Node* next; 2291 2292 while (node) 2293 { 2294 next = node->next; 2295 2296 if (nodeIsWBR(node)) 2297 { 2298 Node* text; 2299 text = NewLiteralTextNode(doc->lexer, " "); 2300 InsertNodeAfterElement(node, text); 2301 RemoveNode(node); 2302 FreeNode(doc, node); 2303 node = next; 2304 continue; 2305 } 2306 2307 if (node->content) 2308 WbrToSpace(doc, node->content); 2309 2310 node = next; 2311 } 2312 } 2313 2314 /* 2315 Filters from Word and PowerPoint often use smart 2316 quotes resulting in character codes between 128 2317 and 159. Unfortunately, the corresponding HTML 4.0 2318 entities for these are not widely supported. The 2319 following converts dashes and quotation marks to 2320 the nearest ASCII equivalent. My thanks to 2321 Andrzej Novosiolov for his help with this code. 2322 2323 Note: The old code in the pretty printer applied 2324 this to all node types and attribute values while 2325 this routine applies it only to text nodes. 
First, 2326 Microsoft Office products rarely put the relevant 2327 characters into these tokens, second support for 2328 them is much better now and last but not least, it 2329 can be harmful to replace these characters since 2330 US-ASCII quote marks are often used as syntax 2331 characters, a simple 2332 2333 <a onmouseover="alert('&#x2018;')">...</a> 2334 2335 would be broken if the U+2018 is replaced by "'". 2336 The old code would neither take care whether the 2337 quote mark is already used as delimiter, 2338 2339 <p title='&#x2018;'>...</p> 2340 2341 got 2342 2343 <p title='''>...</p> 2344 2345 Since browser support is much better nowadays and 2346 high-quality typography is better than ASCII it'd 2347 be probably a good idea to drop the feature... 2348 */ 2349 void DowngradeTypography(TidyDocImpl* doc, Node* node) 2350 { 2351 Node* next; 2352 Lexer* lexer = doc->lexer; 2353 2354 while (node) 2355 { 2356 next = node->next; 2357 2358 if (nodeIsText(node)) 2359 { 2360 uint i, c; 2361 tmbstr p = lexer->lexbuf + node->start; 2362 2363 for (i = node->start; i < node->end; ++i) 2364 { 2365 c = (unsigned char) lexer->lexbuf[i]; 2366 2367 if (c > 0x7F) 2368 i += GetUTF8(lexer->lexbuf + i, &c); 2369 2370 if (c >= 0x2013 && c <= 0x201E) 2371 { 2372 switch (c) 2373 { 2374 case 0x2013: /* en dash */ 2375 case 0x2014: /* em dash */ 2376 c = '-'; 2377 break; 2378 case 0x2018: /* left single quotation mark */ 2379 case 0x2019: /* right single quotation mark */ 2380 case 0x201A: /* single low-9 quotation mark */ 2381 c = '\''; 2382 break; 2383 case 0x201C: /* left double quotation mark */ 2384 case 0x201D: /* right double quotation mark */ 2385 case 0x201E: /* double low-9 quotation mark */ 2386 c = '"'; 2387 break; 2388 } 2389 } 2390 2391 p = PutUTF8(p, c); 2392 } 2393 2394 node->end = p - lexer->lexbuf; 2395 } 2396 2397 if (node->content) 2398 DowngradeTypography(doc, node->content); 2399 2400 node = next; 2401 } 2402 } 2403 2404 void 
ReplacePreformattedSpaces(TidyDocImpl* doc, Node* node) 2405 { 2406 Node* next; 2407 2408 while (node) 2409 { 2410 next = node->next; 2411 2412 if (node->tag && node->tag->parser == ParsePre) 2413 { 2414 NormalizeSpaces(doc->lexer, node->content); 2415 node = next; 2416 continue; 2417 } 2418 2419 if (node->content) 2420 ReplacePreformattedSpaces(doc, node->content); 2421 2422 node = next; 2423 } 2424 } 2425 2426 void ConvertCDATANodes(TidyDocImpl* doc, Node* node) 2427 { 2428 Node* next; 2429 2430 while (node) 2431 { 2432 next = node->next; 2433 2434 if (node->type == CDATATag) 2435 node->type = TextNode; 2436 2437 if (node->content) 2438 ConvertCDATANodes(doc, node->content); 2439 2440 node = next; 2441 } 2442 } 2443 2444 /* 2445 FixLanguageInformation ensures that the document contains (only) 2446 the attributes for language information desired by the output 2447 document type. For example, for XHTML 1.0 documents both 2448 'xml:lang' and 'lang' are desired, for XHTML 1.1 only 'xml:lang' 2449 is desired and for HTML 4.01 only 'lang' is desired. 2450 */ 2451 void FixLanguageInformation(TidyDocImpl* doc, Node* node, Bool wantXmlLang, Bool wantLang) 2452 { 2453 Node* next; 2454 2455 while (node) 2456 { 2457 next = node->next; 2458 2459 /* todo: report modifications made here to the report system */ 2460 2461 if (nodeIsElement(node)) 2462 { 2463 AttVal* lang = AttrGetById(node, TidyAttr_LANG); 2464 AttVal* xmlLang = AttrGetById(node, TidyAttr_XML_LANG); 2465 2466 if (lang && xmlLang) 2467 { 2468 /* 2469 todo: check whether both attributes are in sync, 2470 here or elsewhere, where elsewhere is probably 2471 preferable. 2472 AD - March 2005: not mandatory according the standards. 
2473 */ 2474 } 2475 else if (lang && wantXmlLang) 2476 { 2477 if (NodeAttributeVersions( node, TidyAttr_XML_LANG ) 2478 & doc->lexer->versionEmitted) 2479 RepairAttrValue(doc, node, "xml:lang", lang->value); 2480 } 2481 else if (xmlLang && wantLang) 2482 { 2483 if (NodeAttributeVersions( node, TidyAttr_LANG ) 2484 & doc->lexer->versionEmitted) 2485 RepairAttrValue(doc, node, "lang", xmlLang->value); 2486 } 2487 2488 if (lang && !wantLang) 2489 RemoveAttribute(doc, node, lang); 2490 2491 if (xmlLang && !wantXmlLang) 2492 RemoveAttribute(doc, node, xmlLang); 2493 } 2494 2495 if (node->content) 2496 FixLanguageInformation(doc, node->content, wantXmlLang, wantLang); 2497 2498 node = next; 2499 } 2500 } 2501 2502 /* 2503 Set/fix/remove <html xmlns='...'> 2504 */ 2505 void FixXhtmlNamespace(TidyDocImpl* doc, Bool wantXmlns) 2506 { 2507 Node* html = FindHTML(doc); 2508 AttVal* xmlns; 2509 2510 if (!html) 2511 return; 2512 2513 xmlns = AttrGetById(html, TidyAttr_XMLNS); 2514 2515 if (wantXmlns) 2516 { 2517 if (!AttrValueIs(xmlns, XHTML_NAMESPACE)) 2518 RepairAttrValue(doc, html, "xmlns", XHTML_NAMESPACE); 2519 } 2520 else if (xmlns) 2521 { 2522 RemoveAttribute(doc, html, xmlns); 2523 } 2524 } 2525 2526 /* 2527 ... 2528 */ 2529 void FixAnchors(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId) 2530 { 2531 Node* next; 2532 2533 while (node) 2534 { 2535 next = node->next; 2536 2537 if (IsAnchorElement(doc, node)) 2538 { 2539 AttVal *name = AttrGetById(node, TidyAttr_NAME); 2540 AttVal *id = AttrGetById(node, TidyAttr_ID); 2541 2542 /* todo: how are empty name/id attributes handled? 
*/ 2543 2544 if (name && id) 2545 { 2546 Bool NameHasValue = AttrHasValue(name); 2547 Bool IdHasValue = AttrHasValue(id); 2548 if ( (NameHasValue != IdHasValue) || 2549 (NameHasValue && IdHasValue && 2550 tmbstrcmp(name->value, id->value) != 0 ) ) 2551 ReportAttrError( doc, node, name, ID_NAME_MISMATCH); 2552 } 2553 else if (name && wantId) 2554 { 2555 if (NodeAttributeVersions( node, TidyAttr_ID ) 2556 & doc->lexer->versionEmitted) 2557 { 2558 if (IsValidHTMLID(name->value)) 2559 { 2560 RepairAttrValue(doc, node, "id", name->value); 2561 } 2562 else 2563 { 2564 ReportAttrError(doc, node, name, INVALID_XML_ID); 2565 } 2566 } 2567 } 2568 else if (id && wantName) 2569 { 2570 if (NodeAttributeVersions( node, TidyAttr_NAME ) 2571 & doc->lexer->versionEmitted) 2572 /* todo: do not assume id is valid */ 2573 RepairAttrValue(doc, node, "name", id->value); 2574 } 2575 2576 if (id && !wantId) 2577 RemoveAttribute(doc, node, id); 2578 2579 if (name && !wantName) 2580 RemoveAttribute(doc, node, name); 2581 2582 if (AttrGetById(node, TidyAttr_NAME) == NULL && 2583 AttrGetById(node, TidyAttr_ID) == NULL) 2584 RemoveAnchorByNode(doc, node); 2585 } 2586 2587 if (node->content) 2588 FixAnchors(doc, node->content, wantName, wantId); 2589 2590 node = next; 2591 } 2592 } 2593

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.