~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/pprint.c

Version: ~ [ 1.0 ] ~

** Warning: Cannot open xref database.

1 /* 2 pprint.c -- pretty print parse tree 3 4 (c) 1998-2005 (W3C) MIT, ERCIM, Keio University 5 See tidy.h for the copyright notice. 6 7 CVS Info : 8 9 $Author: arnaud02 $ 10 $Date: 2005/08/02 10:07:29 $ 11 $Revision: 1.104 $ 12 13 */ 14 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 19 #include "pprint.h" 20 #include "tidy-int.h" 21 #include "parser.h" 22 #include "entities.h" 23 #include "tmbstr.h" 24 #include "utf8.h" 25 26 /* 27 Block-level and unknown elements are printed on 28 new lines and their contents indented 2 spaces 29 30 Inline elements are printed inline. 31 32 Inline content is wrapped on spaces (except in 33 attribute values or preformatted text, after 34 start tags and before end tags 35 */ 36 37 static void PPrintAsp( TidyDocImpl* doc, uint indent, Node* node ); 38 static void PPrintJste( TidyDocImpl* doc, uint indent, Node* node ); 39 static void PPrintPhp( TidyDocImpl* doc, uint indent, Node* node ); 40 static int TextEndsWithNewline( Lexer *lexer, Node *node, uint mode ); 41 static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode ); 42 static Bool InsideHead( TidyDocImpl* doc, Node *node ); 43 static Bool ShouldIndent( TidyDocImpl* doc, Node *node ); 44 45 #if SUPPORT_ASIAN_ENCODINGS 46 /* #431953 - start RJ Wraplen adjusted for smooth international ride */ 47 48 uint CWrapLen( TidyDocImpl* doc, uint ind ) 49 { 50 ctmbstr lang = cfgStr( doc, TidyLanguage ); 51 uint wraplen = cfg( doc, TidyWrapLen ); 52 53 if ( !tmbstrcasecmp(lang, "zh") ) 54 /* Chinese characters take two positions on a fixed-width screen */ 55 /* It would be more accurate to keep a parallel linelen and wraphere 56 incremented by 2 for Chinese characters and 1 otherwise, but this 57 is way simpler. 58 */ 59 return (ind + (( wraplen - ind ) / 2)) ; 60 61 if ( !tmbstrcasecmp(lang, "ja") ) 62 /* average Japanese text is 30% kanji */ 63 return (ind + ((( wraplen - ind ) * 7) / 10)) ; 64 65 return wraplen; 66 } 67 68 typedef enum 69 { 70 UC00, /* None */ 71 UCPC, /* Punctuation, Connector */ 72 UCPD, /* Punctuation, Dash */ 73 UCPE, /* Punctuation, Close */ 74 UCPS, /* Punctuation, Open */ 75 UCPI, /* Punctuation, Initial quote */ 76 UCPF, /* Punctuation, Final quote */ 77 UCPO, /* Punctuation, Other */ 78 UCZS, /* Separator, Space */ 79 UCZL, /* Separator, Line */ 80 UCZP /* Separator, Paragraph */ 81 } UnicodeCategory; 82 83 /* 84 From the original code, the following characters are removed: 85 86 U+2011 (non-breaking hyphen) 87 U+202F (narrow non-break space) 88 U+2044 (fraction slash) 89 U+200B (zero width space) 90 ...... (bidi formatting control characters) 91 92 U+2011 and U+202F are non-breaking, U+2044 is a Sm character, 93 U+200B is a non-visible space, wrapping after it would make 94 this space visible, bidi should be done using HTML features 95 and the characters are neither Px or Zx. 96 97 The following Unicode 3.0 punctuation characters are added: 98 99 U+2048 (question exclamation mark) 100 U+2049 (exclamation question mark) 101 U+204A (tironian sign et) 102 U+204B (reversed pilcrow sign) 103 U+204C (black leftwards bullet) 104 U+204D (black rightwards bullet) 105 U+3030 (wavy dash) 106 U+30FB (katakana middle dot) 107 U+FE63 (small hyphen-minus) 108 U+FE68 (small reverse solidus) 109 U+FF3F (fullwidth low line) 110 U+FF5B (fullwidth left curly bracket) 111 U+FF5D (fullwidth right curly bracket) 112 113 Other additional characters were not included in Unicode 3.0. 114 The table is based on Unicode 4.0. It must include only those 115 characters marking a wrapping point, "before" if the general 116 category is UCPS or UCPI, otherwise "after". 117 */ 118 static struct _unicode4cat 119 { 120 unsigned long code; 121 UnicodeCategory category; 122 } const unicode4cat[] = 123 { 124 #if 0 125 { 0x037E, UCPO }, { 0x0387, UCPO }, { 0x055A, UCPO }, { 0x055B, UCPO }, 126 { 0x055C, UCPO }, { 0x055D, UCPO }, { 0x055E, UCPO }, { 0x055F, UCPO }, 127 { 0x0589, UCPO }, { 0x058A, UCPD }, { 0x05BE, UCPO }, { 0x05C0, UCPO }, 128 { 0x05C3, UCPO }, { 0x05F3, UCPO }, { 0x05F4, UCPO }, { 0x060C, UCPO }, 129 { 0x060D, UCPO }, { 0x061B, UCPO }, { 0x061F, UCPO }, { 0x066A, UCPO }, 130 { 0x066B, UCPO }, { 0x066C, UCPO }, { 0x066D, UCPO }, { 0x06D4, UCPO }, 131 { 0x0700, UCPO }, { 0x0701, UCPO }, { 0x0702, UCPO }, { 0x0703, UCPO }, 132 { 0x0704, UCPO }, { 0x0705, UCPO }, { 0x0706, UCPO }, { 0x0707, UCPO }, 133 { 0x0708, UCPO }, { 0x0709, UCPO }, { 0x070A, UCPO }, { 0x070B, UCPO }, 134 { 0x070C, UCPO }, { 0x070D, UCPO }, { 0x0964, UCPO }, { 0x0965, UCPO }, 135 { 0x0970, UCPO }, { 0x0DF4, UCPO }, { 0x0E4F, UCPO }, { 0x0E5A, UCPO }, 136 { 0x0E5B, UCPO }, { 0x0F04, UCPO }, { 0x0F05, UCPO }, { 0x0F06, UCPO }, 137 { 0x0F07, UCPO }, { 0x0F08, UCPO }, { 0x0F09, UCPO }, { 0x0F0A, UCPO }, 138 { 0x0F0B, UCPO }, { 0x0F0D, UCPO }, { 0x0F0E, UCPO }, { 0x0F0F, UCPO }, 139 { 0x0F10, UCPO }, { 0x0F11, UCPO }, { 0x0F12, UCPO }, { 0x0F3A, UCPS }, 140 { 0x0F3B, UCPE }, { 0x0F3C, UCPS }, { 0x0F3D, UCPE }, { 0x0F85, UCPO }, 141 { 0x104A, UCPO }, { 0x104B, UCPO }, { 0x104C, UCPO }, { 0x104D, UCPO }, 142 { 0x104E, UCPO }, { 0x104F, UCPO }, { 0x10FB, UCPO }, { 0x1361, UCPO }, 143 { 0x1362, UCPO }, { 0x1363, UCPO }, { 0x1364, UCPO }, { 0x1365, UCPO }, 144 { 0x1366, UCPO }, { 0x1367, UCPO }, { 0x1368, UCPO }, { 0x166D, UCPO }, 145 { 0x166E, UCPO }, { 0x1680, UCZS }, { 0x169B, UCPS }, { 0x169C, UCPE }, 146 { 0x16EB, UCPO }, { 0x16EC, UCPO }, { 0x16ED, UCPO }, { 0x1735, UCPO }, 147 { 0x1736, UCPO }, { 0x17D4, UCPO }, { 0x17D5, UCPO }, { 0x17D6, UCPO }, 148 { 0x17D8, UCPO }, { 0x17D9, UCPO }, { 0x17DA, UCPO }, { 0x1800, UCPO }, 149 { 0x1801, UCPO }, { 0x1802, UCPO }, { 0x1803, UCPO }, { 0x1804, UCPO }, 150 { 0x1805, UCPO }, { 0x1806, UCPD }, { 0x1807, UCPO }, { 0x1808, UCPO }, 151 { 0x1809, UCPO }, { 0x180A, UCPO }, { 0x180E, UCZS }, { 0x1944, UCPO }, 152 { 0x1945, UCPO }, 153 #endif 154 { 0x2000, UCZS }, { 0x2001, UCZS }, { 0x2002, UCZS }, { 0x2003, UCZS }, 155 { 0x2004, UCZS }, { 0x2005, UCZS }, { 0x2006, UCZS }, { 0x2008, UCZS }, 156 { 0x2009, UCZS }, { 0x200A, UCZS }, { 0x2010, UCPD }, { 0x2012, UCPD }, 157 { 0x2013, UCPD }, { 0x2014, UCPD }, { 0x2015, UCPD }, { 0x2016, UCPO }, 158 { 0x2017, UCPO }, { 0x2018, UCPI }, { 0x2019, UCPF }, { 0x201A, UCPS }, 159 { 0x201B, UCPI }, { 0x201C, UCPI }, { 0x201D, UCPF }, { 0x201E, UCPS }, 160 { 0x201F, UCPI }, { 0x2020, UCPO }, { 0x2021, UCPO }, { 0x2022, UCPO }, 161 { 0x2023, UCPO }, { 0x2024, UCPO }, { 0x2025, UCPO }, { 0x2026, UCPO }, 162 { 0x2027, UCPO }, { 0x2028, UCZL }, { 0x2029, UCZP }, { 0x2030, UCPO }, 163 { 0x2031, UCPO }, { 0x2032, UCPO }, { 0x2033, UCPO }, { 0x2034, UCPO }, 164 { 0x2035, UCPO }, { 0x2036, UCPO }, { 0x2037, UCPO }, { 0x2038, UCPO }, 165 { 0x2039, UCPI }, { 0x203A, UCPF }, { 0x203B, UCPO }, { 0x203C, UCPO }, 166 { 0x203D, UCPO }, { 0x203E, UCPO }, { 0x203F, UCPC }, { 0x2040, UCPC }, 167 { 0x2041, UCPO }, { 0x2042, UCPO }, { 0x2043, UCPO }, { 0x2045, UCPS }, 168 { 0x2046, UCPE }, { 0x2047, UCPO }, { 0x2048, UCPO }, { 0x2049, UCPO }, 169 { 0x204A, UCPO }, { 0x204B, UCPO }, { 0x204C, UCPO }, { 0x204D, UCPO }, 170 { 0x204E, UCPO }, { 0x204F, UCPO }, { 0x2050, UCPO }, { 0x2051, UCPO }, 171 { 0x2053, UCPO }, { 0x2054, UCPC }, { 0x2057, UCPO }, { 0x205F, UCZS }, 172 { 0x207D, UCPS }, { 0x207E, UCPE }, { 0x208D, UCPS }, { 0x208E, UCPE }, 173 { 0x2329, UCPS }, { 0x232A, UCPE }, { 0x23B4, UCPS }, { 0x23B5, UCPE }, 174 { 0x23B6, UCPO }, { 0x2768, UCPS }, { 0x2769, UCPE }, { 0x276A, UCPS }, 175 { 0x276B, UCPE }, { 0x276C, UCPS }, { 0x276D, UCPE }, { 0x276E, UCPS }, 176 { 0x276F, UCPE }, { 0x2770, UCPS }, { 0x2771, UCPE }, { 0x2772, UCPS }, 177 { 0x2773, UCPE }, { 0x2774, UCPS }, { 0x2775, UCPE }, { 0x27E6, UCPS }, 178 { 0x27E7, UCPE }, { 0x27E8, UCPS }, { 0x27E9, UCPE }, { 0x27EA, UCPS }, 179 { 0x27EB, UCPE }, { 0x2983, UCPS }, { 0x2984, UCPE }, { 0x2985, UCPS }, 180 { 0x2986, UCPE }, { 0x2987, UCPS }, { 0x2988, UCPE }, { 0x2989, UCPS }, 181 { 0x298A, UCPE }, { 0x298B, UCPS }, { 0x298C, UCPE }, { 0x298D, UCPS }, 182 { 0x298E, UCPE }, { 0x298F, UCPS }, { 0x2990, UCPE }, { 0x2991, UCPS }, 183 { 0x2992, UCPE }, { 0x2993, UCPS }, { 0x2994, UCPE }, { 0x2995, UCPS }, 184 { 0x2996, UCPE }, { 0x2997, UCPS }, { 0x2998, UCPE }, { 0x29D8, UCPS }, 185 { 0x29D9, UCPE }, { 0x29DA, UCPS }, { 0x29DB, UCPE }, { 0x29FC, UCPS }, 186 { 0x29FD, UCPE }, { 0x3001, UCPO }, { 0x3002, UCPO }, { 0x3003, UCPO }, 187 { 0x3008, UCPS }, { 0x3009, UCPE }, { 0x300A, UCPS }, { 0x300B, UCPE }, 188 { 0x300C, UCPS }, { 0x300D, UCPE }, { 0x300E, UCPS }, { 0x300F, UCPE }, 189 { 0x3010, UCPS }, { 0x3011, UCPE }, { 0x3014, UCPS }, { 0x3015, UCPE }, 190 { 0x3016, UCPS }, { 0x3017, UCPE }, { 0x3018, UCPS }, { 0x3019, UCPE }, 191 { 0x301A, UCPS }, { 0x301B, UCPE }, { 0x301C, UCPD }, { 0x301D, UCPS }, 192 { 0x301E, UCPE }, { 0x301F, UCPE }, { 0x3030, UCPD }, { 0x303D, UCPO }, 193 { 0x30A0, UCPD }, { 0x30FB, UCPC }, { 0xFD3E, UCPS }, { 0xFD3F, UCPE }, 194 { 0xFE30, UCPO }, { 0xFE31, UCPD }, { 0xFE32, UCPD }, { 0xFE33, UCPC }, 195 { 0xFE34, UCPC }, { 0xFE35, UCPS }, { 0xFE36, UCPE }, { 0xFE37, UCPS }, 196 { 0xFE38, UCPE }, { 0xFE39, UCPS }, { 0xFE3A, UCPE }, { 0xFE3B, UCPS }, 197 { 0xFE3C, UCPE }, { 0xFE3D, UCPS }, { 0xFE3E, UCPE }, { 0xFE3F, UCPS }, 198 { 0xFE40, UCPE }, { 0xFE41, UCPS }, { 0xFE42, UCPE }, { 0xFE43, UCPS }, 199 { 0xFE44, UCPE }, { 0xFE45, UCPO }, { 0xFE46, UCPO }, { 0xFE47, UCPS }, 200 { 0xFE48, UCPE }, { 0xFE49, UCPO }, { 0xFE4A, UCPO }, { 0xFE4B, UCPO }, 201 { 0xFE4C, UCPO }, { 0xFE4D, UCPC }, { 0xFE4E, UCPC }, { 0xFE4F, UCPC }, 202 { 0xFE50, UCPO }, { 0xFE51, UCPO }, { 0xFE52, UCPO }, { 0xFE54, UCPO }, 203 { 0xFE55, UCPO }, { 0xFE56, UCPO }, { 0xFE57, UCPO }, { 0xFE58, UCPD }, 204 { 0xFE59, UCPS }, { 0xFE5A, UCPE }, { 0xFE5B, UCPS }, { 0xFE5C, UCPE }, 205 { 0xFE5D, UCPS }, { 0xFE5E, UCPE }, { 0xFE5F, UCPO }, { 0xFE60, UCPO }, 206 { 0xFE61, UCPO }, { 0xFE63, UCPD }, { 0xFE68, UCPO }, { 0xFE6A, UCPO }, 207 { 0xFE6B, UCPO }, { 0xFF01, UCPO }, { 0xFF02, UCPO }, { 0xFF03, UCPO }, 208 { 0xFF05, UCPO }, { 0xFF06, UCPO }, { 0xFF07, UCPO }, { 0xFF08, UCPS }, 209 { 0xFF09, UCPE }, { 0xFF0A, UCPO }, { 0xFF0C, UCPO }, { 0xFF0D, UCPD }, 210 { 0xFF0E, UCPO }, { 0xFF0F, UCPO }, { 0xFF1A, UCPO }, { 0xFF1B, UCPO }, 211 { 0xFF1F, UCPO }, { 0xFF20, UCPO }, { 0xFF3B, UCPS }, { 0xFF3C, UCPO }, 212 { 0xFF3D, UCPE }, { 0xFF3F, UCPC }, { 0xFF5B, UCPS }, { 0xFF5D, UCPE }, 213 { 0xFF5F, UCPS }, { 0xFF60, UCPE }, { 0xFF61, UCPO }, { 0xFF62, UCPS }, 214 { 0xFF63, UCPE }, { 0xFF64, UCPO }, { 0xFF65, UCPC }, { 0x10100,UCPO }, 215 { 0x10101,UCPO }, { 0x1039F,UCPO }, 216 217 /* final entry */ 218 { 0x0000, UC00 } 219 }; 220 221 typedef enum 222 { 223 NoWrapPoint, 224 WrapBefore, 225 WrapAfter 226 } WrapPoint; 227 228 /* 229 If long lines of text have no white space as defined in HTML 4 230 (U+0009, U+000A, U+000D, U+000C, U+0020) other characters could 231 be used to determine a wrap point. Since user agents would 232 normalize the inserted newline character to a space character, 233 this wrapping behaviour would insert visual whitespace into the 234 document. 235 236 Characters of the General Category Pi and Ps in the Unicode 237 character database (opening punctuation and intial quote 238 characters) mark a wrapping point before the character, other 239 punctuation characters (Pc, Pd, Pe, Pf, and Po), breakable 240 space characters (Zs), and paragraph and line separators 241 (Zl, Zp) mark a wrap point after the character. Using this 242 function Tidy can for example pretty print 243 244 <p>....................&ldquo;...quote...&rdquo;...</p> 245 as 246 <p>....................\n&ldquo;...quote...&rdquo;...</p> 247 or 248 <p>....................&ldquo;...quote...&rdquo;\n...</p> 249 250 if the next normal wrapping point would exceed the user 251 chosen wrapping column. 252 */ 253 static WrapPoint CharacterWrapPoint(tchar c) 254 { 255 int i; 256 for (i = 0; unicode4cat[i].code && unicode4cat[i].code <= c; ++i) 257 if (unicode4cat[i].code == c) 258 /* wrapping before opening punctuation and initial quotes */ 259 if (unicode4cat[i].category == UCPS || 260 unicode4cat[i].category == UCPI) 261 return WrapBefore; 262 /* else wrapping after this character */ 263 else 264 return WrapAfter; 265 /* character has no effect on line wrapping */ 266 return NoWrapPoint; 267 } 268 269 static WrapPoint Big5WrapPoint(tchar c) 270 { 271 if ((c & 0xFF00) == 0xA100) 272 { 273 /* opening brackets have odd codes: break before them */ 274 if ( c > 0x5C && c < 0xAD && (c & 1) == 1 ) 275 return WrapBefore; 276 return WrapAfter; 277 } 278 return NoWrapPoint; 279 } 280 281 #endif /* SUPPORT_ASIAN_ENCODINGS */ 282 283 static void InitIndent( TidyIndent* ind ) 284 { 285 ind->spaces = -1; 286 ind->attrValStart = -1; 287 ind->attrStringStart = -1; 288 } 289 290 void InitPrintBuf( TidyDocImpl* doc ) 291 { 292 ClearMemory( &doc->pprint, sizeof(TidyPrintImpl) ); 293 InitIndent( &doc->pprint.indent[0] ); 294 InitIndent( &doc->pprint.indent[1] ); 295 } 296 297 void FreePrintBuf( TidyDocImpl* doc ) 298 { 299 MemFree( doc->pprint.linebuf ); 300 InitPrintBuf( doc ); 301 } 302 303 static void expand( TidyPrintImpl* pprint, uint len ) 304 { 305 uint* ip; 306 uint buflen = pprint->lbufsize; 307 308 if ( buflen == 0 ) 309 buflen = 256; 310 while ( len >= buflen ) 311 buflen *= 2; 312 313 ip = (uint*) MemRealloc( pprint->linebuf, buflen*sizeof(uint) ); 314 if ( ip ) 315 { 316 ClearMemory( ip+pprint->lbufsize, 317 (buflen-pprint->lbufsize)*sizeof(uint) ); 318 pprint->lbufsize = buflen; 319 pprint->linebuf = ip; 320 } 321 } 322 323 static uint GetSpaces( TidyPrintImpl* pprint ) 324 { 325 int spaces = pprint->indent[ 0 ].spaces; 326 return ( spaces < 0 ? 0U : (uint) spaces ); 327 } 328 static int ClearInString( TidyPrintImpl* pprint ) 329 { 330 TidyIndent *ind = pprint->indent + pprint->ixInd; 331 return ind->attrStringStart = -1; 332 } 333 static int ToggleInString( TidyPrintImpl* pprint ) 334 { 335 TidyIndent *ind = pprint->indent + pprint->ixInd; 336 Bool inString = ( ind->attrStringStart >= 0 ); 337 return ind->attrStringStart = ( inString ? -1 : (int) pprint->linelen ); 338 } 339 static Bool IsInString( TidyPrintImpl* pprint ) 340 { 341 TidyIndent *ind = pprint->indent + 0; /* Always 1st */ 342 return ( ind->attrStringStart >= 0 && 343 ind->attrStringStart < (int) pprint->linelen ); 344 } 345 static Bool IsWrapInString( TidyPrintImpl* pprint ) 346 { 347 TidyIndent *ind = pprint->indent + 0; /* Always 1st */ 348 int wrap = (int) pprint->wraphere; 349 return ( ind->attrStringStart == 0 || 350 (ind->attrStringStart > 0 && ind->attrStringStart < wrap) ); 351 } 352 353 static Bool HasMixedContent (Node *element) 354 { 355 Node * node; 356 357 if (!element) 358 return no; 359 360 for (node = element->content; node; node = node->next) 361 if ( nodeIsText(node) ) 362 return yes; 363 364 return no; 365 } 366 367 static void ClearInAttrVal( TidyPrintImpl* pprint ) 368 { 369 TidyIndent *ind = pprint->indent + pprint->ixInd; 370 ind->attrValStart = -1; 371 } 372 static int SetInAttrVal( TidyPrintImpl* pprint ) 373 { 374 TidyIndent *ind = pprint->indent + pprint->ixInd; 375 return ind->attrValStart = (int) pprint->linelen; 376 } 377 static Bool IsWrapInAttrVal( TidyPrintImpl* pprint ) 378 { 379 TidyIndent *ind = pprint->indent + 0; /* Always 1st */ 380 int wrap = (int) pprint->wraphere; 381 return ( ind->attrValStart == 0 || 382 (ind->attrValStart > 0 && ind->attrValStart < wrap) ); 383 } 384 385 static Bool WantIndent( TidyDocImpl* doc ) 386 { 387 TidyPrintImpl* pprint = &doc->pprint; 388 Bool wantIt = GetSpaces(pprint) > 0; 389 if ( wantIt ) 390 { 391 Bool indentAttrs = cfgBool( doc, TidyIndentAttributes ); 392 wantIt = ( ( !IsWrapInAttrVal(pprint) || indentAttrs ) && 393 !IsWrapInString(pprint) ); 394 } 395 return wantIt; 396 } 397 398 399 static uint WrapOff( TidyDocImpl* doc ) 400 { 401 uint saveWrap = cfg( doc, TidyWrapLen ); 402 SetOptionInt( doc, TidyWrapLen, 0xFFFFFFFF ); /* very large number */ 403 return saveWrap; 404 } 405 406 static void WrapOn( TidyDocImpl* doc, uint saveWrap ) 407 { 408 SetOptionInt( doc, TidyWrapLen, saveWrap ); 409 } 410 411 static uint WrapOffCond( TidyDocImpl* doc, Bool onoff ) 412 { 413 if ( onoff ) 414 return WrapOff( doc ); 415 return cfg( doc, TidyWrapLen ); 416 } 417 418 419 static void AddC( TidyPrintImpl* pprint, uint c, uint string_index) 420 { 421 if ( string_index + 1 >= pprint->lbufsize ) 422 expand( pprint, string_index + 1 ); 423 pprint->linebuf[string_index] = c; 424 } 425 426 static uint AddChar( TidyPrintImpl* pprint, uint c ) 427 { 428 AddC( pprint, c, pprint->linelen ); 429 return ++pprint->linelen; 430 } 431 432 static uint AddAsciiString( TidyPrintImpl* pprint, ctmbstr str, uint string_index ) 433 { 434 uint ix, len = tmbstrlen( str ); 435 if ( string_index + len >= pprint->lbufsize ) 436 expand( pprint, string_index + len ); 437 438 for ( ix=0; ix<len; ++ix ) 439 pprint->linebuf[string_index + ix] = str[ ix ]; 440 return string_index + len; 441 } 442 443 static uint AddString( TidyPrintImpl* pprint, ctmbstr str ) 444 { 445 return pprint->linelen = AddAsciiString( pprint, str, pprint->linelen ); 446 } 447 448 /* Saves current output point as the wrap point, 449 ** but only if indentation would NOT overflow 450 ** the current line. Otherwise keep previous wrap point. 451 */ 452 static Bool SetWrap( TidyDocImpl* doc, uint indent ) 453 { 454 TidyPrintImpl* pprint = &doc->pprint; 455 Bool wrap = ( indent + pprint->linelen < cfg(doc, TidyWrapLen) ); 456 if ( wrap ) 457 { 458 if ( pprint->indent[0].spaces < 0 ) 459 pprint->indent[0].spaces = indent; 460 pprint->wraphere = pprint->linelen; 461 } 462 else if ( pprint->ixInd == 0 ) 463 { 464 /* Save indent 1st time we pass the the wrap line */ 465 pprint->indent[ 1 ].spaces = indent; 466 pprint->ixInd = 1; 467 } 468 return wrap; 469 } 470 471 static void CarryOver( int* valTo, int* valFrom, uint wrapPoint ) 472 { 473 if ( *valFrom > (int) wrapPoint ) 474 { 475 *valTo = *valFrom - wrapPoint; 476 *valFrom = -1; 477 } 478 } 479 480 481 static Bool SetWrapAttr( TidyDocImpl* doc, 482 uint indent, int attrStart, int strStart ) 483 { 484 TidyPrintImpl* pprint = &doc->pprint; 485 TidyIndent *ind = pprint->indent + 0; 486 487 Bool wrap = ( indent + pprint->linelen < cfg(doc, TidyWrapLen) ); 488 if ( wrap ) 489 { 490 if ( ind[0].spaces < 0 ) 491 ind[0].spaces = indent; 492 pprint->wraphere = pprint->linelen; 493 } 494 else if ( pprint->ixInd == 0 ) 495 { 496 /* Save indent 1st time we pass the the wrap line */ 497 pprint->indent[ 1 ].spaces = indent; 498 pprint->ixInd = 1; 499 500 /* Carry over string state */ 501 CarryOver( &ind[1].attrStringStart, &ind[0].attrStringStart, pprint->wraphere ); 502 CarryOver( &ind[1].attrValStart, &ind[0].attrValStart, pprint->wraphere ); 503 } 504 ind += doc->pprint.ixInd; 505 ind->attrValStart = attrStart; 506 ind->attrStringStart = strStart; 507 return wrap; 508 } 509 510 511 /* Reset indent state after flushing a new line 512 */ 513 static void ResetLine( TidyPrintImpl* pprint ) 514 { 515 TidyIndent* ind = pprint->indent + 0; 516 if ( pprint->ixInd > 0 ) 517 { 518 ind[0] = ind[1]; 519 InitIndent( &ind[1] ); 520 } 521 522 if ( pprint->wraphere > 0 ) 523 { 524 int wrap = (int) pprint->wraphere; 525 if ( ind[0].attrStringStart > wrap ) 526 ind[0].attrStringStart -= wrap; 527 if ( ind[0].attrValStart > wrap ) 528 ind[0].attrValStart -= wrap; 529 } 530 else 531 { 532 if ( ind[0].attrStringStart > 0 ) 533 ind[0].attrStringStart = 0; 534 if ( ind[0].attrValStart > 0 ) 535 ind[0].attrValStart = 0; 536 } 537 pprint->wraphere = pprint->ixInd = 0; 538 } 539 540 /* Shift text after wrap point to 541 ** beginning of next line. 542 */ 543 static void ResetLineAfterWrap( TidyPrintImpl* pprint ) 544 { 545 if ( pprint->linelen > pprint->wraphere ) 546 { 547 uint *p = pprint->linebuf; 548 uint *q = p + pprint->wraphere; 549 uint *end = p + pprint->linelen; 550 551 if ( ! IsWrapInAttrVal(pprint) ) 552 { 553 while ( q < end && *q == ' ' ) 554 ++q, ++pprint->wraphere; 555 } 556 557 while ( q < end ) 558 *p++ = *q++; 559 560 pprint->linelen -= pprint->wraphere; 561 } 562 else 563 { 564 pprint->linelen = 0; 565 } 566 567 ResetLine( pprint ); 568 } 569 570 /* Goes ahead with writing current line up to 571 ** previously saved wrap point. Shifts unwritten 572 ** text in output buffer to beginning of next line. 573 */ 574 static void WrapLine( TidyDocImpl* doc ) 575 { 576 TidyPrintImpl* pprint = &doc->pprint; 577 uint i; 578 579 if ( pprint->wraphere == 0 ) 580 return; 581 582 if ( WantIndent(doc) ) 583 { 584 uint spaces = GetSpaces( pprint ); 585 for ( i = 0; i < spaces; ++i ) 586 WriteChar( ' ', doc->docOut ); 587 } 588 589 for ( i = 0; i < pprint->wraphere; ++i ) 590 WriteChar( pprint->linebuf[i], doc->docOut ); 591 592 if ( IsWrapInString(pprint) ) 593 WriteChar( '\\', doc->docOut ); 594 595 WriteChar( '\n', doc->docOut ); 596 ResetLineAfterWrap( pprint ); 597 } 598 599 /* Checks current output line length along with current indent. 600 ** If combined they overflow output line length, go ahead 601 ** and flush output up to the current wrap point. 602 */ 603 static Bool CheckWrapLine( TidyDocImpl* doc ) 604 { 605 TidyPrintImpl* pprint = &doc->pprint; 606 if ( GetSpaces(pprint) + pprint->linelen >= cfg(doc, TidyWrapLen) ) 607 { 608 WrapLine( doc ); 609 return yes; 610 } 611 return no; 612 } 613 614 static Bool CheckWrapIndent( TidyDocImpl* doc, uint indent ) 615 { 616 TidyPrintImpl* pprint = &doc->pprint; 617 if ( GetSpaces(pprint) + pprint->linelen >= cfg(doc, TidyWrapLen) ) 618 { 619 WrapLine( doc ); 620 if ( pprint->indent[ 0 ].spaces < 0 ) 621 pprint->indent[ 0 ].spaces = indent; 622 return yes; 623 } 624 return no; 625 } 626 627 static void WrapAttrVal( TidyDocImpl* doc ) 628 { 629 TidyPrintImpl* pprint = &doc->pprint; 630 uint i; 631 632 /* assert( IsWrapInAttrVal(pprint) ); */ 633 if ( WantIndent(doc) ) 634 { 635 uint spaces = GetSpaces( pprint ); 636 for ( i = 0; i < spaces; ++i ) 637 WriteChar( ' ', doc->docOut ); 638 } 639 640 for ( i = 0; i < pprint->wraphere; ++i ) 641 WriteChar( pprint->linebuf[i], doc->docOut ); 642 643 if ( IsWrapInString(pprint) ) 644 WriteChar( '\\', doc->docOut ); 645 else 646 WriteChar( ' ', doc->docOut ); 647 648 WriteChar( '\n', doc->docOut ); 649 ResetLineAfterWrap( pprint ); 650 } 651 652 void PFlushLine( TidyDocImpl* doc, uint indent ) 653 { 654 TidyPrintImpl* pprint = &doc->pprint; 655 656 if ( pprint->linelen > 0 ) 657 { 658 uint i; 659 660 CheckWrapLine( doc ); 661 662 if ( WantIndent(doc) ) 663 { 664 uint spaces = GetSpaces( pprint ); 665 for ( i = 0; i < spaces; ++i ) 666 WriteChar( ' ', doc->docOut ); 667 } 668 669 for ( i = 0; i < pprint->linelen; ++i ) 670 WriteChar( pprint->linebuf[i], doc->docOut ); 671 672 if ( IsInString(pprint) ) 673 WriteChar( '\\', doc->docOut ); 674 ResetLine( pprint ); 675 pprint->linelen = 0; 676 } 677 678 WriteChar( '\n', doc->docOut ); 679 pprint->indent[ 0 ].spaces = indent; 680 } 681 682 void PCondFlushLine( TidyDocImpl* doc, uint indent ) 683 { 684 TidyPrintImpl* pprint = &doc->pprint; 685 if ( pprint->linelen > 0 ) 686 { 687 uint i; 688 689 CheckWrapLine( doc ); 690 691 if ( WantIndent(doc) ) 692 { 693 uint spaces = GetSpaces( pprint ); 694 for ( i = 0; i < spaces; ++i ) 695 WriteChar(' ', doc->docOut); 696 } 697 698 for ( i = 0; i < pprint->linelen; ++i ) 699 WriteChar( pprint->linebuf[i], doc->docOut ); 700 701 if ( IsInString(pprint) ) 702 WriteChar( '\\', doc->docOut ); 703 ResetLine( pprint ); 704 705 WriteChar( '\n', doc->docOut ); 706 pprint->indent[ 0 ].spaces = indent; 707 pprint->linelen = 0; 708 } 709 } 710 711 static void PPrintChar( TidyDocImpl* doc, uint c, uint mode ) 712 { 713 tmbchar entity[128]; 714 ctmbstr p; 715 TidyPrintImpl* pprint = &doc->pprint; 716 uint outenc = cfg( doc, TidyOutCharEncoding ); 717 Bool qmark = cfgBool( doc, TidyQuoteMarks ); 718 719 if ( c == ' ' && !(mode & (PREFORMATTED | COMMENT | ATTRIBVALUE | CDATA))) 720 { 721 /* coerce a space character to a non-breaking space */ 722 if (mode & NOWRAP) 723 { 724 ctmbstr ent = "&nbsp;"; 725 /* by default XML doesn't define &nbsp; */ 726 if ( cfgBool(doc, TidyNumEntities) || cfgBool(doc, TidyXmlTags) ) 727 ent = "&#160;"; 728 AddString( pprint, ent ); 729 return; 730 } 731 else 732 pprint->wraphere = pprint->linelen; 733 } 734 735 /* comment characters are passed raw */ 736 if ( mode & (COMMENT | CDATA) ) 737 { 738 AddChar( pprint, c ); 739 return; 740 } 741 742 /* except in CDATA map < to &lt; etc. */ 743 if ( !(mode & CDATA) ) 744 { 745 if ( c == '<') 746 { 747 AddString( pprint, "&lt;" ); 748 return; 749 } 750 751 if ( c == '>') 752 { 753 AddString( pprint, "&gt;" ); 754 return; 755 } 756 757 /* 758 naked '&' chars can be left alone or 759 quoted as &amp; The latter is required 760 for XML where naked '&' are illegal. 761 */ 762 if ( c == '&' && cfgBool(doc, TidyQuoteAmpersand) ) 763 { 764 AddString( pprint, "&amp;" ); 765 return; 766 } 767 768 if ( c == '"' && qmark ) 769 { 770 AddString( pprint, "&quot;" ); 771 return; 772 } 773 774 if ( c == '\'' && qmark ) 775 { 776 AddString( pprint, "&#39;" ); 777 return; 778 } 779 780 if ( c == 160 && outenc != RAW ) 781 { 782 if ( cfgBool(doc, TidyQuoteNbsp) ) 783 { 784 if ( cfgBool(doc, TidyNumEntities) || 785 cfgBool(doc, TidyXmlTags) ) 786 AddString( pprint, "&#160;" ); 787 else 788 AddString( pprint, "&nbsp;" ); 789 } 790 else 791 AddChar( pprint, c ); 792 return; 793 } 794 } 795 796 #if SUPPORT_ASIAN_ENCODINGS 797 798 /* #431953 - start RJ */ 799 /* Handle encoding-specific issues */ 800 switch ( outenc ) 801 { 802 case UTF8: 803 #if SUPPORT_UTF16_ENCODINGS 804 case UTF16: 805 case UTF16LE: 806 case UTF16BE: 807 #endif 808 if (!(mode & PREFORMATTED) && cfg(doc, TidyPunctWrap)) 809 { 810 WrapPoint wp = CharacterWrapPoint(c); 811 if (wp == WrapBefore) 812 pprint->wraphere = pprint->linelen; 813 else if (wp == WrapAfter) 814 pprint->wraphere = pprint->linelen + 1; 815 } 816 break; 817 818 case BIG5: 819 /* Allow linebreak at Chinese punctuation characters */ 820 /* There are not many spaces in Chinese */ 821 AddChar( pprint, c ); 822 if (!(mode & PREFORMATTED) && cfg(doc, TidyPunctWrap)) 823 { 824 WrapPoint wp = Big5WrapPoint(c); 825 if (wp == WrapBefore) 826 pprint->wraphere = pprint->linelen; 827 else if (wp == WrapAfter) 828 pprint->wraphere = pprint->linelen + 1; 829 } 830 return; 831 832 case SHIFTJIS: 833 #ifndef NO_NATIVE_ISO2022_SUPPORT 834 case ISO2022: /* ISO 2022 characters are passed raw */ 835 #endif 836 case RAW: 837 AddChar( pprint, c ); 838 return; 839 } 840 /* #431953 - end RJ */ 841 842 #else /* SUPPORT_ASIAN_ENCODINGS */ 843 844 /* otherwise ISO 2022 characters are passed raw */ 845 if ( 846 #ifndef NO_NATIVE_ISO2022_SUPPORT 847 outenc == ISO2022 || 848 #endif 849 outenc == RAW ) 850 { 851 AddChar( pprint, c ); 852 return; 853 } 854 855 #endif /* SUPPORT_ASIAN_ENCODINGS */ 856 857 /* don't map latin-1 chars to entities */ 858 if ( outenc == LATIN1 ) 859 { 860 if (c > 255) /* multi byte chars */ 861 { 862 uint vers = HTMLVersion( doc ); 863 if ( !cfgBool(doc, TidyNumEntities) && (p = EntityName(c, vers)) ) 864 tmbsnprintf(entity, sizeof(entity), "&%s;", p); 865 else 866 tmbsnprintf(entity, sizeof(entity), "&#%u;", c); 867 868 AddString( pprint, entity ); 869 return; 870 } 871 872 if (c > 126 && c < 160) 873 { 874 tmbsnprintf(entity, sizeof(entity), "&#%u;", c); 875 AddString( pprint, entity ); 876 return; 877 } 878 879 AddChar( pprint, c ); 880 return; 881 } 882 883 /* don't map UTF-8 chars to entities */ 884 if ( outenc == UTF8 ) 885 { 886 AddChar( pprint, c ); 887 return; 888 } 889 890 #if SUPPORT_UTF16_ENCODINGS 891 /* don't map UTF-16 chars to entities */ 892 if ( outenc == UTF16 || outenc == UTF16LE || outenc == UTF16BE ) 893 { 894 AddChar( pprint, c ); 895 return; 896 } 897 #endif 898 899 /* use numeric entities only for XML */ 900 if ( cfgBool(doc, TidyXmlTags) ) 901 { 902 /* if ASCII use numeric entities for chars > 127 */ 903 if ( c > 127 && outenc == ASCII ) 904 { 905 tmbsnprintf(entity, sizeof(entity), "&#%u;", c); 906 AddString( pprint, entity ); 907 return; 908 } 909 910 /* otherwise output char raw */ 911 AddChar( pprint, c ); 912 return; 913 } 914 915 /* default treatment for ASCII */ 916 if ( outenc == ASCII && (c > 126 || (c < ' ' && c != '\t')) ) 917 { 918 uint vers = HTMLVersion( doc ); 919 if (!cfgBool(doc, TidyNumEntities) && (p = EntityName(c, vers)) ) 920 tmbsnprintf(entity, sizeof(entity), "&%s;", p); 921 else 922 tmbsnprintf(entity, sizeof(entity), "&#%u;", c); 923 924 AddString( pprint, entity ); 925 return; 926 } 927 928 AddChar( pprint, c ); 929 } 930 931 static uint IncrWS( uint start, uint end, uint indent, int ixWS ) 932 { 933 if ( ixWS > 0 ) 934 { 935 uint st = start + MIN( (uint)ixWS, indent ); 936 start = MIN( st, end ); 937 } 938 return start; 939 } 940 /* 941 The line buffer is uint not char so we can 942 hold Unicode values unencoded. The translation 943 to UTF-8 is deferred to the WriteChar() routine called 944 to flush the line buffer. 945 */ 946 static void PPrintText( TidyDocImpl* doc, uint mode, uint indent, 947 Node* node ) 948 { 949 uint start = node->start; 950 uint end = node->end; 951 uint ix, c = 0; 952 int ixNL = TextEndsWithNewline( doc->lexer, node, mode ); 953 int ixWS = TextStartsWithWhitespace( doc->lexer, node, start, mode ); 954 if ( ixNL > 0 ) 955 end -= ixNL; 956 start = IncrWS( start, end, indent, ixWS ); 957 958 for ( ix = start; ix < end; ++ix ) 959 { 960 CheckWrapIndent( doc, indent ); 961 /* 962 if ( CheckWrapIndent(doc, indent) ) 963 { 964 ixWS = TextStartsWithWhitespace( doc->lexer, node, ix ); 965 ix = IncrWS( ix, end, indent, ixWS ); 966 } 967 */ 968 c = (byte) doc->lexer->lexbuf[ix]; 969 970 /* look for UTF-8 multibyte character */ 971 if ( c > 0x7F ) 972 ix += GetUTF8( doc->lexer->lexbuf + ix, &c ); 973 974 if ( c == '\n' ) 975 { 976 PFlushLine( doc, indent ); 977 ixWS = TextStartsWithWhitespace( doc->lexer, node, ix+1, mode ); 978 ix = IncrWS( ix, end, indent, ixWS ); 979 } 980 else 981 { 982 PPrintChar( doc, c, mode ); 983 } 984 } 985 } 986 987 #if 0 988 static void PPrintString( TidyDocImpl* doc, uint indent, ctmbstr str ) 989 { 990 while ( *str != '\0' ) 991 AddChar( &doc->pprint, *str++ ); 992 } 993 #endif /* 0 */ 994 995 996 static void PPrintAttrValue( TidyDocImpl* doc, uint indent, 997 ctmbstr value, uint delim, Bool wrappable, Bool scriptAttr ) 998 { 999 TidyPrintImpl* pprint = &doc->pprint; 1000 Bool scriptlets = cfgBool(doc, TidyWrapScriptlets); 1001 1002 int mode = PREFORMATTED | ATTRIBVALUE; 1003 if ( wrappable ) 1004 mode = NORMAL | ATTRIBVALUE; 1005 1006 /* look for ASP, Tango or PHP instructions for computed attribute value */ 1007 if ( value && value[0] == '<' ) 1008 { 1009 if ( value[1] == '%' || value[1] == '@'|| 1010 tmbstrncmp(value, "<?php", 5) == 0 ) 1011 mode |= CDATA; 1012 } 1013 1014 if ( delim == 0 ) 1015 delim = '"'; 1016 1017 AddChar( pprint, '=' ); 1018 1019 /* don't wrap after "=" for xml documents */ 1020 if ( !cfgBool(doc, TidyXmlOut) || cfgBool(doc, TidyXhtmlOut) ) 1021 { 1022 SetWrap( doc, indent ); 1023 CheckWrapIndent( doc, indent ); 1024 /* 1025 if ( !SetWrap(doc, indent) ) 1026 PCondFlushLine( doc, indent ); 1027 */ 1028 } 1029 1030 AddChar( pprint, delim ); 1031 1032 if ( value ) 1033 { 1034 uint wraplen = cfg( doc, TidyWrapLen ); 1035 int attrStart = SetInAttrVal( pprint ); 1036 int strStart = ClearInString( pprint ); 1037 1038 while (*value != '\0') 1039 { 1040 uint c = *value; 1041 1042 if ( wrappable && c == ' ' ) 1043 SetWrapAttr( doc, indent, attrStart, strStart ); 1044 1045 if ( wrappable && pprint->wraphere > 0 && 1046 GetSpaces(pprint) + pprint->linelen >= wraplen ) 1047 WrapAttrVal( doc ); 1048 1049 if ( c == delim ) 1050 { 1051 ctmbstr entity = (c == '"' ? "&quot;" : "&#39;"); 1052 AddString( pprint, entity ); 1053 ++value; 1054 continue; 1055 } 1056 else if (c == '"') 1057 { 1058 if ( cfgBool(doc, TidyQuoteMarks) ) 1059 AddString( pprint, "&quot;" ); 1060 else 1061 AddChar( pprint, c ); 1062 1063 if ( delim == '\'' && scriptAttr && scriptlets ) 1064 strStart = ToggleInString( pprint ); 1065 1066 ++value; 1067 continue; 1068 } 1069 else if ( c == '\'' ) 1070 { 1071 if ( cfgBool(doc, TidyQuoteMarks) ) 1072 AddString( pprint, "&#39;" ); 1073 else 1074 AddChar( pprint, c ); 1075 1076 if ( delim == '"' && scriptAttr && scriptlets ) 1077 strStart = ToggleInString( pprint ); 1078 1079 ++value; 1080 continue; 1081 } 1082 1083 /* look for UTF-8 multibyte character */ 1084 if ( c > 0x7F ) 1085 value += GetUTF8( value, &c ); 1086 ++value; 1087 1088 if ( c == '\n' ) 1089 { 1090 /* No indent inside Javascript literals */ 1091 PFlushLine( doc, (strStart < 0 ? indent : 0) ); 1092 continue; 1093 } 1094 PPrintChar( doc, c, mode ); 1095 } 1096 ClearInAttrVal( pprint ); 1097 ClearInString( pprint ); 1098 } 1099 AddChar( pprint, delim ); 1100 } 1101 1102 static uint AttrIndent( TidyDocImpl* doc, Node* node, AttVal* ARG_UNUSED(attr) ) 1103 { 1104 uint spaces = cfg( doc, TidyIndentSpaces ); 1105 uint xtra = 2; /* 1 for the '<', another for the ' ' */ 1106 if ( node->element == NULL ) 1107 return spaces; 1108 1109 if ( !nodeHasCM(node, CM_INLINE) || 1110 !ShouldIndent(doc, node->parent ? node->parent: node) ) 1111 return xtra + tmbstrlen( node->element ); 1112 1113 if ( NULL != (node = FindContainer(node)) ) 1114 return xtra + tmbstrlen( node->element ); 1115 return spaces; 1116 } 1117 1118 static Bool AttrNoIndentFirst( /*TidyDocImpl* doc,*/ Node* node, AttVal* attr ) 1119 { 1120 return ( attr==node->attributes ); 1121 1122 /*&& 1123 ( InsideHead(doc, node) || 1124 !nodeHasCM(node, CM_INLINE) ) ); 1125 */ 1126 } 1127 1128 static void PPrintAttribute( TidyDocImpl* doc, uint indent, 1129 Node *node, AttVal *attr ) 1130 { 1131 TidyPrintImpl* pprint = &doc->pprint; 1132 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1133 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1134 Bool wrapAttrs = cfgBool( doc, TidyWrapAttVals ); 1135 Bool ucAttrs = cfgBool( doc, TidyUpperCaseAttrs ); 1136 Bool indAttrs = cfgBool( doc, TidyIndentAttributes ); 1137 uint xtra = AttrIndent( doc, node, attr ); 1138 Bool first = AttrNoIndentFirst( /*doc,*/ node, attr ); 1139 tmbstr name = attr->attribute; 1140 Bool wrappable = no; 1141 tchar c; 1142 1143 /* fix for odd attribute indentation bug triggered by long values */ 1144 if (!indAttrs) 1145 xtra = 0; 1146 1147 if ( indAttrs ) 1148 { 1149 if ( nodeIsElement(node) && !first ) 1150 { 1151 indent += xtra; 1152 PCondFlushLine( doc, indent ); 1153 } 1154 else 1155 indAttrs = no; 1156 } 1157 1158 CheckWrapIndent( doc, indent ); 1159 1160 if ( !xmlOut && !xhtmlOut && attr->dict ) 1161 { 1162 if ( IsScript(doc, name) ) 1163 wrappable = cfgBool( doc, TidyWrapScriptlets ); 1164 else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr)) && wrapAttrs ) 1165 wrappable = yes; 1166 } 1167 1168 if ( !first && !SetWrap(doc, indent) ) 1169 { 1170 PFlushLine( doc, indent+xtra ); /* Put it on next line */ 1171 } 1172 else if ( pprint->linelen > 0 ) 1173 { 1174 AddChar( pprint, ' ' ); 1175 } 1176 1177 /* Attribute name */ 1178 while (*name) 1179 { 1180 c = (unsigned char)*name; 1181 1182 if (c > 0x7F) 1183 name += GetUTF8(name, &c); 1184 else if (ucAttrs) 1185 c = ToUpper(c); 1186 1187 AddChar(pprint, c); 1188 ++name; 1189 } 1190 1191 /* fix for bug 732038 */ 1192 #if 0 1193 /* If not indenting attributes, bump up indent for 1194 ** value after putting out name. 1195 */ 1196 if ( !indAttrs ) 1197 indent += xtra; 1198 #endif 1199 1200 CheckWrapIndent( doc, indent ); 1201 1202 if ( attr->value == NULL ) 1203 { 1204 Bool isB = IsBoolAttribute(attr); 1205 Bool scriptAttr = attrIsEvent(attr); 1206 1207 if ( xmlOut ) 1208 PPrintAttrValue( doc, indent, isB ? attr->attribute : NULLSTR, 1209 attr->delim, no, scriptAttr ); 1210 1211 else if ( !isB && !IsNewNode(node) ) 1212 PPrintAttrValue( doc, indent, "", attr->delim, yes, scriptAttr ); 1213 1214 else 1215 SetWrap( doc, indent ); 1216 } 1217 else 1218 PPrintAttrValue( doc, indent, attr->value, attr->delim, wrappable, no ); 1219 } 1220 1221 static void PPrintAttrs( TidyDocImpl* doc, uint indent, Node *node ) 1222 { 1223 TidyPrintImpl* pprint = &doc->pprint; 1224 AttVal* av; 1225 1226 /* add xml:space attribute to pre and other elements */ 1227 if ( cfgBool(doc, TidyXmlOut) && cfgBool(doc, TidyXmlSpace) && 1228 !GetAttrByName(node, "xml:space") && 1229 XMLPreserveWhiteSpace(doc, node) ) 1230 { 1231 AddAttribute( doc, node, "xml:space", "preserve" ); 1232 } 1233 1234 for ( av = node->attributes; av; av = av->next ) 1235 { 1236 if ( av->attribute != NULL ) 1237 { 1238 PPrintAttribute( doc, indent, node, av ); 1239 } 1240 else if ( av->asp != NULL ) 1241 { 1242 AddChar( pprint, ' ' ); 1243 PPrintAsp( doc, indent, av->asp ); 1244 } 1245 else if ( av->php != NULL ) 1246 { 1247 AddChar( pprint, ' ' ); 1248 PPrintPhp( doc, indent, av->php ); 1249 } 1250 } 1251 } 1252 1253 /* 1254 Line can be wrapped immediately after inline start tag provided 1255 if follows a text node ending in a space, or it follows a <br>, 1256 or its parent is an inline element that that rule applies to. 1257 This behaviour was reverse engineered from Netscape 3.0. 1258 1259 Line wrapping can occur if an element is not empty and before a block 1260 level. For instance: 1261 <p><span> 1262 x</span>y</p> 1263 will display properly. Whereas 1264 <p><img /> 1265 x<</p> won't. 1266 */ 1267 static Bool AfterSpaceImp(Lexer *lexer, Node *node, Bool isEmpty) 1268 { 1269 Node *prev; 1270 1271 if ( !nodeCMIsInline(node) ) 1272 return yes; 1273 1274 prev = node->prev; 1275 if (prev) 1276 { 1277 if (nodeIsText(prev) && prev->end > prev->start) 1278 { 1279 uint i, c = '\0'; /* initialised to avoid warnings */ 1280 for (i = prev->start; i < prev->end; ++i) 1281 { 1282 c = (byte) lexer->lexbuf[i]; 1283 if ( c > 0x7F ) 1284 i += GetUTF8( lexer->lexbuf + i, &c ); 1285 } 1286 1287 if ( c == ' ' || c == '\n' ) 1288 return yes; 1289 } 1290 else if (nodeIsBR(prev)) 1291 return yes; 1292 1293 return no; 1294 } 1295 1296 if ( isEmpty && !nodeCMIsInline(node->parent) ) 1297 return no; 1298 1299 return AfterSpaceImp(lexer, node->parent, isEmpty); 1300 } 1301 1302 static Bool AfterSpace(Lexer *lexer, Node *node) 1303 { 1304 return AfterSpaceImp(lexer, node, nodeCMIsEmpty(node)); 1305 } 1306 1307 static void PPrintTag( TidyDocImpl* doc, 1308 uint mode, uint indent, Node *node ) 1309 { 1310 TidyPrintImpl* pprint = &doc->pprint; 1311 Bool uc = cfgBool( doc, TidyUpperCaseTags ); 1312 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1313 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1314 tchar c; 1315 tmbstr s = node->element; 1316 1317 AddChar( pprint, '<' ); 1318 1319 if ( node->type == EndTag ) 1320 AddChar( pprint, '/' ); 1321 1322 if (s) 1323 { 1324 while (*s) 1325 { 1326 c = (unsigned char)*s; 1327 1328 if (c > 0x7F) 1329 s += GetUTF8(s, &c); 1330 else if (uc) 1331 c = ToUpper(c); 1332 1333 AddChar(pprint, c); 1334 ++s; 1335 } 1336 } 1337 1338 PPrintAttrs( doc, indent, node ); 1339 1340 if ( (xmlOut || xhtmlOut) && 1341 (node->type == StartEndTag || nodeCMIsEmpty(node)) ) 1342 { 1343 AddChar( pprint, ' ' ); /* Space is NS compatibility hack <br /> */ 1344 AddChar( pprint, '/' ); /* Required end tag marker */ 1345 } 1346 1347 AddChar( pprint, '>' ); 1348 1349 if ( (node->type != StartEndTag || xhtmlOut) && !(mode & PREFORMATTED) ) 1350 { 1351 uint wraplen = cfg( doc, TidyWrapLen ); 1352 CheckWrapIndent( doc, indent ); 1353 1354 if ( indent + pprint->linelen < wraplen ) 1355 { 1356 /* wrap after start tag if is <br/> or if it's not inline. 1357 Technically, it would be safe to call only AfterSpace. 1358 However, it would disrupt the existing algorithm. So let's 1359 leave as is. Note that AfterSpace returns true for non inline 1360 elements but can still be false for some <br>. So it has to 1361 stay as well. */ 1362 if (!(mode & NOWRAP) && (!nodeCMIsInline(node) || nodeIsBR(node)) 1363 && AfterSpace(doc->lexer, node)) 1364 { 1365 pprint->wraphere = pprint->linelen; 1366 } 1367 } 1368 /* flush the current buffer only if it is known to be safe, 1369 i.e. it will not introduce some spurious white spaces. 1370 See bug #996484 */ 1371 else if ( mode & NOWRAP || 1372 nodeIsBR(node) || AfterSpace(doc->lexer, node)) 1373 PCondFlushLine( doc, indent ); 1374 } 1375 } 1376 1377 static void PPrintEndTag( TidyDocImpl* doc, uint ARG_UNUSED(mode), 1378 uint ARG_UNUSED(indent), Node *node ) 1379 { 1380 TidyPrintImpl* pprint = &doc->pprint; 1381 Bool uc = cfgBool( doc, TidyUpperCaseTags ); 1382 tmbstr s = node->element; 1383 tchar c; 1384 1385 /* 1386 Netscape ignores SGML standard by not ignoring a 1387 line break before </A> or </U> etc. To avoid rendering 1388 this as an underlined space, I disable line wrapping 1389 before inline end tags by the #if 0 ... #endif 1390 */ 1391 #if 0 1392 if ( !(mode & NOWRAP) ) 1393 SetWrap( doc, indent ); 1394 #endif 1395 1396 AddString( pprint, "</" ); 1397 1398 if (s) 1399 { 1400 while (*s) 1401 { 1402 c = (unsigned char)*s; 1403 1404 if (c > 0x7F) 1405 s += GetUTF8(s, &c); 1406 else if (uc) 1407 c = ToUpper(c); 1408 1409 AddChar(pprint, c); 1410 ++s; 1411 } 1412 } 1413 1414 AddChar( pprint, '>' ); 1415 } 1416 1417 static void PPrintComment( TidyDocImpl* doc, uint indent, Node* node ) 1418 { 1419 TidyPrintImpl* pprint = &doc->pprint; 1420 1421 SetWrap( doc, indent ); 1422 AddString( pprint, "<!--" ); 1423 1424 #if 0 1425 SetWrap( doc, indent ); 1426 #endif 1427 1428 PPrintText(doc, COMMENT, 0, node); 1429 1430 #if 0 1431 SetWrap( doc, indent ); 1432 AddString( pprint, "--" ); 1433 #endif 1434 1435 AddString(pprint, "--"); 1436 AddChar( pprint, '>' ); 1437 if ( node->linebreak && node->next ) 1438 PFlushLine( doc, indent ); 1439 } 1440 1441 static void PPrintDocType( TidyDocImpl* doc, uint indent, Node *node ) 1442 { 1443 TidyPrintImpl* pprint = &doc->pprint; 1444 uint wraplen = cfg( doc, TidyWrapLen ); 1445 uint spaces = cfg( doc, TidyIndentSpaces ); 1446 AttVal* fpi = GetAttrByName(node, "PUBLIC"); 1447 AttVal* sys = GetAttrByName(node, "SYSTEM"); 1448 1449 /* todo: handle non-ASCII characters in FPI / SI / node->element */ 1450 1451 SetWrap( doc, indent ); 1452 PCondFlushLine( doc, indent ); 1453 1454 AddString( pprint, "<!DOCTYPE " ); 1455 SetWrap( doc, indent ); 1456 if (node->element) 1457 { 1458 AddString(pprint, node->element); 1459 } 1460 1461 if (fpi && fpi->value) 1462 { 1463 AddString(pprint, " PUBLIC "); 1464 AddChar(pprint, fpi->delim); 1465 AddString(pprint, fpi->value); 1466 AddChar(pprint, fpi->delim); 1467 } 1468 1469 if (fpi && fpi->value && sys && sys->value) 1470 { 1471 uint i = pprint->linelen - (tmbstrlen(sys->value) + 2) - 1; 1472 if (!(i>0&&tmbstrlen(sys->value)+2+i<wraplen&&i<=(spaces?spaces:2)*2)) 1473 i = 0; 1474 1475 PCondFlushLine(doc, i); 1476 if (pprint->linelen) 1477 AddChar(pprint, ' '); 1478 } 1479 else if (sys && sys->value) 1480 { 1481 AddString(pprint, " SYSTEM "); 1482 } 1483 1484 if (sys && sys->value) 1485 { 1486 AddChar(pprint, sys->delim); 1487 AddString(pprint, sys->value); 1488 AddChar(pprint, sys->delim); 1489 } 1490 1491 if (node->content) 1492 { 1493 PCondFlushLine(doc, indent); 1494 AddChar(pprint, '['); 1495 PPrintText(doc, CDATA, 0, node->content); 1496 AddChar(pprint, ']'); 1497 } 1498 1499 SetWrap( doc, 0 ); 1500 AddChar( pprint, '>' ); 1501 PCondFlushLine( doc, indent ); 1502 } 1503 1504 static void PPrintPI( TidyDocImpl* doc, uint indent, Node *node ) 1505 { 1506 TidyPrintImpl* pprint = &doc->pprint; 1507 tchar c; 1508 tmbstr s; 1509 1510 SetWrap( doc, indent ); 1511 AddString( pprint, "<?" ); 1512 1513 s = node->element; 1514 1515 while (s && *s) 1516 { 1517 c = (unsigned char)*s; 1518 if (c > 0x7F) 1519 s += GetUTF8(s, &c); 1520 AddChar(pprint, c); 1521 ++s; 1522 } 1523 1524 /* set CDATA to pass < and > unescaped */ 1525 PPrintText( doc, CDATA, indent, node ); 1526 1527 if (cfgBool(doc, TidyXmlOut) || 1528 cfgBool(doc, TidyXhtmlOut) || node->closed) 1529 AddChar( pprint, '?' ); 1530 1531 AddChar( pprint, '>' ); 1532 PCondFlushLine( doc, indent ); 1533 } 1534 1535 static void PPrintXmlDecl( TidyDocImpl* doc, uint indent, Node *node ) 1536 { 1537 AttVal* att; 1538 uint saveWrap; 1539 TidyPrintImpl* pprint = &doc->pprint; 1540 Bool ucAttrs; 1541 SetWrap( doc, indent ); 1542 saveWrap = WrapOff( doc ); 1543 1544 /* no case translation for XML declaration pseudo attributes */ 1545 ucAttrs = cfgBool(doc, TidyUpperCaseAttrs); 1546 SetOptionBool(doc, TidyUpperCaseAttrs, no); 1547 1548 AddString( pprint, "<?xml" ); 1549 1550 /* Force order of XML declaration attributes */ 1551 /* PPrintAttrs( doc, indent, node ); */ 1552 if ( NULL != (att = AttrGetById(node, TidyAttr_VERSION)) ) 1553 PPrintAttribute( doc, indent, node, att ); 1554 if ( NULL != (att = AttrGetById(node, TidyAttr_ENCODING)) ) 1555 PPrintAttribute( doc, indent, node, att ); 1556 if ( NULL != (att = GetAttrByName(node, "standalone")) ) 1557 PPrintAttribute( doc, indent, node, att ); 1558 1559 /* restore old config value */ 1560 SetOptionBool(doc, TidyUpperCaseAttrs, ucAttrs); 1561 1562 if ( node->end <= 0 || doc->lexer->lexbuf[node->end - 1] != '?' ) 1563 AddChar( pprint, '?' ); 1564 AddChar( pprint, '>' ); 1565 WrapOn( doc, saveWrap ); 1566 PFlushLine( doc, indent ); 1567 } 1568 1569 /* note ASP and JSTE share <% ... %> syntax */ 1570 static void PPrintAsp( TidyDocImpl* doc, uint indent, Node *node ) 1571 { 1572 TidyPrintImpl* pprint = &doc->pprint; 1573 Bool wrapAsp = cfgBool( doc, TidyWrapAsp ); 1574 Bool wrapJste = cfgBool( doc, TidyWrapJste ); 1575 uint saveWrap = WrapOffCond( doc, !wrapAsp || !wrapJste ); 1576 1577 #if 0 1578 SetWrap( doc, indent ); 1579 #endif 1580 AddString( pprint, "<%" ); 1581 PPrintText( doc, (wrapAsp ? CDATA : COMMENT), indent, node ); 1582 AddString( pprint, "%>" ); 1583 1584 /* PCondFlushLine( doc, indent ); */ 1585 WrapOn( doc, saveWrap ); 1586 } 1587 1588 /* JSTE also supports <# ... #> syntax */ 1589 static void PPrintJste( TidyDocImpl* doc, uint indent, Node *node ) 1590 { 1591 TidyPrintImpl* pprint = &doc->pprint; 1592 Bool wrapAsp = cfgBool( doc, TidyWrapAsp ); 1593 uint saveWrap = WrapOffCond( doc, !wrapAsp ); 1594 1595 AddString( pprint, "<#" ); 1596 PPrintText( doc, (cfgBool(doc, TidyWrapJste) ? CDATA : COMMENT), 1597 indent, node ); 1598 AddString( pprint, "#>" ); 1599 1600 /* PCondFlushLine( doc, indent ); */ 1601 WrapOn( doc, saveWrap ); 1602 } 1603 1604 /* PHP is based on XML processing instructions */ 1605 static void PPrintPhp( TidyDocImpl* doc, uint indent, Node *node ) 1606 { 1607 TidyPrintImpl* pprint = &doc->pprint; 1608 Bool wrapPhp = cfgBool( doc, TidyWrapPhp ); 1609 uint saveWrap = WrapOffCond( doc, !wrapPhp ); 1610 #if 0 1611 SetWrap( doc, indent ); 1612 #endif 1613 1614 AddString( pprint, "<?" ); 1615 PPrintText( doc, (wrapPhp ? CDATA : COMMENT), 1616 indent, node ); 1617 AddString( pprint, "?>" ); 1618 1619 /* PCondFlushLine( doc, indent ); */ 1620 WrapOn( doc, saveWrap ); 1621 } 1622 1623 static void PPrintCDATA( TidyDocImpl* doc, uint indent, Node *node ) 1624 { 1625 uint saveWrap; 1626 TidyPrintImpl* pprint = &doc->pprint; 1627 Bool indentCData = cfgBool( doc, TidyIndentCdata ); 1628 if ( !indentCData ) 1629 indent = 0; 1630 1631 PCondFlushLine( doc, indent ); 1632 saveWrap = WrapOff( doc ); /* disable wrapping */ 1633 1634 AddString( pprint, "<![CDATA[" ); 1635 PPrintText( doc, COMMENT, indent, node ); 1636 AddString( pprint, "]]>" ); 1637 1638 PCondFlushLine( doc, indent ); 1639 WrapOn( doc, saveWrap ); /* restore wrapping */ 1640 } 1641 1642 static void PPrintSection( TidyDocImpl* doc, uint indent, Node *node ) 1643 { 1644 TidyPrintImpl* pprint = &doc->pprint; 1645 Bool wrapSect = cfgBool( doc, TidyWrapSection ); 1646 uint saveWrap = WrapOffCond( doc, !wrapSect ); 1647 #if 0 1648 SetWrap( doc, indent ); 1649 #endif 1650 1651 AddString( pprint, "<![" ); 1652 PPrintText( doc, (wrapSect ? CDATA : COMMENT), 1653 indent, node ); 1654 AddString( pprint, "]>" ); 1655 1656 /* PCondFlushLine( doc, indent ); */ 1657 WrapOn( doc, saveWrap ); 1658 } 1659 1660 1661 #if 0 1662 /* 1663 ** Print script and style elements. For XHTML, wrap the content as follows: 1664 ** 1665 ** JavaScript: 1666 ** //<![CDATA[ 1667 ** content 1668 ** //]]> 1669 ** VBScript: 1670 ** '<![CDATA[ 1671 ** content 1672 ** ']]> 1673 ** CSS: 1674 ** / *<![CDATA[* / Extra spaces to keep compiler happy 1675 ** content 1676 ** / *]]>* / 1677 ** other: 1678 ** <![CDATA[ 1679 ** content 1680 ** ]]> 1681 */ 1682 #endif 1683 1684 static ctmbstr CDATA_START = "<![CDATA["; 1685 static ctmbstr CDATA_END = "]]>"; 1686 static ctmbstr JS_COMMENT_START = "//"; 1687 static ctmbstr JS_COMMENT_END = ""; 1688 static ctmbstr VB_COMMENT_START = "\'"; 1689 static ctmbstr VB_COMMENT_END = ""; 1690 static ctmbstr CSS_COMMENT_START = "/*"; 1691 static ctmbstr CSS_COMMENT_END = "*/"; 1692 static ctmbstr DEFAULT_COMMENT_START = ""; 1693 static ctmbstr DEFAULT_COMMENT_END = ""; 1694 1695 static Bool InsideHead( TidyDocImpl* doc, Node *node ) 1696 { 1697 if ( nodeIsHEAD(node) ) 1698 return yes; 1699 1700 if ( node->parent != NULL ) 1701 return InsideHead( doc, node->parent ); 1702 1703 return no; 1704 } 1705 1706 /* Is text node and already ends w/ a newline? 1707 1708 Used to pretty print CDATA/PRE text content. 1709 If it already ends on a newline, it is not 1710 necessary to print another before printing end tag. 1711 */ 1712 static int TextEndsWithNewline(Lexer *lexer, Node *node, uint mode ) 1713 { 1714 if ( (mode & (CDATA|COMMENT)) && nodeIsText(node) && node->end > node->start ) 1715 { 1716 uint ch, ix = node->end - 1; 1717 /* Skip non-newline whitespace. */ 1718 while ( ix >= node->start && (ch = (lexer->lexbuf[ix] & 0xff)) 1719 && ( ch == ' ' || ch == '\t' || ch == '\r' ) ) 1720 --ix; 1721 1722 if ( lexer->lexbuf[ ix ] == '\n' ) 1723 return node->end - ix - 1; /* #543262 tidy eats all memory */ 1724 } 1725 return -1; 1726 } 1727 1728 static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode ) 1729 { 1730 assert( node != NULL ); 1731 if ( (mode & (CDATA|COMMENT)) && nodeIsText(node) && node->end > node->start && start >= node->start ) 1732 { 1733 uint ch, ix = start; 1734 /* Skip whitespace. */ 1735 while ( ix < node->end && (ch = (lexer->lexbuf[ix] & 0xff)) 1736 && ( ch==' ' || ch=='\t' || ch=='\r' ) ) 1737 ++ix; 1738 1739 if ( ix > start ) 1740 return ix - start; 1741 } 1742 return -1; 1743 } 1744 1745 static Bool HasCDATA( Lexer* lexer, Node* node ) 1746 { 1747 /* Scan forward through the textarray. Since the characters we're 1748 ** looking for are < 0x7f, we don't have to do any UTF-8 decoding. 1749 */ 1750 ctmbstr start = lexer->lexbuf + node->start; 1751 int len = node->end - node->start + 1; 1752 1753 if ( node->type != TextNode ) 1754 return no; 1755 1756 return ( NULL != tmbsubstrn( start, len, CDATA_START )); 1757 } 1758 1759 1760 void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) 1761 { 1762 TidyPrintImpl* pprint = &doc->pprint; 1763 Node* content; 1764 ctmbstr commentStart = DEFAULT_COMMENT_START; 1765 ctmbstr commentEnd = DEFAULT_COMMENT_END; 1766 Bool hasCData = no; 1767 int contentIndent = -1; 1768 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1769 1770 /* fix for http://tidy.sf.net/bug/729972, restores 04Aug00 behaivour */ 1771 indent = 0; 1772 1773 if ( InsideHead(doc, node) ) 1774 PFlushLine( doc, indent ); 1775 1776 PPrintTag( doc, mode, indent, node ); 1777 PFlushLine( doc, indent ); 1778 1779 if ( xhtmlOut && node->content != NULL ) 1780 { 1781 AttVal* type = attrGetTYPE(node); 1782 1783 if (AttrValueIs(type, "text/javascript")) 1784 { 1785 commentStart = JS_COMMENT_START; 1786 commentEnd = JS_COMMENT_END; 1787 } 1788 else if (AttrValueIs(type, "text/css")) 1789 { 1790 commentStart = CSS_COMMENT_START; 1791 commentEnd = CSS_COMMENT_END; 1792 } 1793 else if (AttrValueIs(type, "text/vbscript")) 1794 { 1795 commentStart = VB_COMMENT_START; 1796 commentEnd = VB_COMMENT_END; 1797 } 1798 1799 hasCData = HasCDATA(doc->lexer, node->content); 1800 1801 if (!hasCData) 1802 { 1803 uint saveWrap = WrapOff( doc ); 1804 1805 AddString( pprint, commentStart ); 1806 AddString( pprint, CDATA_START ); 1807 AddString( pprint, commentEnd ); 1808 PCondFlushLine( doc, indent ); 1809 1810 WrapOn( doc, saveWrap ); 1811 } 1812 } 1813 1814 for ( content = node->content; 1815 content != NULL; 1816 content = content->next ) 1817 { 1818 PPrintTree( doc, (mode | PREFORMATTED | NOWRAP | CDATA), 1819 indent, content ); 1820 1821 if ( content == node->last ) 1822 contentIndent = TextEndsWithNewline( doc->lexer, content, CDATA ); 1823 } 1824 1825 if ( contentIndent < 0 ) 1826 { 1827 PCondFlushLine( doc, indent ); 1828 contentIndent = 0; 1829 } 1830 1831 if ( xhtmlOut && node->content != NULL ) 1832 { 1833 if ( ! hasCData ) 1834 { 1835 uint saveWrap = WrapOff( doc ); 1836 1837 AddString( pprint, commentStart ); 1838 AddString( pprint, CDATA_END ); 1839 AddString( pprint, commentEnd ); 1840 1841 WrapOn( doc, saveWrap ); 1842 PCondFlushLine( doc, indent ); 1843 } 1844 } 1845 1846 if ( node->content && pprint->indent[ 0 ].spaces != (int)indent ) 1847 { 1848 pprint->indent[ 0 ].spaces = indent; 1849 } 1850 PPrintEndTag( doc, mode, indent, node ); 1851 if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState 1852 && node->next != NULL && 1853 !( nodeHasCM(node, CM_INLINE) || nodeIsText(node) ) ) 1854 PFlushLine( doc, indent ); 1855 } 1856 1857 1858 1859 static Bool ShouldIndent( TidyDocImpl* doc, Node *node ) 1860 { 1861 TidyTriState indentContent = cfgAutoBool( doc, TidyIndentContent ); 1862 if ( indentContent == TidyNoState ) 1863 return no; 1864 1865 if ( nodeIsTEXTAREA(node) ) 1866 return no; 1867 1868 if ( indentContent == TidyAutoState ) 1869 { 1870 if ( node->content && nodeHasCM(node, CM_NO_INDENT) ) 1871 { 1872 for ( node = node->content; node; node = node->next ) 1873 if ( nodeHasCM(node, CM_BLOCK) ) 1874 return yes; 1875 return no; 1876 } 1877 1878 if ( nodeHasCM(node, CM_HEADING) ) 1879 return no; 1880 1881 if ( nodeIsHTML(node) ) 1882 return no; 1883 1884 if ( nodeIsP(node) ) 1885 return no; 1886 1887 if ( nodeIsTITLE(node) ) 1888 return no; 1889 } 1890 1891 if ( nodeHasCM(node, CM_FIELD | CM_OBJECT) ) 1892 return yes; 1893 1894 if ( nodeIsMAP(node) ) 1895 return yes; 1896 1897 return ( !nodeHasCM( node, CM_INLINE ) && node->content ); 1898 } 1899 1900 /* 1901 Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 1902 print just the content of the body element. 1903 useful when you want to reuse material from 1904 other documents. 1905 1906 -- Sebastiano Vigna <vigna@dsi.unimi.it> 1907 */ 1908 void PrintBody( TidyDocImpl* doc ) 1909 { 1910 Node *node = FindBody( doc ); 1911 1912 if ( node ) 1913 { 1914 for ( node = node->content; node != NULL; node = node->next ) 1915 PPrintTree( doc, NORMAL, 0, node ); 1916 } 1917 } 1918 1919 void PPrintTree( TidyDocImpl* doc, uint mode, uint indent, Node *node ) 1920 { 1921 Node *content, *last; 1922 uint spaces = cfg( doc, TidyIndentSpaces ); 1923 Bool xhtml = cfgBool( doc, TidyXhtmlOut ); 1924 1925 if ( node == NULL ) 1926 return; 1927 1928 if (node->type == TextNode) 1929 { 1930 PPrintText( doc, mode, indent, node ); 1931 } 1932 else if ( node->type == CommentTag ) 1933 { 1934 PPrintComment( doc, indent, node ); 1935 } 1936 else if ( node->type == RootNode ) 1937 { 1938 for ( content = node->content; content; content = content->next ) 1939 PPrintTree( doc, mode, indent, content ); 1940 } 1941 else if ( node->type == DocTypeTag ) 1942 PPrintDocType( doc, indent, node ); 1943 else if ( node->type == ProcInsTag) 1944 PPrintPI( doc, indent, node ); 1945 else if ( node->type == XmlDecl) 1946 PPrintXmlDecl( doc, indent, node ); 1947 else if ( node->type == CDATATag) 1948 PPrintCDATA( doc, indent, node ); 1949 else if ( node->type == SectionTag) 1950 PPrintSection( doc, indent, node ); 1951 else if ( node->type == AspTag) 1952 PPrintAsp( doc, indent, node ); 1953 else if ( node->type == JsteTag) 1954 PPrintJste( doc, indent, node ); 1955 else if ( node->type == PhpTag) 1956 PPrintPhp( doc, indent, node ); 1957 else if ( nodeCMIsEmpty(node) || 1958 (node->type == StartEndTag && !xhtml) ) 1959 { 1960 if ( ! nodeHasCM(node, CM_INLINE) ) 1961 PCondFlushLine( doc, indent ); 1962 1963 if ( nodeIsBR(node) && node->prev && 1964 !(nodeIsBR(node->prev) || (mode & PREFORMATTED)) && 1965 cfgBool(doc, TidyBreakBeforeBR) ) 1966 PFlushLine( doc, indent ); 1967 1968 if ( nodeIsHR(node) ) 1969 { 1970 /* insert extra newline for classic formatting */ 1971 Bool classic = cfgBool( doc, TidyVertSpace ); 1972 if (classic && node->parent && node->parent->content != node) 1973 { 1974 PFlushLine( doc, indent ); 1975 } 1976 } 1977 1978 PPrintTag( doc, mode, indent, node ); 1979 1980 if (node->next) 1981 { 1982 if (nodeIsPARAM(node) || nodeIsAREA(node)) 1983 PCondFlushLine(doc, indent); 1984 else if (nodeIsBR(node) || nodeIsHR(node)) 1985 PFlushLine(doc, indent); 1986 } 1987 } 1988 else /* some kind of container element */ 1989 { 1990 if ( node->type == StartEndTag ) 1991 node->type = StartTag; 1992 1993 if ( node->tag && 1994 (node->tag->parser == ParsePre || nodeIsTEXTAREA(node)) ) 1995 { 1996 Bool classic = cfgBool( doc, TidyVertSpace ); 1997 uint indprev = indent; 1998 PCondFlushLine( doc, indent ); 1999 2000 PCondFlushLine( doc, indent ); 2001 2002 /* insert extra newline for classic formatting */ 2003 if (classic && node->parent && node->parent->content != node) 2004 { 2005 PFlushLine( doc, indent ); 2006 } 2007 PPrintTag( doc, mode, indent, node ); 2008 2009 indent = 0; 2010 PFlushLine( doc, indent ); 2011 2012 for ( content = node->content; content; content = content->next ) 2013 { 2014 PPrintTree( doc, (mode | PREFORMATTED | NOWRAP), 2015 indent, content ); 2016 } 2017 PCondFlushLine( doc, indent ); 2018 indent = indprev; 2019 PPrintEndTag( doc, mode, indent, node ); 2020 2021 if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState 2022 && node->next != NULL ) 2023 PFlushLine( doc, indent ); 2024 } 2025 else if ( nodeIsSTYLE(node) || nodeIsSCRIPT(node) ) 2026 { 2027 PPrintScriptStyle( doc, (mode | PREFORMATTED | NOWRAP | CDATA), 2028 indent, node ); 2029 } 2030 else if ( nodeCMIsInline(node) ) 2031 { 2032 if ( cfgBool(doc, TidyMakeClean) ) 2033 { 2034 /* replace <nobr>...</nobr> by &nbsp; or &#160; etc. */ 2035 if ( nodeIsNOBR(node) ) 2036 { 2037 for ( content = node->content; 2038 content != NULL; 2039 content = content->next) 2040 PPrintTree( doc, mode|NOWRAP, indent, content ); 2041 return; 2042 } 2043 } 2044 2045 /* otherwise a normal inline element */ 2046 PPrintTag( doc, mode, indent, node ); 2047 2048 /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */ 2049 if ( ShouldIndent(doc, node) ) 2050 { 2051 indent += spaces; 2052 PCondFlushLine( doc, indent ); 2053 2054 for ( content = node->content; 2055 content != NULL; 2056 content = content->next ) 2057 PPrintTree( doc, mode, indent, content ); 2058 2059 indent -= spaces; 2060 PCondFlushLine( doc, indent ); 2061 /* PCondFlushLine( doc, indent ); */ 2062 } 2063 else 2064 { 2065 for ( content = node->content; 2066 content != NULL; 2067 content = content->next ) 2068 PPrintTree( doc, mode, indent, content ); 2069 } 2070 PPrintEndTag( doc, mode, indent, node ); 2071 } 2072 else /* other tags */ 2073 { 2074 Bool indcont = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState ); 2075 Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState ); 2076 Bool hideend = cfgBool( doc, TidyHideEndTags ); 2077 Bool classic = cfgBool( doc, TidyVertSpace ); 2078 uint contentIndent = indent; 2079 2080 /* insert extra newline for classic formatting */ 2081 if (classic && node->parent && node->parent->content != node && !nodeIsHTML(node)) 2082 { 2083 PFlushLine( doc, indent ); 2084 } 2085 2086 if ( ShouldIndent(doc, node) ) 2087 contentIndent += spaces; 2088 2089 PCondFlushLine( doc, indent ); 2090 if ( indsmart && node->prev != NULL ) 2091 PFlushLine( doc, indent ); 2092 2093 /* do not omit elements with attributes */ 2094 if ( !hideend || !nodeHasCM(node, CM_OMITST) || 2095 node->attributes != NULL ) 2096 { 2097 PPrintTag( doc, mode, indent, node ); 2098 2099 if ( ShouldIndent(doc, node) ) 2100 { 2101 /* fix for bug 530791, don't wrap after */ 2102 /* <li> if first child is text node */ 2103 if (!(nodeIsLI(node) && nodeIsText(node->content))) 2104 PCondFlushLine( doc, contentIndent ); 2105 } 2106 else if ( nodeHasCM(node, CM_HTML) || nodeIsNOFRAMES(node) || 2107 (nodeHasCM(node, CM_HEAD) && !nodeIsTITLE(node)) ) 2108 PFlushLine( doc, contentIndent ); 2109 } 2110 2111 last = NULL; 2112 for ( content = node->content; content; content = content->next ) 2113 { 2114 /* kludge for naked text before block level tag */ 2115 if ( last && !indcont && nodeIsText(last) && 2116 content->tag && !nodeHasCM(content, CM_INLINE) ) 2117 { 2118 /* PFlushLine(fout, indent); */ 2119 PFlushLine( doc, contentIndent ); 2120 } 2121 2122 PPrintTree( doc, mode, contentIndent, content ); 2123 last = content; 2124 } 2125 2126 /* don't flush line for td and th */ 2127 if ( ShouldIndent(doc, node) || 2128 ( !hideend && 2129 ( nodeHasCM(node, CM_HTML) || 2130 nodeIsNOFRAMES(node) || 2131 (nodeHasCM(node, CM_HEAD) && !nodeIsTITLE(node)) 2132 ) 2133 ) 2134 ) 2135 { 2136 PCondFlushLine( doc, indent ); 2137 if ( !hideend || !nodeHasCM(node, CM_OPT) ) 2138 { 2139 PPrintEndTag( doc, mode, indent, node ); 2140 /* PFlushLine( doc, indent ); */ 2141 } 2142 } 2143 else 2144 { 2145 if ( !hideend || !nodeHasCM(node, CM_OPT) ) 2146 { 2147 /* newline before endtag for classic formatting */ 2148 if ( classic && !HasMixedContent(node) ) 2149 PFlushLine( doc, indent ); 2150 PPrintEndTag( doc, mode, indent, node ); 2151 } 2152 } 2153 2154 if (!indcont && !hideend && !nodeIsHTML(node) && !classic) 2155 PFlushLine( doc, indent ); 2156 else if (classic && node->next != NULL && nodeHasCM(node, CM_LIST|CM_DEFLIST|CM_TABLE|CM_BLOCK/*|CM_HEADING*/)) 2157 PFlushLine( doc, indent ); 2158 } 2159 } 2160 } 2161 2162 void PPrintXMLTree( TidyDocImpl* doc, uint mode, uint indent, Node *node ) 2163 { 2164 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 2165 if (node == NULL) 2166 return; 2167 2168 if ( node->type == TextNode) 2169 { 2170 PPrintText( doc, mode, indent, node ); 2171 } 2172 else if ( node->type == CommentTag ) 2173 { 2174 PCondFlushLine( doc, indent ); 2175 PPrintComment( doc, indent, node); 2176 /* PCondFlushLine( doc, 0 ); */ 2177 } 2178 else if ( node->type == RootNode ) 2179 { 2180 Node *content; 2181 for ( content = node->content; 2182 content != NULL; 2183 content = content->next ) 2184 PPrintXMLTree( doc, mode, indent, content ); 2185 } 2186 else if ( node->type == DocTypeTag ) 2187 PPrintDocType( doc, indent, node ); 2188 else if ( node->type == ProcInsTag ) 2189 PPrintPI( doc, indent, node ); 2190 else if ( node->type == XmlDecl ) 2191 PPrintXmlDecl( doc, indent, node ); 2192 else if ( node->type == CDATATag ) 2193 PPrintCDATA( doc, indent, node ); 2194 else if ( node->type == SectionTag ) 2195 PPrintSection( doc, indent, node ); 2196 else if ( node->type == AspTag ) 2197 PPrintAsp( doc, indent, node ); 2198 else if ( node->type == JsteTag) 2199 PPrintJste( doc, indent, node ); 2200 else if ( node->type == PhpTag) 2201 PPrintPhp( doc, indent, node ); 2202 else if ( nodeHasCM(node, CM_EMPTY) || 2203 (node->type == StartEndTag && !xhtmlOut) ) 2204 { 2205 PCondFlushLine( doc, indent ); 2206 PPrintTag( doc, mode, indent, node ); 2207 /* PFlushLine( doc, indent ); */ 2208 } 2209 else /* some kind of container element */ 2210 { 2211 uint spaces = cfg( doc, TidyIndentSpaces ); 2212 Node *content; 2213 Bool mixed = no; 2214 int cindent; 2215 2216 for ( content = node->content; content; content = content->next ) 2217 { 2218 if ( nodeIsText(content) ) 2219 { 2220 mixed = yes; 2221 break; 2222 } 2223 } 2224 2225 PCondFlushLine( doc, indent ); 2226 2227 if ( XMLPreserveWhiteSpace(doc, node) ) 2228 { 2229 indent = 0; 2230 mixed = no; 2231 cindent = 0; 2232 } 2233 else if (mixed) 2234 cindent = indent; 2235 else 2236 cindent = indent + spaces; 2237 2238 PPrintTag( doc, mode, indent, node ); 2239 if ( !mixed && node->content ) 2240 PFlushLine( doc, cindent ); 2241 2242 for ( content = node->content; content; content = content->next ) 2243 PPrintXMLTree( doc, mode, cindent, content ); 2244 2245 if ( !mixed && node->content ) 2246 PCondFlushLine( doc, indent ); 2247 2248 PPrintEndTag( doc, mode, indent, node ); 2249 /* PCondFlushLine( doc, indent ); */ 2250 } 2251 } 2252 2253

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.