~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/attrs.c

Version: ~ [ 1.0 ] ~

** Warning: Cannot open xref database.

1 /* attrs.c -- recognize HTML attributes 2 3 (c) 1998-2005 (W3C) MIT, ERCIM, Keio University 4 See tidy.h for the copyright notice. 5 6 CVS Info : 7 8 $Author: arnaud02 $ 9 $Date: 2005/09/20 09:03:17 $ 10 $Revision: 1.115 $ 11 12 */ 13 14 #include "tidy-int.h" 15 #include "attrs.h" 16 #include "message.h" 17 #include "tmbstr.h" 18 #include "utf8.h" 19 20 static AttrCheck CheckAction; 21 22 #define CH_PCDATA NULL 23 #define CH_CHARSET NULL 24 #define CH_TYPE CheckType 25 #define CH_XTYPE NULL 26 #define CH_CHARACTER NULL 27 #define CH_URLS NULL 28 #define CH_URL CheckUrl 29 #define CH_SCRIPT CheckScript 30 #define CH_ALIGN CheckAlign 31 #define CH_VALIGN CheckValign 32 #define CH_COLOR CheckColor 33 #define CH_CLEAR CheckClear 34 #define CH_BORDER CheckBool /* kludge */ 35 #define CH_LANG CheckLang 36 #define CH_BOOL CheckBool 37 #define CH_COLS NULL 38 #define CH_NUMBER CheckNumber 39 #define CH_LENGTH CheckLength 40 #define CH_COORDS NULL 41 #define CH_DATE NULL 42 #define CH_TEXTDIR CheckTextDir 43 #define CH_IDREFS NULL 44 #define CH_IDREF NULL 45 #define CH_IDDEF CheckId 46 #define CH_NAME CheckName 47 #define CH_TFRAME NULL 48 #define CH_FBORDER NULL 49 #define CH_MEDIA NULL 50 #define CH_FSUBMIT CheckFsubmit 51 #define CH_LINKTYPES NULL 52 #define CH_TRULES NULL 53 #define CH_SCOPE CheckScope 54 #define CH_SHAPE CheckShape 55 #define CH_SCROLL CheckScroll 56 #define CH_TARGET CheckTarget 57 #define CH_VTYPE CheckVType 58 #define CH_ACTION CheckAction 59 60 static const Attribute attribute_defs [] = 61 { 62 { TidyAttr_UNKNOWN, "unknown!", VERS_PROPRIETARY, NULL }, 63 { TidyAttr_ABBR, "abbr", VERS_HTML40, CH_PCDATA }, 64 { TidyAttr_ACCEPT, "accept", VERS_ALL, CH_XTYPE }, 65 { TidyAttr_ACCEPT_CHARSET, "accept-charset", VERS_HTML40, CH_CHARSET }, 66 { TidyAttr_ACCESSKEY, "accesskey", VERS_HTML40, CH_CHARACTER }, 67 { TidyAttr_ACTION, "action", VERS_ALL, CH_ACTION }, 68 { TidyAttr_ADD_DATE, "add_date", VERS_NETSCAPE, CH_PCDATA }, /* A */ 69 { TidyAttr_ALIGN, "align", VERS_ALL, CH_ALIGN }, /* varies by element */ 70 { TidyAttr_ALINK, "alink", VERS_LOOSE, CH_COLOR }, 71 { TidyAttr_ALT, "alt", VERS_ALL, CH_PCDATA }, /* nowrap */ 72 { TidyAttr_ARCHIVE, "archive", VERS_HTML40, CH_URLS }, /* space or comma separated list */ 73 { TidyAttr_AXIS, "axis", VERS_HTML40, CH_PCDATA }, 74 { TidyAttr_BACKGROUND, "background", VERS_LOOSE, CH_URL }, 75 { TidyAttr_BGCOLOR, "bgcolor", VERS_LOOSE, CH_COLOR }, 76 { TidyAttr_BGPROPERTIES, "bgproperties", VERS_PROPRIETARY, CH_PCDATA }, /* BODY "fixed" fixes background */ 77 { TidyAttr_BORDER, "border", VERS_ALL, CH_BORDER }, /* like LENGTH + "border" */ 78 { TidyAttr_BORDERCOLOR, "bordercolor", VERS_MICROSOFT, CH_COLOR }, /* used on TABLE */ 79 { TidyAttr_BOTTOMMARGIN, "bottommargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 80 { TidyAttr_CELLPADDING, "cellpadding", VERS_FROM32, CH_LENGTH }, /* % or pixel values */ 81 { TidyAttr_CELLSPACING, "cellspacing", VERS_FROM32, CH_LENGTH }, 82 { TidyAttr_CHAR, "char", VERS_HTML40, CH_CHARACTER }, 83 { TidyAttr_CHAROFF, "charoff", VERS_HTML40, CH_LENGTH }, 84 { TidyAttr_CHARSET, "charset", VERS_HTML40, CH_CHARSET }, 85 { TidyAttr_CHECKED, "checked", VERS_ALL, CH_BOOL }, /* i.e. "checked" or absent */ 86 { TidyAttr_CITE, "cite", VERS_HTML40, CH_URL }, 87 { TidyAttr_CLASS, "class", VERS_HTML40, CH_PCDATA }, 88 { TidyAttr_CLASSID, "classid", VERS_HTML40, CH_URL }, 89 { TidyAttr_CLEAR, "clear", VERS_LOOSE, CH_CLEAR }, /* BR: left, right, all */ 90 { TidyAttr_CODE, "code", VERS_LOOSE, CH_PCDATA }, /* APPLET */ 91 { TidyAttr_CODEBASE, "codebase", VERS_HTML40, CH_URL }, /* OBJECT */ 92 { TidyAttr_CODETYPE, "codetype", VERS_HTML40, CH_XTYPE }, /* OBJECT */ 93 { TidyAttr_COLOR, "color", VERS_LOOSE, CH_COLOR }, /* BASEFONT, FONT */ 94 { TidyAttr_COLS, "cols", VERS_IFRAME, CH_COLS }, /* TABLE & FRAMESET */ 95 { TidyAttr_COLSPAN, "colspan", VERS_FROM32, CH_NUMBER }, 96 { TidyAttr_COMPACT, "compact", VERS_ALL, CH_BOOL }, /* lists */ 97 { TidyAttr_CONTENT, "content", VERS_ALL, CH_PCDATA }, 98 { TidyAttr_COORDS, "coords", VERS_FROM32, CH_COORDS }, /* AREA, A */ 99 { TidyAttr_DATA, "data", VERS_HTML40, CH_URL }, /* OBJECT */ 100 { TidyAttr_DATAFLD, "datafld", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */ 101 { TidyAttr_DATAFORMATAS, "dataformatas", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */ 102 { TidyAttr_DATAPAGESIZE, "datapagesize", VERS_MICROSOFT, CH_NUMBER }, /* used on DIV, IMG */ 103 { TidyAttr_DATASRC, "datasrc", VERS_MICROSOFT, CH_URL }, /* used on TABLE */ 104 { TidyAttr_DATETIME, "datetime", VERS_HTML40, CH_DATE }, /* INS, DEL */ 105 { TidyAttr_DECLARE, "declare", VERS_HTML40, CH_BOOL }, /* OBJECT */ 106 { TidyAttr_DEFER, "defer", VERS_HTML40, CH_BOOL }, /* SCRIPT */ 107 { TidyAttr_DIR, "dir", VERS_HTML40, CH_TEXTDIR }, /* ltr or rtl */ 108 { TidyAttr_DISABLED, "disabled", VERS_HTML40, CH_BOOL }, /* form fields */ 109 { TidyAttr_ENCODING, "encoding", VERS_XML, CH_PCDATA }, /* <?xml?> */ 110 { TidyAttr_ENCTYPE, "enctype", VERS_ALL, CH_XTYPE }, /* FORM */ 111 { TidyAttr_FACE, "face", VERS_LOOSE, CH_PCDATA }, /* BASEFONT, FONT */ 112 { TidyAttr_FOR, "for", VERS_HTML40, CH_IDREF }, /* LABEL */ 113 { TidyAttr_FRAME, "frame", VERS_HTML40, CH_TFRAME }, /* TABLE */ 114 { TidyAttr_FRAMEBORDER, "frameborder", VERS_FRAMESET, CH_FBORDER }, /* 0 or 1 */ 115 { TidyAttr_FRAMESPACING, "framespacing", VERS_PROPRIETARY, CH_NUMBER }, 116 { TidyAttr_GRIDX, "gridx", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive*/ 117 { TidyAttr_GRIDY, "gridy", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive */ 118 { TidyAttr_HEADERS, "headers", VERS_HTML40, CH_IDREFS }, /* table cells */ 119 { TidyAttr_HEIGHT, "height", VERS_ALL, CH_LENGTH }, /* pixels only for TH/TD */ 120 { TidyAttr_HREF, "href", VERS_ALL, CH_URL }, /* A, AREA, LINK and BASE */ 121 { TidyAttr_HREFLANG, "hreflang", VERS_HTML40, CH_LANG }, /* A, LINK */ 122 { TidyAttr_HSPACE, "hspace", VERS_ALL, CH_NUMBER }, /* APPLET, IMG, OBJECT */ 123 { TidyAttr_HTTP_EQUIV, "http-equiv", VERS_ALL, CH_PCDATA }, /* META */ 124 { TidyAttr_ID, "id", VERS_HTML40, CH_IDDEF }, 125 { TidyAttr_ISMAP, "ismap", VERS_ALL, CH_BOOL }, /* IMG */ 126 { TidyAttr_LABEL, "label", VERS_HTML40, CH_PCDATA }, /* OPT, OPTGROUP */ 127 { TidyAttr_LANG, "lang", VERS_HTML40, CH_LANG }, 128 { TidyAttr_LANGUAGE, "language", VERS_LOOSE, CH_PCDATA }, /* SCRIPT */ 129 { TidyAttr_LAST_MODIFIED, "last_modified", VERS_NETSCAPE, CH_PCDATA }, /* A */ 130 { TidyAttr_LAST_VISIT, "last_visit", VERS_NETSCAPE, CH_PCDATA }, /* A */ 131 { TidyAttr_LEFTMARGIN, "leftmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 132 { TidyAttr_LINK, "link", VERS_LOOSE, CH_COLOR }, /* BODY */ 133 { TidyAttr_LONGDESC, "longdesc", VERS_HTML40, CH_URL }, /* IMG */ 134 { TidyAttr_LOWSRC, "lowsrc", VERS_PROPRIETARY, CH_URL }, /* IMG */ 135 { TidyAttr_MARGINHEIGHT, "marginheight", VERS_IFRAME, CH_NUMBER }, /* FRAME, IFRAME, BODY */ 136 { TidyAttr_MARGINWIDTH, "marginwidth", VERS_IFRAME, CH_NUMBER }, /* ditto */ 137 { TidyAttr_MAXLENGTH, "maxlength", VERS_ALL, CH_NUMBER }, /* INPUT */ 138 { TidyAttr_MEDIA, "media", VERS_HTML40, CH_MEDIA }, /* STYLE, LINK */ 139 { TidyAttr_METHOD, "method", VERS_ALL, CH_FSUBMIT }, /* FORM: get or post */ 140 { TidyAttr_MULTIPLE, "multiple", VERS_ALL, CH_BOOL }, /* SELECT */ 141 { TidyAttr_NAME, "name", VERS_ALL, CH_NAME }, 142 { TidyAttr_NOHREF, "nohref", VERS_FROM32, CH_BOOL }, /* AREA */ 143 { TidyAttr_NORESIZE, "noresize", VERS_FRAMESET, CH_BOOL }, /* FRAME */ 144 { TidyAttr_NOSHADE, "noshade", VERS_LOOSE, CH_BOOL }, /* HR */ 145 { TidyAttr_NOWRAP, "nowrap", VERS_LOOSE, CH_BOOL }, /* table cells */ 146 { TidyAttr_OBJECT, "object", VERS_HTML40_LOOSE, CH_PCDATA }, /* APPLET */ 147 { TidyAttr_OnAFTERUPDATE, "onafterupdate", VERS_MICROSOFT, CH_SCRIPT }, 148 { TidyAttr_OnBEFOREUNLOAD, "onbeforeunload", VERS_MICROSOFT, CH_SCRIPT }, 149 { TidyAttr_OnBEFOREUPDATE, "onbeforeupdate", VERS_MICROSOFT, CH_SCRIPT }, 150 { TidyAttr_OnBLUR, "onblur", VERS_EVENTS, CH_SCRIPT }, /* event */ 151 { TidyAttr_OnCHANGE, "onchange", VERS_EVENTS, CH_SCRIPT }, /* event */ 152 { TidyAttr_OnCLICK, "onclick", VERS_EVENTS, CH_SCRIPT }, /* event */ 153 { TidyAttr_OnDATAAVAILABLE, "ondataavailable", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */ 154 { TidyAttr_OnDATASETCHANGED, "ondatasetchanged", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */ 155 { TidyAttr_OnDATASETCOMPLETE, "ondatasetcomplete", VERS_MICROSOFT, CH_SCRIPT }, 156 { TidyAttr_OnDBLCLICK, "ondblclick", VERS_EVENTS, CH_SCRIPT }, /* event */ 157 { TidyAttr_OnERRORUPDATE, "onerrorupdate", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */ 158 { TidyAttr_OnFOCUS, "onfocus", VERS_EVENTS, CH_SCRIPT }, /* event */ 159 { TidyAttr_OnKEYDOWN, "onkeydown", VERS_EVENTS, CH_SCRIPT }, /* event */ 160 { TidyAttr_OnKEYPRESS, "onkeypress", VERS_EVENTS, CH_SCRIPT }, /* event */ 161 { TidyAttr_OnKEYUP, "onkeyup", VERS_EVENTS, CH_SCRIPT }, /* event */ 162 { TidyAttr_OnLOAD, "onload", VERS_EVENTS, CH_SCRIPT }, /* event */ 163 { TidyAttr_OnMOUSEDOWN, "onmousedown", VERS_EVENTS, CH_SCRIPT }, /* event */ 164 { TidyAttr_OnMOUSEMOVE, "onmousemove", VERS_EVENTS, CH_SCRIPT }, /* event */ 165 { TidyAttr_OnMOUSEOUT, "onmouseout", VERS_EVENTS, CH_SCRIPT }, /* event */ 166 { TidyAttr_OnMOUSEOVER, "onmouseover", VERS_EVENTS, CH_SCRIPT }, /* event */ 167 { TidyAttr_OnMOUSEUP, "onmouseup", VERS_EVENTS, CH_SCRIPT }, /* event */ 168 { TidyAttr_OnRESET, "onreset", VERS_EVENTS, CH_SCRIPT }, /* event */ 169 { TidyAttr_OnROWENTER, "onrowenter", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */ 170 { TidyAttr_OnROWEXIT, "onrowexit", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */ 171 { TidyAttr_OnSELECT, "onselect", VERS_EVENTS, CH_SCRIPT }, /* event */ 172 { TidyAttr_OnSUBMIT, "onsubmit", VERS_EVENTS, CH_SCRIPT }, /* event */ 173 { TidyAttr_OnUNLOAD, "onunload", VERS_EVENTS, CH_SCRIPT }, /* event */ 174 { TidyAttr_PROFILE, "profile", VERS_HTML40, CH_URL }, /* HEAD */ 175 { TidyAttr_PROMPT, "prompt", VERS_LOOSE, CH_PCDATA }, /* ISINDEX */ 176 { TidyAttr_RBSPAN, "rbspan", VERS_XHTML11, CH_NUMBER }, /* ruby markup */ 177 { TidyAttr_READONLY, "readonly", VERS_HTML40, CH_BOOL }, /* form fields */ 178 { TidyAttr_REL, "rel", VERS_ALL, CH_LINKTYPES }, 179 { TidyAttr_REV, "rev", VERS_ALL, CH_LINKTYPES }, 180 { TidyAttr_RIGHTMARGIN, "rightmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 181 { TidyAttr_ROWS, "rows", VERS_ALL, CH_NUMBER }, /* TEXTAREA */ 182 { TidyAttr_ROWSPAN, "rowspan", VERS_ALL, CH_NUMBER }, /* table cells */ 183 { TidyAttr_RULES, "rules", VERS_HTML40, CH_TRULES }, /* TABLE */ 184 { TidyAttr_SCHEME, "scheme", VERS_HTML40, CH_PCDATA }, /* META */ 185 { TidyAttr_SCOPE, "scope", VERS_HTML40, CH_SCOPE }, /* table cells */ 186 { TidyAttr_SCROLLING, "scrolling", VERS_IFRAME, CH_SCROLL }, /* yes, no or auto */ 187 { TidyAttr_SELECTED, "selected", VERS_ALL, CH_BOOL }, /* OPTION */ 188 { TidyAttr_SHAPE, "shape", VERS_FROM32, CH_SHAPE }, /* AREA, A */ 189 { TidyAttr_SHOWGRID, "showgrid", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive */ 190 { TidyAttr_SHOWGRIDX, "showgridx", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/ 191 { TidyAttr_SHOWGRIDY, "showgridy", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/ 192 { TidyAttr_SIZE, "size", VERS_LOOSE, CH_NUMBER }, /* HR, FONT, BASEFONT, SELECT */ 193 { TidyAttr_SPAN, "span", VERS_HTML40, CH_NUMBER }, /* COL, COLGROUP */ 194 { TidyAttr_SRC, "src", VERS_ALL, CH_URL }, /* IMG, FRAME, IFRAME */ 195 { TidyAttr_STANDBY, "standby", VERS_HTML40, CH_PCDATA }, /* OBJECT */ 196 { TidyAttr_START, "start", VERS_ALL, CH_NUMBER }, /* OL */ 197 { TidyAttr_STYLE, "style", VERS_HTML40, CH_PCDATA }, 198 { TidyAttr_SUMMARY, "summary", VERS_HTML40, CH_PCDATA }, /* TABLE */ 199 { TidyAttr_TABINDEX, "tabindex", VERS_HTML40, CH_NUMBER }, /* fields, OBJECT and A */ 200 { TidyAttr_TARGET, "target", VERS_HTML40, CH_TARGET }, /* names a frame/window */ 201 { TidyAttr_TEXT, "text", VERS_LOOSE, CH_COLOR }, /* BODY */ 202 { TidyAttr_TITLE, "title", VERS_HTML40, CH_PCDATA }, /* text tool tip */ 203 { TidyAttr_TOPMARGIN, "topmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */ 204 { TidyAttr_TYPE, "type", VERS_FROM32, CH_TYPE }, /* also used by SPACER */ 205 { TidyAttr_USEMAP, "usemap", VERS_ALL, CH_URL }, /* things with images */ 206 { TidyAttr_VALIGN, "valign", VERS_FROM32, CH_VALIGN }, 207 { TidyAttr_VALUE, "value", VERS_ALL, CH_PCDATA }, 208 { TidyAttr_VALUETYPE, "valuetype", VERS_HTML40, CH_VTYPE }, /* PARAM: data, ref, object */ 209 { TidyAttr_VERSION, "version", VERS_ALL|VERS_XML, CH_PCDATA }, /* HTML <?xml?> */ 210 { TidyAttr_VLINK, "vlink", VERS_LOOSE, CH_COLOR }, /* BODY */ 211 { TidyAttr_VSPACE, "vspace", VERS_LOOSE, CH_NUMBER }, /* IMG, OBJECT, APPLET */ 212 { TidyAttr_WIDTH, "width", VERS_ALL, CH_LENGTH }, /* pixels only for TD/TH */ 213 { TidyAttr_WRAP, "wrap", VERS_NETSCAPE, CH_PCDATA }, /* textarea */ 214 { TidyAttr_XML_LANG, "xml:lang", VERS_XML, CH_LANG }, /* XML language */ 215 { TidyAttr_XML_SPACE, "xml:space", VERS_XML, CH_PCDATA }, /* XML white space */ 216 217 /* todo: VERS_ALL is wrong! */ 218 { TidyAttr_XMLNS, "xmlns", VERS_ALL, CH_PCDATA }, /* name space */ 219 { TidyAttr_EVENT, "event", VERS_HTML40, CH_PCDATA }, /* reserved for <script> */ 220 { TidyAttr_METHODS, "methods", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */ 221 { TidyAttr_N, "n", VERS_HTML20, CH_PCDATA }, /* for <nextid> */ 222 { TidyAttr_SDAFORM, "sdaform", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ 223 { TidyAttr_SDAPREF, "sdapref", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ 224 { TidyAttr_SDASUFF, "sdasuff", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */ 225 { TidyAttr_URN, "urn", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */ 226 227 /* this must be the final entry */ 228 { N_TIDY_ATTRIBS, NULL, VERS_UNKNOWN, NULL } 229 }; 230 231 static uint AttributeVersions(Node* node, AttVal* attval) 232 { 233 uint i; 234 235 if (!attval || !attval->dict) 236 return VERS_UNKNOWN; 237 238 if (!node || !node->tag || !node->tag->attrvers) 239 return attval->dict->versions; 240 241 for (i = 0; node->tag->attrvers[i].attribute; ++i) 242 if (node->tag->attrvers[i].attribute == attval->dict->id) 243 return node->tag->attrvers[i].versions; 244 245 return attval->dict->versions & VERS_ALL 246 ? VERS_UNKNOWN 247 : attval->dict->versions; 248 249 } 250 251 252 /* return the version of the attribute "id" of element "node" */ 253 uint NodeAttributeVersions( Node* node, TidyAttrId id ) 254 { 255 uint i; 256 257 if (!node || !node->tag || !node->tag->attrvers) 258 return VERS_UNKNOWN; 259 260 for (i = 0; node->tag->attrvers[i].attribute; ++i) 261 if (node->tag->attrvers[i].attribute == id) 262 return node->tag->attrvers[i].versions; 263 264 return VERS_UNKNOWN; 265 } 266 267 /* returns true if the element is a W3C defined element */ 268 /* but the element/attribute combination is not */ 269 static Bool AttributeIsProprietary(Node* node, AttVal* attval) 270 { 271 if (!node || !attval) 272 return no; 273 274 if (!node->tag) 275 return no; 276 277 if (!(node->tag->versions & VERS_ALL)) 278 return no; 279 280 if (AttributeVersions(node, attval) & VERS_ALL) 281 return no; 282 283 return yes; 284 } 285 286 /* used by CheckColor() */ 287 struct _colors 288 { 289 ctmbstr name; 290 ctmbstr hex; 291 }; 292 293 static const struct _colors colors[] = 294 { 295 { "black", "#000000" }, 296 { "green", "#008000" }, 297 { "silver", "#C0C0C0" }, 298 { "lime", "#00FF00" }, 299 { "gray", "#808080" }, 300 { "olive", "#808000" }, 301 { "white", "#FFFFFF" }, 302 { "yellow", "#FFFF00" }, 303 { "maroon", "#800000" }, 304 { "navy", "#000080" }, 305 { "red", "#FF0000" }, 306 { "blue", "#0000FF" }, 307 { "purple", "#800080" }, 308 { "teal", "#008080" }, 309 { "fuchsia", "#FF00FF" }, 310 { "aqua", "#00FFFF" }, 311 { NULL, NULL } 312 }; 313 314 static ctmbstr GetColorCode(ctmbstr name) 315 { 316 uint i; 317 318 for (i = 0; colors[i].name; ++i) 319 if (tmbstrcasecmp(name, colors[i].name) == 0) 320 return colors[i].hex; 321 322 return NULL; 323 } 324 325 static ctmbstr GetColorName(ctmbstr code) 326 { 327 uint i; 328 329 for (i = 0; colors[i].name; ++i) 330 if (tmbstrcasecmp(code, colors[i].hex) == 0) 331 return colors[i].name; 332 333 return NULL; 334 } 335 336 #if 0 337 static const struct _colors fancy_colors[] = 338 { 339 { "darkgreen", "#006400" }, 340 { "antiquewhite", "#FAEBD7" }, 341 { "aqua", "#00FFFF" }, 342 { "aquamarine", "#7FFFD4" }, 343 { "azure", "#F0FFFF" }, 344 { "beige", "#F5F5DC" }, 345 { "bisque", "#FFE4C4" }, 346 { "black", "#000000" }, 347 { "blanchedalmond", "#FFEBCD" }, 348 { "blue", "#0000FF" }, 349 { "blueviolet", "#8A2BE2" }, 350 { "brown", "#A52A2A" }, 351 { "burlywood", "#DEB887" }, 352 { "cadetblue", "#5F9EA0" }, 353 { "chartreuse", "#7FFF00" }, 354 { "chocolate", "#D2691E" }, 355 { "coral", "#FF7F50" }, 356 { "cornflowerblue", "#6495ED" }, 357 { "cornsilk", "#FFF8DC" }, 358 { "crimson", "#DC143C" }, 359 { "cyan", "#00FFFF" }, 360 { "darkblue", "#00008B" }, 361 { "darkcyan", "#008B8B" }, 362 { "darkgoldenrod", "#B8860B" }, 363 { "darkgray", "#A9A9A9" }, 364 { "darkgreen", "#006400" }, 365 { "darkkhaki", "#BDB76B" }, 366 { "darkmagenta", "#8B008B" }, 367 { "darkolivegreen", "#556B2F" }, 368 { "darkorange", "#FF8C00" }, 369 { "darkorchid", "#9932CC" }, 370 { "darkred", "#8B0000" }, 371 { "darksalmon", "#E9967A" }, 372 { "darkseagreen", "#8FBC8F" }, 373 { "darkslateblue", "#483D8B" }, 374 { "darkslategray", "#2F4F4F" }, 375 { "darkturquoise", "#00CED1" }, 376 { "darkviolet", "#9400D3" }, 377 { "deeppink", "#FF1493" }, 378 { "deepskyblue", "#00BFFF" }, 379 { "dimgray", "#696969" }, 380 { "dodgerblue", "#1E90FF" }, 381 { "firebrick", "#B22222" }, 382 { "floralwhite", "#FFFAF0" }, 383 { "forestgreen", "#228B22" }, 384 { "fuchsia", "#FF00FF" }, 385 { "gainsboro", "#DCDCDC" }, 386 { "ghostwhite", "#F8F8FF" }, 387 { "gold", "#FFD700" }, 388 { "goldenrod", "#DAA520" }, 389 { "gray", "#808080" }, 390 { "green", "#008000" }, 391 { "greenyellow", "#ADFF2F" }, 392 { "honeydew", "#F0FFF0" }, 393 { "hotpink", "#FF69B4" }, 394 { "indianred", "#CD5C5C" }, 395 { "indigo", "#4B0082" }, 396 { "ivory", "#FFFFF0" }, 397 { "khaki", "#F0E68C" }, 398 { "lavender", "#E6E6FA" }, 399 { "lavenderblush", "#FFF0F5" }, 400 { "lawngreen", "#7CFC00" }, 401 { "lemonchiffon", "#FFFACD" }, 402 { "lightblue", "#ADD8E6" }, 403 { "lightcoral", "#F08080" }, 404 { "lightcyan", "#E0FFFF" }, 405 { "lightgoldenrodyellow", "#FAFAD2" }, 406 { "lightgreen", "#90EE90" }, 407 { "lightgrey", "#D3D3D3" }, 408 { "lightpink", "#FFB6C1" }, 409 { "lightsalmon", "#FFA07A" }, 410 { "lightseagreen", "#20B2AA" }, 411 { "lightskyblue", "#87CEFA" }, 412 { "lightslategray", "#778899" }, 413 { "lightsteelblue", "#B0C4DE" }, 414 { "lightyellow", "#FFFFE0" }, 415 { "lime", "#00FF00" }, 416 { "limegreen", "#32CD32" }, 417 { "linen", "#FAF0E6" }, 418 { "magenta", "#FF00FF" }, 419 { "maroon", "#800000" }, 420 { "mediumaquamarine", "#66CDAA" }, 421 { "mediumblue", "#0000CD" }, 422 { "mediumorchid", "#BA55D3" }, 423 { "mediumpurple", "#9370DB" }, 424 { "mediumseagreen", "#3CB371" }, 425 { "mediumslateblue", "#7B68EE" }, 426 { "mediumspringgreen", "#00FA9A" }, 427 { "mediumturquoise", "#48D1CC" }, 428 { "mediumvioletred", "#C71585" }, 429 { "midnightblue", "#191970" }, 430 { "mintcream", "#F5FFFA" }, 431 { "mistyrose", "#FFE4E1" }, 432 { "moccasin", "#FFE4B5" }, 433 { "navajowhite", "#FFDEAD" }, 434 { "navy", "#000080" }, 435 { "oldlace", "#FDF5E6" }, 436 { "olive", "#808000" }, 437 { "olivedrab", "#6B8E23" }, 438 { "orange", "#FFA500" }, 439 { "orangered", "#FF4500" }, 440 { "orchid", "#DA70D6" }, 441 { "palegoldenrod", "#EEE8AA" }, 442 { "palegreen", "#98FB98" }, 443 { "paleturquoise", "#AFEEEE" }, 444 { "palevioletred", "#DB7093" }, 445 { "papayawhip", "#FFEFD5" }, 446 { "peachpuff", "#FFDAB9" }, 447 { "peru", "#CD853F" }, 448 { "pink", "#FFC0CB" }, 449 { "plum", "#DDA0DD" }, 450 { "powderblue", "#B0E0E6" }, 451 { "purple", "#800080" }, 452 { "red", "#FF0000" }, 453 { "rosybrown", "#BC8F8F" }, 454 { "royalblue", "#4169E1" }, 455 { "saddlebrown", "#8B4513" }, 456 { "salmon", "#FA8072" }, 457 { "sandybrown", "#F4A460" }, 458 { "seagreen", "#2E8B57" }, 459 { "seashell", "#FFF5EE" }, 460 { "sienna", "#A0522D" }, 461 { "silver", "#C0C0C0" }, 462 { "skyblue", "#87CEEB" }, 463 { "slateblue", "#6A5ACD" }, 464 { "slategray", "#708090" }, 465 { "snow", "#FFFAFA" }, 466 { "springgreen", "#00FF7F" }, 467 { "steelblue", "#4682B4" }, 468 { "tan", "#D2B48C" }, 469 { "teal", "#008080" }, 470 { "thistle", "#D8BFD8" }, 471 { "tomato", "#FF6347" }, 472 { "turquoise", "#40E0D0" }, 473 { "violet", "#EE82EE" }, 474 { "wheat", "#F5DEB3" }, 475 { "white", "#FFFFFF" }, 476 { "whitesmoke", "#F5F5F5" }, 477 { "yellow", "#FFFF00" }, 478 { "yellowgreen", "#9ACD32" }, 479 { NULL, NULL } 480 }; 481 #endif 482 483 #ifdef ATTRIBUTE_HASH_LOOKUP 484 static uint hash(ctmbstr s) 485 { 486 uint hashval; 487 488 for (hashval = 0; *s != '\0'; s++) 489 hashval = *s + 31*hashval; 490 491 return hashval % ATTRIBUTE_HASH_SIZE; 492 } 493 494 static Attribute *install(TidyAttribImpl * attribs, const Attribute* old) 495 { 496 Attribute *np; 497 uint hashval; 498 499 np = (Attribute *)MemAlloc(sizeof(*np)); 500 501 np->name = tmbstrdup(old->name); 502 503 hashval = hash(np->name); 504 np->next = attribs->hashtab[hashval]; 505 attribs->hashtab[hashval] = np; 506 507 np->id = old->id; 508 np->versions = old->versions; 509 np->attrchk = old->attrchk; 510 511 return np; 512 } 513 #endif 514 515 static const Attribute* lookup(TidyAttribImpl* ARG_UNUSED(attribs), 516 ctmbstr atnam) 517 { 518 const Attribute *np; 519 520 if (!atnam) 521 return NULL; 522 523 #ifdef ATTRIBUTE_HASH_LOOKUP 524 for (np = attribs->hashtab[hash(atnam)]; np != NULL; np = np->next) 525 if (tmbstrcmp(atnam, np->name) == 0) 526 return np; 527 528 for (np = attribute_defs; np && np->name; ++np) 529 if (tmbstrcmp(atnam, np->name) == 0) 530 return install(attribs, np); 531 #else 532 for (np = attribute_defs; np && np->name; ++np) 533 if (tmbstrcmp(atnam, np->name) == 0) 534 return np; 535 #endif 536 537 return NULL; 538 } 539 540 541 /* Locate attributes by type */ 542 AttVal* AttrGetById( Node* node, TidyAttrId id ) 543 { 544 AttVal* av; 545 for ( av = node->attributes; av; av = av->next ) 546 { 547 if ( AttrIsId(av, id) ) 548 return av; 549 } 550 return NULL; 551 } 552 553 /* public method for finding attribute definition by name */ 554 const Attribute* FindAttribute( TidyDocImpl* doc, AttVal *attval ) 555 { 556 if ( attval ) 557 return lookup( &doc->attribs, attval->attribute ); 558 return NULL; 559 } 560 561 AttVal* GetAttrByName( Node *node, ctmbstr name ) 562 { 563 AttVal *attr; 564 for (attr = node->attributes; attr != NULL; attr = attr->next) 565 { 566 if (attr->attribute && tmbstrcmp(attr->attribute, name) == 0) 567 break; 568 } 569 return attr; 570 } 571 572 AttVal* AddAttribute( TidyDocImpl* doc, 573 Node *node, ctmbstr name, ctmbstr value ) 574 { 575 AttVal *av = NewAttribute(); 576 av->delim = '"'; 577 av->attribute = tmbstrdup(name); 578 579 if (value) 580 av->value = tmbstrdup(value); 581 else 582 av->value = NULL; 583 584 av->dict = lookup(&doc->attribs, name); 585 586 InsertAttributeAtEnd(node, av); 587 return av; 588 } 589 590 AttVal* RepairAttrValue(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value) 591 { 592 AttVal* old = GetAttrByName(node, name); 593 594 if (old) 595 { 596 if (old->value) 597 MemFree(old->value); 598 if (value) 599 old->value = tmbstrdup(value); 600 else 601 old->value = NULL; 602 603 return old; 604 } 605 else 606 return AddAttribute(doc, node, name, value); 607 } 608 609 static Bool CheckAttrType( TidyDocImpl* doc, 610 ctmbstr attrname, AttrCheck type ) 611 { 612 const Attribute* np = lookup( &doc->attribs, attrname ); 613 return (Bool)( np && np->attrchk == type ); 614 } 615 616 Bool IsUrl( TidyDocImpl* doc, ctmbstr attrname ) 617 { 618 return CheckAttrType( doc, attrname, CH_URL ); 619 } 620 621 Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ) 622 { 623 return CheckAttrType( doc, attrname, CH_BOOL ); 624 } 625 626 Bool IsScript( TidyDocImpl* doc, ctmbstr attrname ) 627 { 628 return CheckAttrType( doc, attrname, CH_SCRIPT ); 629 } 630 631 /* may id or name serve as anchor? */ 632 Bool IsAnchorElement( TidyDocImpl* ARG_UNUSED(doc), Node* node) 633 { 634 TidyTagId tid = TagId( node ); 635 if ( tid == TidyTag_A || 636 tid == TidyTag_APPLET || 637 tid == TidyTag_FORM || 638 tid == TidyTag_FRAME || 639 tid == TidyTag_IFRAME || 640 tid == TidyTag_IMG || 641 tid == TidyTag_MAP ) 642 return yes; 643 644 return no; 645 } 646 647 /* 648 In CSS1, selectors can contain only the characters A-Z, 0-9, 649 and Unicode characters 161-255, plus dash (-); they cannot start 650 with a dash or a digit; they can also contain escaped characters 651 and any Unicode character as a numeric code (see next item). 652 653 The backslash followed by at most four hexadecimal digits 654 (0..9A..F) stands for the Unicode character with that number. 655 656 Any character except a hexadecimal digit can be escaped to remove 657 its special meaning, by putting a backslash in front. 658 659 #508936 - CSS class naming for -clean option 660 */ 661 Bool IsCSS1Selector( ctmbstr buf ) 662 { 663 Bool valid = yes; 664 int esclen = 0; 665 byte c; 666 int pos; 667 668 for ( pos=0; valid && (c = *buf++); ++pos ) 669 { 670 if ( c == '\\' ) 671 { 672 esclen = 1; /* ab\555\444 is 4 chars {'a', 'b', \555, \444} */ 673 } 674 else if ( isdigit( c ) ) 675 { 676 /* Digit not 1st, unless escaped (Max length "\112F") */ 677 if ( esclen > 0 ) 678 valid = ( ++esclen < 6 ); 679 if ( valid ) 680 valid = ( pos>0 || esclen>0 ); 681 } 682 else 683 { 684 valid = ( 685 esclen > 0 /* Escaped? Anything goes. */ 686 || ( pos>0 && c == '-' ) /* Dash cannot be 1st char */ 687 || isalpha(c) /* a-z, A-Z anywhere */ 688 || ( c >= 161 ) /* Unicode 161-255 anywhere */ 689 ); 690 esclen = 0; 691 } 692 } 693 return valid; 694 } 695 696 /* free single anchor */ 697 static void FreeAnchor(Anchor *a) 698 { 699 if ( a ) 700 MemFree( a->name ); 701 MemFree( a ); 702 } 703 704 /* removes anchor for specific node */ 705 void RemoveAnchorByNode( TidyDocImpl* doc, Node *node ) 706 { 707 TidyAttribImpl* attribs = &doc->attribs; 708 Anchor *delme = NULL, *curr, *prev = NULL; 709 710 for ( curr=attribs->anchor_list; curr!=NULL; curr=curr->next ) 711 { 712 if ( curr->node == node ) 713 { 714 if ( prev ) 715 prev->next = curr->next; 716 else 717 attribs->anchor_list = curr->next; 718 delme = curr; 719 break; 720 } 721 prev = curr; 722 } 723 FreeAnchor( delme ); 724 } 725 726 /* initialize new anchor */ 727 static Anchor* NewAnchor( ctmbstr name, Node* node ) 728 { 729 Anchor *a = (Anchor*) MemAlloc( sizeof(Anchor) ); 730 731 a->name = tmbstrdup( name ); 732 a->name = tmbstrtolower(a->name); 733 a->node = node; 734 a->next = NULL; 735 736 return a; 737 } 738 739 /* add new anchor to namespace */ 740 Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node ) 741 { 742 TidyAttribImpl* attribs = &doc->attribs; 743 Anchor *a = NewAnchor( name, node ); 744 745 if ( attribs->anchor_list == NULL) 746 attribs->anchor_list = a; 747 else 748 { 749 Anchor *here = attribs->anchor_list; 750 while (here->next) 751 here = here->next; 752 here->next = a; 753 } 754 755 return attribs->anchor_list; 756 } 757 758 /* return node associated with anchor */ 759 Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name ) 760 { 761 TidyAttribImpl* attribs = &doc->attribs; 762 Anchor *found; 763 tmbstr lname = tmbstrdup(name); 764 lname = tmbstrtolower(lname); 765 766 for ( found = attribs->anchor_list; found != NULL; found = found->next ) 767 { 768 if ( tmbstrcmp(found->name, lname) == 0 ) 769 break; 770 } 771 772 MemFree(lname); 773 if ( found ) 774 return found->node; 775 return NULL; 776 } 777 778 /* free all anchors */ 779 void FreeAnchors( TidyDocImpl* doc ) 780 { 781 TidyAttribImpl* attribs = &doc->attribs; 782 Anchor* a; 783 while (NULL != (a = attribs->anchor_list) ) 784 { 785 attribs->anchor_list = a->next; 786 FreeAnchor(a); 787 } 788 } 789 790 /* public method for inititializing attribute dictionary */ 791 void InitAttrs( TidyDocImpl* doc ) 792 { 793 ClearMemory( &doc->attribs, sizeof(TidyAttribImpl) ); 794 #ifdef _DEBUG 795 { 796 /* Attribute ID is index position in Attribute type lookup table */ 797 uint ix; 798 for ( ix=0; ix < N_TIDY_ATTRIBS; ++ix ) 799 { 800 const Attribute* dict = &attribute_defs[ ix ]; 801 assert( (uint) dict->id == ix ); 802 } 803 } 804 #endif 805 } 806 807 /* free all declared attributes */ 808 static void FreeDeclaredAttributes( TidyDocImpl* doc ) 809 { 810 TidyAttribImpl* attribs = &doc->attribs; 811 Attribute* dict; 812 while ( NULL != (dict = attribs->declared_attr_list) ) 813 { 814 attribs->declared_attr_list = dict->next; 815 MemFree( dict->name ); 816 MemFree( dict ); 817 } 818 } 819 820 void FreeAttrTable( TidyDocImpl* doc ) 821 { 822 #ifdef ATTRIBUTE_HASH_LOOKUP 823 Attribute *dict, *next; 824 uint i; 825 826 for (i = 0; i < ATTRIBUTE_HASH_SIZE; ++i) 827 { 828 dict = doc->attribs.hashtab[i]; 829 830 while(dict) 831 { 832 next = dict->next; 833 MemFree(dict->name); 834 MemFree(dict); 835 dict = next; 836 } 837 838 doc->attribs.hashtab[i] = NULL; 839 } 840 #endif 841 842 FreeAnchors( doc ); 843 FreeDeclaredAttributes( doc ); 844 } 845 846 /* 847 the same attribute name can't be used 848 more than once in each element 849 */ 850 void RepairDuplicateAttributes( TidyDocImpl* doc, Node *node) 851 { 852 AttVal *first; 853 854 for (first = node->attributes; first != NULL;) 855 { 856 AttVal *second; 857 Bool firstRedefined = no; 858 859 if (!(first->asp == NULL && first->php == NULL)) 860 { 861 first = first->next; 862 continue; 863 } 864 865 for (second = first->next; second != NULL;) 866 { 867 AttVal *temp; 868 869 if (!(second->asp == NULL && second->php == NULL && 870 AttrsHaveSameId(first, second))) 871 { 872 second = second->next; 873 continue; 874 } 875 876 /* first and second attribute have same local name */ 877 /* now determine what to do with this duplicate... */ 878 879 if (attrIsCLASS(first) && cfgBool(doc, TidyJoinClasses) && AttrHasValue(first) && AttrHasValue(second)) 880 { 881 /* concatenate classes */ 882 883 first->value = (tmbstr) MemRealloc(first->value, tmbstrlen(first->value) + 884 tmbstrlen(second->value) + 2); 885 tmbstrcat(first->value, " "); 886 tmbstrcat(first->value, second->value); 887 888 temp = second->next; 889 890 ReportAttrError( doc, node, second, JOINING_ATTRIBUTE); 891 RemoveAttribute( doc, node, second ); 892 893 second = temp; 894 } 895 else if (attrIsSTYLE(first) && cfgBool(doc, TidyJoinStyles) && AttrHasValue(first) && AttrHasValue(second)) 896 { 897 /* concatenate styles */ 898 899 /* 900 this doesn't handle CSS comments and 901 leading/trailing white-space very well 902 see http://www.w3.org/TR/css-style-attr 903 */ 904 905 uint end = tmbstrlen(first->value); 906 907 if (end >0 && first->value[end - 1] == ';') 908 { 909 /* attribute ends with declaration seperator */ 910 911 first->value = (tmbstr) MemRealloc(first->value, 912 end + tmbstrlen(second->value) + 2); 913 914 tmbstrcat(first->value, " "); 915 tmbstrcat(first->value, second->value); 916 } 917 else if (end >0 && first->value[end - 1] == '}') 918 { 919 /* attribute ends with rule set */ 920 921 first->value = (tmbstr) MemRealloc(first->value, 922 end + tmbstrlen(second->value) + 6); 923 924 tmbstrcat(first->value, " { "); 925 tmbstrcat(first->value, second->value); 926 tmbstrcat(first->value, " }"); 927 } 928 else 929 { 930 /* attribute ends with property value */ 931 932 first->value = (tmbstr) MemRealloc(first->value, 933 end + tmbstrlen(second->value) + 3); 934 935 if (end > 0) 936 tmbstrcat(first->value, "; "); 937 tmbstrcat(first->value, second->value); 938 } 939 940 temp = second->next; 941 942 ReportAttrError( doc, node, second, JOINING_ATTRIBUTE); 943 RemoveAttribute( doc, node, second ); 944 second = temp; 945 946 } 947 else if ( cfg(doc, TidyDuplicateAttrs) == TidyKeepLast ) 948 { 949 temp = first->next; 950 ReportAttrError( doc, node, first, REPEATED_ATTRIBUTE); 951 RemoveAttribute( doc, node, first ); 952 firstRedefined = yes; 953 first = temp; 954 second = second->next; 955 } 956 else /* TidyDuplicateAttrs == TidyKeepFirst */ 957 { 958 temp = second->next; 959 960 ReportAttrError( doc, node, second, REPEATED_ATTRIBUTE); 961 RemoveAttribute( doc, node, second ); 962 963 second = temp; 964 } 965 } 966 if (!firstRedefined) 967 first = first->next; 968 } 969 } 970 971 /* ignore unknown attributes for proprietary elements */ 972 const Attribute* CheckAttribute( TidyDocImpl* doc, Node *node, AttVal *attval ) 973 { 974 const Attribute* attribute = attval->dict; 975 976 if ( attribute != NULL ) 977 { 978 if (attribute->versions & VERS_XML) 979 { 980 doc->lexer->isvoyager = yes; 981 if (!cfgBool(doc, TidyHtmlOut)) 982 { 983 SetOptionBool(doc, TidyXhtmlOut, yes); 984 SetOptionBool(doc, TidyXmlOut, yes); 985 } 986 } 987 988 ConstrainVersion(doc, AttributeVersions(node, attval)); 989 990 if (attribute->attrchk) 991 attribute->attrchk( doc, node, attval ); 992 } 993 994 if (AttributeIsProprietary(node, attval)) 995 { 996 ReportAttrError(doc, node, attval, PROPRIETARY_ATTRIBUTE); 997 998 if (cfgBool(doc, TidyDropPropAttrs)) 999 RemoveAttribute( doc, node, attval ); 1000 } 1001 1002 return attribute; 1003 } 1004 1005 Bool IsBoolAttribute(AttVal *attval) 1006 { 1007 const Attribute *attribute = ( attval ? attval->dict : NULL ); 1008 if ( attribute && attribute->attrchk == CH_BOOL ) 1009 return yes; 1010 return no; 1011 } 1012 1013 Bool attrIsEvent( AttVal* attval ) 1014 { 1015 TidyAttrId atid = AttrId( attval ); 1016 1017 return (atid == TidyAttr_OnAFTERUPDATE || 1018 atid == TidyAttr_OnBEFOREUNLOAD || 1019 atid == TidyAttr_OnBEFOREUPDATE || 1020 atid == TidyAttr_OnBLUR || 1021 atid == TidyAttr_OnCHANGE || 1022 atid == TidyAttr_OnCLICK || 1023 atid == TidyAttr_OnDATAAVAILABLE || 1024 atid == TidyAttr_OnDATASETCHANGED || 1025 atid == TidyAttr_OnDATASETCOMPLETE || 1026 atid == TidyAttr_OnDBLCLICK || 1027 atid == TidyAttr_OnERRORUPDATE || 1028 atid == TidyAttr_OnFOCUS || 1029 atid == TidyAttr_OnKEYDOWN || 1030 atid == TidyAttr_OnKEYPRESS || 1031 atid == TidyAttr_OnKEYUP || 1032 atid == TidyAttr_OnLOAD || 1033 atid == TidyAttr_OnMOUSEDOWN || 1034 atid == TidyAttr_OnMOUSEMOVE || 1035 atid == TidyAttr_OnMOUSEOUT || 1036 atid == TidyAttr_OnMOUSEOVER || 1037 atid == TidyAttr_OnMOUSEUP || 1038 atid == TidyAttr_OnRESET || 1039 atid == TidyAttr_OnROWENTER || 1040 atid == TidyAttr_OnROWEXIT || 1041 atid == TidyAttr_OnSELECT || 1042 atid == TidyAttr_OnSUBMIT || 1043 atid == TidyAttr_OnUNLOAD); 1044 } 1045 1046 static void CheckLowerCaseAttrValue( TidyDocImpl* doc, Node *node, AttVal *attval) 1047 { 1048 tmbstr p; 1049 Bool hasUpper = no; 1050 1051 if (!AttrHasValue(attval)) 1052 return; 1053 1054 p = attval->value; 1055 1056 while (*p) 1057 { 1058 if (IsUpper(*p)) /* #501230 - fix by Terry Teague - 09 Jan 02 */ 1059 { 1060 hasUpper = yes; 1061 break; 1062 } 1063 p++; 1064 } 1065 1066 if (hasUpper) 1067 { 1068 Lexer* lexer = doc->lexer; 1069 if (lexer->isvoyager) 1070 ReportAttrError( doc, node, attval, ATTR_VALUE_NOT_LCASE); 1071 1072 if ( lexer->isvoyager || cfgBool(doc, TidyLowerLiterals) ) 1073 attval->value = tmbstrtolower(attval->value); 1074 } 1075 } 1076 1077 /* methods for checking value of a specific attribute */ 1078 1079 void CheckUrl( TidyDocImpl* doc, Node *node, AttVal *attval) 1080 { 1081 tmbchar c; 1082 tmbstr dest, p; 1083 uint escape_count = 0, backslash_count = 0; 1084 uint i, pos = 0; 1085 uint len; 1086 1087 if (!AttrHasValue(attval)) 1088 { 1089 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1090 return; 1091 } 1092 1093 p = attval->value; 1094 1095 for (i = 0; 0 != (c = p[i]); ++i) 1096 { 1097 if (c == '\\') 1098 { 1099 ++backslash_count; 1100 if ( cfgBool(doc, TidyFixBackslash) ) 1101 p[i] = '/'; 1102 } 1103 else if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c))) 1104 ++escape_count; 1105 } 1106 1107 if ( cfgBool(doc, TidyFixUri) && escape_count ) 1108 { 1109 len = tmbstrlen(p) + escape_count * 2 + 1; 1110 dest = (tmbstr) MemAlloc(len); 1111 1112 for (i = 0; 0 != (c = p[i]); ++i) 1113 { 1114 if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c))) 1115 pos += sprintf( dest + pos, "%%%02X", (byte)c ); 1116 else 1117 dest[pos++] = c; 1118 } 1119 dest[pos] = 0; 1120 1121 MemFree(attval->value); 1122 attval->value = dest; 1123 } 1124 if ( backslash_count ) 1125 { 1126 if ( cfgBool(doc, TidyFixBackslash) ) 1127 ReportAttrError( doc, node, attval, FIXED_BACKSLASH ); 1128 else 1129 ReportAttrError( doc, node, attval, BACKSLASH_IN_URI ); 1130 } 1131 if ( escape_count ) 1132 { 1133 if ( cfgBool(doc, TidyFixUri) ) 1134 ReportAttrError( doc, node, attval, ESCAPED_ILLEGAL_URI); 1135 else 1136 ReportAttrError( doc, node, attval, ILLEGAL_URI_REFERENCE); 1137 1138 doc->badChars |= BC_INVALID_URI; 1139 } 1140 } 1141 1142 /* RFC 2396, section 4.2 states: 1143 "[...] in the case of HTML's FORM element, [...] an 1144 empty URI reference represents the base URI of the 1145 current document and should be replaced by that URI 1146 when transformed into a request." 1147 */ 1148 void CheckAction( TidyDocImpl* doc, Node *node, AttVal *attval) 1149 { 1150 if (AttrHasValue(attval)) 1151 CheckUrl( doc, node, attval ); 1152 } 1153 1154 void CheckScript( TidyDocImpl* ARG_UNUSED(doc), Node* ARG_UNUSED(node), 1155 AttVal* ARG_UNUSED(attval)) 1156 { 1157 } 1158 1159 Bool IsValidHTMLID(ctmbstr id) 1160 { 1161 ctmbstr s = id; 1162 1163 if (!s) 1164 return no; 1165 1166 if (!IsLetter(*s++)) 1167 return no; 1168 1169 while (*s) 1170 if (!IsNamechar(*s++)) 1171 return no; 1172 1173 return yes; 1174 1175 } 1176 1177 Bool IsValidXMLID(ctmbstr id) 1178 { 1179 ctmbstr s = id; 1180 tchar c; 1181 1182 if (!s) 1183 return no; 1184 1185 c = *s++; 1186 if (c > 0x7F) 1187 s += GetUTF8(s, &c); 1188 1189 if (!(IsXMLLetter(c) || c == '_' || c == ':')) 1190 return no; 1191 1192 while (*s) 1193 { 1194 c = (unsigned char)*s; 1195 1196 if (c > 0x7F) 1197 s += GetUTF8(s, &c); 1198 1199 ++s; 1200 1201 if (!IsXMLNamechar(c)) 1202 return no; 1203 } 1204 1205 return yes; 1206 } 1207 1208 static Bool IsValidNMTOKEN(ctmbstr name) 1209 { 1210 ctmbstr s = name; 1211 tchar c; 1212 1213 if (!s) 1214 return no; 1215 1216 while (*s) 1217 { 1218 c = (unsigned char)*s; 1219 1220 if (c > 0x7F) 1221 s += GetUTF8(s, &c); 1222 1223 ++s; 1224 1225 if (!IsXMLNamechar(c)) 1226 return no; 1227 } 1228 1229 return yes; 1230 } 1231 1232 static Bool AttrValueIsAmong(AttVal *attval, ctmbstr const list[]) 1233 { 1234 const ctmbstr *v; 1235 for (v = list; *v; ++v) 1236 if (AttrValueIs(attval, *v)) 1237 return yes; 1238 return no; 1239 } 1240 1241 static void CheckAttrValidity( TidyDocImpl* doc, Node *node, AttVal *attval, 1242 ctmbstr const list[]) 1243 { 1244 if (!AttrHasValue(attval)) 1245 { 1246 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1247 return; 1248 } 1249 1250 CheckLowerCaseAttrValue( doc, node, attval ); 1251 1252 if (!AttrValueIsAmong(attval, list)) 1253 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1254 } 1255 1256 void CheckName( TidyDocImpl* doc, Node *node, AttVal *attval) 1257 { 1258 Node *old; 1259 1260 if (!AttrHasValue(attval)) 1261 { 1262 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1263 return; 1264 } 1265 1266 if ( IsAnchorElement(doc, node) ) 1267 { 1268 if (cfgBool(doc, TidyXmlOut) && !IsValidNMTOKEN(attval->value)) 1269 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1270 1271 if ((old = GetNodeByAnchor(doc, attval->value)) && old != node) 1272 { 1273 ReportAttrError( doc, node, attval, ANCHOR_NOT_UNIQUE); 1274 } 1275 else 1276 AddAnchor( doc, attval->value, node ); 1277 } 1278 } 1279 1280 void CheckId( TidyDocImpl* doc, Node *node, AttVal *attval ) 1281 { 1282 Lexer* lexer = doc->lexer; 1283 Node *old; 1284 1285 if (!AttrHasValue(attval)) 1286 { 1287 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1288 return; 1289 } 1290 1291 if (!IsValidHTMLID(attval->value)) 1292 { 1293 if (lexer->isvoyager && IsValidXMLID(attval->value)) 1294 ReportAttrError( doc, node, attval, XML_ID_SYNTAX); 1295 else 1296 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1297 } 1298 1299 if ((old = GetNodeByAnchor(doc, attval->value)) && old != node) 1300 { 1301 ReportAttrError( doc, node, attval, ANCHOR_NOT_UNIQUE); 1302 } 1303 else 1304 AddAnchor( doc, attval->value, node ); 1305 } 1306 1307 void CheckBool( TidyDocImpl* doc, Node *node, AttVal *attval) 1308 { 1309 if (!AttrHasValue(attval)) 1310 return; 1311 1312 CheckLowerCaseAttrValue( doc, node, attval ); 1313 } 1314 1315 void CheckAlign( TidyDocImpl* doc, Node *node, AttVal *attval) 1316 { 1317 ctmbstr const values[] = {"left", "right", "center", "justify", NULL}; 1318 1319 /* IMG, OBJECT, APPLET and EMBED use align for vertical position */ 1320 if (node->tag && (node->tag->model & CM_IMG)) 1321 { 1322 CheckValign( doc, node, attval ); 1323 return; 1324 } 1325 1326 if (!AttrHasValue(attval)) 1327 { 1328 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1329 return; 1330 } 1331 1332 CheckLowerCaseAttrValue( doc, node, attval); 1333 1334 /* currently CheckCaption(...) takes care of the remaining cases */ 1335 if (nodeIsCAPTION(node)) 1336 return; 1337 1338 if (!AttrValueIsAmong(attval, values)) 1339 { 1340 /* align="char" is allowed for elements with CM_TABLE|CM_ROW 1341 except CAPTION which is excluded above, */ 1342 if( !(AttrValueIs(attval, "char") 1343 && node->tag && (node->tag->model & CM_TABLE|CM_ROW))) 1344 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1345 } 1346 } 1347 1348 void CheckValign( TidyDocImpl* doc, Node *node, AttVal *attval) 1349 { 1350 ctmbstr const values[] = {"top", "middle", "bottom", "baseline", NULL}; 1351 ctmbstr const values2[] = {"left", "right", NULL}; 1352 ctmbstr const valuesp[] = {"texttop", "absmiddle", "absbottom", 1353 "textbottom", NULL}; 1354 1355 if (!AttrHasValue(attval)) 1356 { 1357 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1358 return; 1359 } 1360 1361 CheckLowerCaseAttrValue( doc, node, attval ); 1362 1363 if (AttrValueIsAmong(attval, values)) 1364 { 1365 /* all is fine */ 1366 } 1367 else if (AttrValueIsAmong(attval, values2)) 1368 { 1369 if (!(node->tag && (node->tag->model & CM_IMG))) 1370 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1371 } 1372 else if (AttrValueIsAmong(attval, valuesp)) 1373 { 1374 ConstrainVersion( doc, VERS_PROPRIETARY ); 1375 ReportAttrError( doc, node, attval, PROPRIETARY_ATTR_VALUE); 1376 } 1377 else 1378 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1379 } 1380 1381 void CheckLength( TidyDocImpl* doc, Node *node, AttVal *attval) 1382 { 1383 tmbstr p; 1384 1385 if (!AttrHasValue(attval)) 1386 { 1387 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1388 return; 1389 } 1390 1391 /* don't check for <col width=...> and <colgroup width=...> */ 1392 if (attrIsWIDTH(attval) && (nodeIsCOL(node) || nodeIsCOLGROUP(node))) 1393 return; 1394 1395 p = attval->value; 1396 1397 if (!IsDigit(*p++)) 1398 { 1399 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1400 } 1401 else 1402 { 1403 while (*p) 1404 { 1405 if (!IsDigit(*p) && *p != '%') 1406 { 1407 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1408 break; 1409 } 1410 ++p; 1411 } 1412 } 1413 } 1414 1415 void CheckTarget( TidyDocImpl* doc, Node *node, AttVal *attval) 1416 { 1417 ctmbstr const values[] = {"_blank", "_self", "_parent", "_top", NULL}; 1418 1419 if (!AttrHasValue(attval)) 1420 { 1421 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1422 return; 1423 } 1424 1425 /* target names must begin with A-Za-z ... */ 1426 if (IsLetter(attval->value[0])) 1427 return; 1428 1429 /* or be one of the allowed list */ 1430 if (!AttrValueIsAmong(attval, values)) 1431 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1432 } 1433 1434 void CheckFsubmit( TidyDocImpl* doc, Node *node, AttVal *attval) 1435 { 1436 ctmbstr const values[] = {"get", "post", NULL}; 1437 CheckAttrValidity( doc, node, attval, values ); 1438 } 1439 1440 void CheckClear( TidyDocImpl* doc, Node *node, AttVal *attval) 1441 { 1442 ctmbstr const values[] = {"none", "left", "right", "all", NULL}; 1443 1444 if (!AttrHasValue(attval)) 1445 { 1446 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1447 if (attval->value == NULL) 1448 attval->value = tmbstrdup( "none" ); 1449 return; 1450 } 1451 1452 CheckLowerCaseAttrValue( doc, node, attval ); 1453 1454 if (!AttrValueIsAmong(attval, values)) 1455 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1456 } 1457 1458 void CheckShape( TidyDocImpl* doc, Node *node, AttVal *attval) 1459 { 1460 ctmbstr const values[] = {"rect", "default", "circle", "poly", NULL}; 1461 CheckAttrValidity( doc, node, attval, values ); 1462 } 1463 1464 void CheckScope( TidyDocImpl* doc, Node *node, AttVal *attval) 1465 { 1466 ctmbstr const values[] = {"row", "rowgroup", "col", "colgroup", NULL}; 1467 CheckAttrValidity( doc, node, attval, values ); 1468 } 1469 1470 void CheckNumber( TidyDocImpl* doc, Node *node, AttVal *attval) 1471 { 1472 tmbstr p; 1473 1474 if (!AttrHasValue(attval)) 1475 { 1476 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1477 return; 1478 } 1479 1480 /* don't check <frameset cols=... rows=...> */ 1481 if ( nodeIsFRAMESET(node) && 1482 (attrIsCOLS(attval) || attrIsROWS(attval))) 1483 return; 1484 1485 p = attval->value; 1486 1487 /* font size may be preceded by + or - */ 1488 if ( nodeIsFONT(node) && (*p == '+' || *p == '-') ) 1489 ++p; 1490 1491 while (*p) 1492 { 1493 if (!IsDigit(*p)) 1494 { 1495 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1496 break; 1497 } 1498 ++p; 1499 } 1500 } 1501 1502 /* check hexadecimal color value */ 1503 static Bool IsValidColorCode(ctmbstr color) 1504 { 1505 uint i; 1506 1507 if (tmbstrlen(color) != 6) 1508 return no; 1509 1510 /* check if valid hex digits and letters */ 1511 for (i = 0; i < 6; i++) 1512 if (!IsDigit(color[i]) && !strchr("abcdef", ToLower(color[i]))) 1513 return no; 1514 1515 return yes; 1516 } 1517 1518 /* check color syntax and beautify value by option */ 1519 void CheckColor( TidyDocImpl* doc, Node *node, AttVal *attval) 1520 { 1521 Bool valid = no; 1522 tmbstr given; 1523 1524 if (!AttrHasValue(attval)) 1525 { 1526 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1527 return; 1528 } 1529 1530 given = attval->value; 1531 1532 /* 727851 - add hash to hash-less color values */ 1533 if (given[0] != '#' && (valid = IsValidColorCode(given))) 1534 { 1535 tmbstr cp, s; 1536 1537 cp = s = (tmbstr) MemAlloc(2 + tmbstrlen (given)); 1538 *cp++ = '#'; 1539 while ('\0' != (*cp++ = *given++)) 1540 continue; 1541 1542 ReportAttrError(doc, node, attval, BAD_ATTRIBUTE_VALUE_REPLACED); 1543 1544 MemFree(attval->value); 1545 given = attval->value = s; 1546 } 1547 1548 if (!valid && given[0] == '#') 1549 valid = IsValidColorCode(given + 1); 1550 1551 if (valid && given[0] == '#' && cfgBool(doc, TidyReplaceColor)) 1552 { 1553 ctmbstr newName = GetColorName(given); 1554 1555 if (newName) 1556 { 1557 MemFree(attval->value); 1558 given = attval->value = tmbstrdup(newName); 1559 } 1560 } 1561 1562 /* if it is not a valid color code, it is a color name */ 1563 if (!valid) 1564 valid = GetColorCode(given) != NULL; 1565 1566 if (valid && given[0] == '#') 1567 attval->value = tmbstrtoupper(attval->value); 1568 else if (valid) 1569 attval->value = tmbstrtolower(attval->value); 1570 1571 if (!valid) 1572 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1573 } 1574 1575 /* check valuetype attribute for element param */ 1576 void CheckVType( TidyDocImpl* doc, Node *node, AttVal *attval) 1577 { 1578 ctmbstr const values[] = {"data", "object", "ref", NULL}; 1579 CheckAttrValidity( doc, node, attval, values ); 1580 } 1581 1582 /* checks scrolling attribute */ 1583 void CheckScroll( TidyDocImpl* doc, Node *node, AttVal *attval) 1584 { 1585 ctmbstr const values[] = {"no", "auto", "yes", NULL}; 1586 CheckAttrValidity( doc, node, attval, values ); 1587 } 1588 1589 /* checks dir attribute */ 1590 void CheckTextDir( TidyDocImpl* doc, Node *node, AttVal *attval) 1591 { 1592 ctmbstr const values[] = {"rtl", "ltr", NULL}; 1593 CheckAttrValidity( doc, node, attval, values ); 1594 } 1595 1596 /* checks lang and xml:lang attributes */ 1597 void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval) 1598 { 1599 /* empty xml:lang is allowed through XML 1.0 SE errata */ 1600 if (!AttrHasValue(attval) && !attrIsXML_LANG(attval)) 1601 { 1602 if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 ) 1603 { 1604 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE ); 1605 } 1606 return; 1607 } 1608 } 1609 1610 /* checks type attribute */ 1611 void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval) 1612 { 1613 ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio", 1614 "submit", "reset", "file", "hidden", 1615 "image", "button", NULL}; 1616 ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL}; 1617 ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL}; 1618 ctmbstr const valuesOL[] = {"1", "a", "i", NULL}; 1619 1620 if (nodeIsINPUT(node)) 1621 CheckAttrValidity( doc, node, attval, valuesINPUT ); 1622 else if (nodeIsBUTTON(node)) 1623 CheckAttrValidity( doc, node, attval, valuesBUTTON ); 1624 else if (nodeIsUL(node)) 1625 CheckAttrValidity( doc, node, attval, valuesUL ); 1626 else if (nodeIsOL(node)) 1627 { 1628 if (!AttrHasValue(attval)) 1629 { 1630 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1631 return; 1632 } 1633 if (!AttrValueIsAmong(attval, valuesOL)) 1634 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1635 } 1636 else if (nodeIsLI(node)) 1637 { 1638 if (!AttrHasValue(attval)) 1639 { 1640 ReportAttrError( doc, node, attval, MISSING_ATTR_VALUE); 1641 return; 1642 } 1643 if (AttrValueIsAmong(attval, valuesUL)) 1644 CheckLowerCaseAttrValue( doc, node, attval ); 1645 else if (!AttrValueIsAmong(attval, valuesOL)) 1646 ReportAttrError( doc, node, attval, BAD_ATTRIBUTE_VALUE); 1647 } 1648 return; 1649 } 1650 1651 /* 1652 * local variables: 1653 * mode: c 1654 * indent-tabs-mode: nil 1655 * c-basic-offset: 4 1656 * eval: (c-set-offset 'substatement-open 0) 1657 * end: 1658 */ 1659

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.