001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * Copyright (c) 2007 Mozilla Foundation 004 * Portions of comments Copyright 2004-2007 Apple Computer, Inc., Mozilla 005 * Foundation, and Opera Software ASA. 006 * 007 * Permission is hereby granted, free of charge, to any person obtaining a 008 * copy of this software and associated documentation files (the "Software"), 009 * to deal in the Software without restriction, including without limitation 010 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 011 * and/or sell copies of the Software, and to permit persons to whom the 012 * Software is furnished to do so, subject to the following conditions: 013 * 014 * The above copyright notice and this permission notice shall be included in 015 * all copies or substantial portions of the Software. 016 * 017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 018 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 019 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 020 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 021 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 022 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 023 * DEALINGS IN THE SOFTWARE. 024 */ 025 026 /* 027 * The comments following this one that use the same comment syntax as this 028 * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007 029 * amended as of June 28 2007. 030 * That document came with this statement: 031 * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and 032 * Opera Software ASA. You are granted a license to use, reproduce and 033 * create derivative works of this document." 
*/

package nu.validator.htmlparser.impl;

import java.util.Arrays;

import nu.validator.htmlparser.common.DoctypeExpectation;
import nu.validator.htmlparser.common.DocumentMode;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;

import org.xml.sax.Attributes;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Token handler that keeps a stack of open elements and a current
 * {@link Phase}, and reacts to the tokenizer callbacks (doctype, comment,
 * characters, start tag, end of file) by calling tree-mutating hooks.
 *
 * @param <T>
 *            the type of the node objects stored in the stack entries and
 *            handed to the hooks (for example to {@code appendComment})
 */
public abstract class TreeBuilder<T> implements TokenHandler {

    /**
     * The states the token callbacks switch on. The current state is held in
     * the {@code phase} field; {@code TRAILING_END} additionally remembers the
     * state it was entered from in {@code previousPhaseBeforeTrailingEnd}.
     */
    private enum Phase {
        INITIAL, ROOT_ELEMENT, BEFORE_HEAD, IN_HEAD, IN_HEAD_NOSCRIPT, AFTER_HEAD, IN_BODY, IN_TABLE, IN_CAPTION, IN_COLUMN_GROUP, IN_TABLE_BODY, IN_ROW, IN_CELL, IN_SELECT, AFTER_BODY, IN_FRAMESET, AFTER_FRAMESET, TRAILING_END
    }

    /**
     * One entry of the {@code stack} / {@code listOfActiveFormattingElements}
     * arrays: an element name, the corresponding tree node and three
     * classification flags.
     *
     * @param <S>
     *            the node type
     */
    private class StackNode<S> {
        /** Element name; compared by reference in the two-argument constructor. */
        final String name;

        /** The tree node this entry stands for. */
        final S node;

        /** Flag derived from the name, see {@link #StackNode(String, Object)}. */
        final boolean scoping;

        /** Flag derived from the name, see {@link #StackNode(String, Object)}. */
        final boolean special;

        /** Flag derived from the name, see {@link #StackNode(String, Object)}. */
        final boolean fosterParenting;

        /**
         * Creates an entry with explicitly supplied flags.
         *
         * @param name
         *            the element name
         * @param node
         *            the tree node
         * @param scoping
         *            value for {@link #scoping}
         * @param special
         *            value for {@link #special}
         * @param fosterParenting
         *            value for {@link #fosterParenting}
         */
        StackNode(final String name, final S node, final boolean scoping, final boolean special, final boolean fosterParenting) {
            this.name = name;
            this.node = node;
            this.scoping = scoping;
            this.special = special;
            this.fosterParenting = fosterParenting;
        }

        /**
         * Creates an entry whose flags are derived from the element name.
         *
         * The name is compared with <code>==</code> against string literals,
         * so a flag is only set when <code>name</code> is the very same
         * (interned) String instance as the literal. Presumably the tokenizer
         * interns element names -- confirm before passing names from any
         * other source. A <code>null</code> name yields all flags
         * <code>false</code>.
         *
         * @param name
         *            the element name
         * @param node
         *            the tree node
         */
        StackNode(final String name, final S node) {
            this.name = name;
            this.node = node;
            // "Scoping" names: table, caption, td, th, button, marquee, object.
            this.scoping = ("table" == name || "caption" == name || "td" == name || "th" == name || "button" == name || "marquee" == name || "object" == name);
            // "Special" names.
            this.special = ("address" == name || "area" == name || "base" == name || "basefont" == name || "bgsound" == name || "blockquote" == name || "body" == name || "br" == name || "center" == name || "col" == name || "colgroup" == name || "dd" == name || "dir" == name || "div" == name || "dl" == name || "dt" == name || "embed" == name || "fieldset" == name || "form" == name || "frame" == name || "frameset" == name || "h1" == name || "h2" == name || "h3" == name || "h4" == name || "h5" == name || "h6" == name || "head" == name || "hr" == name || "iframe" == name || "image" == name || "img" == name || "input" == name || "isindex" == name || "li" == name || "link" == name || "listing" == name || "menu" == name || "meta" == name || "noembed" == name || "noframes" == name || "noscript" == name || "ol" == name || "optgroup" == name || "option" == name || "p" == name || "param" == name || "plaintext" == name || "pre" == name || "script" == name || "select" == name || "spacer" == name || "style" == name || "tbody" == name || "textarea" == name || "tfoot" == name || "thead" == name || "title" == name || "tr" == name || "ul" == name || "wbr" == name);
            // Table-structure names: table, tbody, tfoot, thead, tr.
            this.fosterParenting = ("table" == name || "tbody" == name || "tfoot" == name || "thead" == name || "tr" == name);
        }

        /**
         * Returns the element name (may be <code>null</code>).
         *
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            return name;
        }
    }

    /**
     * Prompt text as a char array. Not referenced in the visible part of the
     * file; presumably used when handling "isindex" -- confirm.
     */
    private final static char[] ISINDEX_PROMPT = "This is a searchable index. Insert your search keywords here: ".toCharArray();

    /**
     * Public identifiers of the HTML 4.0 / 4.01 doctypes. Must stay in
     * ascending natural String order: isHtml4Doctype() looks values up with
     * Arrays.binarySearch.
     */
    private final static String[] HTML4_PUBLIC_IDS = {
            "-//W3C//DTD HTML 4.0 Frameset//EN",
            "-//W3C//DTD HTML 4.0 Transitional//EN",
            "-//W3C//DTD HTML 4.0//EN",
            "-//W3C//DTD HTML 4.01 Frameset//EN",
            "-//W3C//DTD HTML 4.01 Transitional//EN",
            "-//W3C//DTD HTML 4.01//EN"
    };

    /**
     * Lower-case public identifiers, listed in ascending natural String order.
     * The consumer is not visible in this part of the file; presumably
     * isQuirky() binary-searches the table, in which case the ordering must
     * be preserved when entries are added -- confirm.
     */
    private final static String[] QUIRKY_PUBLIC_IDS = {
            "+//silmaril//dtd html pro v0r11 19970101//en",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
            "-//as//dtd html 3.0 aswedit + extensions//en",
            "-//ietf//dtd html 2.0 level 1//en",
            "-//ietf//dtd html 2.0 level 2//en",
            "-//ietf//dtd html 2.0 strict level 1//en",
            "-//ietf//dtd html 2.0 strict level 2//en",
            "-//ietf//dtd html 2.0 strict//en", "-//ietf//dtd html 2.0//en",
            "-//ietf//dtd html 2.1e//en", "-//ietf//dtd html 3.0//en",
            "-//ietf//dtd html 3.0//en//", "-//ietf//dtd html 3.2 final//en",
            "-//ietf//dtd html 3.2//en", "-//ietf//dtd html 3//en",
            "-//ietf//dtd html level 0//en",
            "-//ietf//dtd html level 0//en//2.0",
            "-//ietf//dtd html level 1//en",
            "-//ietf//dtd html level 1//en//2.0",
            "-//ietf//dtd html level 2//en",
            "-//ietf//dtd html level 2//en//2.0",
            "-//ietf//dtd html level 3//en",
            "-//ietf//dtd html level 3//en//3.0",
            "-//ietf//dtd html strict level 0//en",
            "-//ietf//dtd html strict level 0//en//2.0",
            "-//ietf//dtd html strict level 1//en",
            "-//ietf//dtd html strict level 1//en//2.0",
            "-//ietf//dtd html strict level 2//en",
            "-//ietf//dtd html strict level 2//en//2.0",
            "-//ietf//dtd html strict level 3//en",
            "-//ietf//dtd html strict level 3//en//3.0",
            "-//ietf//dtd html strict//en",
            "-//ietf//dtd html strict//en//2.0",
            "-//ietf//dtd html strict//en//3.0", "-//ietf//dtd html//en",
            "-//ietf//dtd html//en//2.0", "-//ietf//dtd html//en//3.0",
            "-//metrius//dtd metrius presentational//en",
            "-//microsoft//dtd internet explorer 2.0 html strict//en",
            "-//microsoft//dtd internet explorer 2.0 html//en",
            "-//microsoft//dtd internet explorer 2.0 tables//en",
            "-//microsoft//dtd internet explorer 3.0 html strict//en",
            "-//microsoft//dtd internet explorer 3.0 html//en",
            "-//microsoft//dtd internet explorer 3.0 tables//en",
            "-//netscape comm. corp.//dtd html//en",
            "-//netscape comm. corp.//dtd strict html//en",
            "-//o'reilly and associates//dtd html 2.0//en",
            "-//o'reilly and associates//dtd html extended 1.0//en",
            "-//spyglass//dtd html 2.0 extended//en",
            "-//sq//dtd html 2.0 hotmetal + extensions//en",
            "-//sun microsystems corp.//dtd hotjava html//en",
            "-//sun microsystems corp.//dtd hotjava strict html//en",
            "-//w3c//dtd html 3 1995-03-24//en",
            "-//w3c//dtd html 3.2 draft//en", "-//w3c//dtd html 3.2 final//en",
            "-//w3c//dtd html 3.2//en", "-//w3c//dtd html 3.2s draft//en",
            "-//w3c//dtd html 4.0 frameset//en",
            "-//w3c//dtd html 4.0 transitional//en",
            "-//w3c//dtd html experimental 19960712//en",
            "-//w3c//dtd html experimental 970421//en",
            "-//w3c//dtd w3 html//en", "-//w3o//dtd w3 html 3.0//en",
            "-//w3o//dtd w3 html 3.0//en//",
            "-//w3o//dtd w3 html strict 3.0//en//",
            "-//webtechs//dtd mozilla html 2.0//en",
            "-//webtechs//dtd mozilla html//en",
            "-/w3c/dtd html 4.0 transitional/en", "html" };

    /** Sentinel index that findLastInTableScope() results are compared against in startTag(). */
    private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE;

    /**
     * Entry with a null name and a null node. Presumably the sentinel that
     * insertMarker() places in the list of active formatting elements --
     * confirm.
     */
    private final StackNode<T> MARKER = new StackNode<T>(null, null);

    /** Set by the constructor when the streamability policy is ALTER_INFOSET. */
    private final boolean nonConformingAndStreaming;

    /** Set by the constructor when the streamability policy is FATAL. */
    private final boolean conformingAndStreaming;

    /** Constructor argument; when true a character buffer is allocated. */
    private final boolean coalescingText;

    /** The current state of the token-handling state machine. */
    private Phase phase = Phase.INITIAL;

    /** The tokenizer driving this builder; assigned in start(Tokenizer). */
    protected Tokenizer tokenizer;

    /** Receiver for errors and warnings; reporting is skipped while null. */
    private ErrorHandler errorHandler;

    private DocumentModeHandler documentModeHandler;

    /** Selects which doctype diagnostics doctype() emits. */
    private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;

    private int
cdataOrRcdataTimesToPop; 195 196 private boolean scriptingEnabled = false; 197 198 private boolean needToDropLF; 199 200 private boolean wantingComments; 201 202 private String context; 203 204 private Phase previousPhaseBeforeTrailingEnd; 205 206 private StackNode<T>[] stack; 207 208 private int currentPtr = -1; 209 210 private StackNode<T>[] listOfActiveFormattingElements; 211 212 private int listPtr = -1; 213 214 private T formPointer; 215 216 private T headPointer; 217 218 private boolean reportingDoctype = true; 219 220 private char[] charBuffer; 221 222 private int charBufferLen = 0; 223 224 protected TreeBuilder(XmlViolationPolicy streamabilityViolationPolicy, boolean coalescingText) { 225 this.conformingAndStreaming = streamabilityViolationPolicy == XmlViolationPolicy.FATAL; 226 this.nonConformingAndStreaming = streamabilityViolationPolicy == XmlViolationPolicy.ALTER_INFOSET; 227 this.coalescingText = coalescingText; 228 if (coalescingText) { 229 charBuffer = new char[1024]; 230 } 231 } 232 233 /** 234 * Reports an condition that would make the infoset incompatible with XML 235 * 1.0 as fatal. 236 * 237 * @throws SAXException 238 * @throws SAXParseException 239 */ 240 protected final void fatal() throws SAXException { 241 SAXParseException spe = new SAXParseException("Last error required non-streamable recovery.", tokenizer); 242 if (errorHandler != null) { 243 errorHandler.fatalError(spe); 244 } 245 throw spe; 246 } 247 protected final void fatal(Exception e) throws SAXException { 248 SAXParseException spe = new SAXParseException(e.getMessage(), tokenizer, e);; 249 if (errorHandler != null) { 250 errorHandler.fatalError(spe); 251 } 252 throw spe; 253 } 254 255 /** 256 * Reports a Parse Error. 
257 * 258 * @param message 259 * the message 260 * @throws SAXException 261 */ 262 protected final void err(String message) throws SAXException { 263 if (errorHandler == null) { 264 return; 265 } 266 SAXParseException spe = new SAXParseException(message, tokenizer); 267 errorHandler.error(spe); 268 } 269 270 /** 271 * Reports a warning 272 * 273 * @param message 274 * the message 275 * @throws SAXException 276 */ 277 protected final void warn(String message) throws SAXException { 278 if (errorHandler == null) { 279 return; 280 } 281 SAXParseException spe = new SAXParseException(message, tokenizer); 282 errorHandler.warning(spe); 283 } 284 285 public final void start(Tokenizer self) throws SAXException { 286 tokenizer = self; 287 stack = new StackNode[64]; 288 listOfActiveFormattingElements = new StackNode[64]; 289 needToDropLF = false; 290 cdataOrRcdataTimesToPop = 0; 291 currentPtr = -1; 292 formPointer = null; 293 wantingComments = wantsComments(); 294 start(context != null); 295 if (context == null) { 296 phase = Phase.INITIAL; 297 } else { 298 T elt = createHtmlElementSetAsRoot(tokenizer.newAttributes()); 299 StackNode<T> node = new StackNode<T>("html", elt); 300 currentPtr++; 301 stack[currentPtr] = node; 302 resetTheInsertionMode(); 303 if ("title" == context || "textarea" == context) { 304 tokenizer.setContentModelFlag(ContentModelFlag.RCDATA, context); 305 } else if ("style" == context || "script" == context || "xmp" == context || "iframe" == context || "noembed" == context || "noframes" == context || (scriptingEnabled && "noscript" == context)) { 306 tokenizer.setContentModelFlag(ContentModelFlag.CDATA, context); 307 } else if ("plaintext" == context) { 308 tokenizer.setContentModelFlag(ContentModelFlag.PLAINTEXT, context); 309 } else { 310 tokenizer.setContentModelFlag(ContentModelFlag.PCDATA, context); 311 } 312 } 313 } 314 315 public final void doctype(String name, String publicIdentifier, 316 String systemIdentifier, boolean correct) throws 
SAXException {
        // A doctype token is not a character token, so a pending
        // "drop the next LF" request is cancelled.
        needToDropLF = false;
        switch (phase) {
            case INITIAL:
                /*
                 * A DOCTYPE token If the DOCTYPE token's name does not
                 * case-insensitively match the string "HTML", or if the token's
                 * public identifier is not missing, or if the token's system
                 * identifier is not missing, then there is a parse error.
                 * Conformance checkers may, instead of reporting this error,
                 * switch to a conformance checking mode for another language
                 * (e.g. based on the DOCTYPE token a conformance checker could
                 * recognise that the document is an HTML4-era document, and
                 * defer to an HTML4 conformance checker.)
                 *
                 * Append a DocumentType node to the Document node, with the
                 * name attribute set to the name given in the DOCTYPE token;
                 * the publicId attribute set to the public identifier given in
                 * the DOCTYPE token, or the empty string if the public
                 * identifier was not set; the systemId attribute set to the
                 * system identifier given in the DOCTYPE token, or the empty
                 * string if the system identifier was not set; and the other
                 * attributes specific to DocumentType objects set to null and
                 * empty lists as appropriate. Associate the DocumentType node
                 * with the Document object so that it is returned as the value
                 * of the doctype attribute of the Document object.
                 */
                if (reportingDoctype ) {
                    // Missing identifiers are reported as empty strings.
                    appendDoctypeToDocument(name, publicIdentifier == null ? ""
                            : publicIdentifier, systemIdentifier == null ? ""
                            : systemIdentifier);
                }
                /*
                 * Then, if the DOCTYPE token matches one of the conditions in
                 * the following list, then set the document to quirks mode:
                 *
                 * Otherwise, if the DOCTYPE token matches one of the conditions
                 * in the following list, then set the document to limited
                 * quirks mode: + The public identifier is set to: "-//W3C//DTD
                 * XHTML 1.0 Frameset//EN" + The public identifier is set to:
                 * "-//W3C//DTD XHTML 1.0 Transitional//EN" + The system
                 * identifier is not missing and the public identifier is set
                 * to: "-//W3C//DTD HTML 4.01 Frameset//EN" + The system
                 * identifier is not missing and the public identifier is set
                 * to: "-//W3C//DTD HTML 4.01 Transitional//EN"
                 *
                 * The name, system identifier, and public identifier strings
                 * must be compared to the values given in the lists above in a
                 * case-insensitive manner.
                 */
                // NOTE(review): both identifiers may be null here (see the
                // null checks above); how toAsciiLowerCase treats null is not
                // visible in this part of the file -- confirm.
                String publicIdentifierLC = toAsciiLowerCase(publicIdentifier);
                String systemIdentifierLC = toAsciiLowerCase(systemIdentifier);
                // Every branch below classifies the doctype as quirky, almost
                // standards or standards, in that order, and reports the mode
                // through documentModeInternal with the identifiers in their
                // original case. The branches differ only in the diagnostics
                // they emit and in the last argument of documentModeInternal
                // (presumably an "HTML4-specific checking" flag -- confirm).
                switch (doctypeExpectation) {
                    case HTML:
                        // Anything but a doctype without identifiers is an error.
                        if (isQuirky(name, publicIdentifierLC,
                                systemIdentifierLC, correct)) {
                            err("Quirky doctype.");
                            documentModeInternal(DocumentMode.QUIRKS_MODE,
                                    publicIdentifier, systemIdentifier, false);
                        } else if (isAlmostStandards(publicIdentifierLC,
                                systemIdentifierLC)) {
                            err("Almost standards mode doctype.");
                            documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, false);
                        } else {
                            if (!(publicIdentifier == null && systemIdentifier == null)) {
                                err("Legacy doctype.");
                            }
                            documentModeInternal(DocumentMode.STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, false);
                        }
                        break;
                    case HTML401_STRICT:
                        // Only the exact HTML 4.01 Strict public identifier is
                        // accepted; a different system identifier is a warning.
                        tokenizer.turnOnAdditionalHtml4Errors();
                        if (isQuirky(name, publicIdentifierLC,
                                systemIdentifierLC, correct)) {
                            err("Quirky doctype.");
                            documentModeInternal(DocumentMode.QUIRKS_MODE,
                                    publicIdentifier, systemIdentifier, true);
                        } else if (isAlmostStandards(publicIdentifierLC,
                                systemIdentifierLC)) {
                            err("Almost standards mode doctype.");
                            documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, true);
                        } else {
                            if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
                                if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
                                    warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
                                }
                            } else {
                                err("The doctype was not the HTML 4.01 Strict doctype.");
                            }
                            documentModeInternal(DocumentMode.STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, true);
                        }
                        break;
                    case HTML401_TRANSITIONAL:
                        // Only the HTML 4.01 Transitional public identifier
                        // together with a system identifier is accepted.
                        tokenizer.turnOnAdditionalHtml4Errors();
                        if (isQuirky(name, publicIdentifierLC,
                                systemIdentifierLC, correct)) {
                            err("Quirky doctype.");
                            documentModeInternal(DocumentMode.QUIRKS_MODE,
                                    publicIdentifier, systemIdentifier, true);
                        } else if (isAlmostStandards(publicIdentifierLC,
                                systemIdentifierLC)) {
                            if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)
                                    && systemIdentifier != null) {
                                if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
                                    warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
                                }
                            } else {
                                err("The doctype was not a non-quirky HTML 4.01 Transitional doctype.");
                            }
                            documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, true);
                        } else {
                            err("The doctype was not the HTML 4.01 Transitional doctype.");
                            documentModeInternal(DocumentMode.STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, true);
                        }
                        break;
                    case AUTO:
                        // HTML4 checking is switched on only when the public
                        // identifier (original case) is a known HTML4 one.
                        boolean html4 = isHtml4Doctype(publicIdentifier);
                        if (html4) {
                            tokenizer.turnOnAdditionalHtml4Errors();
                        }
                        if (isQuirky(name, publicIdentifierLC,
                                systemIdentifierLC, correct)) {
                            err("Quirky doctype.");
                            documentModeInternal(DocumentMode.QUIRKS_MODE,
                                    publicIdentifier, systemIdentifier, html4);
                        } else if (isAlmostStandards(publicIdentifierLC,
                                systemIdentifierLC)) {
                            if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
                                // NOTE(review): this identifier is listed in
                                // HTML4_PUBLIC_IDS, so html4 is already true
                                // and this call appears to repeat the one
                                // above -- confirm it is harmless.
                                tokenizer.turnOnAdditionalHtml4Errors();
                                if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
                                    warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
                                }
                            } else {
                                err("Almost standards mode doctype.");
                            }
                            documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, html4);
                        } else {
                            if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
                                // NOTE(review): same apparent repetition as in
                                // the Transitional branch above.
                                tokenizer.turnOnAdditionalHtml4Errors();
                                if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
                                    warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
                                }
                            } else {
                                if (!(publicIdentifier == null && systemIdentifier == null)) {
                                    err("Legacy doctype.");
                                }
                            }
                            documentModeInternal(DocumentMode.STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, html4);
                        }
                        break;
                    case NO_DOCTYPE_ERRORS:
                        // Same classification, but no diagnostics at all.
                        if (isQuirky(name, publicIdentifierLC,
                                systemIdentifierLC, correct)) {
                            documentModeInternal(DocumentMode.QUIRKS_MODE,
                                    publicIdentifier, systemIdentifier, false);
                        } else if (isAlmostStandards(publicIdentifierLC,
                                systemIdentifierLC)) {
                            documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, false);
                        } else {
                            documentModeInternal(DocumentMode.STANDARDS_MODE,
                                    publicIdentifier, systemIdentifier, false);
                        }
                        break;
                }

                /*
                 *
                 * Then, switch to the root element phase of the tree
                 * construction stage.
494 * 495 * 496 */ 497 phase = Phase.ROOT_ELEMENT; 498 return; 499 default: 500 /* 501 * A DOCTYPE token Parse error. 502 */ 503 err("Stray doctype."); 504 /* 505 * Ignore the token. 506 */ 507 return; 508 } 509 } 510 511 private boolean isHtml4Doctype(String publicIdentifier) { 512 if (publicIdentifier != null 513 && (Arrays.binarySearch(HTML4_PUBLIC_IDS, publicIdentifier) > -1)) { 514 return true; 515 } 516 return false; 517 } 518 519 public final void comment(char[] buf, int length) throws SAXException { 520 needToDropLF = false; 521 if (wantingComments) { 522 switch (phase) { 523 case INITIAL: 524 case ROOT_ELEMENT: 525 case TRAILING_END: 526 /* 527 * A comment token Append a Comment node to the Document 528 * object with the data attribute set to the data given in 529 * the comment token. 530 */ 531 appendCommentToDocument(buf, 0, length); 532 return; 533 case AFTER_BODY: 534 /* 535 * * A comment token Append a Comment node to the first 536 * element in the stack of open elements (the html element), 537 * with the data attribute set to the data given in the 538 * comment token. 539 * 540 */ 541 flushCharacters(); 542 appendComment(stack[0].node, buf, 0, length); 543 return; 544 default: 545 /* 546 * * A comment token Append a Comment node to the current 547 * node with the data attribute set to the data given in the 548 * comment token. 
549 * 550 */ 551 flushCharacters(); 552 appendComment(stack[currentPtr].node, buf, 0, length); 553 return; 554 } 555 } 556 } 557 558 /** 559 * @see nu.validator.htmlparser.impl.TokenHandler#characters(char[], int, int) 560 */ 561 public final void characters(char[] buf, int start, int length) 562 throws SAXException { 563 if (needToDropLF) { 564 if (buf[start] == '\n') { 565 start++; 566 length--; 567 if (length == 0) { 568 return; 569 } 570 } 571 needToDropLF = false; 572 } else if (cdataOrRcdataTimesToPop > 0) { 573 accumulateCharacters(buf, start, length); 574 return; 575 } 576 577 // optimize the most common case 578 if (phase == Phase.IN_BODY || phase == Phase.IN_CELL 579 || phase == Phase.IN_CAPTION) { 580 reconstructTheActiveFormattingElements(); 581 accumulateCharacters(buf, start, length); 582 return; 583 } 584 585 int end = start + length; 586 loop: for (int i = start; i < end; i++) { 587 switch (buf[i]) { 588 case ' ': 589 case '\t': 590 case '\n': 591 case '\u000B': 592 case '\u000C': 593 /* 594 * A character token that is one of one of U+0009 CHARACTER 595 * TABULATION, U+000A LINE FEED (LF), U+000B LINE 596 * TABULATION, U+000C FORM FEED (FF), or U+0020 SPACE 597 */ 598 switch (phase) { 599 case INITIAL: 600 case ROOT_ELEMENT: 601 /* 602 * Ignore the token. 603 */ 604 start = i + 1; 605 continue; 606 case BEFORE_HEAD: 607 case IN_HEAD: 608 case IN_HEAD_NOSCRIPT: 609 case AFTER_HEAD: 610 case IN_TABLE: 611 case IN_COLUMN_GROUP: 612 case IN_TABLE_BODY: 613 case IN_ROW: 614 case IN_FRAMESET: 615 case AFTER_FRAMESET: 616 /* 617 * Append the character to the current node. 618 */ 619 continue; 620 case IN_BODY: 621 case IN_CELL: 622 case IN_CAPTION: 623 // XXX is this dead code? 624 if (start < i) { 625 accumulateCharacters(buf, start, i 626 - start); 627 start = i; 628 } 629 630 /* 631 * Reconstruct the active formatting elements, if 632 * any. 633 */ 634 reconstructTheActiveFormattingElements(); 635 /* Append the token's character to the current node. 
*/ 636 break loop; 637 case IN_SELECT: 638 break loop; 639 case AFTER_BODY: 640 if (start < i) { 641 accumulateCharacters(buf, start, i 642 - start); 643 start = i; 644 } 645 /* 646 * Reconstruct the active formatting elements, if 647 * any. 648 */ 649 // XXX bug? 650 reconstructTheActiveFormattingElements(); 651 /* Append the token's character to the current node. */ 652 continue; 653 case TRAILING_END: 654 if (conformingAndStreaming) { 655 return; 656 } 657 if (previousPhaseBeforeTrailingEnd == Phase.AFTER_FRAMESET) { 658 continue; 659 } else { 660 if (start < i) { 661 accumulateCharacters(buf, start, i 662 - start); 663 start = i; 664 } 665 /* 666 * Reconstruct the active formatting elements, 667 * if any. 668 */ 669 // XXX bug? 670 reconstructTheActiveFormattingElements(); 671 /* 672 * Append the token's character to the current 673 * node. 674 */ 675 continue; 676 } 677 } 678 default: 679 /* 680 * A character token that is not one of one of U+0009 681 * CHARACTER TABULATION, U+000A LINE FEED (LF), U+000B LINE 682 * TABULATION, U+000C FORM FEED (FF), or U+0020 SPACE 683 */ 684 switch (phase) { 685 case INITIAL: 686 /* 687 * Parse error. 688 */ 689 if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) { 690 err("Non-space characters found without seeing a doctype first."); 691 } 692 /* 693 * 694 * Set the document to quirks mode. 695 */ 696 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, 697 false); 698 /* 699 * Then, switch to the root element phase of the 700 * tree construction stage 701 */ 702 phase = Phase.ROOT_ELEMENT; 703 /* 704 * and reprocess the current token. 705 * 706 * 707 */ 708 i--; 709 continue; 710 case ROOT_ELEMENT: 711 /* 712 * Create an HTMLElement node with the tag name 713 * html, in the HTML namespace. Append it to the 714 * Document object. 715 */ 716 appendHtmlElementToDocumentAndPush(); 717 /* Switch to the main phase */ 718 phase = Phase.BEFORE_HEAD; 719 /* 720 * reprocess the current token. 
721 * 722 */ 723 i--; 724 continue; 725 case BEFORE_HEAD: 726 if (start < i) { 727 accumulateCharacters(buf, start, i 728 - start); 729 start = i; 730 } 731 /* 732 * /*Act as if a start tag token with the tag name 733 * "head" and no attributes had been seen, 734 */ 735 appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES); 736 phase = Phase.IN_HEAD; 737 /* 738 * then reprocess the current token. 739 * 740 * This will result in an empty head element being 741 * generated, with the current token being 742 * reprocessed in the "after head" insertion mode. 743 */ 744 i--; 745 continue; 746 case IN_HEAD: 747 if (start < i) { 748 accumulateCharacters(buf, start, i 749 - start); 750 start = i; 751 } 752 /* 753 * Act as if an end tag token with the tag name 754 * "head" had been seen, 755 */ 756 pop(); 757 phase = Phase.AFTER_HEAD; 758 /* 759 * and reprocess the current token. 760 */ 761 i--; 762 continue; 763 case IN_HEAD_NOSCRIPT: 764 if (start < i) { 765 accumulateCharacters(buf, start, i 766 - start); 767 start = i; 768 } 769 /* 770 * Parse error. Act as if an end tag with the tag 771 * name "noscript" had been seen 772 */ 773 err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D."); 774 pop(); 775 phase = Phase.IN_HEAD; 776 /* 777 * and reprocess the current token. 778 */ 779 i--; 780 continue; 781 case AFTER_HEAD: 782 if (start < i) { 783 accumulateCharacters(buf, start, i 784 - start); 785 start = i; 786 } 787 /* 788 * Act as if a start tag token with the tag name 789 * "body" and no attributes had been seen, 790 */ 791 appendToCurrentNodeAndPushBodyElement(); 792 phase = Phase.IN_BODY; 793 /* 794 * and then reprocess the current token. 795 */ 796 i--; 797 continue; 798 case IN_BODY: 799 case IN_CELL: 800 case IN_CAPTION: 801 if (start < i) { 802 accumulateCharacters(buf, start, i 803 - start); 804 start = i; 805 } 806 /* 807 * Reconstruct the active formatting elements, if 808 * any. 
809 */ 810 reconstructTheActiveFormattingElements(); 811 /* Append the token's character to the current node. */ 812 break loop; 813 case IN_TABLE: 814 case IN_TABLE_BODY: 815 case IN_ROW: 816 if (start < i) { 817 accumulateCharacters(buf, start, i 818 - start); 819 } 820 reconstructTheActiveFormattingElements(); 821 appendCharMayFoster(buf, i); 822 start = i + 1; 823 continue; 824 case IN_COLUMN_GROUP: 825 /* 826 * Act as if an end tag with the tag name "colgroup" 827 * had been seen, and then, if that token wasn't 828 * ignored, reprocess the current token. 829 */ 830 if (currentPtr == 0) { 831 err("Non-space in \u201Ccolgroup\u201D when parsing fragment."); 832 continue; 833 } 834 pop(); 835 phase = Phase.IN_TABLE; 836 i--; 837 continue; 838 case IN_SELECT: 839 break loop; 840 case AFTER_BODY: 841 err("Non-space character after body."); 842 if (conformingAndStreaming) { 843 fatal(); 844 } 845 phase = Phase.IN_BODY; 846 i--; 847 continue; 848 case IN_FRAMESET: 849 if (start < i) { 850 accumulateCharacters(buf, start, i 851 - start); 852 start = i; 853 } 854 /* 855 * Parse error. 856 */ 857 err("Non-space in \u201Cframeset\u201D."); 858 /* 859 * Ignore the token. 860 */ 861 start = i + 1; 862 continue; 863 case AFTER_FRAMESET: 864 if (start < i) { 865 accumulateCharacters(buf, start, i 866 - start); 867 start = i; 868 } 869 /* 870 * Parse error. 871 */ 872 err("Non-space after \u201Cframeset\u201D."); 873 /* 874 * Ignore the token. 875 */ 876 start = i + 1; 877 continue; 878 case TRAILING_END: 879 /* 880 * Parse error. 881 */ 882 err("Non-space character in page trailer."); 883 if (conformingAndStreaming) { 884 fatal(); 885 } 886 /* 887 * Switch back to the main phase and reprocess the 888 * token. 
*/
                            phase = previousPhaseBeforeTrailingEnd;
                            i--;
                            continue;
                    }
            }
        }
        // Whatever is still pending at the end of the run is accumulated.
        if (start < end) {
            accumulateCharacters(buf, start, end - start);
        }
    }

    /**
     * Handles the end of the input.
     *
     * Buffered characters are flushed first. Then the current phase is acted
     * upon repeatedly: missing html, head and body elements are synthesized,
     * open elements are reported, and the phase is advanced until a phase is
     * reached that leaves the loop. Whether or not an exception is thrown,
     * the stack and the list of active formatting elements are released and
     * the <code>end()</code> hook is called.
     *
     * @throws SAXException
     *             if error reporting or one of the invoked hooks throws
     */
    public final void eof() throws SAXException {
        try {
            flushCharacters();
            eofloop: for (;;) {
                switch (phase) {
                    case INITIAL:
                        /*
                         * Parse error.
                         */
                        if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) {
                            err("End of file seen without seeing a doctype first.");
                        }
                        /*
                         *
                         * Set the document to quirks mode.
                         */
                        documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
                                false);
                        /*
                         * Then, switch to the root element phase of the tree
                         * construction stage
                         */
                        phase = Phase.ROOT_ELEMENT;
                        /*
                         * and reprocess the current token.
                         */
                        continue;
                    case ROOT_ELEMENT:
                        /*
                         * Create an HTMLElement node with the tag name html, in
                         * the HTML namespace. Append it to the Document object.
                         */
                        appendHtmlElementToDocumentAndPush();
                        /* Switch to the main phase */
                        phase = Phase.BEFORE_HEAD;
                        /*
                         * reprocess the current token.
                         */
                        continue;
                    case BEFORE_HEAD:
                        // Synthesize an attribute-less head element.
                        appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES);
                        phase = Phase.IN_HEAD;
                        continue;
                    case IN_HEAD:
                        if (currentPtr > 1) {
                            err("End of file seen and there were open elements.");
                        }
                        // Pop until only the bottom entry (index 0) is left.
                        while (currentPtr > 0) {
                            pop();
                        }
                        phase = Phase.AFTER_HEAD;
                        continue;
                    case IN_HEAD_NOSCRIPT:
                        err("End of file seen and there were open elements.");
                        // Pop until two entries (indexes 0 and 1) are left.
                        while (currentPtr > 1) {
                            pop();
                        }
                        phase = Phase.IN_HEAD;
                        continue;
                    case AFTER_HEAD:
                        // Synthesize the body element.
                        appendToCurrentNodeAndPushBodyElement();
                        phase = Phase.IN_BODY;
                        continue;
                    case IN_BODY:
                    case IN_TABLE:
                    case IN_CAPTION:
                    case IN_COLUMN_GROUP:
                    case IN_TABLE_BODY:
                    case IN_ROW:
                    case IN_CELL:
                    case IN_SELECT:
                        /*
                         * Generate implied end tags.
                         */
                        generateImpliedEndTags();
                        /*
                         * If there are more than two nodes on the stack of open
                         * elements,
                         */
                        if (currentPtr > 1) {
                            err("End of file seen and there were open elements.");
                        } else if (currentPtr == 1 && stack[1].name != "body") {
                            /*
                             * or if there are two nodes but the second node is
                             * not a body node, this is a parse error.
                             */
                            err("End of file seen and there were open elements.");
                        }
                        if (context != null) {
                            // NOTE(review): whenever the condition below is
                            // true, one of the two checks above has already
                            // reported the same message, so fragment parses
                            // appear to get the error twice -- confirm
                            // whether that is intended.
                            if (currentPtr > 0 && stack[1].name != "body") {
                                /*
                                 * Otherwise, if the parser was originally
                                 * created as part of the HTML fragment parsing
                                 * algorithm, and there's more than one element
                                 * in the stack of open elements, and the second
                                 * node on the stack of open elements is not a
                                 * body node, then this is a parse error.
                                 * (fragment case)
                                 */
                                err("End of file seen and there were open elements.");
                            }
                        }

                        /* Stop parsing. */
                        // The body-closed hook is only invoked for whole
                        // documents, with the node stored at stack index 1.
                        if (context == null) {
                            bodyClosed(stack[1].node);
                        }
                        phase = Phase.AFTER_BODY;
                        continue;
                        /*
                         * This fails because it doesn't imply HEAD and BODY tags.
                         * We should probably expand out the insertion modes and
                         * merge them with phases and then put the three things here
                         * into each insertion mode instead of trying to factor them
                         * out so carefully.
                         *
                         */
                    case IN_FRAMESET:
                        // Leaves the loop directly; the html-closed hook is
                        // not invoked on this path.
                        err("End of file seen and there were open elements.");
                        break eofloop;
                    case AFTER_BODY:
                    case AFTER_FRAMESET:
                        if (context == null) {
                            htmlClosed(stack[0].node);
                        }
                        // Falls through to TRAILING_END to leave the loop.
                    case TRAILING_END:
                        break eofloop;
                }
            }
        } finally {
            // XXX close elts for SAX
            /* Stop parsing. */
            stack = null;
            listOfActiveFormattingElements = null;
            end();
        }
    }

    public final void startTag(String name, Attributes attributes)
            throws SAXException {
        needToDropLF = false;
        for (;;) {
            switch (phase) {
                case IN_TABLE_BODY:
                    if ("tr" == name) {
                        clearStackBackTo(findLastInTableScopeOrRootTbodyTheadTfoot());
                        appendToCurrentNodeAndPushElement(name, attributes);
                        phase = Phase.IN_ROW;
                        return;
                    } else if ("td" == name || "th" == name) {
                        err("\u201C" + name + "\u201D start tag in table body.");
                        clearStackBackTo(findLastInTableScopeOrRootTbodyTheadTfoot());
                        appendToCurrentNodeAndPushElement("tr",
                                EmptyAttributes.EMPTY_ATTRIBUTES);
                        phase = Phase.IN_ROW;
                        continue;
                    } else if ("caption" == name || "col" == name
                            || "colgroup" == name || "tbody" == name
                            || "tfoot" == name || "thead" == name) {
                        int eltPos = findLastInTableScopeOrRootTbodyTheadTfoot();
                        if (eltPos == 0) {
                            err("Stray \u201C" + name + "\u201D start tag.");
                            return;
                        } else {
                            clearStackBackTo(eltPos);
                            pop();
                            phase = Phase.IN_TABLE;
                            continue;
                        }
                    } else {
                        // fall through to IN_TABLE
                    }
                case IN_ROW:
                    if ("td" == name || "th" == name) {
                        clearStackBackTo(findLastOrRoot("tr"));
                        appendToCurrentNodeAndPushElement(name, attributes);
                        phase = Phase.IN_CELL;
                        insertMarker();
                        return;
                    } else if ("caption" == name || "col" == name
                            || "colgroup" == name || "tbody" == name
                            || "tfoot" == name || "thead" == name
                            || "tr" == name) {
                        int eltPos = findLastOrRoot("tr");
                        if (eltPos == 0) {
                            assert context != null;
                            err("No table row to close.");
                            return;
                        }
                        clearStackBackTo(eltPos);
                        pop();
                        phase = Phase.IN_TABLE_BODY;
                        continue;
                    } else {
                        // fall through to IN_TABLE
                    }
                case IN_TABLE:
                    if ("caption" == name) {
clearStackBackTo(findLastOrRoot("table"));
                        insertMarker();
                        appendToCurrentNodeAndPushElement(name, attributes);
                        phase = Phase.IN_CAPTION;
                        return;
                    } else if ("colgroup" == name) {
                        clearStackBackTo(findLastOrRoot("table"));
                        appendToCurrentNodeAndPushElement(name, attributes);
                        phase = Phase.IN_COLUMN_GROUP;
                        return;
                    } else if ("col" == name) {
                        // Imply a colgroup element, then reprocess the token.
                        clearStackBackTo(findLastOrRoot("table"));
                        appendToCurrentNodeAndPushElement("colgroup",
                                EmptyAttributes.EMPTY_ATTRIBUTES);
                        phase = Phase.IN_COLUMN_GROUP;
                        continue;
                    } else if ("tbody" == name || "tfoot" == name
                            || "thead" == name) {
                        clearStackBackTo(findLastOrRoot("table"));
                        appendToCurrentNodeAndPushElement(name, attributes);
                        phase = Phase.IN_TABLE_BODY;
                        return;
                    } else if ("td" == name || "tr" == name || "th" == name) {
                        // Imply a tbody element, then reprocess the token.
                        clearStackBackTo(findLastOrRoot("table"));
                        appendToCurrentNodeAndPushElement("tbody",
                                EmptyAttributes.EMPTY_ATTRIBUTES);
                        phase = Phase.IN_TABLE_BODY;
                        continue;
                    } else if ("table" == name) {
                        // Close the open table, then reprocess the token in
                        // whatever phase resetTheInsertionMode() selects.
                        err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open.");
                        int eltPos = findLastInTableScope(name);
                        if (eltPos == NOT_FOUND_ON_STACK) {
                            assert context != null;
                            return;
                        }
                        generateImpliedEndTags();
                        // XXX is the next if dead code?
                        if (!isCurrent("table")) {
                            err("Unclosed elements on stack.");
                        }
                        while (currentPtr >= eltPos) {
                            pop();
                        }
                        resetTheInsertionMode();
                        continue;
                    } else {
                        err("Start tag \u201C" + name
                                + "\u201D seen in \u201Ctable\u201D.");
                        // fall through to IN_BODY
                    }
                // Table-structure start tags close the caption (when one is in
                // table scope) and are reprocessed in IN_TABLE; every other
                // tag falls through towards the IN_BODY handling.
                case IN_CAPTION:
                    if ("caption" == name || "col" == name
                            || "colgroup" == name || "tbody" == name
                            || "td" == name || "tfoot" == name || "th" == name
                            || "thead" == name || "tr" == name) {
                        err("Stray \u201C" + name
                                + "\u201D start tag in \u201Ccaption\u201D.");
                        int eltPos = findLastInTableScope("caption");
                        if (eltPos == NOT_FOUND_ON_STACK) {
                            return;
                        }
                        generateImpliedEndTags();
                        if (currentPtr != eltPos) {
                            err("Unclosed elements on stack.");
                        }
                        while (currentPtr >= eltPos) {
                            pop();
                        }
                        clearTheListOfActiveFormattingElementsUpToTheLastMarker();
                        phase = Phase.IN_TABLE;
                        continue;
                    } else {
                        // fall through to IN_BODY
                    }
                // Table-structure start tags close the current cell via
                // closeTheCell() and are then reprocessed; every other tag
                // falls through to the IN_BODY handling.
                case IN_CELL:
                    if ("caption" == name || "col" == name
                            || "colgroup" == name || "tbody" == name
                            || "td" == name || "tfoot" == name || "th" == name
                            || "thead" == name || "tr" == name) {
                        int eltPos = findLastInTableScopeTdTh();
                        if (eltPos == NOT_FOUND_ON_STACK) {
                            err("No cell to close.");
                            return;
                        } else {
                            closeTheCell(eltPos);
                            continue;
                        }
                    } else {
                        // fall through to IN_BODY
                    }
                case IN_BODY:
                    if ("html" == name) {
                        // A repeated html start tag only contributes its
                        // attributes to the root element (stack[0]).
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("base" == name || "link" == name || "meta" == name
                            || "style" == name || "script" == name) {
                        // Fall through to IN_HEAD
                    } else if ("title" == name) {
                        err("\u201Ctitle\u201D element found inside \u201Cbody\u201D.");
                        if (!nonConformingAndStreaming) {
                            pushHeadPointerOntoStack();
                        }
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        // Number of stack entries the matching end tag pops.
                        cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
                                : 2; // pops head
                        tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
                                name);
                        return;
                    } else if ("body" == name) {
                        err("\u201Cbody\u201D start tag found but the \u201Cbody\u201D element is already open.");
                        addAttributesToBody(attributes);
                        return;
                    } else if ("p" == name || "div" == name || "h1" == name
                            || "h2" == name || "h3" == name || "h4" == name
                            || "h5" == name || "h6" == name
                            || "blockquote" == name || "ol" == name
                            || "ul" == name || "dl" == name
                            || "fieldset" == name || "address" == name
                            || "menu" == name || "center" == name
                            || "dir" == name || "listing" == name) {
                        implicitlyCloseP();
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        return;
                    } else if ("pre" == name) {
                        implicitlyCloseP();
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        // Flag is cleared again at the start of the next
                        // startTag/endTag call.
                        needToDropLF = true;
                        return;
                    } else if ("form" == name) {
                        if (formPointer != null) {
                            err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element.");
                            return;
                        } else {
                            implicitlyCloseP();
                            appendToCurrentNodeAndPushFormElementMayFoster(attributes);
                            return;
                        }
                    } else if ("li" == name) {
                        implicitlyCloseP();
                        int eltPos = findLiToPop();
                        if (eltPos < currentPtr) {
                            err("A \u201Cli\u201D start tag was seen but the previous \u201Cli\u201D element had open children.");
                        }
                        while (currentPtr >= eltPos) {
                            pop();
                        }
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        return;
                    } else if ("dd" == name || "dt" == name) {
                        implicitlyCloseP();
                        int eltPos = findDdOrDtToPop();
                        if (eltPos < currentPtr) {
                            err("A definition list item start tag was seen but the previous definition list item element had open children.");
                        }
                        while (currentPtr >= eltPos) {
                            pop();
                        }
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        return;
                    } else if ("plaintext" == name) {
                        implicitlyCloseP();
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        tokenizer.setContentModelFlag(
                                ContentModelFlag.PLAINTEXT, name);
                        return;
                    } else if ("a" == name) {
                        int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a");
                        if (activeAPos != -1) {
                            // An "a" is already active: act as if its end tag
                            // had been seen, then make sure it is gone from
                            // both the stack and the formatting list.
                            err("An \u201Ca\u201D start tag seen with already an active \u201Ca\u201D element.");
                            StackNode<T> activeA = listOfActiveFormattingElements[activeAPos];
                            adoptionAgencyEndTag("a");
                            removeFromStack(activeA);
                            // Look the node up again; its position is not
                            // assumed to be unchanged after the calls above.
                            activeAPos = findInListOfActiveFormattingElements(activeA);
                            if (activeAPos != -1) {
                                removeFromListOfActiveFormattingElements(activeAPos);
                            }
                        }
                        reconstructTheActiveFormattingElements();
                        appendToCurrentNodeAndPushFormattingElementMayFoster(name,
                                attributes);
                        return;
                    } else if ("i" == name || "b" == name || "em" == name
                            || "strong" == name || "font" == name
                            || "big" == name || "s" == name || "small" == name
                            || "strike" == name || "tt" == name || "u" == name) {
                        reconstructTheActiveFormattingElements();
                        appendToCurrentNodeAndPushFormattingElementMayFoster(name,
                                attributes);
                        return;
                    } else if ("nobr" == name) {
                        reconstructTheActiveFormattingElements();
                        if (NOT_FOUND_ON_STACK != findLastInScope("nobr")) {
                            err("\u201Cnobr\u201D start tag seen when there was an open \u201Cnobr\u201D element in scope.");
                            adoptionAgencyEndTag("nobr");
                        }
                        appendToCurrentNodeAndPushFormattingElementMayFoster(name,
                                attributes);
                        return;
                    } else if ("button" == name) {
                        int eltPos = findLastInScope(name);
                        if (eltPos != NOT_FOUND_ON_STACK) {
                            // Close the open button, then reprocess this tag.
                            err("\u201Cbutton\u201D start tag seen when there was an open \u201Cbutton\u201D element in scope.");
                            generateImpliedEndTags();
                            if (!isCurrent("button")) {
                                err("There was an open \u201Cbutton\u201D element in scope with unclosed children.");
                            }
                            while (currentPtr >= eltPos) {
                                pop();
                            }
                            clearTheListOfActiveFormattingElementsUpToTheLastMarker();
                            continue;
                        } else {
                            reconstructTheActiveFormattingElements();
                            // XXX form
                            appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                            insertMarker();
                            return;
                        }
                    } else if ("object" == name || "marquee" == name) {
                        reconstructTheActiveFormattingElements();
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        insertMarker();
                        return;
                    } else if ("xmp" == name) {
                        reconstructTheActiveFormattingElements();
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        cdataOrRcdataTimesToPop = 1;
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else if ("table" == name) {
                        implicitlyCloseP();
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        phase = Phase.IN_TABLE;
                        return;
                    } else if ("br" == name || "img" == name || "embed" == name
                            || "param" == name || "area" == name
                            || "basefont" == name || "bgsound" == name
                            || "spacer" == name || "wbr" == name) {
                        reconstructTheActiveFormattingElements();
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        return;
                    } else if ("hr" == name) {
                        implicitlyCloseP();
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        return;
                    } else if ("image" == name) {
                        // Rename to "img" and reprocess the token.
                        err("Saw a start tag \u201Cimage\u201D.");
                        name = "img";
                        continue;
                    } else if ("input" == name) {
                        reconstructTheActiveFormattingElements();
                        appendVoidElementToCurrentMayFoster(name, attributes, formPointer);
                        return;
                    } else if ("isindex" == name) {
                        // Expands into form > (hr, p > label > (prompt text,
                        // input), hr); ignored when a form is already active.
                        err("\u201Cisindex\u201D seen.");
                        if (formPointer != null) {
                            return;
                        }
                        implicitlyCloseP();
                        // Only the "action" attribute is copied to the form.
                        AttributesImpl formAttrs = tokenizer.newAttributes();
                        int actionIndex = attributes.getIndex("action");
                        if (actionIndex > -1) {
                            formAttrs.addAttribute("action",
                                    attributes.getValue(actionIndex));
                        }
                        appendToCurrentNodeAndPushFormElementMayFoster(formAttrs);
                        appendVoidElementToCurrentMayFoster("hr", EmptyAttributes.EMPTY_ATTRIBUTES);
                        appendToCurrentNodeAndPushElementMayFoster("p",
                                EmptyAttributes.EMPTY_ATTRIBUTES);
                        appendToCurrentNodeAndPushElementMayFoster("label",
                                EmptyAttributes.EMPTY_ATTRIBUTES);
                        // Label text: the "prompt" attribute when present,
                        // otherwise the built-in ISINDEX_PROMPT.
                        int promptIndex = attributes.getIndex("prompt");
                        if (promptIndex > -1) {
                            char[] prompt = attributes.getValue(promptIndex).toCharArray();
                            appendCharacters(stack[currentPtr].node, prompt,
                                    0, prompt.length);
                        } else {
                            // XXX localization
                            appendCharacters(stack[currentPtr].node, ISINDEX_PROMPT,
                                    0, ISINDEX_PROMPT.length);
                        }
                        // The input gets name="isindex" plus every original
                        // attribute except name, action and prompt.
                        AttributesImpl inputAttributes = tokenizer.newAttributes();
                        inputAttributes.addAttribute("name", "isindex");
                        for (int i = 0; i < attributes.getLength(); i++) {
                            String attributeQName = attributes.getQName(i);
                            if (!("name".equals(attributeQName)
                                    || "action".equals(attributeQName) || "prompt".equals(attributeQName))) {
                                inputAttributes.addAttribute(attributeQName,
                                        attributes.getValue(i));
                            }
                        }
                        appendVoidElementToCurrentMayFoster("input", inputAttributes, formPointer);
                        // XXX localization
                        pop(); // label
                        pop(); // p
                        appendVoidElementToCurrentMayFoster("hr", EmptyAttributes.EMPTY_ATTRIBUTES);
                        pop(); // form
                        return;
                    } else if ("textarea" == name) {
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes, formPointer);
                        tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
                                name);
                        cdataOrRcdataTimesToPop = 1;
                        needToDropLF = true;
                        return;
                    } else if ("iframe" == name || "noembed" == name
                            || "noframes" == name
                            || ("noscript" == name && scriptingEnabled)) {
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        cdataOrRcdataTimesToPop = 1;
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else if ("select" == name) {
                        reconstructTheActiveFormattingElements();
                        // XXX form pointer
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        phase = Phase.IN_SELECT;
                        return;
                    } else if ("caption" == name || "col" == name
                            || "colgroup" == name || "frame" == name
                            || "frameset" == name || "head" == name
                            || "option" == name || "optgroup" == name
                            || "tbody" == name || "td" == name
                            || "tfoot" == name || "th" == name
                            || "thead" == name || "tr" == name) {
                        err("Stray start tag \u201C" + name + "\u201D.");
                        return;
                    } else {
                        // Any other start tag becomes an ordinary element.
                        reconstructTheActiveFormattingElements();
                        appendToCurrentNodeAndPushElementMayFoster(name, attributes);
                        return;
                    }
                // Also reached by fall-through from IN_BODY for base, link,
                // meta, style and script.
                case IN_HEAD:
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("base" == name) {
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        return;
                    } else if ("meta" == name || "link" == name) {
                        // Fall through to IN_HEAD_NOSCRIPT
                    } else if ("title" == name) {
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = 1;
                        tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
                                name);
                        return;
                    } else if ("style" == name
                            || ("noscript" == name && scriptingEnabled)) {
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = 1;
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else if ("noscript" == name && !scriptingEnabled) {
                        appendToCurrentNodeAndPushElement(name, attributes);
                        phase = Phase.IN_HEAD_NOSCRIPT;
                        return;
                    } else if ("script" == name) {
                        // XXX need to manage much more stuff here if supporting
                        // document.write()
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = 1;
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else if ("head" == name) {
                        /* Parse error. */
                        err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
                        /* Ignore the token. */
                        return;
                    } else {
                        // Any other tag: pop the current node and reprocess
                        // the token in AFTER_HEAD.
                        pop();
                        phase = Phase.AFTER_HEAD;
                        continue;
                    }
                // Also reached by fall-through from IN_HEAD for meta and link.
                case IN_HEAD_NOSCRIPT:
                    // XXX did Hixie really mean to omit "base" here?
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("link" == name) {
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        return;
                    } else if ("meta" == name) {
                        // XXX do charset stuff
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        return;
                    } else if ("style" == name) {
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = 1;
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else if ("head" == name) {
                        err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
                        return;
                    } else if ("noscript" == name) {
                        err("Start tag for \u201Cnoscript\u201D seen when \u201Cnoscript\u201D was already open.");
                        return;
                    } else {
                        // Pop the current node and reprocess in IN_HEAD.
                        err("Bad start tag in \u201Cnoscript\u201D in \u201Chead\u201D.");
                        pop();
                        phase = Phase.IN_HEAD;
                        continue;
                    }
                case IN_COLUMN_GROUP:
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("col" == name) {
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        return;
                    } else {
                        // Only the root is on the stack: nothing to pop
                        // (asserted to be the fragment case).
                        if (currentPtr == 0) {
                            assert context != null;
                            err("Garbage in \u201Ccolgroup\u201D fragment.");
                            return;
                        }
                        pop();
                        phase = Phase.IN_TABLE;
                        continue;
                    }
                case IN_SELECT:
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("option" == name) {
                        // A new option implicitly closes an open option.
                        if (isCurrent("option")) {
                            pop();
                        }
                        appendToCurrentNodeAndPushElement(name, attributes);
                        return;
                    } else if ("optgroup" == name) {
                        // A new optgroup implicitly closes an open option and
                        // then an open optgroup.
                        if (isCurrent("option")) {
                            pop();
                        }
                        if (isCurrent("optgroup")) {
                            pop();
                        }
                        appendToCurrentNodeAndPushElement(name, attributes);
                        return;
                    } else if ("select" == name) {
                        // A nested select start tag closes the open select;
                        // no new element is created.
                        err("\u201Cselect\u201D start tag where end tag expected.");
                        int eltPos = findLastInTableScope(name);
                        if (eltPos == NOT_FOUND_ON_STACK) {
                            assert context != null;
                            err("No \u201Cselect\u201D in table scope.");
                            return;
                        } else {
                            while (currentPtr >= eltPos) {
                                pop();
                            }
                            resetTheInsertionMode();
                            return;
                        }
                    } else {
                        err("Stray \u201C" + name + "\u201D start tag.");
                        return;
                    }
                case AFTER_BODY:
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else {
                        // Report the error, then go back to IN_BODY and
                        // reprocess the token there.
                        err("Stray \u201C" + name + "\u201D start tag.");
                        if (conformingAndStreaming) {
                            fatal();
                        }
                        phase = Phase.IN_BODY;
                        continue;
                    }
                case IN_FRAMESET:
                    if ("frameset" == name) {
                        appendToCurrentNodeAndPushElement(name, attributes);
                        return;
                    } else if ("frame" == name) {
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        return;
                    } else {
                        // fall through to AFTER_FRAMESET
                    }
                case AFTER_FRAMESET:
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("noframes" == name) {
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = 1;
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else {
                        err("Stray \u201C" + name + "\u201D start tag.");
                        return;
                    }
                case INITIAL:
                    /*
                     * Parse error.
                     */
                    if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) {
                        err("Start tag seen without seeing a doctype first.");
                    }
                    /*
                     *
                     * Set the document to quirks mode.
                     */
                    documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, false);
                    /*
                     * Then, switch to the root element phase of the tree
                     * construction stage
                     */
                    phase = Phase.ROOT_ELEMENT;
                    /*
                     * and reprocess the current token.
                     */
                    continue;
                case ROOT_ELEMENT:
                    // optimize error check and streaming SAX by hoisting
                    // "html" handling here.
                    if ("html" == name) {
                        if (attributes.getLength() == 0) {
                            // This has the right magic side effect that it
                            // makes attributes in SAX Tree mutable.
                            appendHtmlElementToDocumentAndPush();
                        } else {
                            appendHtmlElementToDocumentAndPush(attributes);
                        }
                        phase = Phase.BEFORE_HEAD;
                        return;
                    } else {
                        /*
                         * Create an HTMLElement node with the tag name html, in
                         * the HTML namespace. Append it to the Document object.
                         */
                        appendHtmlElementToDocumentAndPush();
                        /* Switch to the main phase */
                        phase = Phase.BEFORE_HEAD;
                        /*
                         * reprocess the current token.
                         *
                         */
                        continue;
                    }
                case BEFORE_HEAD:
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("head" == name) {
                        /*
                         * A start tag whose tag name is "head"
                         *
                         * Create an element for the token.
                         *
                         * Set the head element pointer to this new element
                         * node.
                         *
                         * Append the new element to the current node and push
                         * it onto the stack of open elements.
                         */
                        appendToCurrentNodeAndPushHeadElement(attributes);
                        /*
                         *
                         * Change the insertion mode to "in head".
                         *
                         */
                        phase = Phase.IN_HEAD;
                        return;
                    }

                    /*
                     * Any other start tag token
                     */

                    /*
                     * Act as if a start tag token with the tag name "head" and
                     * no attributes had been seen,
                     */
                    appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES);
                    phase = Phase.IN_HEAD;
                    /*
                     * then reprocess the current token.
                     *
                     * This will result in an empty head element being
                     * generated, with the current token being reprocessed in
                     * the "after head" insertion mode.
                     */
                    continue;
                // For base, link, meta, script, style and title seen here,
                // the head pointer is pushed back onto the stack first,
                // unless nonConformingAndStreaming is set.
                case AFTER_HEAD:
                    if ("html" == name) {
                        err("Stray \u201Chtml\u201D start tag.");
                        addAttributesToElement(stack[0].node, attributes);
                        return;
                    } else if ("body" == name) {
                        if (attributes.getLength() == 0) {
                            // This has the right magic side effect that it
                            // makes attributes in SAX Tree mutable.
                            appendToCurrentNodeAndPushBodyElement();
                        } else {
                            appendToCurrentNodeAndPushBodyElement(attributes);
                        }
                        phase = Phase.IN_BODY;
                        return;
                    } else if ("frameset" == name) {
                        appendToCurrentNodeAndPushElement(name, attributes);
                        phase = Phase.IN_FRAMESET;
                        return;
                    } else if ("base" == name) {
                        err("\u201Cbase\u201D element outside \u201Chead\u201D.");
                        if (!nonConformingAndStreaming) {
                            pushHeadPointerOntoStack();
                        }
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        if (!nonConformingAndStreaming) {
                            pop(); // head
                        }
                        return;
                    } else if ("link" == name) {
                        err("\u201Clink\u201D element outside \u201Chead\u201D.");
                        if (!nonConformingAndStreaming) {
                            pushHeadPointerOntoStack();
                        }
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        if (!nonConformingAndStreaming) {
                            pop(); // head
                        }
                        return;
                    } else if ("meta" == name) {
                        err("\u201Cmeta\u201D element outside \u201Chead\u201D.");
                        // XXX do charset stuff
                        if (!nonConformingAndStreaming) {
                            pushHeadPointerOntoStack();
                        }
                        appendVoidElementToCurrentMayFoster(name, attributes);
                        if (!nonConformingAndStreaming) {
                            pop(); // head
                        }
                        return;
                    } else if ("script" == name) {
                        err("\u201Cscript\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
                        if (!nonConformingAndStreaming) {
                            pushHeadPointerOntoStack();
                        }
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
                                : 2; // pops head
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else if ("style" == name) {
                        err("\u201Cstyle\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
                        if (!nonConformingAndStreaming) {
                            pushHeadPointerOntoStack();
                        }
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
                                : 2; // pops head
                        tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
                                name);
                        return;
                    } else if ("title" == name) {
                        err("\u201Ctitle\u201D element outside \u201Chead\u201D.");
                        if (!nonConformingAndStreaming) {
                            pushHeadPointerOntoStack();
                        }
                        appendToCurrentNodeAndPushElement(name, attributes);
                        cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
                                : 2; // pops head
                        tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
                                name);
                        return;
                    } else {
                        // Any other tag implies a body element; the token is
                        // then reprocessed in IN_BODY.
                        appendToCurrentNodeAndPushBodyElement();
                        phase = Phase.IN_BODY;
                        continue;
                    }
                case TRAILING_END:
                    // Report the stray tag, restore the phase saved in
                    // previousPhaseBeforeTrailingEnd and reprocess.
                    err("Stray \u201C" + name + "\u201D start tag.");
                    if (conformingAndStreaming) {
                        fatal();
                    }
                    phase = previousPhaseBeforeTrailingEnd;
                    continue;
            }
        }
    }

    /**
     * Processes an end tag token.
     *
     * If a preceding start tag recorded a pending pop count in
     * <code>cdataOrRcdataTimesToPop</code>, that many elements are popped and
     * nothing else is done. Otherwise the token is dispatched on the current
     * phase.
     */
    public final void endTag(String name, Attributes attributes)
            throws SAXException {
        needToDropLF = false;
        if (cdataOrRcdataTimesToPop > 0) {
            while (cdataOrRcdataTimesToPop > 0) {
                pop();
                cdataOrRcdataTimesToPop--;
            }
            return;
        }

        for (;;) {
            switch (phase) {
                case IN_ROW:
                    if ("tr" == name) {
                        int eltPos = findLastOrRoot("tr");
                        // Position 0 is the root, i.e. no "tr" was found.
                        if (eltPos == 0) {
                            assert context != null;
                            err("No table row to close.");
                            return;
                        }
                        clearStackBackTo(eltPos);
                        pop();
                        phase = Phase.IN_TABLE_BODY;
                        return;
                    } else if ("table" == name) {
                        int eltPos = findLastOrRoot("tr");
                        if (eltPos == 0) {
                            assert context != null;
1825 err("No table row to close."); 1826 return; 1827 } 1828 clearStackBackTo(eltPos); 1829 pop(); 1830 phase = Phase.IN_TABLE_BODY; 1831 continue; 1832 } else if ("tbody" == name || "thead" == name || "tfoot" == name) { 1833 if (findLastInTableScope(name) == NOT_FOUND_ON_STACK) { 1834 err("Stray end tag \u201C" + name + "\u201D."); 1835 return; 1836 } 1837 int eltPos = findLastOrRoot("tr"); 1838 if (eltPos == 0) { 1839 assert context != null; 1840 err("No table row to close."); 1841 return; 1842 } 1843 clearStackBackTo(eltPos); 1844 pop(); 1845 phase = Phase.IN_TABLE_BODY; 1846 continue; 1847 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name || "td" == name || "th" == name) { 1848 err("Stray end tag \u201C" + name + "\u201D."); 1849 return; 1850 } else { 1851 // fall through to IN_TABLE 1852 } 1853 case IN_TABLE_BODY: 1854 if ("tbody" == name || "tfoot" == name || "thead" == name) { 1855 int eltPos = findLastOrRoot(name); 1856 if (eltPos == 0) { 1857 err("Stray end tag \u201C" + name + "\u201D."); 1858 return; 1859 } 1860 clearStackBackTo(eltPos); 1861 pop(); 1862 phase = Phase.IN_TABLE; 1863 return; 1864 } else if ("table" == name) { 1865 int eltPos = findLastInTableScopeOrRootTbodyTheadTfoot(); 1866 if (eltPos == 0) { 1867 assert context != null; 1868 err("Stray end tag \u201Ctable\u201D."); 1869 return; 1870 } 1871 clearStackBackTo(eltPos); 1872 pop(); 1873 phase = Phase.IN_TABLE; 1874 continue; 1875 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name || "td" == name || "th" == name || "tr" == name) { 1876 err("Stray end tag \u201C" + name + "\u201D."); 1877 return; 1878 } else { 1879 // fall through to IN_TABLE 1880 } 1881 case IN_TABLE: 1882 if ("table" == name) { 1883 int eltPos = findLast("table"); 1884 if (eltPos == NOT_FOUND_ON_STACK) { 1885 assert context != null; 1886 err("Stray end tag \u201Ctable\u201D."); 1887 return; 1888 } 1889 
generateImpliedEndTags(); 1890 if (currentPtr != eltPos) { 1891 err("There were unclosed elements."); 1892 } 1893 while (currentPtr >= eltPos) { 1894 pop(); 1895 } 1896 resetTheInsertionMode(); 1897 return; 1898 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name || "tbody" == name || "td" == name || "tfoot" == name || "th" == name || "thead" == name || "tr" == name) { 1899 err("Stray end tag \u201C" + name + "\u201D."); 1900 return; 1901 } else { 1902 err("Stray end tag \u201C" + name + "\u201D."); 1903 // fall through to IN_BODY 1904 } 1905 case IN_CAPTION: 1906 if ("caption" == name) { 1907 int eltPos = findLastInTableScope("caption"); 1908 if (eltPos == NOT_FOUND_ON_STACK) { 1909 return; 1910 } 1911 generateImpliedEndTags(); 1912 if (currentPtr != eltPos) { 1913 err("Unclosed elements on stack."); 1914 } 1915 while (currentPtr >= eltPos) { 1916 pop(); 1917 } 1918 clearTheListOfActiveFormattingElementsUpToTheLastMarker(); 1919 phase = Phase.IN_TABLE; 1920 return; 1921 } else if ("table" == name) { 1922 err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open."); 1923 int eltPos = findLastInTableScope("caption"); 1924 if (eltPos == NOT_FOUND_ON_STACK) { 1925 return; 1926 } 1927 generateImpliedEndTags(); 1928 if (currentPtr != eltPos) { 1929 err("Unclosed elements on stack."); 1930 } 1931 while (currentPtr >= eltPos) { 1932 pop(); 1933 } 1934 clearTheListOfActiveFormattingElementsUpToTheLastMarker(); 1935 phase = Phase.IN_TABLE; 1936 continue; 1937 } else if ("body" == name || "col" == name || "colgroup" == name || "html" == name || "tbody" == name || "td" == name || "tfoot" == name || "th" == name || "thead" == name || "tr" == name) { 1938 err("Stray end tag \u201C" + name + "\u201D."); 1939 return; 1940 } else { 1941 // fall through to IN_BODY 1942 } 1943 case IN_CELL: 1944 if ("td" == name || "th" == name) { 1945 int eltPos = findLastInTableScope(name); 1946 if (eltPos == NOT_FOUND_ON_STACK) { 1947 
err("Stray end tag \u201C" + name + "\u201D."); 1948 return; 1949 } 1950 generateImpliedEndTags(); 1951 if (!isCurrent(name)) { 1952 err("Unclosed elements."); 1953 } 1954 while (currentPtr >= eltPos) { 1955 pop(); 1956 } 1957 clearTheListOfActiveFormattingElementsUpToTheLastMarker(); 1958 phase = Phase.IN_ROW; 1959 return; 1960 } else if ("table" == name || "tbody" == name || "tfoot" == name || "thead" == name || "tr" == name) { 1961 if (findLastInTableScope(name) == NOT_FOUND_ON_STACK) { 1962 err("Stray end tag \u201C" + name + "\u201D."); 1963 return; 1964 } 1965 closeTheCell(findLastInTableScopeTdTh()); 1966 continue; 1967 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name) { 1968 err("Stray end tag \u201C" + name + "\u201D."); 1969 return; 1970 } else { 1971 // fall through to IN_BODY 1972 } 1973 case IN_BODY: 1974 if ("body" == name) { 1975 if (!isSecondOnStackBody()) { 1976 assert context != null; 1977 err("Stray end tag \u201Cbody\u201D."); 1978 return; 1979 } 1980 assert currentPtr >= 1; 1981 for (int i = 2; i <= currentPtr; i++) { 1982 String stackName = stack[i].name; 1983 if (!("dd" == stackName || "dt" == stackName || "li" == stackName 1984 || "p" == stackName)) { 1985 err("End tag for \u201Cbody\u201D seen but there were unclosed elements."); 1986 break; 1987 } 1988 } 1989 if (conformingAndStreaming) { 1990 while(currentPtr > 1) { 1991 pop(); 1992 } 1993 } 1994 if (context == null) { 1995 bodyClosed(stack[1].node); 1996 } 1997 phase = Phase.AFTER_BODY; 1998 return; 1999 } else if ("html" == name) { 2000 if (!isSecondOnStackBody()) { 2001 assert context != null; 2002 err("Stray end tag \u201Chtml\u201D."); 2003 return; 2004 } 2005 for (int i = 0; i <= currentPtr; i++) { 2006 String stackName = stack[i].name; 2007 if (!("dd" == stackName || "dt" == stackName || "li" == stackName 2008 || "p" == stackName || "tbody" == stackName || "td" == stackName 2009 || "tfoot" == stackName || "th" == stackName || 
"thead" == stackName || "tr" == stackName || "body" == stackName || "html" == stackName)) { 2010 err("End tag for \u201Chtml\u201D seen but there were unclosed elements."); 2011 break; 2012 } 2013 } 2014 if (context == null) { 2015 bodyClosed(stack[1].node); 2016 } 2017 phase = Phase.AFTER_BODY; 2018 continue; 2019 } else if ("div" == name || "blockquote" == name 2020 || "ul" == name || "ol" == name || "pre" == name 2021 || "dl" == name || "fieldset" == name 2022 || "address" == name || "center" == name 2023 || "dir" == name || "listing" == name 2024 || "menu" == name) { 2025 int eltPos = findLastInScope(name); 2026 if (eltPos != NOT_FOUND_ON_STACK) { 2027 generateImpliedEndTags(); 2028 } 2029 if (!isCurrent(name)) { 2030 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements."); 2031 } 2032 while (currentPtr >= eltPos) { 2033 pop(); 2034 } 2035 return; 2036 } else if ("form" == name) { 2037 int eltPos = findLastInScope(name); 2038 if (eltPos != NOT_FOUND_ON_STACK) { 2039 generateImpliedEndTags(); 2040 } 2041 if (!isCurrent(name)) { 2042 err("End tag \u201Cform\u201D seen but there were unclosed elements."); 2043 } else { 2044 pop(); 2045 } 2046 formPointer = null; 2047 return; 2048 } else if ("p" == name) { 2049 if (!isCurrent(name)) { 2050 err("End tag \u201Cp\u201D seen but there were unclosed elements."); 2051 } 2052 int eltPos = findLastInScope(name); 2053 if (eltPos != NOT_FOUND_ON_STACK) { 2054 while (currentPtr >= eltPos) { 2055 pop(); 2056 } 2057 } else { 2058 appendVoidElementToCurrentMayFoster(name, EmptyAttributes.EMPTY_ATTRIBUTES); 2059 } 2060 return; 2061 } else if ("dd" == name || "dt" == name || "li" == name) { 2062 int eltPos = findLastInScope(name); 2063 if (eltPos != NOT_FOUND_ON_STACK) { 2064 generateImpliedEndTagsExceptFor(name); 2065 } 2066 if (!isCurrent(name)) { 2067 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements."); 2068 } 2069 while (currentPtr >= eltPos) { 2070 pop(); 2071 } 2072 return; 
2073 } else if ("h1" == name || "h2" == name || "h3" == name 2074 || "h4" == name || "h5" == name || "h6" == name) { 2075 int eltPos = findLastInScopeHn(); 2076 if (eltPos != NOT_FOUND_ON_STACK) { 2077 generateImpliedEndTags(); 2078 } 2079 if (!isCurrent(name)) { 2080 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements."); 2081 } 2082 while (currentPtr >= eltPos) { 2083 pop(); 2084 } 2085 return; 2086 } else if ("a" == name || "b" == name || "big" == name || "em" == name || "font" == name || "i" == name || "nobr" == name || "s" == name || "small" == name || "strike" == name || "strong" == name || "tt" == name || "u" == name) { 2087 adoptionAgencyEndTag(name); 2088 return; 2089 } else if ("button" == name || "marquee" == name || "object" == name) { 2090 int eltPos = findLastInScope(name); 2091 if (eltPos != NOT_FOUND_ON_STACK) { 2092 generateImpliedEndTags(); 2093 } 2094 if (!isCurrent(name)) { 2095 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements."); 2096 } 2097 while (currentPtr >= eltPos) { 2098 pop(); 2099 } 2100 clearTheListOfActiveFormattingElementsUpToTheLastMarker(); 2101 return; 2102 } else if ("br" == name) { 2103 err("End tag \u201Cbr\u201D."); 2104 reconstructTheActiveFormattingElements(); 2105 appendVoidElementToCurrentMayFoster(name, EmptyAttributes.EMPTY_ATTRIBUTES); 2106 return; 2107 } else if ("area" == name || "basefont" == name || "bgsound" == name || "embed" == name || "hr" == name || "iframe" == name || "image" == name || "img" == name || "input" == name || "isindex" == name || "noembed" == name || "noframes" == name || "param" == name || "select" == name || "spacer" == name || "table" == name || "textarea" == name || "wbr" == name || (scriptingEnabled && "noscript" == name)) { 2108 err("Stray end tag \u201C" + name + "\u201D."); 2109 return; 2110 } else { 2111 if (isCurrent(name)) { 2112 pop(); 2113 return; 2114 } 2115 for(;;) { 2116 generateImpliedEndTags(); 2117 if (isCurrent(name)) { 2118 
pop(); 2119 return; 2120 } 2121 StackNode<T> node = stack[currentPtr]; 2122 if (!(node.scoping || node.special)) { 2123 err("Unclosed element \u201C" + node.name 2124 + "\u201D."); 2125 pop(); 2126 } else { 2127 return; 2128 } 2129 } 2130 } 2131 case IN_COLUMN_GROUP: 2132 if ("colgroup" == name) { 2133 if (currentPtr == 0) { 2134 assert context != null; 2135 err("Garbage in \u201Ccolgroup\u201D fragment."); 2136 return; 2137 } 2138 pop(); 2139 phase = Phase.IN_TABLE; 2140 return; 2141 } else if ("col" == name) { 2142 err("Stray end tag \u201Ccol\u201D."); 2143 return; 2144 } else { 2145 if (currentPtr == 0) { 2146 assert context != null; 2147 err("Garbage in \u201Ccolgroup\u201D fragment."); 2148 return; 2149 } 2150 pop(); 2151 phase = Phase.IN_TABLE; 2152 continue; 2153 } 2154 case IN_SELECT: 2155 if ("option" == name) { 2156 if (isCurrent("option")) { 2157 pop(); 2158 return; 2159 } else { 2160 err("Stray end tag \u201Coption\u201D"); 2161 return; 2162 } 2163 } else if ("optgroup" == name) { 2164 if (isCurrent("option") && "optgroup" == stack[currentPtr - 1].name) { 2165 pop(); 2166 } 2167 if (isCurrent("optgroup")) { 2168 pop(); 2169 } else { 2170 err("Stray end tag \u201Coptgroup\u201D"); 2171 } 2172 return; 2173 } else if ("select" == name) { 2174 int eltPos = findLastInTableScope("select"); 2175 if (eltPos == NOT_FOUND_ON_STACK) { 2176 assert context != null; 2177 err("Stray end tag \u201Cselect\u201D"); 2178 return; 2179 } 2180 while (currentPtr >= eltPos) { 2181 pop(); 2182 } 2183 resetTheInsertionMode(); 2184 return; 2185 } else { 2186 err("Stray end tag \u201C" + name + "\u201D"); 2187 return; 2188 } 2189 case AFTER_BODY: 2190 if ("html" == name) { 2191 if (context != null) { 2192 err("Stray end tag \u201Chtml\u201D"); 2193 return; 2194 } else { 2195 previousPhaseBeforeTrailingEnd = Phase.AFTER_BODY; 2196 if (context == null) { 2197 htmlClosed(stack[0].node); 2198 } 2199 phase = Phase.TRAILING_END; 2200 return; 2201 } 2202 } else { 2203 err("Saw an end 
tag after \u201Cbody\u201D had been closed."); 2204 if (conformingAndStreaming) { 2205 fatal(); 2206 } 2207 phase = Phase.IN_BODY; 2208 continue; 2209 } 2210 case IN_FRAMESET: 2211 if ("frameset" == name) { 2212 if (currentPtr == 0) { 2213 assert context != null; 2214 err("Stray end tag \u201Cframeset\u201D"); 2215 return; 2216 } 2217 pop(); 2218 if ((context == null) && !isCurrent("frameset")) { 2219 phase = Phase.AFTER_FRAMESET; 2220 } 2221 return; 2222 } else { 2223 err("Stray end tag \u201C" + name + "\u201D"); 2224 return; 2225 } 2226 case AFTER_FRAMESET: 2227 if ("html" == name) { 2228 previousPhaseBeforeTrailingEnd = Phase.AFTER_FRAMESET; 2229 if (context == null) { 2230 htmlClosed(stack[0].node); 2231 } 2232 phase = Phase.TRAILING_END; 2233 return; 2234 } else { 2235 err("Stray end tag \u201C" + name + "\u201D"); 2236 return; 2237 } 2238 case INITIAL: 2239 /* 2240 * Parse error. 2241 */ 2242 if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) { 2243 err("End tag seen without seeing a doctype first."); 2244 } 2245 /* 2246 * 2247 * Set the document to quirks mode. 2248 */ 2249 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, false); 2250 /* 2251 * Then, switch to the root element phase of the tree 2252 * construction stage 2253 */ 2254 phase = Phase.ROOT_ELEMENT; 2255 /* 2256 * and reprocess the current token. 2257 */ 2258 continue; 2259 case ROOT_ELEMENT: 2260 /* 2261 * Create an HTMLElement node with the tag name html, in the 2262 * HTML namespace. Append it to the Document object. 2263 */ 2264 appendHtmlElementToDocumentAndPush(); 2265 /* Switch to the main phase */ 2266 phase = Phase.BEFORE_HEAD; 2267 /* 2268 * reprocess the current token. 
2269 * 2270 */ 2271 continue; 2272 case BEFORE_HEAD: 2273 if ("head" == name || "body" == name || "html" == name || "p" == name || "br" == name) { 2274 appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES); 2275 phase = Phase.IN_HEAD; 2276 continue; 2277 } else { 2278 err("Stray end tag \u201C" + name + "\u201D."); 2279 return; 2280 } 2281 case IN_HEAD: 2282 if ("head" == name) { 2283 pop(); 2284 phase = Phase.AFTER_HEAD; 2285 return; 2286 } else if ("body" == name || "html" == name || "p" == name || "br" == name) { 2287 pop(); 2288 phase = Phase.AFTER_HEAD; 2289 continue; 2290 } else { 2291 err("Stray end tag \u201C" + name + "\u201D."); 2292 return; 2293 } 2294 case IN_HEAD_NOSCRIPT: 2295 if ("noscript" == name) { 2296 pop(); 2297 phase = Phase.IN_HEAD; 2298 return; 2299 } else if ("p" == name || "br" == name) { 2300 err("Stray end tag \u201C" + name + "\u201D."); 2301 pop(); 2302 phase = Phase.IN_HEAD; 2303 continue; 2304 } else { 2305 err("Stray end tag \u201C" + name + "\u201D."); 2306 return; 2307 } 2308 case AFTER_HEAD: 2309 appendToCurrentNodeAndPushBodyElement(); 2310 phase = Phase.IN_BODY; 2311 continue; 2312 case TRAILING_END: 2313 err("Stray \u201C" + name + "\u201D end tag."); 2314 if (conformingAndStreaming) { 2315 fatal(); 2316 } 2317 phase = previousPhaseBeforeTrailingEnd; 2318 continue; 2319 } 2320 } 2321 } 2322 2323 private int findLastInTableScopeOrRootTbodyTheadTfoot() { 2324 for (int i = currentPtr; i > 0; i--) { 2325 if (stack[i].name == "tbody" || stack[i].name == "thead" || stack[i].name == "tfoot") { 2326 return i; 2327 } 2328 } 2329 return 0; 2330 } 2331 2332 private int findLast(String name) { 2333 for (int i = currentPtr; i > 0; i--) { 2334 if (stack[i].name == name) { 2335 return i; 2336 } 2337 } 2338 return NOT_FOUND_ON_STACK; 2339 } 2340 2341 private int findLastInTableScope(String name) { 2342 for (int i = currentPtr; i > 0; i--) { 2343 if (stack[i].name == name) { 2344 return i; 2345 } else if (stack[i].name == 
"table") { 2346 return NOT_FOUND_ON_STACK; 2347 } 2348 } 2349 return NOT_FOUND_ON_STACK; 2350 } 2351 2352 private int findLastInScope(String name) { 2353 for (int i = currentPtr; i > 0; i--) { 2354 if (stack[i].name == name) { 2355 return i; 2356 } else if (stack[i].scoping) { 2357 return NOT_FOUND_ON_STACK; 2358 } 2359 } 2360 return NOT_FOUND_ON_STACK; 2361 } 2362 2363 private int findLastInScopeHn() { 2364 for (int i = currentPtr; i > 0; i--) { 2365 String name = stack[i].name; 2366 if ("h1" == name || "h2" == name || "h3" == name || "h4" == name 2367 || "h5" == name || "h6" == name) { 2368 return i; 2369 } else if (stack[i].scoping) { 2370 return NOT_FOUND_ON_STACK; 2371 } 2372 } 2373 return NOT_FOUND_ON_STACK; 2374 } 2375 2376 private void generateImpliedEndTagsExceptFor(String name) throws SAXException { 2377 for (;;) { 2378 String stackName = stack[currentPtr].name; 2379 if (name != stackName && ("p" == stackName || "li" == stackName || "dd" == stackName || "dt" == stackName)) { 2380 pop(); 2381 } else { 2382 return; 2383 } 2384 } 2385 } 2386 2387 private void generateImpliedEndTags() throws SAXException { 2388 for (;;) { 2389 String stackName = stack[currentPtr].name; 2390 if ("p" == stackName || "li" == stackName || "dd" == stackName || "dt" == stackName) { 2391 pop(); 2392 } else { 2393 return; 2394 } 2395 } 2396 } 2397 2398 private boolean isSecondOnStackBody() { 2399 return currentPtr >= 1 && stack[1].name == "body"; 2400 } 2401 2402 private void documentModeInternal(DocumentMode mode, String publicIdentifier, 2403 String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException { 2404 if (documentModeHandler != null) { 2405 documentModeHandler.documentMode(mode, publicIdentifier, 2406 systemIdentifier, html4SpecificAdditionalErrorChecks); 2407 } 2408 documentMode(mode, publicIdentifier, 2409 systemIdentifier, html4SpecificAdditionalErrorChecks); 2410 } 2411 2412 private boolean isAlmostStandards(String publicIdentifierLC, 2413 
String systemIdentifierLC) { 2414 if ("-//w3c//dtd xhtml 1.0 transitional//en".equals(publicIdentifierLC)) { 2415 return true; 2416 } 2417 if ("-//w3c//dtd xhtml 1.0 frameset//en".equals(publicIdentifierLC)) { 2418 return true; 2419 } 2420 if (systemIdentifierLC != null) { 2421 if ("-//w3c//dtd html 4.01 transitional//en".equals(publicIdentifierLC)) { 2422 return true; 2423 } 2424 if ("-//w3c//dtd html 4.01 frameset//en".equals(publicIdentifierLC)) { 2425 return true; 2426 } 2427 } 2428 return false; 2429 } 2430 2431 private boolean isQuirky(String name, String publicIdentifierLC, 2432 String systemIdentifierLC, boolean correct) { 2433 if (!correct) { 2434 return true; 2435 } 2436 if (!"HTML".equalsIgnoreCase(name)) { 2437 return true; 2438 } 2439 if (publicIdentifierLC != null 2440 && (Arrays.binarySearch(QUIRKY_PUBLIC_IDS, publicIdentifierLC) > -1)) { 2441 return true; 2442 } 2443 if (systemIdentifierLC == null) { 2444 if ("-//w3c//dtd html 4.01 transitional//en".equals(publicIdentifierLC)) { 2445 return true; 2446 } else if ("-//w3c//dtd html 4.01 frameset//en".equals(publicIdentifierLC)) { 2447 return true; 2448 } 2449 } else if ("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd".equals(systemIdentifierLC)) { 2450 return true; 2451 } 2452 return false; 2453 } 2454 2455 private String toAsciiLowerCase(String str) { 2456 if (str == null) { 2457 return null; 2458 } 2459 char[] buf = new char[str.length()]; 2460 for (int i = 0; i < str.length(); i++) { 2461 char c = str.charAt(i); 2462 if (c >= 'A' && c <= 'Z') { 2463 c += 0x20; 2464 } 2465 buf[i] = c; 2466 } 2467 return new String(buf); 2468 } 2469 2470 private void closeTheCell(int eltPos) throws SAXException { 2471 generateImpliedEndTags(); 2472 if (eltPos != currentPtr) { 2473 err("Unclosed elements."); 2474 } 2475 while (currentPtr >= eltPos) { 2476 pop(); 2477 } 2478 clearTheListOfActiveFormattingElementsUpToTheLastMarker(); 2479 phase = Phase.IN_ROW; 2480 return; 2481 } 2482 2483 private int 
findLastInTableScopeTdTh() { 2484 for (int i = currentPtr; i > 0; i--) { 2485 String name = stack[i].name; 2486 if ("td" == name || "th" == name) { 2487 return i; 2488 } else if (name == "table") { 2489 return NOT_FOUND_ON_STACK; 2490 } 2491 } 2492 return NOT_FOUND_ON_STACK; 2493 } 2494 2495 private void clearStackBackTo(int eltPos) throws SAXException { 2496 if (eltPos != currentPtr) { 2497 err("Unclosed elements."); 2498 while(currentPtr > eltPos) { // > not >= intentional 2499 pop(); 2500 } 2501 } 2502 } 2503 2504 private void resetTheInsertionMode() { 2505 String name; 2506 for (int i = currentPtr; i >= 0; i--) { 2507 name = stack[i].name; 2508 if (i == 0) { 2509 if (!(context == "td" || context == "th")) { 2510 name = context; 2511 } 2512 } 2513 if ("select" == name) { 2514 phase = Phase.IN_SELECT; 2515 return; 2516 } else if ("td" == name || "th" == name) { 2517 phase = Phase.IN_CELL; 2518 return; 2519 } else if ("tr" == name) { 2520 phase = Phase.IN_ROW; 2521 return; 2522 } else if ("tbody" == name || "thead" == name || "tfoot" == name) { 2523 phase = Phase.IN_TABLE_BODY; 2524 return; 2525 } else if ("caption" == name) { 2526 phase = Phase.IN_CAPTION; 2527 return; 2528 } else if ("colgroup" == name) { 2529 phase = Phase.IN_COLUMN_GROUP; 2530 return; 2531 } else if ("table" == name) { 2532 phase = Phase.IN_TABLE; 2533 return; 2534 } else if ("head" == name) { 2535 phase = Phase.IN_BODY; // really 2536 return; 2537 } else if ("body" == name) { 2538 phase = Phase.IN_BODY; 2539 return; 2540 } else if ("frameset" == name) { 2541 phase = Phase.IN_FRAMESET; 2542 return; 2543 } else if ("html" == name) { 2544 if (headPointer == null) { 2545 phase = Phase.BEFORE_HEAD; 2546 } else { 2547 phase = Phase.AFTER_HEAD; 2548 } 2549 return; 2550 } else if (i == 0) { 2551 phase = Phase.IN_BODY; 2552 return; 2553 } 2554 } 2555 } 2556 2557 /** 2558 * @throws SAXException 2559 * 2560 */ 2561 private void implicitlyCloseP() throws SAXException { 2562 int eltPos = 
findLastInScope("p"); 2563 if (eltPos == NOT_FOUND_ON_STACK) { 2564 return; 2565 } 2566 if (currentPtr != eltPos) { 2567 err("Unclosed elements."); 2568 } 2569 while (currentPtr >= eltPos) { 2570 pop(); 2571 } 2572 } 2573 2574 private boolean clearLastStackSlot() { 2575 stack[currentPtr] = null; 2576 return true; 2577 } 2578 2579 private boolean clearLastListSlot() { 2580 listOfActiveFormattingElements[listPtr] = null; 2581 return true; 2582 } 2583 2584 private void push(StackNode<T> node) throws SAXException { 2585 currentPtr++; 2586 if (currentPtr == stack.length) { 2587 StackNode<T>[] newStack = new StackNode[stack.length + 64]; 2588 System.arraycopy(stack, 0, newStack, 0, stack.length); 2589 stack = newStack; 2590 } 2591 stack[currentPtr] = node; 2592 elementPushed(node.name, node.node); 2593 } 2594 2595 private void append(StackNode<T> node) { 2596 listPtr++; 2597 if (listPtr == listOfActiveFormattingElements.length) { 2598 StackNode<T>[] newList = new StackNode[listOfActiveFormattingElements.length + 64]; 2599 System.arraycopy(listOfActiveFormattingElements, 0, newList, 0, listOfActiveFormattingElements.length); 2600 listOfActiveFormattingElements = newList; 2601 } 2602 listOfActiveFormattingElements[listPtr] = node; 2603 } 2604 2605 private void insertMarker() { 2606 append(MARKER); 2607 } 2608 2609 private void clearTheListOfActiveFormattingElementsUpToTheLastMarker() { 2610 while (listPtr > -1) { 2611 if (listOfActiveFormattingElements[listPtr--] == MARKER) { 2612 return; 2613 } 2614 } 2615 } 2616 2617 private boolean isCurrent(String name) { 2618 return name == stack[currentPtr].name; 2619 } 2620 2621 private void removeFromStack(int pos) throws SAXException { 2622 if (currentPtr == pos) { 2623 pop(); 2624 } else { 2625 if (conformingAndStreaming) { 2626 fatal(); 2627 } else if (nonConformingAndStreaming) { 2628 throw new UnsupportedOperationException(); 2629 } else { 2630 System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos); 2631 assert 
clearLastStackSlot(); 2632 currentPtr--; 2633 } 2634 } 2635 } 2636 2637 private void removeFromStack(StackNode<T> node) throws SAXException { 2638 if (stack[currentPtr] == node) { 2639 pop(); 2640 } else { 2641 int pos = currentPtr - 1; 2642 while (pos >= 0 && stack[pos] != node) { 2643 pos--; 2644 } 2645 if (pos == -1) { 2646 // dead code? 2647 return; 2648 } 2649 if (conformingAndStreaming) { 2650 fatal(); 2651 } else if (nonConformingAndStreaming) { 2652 throw new UnsupportedOperationException(); 2653 } else { 2654 System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos); 2655 currentPtr--; 2656 } 2657 } 2658 } 2659 2660 private void removeFromListOfActiveFormattingElements(int pos) { 2661 if (pos == listPtr) { 2662 assert clearLastListSlot(); 2663 listPtr--; 2664 return; 2665 } 2666 assert pos < listPtr; 2667 System.arraycopy(listOfActiveFormattingElements, pos + 1, listOfActiveFormattingElements, pos, listPtr - pos); 2668 assert clearLastListSlot(); 2669 listPtr--; 2670 } 2671 2672 private void adoptionAgencyEndTag(String name) throws SAXException { 2673 flushCharacters(); 2674 for (;;) { 2675 int formattingEltListPos = listPtr; 2676 while (formattingEltListPos > -1) { 2677 String listName = listOfActiveFormattingElements[formattingEltListPos].name; 2678 if (listName == name) { 2679 break; 2680 } else if (listName == null) { 2681 formattingEltListPos = -1; 2682 break; 2683 } 2684 formattingEltListPos--; 2685 } 2686 if (formattingEltListPos == -1) { 2687 err("No element \u201C" + name + "\u201D to close."); 2688 return; 2689 } 2690 StackNode<T> formattingElt = listOfActiveFormattingElements[formattingEltListPos]; 2691 int formattingEltStackPos = currentPtr; 2692 boolean inScope = true; 2693 while (formattingEltStackPos > -1) { 2694 StackNode<T> node = stack[formattingEltStackPos]; 2695 if (node == formattingElt) { 2696 break; 2697 } else if (node.scoping) { 2698 inScope = false; 2699 } 2700 formattingEltStackPos--; 2701 } 2702 if (formattingEltStackPos == 
-1) { 2703 err("No element \u201C" + name + "\u201D to close."); 2704 removeFromListOfActiveFormattingElements(formattingEltListPos); 2705 return; 2706 } 2707 if (!inScope) { 2708 err("No element \u201C" + name + "\u201D to close."); 2709 return; 2710 } 2711 // stackPos now points to the formatting element and it is in scope 2712 if (formattingEltStackPos != currentPtr) { 2713 err("End tag \u201C" + name + "\u201D violates nesting rules."); 2714 } 2715 int furthestBlockPos = formattingEltStackPos + 1; 2716 while (furthestBlockPos <= currentPtr) { 2717 StackNode<T> node = stack[furthestBlockPos]; 2718 if (node.scoping || node.special) { 2719 break; 2720 } 2721 furthestBlockPos++; 2722 } 2723 if (furthestBlockPos > currentPtr) { 2724 // no furthest block 2725 while (currentPtr >= formattingEltStackPos) { 2726 pop(); 2727 } 2728 removeFromListOfActiveFormattingElements(formattingEltListPos); 2729 return; 2730 } 2731 StackNode<T> commonAncestor = stack[formattingEltStackPos - 1]; 2732 StackNode<T> furthestBlock = stack[furthestBlockPos]; 2733 detachFromParent(furthestBlock.node); 2734 int bookmark = formattingEltListPos; 2735 int nodePos = furthestBlockPos; 2736 StackNode<T> lastNode = furthestBlock; 2737 for(;;) { 2738 nodePos--; 2739 StackNode<T> node = stack[nodePos]; 2740 int nodeListPos = findInListOfActiveFormattingElements(node); 2741 if (nodeListPos == -1) { 2742 assert formattingEltStackPos < nodePos; 2743 assert bookmark < nodePos; 2744 assert furthestBlockPos > nodePos; 2745 removeFromStack(nodePos); 2746 furthestBlockPos--; 2747 continue; 2748 } 2749 if (nodePos == formattingEltStackPos) { 2750 break; 2751 } 2752 if (nodePos == furthestBlockPos) { 2753 bookmark = nodeListPos + 1; 2754 } 2755 if (hasChildren(node.node)) { 2756 assert node == listOfActiveFormattingElements[nodeListPos]; 2757 assert node == stack[nodePos]; 2758 T clone = shallowClone(node.node); 2759 node = new StackNode<T>(node.name, clone, node.scoping, node.special, node.fosterParenting); 
2760 listOfActiveFormattingElements[nodeListPos] = node; 2761 stack[nodePos] = node; 2762 } 2763 detachFromParentAndAppendToNewParent(lastNode.node, node.node); 2764 lastNode = node; 2765 } 2766 detachFromParentAndAppendToNewParent(lastNode.node, commonAncestor.node); 2767 T clone = shallowClone(formattingElt.node); 2768 StackNode<T> formattingClone = new StackNode<T>(formattingElt.name, clone, formattingElt.scoping, formattingElt.special, formattingElt.fosterParenting); 2769 appendChildrenToNewParent(furthestBlock.node, clone); 2770 detachFromParentAndAppendToNewParent(clone, furthestBlock.node); 2771 removeFromListOfActiveFormattingElements(formattingEltListPos); 2772 insertIntoListOfActiveFormattingElements(formattingClone, bookmark); 2773 assert formattingEltStackPos < furthestBlockPos; 2774 removeFromStack(formattingEltStackPos); 2775 // furthestBlockPos is now off by one and points to the slot after it 2776 insertIntoStack(formattingClone, furthestBlockPos); 2777 } 2778 } 2779 2780 private void insertIntoStack(StackNode<T> node, int position) throws SAXException { 2781 assert currentPtr + 1 < stack.length; 2782 assert position <= currentPtr + 1; 2783 if (position == currentPtr + 1) { 2784 flushCharacters(); 2785 push(node); 2786 } else { 2787 System.arraycopy(stack, position, stack, position + 1, (currentPtr - position) + 1); 2788 currentPtr++; 2789 stack[position] = node; 2790 } 2791 } 2792 2793 private void insertIntoListOfActiveFormattingElements(StackNode<T> formattingClone, int bookmark) { 2794 assert listPtr + 1 < listOfActiveFormattingElements.length; 2795 if (bookmark <= listPtr) { 2796 System.arraycopy(listOfActiveFormattingElements, bookmark, listOfActiveFormattingElements, bookmark + 1, (listPtr - bookmark) + 1); 2797 } 2798 listPtr++; 2799 listOfActiveFormattingElements[bookmark] = formattingClone; 2800 } 2801 2802 private int findInListOfActiveFormattingElements(StackNode<T> node) { 2803 for (int i = listPtr; i >= 0; i--) { 2804 if (node == 
listOfActiveFormattingElements[i]) { 2805 return i; 2806 } 2807 } 2808 return -1; 2809 } 2810 2811 private int findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker( 2812 String name) { 2813 for (int i = listPtr; i >= 0; i--) { 2814 StackNode<T> node = listOfActiveFormattingElements[i]; 2815 if (node.name == name) { 2816 return i; 2817 } else if (node == MARKER) { 2818 return -1; 2819 } 2820 } 2821 return -1; 2822 } 2823 2824 private int findDdOrDtToPop() { 2825 for (int i = currentPtr; i >= 0; i--) { 2826 StackNode<T> node = stack[i]; 2827 if ("dd" == node.name || "dt" == node.name) { 2828 return i; 2829 } else if ((node.scoping || node.special) && !("div" == node.name || "address" == node.name)) { 2830 return NOT_FOUND_ON_STACK; 2831 } 2832 } 2833 return NOT_FOUND_ON_STACK; 2834 } 2835 2836 private int findLiToPop() { 2837 for (int i = currentPtr; i >= 0; i--) { 2838 StackNode<T> node = stack[i]; 2839 if ("li" == node.name) { 2840 return i; 2841 } else if ((node.scoping || node.special) && !("div" == node.name || "address" == node.name)) { 2842 return NOT_FOUND_ON_STACK; 2843 } 2844 } 2845 return NOT_FOUND_ON_STACK; 2846 } 2847 2848 private int findLastOrRoot(String name) { 2849 for (int i = currentPtr; i > 0; i--) { 2850 if (stack[i].name == name) { 2851 return i; 2852 } 2853 } 2854 return 0; 2855 } 2856 2857 private void addAttributesToBody(Attributes attributes) throws SAXException { 2858 if (currentPtr >= 1) { 2859 StackNode<T> body = stack[1]; 2860 if (body.name == "body") { 2861 addAttributesToElement(body.node, attributes); 2862 } 2863 } 2864 } 2865 2866 private void pushHeadPointerOntoStack() throws SAXException { 2867 flushCharacters(); 2868 if (conformingAndStreaming) { 2869 fatal(); 2870 } 2871 if (headPointer == null) { 2872 assert context != null; 2873 push(stack[currentPtr]); 2874 } else { 2875 push(new StackNode<T>("head", headPointer)); 2876 } 2877 } 2878 2879 /** 2880 * @throws SAXException 2881 * 2882 */ 2883 private void 
reconstructTheActiveFormattingElements() throws SAXException { 2884 if (listPtr == -1) { 2885 return; 2886 } 2887 StackNode<T> mostRecent = listOfActiveFormattingElements[listPtr]; 2888 if (mostRecent == MARKER || isInStack(mostRecent)) { 2889 return; 2890 } 2891 int entryPos = listPtr; 2892 for(;;) { 2893 entryPos--; 2894 if (entryPos == -1) { 2895 break; 2896 } 2897 if (listOfActiveFormattingElements[entryPos] == MARKER) { 2898 break; 2899 } 2900 if (isInStack(listOfActiveFormattingElements[entryPos])) { 2901 break; 2902 } 2903 } 2904 if (entryPos < listPtr) { 2905 flushCharacters(); 2906 } 2907 while (entryPos < listPtr) { 2908 entryPos++; 2909 StackNode<T> entry = listOfActiveFormattingElements[entryPos]; 2910 T clone = shallowClone(entry.node); 2911 StackNode<T> entryClone = new StackNode<T>(entry.name, clone, entry.scoping, entry.special, entry.fosterParenting); 2912 StackNode<T> currentNode = stack[currentPtr]; 2913 if (currentNode.fosterParenting) { 2914 insertIntoFosterParent(clone); 2915 } else { 2916 detachFromParentAndAppendToNewParent(clone, currentNode.node); 2917 } 2918 push(entryClone); 2919 listOfActiveFormattingElements[entryPos] = entryClone; 2920 } 2921 } 2922 2923 private void insertIntoFosterParent(T child) throws SAXException { 2924 int eltPos = findLastOrRoot("table"); 2925 T elt = stack[eltPos].node; 2926 if (eltPos == 0) { 2927 detachFromParentAndAppendToNewParent(child, elt); 2928 return; 2929 } 2930 T parent = parentElementFor(elt); 2931 if (parent == null) { 2932 detachFromParentAndAppendToNewParent(child, stack[eltPos - 1].node); 2933 } else { 2934 insertBefore(child, elt, parent); 2935 } 2936 } 2937 2938 private boolean isInStack(StackNode<T> node) { 2939 for (int i = currentPtr; i >= 0; i--) { 2940 if (stack[i] == node) { 2941 return true; 2942 } 2943 } 2944 return false; 2945 } 2946 2947 private void pop() throws SAXException { 2948 flushCharacters(); 2949 StackNode<T> node = stack[currentPtr]; 2950 assert clearLastStackSlot(); 2951 
currentPtr--; 2952 elementPopped(node.name, node.node); 2953 } 2954 2955 private void appendCharMayFoster(char[] buf, int i) throws SAXException { 2956 StackNode<T> current = stack[currentPtr]; 2957 if (current.fosterParenting) { 2958 if (conformingAndStreaming) { 2959 fatal(); 2960 } else if (nonConformingAndStreaming) { 2961 return; 2962 } else { 2963 int eltPos = findLastOrRoot("table"); 2964 T elt = stack[eltPos].node; 2965 if (eltPos == 0) { 2966 appendCharacters(elt, buf, i, 1); 2967 return; 2968 } 2969 T parent = parentElementFor(elt); 2970 if (parent == null) { 2971 appendCharacters(stack[eltPos - 1].node, buf, i, 1); 2972 } else { 2973 insertCharactersBefore(buf, i, 1, elt, parent); 2974 } 2975 } 2976 } else { 2977 accumulateCharacters(buf, i, 1); 2978 } 2979 } 2980 2981 private void appendHtmlElementToDocumentAndPush(Attributes attributes) throws SAXException { 2982 T elt = createHtmlElementSetAsRoot(attributes); 2983 StackNode<T> node = new StackNode<T>("html", elt); 2984 push(node); 2985 } 2986 2987 private void appendHtmlElementToDocumentAndPush() throws SAXException { 2988 appendHtmlElementToDocumentAndPush(tokenizer.newAttributes()); 2989 } 2990 2991 private void appendToCurrentNodeAndPushHeadElement( 2992 Attributes attributes) throws SAXException { 2993 flushCharacters(); 2994 T elt = createElement("head", attributes); 2995 detachFromParentAndAppendToNewParent(elt, stack[currentPtr].node); 2996 headPointer = elt; 2997 StackNode<T> node = new StackNode<T>("head", elt); 2998 push(node); 2999 } 3000 3001 private void appendToCurrentNodeAndPushBodyElement( 3002 Attributes attributes) throws SAXException { 3003 appendToCurrentNodeAndPushElement("body", attributes); 3004 } 3005 3006 private void appendToCurrentNodeAndPushBodyElement() throws SAXException { 3007 appendToCurrentNodeAndPushBodyElement(tokenizer.newAttributes()); 3008 } 3009 3010 private void appendToCurrentNodeAndPushFormElementMayFoster(Attributes attributes) throws SAXException { 3011 
flushCharacters(); 3012 T elt = createElement("form", attributes); 3013 formPointer = elt; 3014 StackNode<T> current = stack[currentPtr]; 3015 if (current.fosterParenting) { 3016 if (conformingAndStreaming) { 3017 fatal(); 3018 } else if (nonConformingAndStreaming) { 3019 return; 3020 } else { 3021 insertIntoFosterParent(elt); 3022 } 3023 } else { 3024 detachFromParentAndAppendToNewParent(elt, current.node); 3025 } 3026 StackNode<T> node = new StackNode<T>("form", elt); 3027 push(node); 3028 } 3029 3030 private void appendToCurrentNodeAndPushFormattingElementMayFoster(String name, 3031 Attributes attributes) throws SAXException { 3032 flushCharacters(); 3033 T elt = createElement(name, attributes, formPointer); 3034 StackNode<T> current = stack[currentPtr]; 3035 if (current.fosterParenting) { 3036 if (conformingAndStreaming) { 3037 fatal(); 3038 } else if (nonConformingAndStreaming) { 3039 return; 3040 } else { 3041 insertIntoFosterParent(elt); 3042 } 3043 } else { 3044 detachFromParentAndAppendToNewParent(elt, current.node); 3045 } 3046 StackNode<T> node = new StackNode<T>(name, elt); 3047 push(node); 3048 append(node); 3049 } 3050 3051 private void appendToCurrentNodeAndPushElement(String name, 3052 Attributes attributes) throws SAXException { 3053 flushCharacters(); 3054 T elt = createElement(name, attributes); 3055 detachFromParentAndAppendToNewParent(elt, stack[currentPtr].node); 3056 StackNode<T> node = new StackNode<T>(name, elt); 3057 push(node); 3058 } 3059 3060 private void appendToCurrentNodeAndPushElementMayFoster(String name, 3061 Attributes attributes) throws SAXException { 3062 flushCharacters(); 3063 T elt = createElement(name, attributes); 3064 StackNode<T> current = stack[currentPtr]; 3065 if (current.fosterParenting) { 3066 if (conformingAndStreaming) { 3067 fatal(); 3068 } else if (nonConformingAndStreaming) { 3069 return; 3070 } else { 3071 insertIntoFosterParent(elt); 3072 } 3073 } else { 3074 detachFromParentAndAppendToNewParent(elt, 
current.node); 3075 } 3076 StackNode<T> node = new StackNode<T>(name, elt); 3077 push(node); 3078 } 3079 3080 private void appendToCurrentNodeAndPushElementMayFoster(String name, Attributes attributes, T form) throws SAXException { 3081 flushCharacters(); 3082 T elt = createElement(name, attributes, formPointer); 3083 StackNode<T> current = stack[currentPtr]; 3084 if (current.fosterParenting) { 3085 if (conformingAndStreaming) { 3086 fatal(); 3087 } else if (nonConformingAndStreaming) { 3088 return; 3089 } else { 3090 insertIntoFosterParent(elt); 3091 } 3092 } else { 3093 detachFromParentAndAppendToNewParent(elt, current.node); 3094 } 3095 StackNode<T> node = new StackNode<T>(name, elt); 3096 push(node); 3097 } 3098 3099 private void appendVoidElementToCurrentMayFoster(String name, 3100 Attributes attributes, T form) throws SAXException { 3101 flushCharacters(); 3102 T elt = createElement(name, attributes, formPointer); 3103 StackNode<T> current = stack[currentPtr]; 3104 if (current.fosterParenting) { 3105 if (conformingAndStreaming) { 3106 fatal(); 3107 } else if (nonConformingAndStreaming) { 3108 return; 3109 } else { 3110 insertIntoFosterParent(elt); 3111 } 3112 } else { 3113 detachFromParentAndAppendToNewParent(elt, current.node); 3114 } 3115 if (conformingAndStreaming || nonConformingAndStreaming) { 3116 elementPushed(name, (T) attributes); 3117 elementPopped(name, null); 3118 } 3119 } 3120 3121 private void appendVoidElementToCurrentMayFoster(String name, Attributes attributes) throws SAXException { 3122 flushCharacters(); 3123 T elt = createElement(name, attributes); 3124 StackNode<T> current = stack[currentPtr]; 3125 if (current.fosterParenting) { 3126 if (conformingAndStreaming) { 3127 fatal(); 3128 } else if (nonConformingAndStreaming) { 3129 return; 3130 } else { 3131 insertIntoFosterParent(elt); 3132 } 3133 } else { 3134 detachFromParentAndAppendToNewParent(elt, current.node); 3135 } 3136 if (conformingAndStreaming || nonConformingAndStreaming) { 3137 
elementPushed(name, (T) attributes); 3138 elementPopped(name, null); 3139 } 3140 } 3141 3142 private void accumulateCharacters(char[] buf, int start, int length) throws SAXException { 3143 if (coalescingText) { 3144 int newLen = charBufferLen + length; 3145 if (newLen > charBuffer.length) { 3146 char[] newBuf = new char[newLen]; 3147 System.arraycopy(charBuffer, 0, newBuf, 0, charBuffer.length); 3148 charBuffer = newBuf; 3149 } 3150 System.arraycopy(buf, start, charBuffer, charBufferLen, length); 3151 charBufferLen = newLen; 3152 } else { 3153 appendCharacters(stack[currentPtr].node, buf, start, length); 3154 } 3155 } 3156 3157 private void flushCharacters() throws SAXException { 3158 if (charBufferLen > 0) { 3159 appendCharacters(stack[currentPtr].node, charBuffer, 0, charBufferLen); 3160 charBufferLen = 0; 3161 } 3162 } 3163 3164 // ------------------------------- // 3165 3166 protected abstract T createElement(String name, Attributes attributes) throws SAXException; 3167 3168 protected T createElement(String name, Attributes attributes, T form) throws SAXException { 3169 return createElement(name, attributes); 3170 } 3171 3172 protected abstract T createHtmlElementSetAsRoot(Attributes attributes) throws SAXException; 3173 3174 protected abstract void detachFromParent(T element) throws SAXException; 3175 3176 protected abstract boolean hasChildren(T element) throws SAXException; 3177 3178 protected abstract T shallowClone(T element) throws SAXException; 3179 3180 protected abstract void detachFromParentAndAppendToNewParent(T child, T newParent) throws SAXException; 3181 3182 protected abstract void appendChildrenToNewParent(T oldParent, T newParent) throws SAXException; 3183 3184 /** 3185 * Get the parent element. MUST return <code>null</code> if there is no parent 3186 * <em>or</em> the parent is not an element. 
*/
    protected abstract T parentElementFor(T child) throws SAXException;

    /**
     * Expected to insert <code>child</code> into <code>parent</code> in front
     * of <code>sibling</code>.
     * 
     * @param child the node to insert
     * @param sibling the node before which the child goes
     * @param parent the parent of <code>sibling</code>
     * @throws SAXException if the operation fails
     */
    protected abstract void insertBefore(T child, T sibling, T parent) throws SAXException;

    /**
     * Expected to insert a run of characters into <code>parent</code> in
     * front of <code>sibling</code>.
     * 
     * @param buf the buffer holding the characters
     * @param start index of the first character to use
     * @param length the number of characters to use
     * @param sibling the node before which the characters go
     * @param parent the parent of <code>sibling</code>
     * @throws SAXException if the operation fails
     */
    protected abstract void insertCharactersBefore(char[] buf, int start, int length, T sibling, T parent) throws SAXException;

    /**
     * Expected to append a run of characters to <code>parent</code>.
     * 
     * @param parent the node that receives the characters
     * @param buf the buffer holding the characters
     * @param start index of the first character to use
     * @param length the number of characters to use
     * @throws SAXException if the operation fails
     */
    protected abstract void appendCharacters(T parent,
            char[] buf, int start, int length) throws SAXException;

    /**
     * Expected to append a comment to <code>parent</code>.
     * 
     * @param parent the node that receives the comment
     * @param buf the buffer holding the comment text
     * @param start index of the first character to use
     * @param length the number of characters to use
     * @throws SAXException if the operation fails
     */
    protected abstract void appendComment(T parent, char[] buf, int start, int length) throws SAXException;

    /**
     * Expected to append a comment directly to the document.
     * 
     * @param buf the buffer holding the comment text
     * @param start index of the first character to use
     * @param length the number of characters to use
     * @throws SAXException if the operation fails
     */
    protected abstract void appendCommentToDocument(char[] buf, int start, int length) throws SAXException;

    /**
     * Expected to add the given attributes to an existing element.
     * 
     * @param element the element to modify
     * @param attributes the attributes to add
     * @throws SAXException if the operation fails
     */
    protected abstract void addAttributesToElement(T element, Attributes attributes) throws SAXException;

    /**
     * Hook with an empty default implementation; subclasses may override.
     * 
     * @param fragment NOTE(review): presumably <code>true</code> when a
     *        fragment rather than a full document is parsed -- confirm
     *        against the call site
     * @throws SAXException if an overriding implementation fails
     */
    protected void start(boolean fragment) throws SAXException {
        // intentionally empty
    }

    /**
     * Hook with an empty default implementation; subclasses may override.
     * 
     * @throws SAXException if an overriding implementation fails
     */
    protected void end() throws SAXException {
        // intentionally empty
    }

    /**
     * Hook with an empty default implementation; subclasses may override.
     * 
     * @param body the <code>body</code> node
     * @throws SAXException if an overriding implementation fails
     */
    protected void bodyClosed(T body) throws SAXException {
        // intentionally empty
    }

    /**
     * Hook with an empty default implementation; subclasses may override.
     * 
     * @param html the <code>html</code> node
     * @throws SAXException if an overriding implementation fails
     */
    protected void htmlClosed(T html) throws SAXException {
        // intentionally empty
    }

    /**
     * Hook with an empty default implementation; subclasses that represent
     * the doctype may override.
     * 
     * @param name the doctype name
     * @param publicIdentifier the public identifier
     * @param systemIdentifier the system identifier
     * @throws SAXException if an overriding implementation fails
     */
    protected void appendDoctypeToDocument(String name,
            String publicIdentifier, String systemIdentifier) throws SAXException {
        // intentionally empty
    }

    /**
     * Hook with an empty default implementation; subclasses may override.
     * 
     * @param name the element name
     * @param node the node being reported
     * @throws SAXException if an overriding implementation fails
     */
    protected void elementPushed(String name, T node) throws SAXException {
        // intentionally empty
    }

    /**
     * Hook with an empty default implementation; subclasses may override.
     * 
     * @param name the element name
     * @param node the node being reported; callers in this class may pass
     *        <code>null</code>
     * @throws SAXException if an overriding implementation fails
     */
    protected void elementPopped(String name, T node) throws SAXException {
        // intentionally empty
    }

    /**
     * Hook with an empty default implementation; subclasses may override.
     * 
     * @param mode the document mode
     * @param publicIdentifier the public identifier
     * @param systemIdentifier the system identifier
     * @param html4SpecificAdditionalErrorChecks flag handed through to the
     *        overriding implementation
     * @throws SAXException if an overriding implementation fails
     */
    protected void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException {
        // intentionally empty
    }

    /**
     * Reports the current value of the <code>wantingComments</code> flag.
     * 
     * @see nu.validator.htmlparser.impl.TokenHandler#wantsComments()
     */
    public boolean wantsComments() {
        return wantingComments;
    }

    /**
     * Sets whether comments are ignored. The value is stored inverted in the
     * <code>wantingComments</code> flag.
     * 
     * @param ignoreComments <code>true</code> to ignore comments
     */
    public void setIgnoringComments(boolean ignoreComments) {
        wantingComments = !ignoreComments;
    }

    /**
     * Sets the errorHandler.
     * 
     * @param errorHandler the errorHandler to set
     */
    public final void setErrorHandler(ErrorHandler errorHandler) {
        this.errorHandler = errorHandler;
    }

    /**
     * Sets the fragment context. A non-<code>null</code> value is stored in
     * its interned form; <code>null</code> is stored as is.
     * 
     * @param context the context name, or <code>null</code>
     */
    public final void setFragmentContext(String context) {
        if (context == null) {
            this.context = null;
            return;
        }
        // NOTE(review): interned presumably so that names can be compared by
        // identity, as StackNode does for element names.
        this.context = context.intern();
    }

    /**
     * Returns the node held by the stack entry at <code>currentPtr</code>.
     * 
     * @return the current node
     */
    protected final T currentNode() {
        final StackNode<T> top = stack[currentPtr];
        return top.node;
    }

    /**
     * Returns the scriptingEnabled.
     * 
     * @return the scriptingEnabled
     */
    public boolean isScriptingEnabled() {
        return this.scriptingEnabled;
    }

    /**
     * Sets the scriptingEnabled.
     * 
     * @param scriptingEnabled the scriptingEnabled to set
     */
    public void setScriptingEnabled(boolean scriptingEnabled) {
        this.scriptingEnabled = scriptingEnabled;
    }

    /**
     * Sets the doctypeExpectation.
     * 
     * @param doctypeExpectation the doctypeExpectation to set
     */
    public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
        this.doctypeExpectation = doctypeExpectation;
    }

    /**
     * Sets the documentModeHandler.
     * 
     * @param documentModeHandler the documentModeHandler to set
     */
    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
        this.documentModeHandler = documentModeHandler;
    }

    /**
     * Sets the reportingDoctype.
     * 
     * @param reportingDoctype the reportingDoctype to set
     */
    public void setReportingDoctype(boolean reportingDoctype) {
        this.reportingDoctype = reportingDoctype;
    }
}