001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 * Copyright (c) 2007 Mozilla Foundation
004 * Portions of comments Copyright 2004-2007 Apple Computer, Inc., Mozilla
005 * Foundation, and Opera Software ASA.
006 *
007 * Permission is hereby granted, free of charge, to any person obtaining a
008 * copy of this software and associated documentation files (the "Software"),
009 * to deal in the Software without restriction, including without limitation
010 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
011 * and/or sell copies of the Software, and to permit persons to whom the
012 * Software is furnished to do so, subject to the following conditions:
013 *
014 * The above copyright notice and this permission notice shall be included in
015 * all copies or substantial portions of the Software.
016 *
017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
018 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
019 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
020 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
021 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
022 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
023 * DEALINGS IN THE SOFTWARE.
024 */
025
026 /*
027 * The comments following this one that use the same comment syntax as this
028 * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007
029 * amended as of June 28 2007.
030 * That document came with this statement:
031 * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and
032 * Opera Software ASA. You are granted a license to use, reproduce and
033 * create derivative works of this document."
034 */
035
036 package nu.validator.htmlparser.impl;
037
038 import java.util.Arrays;
039
040 import nu.validator.htmlparser.common.DoctypeExpectation;
041 import nu.validator.htmlparser.common.DocumentMode;
042 import nu.validator.htmlparser.common.DocumentModeHandler;
043 import nu.validator.htmlparser.common.XmlViolationPolicy;
044
045 import org.xml.sax.Attributes;
046 import org.xml.sax.ErrorHandler;
047 import org.xml.sax.SAXException;
048 import org.xml.sax.SAXParseException;
049
050 public abstract class TreeBuilder<T> implements TokenHandler {
051
052 private enum Phase {
053 INITIAL, ROOT_ELEMENT, BEFORE_HEAD, IN_HEAD, IN_HEAD_NOSCRIPT, AFTER_HEAD, IN_BODY, IN_TABLE, IN_CAPTION, IN_COLUMN_GROUP, IN_TABLE_BODY, IN_ROW, IN_CELL, IN_SELECT, AFTER_BODY, IN_FRAMESET, AFTER_FRAMESET, TRAILING_END
054 }
055
056 private class StackNode<S> {
057 final String name;
058
059 final S node;
060
061 final boolean scoping;
062
063 final boolean special;
064
065 final boolean fosterParenting;
066
067 /**
068 * @param name
069 * @param node
070 * @param scoping
071 * @param special
072 */
073 StackNode(final String name, final S node, final boolean scoping, final boolean special, final boolean fosterParenting) {
074 this.name = name;
075 this.node = node;
076 this.scoping = scoping;
077 this.special = special;
078 this.fosterParenting = fosterParenting;
079 }
080
081 /**
082 * @param name
083 * @param node
084 */
085 StackNode(final String name, final S node) {
086 this.name = name;
087 this.node = node;
088 this.scoping = ("table" == name || "caption" == name || "td" == name || "th" == name || "button" == name || "marquee" == name || "object" == name);
089 this.special = ("address" == name || "area" == name || "base" == name || "basefont" == name || "bgsound" == name || "blockquote" == name || "body" == name || "br" == name || "center" == name || "col" == name || "colgroup" == name || "dd" == name || "dir" == name || "div" == name || "dl" == name || "dt" == name || "embed" == name || "fieldset" == name || "form" == name || "frame" == name || "frameset" == name || "h1" == name || "h2" == name || "h3" == name || "h4" == name || "h5" == name || "h6" == name || "head" == name || "hr" == name || "iframe" == name || "image" == name || "img" == name || "input" == name || "isindex" == name || "li" == name || "link" == name || "listing" == name || "menu" == name || "meta" == name || "noembed" == name || "noframes" == name || "noscript" == name || "ol" == name || "optgroup" == name || "option" == name || "p" == name || "param" == name || "plaintext" == name || "pre" == name || "script" == name || "select" == name || "spacer" == name || "style" == name || "tbody" == name || "textarea" == name || "tfoot" == name || "thead" == name || "title" == name || "tr" == name || "ul" == name || "wbr" == name);
090 this.fosterParenting = ("table" == name || "tbody" == name || "tfoot" == name || "thead" == name || "tr" == name);
091 }
092
093 /**
094 * @see java.lang.Object#toString()
095 */
096 @Override
097 public String toString() {
098 return name;
099 }
100 }
101
102 private final static char[] ISINDEX_PROMPT = "This is a searchable index. Insert your search keywords here: ".toCharArray();
103
104 private final static String[] HTML4_PUBLIC_IDS = {
105 "-//W3C//DTD HTML 4.0 Frameset//EN",
106 "-//W3C//DTD HTML 4.0 Transitional//EN",
107 "-//W3C//DTD HTML 4.0//EN",
108 "-//W3C//DTD HTML 4.01 Frameset//EN",
109 "-//W3C//DTD HTML 4.01 Transitional//EN",
110 "-//W3C//DTD HTML 4.01//EN"
111 };
112
113 private final static String[] QUIRKY_PUBLIC_IDS = {
114 "+//silmaril//dtd html pro v0r11 19970101//en",
115 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
116 "-//as//dtd html 3.0 aswedit + extensions//en",
117 "-//ietf//dtd html 2.0 level 1//en",
118 "-//ietf//dtd html 2.0 level 2//en",
119 "-//ietf//dtd html 2.0 strict level 1//en",
120 "-//ietf//dtd html 2.0 strict level 2//en",
121 "-//ietf//dtd html 2.0 strict//en", "-//ietf//dtd html 2.0//en",
122 "-//ietf//dtd html 2.1e//en", "-//ietf//dtd html 3.0//en",
123 "-//ietf//dtd html 3.0//en//", "-//ietf//dtd html 3.2 final//en",
124 "-//ietf//dtd html 3.2//en", "-//ietf//dtd html 3//en",
125 "-//ietf//dtd html level 0//en",
126 "-//ietf//dtd html level 0//en//2.0",
127 "-//ietf//dtd html level 1//en",
128 "-//ietf//dtd html level 1//en//2.0",
129 "-//ietf//dtd html level 2//en",
130 "-//ietf//dtd html level 2//en//2.0",
131 "-//ietf//dtd html level 3//en",
132 "-//ietf//dtd html level 3//en//3.0",
133 "-//ietf//dtd html strict level 0//en",
134 "-//ietf//dtd html strict level 0//en//2.0",
135 "-//ietf//dtd html strict level 1//en",
136 "-//ietf//dtd html strict level 1//en//2.0",
137 "-//ietf//dtd html strict level 2//en",
138 "-//ietf//dtd html strict level 2//en//2.0",
139 "-//ietf//dtd html strict level 3//en",
140 "-//ietf//dtd html strict level 3//en//3.0",
141 "-//ietf//dtd html strict//en",
142 "-//ietf//dtd html strict//en//2.0",
143 "-//ietf//dtd html strict//en//3.0", "-//ietf//dtd html//en",
144 "-//ietf//dtd html//en//2.0", "-//ietf//dtd html//en//3.0",
145 "-//metrius//dtd metrius presentational//en",
146 "-//microsoft//dtd internet explorer 2.0 html strict//en",
147 "-//microsoft//dtd internet explorer 2.0 html//en",
148 "-//microsoft//dtd internet explorer 2.0 tables//en",
149 "-//microsoft//dtd internet explorer 3.0 html strict//en",
150 "-//microsoft//dtd internet explorer 3.0 html//en",
151 "-//microsoft//dtd internet explorer 3.0 tables//en",
152 "-//netscape comm. corp.//dtd html//en",
153 "-//netscape comm. corp.//dtd strict html//en",
154 "-//o'reilly and associates//dtd html 2.0//en",
155 "-//o'reilly and associates//dtd html extended 1.0//en",
156 "-//spyglass//dtd html 2.0 extended//en",
157 "-//sq//dtd html 2.0 hotmetal + extensions//en",
158 "-//sun microsystems corp.//dtd hotjava html//en",
159 "-//sun microsystems corp.//dtd hotjava strict html//en",
160 "-//w3c//dtd html 3 1995-03-24//en",
161 "-//w3c//dtd html 3.2 draft//en", "-//w3c//dtd html 3.2 final//en",
162 "-//w3c//dtd html 3.2//en", "-//w3c//dtd html 3.2s draft//en",
163 "-//w3c//dtd html 4.0 frameset//en",
164 "-//w3c//dtd html 4.0 transitional//en",
165 "-//w3c//dtd html experimental 19960712//en",
166 "-//w3c//dtd html experimental 970421//en",
167 "-//w3c//dtd w3 html//en", "-//w3o//dtd w3 html 3.0//en",
168 "-//w3o//dtd w3 html 3.0//en//",
169 "-//w3o//dtd w3 html strict 3.0//en//",
170 "-//webtechs//dtd mozilla html 2.0//en",
171 "-//webtechs//dtd mozilla html//en",
172 "-/w3c/dtd html 4.0 transitional/en", "html" };
173
174 private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE;
175
176 private final StackNode<T> MARKER = new StackNode<T>(null, null);
177
178 private final boolean nonConformingAndStreaming;
179
180 private final boolean conformingAndStreaming;
181
182 private final boolean coalescingText;
183
184 private Phase phase = Phase.INITIAL;
185
186 protected Tokenizer tokenizer;
187
188 private ErrorHandler errorHandler;
189
190 private DocumentModeHandler documentModeHandler;
191
192 private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
193
194 private int cdataOrRcdataTimesToPop;
195
196 private boolean scriptingEnabled = false;
197
198 private boolean needToDropLF;
199
200 private boolean wantingComments;
201
202 private String context;
203
204 private Phase previousPhaseBeforeTrailingEnd;
205
206 private StackNode<T>[] stack;
207
208 private int currentPtr = -1;
209
210 private StackNode<T>[] listOfActiveFormattingElements;
211
212 private int listPtr = -1;
213
214 private T formPointer;
215
216 private T headPointer;
217
218 private boolean reportingDoctype = true;
219
220 private char[] charBuffer;
221
222 private int charBufferLen = 0;
223
/**
 * Creates a tree builder.
 *
 * @param streamabilityViolationPolicy
 *            {@code FATAL} sets the "conforming and streaming" flag,
 *            {@code ALTER_INFOSET} sets the "non-conforming and
 *            streaming" flag; any other value leaves both unset
 * @param coalescingText
 *            when true, a 1024-char text buffer is allocated up front
 */
protected TreeBuilder(XmlViolationPolicy streamabilityViolationPolicy, boolean coalescingText) {
    this.coalescingText = coalescingText;
    this.nonConformingAndStreaming = (XmlViolationPolicy.ALTER_INFOSET == streamabilityViolationPolicy);
    this.conformingAndStreaming = (XmlViolationPolicy.FATAL == streamabilityViolationPolicy);
    // The buffer stays null unless coalescing was requested.
    this.charBuffer = coalescingText ? new char[1024] : null;
}
232
233 /**
234 * Reports an condition that would make the infoset incompatible with XML
235 * 1.0 as fatal.
236 *
237 * @throws SAXException
238 * @throws SAXParseException
239 */
240 protected final void fatal() throws SAXException {
241 SAXParseException spe = new SAXParseException("Last error required non-streamable recovery.", tokenizer);
242 if (errorHandler != null) {
243 errorHandler.fatalError(spe);
244 }
245 throw spe;
246 }
247 protected final void fatal(Exception e) throws SAXException {
248 SAXParseException spe = new SAXParseException(e.getMessage(), tokenizer, e);;
249 if (errorHandler != null) {
250 errorHandler.fatalError(spe);
251 }
252 throw spe;
253 }
254
255 /**
256 * Reports a Parse Error.
257 *
258 * @param message
259 * the message
260 * @throws SAXException
261 */
262 protected final void err(String message) throws SAXException {
263 if (errorHandler == null) {
264 return;
265 }
266 SAXParseException spe = new SAXParseException(message, tokenizer);
267 errorHandler.error(spe);
268 }
269
270 /**
271 * Reports a warning
272 *
273 * @param message
274 * the message
275 * @throws SAXException
276 */
277 protected final void warn(String message) throws SAXException {
278 if (errorHandler == null) {
279 return;
280 }
281 SAXParseException spe = new SAXParseException(message, tokenizer);
282 errorHandler.warning(spe);
283 }
284
285 public final void start(Tokenizer self) throws SAXException {
286 tokenizer = self;
287 stack = new StackNode[64];
288 listOfActiveFormattingElements = new StackNode[64];
289 needToDropLF = false;
290 cdataOrRcdataTimesToPop = 0;
291 currentPtr = -1;
292 formPointer = null;
293 wantingComments = wantsComments();
294 start(context != null);
295 if (context == null) {
296 phase = Phase.INITIAL;
297 } else {
298 T elt = createHtmlElementSetAsRoot(tokenizer.newAttributes());
299 StackNode<T> node = new StackNode<T>("html", elt);
300 currentPtr++;
301 stack[currentPtr] = node;
302 resetTheInsertionMode();
303 if ("title" == context || "textarea" == context) {
304 tokenizer.setContentModelFlag(ContentModelFlag.RCDATA, context);
305 } else if ("style" == context || "script" == context || "xmp" == context || "iframe" == context || "noembed" == context || "noframes" == context || (scriptingEnabled && "noscript" == context)) {
306 tokenizer.setContentModelFlag(ContentModelFlag.CDATA, context);
307 } else if ("plaintext" == context) {
308 tokenizer.setContentModelFlag(ContentModelFlag.PLAINTEXT, context);
309 } else {
310 tokenizer.setContentModelFlag(ContentModelFlag.PCDATA, context);
311 }
312 }
313 }
314
315 public final void doctype(String name, String publicIdentifier,
316 String systemIdentifier, boolean correct) throws SAXException {
317 needToDropLF = false;
318 switch (phase) {
319 case INITIAL:
320 /*
321 * A DOCTYPE token If the DOCTYPE token's name does not
322 * case-insensitively match the string "HTML", or if the token's
323 * public identifier is not missing, or if the token's system
324 * identifier is not missing, then there is a parse error.
325 * Conformance checkers may, instead of reporting this error,
326 * switch to a conformance checking mode for another language
327 * (e.g. based on the DOCTYPE token a conformance checker could
328 * recognise that the document is an HTML4-era document, and
329 * defer to an HTML4 conformance checker.)
330 *
331 * Append a DocumentType node to the Document node, with the
332 * name attribute set to the name given in the DOCTYPE token;
333 * the publicId attribute set to the public identifier given in
334 * the DOCTYPE token, or the empty string if the public
335 * identifier was not set; the systemId attribute set to the
336 * system identifier given in the DOCTYPE token, or the empty
337 * string if the system identifier was not set; and the other
338 * attributes specific to DocumentType objects set to null and
339 * empty lists as appropriate. Associate the DocumentType node
340 * with the Document object so that it is returned as the value
341 * of the doctype attribute of the Document object.
342 */
343 if (reportingDoctype ) {
344 appendDoctypeToDocument(name, publicIdentifier == null ? ""
345 : publicIdentifier, systemIdentifier == null ? ""
346 : systemIdentifier);
347 }
348 /*
349 * Then, if the DOCTYPE token matches one of the conditions in
350 * the following list, then set the document to quirks mode:
351 *
352 * Otherwise, if the DOCTYPE token matches one of the conditions
353 * in the following list, then set the document to limited
354 * quirks mode: + The public identifier is set to: "-//W3C//DTD
355 * XHTML 1.0 Frameset//EN" + The public identifier is set to:
356 * "-//W3C//DTD XHTML 1.0 Transitional//EN" + The system
357 * identifier is not missing and the public identifier is set
358 * to: "-//W3C//DTD HTML 4.01 Frameset//EN" + The system
359 * identifier is not missing and the public identifier is set
360 * to: "-//W3C//DTD HTML 4.01 Transitional//EN"
361 *
362 * The name, system identifier, and public identifier strings
363 * must be compared to the values given in the lists above in a
364 * case-insensitive manner.
365 */
366 String publicIdentifierLC = toAsciiLowerCase(publicIdentifier);
367 String systemIdentifierLC = toAsciiLowerCase(systemIdentifier);
368 switch (doctypeExpectation) {
369 case HTML:
370 if (isQuirky(name, publicIdentifierLC,
371 systemIdentifierLC, correct)) {
372 err("Quirky doctype.");
373 documentModeInternal(DocumentMode.QUIRKS_MODE,
374 publicIdentifier, systemIdentifier, false);
375 } else if (isAlmostStandards(publicIdentifierLC,
376 systemIdentifierLC)) {
377 err("Almost standards mode doctype.");
378 documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
379 publicIdentifier, systemIdentifier, false);
380 } else {
381 if (!(publicIdentifier == null && systemIdentifier == null)) {
382 err("Legacy doctype.");
383 }
384 documentModeInternal(DocumentMode.STANDARDS_MODE,
385 publicIdentifier, systemIdentifier, false);
386 }
387 break;
388 case HTML401_STRICT:
389 tokenizer.turnOnAdditionalHtml4Errors();
390 if (isQuirky(name, publicIdentifierLC,
391 systemIdentifierLC, correct)) {
392 err("Quirky doctype.");
393 documentModeInternal(DocumentMode.QUIRKS_MODE,
394 publicIdentifier, systemIdentifier, true);
395 } else if (isAlmostStandards(publicIdentifierLC,
396 systemIdentifierLC)) {
397 err("Almost standards mode doctype.");
398 documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
399 publicIdentifier, systemIdentifier, true);
400 } else {
401 if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
402 if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
403 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
404 }
405 } else {
406 err("The doctype was not the HTML 4.01 Strict doctype.");
407 }
408 documentModeInternal(DocumentMode.STANDARDS_MODE,
409 publicIdentifier, systemIdentifier, true);
410 }
411 break;
412 case HTML401_TRANSITIONAL:
413 tokenizer.turnOnAdditionalHtml4Errors();
414 if (isQuirky(name, publicIdentifierLC,
415 systemIdentifierLC, correct)) {
416 err("Quirky doctype.");
417 documentModeInternal(DocumentMode.QUIRKS_MODE,
418 publicIdentifier, systemIdentifier, true);
419 } else if (isAlmostStandards(publicIdentifierLC,
420 systemIdentifierLC)) {
421 if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)
422 && systemIdentifier != null) {
423 if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
424 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
425 }
426 } else {
427 err("The doctype was not a non-quirky HTML 4.01 Transitional doctype.");
428 }
429 documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
430 publicIdentifier, systemIdentifier, true);
431 } else {
432 err("The doctype was not the HTML 4.01 Transitional doctype.");
433 documentModeInternal(DocumentMode.STANDARDS_MODE,
434 publicIdentifier, systemIdentifier, true);
435 }
436 break;
437 case AUTO:
438 boolean html4 = isHtml4Doctype(publicIdentifier);
439 if (html4) {
440 tokenizer.turnOnAdditionalHtml4Errors();
441 }
442 if (isQuirky(name, publicIdentifierLC,
443 systemIdentifierLC, correct)) {
444 err("Quirky doctype.");
445 documentModeInternal(DocumentMode.QUIRKS_MODE,
446 publicIdentifier, systemIdentifier, html4);
447 } else if (isAlmostStandards(publicIdentifierLC,
448 systemIdentifierLC)) {
449 if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
450 tokenizer.turnOnAdditionalHtml4Errors();
451 if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
452 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
453 }
454 } else {
455 err("Almost standards mode doctype.");
456 }
457 documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
458 publicIdentifier, systemIdentifier, html4);
459 } else {
460 if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
461 tokenizer.turnOnAdditionalHtml4Errors();
462 if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
463 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification.");
464 }
465 } else {
466 if (!(publicIdentifier == null && systemIdentifier == null)) {
467 err("Legacy doctype.");
468 }
469 }
470 documentModeInternal(DocumentMode.STANDARDS_MODE,
471 publicIdentifier, systemIdentifier, html4);
472 }
473 break;
474 case NO_DOCTYPE_ERRORS:
475 if (isQuirky(name, publicIdentifierLC,
476 systemIdentifierLC, correct)) {
477 documentModeInternal(DocumentMode.QUIRKS_MODE,
478 publicIdentifier, systemIdentifier, false);
479 } else if (isAlmostStandards(publicIdentifierLC,
480 systemIdentifierLC)) {
481 documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
482 publicIdentifier, systemIdentifier, false);
483 } else {
484 documentModeInternal(DocumentMode.STANDARDS_MODE,
485 publicIdentifier, systemIdentifier, false);
486 }
487 break;
488 }
489
490 /*
491 *
492 * Then, switch to the root element phase of the tree
493 * construction stage.
494 *
495 *
496 */
497 phase = Phase.ROOT_ELEMENT;
498 return;
499 default:
500 /*
501 * A DOCTYPE token Parse error.
502 */
503 err("Stray doctype.");
504 /*
505 * Ignore the token.
506 */
507 return;
508 }
509 }
510
511 private boolean isHtml4Doctype(String publicIdentifier) {
512 if (publicIdentifier != null
513 && (Arrays.binarySearch(HTML4_PUBLIC_IDS, publicIdentifier) > -1)) {
514 return true;
515 }
516 return false;
517 }
518
519 public final void comment(char[] buf, int length) throws SAXException {
520 needToDropLF = false;
521 if (wantingComments) {
522 switch (phase) {
523 case INITIAL:
524 case ROOT_ELEMENT:
525 case TRAILING_END:
526 /*
527 * A comment token Append a Comment node to the Document
528 * object with the data attribute set to the data given in
529 * the comment token.
530 */
531 appendCommentToDocument(buf, 0, length);
532 return;
533 case AFTER_BODY:
534 /*
535 * * A comment token Append a Comment node to the first
536 * element in the stack of open elements (the html element),
537 * with the data attribute set to the data given in the
538 * comment token.
539 *
540 */
541 flushCharacters();
542 appendComment(stack[0].node, buf, 0, length);
543 return;
544 default:
545 /*
546 * * A comment token Append a Comment node to the current
547 * node with the data attribute set to the data given in the
548 * comment token.
549 *
550 */
551 flushCharacters();
552 appendComment(stack[currentPtr].node, buf, 0, length);
553 return;
554 }
555 }
556 }
557
558 /**
559 * @see nu.validator.htmlparser.impl.TokenHandler#characters(char[], int, int)
560 */
561 public final void characters(char[] buf, int start, int length)
562 throws SAXException {
563 if (needToDropLF) {
564 if (buf[start] == '\n') {
565 start++;
566 length--;
567 if (length == 0) {
568 return;
569 }
570 }
571 needToDropLF = false;
572 } else if (cdataOrRcdataTimesToPop > 0) {
573 accumulateCharacters(buf, start, length);
574 return;
575 }
576
577 // optimize the most common case
578 if (phase == Phase.IN_BODY || phase == Phase.IN_CELL
579 || phase == Phase.IN_CAPTION) {
580 reconstructTheActiveFormattingElements();
581 accumulateCharacters(buf, start, length);
582 return;
583 }
584
585 int end = start + length;
586 loop: for (int i = start; i < end; i++) {
587 switch (buf[i]) {
588 case ' ':
589 case '\t':
590 case '\n':
591 case '\u000B':
592 case '\u000C':
593 /*
594 * A character token that is one of one of U+0009 CHARACTER
595 * TABULATION, U+000A LINE FEED (LF), U+000B LINE
596 * TABULATION, U+000C FORM FEED (FF), or U+0020 SPACE
597 */
598 switch (phase) {
599 case INITIAL:
600 case ROOT_ELEMENT:
601 /*
602 * Ignore the token.
603 */
604 start = i + 1;
605 continue;
606 case BEFORE_HEAD:
607 case IN_HEAD:
608 case IN_HEAD_NOSCRIPT:
609 case AFTER_HEAD:
610 case IN_TABLE:
611 case IN_COLUMN_GROUP:
612 case IN_TABLE_BODY:
613 case IN_ROW:
614 case IN_FRAMESET:
615 case AFTER_FRAMESET:
616 /*
617 * Append the character to the current node.
618 */
619 continue;
620 case IN_BODY:
621 case IN_CELL:
622 case IN_CAPTION:
623 // XXX is this dead code?
624 if (start < i) {
625 accumulateCharacters(buf, start, i
626 - start);
627 start = i;
628 }
629
630 /*
631 * Reconstruct the active formatting elements, if
632 * any.
633 */
634 reconstructTheActiveFormattingElements();
635 /* Append the token's character to the current node. */
636 break loop;
637 case IN_SELECT:
638 break loop;
639 case AFTER_BODY:
640 if (start < i) {
641 accumulateCharacters(buf, start, i
642 - start);
643 start = i;
644 }
645 /*
646 * Reconstruct the active formatting elements, if
647 * any.
648 */
649 // XXX bug?
650 reconstructTheActiveFormattingElements();
651 /* Append the token's character to the current node. */
652 continue;
653 case TRAILING_END:
654 if (conformingAndStreaming) {
655 return;
656 }
657 if (previousPhaseBeforeTrailingEnd == Phase.AFTER_FRAMESET) {
658 continue;
659 } else {
660 if (start < i) {
661 accumulateCharacters(buf, start, i
662 - start);
663 start = i;
664 }
665 /*
666 * Reconstruct the active formatting elements,
667 * if any.
668 */
669 // XXX bug?
670 reconstructTheActiveFormattingElements();
671 /*
672 * Append the token's character to the current
673 * node.
674 */
675 continue;
676 }
677 }
678 default:
679 /*
680 * A character token that is not one of one of U+0009
681 * CHARACTER TABULATION, U+000A LINE FEED (LF), U+000B LINE
682 * TABULATION, U+000C FORM FEED (FF), or U+0020 SPACE
683 */
684 switch (phase) {
685 case INITIAL:
686 /*
687 * Parse error.
688 */
689 if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) {
690 err("Non-space characters found without seeing a doctype first.");
691 }
692 /*
693 *
694 * Set the document to quirks mode.
695 */
696 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
697 false);
698 /*
699 * Then, switch to the root element phase of the
700 * tree construction stage
701 */
702 phase = Phase.ROOT_ELEMENT;
703 /*
704 * and reprocess the current token.
705 *
706 *
707 */
708 i--;
709 continue;
710 case ROOT_ELEMENT:
711 /*
712 * Create an HTMLElement node with the tag name
713 * html, in the HTML namespace. Append it to the
714 * Document object.
715 */
716 appendHtmlElementToDocumentAndPush();
717 /* Switch to the main phase */
718 phase = Phase.BEFORE_HEAD;
719 /*
720 * reprocess the current token.
721 *
722 */
723 i--;
724 continue;
725 case BEFORE_HEAD:
726 if (start < i) {
727 accumulateCharacters(buf, start, i
728 - start);
729 start = i;
730 }
731 /*
732 * /*Act as if a start tag token with the tag name
733 * "head" and no attributes had been seen,
734 */
735 appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES);
736 phase = Phase.IN_HEAD;
737 /*
738 * then reprocess the current token.
739 *
740 * This will result in an empty head element being
741 * generated, with the current token being
742 * reprocessed in the "after head" insertion mode.
743 */
744 i--;
745 continue;
746 case IN_HEAD:
747 if (start < i) {
748 accumulateCharacters(buf, start, i
749 - start);
750 start = i;
751 }
752 /*
753 * Act as if an end tag token with the tag name
754 * "head" had been seen,
755 */
756 pop();
757 phase = Phase.AFTER_HEAD;
758 /*
759 * and reprocess the current token.
760 */
761 i--;
762 continue;
763 case IN_HEAD_NOSCRIPT:
764 if (start < i) {
765 accumulateCharacters(buf, start, i
766 - start);
767 start = i;
768 }
769 /*
770 * Parse error. Act as if an end tag with the tag
771 * name "noscript" had been seen
772 */
773 err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D.");
774 pop();
775 phase = Phase.IN_HEAD;
776 /*
777 * and reprocess the current token.
778 */
779 i--;
780 continue;
781 case AFTER_HEAD:
782 if (start < i) {
783 accumulateCharacters(buf, start, i
784 - start);
785 start = i;
786 }
787 /*
788 * Act as if a start tag token with the tag name
789 * "body" and no attributes had been seen,
790 */
791 appendToCurrentNodeAndPushBodyElement();
792 phase = Phase.IN_BODY;
793 /*
794 * and then reprocess the current token.
795 */
796 i--;
797 continue;
798 case IN_BODY:
799 case IN_CELL:
800 case IN_CAPTION:
801 if (start < i) {
802 accumulateCharacters(buf, start, i
803 - start);
804 start = i;
805 }
806 /*
807 * Reconstruct the active formatting elements, if
808 * any.
809 */
810 reconstructTheActiveFormattingElements();
811 /* Append the token's character to the current node. */
812 break loop;
813 case IN_TABLE:
814 case IN_TABLE_BODY:
815 case IN_ROW:
816 if (start < i) {
817 accumulateCharacters(buf, start, i
818 - start);
819 }
820 reconstructTheActiveFormattingElements();
821 appendCharMayFoster(buf, i);
822 start = i + 1;
823 continue;
824 case IN_COLUMN_GROUP:
825 /*
826 * Act as if an end tag with the tag name "colgroup"
827 * had been seen, and then, if that token wasn't
828 * ignored, reprocess the current token.
829 */
830 if (currentPtr == 0) {
831 err("Non-space in \u201Ccolgroup\u201D when parsing fragment.");
832 continue;
833 }
834 pop();
835 phase = Phase.IN_TABLE;
836 i--;
837 continue;
838 case IN_SELECT:
839 break loop;
840 case AFTER_BODY:
841 err("Non-space character after body.");
842 if (conformingAndStreaming) {
843 fatal();
844 }
845 phase = Phase.IN_BODY;
846 i--;
847 continue;
848 case IN_FRAMESET:
849 if (start < i) {
850 accumulateCharacters(buf, start, i
851 - start);
852 start = i;
853 }
854 /*
855 * Parse error.
856 */
857 err("Non-space in \u201Cframeset\u201D.");
858 /*
859 * Ignore the token.
860 */
861 start = i + 1;
862 continue;
863 case AFTER_FRAMESET:
864 if (start < i) {
865 accumulateCharacters(buf, start, i
866 - start);
867 start = i;
868 }
869 /*
870 * Parse error.
871 */
872 err("Non-space after \u201Cframeset\u201D.");
873 /*
874 * Ignore the token.
875 */
876 start = i + 1;
877 continue;
878 case TRAILING_END:
879 /*
880 * Parse error.
881 */
882 err("Non-space character in page trailer.");
883 if (conformingAndStreaming) {
884 fatal();
885 }
886 /*
887 * Switch back to the main phase and reprocess the
888 * token.
889 */
890 phase = previousPhaseBeforeTrailingEnd;
891 i--;
892 continue;
893 }
894 }
895 }
896 if (start < end) {
897 accumulateCharacters(buf, start, end - start);
898 }
899 }
900
901 public final void eof() throws SAXException {
902 try {
903 flushCharacters();
904 eofloop: for (;;) {
905 switch (phase) {
906 case INITIAL:
907 /*
908 * Parse error.
909 */
910 if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) {
911 err("End of file seen without seeing a doctype first.");
912 }
913 /*
914 *
915 * Set the document to quirks mode.
916 */
917 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
918 false);
919 /*
920 * Then, switch to the root element phase of the tree
921 * construction stage
922 */
923 phase = Phase.ROOT_ELEMENT;
924 /*
925 * and reprocess the current token.
926 */
927 continue;
928 case ROOT_ELEMENT:
929 /*
930 * Create an HTMLElement node with the tag name html, in
931 * the HTML namespace. Append it to the Document object.
932 */
933 appendHtmlElementToDocumentAndPush();
934 /* Switch to the main phase */
935 phase = Phase.BEFORE_HEAD;
936 /*
937 * reprocess the current token.
938 */
939 continue;
940 case BEFORE_HEAD:
941 appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES);
942 phase = Phase.IN_HEAD;
943 continue;
944 case IN_HEAD:
945 if (currentPtr > 1) {
946 err("End of file seen and there were open elements.");
947 }
948 while (currentPtr > 0) {
949 pop();
950 }
951 phase = Phase.AFTER_HEAD;
952 continue;
953 case IN_HEAD_NOSCRIPT:
954 err("End of file seen and there were open elements.");
955 while (currentPtr > 1) {
956 pop();
957 }
958 phase = Phase.IN_HEAD;
959 continue;
960 case AFTER_HEAD:
961 appendToCurrentNodeAndPushBodyElement();
962 phase = Phase.IN_BODY;
963 continue;
964 case IN_BODY:
965 case IN_TABLE:
966 case IN_CAPTION:
967 case IN_COLUMN_GROUP:
968 case IN_TABLE_BODY:
969 case IN_ROW:
970 case IN_CELL:
971 case IN_SELECT:
972 /*
973 * Generate implied end tags.
974 */
975 generateImpliedEndTags();
976 /*
977 * If there are more than two nodes on the stack of open
978 * elements,
979 */
980 if (currentPtr > 1) {
981 err("End of file seen and there were open elements.");
982 } else if (currentPtr == 1 && stack[1].name != "body") {
983 /*
984 * or if there are two nodes but the second node is
985 * not a body node, this is a parse error.
986 */
987 err("End of file seen and there were open elements.");
988 }
989 if (context != null) {
990 if (currentPtr > 0 && stack[1].name != "body") {
991 /*
992 * Otherwise, if the parser was originally
993 * created as part of the HTML fragment parsing
994 * algorithm, and there's more than one element
995 * in the stack of open elements, and the second
996 * node on the stack of open elements is not a
997 * body node, then this is a parse error.
998 * (fragment case)
999 */
1000 err("End of file seen and there were open elements.");
1001 }
1002 }
1003
1004 /* Stop parsing. */
1005 if (context == null) {
1006 bodyClosed(stack[1].node);
1007 }
1008 phase = Phase.AFTER_BODY;
1009 continue;
1010 /*
1011 * This fails because it doesn't imply HEAD and BODY tags.
1012 * We should probably expand out the insertion modes and
1013 * merge them with phases and then put the three things here
1014 * into each insertion mode instead of trying to factor them
1015 * out so carefully.
1016 *
1017 */
1018 case IN_FRAMESET:
1019 err("End of file seen and there were open elements.");
1020 break eofloop;
1021 case AFTER_BODY:
1022 case AFTER_FRAMESET:
1023 if (context == null) {
1024 htmlClosed(stack[0].node);
1025 }
1026 case TRAILING_END:
1027 break eofloop;
1028 }
1029 }
1030 } finally {
1031 // XXX close elts for SAX
1032 /* Stop parsing. */
1033 stack = null;
1034 listOfActiveFormattingElements = null;
1035 end();
1036 }
1037 }
1038
/**
 * Handles a start tag token by dispatching on the current tree builder
 * <code>phase</code>.
 *
 * The body is an endless loop around a <code>switch</code> on
 * <code>phase</code>. A branch either finishes the token and returns, or it
 * adjusts the stack of open elements and/or <code>phase</code> and uses
 * <code>continue</code> so that the same token is dispatched again under the
 * new state. Several cases deliberately fall through to the following
 * <code>case</code> label when the current phase does not handle the tag.
 *
 * Tag names are compared by reference (<code>==</code>) against string
 * literals, so callers presumably pass interned names -- TODO confirm
 * against the tokenizer.
 *
 * @param name the name of the start tag; may be rewritten locally (the
 * "image" branch replaces it with "img" before reprocessing)
 * @param attributes the attributes carried by the start tag
 * @throws SAXException declared on the signature; this body contains no
 * <code>throw</code> statement, so it presumably propagates from the helper
 * methods called here
 */
1039 public final void startTag(String name, Attributes attributes)
1040 throws SAXException {
// Clear the flag that the "pre" and "textarea" branches below set to true.
1041 needToDropLF = false;
// Loop until some branch returns; "continue" re-dispatches the same token.
1042 for (;;) {
1043 switch (phase) {
1044 case IN_TABLE_BODY:
1045 if ("tr" == name) {
1046 clearStackBackTo(findLastInTableScopeOrRootTbodyTheadTfoot());
1047 appendToCurrentNodeAndPushElement(name, attributes);
1048 phase = Phase.IN_ROW;
1049 return;
1050 } else if ("td" == name || "th" == name) {
// A cell without a row: report it, synthesize an attribute-less "tr" and
// reprocess the cell tag in IN_ROW.
1051 err("\u201C" + name + "\u201D start tag in table body.");
1052 clearStackBackTo(findLastInTableScopeOrRootTbodyTheadTfoot());
1053 appendToCurrentNodeAndPushElement("tr",
1054 EmptyAttributes.EMPTY_ATTRIBUTES);
1055 phase = Phase.IN_ROW;
1056 continue;
1057 } else if ("caption" == name || "col" == name
1058 || "colgroup" == name || "tbody" == name
1059 || "tfoot" == name || "thead" == name) {
1060 int eltPos = findLastInTableScopeOrRootTbodyTheadTfoot();
1061 if (eltPos == 0) {
1062 err("Stray \u201C" + name + "\u201D start tag.");
1063 return;
1064 } else {
// Pop the found element and reprocess the tag in IN_TABLE.
1065 clearStackBackTo(eltPos);
1066 pop();
1067 phase = Phase.IN_TABLE;
1068 continue;
1069 }
1070 } else {
1071 // fall through to IN_TABLE
// Control passes through the IN_ROW label first; every name IN_ROW tests
// for was already handled above, so neither IN_ROW branch can match.
1072 }
1073 case IN_ROW:
1074 if ("td" == name || "th" == name) {
1075 clearStackBackTo(findLastOrRoot("tr"));
1076 appendToCurrentNodeAndPushElement(name, attributes);
1077 phase = Phase.IN_CELL;
1078 insertMarker();
1079 return;
1080 } else if ("caption" == name || "col" == name
1081 || "colgroup" == name || "tbody" == name
1082 || "tfoot" == name || "thead" == name
1083 || "tr" == name) {
1084 int eltPos = findLastOrRoot("tr");
1085 if (eltPos == 0) {
// Position 0 means no "tr" was found; the assert records that this is
// only expected when a context is set.
1086 assert context != null;
1087 err("No table row to close.");
1088 return;
1089 }
// Pop the row and reprocess the tag in IN_TABLE_BODY.
1090 clearStackBackTo(eltPos);
1091 pop();
1092 phase = Phase.IN_TABLE_BODY;
1093 continue;
1094 } else {
1095 // fall through to IN_TABLE
1096 }
1097 case IN_TABLE:
1098 if ("caption" == name) {
1099 clearStackBackTo(findLastOrRoot("table"));
1100 insertMarker();
1101 appendToCurrentNodeAndPushElement(name, attributes);
1102 phase = Phase.IN_CAPTION;
1103 return;
1104 } else if ("colgroup" == name) {
1105 clearStackBackTo(findLastOrRoot("table"));
1106 appendToCurrentNodeAndPushElement(name, attributes);
1107 phase = Phase.IN_COLUMN_GROUP;
1108 return;
1109 } else if ("col" == name) {
// Synthesize an attribute-less "colgroup" and reprocess "col" inside it.
1110 clearStackBackTo(findLastOrRoot("table"));
1111 appendToCurrentNodeAndPushElement("colgroup",
1112 EmptyAttributes.EMPTY_ATTRIBUTES);
1113 phase = Phase.IN_COLUMN_GROUP;
1114 continue;
1115 } else if ("tbody" == name || "tfoot" == name
1116 || "thead" == name) {
1117 clearStackBackTo(findLastOrRoot("table"));
1118 appendToCurrentNodeAndPushElement(name, attributes);
1119 phase = Phase.IN_TABLE_BODY;
1120 return;
1121 } else if ("td" == name || "tr" == name || "th" == name) {
// Synthesize an attribute-less "tbody" and reprocess the tag inside it.
1122 clearStackBackTo(findLastOrRoot("table"));
1123 appendToCurrentNodeAndPushElement("tbody",
1124 EmptyAttributes.EMPTY_ATTRIBUTES);
1125 phase = Phase.IN_TABLE_BODY;
1126 continue;
1127 } else if ("table" == name) {
// Nested "table" start tag: pop everything down to and including the open
// table, recompute the phase and reprocess the token.
1128 err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open.");
1129 int eltPos = findLastInTableScope(name);
1130 if (eltPos == NOT_FOUND_ON_STACK) {
1131 assert context != null;
1132 return;
1133 }
1134 generateImpliedEndTags();
1135 // XXX is the next if dead code?
1136 if (!isCurrent("table")) {
1137 err("Unclosed elements on stack.");
1138 }
1139 while (currentPtr >= eltPos) {
1140 pop();
1141 }
1142 resetTheInsertionMode();
1143 continue;
1144 } else {
1145 err("Start tag \u201C" + name
1146 + "\u201D seen in \u201Ctable\u201D.");
1147 // fall through to IN_BODY
// Control passes through the IN_CAPTION and IN_CELL labels on the way;
// all names they test for were already handled by the branches above.
1148 }
1149 case IN_CAPTION:
1150 if ("caption" == name || "col" == name
1151 || "colgroup" == name || "tbody" == name
1152 || "td" == name || "tfoot" == name || "th" == name
1153 || "thead" == name || "tr" == name) {
// Table-structure tag inside a caption: pop through the caption and
// reprocess the tag in IN_TABLE.
1154 err("Stray \u201C" + name
1155 + "\u201D start tag in \u201Ccaption\u201D.");
1156 int eltPos = findLastInTableScope("caption");
1157 if (eltPos == NOT_FOUND_ON_STACK) {
1158 return;
1159 }
1160 generateImpliedEndTags();
1161 if (currentPtr != eltPos) {
1162 err("Unclosed elements on stack.");
1163 }
1164 while (currentPtr >= eltPos) {
1165 pop();
1166 }
1167 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
1168 phase = Phase.IN_TABLE;
1169 continue;
1170 } else {
1171 // fall through to IN_BODY
// The IN_CELL label below tests the same set of names, so it cannot match
// when reached from here.
1172 }
1173 case IN_CELL:
1174 if ("caption" == name || "col" == name
1175 || "colgroup" == name || "tbody" == name
1176 || "td" == name || "tfoot" == name || "th" == name
1177 || "thead" == name || "tr" == name) {
1178 int eltPos = findLastInTableScopeTdTh();
1179 if (eltPos == NOT_FOUND_ON_STACK) {
1180 err("No cell to close.");
1181 return;
1182 } else {
// Close the open cell via closeTheCell() and reprocess the tag.
1183 closeTheCell(eltPos);
1184 continue;
1185 }
1186 } else {
1187 // fall through to IN_BODY
1188 }
1189 case IN_BODY:
1190 if ("html" == name) {
// A repeated "html" start tag only contributes its attributes to the
// element at the bottom of the stack.
1191 err("Stray \u201Chtml\u201D start tag.");
1192 addAttributesToElement(stack[0].node, attributes);
1193 return;
1194 } else if ("base" == name || "link" == name || "meta" == name
1195 || "style" == name || "script" == name) {
1196 // Fall through to IN_HEAD
// This branch is intentionally empty: control leaves the if/else chain
// and runs into the IN_HEAD case label further down.
1197 } else if ("title" == name) {
1198 err("\u201Ctitle\u201D element found inside \u201Cbody\u201D.");
1199 if (!nonConformingAndStreaming) {
1200 pushHeadPointerOntoStack();
1201 }
1202 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
// endTag() pops this many elements on the next end tag; the count is 2
// when the head pointer was pushed above so that it is popped as well.
1203 cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
1204 : 2; // pops head
1205 tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
1206 name);
1207 return;
1208 } else if ("body" == name) {
1209 err("\u201Cbody\u201D start tag found but the \u201Cbody\u201D element is already open.");
1210 addAttributesToBody(attributes);
1211 return;
1212 } else if ("p" == name || "div" == name || "h1" == name
1213 || "h2" == name || "h3" == name || "h4" == name
1214 || "h5" == name || "h6" == name
1215 || "blockquote" == name || "ol" == name
1216 || "ul" == name || "dl" == name
1217 || "fieldset" == name || "address" == name
1218 || "menu" == name || "center" == name
1219 || "dir" == name || "listing" == name) {
1220 implicitlyCloseP();
1221 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1222 return;
1223 } else if ("pre" == name) {
1224 implicitlyCloseP();
1225 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
// Presumably makes the character handler skip a line feed that directly
// follows the tag -- the consumer of this flag is outside this method.
1226 needToDropLF = true;
1227 return;
1228 } else if ("form" == name) {
1229 if (formPointer != null) {
1230 err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element.");
1231 return;
1232 } else {
1233 implicitlyCloseP();
1234 appendToCurrentNodeAndPushFormElementMayFoster(attributes);
1235 return;
1236 }
1237 } else if ("li" == name) {
1238 implicitlyCloseP();
// Pop down to and including the position reported by findLiToPop(); an
// error is reported first when that position is below the current node.
1239 int eltPos = findLiToPop();
1240 if (eltPos < currentPtr) {
1241 err("A \u201Cli\u201D start tag was seen but the previous \u201Cli\u201D element had open children.");
1242 }
1243 while (currentPtr >= eltPos) {
1244 pop();
1245 }
1246 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1247 return;
1248 } else if ("dd" == name || "dt" == name) {
1249 implicitlyCloseP();
// Same pattern as the "li" branch, using findDdOrDtToPop().
1250 int eltPos = findDdOrDtToPop();
1251 if (eltPos < currentPtr) {
1252 err("A definition list item start tag was seen but the previous definition list item element had open children.");
1253 }
1254 while (currentPtr >= eltPos) {
1255 pop();
1256 }
1257 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1258 return;
1259 } else if ("plaintext" == name) {
1260 implicitlyCloseP();
1261 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1262 tokenizer.setContentModelFlag(
1263 ContentModelFlag.PLAINTEXT, name);
1264 return;
1265 } else if ("a" == name) {
// If an "a" is still listed among the active formatting elements, report
// it, run the adoption agency for "a", and then make sure the old node is
// gone from both the stack and the formatting list before adding the new one.
1266 int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a");
1267 if (activeAPos != -1) {
1268 err("An \u201Ca\u201D start tag seen with already an active \u201Ca\u201D element.");
1269 StackNode<T> activeA = listOfActiveFormattingElements[activeAPos];
1270 adoptionAgencyEndTag("a");
1271 removeFromStack(activeA);
// The index may have changed during the adoption agency call, so look the
// node up again before removing it.
1272 activeAPos = findInListOfActiveFormattingElements(activeA);
1273 if (activeAPos != -1) {
1274 removeFromListOfActiveFormattingElements(activeAPos);
1275 }
1276 }
1277 reconstructTheActiveFormattingElements();
1278 appendToCurrentNodeAndPushFormattingElementMayFoster(name,
1279 attributes);
1280 return;
1281 } else if ("i" == name || "b" == name || "em" == name
1282 || "strong" == name || "font" == name
1283 || "big" == name || "s" == name || "small" == name
1284 || "strike" == name || "tt" == name || "u" == name) {
1285 reconstructTheActiveFormattingElements();
1286 appendToCurrentNodeAndPushFormattingElementMayFoster(name,
1287 attributes);
1288 return;
1289 } else if ("nobr" == name) {
1290 reconstructTheActiveFormattingElements();
1291 if (NOT_FOUND_ON_STACK != findLastInScope("nobr")) {
1292 err("\u201Cnobr\u201D start tag seen when there was an open \u201Cnobr\u201D element in scope.");
1293 adoptionAgencyEndTag("nobr");
1294 }
1295 appendToCurrentNodeAndPushFormattingElementMayFoster(name,
1296 attributes);
1297 return;
1298 } else if ("button" == name) {
1299 int eltPos = findLastInScope(name);
1300 if (eltPos != NOT_FOUND_ON_STACK) {
// A "button" is already open in scope: pop through it, then reprocess this
// start tag so that the else branch below handles it.
1301 err("\u201Cbutton\u201D start tag seen when there was an open \u201Cbutton\u201D element in scope.");
1302 generateImpliedEndTags();
1303 if (!isCurrent("button")) {
1304 err("There was an open \u201Cbutton\u201D element in scope with unclosed children.");
1305 }
1306 while (currentPtr >= eltPos) {
1307 pop();
1308 }
1309 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
1310 continue;
1311 } else {
1312 reconstructTheActiveFormattingElements();
1313 // XXX form
1314 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1315 insertMarker();
1316 return;
1317 }
1318 } else if ("object" == name || "marquee" == name) {
1319 reconstructTheActiveFormattingElements();
1320 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1321 insertMarker();
1322 return;
1323 } else if ("xmp" == name) {
1324 reconstructTheActiveFormattingElements();
1325 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1326 cdataOrRcdataTimesToPop = 1;
1327 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1328 name);
1329 return;
1330 } else if ("table" == name) {
1331 implicitlyCloseP();
1332 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1333 phase = Phase.IN_TABLE;
1334 return;
1335 } else if ("br" == name || "img" == name || "embed" == name
1336 || "param" == name || "area" == name
1337 || "basefont" == name || "bgsound" == name
1338 || "spacer" == name || "wbr" == name) {
// These elements are appended without being pushed onto the stack.
1339 reconstructTheActiveFormattingElements();
1340 appendVoidElementToCurrentMayFoster(name, attributes);
1341 return;
1342 } else if ("hr" == name) {
1343 implicitlyCloseP();
1344 appendVoidElementToCurrentMayFoster(name, attributes);
1345 return;
1346 } else if ("image" == name) {
// Report the error, rename the tag to "img" and reprocess it.
1347 err("Saw a start tag \u201Cimage\u201D.");
1348 name = "img";
1349 continue;
1350 } else if ("input" == name) {
1351 reconstructTheActiveFormattingElements();
1352 appendVoidElementToCurrentMayFoster(name, attributes, formPointer);
1353 return;
1354 } else if ("isindex" == name) {
// "isindex" is expanded into form > (hr, p > label > (prompt text, input), hr).
// The tag is dropped entirely when a form is already active.
1355 err("\u201Cisindex\u201D seen.");
1356 if (formPointer != null) {
1357 return;
1358 }
1359 implicitlyCloseP();
// Only the "action" attribute, if present, is carried over to the form.
1360 AttributesImpl formAttrs = tokenizer.newAttributes();
1361 int actionIndex = attributes.getIndex("action");
1362 if (actionIndex > -1) {
1363 formAttrs.addAttribute("action",
1364 attributes.getValue(actionIndex));
1365 }
1366 appendToCurrentNodeAndPushFormElementMayFoster(formAttrs);
1367 appendVoidElementToCurrentMayFoster("hr", EmptyAttributes.EMPTY_ATTRIBUTES);
1368 appendToCurrentNodeAndPushElementMayFoster("p",
1369 EmptyAttributes.EMPTY_ATTRIBUTES);
1370 appendToCurrentNodeAndPushElementMayFoster("label",
1371 EmptyAttributes.EMPTY_ATTRIBUTES);
// The label text is the "prompt" attribute value when given, otherwise the
// built-in ISINDEX_PROMPT.
1372 int promptIndex = attributes.getIndex("prompt");
1373 if (promptIndex > -1) {
1374 char[] prompt = attributes.getValue(promptIndex).toCharArray();
1375 appendCharacters(stack[currentPtr].node, prompt,
1376 0, prompt.length);
1377 } else {
1378 // XXX localization
1379 appendCharacters(stack[currentPtr].node, ISINDEX_PROMPT,
1380 0, ISINDEX_PROMPT.length);
1381 }
// The input is always named "isindex"; every other attribute except
// "name", "action" and "prompt" is copied over from the original tag.
1382 AttributesImpl inputAttributes = tokenizer.newAttributes();
1383 inputAttributes.addAttribute("name", "isindex");
1384 for (int i = 0; i < attributes.getLength(); i++) {
1385 String attributeQName = attributes.getQName(i);
1386 if (!("name".equals(attributeQName)
1387 || "action".equals(attributeQName) || "prompt".equals(attributeQName))) {
1388 inputAttributes.addAttribute(attributeQName,
1389 attributes.getValue(i));
1390 }
1391 }
1392 appendVoidElementToCurrentMayFoster("input", inputAttributes, formPointer);
1393 // XXX localization
1394 pop(); // label
1395 pop(); // p
1396 appendVoidElementToCurrentMayFoster("hr", EmptyAttributes.EMPTY_ATTRIBUTES);
1397 pop(); // form
1398 return;
1399 } else if ("textarea" == name) {
1400 appendToCurrentNodeAndPushElementMayFoster(name, attributes, formPointer);
1401 tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
1402 name);
1403 cdataOrRcdataTimesToPop = 1;
1404 needToDropLF = true;
1405 return;
1406 } else if ("iframe" == name || "noembed" == name
1407 || "noframes" == name
1408 || ("noscript" == name && scriptingEnabled)) {
// "noscript" takes this branch only when scripting is enabled; otherwise
// it reaches the generic else branch at the end of this chain.
1409 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1410 cdataOrRcdataTimesToPop = 1;
1411 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1412 name);
1413 return;
1414 } else if ("select" == name) {
1415 reconstructTheActiveFormattingElements();
1416 // XXX form pointer
1417 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1418 phase = Phase.IN_SELECT;
1419 return;
1420 } else if ("caption" == name || "col" == name
1421 || "colgroup" == name || "frame" == name
1422 || "frameset" == name || "head" == name
1423 || "option" == name || "optgroup" == name
1424 || "tbody" == name || "td" == name
1425 || "tfoot" == name || "th" == name
1426 || "thead" == name || "tr" == name) {
// These tags are reported and ignored in this phase.
1427 err("Stray start tag \u201C" + name + "\u201D.");
1428 return;
1429 } else {
// Any other start tag becomes an ordinary element.
1430 reconstructTheActiveFormattingElements();
1431 appendToCurrentNodeAndPushElementMayFoster(name, attributes);
1432 return;
1433 }
// Reached directly in phase IN_HEAD, and by fall-through from IN_BODY for
// "base", "link", "meta", "style" and "script".
1434 case IN_HEAD:
1435 if ("html" == name) {
1436 err("Stray \u201Chtml\u201D start tag.");
1437 addAttributesToElement(stack[0].node, attributes);
1438 return;
1439 } else if ("base" == name) {
1440 appendVoidElementToCurrentMayFoster(name, attributes);
1441 return;
1442 } else if ("meta" == name || "link" == name) {
1443 // Fall through to IN_HEAD_NOSCRIPT
// Intentionally empty: control leaves the chain and enters the
// IN_HEAD_NOSCRIPT label, whose "link"/"meta" branches append the element.
1444 } else if ("title" == name) {
1445 appendToCurrentNodeAndPushElement(name, attributes);
1446 cdataOrRcdataTimesToPop = 1;
1447 tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
1448 name);
1449 return;
1450 } else if ("style" == name
1451 || ("noscript" == name && scriptingEnabled)) {
1452 appendToCurrentNodeAndPushElement(name, attributes);
1453 cdataOrRcdataTimesToPop = 1;
1454 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1455 name);
1456 return;
1457 } else if ("noscript" == name && !scriptingEnabled) {
// With scripting disabled the content of "noscript" is parsed as markup
// under its own phase instead of being treated as CDATA.
1458 appendToCurrentNodeAndPushElement(name, attributes);
1459 phase = Phase.IN_HEAD_NOSCRIPT;
1460 return;
1461 } else if ("script" == name) {
1462 // XXX need to manage much more stuff here if supporting
1463 // document.write()
1464 appendToCurrentNodeAndPushElement(name, attributes);
1465 cdataOrRcdataTimesToPop = 1;
1466 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1467 name);
1468 return;
1469 } else if ("head" == name) {
1470 /* Parse error. */
1471 err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
1472 /* Ignore the token. */
1473 return;
1474 } else {
// Any other tag pops the current node and is reprocessed in AFTER_HEAD.
1475 pop();
1476 phase = Phase.AFTER_HEAD;
1477 continue;
1478 }
1479 case IN_HEAD_NOSCRIPT:
1480 // XXX did Hixie really mean to omit "base" here?
1481 if ("html" == name) {
1482 err("Stray \u201Chtml\u201D start tag.");
1483 addAttributesToElement(stack[0].node, attributes);
1484 return;
1485 } else if ("link" == name) {
1486 appendVoidElementToCurrentMayFoster(name, attributes);
1487 return;
1488 } else if ("meta" == name) {
1489 // XXX do charset stuff
1490 appendVoidElementToCurrentMayFoster(name, attributes);
1491 return;
1492 } else if ("style" == name) {
1493 appendToCurrentNodeAndPushElement(name, attributes);
1494 cdataOrRcdataTimesToPop = 1;
1495 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1496 name);
1497 return;
1498 } else if ("head" == name) {
1499 err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
1500 return;
1501 } else if ("noscript" == name) {
1502 err("Start tag for \u201Cnoscript\u201D seen when \u201Cnoscript\u201D was already open.");
1503 return;
1504 } else {
// Report the error, pop the current node and reprocess the tag in IN_HEAD.
1505 err("Bad start tag in \u201Cnoscript\u201D in \u201Chead\u201D.");
1506 pop();
1507 phase = Phase.IN_HEAD;
1508 continue;
1509 }
1510 case IN_COLUMN_GROUP:
1511 if ("html" == name) {
1512 err("Stray \u201Chtml\u201D start tag.");
1513 addAttributesToElement(stack[0].node, attributes);
1514 return;
1515 } else if ("col" == name) {
1516 appendVoidElementToCurrentMayFoster(name, attributes);
1517 return;
1518 } else {
1519 if (currentPtr == 0) {
// Only the root is on the stack; the assert records that this is only
// expected when a context is set.
1520 assert context != null;
1521 err("Garbage in \u201Ccolgroup\u201D fragment.");
1522 return;
1523 }
// Pop the current node and reprocess the tag in IN_TABLE.
1524 pop();
1525 phase = Phase.IN_TABLE;
1526 continue;
1527 }
1528 case IN_SELECT:
1529 if ("html" == name) {
1530 err("Stray \u201Chtml\u201D start tag.");
1531 addAttributesToElement(stack[0].node, attributes);
1532 return;
1533 } else if ("option" == name) {
// A new "option" closes a currently open "option".
1534 if (isCurrent("option")) {
1535 pop();
1536 }
1537 appendToCurrentNodeAndPushElement(name, attributes);
1538 return;
1539 } else if ("optgroup" == name) {
// A new "optgroup" closes an open "option" and then an open "optgroup".
1540 if (isCurrent("option")) {
1541 pop();
1542 }
1543 if (isCurrent("optgroup")) {
1544 pop();
1545 }
1546 appendToCurrentNodeAndPushElement(name, attributes);
1547 return;
1548 } else if ("select" == name) {
// A "select" start tag inside a select is handled like an end tag: pop
// through the open "select" and recompute the phase. The token itself is
// consumed (return, not continue).
1549 err("\u201Cselect\u201D start tag where end tag expected.");
1550 int eltPos = findLastInTableScope(name);
1551 if (eltPos == NOT_FOUND_ON_STACK) {
1552 assert context != null;
1553 err("No \u201Cselect\u201D in table scope.");
1554 return;
1555 } else {
1556 while (currentPtr >= eltPos) {
1557 pop();
1558 }
1559 resetTheInsertionMode();
1560 return;
1561 }
1562 } else {
1563 err("Stray \u201C" + name + "\u201D start tag.");
1564 return;
1565 }
1566 case AFTER_BODY:
1567 if ("html" == name) {
1568 err("Stray \u201Chtml\u201D start tag.");
1569 addAttributesToElement(stack[0].node, attributes);
1570 return;
1571 } else {
// Content after the body was closed: report it, call fatal() when
// conformingAndStreaming is set, then go back to IN_BODY and reprocess.
1572 err("Stray \u201C" + name + "\u201D start tag.");
1573 if (conformingAndStreaming) {
1574 fatal();
1575 }
1576 phase = Phase.IN_BODY;
1577 continue;
1578 }
1579 case IN_FRAMESET:
1580 if ("frameset" == name) {
1581 appendToCurrentNodeAndPushElement(name, attributes);
1582 return;
1583 } else if ("frame" == name) {
1584 appendVoidElementToCurrentMayFoster(name, attributes);
1585 return;
1586 } else {
1587 // fall through to AFTER_FRAMESET
1588 }
1589 case AFTER_FRAMESET:
1590 if ("html" == name) {
1591 err("Stray \u201Chtml\u201D start tag.");
1592 addAttributesToElement(stack[0].node, attributes);
1593 return;
1594 } else if ("noframes" == name) {
1595 appendToCurrentNodeAndPushElement(name, attributes);
1596 cdataOrRcdataTimesToPop = 1;
1597 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1598 name);
1599 return;
1600 } else {
1601 err("Stray \u201C" + name + "\u201D start tag.");
1602 return;
1603 }
1604 case INITIAL:
1605 /*
1606 * Parse error.
1607 */
// The error is suppressed when the doctype expectation is NO_DOCTYPE_ERRORS.
1608 if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) {
1609 err("Start tag seen without seeing a doctype first.");
1610 }
1611 /*
1612 *
1613 * Set the document to quirks mode.
1614 */
1615 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, false);
1616 /*
1617 * Then, switch to the root element phase of the tree
1618 * construction stage
1619 */
1620 phase = Phase.ROOT_ELEMENT;
1621 /*
1622 * and reprocess the current token.
1623 */
1624 continue;
1625 case ROOT_ELEMENT:
1626 // optimize error check and streaming SAX by hoisting
1627 // "html" handling here.
1628 if ("html" == name) {
1629 if (attributes.getLength() == 0) {
1630 // This has the right magic side effect that it
1631 // makes attributes in SAX Tree mutable.
1632 appendHtmlElementToDocumentAndPush();
1633 } else {
1634 appendHtmlElementToDocumentAndPush(attributes);
1635 }
1636 phase = Phase.BEFORE_HEAD;
1637 return;
1638 } else {
1639 /*
1640 * Create an HTMLElement node with the tag name html, in
1641 * the HTML namespace. Append it to the Document object.
1642 */
1643 appendHtmlElementToDocumentAndPush();
1644 /* Switch to the main phase */
1645 phase = Phase.BEFORE_HEAD;
1646 /*
1647 * reprocess the current token.
1648 *
1649 */
1650 continue;
1651 }
1652 case BEFORE_HEAD:
1653 if ("html" == name) {
1654 err("Stray \u201Chtml\u201D start tag.");
1655 addAttributesToElement(stack[0].node, attributes);
1656 return;
1657 } else if ("head" == name) {
1658 /*
1659 * A start tag whose tag name is "head"
1660 *
1661 * Create an element for the token.
1662 *
1663 * Set the head element pointer to this new element
1664 * node.
1665 *
1666 * Append the new element to the current node and push
1667 * it onto the stack of open elements.
1668 */
1669 appendToCurrentNodeAndPushHeadElement(attributes);
1670 /*
1671 *
1672 * Change the insertion mode to "in head".
1673 *
1674 */
1675 phase = Phase.IN_HEAD;
1676 return;
1677 }
1678
1679 /*
1680 * Any other start tag token
1681 */
1682
1683 /*
1684 * Act as if a start tag token with the tag name "head" and
1685 * no attributes had been seen,
1686 */
1687 appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES);
1688 phase = Phase.IN_HEAD;
1689 /*
1690 * then reprocess the current token.
1691 *
1692 * This will result in an empty head element being
1693 * generated, with the current token being reprocessed in
1694 * the "after head" insertion mode.
1695 */
1696 continue;
1697 case AFTER_HEAD:
1698 if ("html" == name) {
1699 err("Stray \u201Chtml\u201D start tag.");
1700 addAttributesToElement(stack[0].node, attributes);
1701 return;
1702 } else if ("body" == name) {
1703 if (attributes.getLength() == 0) {
1704 // This has the right magic side effect that it
1705 // makes attributes in SAX Tree mutable.
1706 appendToCurrentNodeAndPushBodyElement();
1707 } else {
1708 appendToCurrentNodeAndPushBodyElement(attributes);
1709 }
1710 phase = Phase.IN_BODY;
1711 return;
1712 } else if ("frameset" == name) {
1713 appendToCurrentNodeAndPushElement(name, attributes);
1714 phase = Phase.IN_FRAMESET;
1715 return;
1716 } else if ("base" == name) {
// Head-only element seen after the head was closed. Unless
// nonConformingAndStreaming is set, the head pointer is pushed around the
// append and popped again afterwards.
1717 err("\u201Cbase\u201D element outside \u201Chead\u201D.");
1718 if (!nonConformingAndStreaming) {
1719 pushHeadPointerOntoStack();
1720 }
1721 appendVoidElementToCurrentMayFoster(name, attributes);
1722 if (!nonConformingAndStreaming) {
1723 pop(); // head
1724 }
1725 return;
1726 } else if ("link" == name) {
// Same handling as the "base" branch above.
1727 err("\u201Clink\u201D element outside \u201Chead\u201D.");
1728 if (!nonConformingAndStreaming) {
1729 pushHeadPointerOntoStack();
1730 }
1731 appendVoidElementToCurrentMayFoster(name, attributes);
1732 if (!nonConformingAndStreaming) {
1733 pop(); // head
1734 }
1735 return;
1736 } else if ("meta" == name) {
// Same handling as the "base" branch above.
1737 err("\u201Cmeta\u201D element outside \u201Chead\u201D.");
1738 // XXX do charset stuff
1739 if (!nonConformingAndStreaming) {
1740 pushHeadPointerOntoStack();
1741 }
1742 appendVoidElementToCurrentMayFoster(name, attributes);
1743 if (!nonConformingAndStreaming) {
1744 pop(); // head
1745 }
1746 return;
1747 } else if ("script" == name) {
// For the three branches below the element stays open until its end tag,
// so the pushed head pointer is popped later through
// cdataOrRcdataTimesToPop (2 = element plus head).
1748 err("\u201Cscript\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
1749 if (!nonConformingAndStreaming) {
1750 pushHeadPointerOntoStack();
1751 }
1752 appendToCurrentNodeAndPushElement(name, attributes);
1753 cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
1754 : 2; // pops head
1755 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1756 name);
1757 return;
1758 } else if ("style" == name) {
1759 err("\u201Cstyle\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
1760 if (!nonConformingAndStreaming) {
1761 pushHeadPointerOntoStack();
1762 }
1763 appendToCurrentNodeAndPushElement(name, attributes);
1764 cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
1765 : 2; // pops head
1766 tokenizer.setContentModelFlag(ContentModelFlag.CDATA,
1767 name);
1768 return;
1769 } else if ("title" == name) {
1770 err("\u201Ctitle\u201D element outside \u201Chead\u201D.");
1771 if (!nonConformingAndStreaming) {
1772 pushHeadPointerOntoStack();
1773 }
1774 appendToCurrentNodeAndPushElement(name, attributes);
1775 cdataOrRcdataTimesToPop = nonConformingAndStreaming ? 1
1776 : 2; // pops head
1777 tokenizer.setContentModelFlag(ContentModelFlag.RCDATA,
1778 name);
1779 return;
1780 } else {
// Any other tag implies a body: create it and reprocess the tag in IN_BODY.
1781 appendToCurrentNodeAndPushBodyElement();
1782 phase = Phase.IN_BODY;
1783 continue;
1784 }
1785 case TRAILING_END:
// Report the error, call fatal() when conformingAndStreaming is set, then
// restore the phase saved in previousPhaseBeforeTrailingEnd and reprocess.
1786 err("Stray \u201C" + name + "\u201D start tag.");
1787 if (conformingAndStreaming) {
1788 fatal();
1789 }
1790 phase = previousPhaseBeforeTrailingEnd;
1791 continue;
1792 }
1793 }
1794 }
1795
1796 public final void endTag(String name, Attributes attributes)
1797 throws SAXException {
1798 needToDropLF = false;
1799 if (cdataOrRcdataTimesToPop > 0) {
1800 while (cdataOrRcdataTimesToPop > 0) {
1801 pop();
1802 cdataOrRcdataTimesToPop--;
1803 }
1804 return;
1805 }
1806
1807 for (;;) {
1808 switch (phase) {
1809 case IN_ROW:
1810 if ("tr" == name) {
1811 int eltPos = findLastOrRoot("tr");
1812 if (eltPos == 0) {
1813 assert context != null;
1814 err("No table row to close.");
1815 return;
1816 }
1817 clearStackBackTo(eltPos);
1818 pop();
1819 phase = Phase.IN_TABLE_BODY;
1820 return;
1821 } else if ("table" == name) {
1822 int eltPos = findLastOrRoot("tr");
1823 if (eltPos == 0) {
1824 assert context != null;
1825 err("No table row to close.");
1826 return;
1827 }
1828 clearStackBackTo(eltPos);
1829 pop();
1830 phase = Phase.IN_TABLE_BODY;
1831 continue;
1832 } else if ("tbody" == name || "thead" == name || "tfoot" == name) {
1833 if (findLastInTableScope(name) == NOT_FOUND_ON_STACK) {
1834 err("Stray end tag \u201C" + name + "\u201D.");
1835 return;
1836 }
1837 int eltPos = findLastOrRoot("tr");
1838 if (eltPos == 0) {
1839 assert context != null;
1840 err("No table row to close.");
1841 return;
1842 }
1843 clearStackBackTo(eltPos);
1844 pop();
1845 phase = Phase.IN_TABLE_BODY;
1846 continue;
1847 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name || "td" == name || "th" == name) {
1848 err("Stray end tag \u201C" + name + "\u201D.");
1849 return;
1850 } else {
1851 // fall through to IN_TABLE
1852 }
1853 case IN_TABLE_BODY:
1854 if ("tbody" == name || "tfoot" == name || "thead" == name) {
1855 int eltPos = findLastOrRoot(name);
1856 if (eltPos == 0) {
1857 err("Stray end tag \u201C" + name + "\u201D.");
1858 return;
1859 }
1860 clearStackBackTo(eltPos);
1861 pop();
1862 phase = Phase.IN_TABLE;
1863 return;
1864 } else if ("table" == name) {
1865 int eltPos = findLastInTableScopeOrRootTbodyTheadTfoot();
1866 if (eltPos == 0) {
1867 assert context != null;
1868 err("Stray end tag \u201Ctable\u201D.");
1869 return;
1870 }
1871 clearStackBackTo(eltPos);
1872 pop();
1873 phase = Phase.IN_TABLE;
1874 continue;
1875 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name || "td" == name || "th" == name || "tr" == name) {
1876 err("Stray end tag \u201C" + name + "\u201D.");
1877 return;
1878 } else {
1879 // fall through to IN_TABLE
1880 }
1881 case IN_TABLE:
1882 if ("table" == name) {
1883 int eltPos = findLast("table");
1884 if (eltPos == NOT_FOUND_ON_STACK) {
1885 assert context != null;
1886 err("Stray end tag \u201Ctable\u201D.");
1887 return;
1888 }
1889 generateImpliedEndTags();
1890 if (currentPtr != eltPos) {
1891 err("There were unclosed elements.");
1892 }
1893 while (currentPtr >= eltPos) {
1894 pop();
1895 }
1896 resetTheInsertionMode();
1897 return;
1898 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name || "tbody" == name || "td" == name || "tfoot" == name || "th" == name || "thead" == name || "tr" == name) {
1899 err("Stray end tag \u201C" + name + "\u201D.");
1900 return;
1901 } else {
1902 err("Stray end tag \u201C" + name + "\u201D.");
1903 // fall through to IN_BODY
1904 }
1905 case IN_CAPTION:
1906 if ("caption" == name) {
1907 int eltPos = findLastInTableScope("caption");
1908 if (eltPos == NOT_FOUND_ON_STACK) {
1909 return;
1910 }
1911 generateImpliedEndTags();
1912 if (currentPtr != eltPos) {
1913 err("Unclosed elements on stack.");
1914 }
1915 while (currentPtr >= eltPos) {
1916 pop();
1917 }
1918 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
1919 phase = Phase.IN_TABLE;
1920 return;
1921 } else if ("table" == name) {
1922 err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open.");
1923 int eltPos = findLastInTableScope("caption");
1924 if (eltPos == NOT_FOUND_ON_STACK) {
1925 return;
1926 }
1927 generateImpliedEndTags();
1928 if (currentPtr != eltPos) {
1929 err("Unclosed elements on stack.");
1930 }
1931 while (currentPtr >= eltPos) {
1932 pop();
1933 }
1934 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
1935 phase = Phase.IN_TABLE;
1936 continue;
1937 } else if ("body" == name || "col" == name || "colgroup" == name || "html" == name || "tbody" == name || "td" == name || "tfoot" == name || "th" == name || "thead" == name || "tr" == name) {
1938 err("Stray end tag \u201C" + name + "\u201D.");
1939 return;
1940 } else {
1941 // fall through to IN_BODY
1942 }
1943 case IN_CELL:
1944 if ("td" == name || "th" == name) {
1945 int eltPos = findLastInTableScope(name);
1946 if (eltPos == NOT_FOUND_ON_STACK) {
1947 err("Stray end tag \u201C" + name + "\u201D.");
1948 return;
1949 }
1950 generateImpliedEndTags();
1951 if (!isCurrent(name)) {
1952 err("Unclosed elements.");
1953 }
1954 while (currentPtr >= eltPos) {
1955 pop();
1956 }
1957 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
1958 phase = Phase.IN_ROW;
1959 return;
1960 } else if ("table" == name || "tbody" == name || "tfoot" == name || "thead" == name || "tr" == name) {
1961 if (findLastInTableScope(name) == NOT_FOUND_ON_STACK) {
1962 err("Stray end tag \u201C" + name + "\u201D.");
1963 return;
1964 }
1965 closeTheCell(findLastInTableScopeTdTh());
1966 continue;
1967 } else if ("body" == name || "caption" == name || "col" == name || "colgroup" == name || "html" == name) {
1968 err("Stray end tag \u201C" + name + "\u201D.");
1969 return;
1970 } else {
1971 // fall through to IN_BODY
1972 }
1973 case IN_BODY:
1974 if ("body" == name) {
1975 if (!isSecondOnStackBody()) {
1976 assert context != null;
1977 err("Stray end tag \u201Cbody\u201D.");
1978 return;
1979 }
1980 assert currentPtr >= 1;
1981 for (int i = 2; i <= currentPtr; i++) {
1982 String stackName = stack[i].name;
1983 if (!("dd" == stackName || "dt" == stackName || "li" == stackName
1984 || "p" == stackName)) {
1985 err("End tag for \u201Cbody\u201D seen but there were unclosed elements.");
1986 break;
1987 }
1988 }
1989 if (conformingAndStreaming) {
1990 while(currentPtr > 1) {
1991 pop();
1992 }
1993 }
1994 if (context == null) {
1995 bodyClosed(stack[1].node);
1996 }
1997 phase = Phase.AFTER_BODY;
1998 return;
1999 } else if ("html" == name) {
2000 if (!isSecondOnStackBody()) {
2001 assert context != null;
2002 err("Stray end tag \u201Chtml\u201D.");
2003 return;
2004 }
2005 for (int i = 0; i <= currentPtr; i++) {
2006 String stackName = stack[i].name;
2007 if (!("dd" == stackName || "dt" == stackName || "li" == stackName
2008 || "p" == stackName || "tbody" == stackName || "td" == stackName
2009 || "tfoot" == stackName || "th" == stackName || "thead" == stackName || "tr" == stackName || "body" == stackName || "html" == stackName)) {
2010 err("End tag for \u201Chtml\u201D seen but there were unclosed elements.");
2011 break;
2012 }
2013 }
2014 if (context == null) {
2015 bodyClosed(stack[1].node);
2016 }
2017 phase = Phase.AFTER_BODY;
2018 continue;
2019 } else if ("div" == name || "blockquote" == name
2020 || "ul" == name || "ol" == name || "pre" == name
2021 || "dl" == name || "fieldset" == name
2022 || "address" == name || "center" == name
2023 || "dir" == name || "listing" == name
2024 || "menu" == name) {
2025 int eltPos = findLastInScope(name);
2026 if (eltPos != NOT_FOUND_ON_STACK) {
2027 generateImpliedEndTags();
2028 }
2029 if (!isCurrent(name)) {
2030 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements.");
2031 }
2032 while (currentPtr >= eltPos) {
2033 pop();
2034 }
2035 return;
2036 } else if ("form" == name) {
2037 int eltPos = findLastInScope(name);
2038 if (eltPos != NOT_FOUND_ON_STACK) {
2039 generateImpliedEndTags();
2040 }
2041 if (!isCurrent(name)) {
2042 err("End tag \u201Cform\u201D seen but there were unclosed elements.");
2043 } else {
2044 pop();
2045 }
2046 formPointer = null;
2047 return;
2048 } else if ("p" == name) {
2049 if (!isCurrent(name)) {
2050 err("End tag \u201Cp\u201D seen but there were unclosed elements.");
2051 }
2052 int eltPos = findLastInScope(name);
2053 if (eltPos != NOT_FOUND_ON_STACK) {
2054 while (currentPtr >= eltPos) {
2055 pop();
2056 }
2057 } else {
2058 appendVoidElementToCurrentMayFoster(name, EmptyAttributes.EMPTY_ATTRIBUTES);
2059 }
2060 return;
2061 } else if ("dd" == name || "dt" == name || "li" == name) {
2062 int eltPos = findLastInScope(name);
2063 if (eltPos != NOT_FOUND_ON_STACK) {
2064 generateImpliedEndTagsExceptFor(name);
2065 }
2066 if (!isCurrent(name)) {
2067 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements.");
2068 }
2069 while (currentPtr >= eltPos) {
2070 pop();
2071 }
2072 return;
2073 } else if ("h1" == name || "h2" == name || "h3" == name
2074 || "h4" == name || "h5" == name || "h6" == name) {
2075 int eltPos = findLastInScopeHn();
2076 if (eltPos != NOT_FOUND_ON_STACK) {
2077 generateImpliedEndTags();
2078 }
2079 if (!isCurrent(name)) {
2080 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements.");
2081 }
2082 while (currentPtr >= eltPos) {
2083 pop();
2084 }
2085 return;
2086 } else if ("a" == name || "b" == name || "big" == name || "em" == name || "font" == name || "i" == name || "nobr" == name || "s" == name || "small" == name || "strike" == name || "strong" == name || "tt" == name || "u" == name) {
2087 adoptionAgencyEndTag(name);
2088 return;
2089 } else if ("button" == name || "marquee" == name || "object" == name) {
2090 int eltPos = findLastInScope(name);
2091 if (eltPos != NOT_FOUND_ON_STACK) {
2092 generateImpliedEndTags();
2093 }
2094 if (!isCurrent(name)) {
2095 err("End tag \u201C" + name + "\u201D seen but there were unclosed elements.");
2096 }
2097 while (currentPtr >= eltPos) {
2098 pop();
2099 }
2100 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
2101 return;
2102 } else if ("br" == name) {
2103 err("End tag \u201Cbr\u201D.");
2104 reconstructTheActiveFormattingElements();
2105 appendVoidElementToCurrentMayFoster(name, EmptyAttributes.EMPTY_ATTRIBUTES);
2106 return;
2107 } else if ("area" == name || "basefont" == name || "bgsound" == name || "embed" == name || "hr" == name || "iframe" == name || "image" == name || "img" == name || "input" == name || "isindex" == name || "noembed" == name || "noframes" == name || "param" == name || "select" == name || "spacer" == name || "table" == name || "textarea" == name || "wbr" == name || (scriptingEnabled && "noscript" == name)) {
2108 err("Stray end tag \u201C" + name + "\u201D.");
2109 return;
2110 } else {
2111 if (isCurrent(name)) {
2112 pop();
2113 return;
2114 }
2115 for(;;) {
2116 generateImpliedEndTags();
2117 if (isCurrent(name)) {
2118 pop();
2119 return;
2120 }
2121 StackNode<T> node = stack[currentPtr];
2122 if (!(node.scoping || node.special)) {
2123 err("Unclosed element \u201C" + node.name
2124 + "\u201D.");
2125 pop();
2126 } else {
2127 return;
2128 }
2129 }
2130 }
2131 case IN_COLUMN_GROUP:
2132 if ("colgroup" == name) {
2133 if (currentPtr == 0) {
2134 assert context != null;
2135 err("Garbage in \u201Ccolgroup\u201D fragment.");
2136 return;
2137 }
2138 pop();
2139 phase = Phase.IN_TABLE;
2140 return;
2141 } else if ("col" == name) {
2142 err("Stray end tag \u201Ccol\u201D.");
2143 return;
2144 } else {
2145 if (currentPtr == 0) {
2146 assert context != null;
2147 err("Garbage in \u201Ccolgroup\u201D fragment.");
2148 return;
2149 }
2150 pop();
2151 phase = Phase.IN_TABLE;
2152 continue;
2153 }
2154 case IN_SELECT:
2155 if ("option" == name) {
2156 if (isCurrent("option")) {
2157 pop();
2158 return;
2159 } else {
2160 err("Stray end tag \u201Coption\u201D");
2161 return;
2162 }
2163 } else if ("optgroup" == name) {
2164 if (isCurrent("option") && "optgroup" == stack[currentPtr - 1].name) {
2165 pop();
2166 }
2167 if (isCurrent("optgroup")) {
2168 pop();
2169 } else {
2170 err("Stray end tag \u201Coptgroup\u201D");
2171 }
2172 return;
2173 } else if ("select" == name) {
2174 int eltPos = findLastInTableScope("select");
2175 if (eltPos == NOT_FOUND_ON_STACK) {
2176 assert context != null;
2177 err("Stray end tag \u201Cselect\u201D");
2178 return;
2179 }
2180 while (currentPtr >= eltPos) {
2181 pop();
2182 }
2183 resetTheInsertionMode();
2184 return;
2185 } else {
2186 err("Stray end tag \u201C" + name + "\u201D");
2187 return;
2188 }
2189 case AFTER_BODY:
2190 if ("html" == name) {
2191 if (context != null) {
2192 err("Stray end tag \u201Chtml\u201D");
2193 return;
2194 } else {
2195 previousPhaseBeforeTrailingEnd = Phase.AFTER_BODY;
2196 if (context == null) {
2197 htmlClosed(stack[0].node);
2198 }
2199 phase = Phase.TRAILING_END;
2200 return;
2201 }
2202 } else {
2203 err("Saw an end tag after \u201Cbody\u201D had been closed.");
2204 if (conformingAndStreaming) {
2205 fatal();
2206 }
2207 phase = Phase.IN_BODY;
2208 continue;
2209 }
2210 case IN_FRAMESET:
2211 if ("frameset" == name) {
2212 if (currentPtr == 0) {
2213 assert context != null;
2214 err("Stray end tag \u201Cframeset\u201D");
2215 return;
2216 }
2217 pop();
2218 if ((context == null) && !isCurrent("frameset")) {
2219 phase = Phase.AFTER_FRAMESET;
2220 }
2221 return;
2222 } else {
2223 err("Stray end tag \u201C" + name + "\u201D");
2224 return;
2225 }
2226 case AFTER_FRAMESET:
2227 if ("html" == name) {
2228 previousPhaseBeforeTrailingEnd = Phase.AFTER_FRAMESET;
2229 if (context == null) {
2230 htmlClosed(stack[0].node);
2231 }
2232 phase = Phase.TRAILING_END;
2233 return;
2234 } else {
2235 err("Stray end tag \u201C" + name + "\u201D");
2236 return;
2237 }
2238 case INITIAL:
2239 /*
2240 * Parse error.
2241 */
2242 if (doctypeExpectation != DoctypeExpectation.NO_DOCTYPE_ERRORS) {
2243 err("End tag seen without seeing a doctype first.");
2244 }
2245 /*
2246 *
2247 * Set the document to quirks mode.
2248 */
2249 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, false);
2250 /*
2251 * Then, switch to the root element phase of the tree
2252 * construction stage
2253 */
2254 phase = Phase.ROOT_ELEMENT;
2255 /*
2256 * and reprocess the current token.
2257 */
2258 continue;
2259 case ROOT_ELEMENT:
2260 /*
2261 * Create an HTMLElement node with the tag name html, in the
2262 * HTML namespace. Append it to the Document object.
2263 */
2264 appendHtmlElementToDocumentAndPush();
2265 /* Switch to the main phase */
2266 phase = Phase.BEFORE_HEAD;
2267 /*
2268 * reprocess the current token.
2269 *
2270 */
2271 continue;
2272 case BEFORE_HEAD:
2273 if ("head" == name || "body" == name || "html" == name || "p" == name || "br" == name) {
2274 appendToCurrentNodeAndPushHeadElement(EmptyAttributes.EMPTY_ATTRIBUTES);
2275 phase = Phase.IN_HEAD;
2276 continue;
2277 } else {
2278 err("Stray end tag \u201C" + name + "\u201D.");
2279 return;
2280 }
2281 case IN_HEAD:
2282 if ("head" == name) {
2283 pop();
2284 phase = Phase.AFTER_HEAD;
2285 return;
2286 } else if ("body" == name || "html" == name || "p" == name || "br" == name) {
2287 pop();
2288 phase = Phase.AFTER_HEAD;
2289 continue;
2290 } else {
2291 err("Stray end tag \u201C" + name + "\u201D.");
2292 return;
2293 }
2294 case IN_HEAD_NOSCRIPT:
2295 if ("noscript" == name) {
2296 pop();
2297 phase = Phase.IN_HEAD;
2298 return;
2299 } else if ("p" == name || "br" == name) {
2300 err("Stray end tag \u201C" + name + "\u201D.");
2301 pop();
2302 phase = Phase.IN_HEAD;
2303 continue;
2304 } else {
2305 err("Stray end tag \u201C" + name + "\u201D.");
2306 return;
2307 }
2308 case AFTER_HEAD:
2309 appendToCurrentNodeAndPushBodyElement();
2310 phase = Phase.IN_BODY;
2311 continue;
2312 case TRAILING_END:
2313 err("Stray \u201C" + name + "\u201D end tag.");
2314 if (conformingAndStreaming) {
2315 fatal();
2316 }
2317 phase = previousPhaseBeforeTrailingEnd;
2318 continue;
2319 }
2320 }
2321 }
2322
2323 private int findLastInTableScopeOrRootTbodyTheadTfoot() {
2324 for (int i = currentPtr; i > 0; i--) {
2325 if (stack[i].name == "tbody" || stack[i].name == "thead" || stack[i].name == "tfoot") {
2326 return i;
2327 }
2328 }
2329 return 0;
2330 }
2331
2332 private int findLast(String name) {
2333 for (int i = currentPtr; i > 0; i--) {
2334 if (stack[i].name == name) {
2335 return i;
2336 }
2337 }
2338 return NOT_FOUND_ON_STACK;
2339 }
2340
2341 private int findLastInTableScope(String name) {
2342 for (int i = currentPtr; i > 0; i--) {
2343 if (stack[i].name == name) {
2344 return i;
2345 } else if (stack[i].name == "table") {
2346 return NOT_FOUND_ON_STACK;
2347 }
2348 }
2349 return NOT_FOUND_ON_STACK;
2350 }
2351
2352 private int findLastInScope(String name) {
2353 for (int i = currentPtr; i > 0; i--) {
2354 if (stack[i].name == name) {
2355 return i;
2356 } else if (stack[i].scoping) {
2357 return NOT_FOUND_ON_STACK;
2358 }
2359 }
2360 return NOT_FOUND_ON_STACK;
2361 }
2362
2363 private int findLastInScopeHn() {
2364 for (int i = currentPtr; i > 0; i--) {
2365 String name = stack[i].name;
2366 if ("h1" == name || "h2" == name || "h3" == name || "h4" == name
2367 || "h5" == name || "h6" == name) {
2368 return i;
2369 } else if (stack[i].scoping) {
2370 return NOT_FOUND_ON_STACK;
2371 }
2372 }
2373 return NOT_FOUND_ON_STACK;
2374 }
2375
2376 private void generateImpliedEndTagsExceptFor(String name) throws SAXException {
2377 for (;;) {
2378 String stackName = stack[currentPtr].name;
2379 if (name != stackName && ("p" == stackName || "li" == stackName || "dd" == stackName || "dt" == stackName)) {
2380 pop();
2381 } else {
2382 return;
2383 }
2384 }
2385 }
2386
2387 private void generateImpliedEndTags() throws SAXException {
2388 for (;;) {
2389 String stackName = stack[currentPtr].name;
2390 if ("p" == stackName || "li" == stackName || "dd" == stackName || "dt" == stackName) {
2391 pop();
2392 } else {
2393 return;
2394 }
2395 }
2396 }
2397
2398 private boolean isSecondOnStackBody() {
2399 return currentPtr >= 1 && stack[1].name == "body";
2400 }
2401
2402 private void documentModeInternal(DocumentMode mode, String publicIdentifier,
2403 String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException {
2404 if (documentModeHandler != null) {
2405 documentModeHandler.documentMode(mode, publicIdentifier,
2406 systemIdentifier, html4SpecificAdditionalErrorChecks);
2407 }
2408 documentMode(mode, publicIdentifier,
2409 systemIdentifier, html4SpecificAdditionalErrorChecks);
2410 }
2411
2412 private boolean isAlmostStandards(String publicIdentifierLC,
2413 String systemIdentifierLC) {
2414 if ("-//w3c//dtd xhtml 1.0 transitional//en".equals(publicIdentifierLC)) {
2415 return true;
2416 }
2417 if ("-//w3c//dtd xhtml 1.0 frameset//en".equals(publicIdentifierLC)) {
2418 return true;
2419 }
2420 if (systemIdentifierLC != null) {
2421 if ("-//w3c//dtd html 4.01 transitional//en".equals(publicIdentifierLC)) {
2422 return true;
2423 }
2424 if ("-//w3c//dtd html 4.01 frameset//en".equals(publicIdentifierLC)) {
2425 return true;
2426 }
2427 }
2428 return false;
2429 }
2430
2431 private boolean isQuirky(String name, String publicIdentifierLC,
2432 String systemIdentifierLC, boolean correct) {
2433 if (!correct) {
2434 return true;
2435 }
2436 if (!"HTML".equalsIgnoreCase(name)) {
2437 return true;
2438 }
2439 if (publicIdentifierLC != null
2440 && (Arrays.binarySearch(QUIRKY_PUBLIC_IDS, publicIdentifierLC) > -1)) {
2441 return true;
2442 }
2443 if (systemIdentifierLC == null) {
2444 if ("-//w3c//dtd html 4.01 transitional//en".equals(publicIdentifierLC)) {
2445 return true;
2446 } else if ("-//w3c//dtd html 4.01 frameset//en".equals(publicIdentifierLC)) {
2447 return true;
2448 }
2449 } else if ("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd".equals(systemIdentifierLC)) {
2450 return true;
2451 }
2452 return false;
2453 }
2454
2455 private String toAsciiLowerCase(String str) {
2456 if (str == null) {
2457 return null;
2458 }
2459 char[] buf = new char[str.length()];
2460 for (int i = 0; i < str.length(); i++) {
2461 char c = str.charAt(i);
2462 if (c >= 'A' && c <= 'Z') {
2463 c += 0x20;
2464 }
2465 buf[i] = c;
2466 }
2467 return new String(buf);
2468 }
2469
2470 private void closeTheCell(int eltPos) throws SAXException {
2471 generateImpliedEndTags();
2472 if (eltPos != currentPtr) {
2473 err("Unclosed elements.");
2474 }
2475 while (currentPtr >= eltPos) {
2476 pop();
2477 }
2478 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
2479 phase = Phase.IN_ROW;
2480 return;
2481 }
2482
2483 private int findLastInTableScopeTdTh() {
2484 for (int i = currentPtr; i > 0; i--) {
2485 String name = stack[i].name;
2486 if ("td" == name || "th" == name) {
2487 return i;
2488 } else if (name == "table") {
2489 return NOT_FOUND_ON_STACK;
2490 }
2491 }
2492 return NOT_FOUND_ON_STACK;
2493 }
2494
2495 private void clearStackBackTo(int eltPos) throws SAXException {
2496 if (eltPos != currentPtr) {
2497 err("Unclosed elements.");
2498 while(currentPtr > eltPos) { // > not >= intentional
2499 pop();
2500 }
2501 }
2502 }
2503
2504 private void resetTheInsertionMode() {
2505 String name;
2506 for (int i = currentPtr; i >= 0; i--) {
2507 name = stack[i].name;
2508 if (i == 0) {
2509 if (!(context == "td" || context == "th")) {
2510 name = context;
2511 }
2512 }
2513 if ("select" == name) {
2514 phase = Phase.IN_SELECT;
2515 return;
2516 } else if ("td" == name || "th" == name) {
2517 phase = Phase.IN_CELL;
2518 return;
2519 } else if ("tr" == name) {
2520 phase = Phase.IN_ROW;
2521 return;
2522 } else if ("tbody" == name || "thead" == name || "tfoot" == name) {
2523 phase = Phase.IN_TABLE_BODY;
2524 return;
2525 } else if ("caption" == name) {
2526 phase = Phase.IN_CAPTION;
2527 return;
2528 } else if ("colgroup" == name) {
2529 phase = Phase.IN_COLUMN_GROUP;
2530 return;
2531 } else if ("table" == name) {
2532 phase = Phase.IN_TABLE;
2533 return;
2534 } else if ("head" == name) {
2535 phase = Phase.IN_BODY; // really
2536 return;
2537 } else if ("body" == name) {
2538 phase = Phase.IN_BODY;
2539 return;
2540 } else if ("frameset" == name) {
2541 phase = Phase.IN_FRAMESET;
2542 return;
2543 } else if ("html" == name) {
2544 if (headPointer == null) {
2545 phase = Phase.BEFORE_HEAD;
2546 } else {
2547 phase = Phase.AFTER_HEAD;
2548 }
2549 return;
2550 } else if (i == 0) {
2551 phase = Phase.IN_BODY;
2552 return;
2553 }
2554 }
2555 }
2556
2557 /**
2558 * @throws SAXException
2559 *
2560 */
2561 private void implicitlyCloseP() throws SAXException {
2562 int eltPos = findLastInScope("p");
2563 if (eltPos == NOT_FOUND_ON_STACK) {
2564 return;
2565 }
2566 if (currentPtr != eltPos) {
2567 err("Unclosed elements.");
2568 }
2569 while (currentPtr >= eltPos) {
2570 pop();
2571 }
2572 }
2573
2574 private boolean clearLastStackSlot() {
2575 stack[currentPtr] = null;
2576 return true;
2577 }
2578
2579 private boolean clearLastListSlot() {
2580 listOfActiveFormattingElements[listPtr] = null;
2581 return true;
2582 }
2583
2584 private void push(StackNode<T> node) throws SAXException {
2585 currentPtr++;
2586 if (currentPtr == stack.length) {
2587 StackNode<T>[] newStack = new StackNode[stack.length + 64];
2588 System.arraycopy(stack, 0, newStack, 0, stack.length);
2589 stack = newStack;
2590 }
2591 stack[currentPtr] = node;
2592 elementPushed(node.name, node.node);
2593 }
2594
2595 private void append(StackNode<T> node) {
2596 listPtr++;
2597 if (listPtr == listOfActiveFormattingElements.length) {
2598 StackNode<T>[] newList = new StackNode[listOfActiveFormattingElements.length + 64];
2599 System.arraycopy(listOfActiveFormattingElements, 0, newList, 0, listOfActiveFormattingElements.length);
2600 listOfActiveFormattingElements = newList;
2601 }
2602 listOfActiveFormattingElements[listPtr] = node;
2603 }
2604
2605 private void insertMarker() {
2606 append(MARKER);
2607 }
2608
2609 private void clearTheListOfActiveFormattingElementsUpToTheLastMarker() {
2610 while (listPtr > -1) {
2611 if (listOfActiveFormattingElements[listPtr--] == MARKER) {
2612 return;
2613 }
2614 }
2615 }
2616
2617 private boolean isCurrent(String name) {
2618 return name == stack[currentPtr].name;
2619 }
2620
2621 private void removeFromStack(int pos) throws SAXException {
2622 if (currentPtr == pos) {
2623 pop();
2624 } else {
2625 if (conformingAndStreaming) {
2626 fatal();
2627 } else if (nonConformingAndStreaming) {
2628 throw new UnsupportedOperationException();
2629 } else {
2630 System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos);
2631 assert clearLastStackSlot();
2632 currentPtr--;
2633 }
2634 }
2635 }
2636
2637 private void removeFromStack(StackNode<T> node) throws SAXException {
2638 if (stack[currentPtr] == node) {
2639 pop();
2640 } else {
2641 int pos = currentPtr - 1;
2642 while (pos >= 0 && stack[pos] != node) {
2643 pos--;
2644 }
2645 if (pos == -1) {
2646 // dead code?
2647 return;
2648 }
2649 if (conformingAndStreaming) {
2650 fatal();
2651 } else if (nonConformingAndStreaming) {
2652 throw new UnsupportedOperationException();
2653 } else {
2654 System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos);
2655 currentPtr--;
2656 }
2657 }
2658 }
2659
2660 private void removeFromListOfActiveFormattingElements(int pos) {
2661 if (pos == listPtr) {
2662 assert clearLastListSlot();
2663 listPtr--;
2664 return;
2665 }
2666 assert pos < listPtr;
2667 System.arraycopy(listOfActiveFormattingElements, pos + 1, listOfActiveFormattingElements, pos, listPtr - pos);
2668 assert clearLastListSlot();
2669 listPtr--;
2670 }
2671
2672 private void adoptionAgencyEndTag(String name) throws SAXException {
2673 flushCharacters();
2674 for (;;) {
2675 int formattingEltListPos = listPtr;
2676 while (formattingEltListPos > -1) {
2677 String listName = listOfActiveFormattingElements[formattingEltListPos].name;
2678 if (listName == name) {
2679 break;
2680 } else if (listName == null) {
2681 formattingEltListPos = -1;
2682 break;
2683 }
2684 formattingEltListPos--;
2685 }
2686 if (formattingEltListPos == -1) {
2687 err("No element \u201C" + name + "\u201D to close.");
2688 return;
2689 }
2690 StackNode<T> formattingElt = listOfActiveFormattingElements[formattingEltListPos];
2691 int formattingEltStackPos = currentPtr;
2692 boolean inScope = true;
2693 while (formattingEltStackPos > -1) {
2694 StackNode<T> node = stack[formattingEltStackPos];
2695 if (node == formattingElt) {
2696 break;
2697 } else if (node.scoping) {
2698 inScope = false;
2699 }
2700 formattingEltStackPos--;
2701 }
2702 if (formattingEltStackPos == -1) {
2703 err("No element \u201C" + name + "\u201D to close.");
2704 removeFromListOfActiveFormattingElements(formattingEltListPos);
2705 return;
2706 }
2707 if (!inScope) {
2708 err("No element \u201C" + name + "\u201D to close.");
2709 return;
2710 }
2711 // stackPos now points to the formatting element and it is in scope
2712 if (formattingEltStackPos != currentPtr) {
2713 err("End tag \u201C" + name + "\u201D violates nesting rules.");
2714 }
2715 int furthestBlockPos = formattingEltStackPos + 1;
2716 while (furthestBlockPos <= currentPtr) {
2717 StackNode<T> node = stack[furthestBlockPos];
2718 if (node.scoping || node.special) {
2719 break;
2720 }
2721 furthestBlockPos++;
2722 }
2723 if (furthestBlockPos > currentPtr) {
2724 // no furthest block
2725 while (currentPtr >= formattingEltStackPos) {
2726 pop();
2727 }
2728 removeFromListOfActiveFormattingElements(formattingEltListPos);
2729 return;
2730 }
2731 StackNode<T> commonAncestor = stack[formattingEltStackPos - 1];
2732 StackNode<T> furthestBlock = stack[furthestBlockPos];
2733 detachFromParent(furthestBlock.node);
2734 int bookmark = formattingEltListPos;
2735 int nodePos = furthestBlockPos;
2736 StackNode<T> lastNode = furthestBlock;
2737 for(;;) {
2738 nodePos--;
2739 StackNode<T> node = stack[nodePos];
2740 int nodeListPos = findInListOfActiveFormattingElements(node);
2741 if (nodeListPos == -1) {
2742 assert formattingEltStackPos < nodePos;
2743 assert bookmark < nodePos;
2744 assert furthestBlockPos > nodePos;
2745 removeFromStack(nodePos);
2746 furthestBlockPos--;
2747 continue;
2748 }
2749 if (nodePos == formattingEltStackPos) {
2750 break;
2751 }
2752 if (nodePos == furthestBlockPos) {
2753 bookmark = nodeListPos + 1;
2754 }
2755 if (hasChildren(node.node)) {
2756 assert node == listOfActiveFormattingElements[nodeListPos];
2757 assert node == stack[nodePos];
2758 T clone = shallowClone(node.node);
2759 node = new StackNode<T>(node.name, clone, node.scoping, node.special, node.fosterParenting);
2760 listOfActiveFormattingElements[nodeListPos] = node;
2761 stack[nodePos] = node;
2762 }
2763 detachFromParentAndAppendToNewParent(lastNode.node, node.node);
2764 lastNode = node;
2765 }
2766 detachFromParentAndAppendToNewParent(lastNode.node, commonAncestor.node);
2767 T clone = shallowClone(formattingElt.node);
2768 StackNode<T> formattingClone = new StackNode<T>(formattingElt.name, clone, formattingElt.scoping, formattingElt.special, formattingElt.fosterParenting);
2769 appendChildrenToNewParent(furthestBlock.node, clone);
2770 detachFromParentAndAppendToNewParent(clone, furthestBlock.node);
2771 removeFromListOfActiveFormattingElements(formattingEltListPos);
2772 insertIntoListOfActiveFormattingElements(formattingClone, bookmark);
2773 assert formattingEltStackPos < furthestBlockPos;
2774 removeFromStack(formattingEltStackPos);
2775 // furthestBlockPos is now off by one and points to the slot after it
2776 insertIntoStack(formattingClone, furthestBlockPos);
2777 }
2778 }
2779
2780 private void insertIntoStack(StackNode<T> node, int position) throws SAXException {
2781 assert currentPtr + 1 < stack.length;
2782 assert position <= currentPtr + 1;
2783 if (position == currentPtr + 1) {
2784 flushCharacters();
2785 push(node);
2786 } else {
2787 System.arraycopy(stack, position, stack, position + 1, (currentPtr - position) + 1);
2788 currentPtr++;
2789 stack[position] = node;
2790 }
2791 }
2792
2793 private void insertIntoListOfActiveFormattingElements(StackNode<T> formattingClone, int bookmark) {
2794 assert listPtr + 1 < listOfActiveFormattingElements.length;
2795 if (bookmark <= listPtr) {
2796 System.arraycopy(listOfActiveFormattingElements, bookmark, listOfActiveFormattingElements, bookmark + 1, (listPtr - bookmark) + 1);
2797 }
2798 listPtr++;
2799 listOfActiveFormattingElements[bookmark] = formattingClone;
2800 }
2801
2802 private int findInListOfActiveFormattingElements(StackNode<T> node) {
2803 for (int i = listPtr; i >= 0; i--) {
2804 if (node == listOfActiveFormattingElements[i]) {
2805 return i;
2806 }
2807 }
2808 return -1;
2809 }
2810
2811 private int findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker(
2812 String name) {
2813 for (int i = listPtr; i >= 0; i--) {
2814 StackNode<T> node = listOfActiveFormattingElements[i];
2815 if (node.name == name) {
2816 return i;
2817 } else if (node == MARKER) {
2818 return -1;
2819 }
2820 }
2821 return -1;
2822 }
2823
2824 private int findDdOrDtToPop() {
2825 for (int i = currentPtr; i >= 0; i--) {
2826 StackNode<T> node = stack[i];
2827 if ("dd" == node.name || "dt" == node.name) {
2828 return i;
2829 } else if ((node.scoping || node.special) && !("div" == node.name || "address" == node.name)) {
2830 return NOT_FOUND_ON_STACK;
2831 }
2832 }
2833 return NOT_FOUND_ON_STACK;
2834 }
2835
2836 private int findLiToPop() {
2837 for (int i = currentPtr; i >= 0; i--) {
2838 StackNode<T> node = stack[i];
2839 if ("li" == node.name) {
2840 return i;
2841 } else if ((node.scoping || node.special) && !("div" == node.name || "address" == node.name)) {
2842 return NOT_FOUND_ON_STACK;
2843 }
2844 }
2845 return NOT_FOUND_ON_STACK;
2846 }
2847
2848 private int findLastOrRoot(String name) {
2849 for (int i = currentPtr; i > 0; i--) {
2850 if (stack[i].name == name) {
2851 return i;
2852 }
2853 }
2854 return 0;
2855 }
2856
2857 private void addAttributesToBody(Attributes attributes) throws SAXException {
2858 if (currentPtr >= 1) {
2859 StackNode<T> body = stack[1];
2860 if (body.name == "body") {
2861 addAttributesToElement(body.node, attributes);
2862 }
2863 }
2864 }
2865
2866 private void pushHeadPointerOntoStack() throws SAXException {
2867 flushCharacters();
2868 if (conformingAndStreaming) {
2869 fatal();
2870 }
2871 if (headPointer == null) {
2872 assert context != null;
2873 push(stack[currentPtr]);
2874 } else {
2875 push(new StackNode<T>("head", headPointer));
2876 }
2877 }
2878
2879 /**
2880 * @throws SAXException
2881 *
2882 */
2883 private void reconstructTheActiveFormattingElements() throws SAXException {
2884 if (listPtr == -1) {
2885 return;
2886 }
2887 StackNode<T> mostRecent = listOfActiveFormattingElements[listPtr];
2888 if (mostRecent == MARKER || isInStack(mostRecent)) {
2889 return;
2890 }
2891 int entryPos = listPtr;
2892 for(;;) {
2893 entryPos--;
2894 if (entryPos == -1) {
2895 break;
2896 }
2897 if (listOfActiveFormattingElements[entryPos] == MARKER) {
2898 break;
2899 }
2900 if (isInStack(listOfActiveFormattingElements[entryPos])) {
2901 break;
2902 }
2903 }
2904 if (entryPos < listPtr) {
2905 flushCharacters();
2906 }
2907 while (entryPos < listPtr) {
2908 entryPos++;
2909 StackNode<T> entry = listOfActiveFormattingElements[entryPos];
2910 T clone = shallowClone(entry.node);
2911 StackNode<T> entryClone = new StackNode<T>(entry.name, clone, entry.scoping, entry.special, entry.fosterParenting);
2912 StackNode<T> currentNode = stack[currentPtr];
2913 if (currentNode.fosterParenting) {
2914 insertIntoFosterParent(clone);
2915 } else {
2916 detachFromParentAndAppendToNewParent(clone, currentNode.node);
2917 }
2918 push(entryClone);
2919 listOfActiveFormattingElements[entryPos] = entryClone;
2920 }
2921 }
2922
2923 private void insertIntoFosterParent(T child) throws SAXException {
2924 int eltPos = findLastOrRoot("table");
2925 T elt = stack[eltPos].node;
2926 if (eltPos == 0) {
2927 detachFromParentAndAppendToNewParent(child, elt);
2928 return;
2929 }
2930 T parent = parentElementFor(elt);
2931 if (parent == null) {
2932 detachFromParentAndAppendToNewParent(child, stack[eltPos - 1].node);
2933 } else {
2934 insertBefore(child, elt, parent);
2935 }
2936 }
2937
2938 private boolean isInStack(StackNode<T> node) {
2939 for (int i = currentPtr; i >= 0; i--) {
2940 if (stack[i] == node) {
2941 return true;
2942 }
2943 }
2944 return false;
2945 }
2946
2947 private void pop() throws SAXException {
2948 flushCharacters();
2949 StackNode<T> node = stack[currentPtr];
2950 assert clearLastStackSlot();
2951 currentPtr--;
2952 elementPopped(node.name, node.node);
2953 }
2954
2955 private void appendCharMayFoster(char[] buf, int i) throws SAXException {
2956 StackNode<T> current = stack[currentPtr];
2957 if (current.fosterParenting) {
2958 if (conformingAndStreaming) {
2959 fatal();
2960 } else if (nonConformingAndStreaming) {
2961 return;
2962 } else {
2963 int eltPos = findLastOrRoot("table");
2964 T elt = stack[eltPos].node;
2965 if (eltPos == 0) {
2966 appendCharacters(elt, buf, i, 1);
2967 return;
2968 }
2969 T parent = parentElementFor(elt);
2970 if (parent == null) {
2971 appendCharacters(stack[eltPos - 1].node, buf, i, 1);
2972 } else {
2973 insertCharactersBefore(buf, i, 1, elt, parent);
2974 }
2975 }
2976 } else {
2977 accumulateCharacters(buf, i, 1);
2978 }
2979 }
2980
2981 private void appendHtmlElementToDocumentAndPush(Attributes attributes) throws SAXException {
2982 T elt = createHtmlElementSetAsRoot(attributes);
2983 StackNode<T> node = new StackNode<T>("html", elt);
2984 push(node);
2985 }
2986
2987 private void appendHtmlElementToDocumentAndPush() throws SAXException {
2988 appendHtmlElementToDocumentAndPush(tokenizer.newAttributes());
2989 }
2990
2991 private void appendToCurrentNodeAndPushHeadElement(
2992 Attributes attributes) throws SAXException {
2993 flushCharacters();
2994 T elt = createElement("head", attributes);
2995 detachFromParentAndAppendToNewParent(elt, stack[currentPtr].node);
2996 headPointer = elt;
2997 StackNode<T> node = new StackNode<T>("head", elt);
2998 push(node);
2999 }
3000
3001 private void appendToCurrentNodeAndPushBodyElement(
3002 Attributes attributes) throws SAXException {
3003 appendToCurrentNodeAndPushElement("body", attributes);
3004 }
3005
3006 private void appendToCurrentNodeAndPushBodyElement() throws SAXException {
3007 appendToCurrentNodeAndPushBodyElement(tokenizer.newAttributes());
3008 }
3009
3010 private void appendToCurrentNodeAndPushFormElementMayFoster(Attributes attributes) throws SAXException {
3011 flushCharacters();
3012 T elt = createElement("form", attributes);
3013 formPointer = elt;
3014 StackNode<T> current = stack[currentPtr];
3015 if (current.fosterParenting) {
3016 if (conformingAndStreaming) {
3017 fatal();
3018 } else if (nonConformingAndStreaming) {
3019 return;
3020 } else {
3021 insertIntoFosterParent(elt);
3022 }
3023 } else {
3024 detachFromParentAndAppendToNewParent(elt, current.node);
3025 }
3026 StackNode<T> node = new StackNode<T>("form", elt);
3027 push(node);
3028 }
3029
3030 private void appendToCurrentNodeAndPushFormattingElementMayFoster(String name,
3031 Attributes attributes) throws SAXException {
3032 flushCharacters();
3033 T elt = createElement(name, attributes, formPointer);
3034 StackNode<T> current = stack[currentPtr];
3035 if (current.fosterParenting) {
3036 if (conformingAndStreaming) {
3037 fatal();
3038 } else if (nonConformingAndStreaming) {
3039 return;
3040 } else {
3041 insertIntoFosterParent(elt);
3042 }
3043 } else {
3044 detachFromParentAndAppendToNewParent(elt, current.node);
3045 }
3046 StackNode<T> node = new StackNode<T>(name, elt);
3047 push(node);
3048 append(node);
3049 }
3050
3051 private void appendToCurrentNodeAndPushElement(String name,
3052 Attributes attributes) throws SAXException {
3053 flushCharacters();
3054 T elt = createElement(name, attributes);
3055 detachFromParentAndAppendToNewParent(elt, stack[currentPtr].node);
3056 StackNode<T> node = new StackNode<T>(name, elt);
3057 push(node);
3058 }
3059
3060 private void appendToCurrentNodeAndPushElementMayFoster(String name,
3061 Attributes attributes) throws SAXException {
3062 flushCharacters();
3063 T elt = createElement(name, attributes);
3064 StackNode<T> current = stack[currentPtr];
3065 if (current.fosterParenting) {
3066 if (conformingAndStreaming) {
3067 fatal();
3068 } else if (nonConformingAndStreaming) {
3069 return;
3070 } else {
3071 insertIntoFosterParent(elt);
3072 }
3073 } else {
3074 detachFromParentAndAppendToNewParent(elt, current.node);
3075 }
3076 StackNode<T> node = new StackNode<T>(name, elt);
3077 push(node);
3078 }
3079
3080 private void appendToCurrentNodeAndPushElementMayFoster(String name, Attributes attributes, T form) throws SAXException {
3081 flushCharacters();
3082 T elt = createElement(name, attributes, formPointer);
3083 StackNode<T> current = stack[currentPtr];
3084 if (current.fosterParenting) {
3085 if (conformingAndStreaming) {
3086 fatal();
3087 } else if (nonConformingAndStreaming) {
3088 return;
3089 } else {
3090 insertIntoFosterParent(elt);
3091 }
3092 } else {
3093 detachFromParentAndAppendToNewParent(elt, current.node);
3094 }
3095 StackNode<T> node = new StackNode<T>(name, elt);
3096 push(node);
3097 }
3098
3099 private void appendVoidElementToCurrentMayFoster(String name,
3100 Attributes attributes, T form) throws SAXException {
3101 flushCharacters();
3102 T elt = createElement(name, attributes, formPointer);
3103 StackNode<T> current = stack[currentPtr];
3104 if (current.fosterParenting) {
3105 if (conformingAndStreaming) {
3106 fatal();
3107 } else if (nonConformingAndStreaming) {
3108 return;
3109 } else {
3110 insertIntoFosterParent(elt);
3111 }
3112 } else {
3113 detachFromParentAndAppendToNewParent(elt, current.node);
3114 }
3115 if (conformingAndStreaming || nonConformingAndStreaming) {
3116 elementPushed(name, (T) attributes);
3117 elementPopped(name, null);
3118 }
3119 }
3120
3121 private void appendVoidElementToCurrentMayFoster(String name, Attributes attributes) throws SAXException {
3122 flushCharacters();
3123 T elt = createElement(name, attributes);
3124 StackNode<T> current = stack[currentPtr];
3125 if (current.fosterParenting) {
3126 if (conformingAndStreaming) {
3127 fatal();
3128 } else if (nonConformingAndStreaming) {
3129 return;
3130 } else {
3131 insertIntoFosterParent(elt);
3132 }
3133 } else {
3134 detachFromParentAndAppendToNewParent(elt, current.node);
3135 }
3136 if (conformingAndStreaming || nonConformingAndStreaming) {
3137 elementPushed(name, (T) attributes);
3138 elementPopped(name, null);
3139 }
3140 }
3141
3142 private void accumulateCharacters(char[] buf, int start, int length) throws SAXException {
3143 if (coalescingText) {
3144 int newLen = charBufferLen + length;
3145 if (newLen > charBuffer.length) {
3146 char[] newBuf = new char[newLen];
3147 System.arraycopy(charBuffer, 0, newBuf, 0, charBuffer.length);
3148 charBuffer = newBuf;
3149 }
3150 System.arraycopy(buf, start, charBuffer, charBufferLen, length);
3151 charBufferLen = newLen;
3152 } else {
3153 appendCharacters(stack[currentPtr].node, buf, start, length);
3154 }
3155 }
3156
3157 private void flushCharacters() throws SAXException {
3158 if (charBufferLen > 0) {
3159 appendCharacters(stack[currentPtr].node, charBuffer, 0, charBufferLen);
3160 charBufferLen = 0;
3161 }
3162 }
3163
3164 // ------------------------------- //
3165
3166 protected abstract T createElement(String name, Attributes attributes) throws SAXException;
3167
3168 protected T createElement(String name, Attributes attributes, T form) throws SAXException {
3169 return createElement(name, attributes);
3170 }
3171
3172 protected abstract T createHtmlElementSetAsRoot(Attributes attributes) throws SAXException;
3173
3174 protected abstract void detachFromParent(T element) throws SAXException;
3175
3176 protected abstract boolean hasChildren(T element) throws SAXException;
3177
3178 protected abstract T shallowClone(T element) throws SAXException;
3179
3180 protected abstract void detachFromParentAndAppendToNewParent(T child, T newParent) throws SAXException;
3181
3182 protected abstract void appendChildrenToNewParent(T oldParent, T newParent) throws SAXException;
3183
3184 /**
3185 * Get the parent element. MUST return <code>null</code> if there is no parent
3186 * <em>or</em> the parent is not an element.
3187 */
3188 protected abstract T parentElementFor(T child) throws SAXException;
3189
3190 protected abstract void insertBefore(T child, T sibling, T parent) throws SAXException;
3191
3192 protected abstract void insertCharactersBefore(char[] buf, int start, int length, T sibling, T parent) throws SAXException;
3193
3194 protected abstract void appendCharacters(T parent,
3195 char[] buf, int start, int length) throws SAXException;
3196
3197 protected abstract void appendComment(T parent, char[] buf, int start, int length) throws SAXException;
3198
3199 protected abstract void appendCommentToDocument(char[] buf, int start, int length) throws SAXException;
3200
3201 protected abstract void addAttributesToElement(T element, Attributes attributes) throws SAXException;
3202
3203 protected void start(boolean fragment) throws SAXException {
3204
3205 }
3206
3207 protected void end() throws SAXException {
3208
3209 }
3210
3211 protected void bodyClosed(T body) throws SAXException {
3212
3213 }
3214
3215 protected void htmlClosed(T html) throws SAXException {
3216
3217 }
3218
3219 protected void appendDoctypeToDocument(String name,
3220 String publicIdentifier, String systemIdentifier) throws SAXException {
3221
3222 }
3223
3224 protected void elementPushed(String name, T node) throws SAXException {
3225
3226 }
3227
3228 protected void elementPopped(String name, T node) throws SAXException {
3229
3230 }
3231
3232 protected void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException {
3233
3234 }
3235
3236 /**
3237 * @see nu.validator.htmlparser.impl.TokenHandler#wantsComments()
3238 */
3239 public boolean wantsComments() {
3240 return wantingComments;
3241 }
3242
3243 public void setIgnoringComments(boolean ignoreComments) {
3244 wantingComments = !ignoreComments;
3245 }
3246
3247 /**
3248 * Sets the errorHandler.
3249 *
3250 * @param errorHandler the errorHandler to set
3251 */
3252 public final void setErrorHandler(ErrorHandler errorHandler) {
3253 this.errorHandler = errorHandler;
3254 }
3255
3256 public final void setFragmentContext(String context) {
3257 this.context = context == null ? null : context.intern();
3258 }
3259
3260 protected final T currentNode() {
3261 return stack[currentPtr].node;
3262 }
3263
3264 /**
3265 * Returns the scriptingEnabled.
3266 *
3267 * @return the scriptingEnabled
3268 */
3269 public boolean isScriptingEnabled() {
3270 return scriptingEnabled;
3271 }
3272
3273 /**
3274 * Sets the scriptingEnabled.
3275 *
3276 * @param scriptingEnabled the scriptingEnabled to set
3277 */
3278 public void setScriptingEnabled(boolean scriptingEnabled) {
3279 this.scriptingEnabled = scriptingEnabled;
3280 }
3281
3282 /**
3283 * Sets the doctypeExpectation.
3284 *
3285 * @param doctypeExpectation the doctypeExpectation to set
3286 */
3287 public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
3288 this.doctypeExpectation = doctypeExpectation;
3289 }
3290
3291 /**
3292 * Sets the documentModeHandler.
3293 *
3294 * @param documentModeHandler the documentModeHandler to set
3295 */
3296 public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
3297 this.documentModeHandler = documentModeHandler;
3298 }
3299
3300 /**
3301 * Sets the reportingDoctype.
3302 *
3303 * @param reportingDoctype the reportingDoctype to set
3304 */
3305 public void setReportingDoctype(boolean reportingDoctype) {
3306 this.reportingDoctype = reportingDoctype;
3307 }
3308 }