001    /*
002     * Copyright (c) 2005-2007 Henri Sivonen
003     * Copyright (c) 2007-2010 Mozilla Foundation
004     * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 
005     * Foundation, and Opera Software ASA.
006     *
007     * Permission is hereby granted, free of charge, to any person obtaining a 
008     * copy of this software and associated documentation files (the "Software"), 
009     * to deal in the Software without restriction, including without limitation 
010     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
011     * and/or sell copies of the Software, and to permit persons to whom the 
012     * Software is furnished to do so, subject to the following conditions:
013     *
014     * The above copyright notice and this permission notice shall be included in 
015     * all copies or substantial portions of the Software.
016     *
017     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
018     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
019     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
020     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
021     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
022     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
023     * DEALINGS IN THE SOFTWARE.
024     */
025    
026    /*
027     * The comments following this one that use the same comment syntax as this 
028     * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007 
029     * amended as of June 18 2008 and May 31 2010.
030     * That document came with this statement:
031     * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and 
032     * Opera Software ASA. You are granted a license to use, reproduce and 
033     * create derivative works of this document."
034     */
035    
036    package nu.validator.htmlparser.impl;
037    
038    import nu.validator.htmlparser.annotation.Auto;
039    import nu.validator.htmlparser.annotation.CharacterName;
040    import nu.validator.htmlparser.annotation.Const;
041    import nu.validator.htmlparser.annotation.Inline;
042    import nu.validator.htmlparser.annotation.Local;
043    import nu.validator.htmlparser.annotation.NoLength;
044    import nu.validator.htmlparser.common.EncodingDeclarationHandler;
045    import nu.validator.htmlparser.common.Interner;
046    import nu.validator.htmlparser.common.TokenHandler;
047    import nu.validator.htmlparser.common.XmlViolationPolicy;
048    
049    import org.xml.sax.ErrorHandler;
050    import org.xml.sax.Locator;
051    import org.xml.sax.SAXException;
052    import org.xml.sax.SAXParseException;
053    
054    /**
055     * An implementation of
056     * http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
057     * 
058     * This class implements the <code>Locator</code> interface. This is not an
059     * incidental implementation detail: Users of this class are encouraged to make
060     * use of the <code>Locator</code> nature.
061     * 
062     * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
063     * can be configured to treat these conditions as fatal or to coerce the infoset
064     * to something that XML 1.0 allows.
065     * 
066     * @version $Id$
067     * @author hsivonen
068     */
069    public class Tokenizer implements Locator {
070    
           /**
            * Mask with every bit set except the lowest one. ANDing a state id with
            * it gives zero for <code>DATA</code> (0) and <code>RCDATA</code> (1) and
            * a non-zero result for every other state id declared below (2 to 72).
            * NOTE(review): the use site is outside this excerpt; presumably this
            * lets both states be recognized with one comparison -- confirm.
            */
071        private static final int DATA_AND_RCDATA_MASK = ~1;
072    
           // Tokenizer state ids follow, numbered consecutively from 0 to 72.
           // setStateAndEndTagExpectation() stores one of them in stateSave.
073        public static final int DATA = 0;
074    
075        public static final int RCDATA = 1;
076    
077        public static final int SCRIPT_DATA = 2;
078    
079        public static final int RAWTEXT = 3;
080    
081        public static final int SCRIPT_DATA_ESCAPED = 4;
082    
083        public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
084    
085        public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
086    
087        public static final int ATTRIBUTE_VALUE_UNQUOTED = 7;
088    
089        public static final int PLAINTEXT = 8;
090    
091        public static final int TAG_OPEN = 9;
092    
093        public static final int CLOSE_TAG_OPEN = 10;
094    
095        public static final int TAG_NAME = 11;
096    
097        public static final int BEFORE_ATTRIBUTE_NAME = 12;
098    
099        public static final int ATTRIBUTE_NAME = 13;
100    
101        public static final int AFTER_ATTRIBUTE_NAME = 14;
102    
103        public static final int BEFORE_ATTRIBUTE_VALUE = 15;
104    
105        public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
106    
107        public static final int BOGUS_COMMENT = 17;
108    
109        public static final int MARKUP_DECLARATION_OPEN = 18;
110    
111        public static final int DOCTYPE = 19;
112    
113        public static final int BEFORE_DOCTYPE_NAME = 20;
114    
115        public static final int DOCTYPE_NAME = 21;
116    
117        public static final int AFTER_DOCTYPE_NAME = 22;
118    
119        public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
120    
121        public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
122    
123        public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
124    
125        public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
126    
127        public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
128    
129        public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
130    
131        public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
132    
133        public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
134    
135        public static final int BOGUS_DOCTYPE = 31;
136    
137        public static final int COMMENT_START = 32;
138    
139        public static final int COMMENT_START_DASH = 33;
140    
141        public static final int COMMENT = 34;
142    
143        public static final int COMMENT_END_DASH = 35;
144    
145        public static final int COMMENT_END = 36;
146    
147        public static final int COMMENT_END_BANG = 37;
148    
149        public static final int NON_DATA_END_TAG_NAME = 38;
150    
151        public static final int MARKUP_DECLARATION_HYPHEN = 39;
152    
           // NOTE(review): the truncated names below (OCTYPE, UBLIC, YSTEM)
           // presumably denote states that match the rest of a keyword after its
           // first letter; compare the OCTYPE, UBLIC and YSTEM arrays -- confirm.
153        public static final int MARKUP_DECLARATION_OCTYPE = 40;
154    
155        public static final int DOCTYPE_UBLIC = 41;
156    
157        public static final int DOCTYPE_YSTEM = 42;
158    
159        public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
160    
161        public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
162    
163        public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
164    
165        public static final int CONSUME_CHARACTER_REFERENCE = 46;
166    
167        public static final int CONSUME_NCR = 47;
168    
169        public static final int CHARACTER_REFERENCE_TAIL = 48;
170    
171        public static final int HEX_NCR_LOOP = 49;
172    
           // NOTE(review): "NRC" looks like a transposition of "NCR" (compare
           // HEX_NCR_LOOP). The constant is public, so the name is left as is.
173        public static final int DECIMAL_NRC_LOOP = 50;
174    
175        public static final int HANDLE_NCR_VALUE = 51;
176    
177        public static final int HANDLE_NCR_VALUE_RECONSUME = 52;
178    
179        public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53;
180    
181        public static final int SELF_CLOSING_START_TAG = 54;
182    
183        public static final int CDATA_START = 55;
184    
185        public static final int CDATA_SECTION = 56;
186    
187        public static final int CDATA_RSQB = 57;
188    
189        public static final int CDATA_RSQB_RSQB = 58;
190    
191        public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59;
192    
193        public static final int SCRIPT_DATA_ESCAPE_START = 60;
194    
195        public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61;
196    
197        public static final int SCRIPT_DATA_ESCAPED_DASH = 62;
198    
199        public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
200    
201        public static final int BOGUS_COMMENT_HYPHEN = 64;
202    
203        public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
204    
205        public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
206    
207        public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
208    
209        public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 68;
210    
211        public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
212    
213        public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
214    
215        public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
216    
217        public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
218    
219        /**
220         * Magic value for UTF-16 operations. Evaluates to <code>0xD7C0</code>;
            * adding <code>(codePoint &gt;&gt; 10)</code> to it gives the lead (high)
            * surrogate of a code point above <code>0xFFFF</code>.
            * NOTE(review): the use site is outside this excerpt.
221         */
222        private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
223    
224        /**
225         * UTF-16 code unit array containing less than and greater than for emitting
226         * those characters on certain parse errors.
227         */
228        private static final @NoLength char[] LT_GT = { '<', '>' };
229    
230        /**
231         * UTF-16 code unit array containing less than and solidus for emitting
232         * those characters on certain parse errors.
233         */
234        private static final @NoLength char[] LT_SOLIDUS = { '<', '/' };
235    
236        /**
237         * UTF-16 code unit array containing ]] for emitting those characters on
238         * state transitions.
239         */
240        private static final @NoLength char[] RSQB_RSQB = { ']', ']' };
241    
242        /**
243         * Array version of U+FFFD.
244         */
245        private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
246    
247        // [NOCPP[
248    
249        /**
250         * Array version of space.
251         */
252        private static final @NoLength char[] SPACE = { ' ' };
253    
254        // ]NOCPP]
255    
256        /**
257         * Array version of line feed.
258         */
259        private static final @NoLength char[] LF = { '\n' };
260    
261        /**
262         * Buffer growth parameter: the number of <code>char</code>s by which
            * <code>appendStrBuf</code> enlarges <code>strBuf</code> when it is full.
263         */
264        private static final int BUFFER_GROW_BY = 1024;
265    
266        /**
267         * "CDATA[" as <code>char[]</code>
268         */
269        private static final @NoLength char[] CDATA_LSQB = "CDATA[".toCharArray();
270    
271        /**
272         * "octype" as <code>char[]</code>
273         */
274        private static final @NoLength char[] OCTYPE = "octype".toCharArray();
275    
276        /**
277         * "ublic" as <code>char[]</code>
278         */
279        private static final @NoLength char[] UBLIC = "ublic".toCharArray();
280    
281        /**
282         * "ystem" as <code>char[]</code>
283         */
284        private static final @NoLength char[] YSTEM = "ystem".toCharArray();
285    
           // Lower-case element names as char arrays. endTagExpectationToArray()
           // stores one of them in endTagExpectationAsArray according to the group
           // of the expected end tag. Unlike the arrays above, these are declared
           // without @NoLength. NOTE(review): presumably because their length is
           // read where they are matched -- that code is outside this excerpt.
286        private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' };
287    
288        private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' };
289    
290        private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' };
291    
292        private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't',
293                'e', 'x', 't' };
294    
295        private static final char[] XMP_ARR = { 'x', 'm', 'p' };
296    
297        private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r',
298                'e', 'a' };
299    
300        private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' };
301    
302        private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e',
303                'd' };
304    
305        private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i',
306                'p', 't' };
307    
308        private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm',
309                'e', 's' };
310    
311        /**
312         * The token handler. Assigned once, by the constructors.
313         */
314        protected final TokenHandler tokenHandler;
315    
           /**
            * The encoding declaration handler. Both constructors set it to
            * <code>null</code>. NOTE(review): where a handler is installed is
            * outside this excerpt.
            */
316        protected EncodingDeclarationHandler encodingDeclarationHandler;
317    
318        // [NOCPP[
319    
320        /**
321         * The error handler. Stored by <code>setErrorHandler</code> and returned
            * by <code>getErrorHandler</code>.
322         */
323        protected ErrorHandler errorHandler;
324    
325        // ]NOCPP]
326    
327        /**
328         * Whether the previous char read was CR.
329         */
330        protected boolean lastCR;
331    
           // Working state of the tokenizer state machine. Within this excerpt only
           // stateSave is touched: setStateAndEndTagExpectation() stores the
           // requested state id in it. NOTE(review): the remaining fields down to
           // cstart are read and written outside this excerpt; their exact roles
           // should be confirmed there before relying on the names.
332        protected int stateSave;
333    
334        private int returnStateSave;
335    
336        protected int index;
337    
338        private boolean forceQuirks;
339    
340        private char additional;
341    
342        private int entCol;
343    
344        private int firstCharKey;
345    
346        private int lo;
347    
348        private int hi;
349    
350        private int candidate;
351    
352        private int strBufMark;
353    
354        private int prevValue;
355    
356        protected int value;
357    
358        private boolean seenDigits;
359    
360        protected int cstart;
361    
362        /**
363         * The SAX public id for the resource being tokenized. (Only passed back
364         * as part of locator data.) Set by <code>initLocation</code>.
365         */
366        private String publicId;
367    
368        /**
369         * The SAX system id for the resource being tokenized. (Only passed back
370         * as part of locator data.) Set by <code>initLocation</code>.
371         */
372        private String systemId;
373    
374        /**
375         * Buffer for short identifiers.
376         */
377        private @Auto char[] strBuf;
378    
379        /**
380         * Number of significant <code>char</code>s in <code>strBuf</code>.
381         */
382        private int strBufLen;
383    
384        /**
385         * <code>-1</code> to indicate that <code>strBuf</code> is used or otherwise
386         * an offset to the main buffer. (Describes the disabled field below.)
387         */
388        // private int strBufOffset = -1;
389        /**
390         * Buffer for long strings.
391         */
392        private @Auto char[] longStrBuf;
393    
394        /**
395         * Number of significant <code>char</code>s in <code>longStrBuf</code>.
396         */
397        private int longStrBufLen;
398    
399        /**
400         * <code>-1</code> to indicate that <code>longStrBuf</code> is used or
401         * otherwise an offset to the main buffer. (Describes the disabled field
            * below.)
402         */
403        // private int longStrBufOffset = -1;
404    
405        /**
406         * Buffer for expanding NCRs falling into the Basic Multilingual Plane.
            * Allocated by the constructors with room for one <code>char</code>.
407         */
408        private final @Auto char[] bmpChar;
409    
410        /**
411         * Buffer for expanding astral NCRs. Allocated by the constructors with
            * room for two <code>char</code>s.
412         */
413        private final @Auto char[] astralChar;
414    
415        /**
416         * The element whose end tag closes the current CDATA or RCDATA element.
417         */
418        protected ElementName endTagExpectation = null;
419    
           /**
            * The name of the expected end tag as a <code>char</code> array.
            * <code>endTagExpectationToArray</code> points it at one of the shared
            * static <code>*_ARR</code> arrays.
            */
420        private char[] endTagExpectationAsArray; // not @Auto!
421    
422        /**
423         * <code>true</code> if tokenizing an end tag
424         */
425        protected boolean endTag;
426    
427        /**
428         * The current tag token name.
429         */
430        private ElementName tagName = null;
431    
432        /**
433         * The current attribute name.
434         */
435        protected AttributeName attributeName = null;
436    
437        // [NOCPP[
438    
439        /**
440         * Whether comment tokens are emitted.
441         */
442        private boolean wantsComments = false;
443    
444        /**
445         * <code>true</code> when HTML4-specific additional errors are requested.
            * Switched on by <code>turnOnAdditionalHtml4Errors</code>.
446         */
447        protected boolean html4;
448    
449        /**
450         * Whether the stream is past the first 512 bytes. Set to
            * <code>true</code> by <code>notifyAboutMetaBoundary</code>.
451         */
452        private boolean metaBoundaryPassed;
453    
454        // ]NOCPP]
455    
456        /**
457         * The name of the current doctype token.
458         */
459        private @Local String doctypeName;
460    
461        /**
462         * The public id of the current doctype token.
463         */
464        private String publicIdentifier;
465    
466        /**
467         * The system id of the current doctype token.
468         */
469        private String systemIdentifier;
470    
471        /**
472         * The attribute holder.
473         */
474        private HtmlAttributes attributes;
475    
476        // [NOCPP[
477    
478        /**
479         * The policy for vertical tab and form feed.
480         */
481        private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET;
482    
483        /**
484         * The policy for comments. Consulted by the comment helpers when a comment
            * contains consecutive hyphens or a trailing hyphen.
485         */
486        private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET;
487    
           /**
            * The xmlns policy. <code>setXmlnsPolicy</code> never stores
            * <code>FATAL</code> here.
            */
488        private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
489    
           /**
            * The name policy, stored by <code>setNamePolicy</code>.
            */
490        private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
491    
492        private boolean html4ModeCompatibleWithXhtml1Schemata;
493    
           /**
            * When <code>true</code>, <code>emptyAttributes()</code> allocates a new
            * <code>HtmlAttributes</code> on each call instead of returning the
            * shared <code>HtmlAttributes.EMPTY_ATTRIBUTES</code>.
            */
494        private final boolean newAttributesEachTime;
495    
496        // ]NOCPP]
497    
           /**
            * Attribute-name mode: <code>setMappingLangToXmlLang</code> stores either
            * <code>AttributeName.HTML_LANG</code> or <code>AttributeName.HTML</code>
            * here, and <code>emptyAttributes()</code> passes the value to new
            * <code>HtmlAttributes</code> instances.
            */
498        private int mappingLangToXmlLang;
499    
500        private boolean shouldSuspend;
501    
502        protected boolean confident;
503    
           /**
            * The line number reported by <code>getLineNumber()</code>; can be
            * overwritten through <code>setLineNumber</code>.
            */
504        private int line;
505    
           /**
            * The interner handed to the name factories; installed by
            * <code>setInterner</code>.
            */
506        private Interner interner;
507    
508        // [NOCPP[
509    
510        protected LocatorImpl ampersandLocation;
511    
512        public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) {
513            this.tokenHandler = tokenHandler;
514            this.encodingDeclarationHandler = null;
515            this.newAttributesEachTime = newAttributesEachTime;
516            this.bmpChar = new char[1];
517            this.astralChar = new char[2];
518            this.tagName = null;
519            this.attributeName = null;
520            this.doctypeName = null;
521            this.publicIdentifier = null;
522            this.systemIdentifier = null;
523            this.attributes = null;
524        }
525    
526        // ]NOCPP]
527    
528        /**
529         * The constructor. Stores the token handler, allocates the one- and
            * two-<code>char</code> scratch arrays and leaves all per-token state
            * unset. Sets <code>newAttributesEachTime</code> to <code>false</code>,
            * so <code>emptyAttributes()</code> returns the shared
            * <code>HtmlAttributes.EMPTY_ATTRIBUTES</code>.
530         * 
531         * @param tokenHandler
532         *            the handler for receiving tokens
533         */
534        public Tokenizer(TokenHandler tokenHandler) {
535            this.tokenHandler = tokenHandler;
536            this.encodingDeclarationHandler = null;
537            // [NOCPP[
538            this.newAttributesEachTime = false;
539            // ]NOCPP]
540            this.bmpChar = new char[1];
541            this.astralChar = new char[2];
542            this.tagName = null;
543            this.attributeName = null;
544            this.doctypeName = null;
545            this.publicIdentifier = null;
546            this.systemIdentifier = null;
547            this.attributes = null;
548        }
549    
           /**
            * Sets the interner. The stored instance is handed to
            * <code>ElementName.elementNameByBuffer</code> and
            * <code>Portability.newLocalNameFromBuffer</code> when names are built
            * from buffers.
            * 
            * @param interner
            *            the interner to store
            */
550        public void setInterner(Interner interner) {
551            this.interner = interner;
552        }
553    
554        public void initLocation(String newPublicId, String newSystemId) {
555            this.systemId = newSystemId;
556            this.publicId = newPublicId;
557    
558        }
559    
560        // [NOCPP[
561    
562        /**
563         * Reports whether lang-to-xml:lang mapping is switched on, that is,
            * whether the stored mode equals <code>AttributeName.HTML_LANG</code>.
564         * 
565         * @return <code>true</code> if the stored mode is
            *         <code>AttributeName.HTML_LANG</code>
566         */
567        public boolean isMappingLangToXmlLang() {
568            return mappingLangToXmlLang == AttributeName.HTML_LANG;
569        }
570    
571        /**
572         * Sets the mappingLangToXmlLang.
573         * 
574         * @param mappingLangToXmlLang
575         *            the mappingLangToXmlLang to set
576         */
577        public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
578            this.mappingLangToXmlLang = mappingLangToXmlLang ? AttributeName.HTML_LANG
579                    : AttributeName.HTML;
580        }
581    
582        /**
583         * Sets the error handler.
584         * 
            * @param eh
            *            the handler to store; it is what
            *            <code>getErrorHandler()</code> returns afterwards
585         * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
586         */
587        public void setErrorHandler(ErrorHandler eh) {
588            this.errorHandler = eh;
589        }
590    
           /**
            * Returns the error handler.
            * 
            * @return the currently stored error handler
            */
591        public ErrorHandler getErrorHandler() {
592            return this.errorHandler;
593        }
594    
595        /**
596         * Sets the commentPolicy, which the comment helpers consult when a
            * comment contains consecutive hyphens or a trailing hyphen.
597         * 
598         * @param commentPolicy
599         *            the commentPolicy to set
600         */
601        public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
602            this.commentPolicy = commentPolicy;
603        }
604    
605        /**
606         * Sets the contentNonXmlCharPolicy.
607         * 
608         * @param contentNonXmlCharPolicy
609         *            the contentNonXmlCharPolicy to set
610         */
611        public void setContentNonXmlCharPolicy(
612                XmlViolationPolicy contentNonXmlCharPolicy) {
613            if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) {
614                throw new IllegalArgumentException(
615                        "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
616            }
617        }
618    
619        /**
620         * Sets the contentSpacePolicy (the policy for vertical tab and form
            * feed).
621         * 
622         * @param contentSpacePolicy
623         *            the contentSpacePolicy to set
624         */
625        public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
626            this.contentSpacePolicy = contentSpacePolicy;
627        }
628    
629        /**
630         * Sets the xmlnsPolicy.
631         * 
632         * @param xmlnsPolicy
633         *            the xmlnsPolicy to set
634         */
635        public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
636            if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
637                throw new IllegalArgumentException("Can't use FATAL here.");
638            }
639            this.xmlnsPolicy = xmlnsPolicy;
640        }
641    
           /**
            * Sets the namePolicy.
            * NOTE(review): the code that consults this policy is outside this
            * excerpt.
            * 
            * @param namePolicy
            *            the namePolicy to set
            */
642        public void setNamePolicy(XmlViolationPolicy namePolicy) {
643            this.namePolicy = namePolicy;
644        }
645    
646        /**
647         * Sets the html4ModeCompatibleWithXhtml1Schemata flag.
            * NOTE(review): the code that reads this flag is outside this excerpt.
648         * 
649         * @param html4ModeCompatibleWithXhtml1Schemata
650         *            the html4ModeCompatibleWithXhtml1Schemata to set
651         */
652        public void setHtml4ModeCompatibleWithXhtml1Schemata(
653                boolean html4ModeCompatibleWithXhtml1Schemata) {
654            this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
655        }
656    
657        // ]NOCPP]
658    
659        // For the token handler to call
660        /**
661         * Sets the tokenizer state and the associated element name. This should 
662         * only ever used to put the tokenizer into one of the states that have
663         * a special end tag expectation.
664         * 
665         * @param specialTokenizerState
666         *            the tokenizer state to set
667         * @param endTagExpectation
668         *            the expected end tag for transitioning back to normal
669         */
670        public void setStateAndEndTagExpectation(int specialTokenizerState,
671                @Local String endTagExpectation) {
672            this.stateSave = specialTokenizerState;
673            if (specialTokenizerState == Tokenizer.DATA) {
674                return;
675            }
676            @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation);
677            this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0,
678                    asArray.length, interner);
679            endTagExpectationToArray();
680        }
681    
682        /**
683         * Sets the tokenizer state and the associated element name. This should 
684         * only ever used to put the tokenizer into one of the states that have
685         * a special end tag expectation.
686         * 
687         * @param specialTokenizerState
688         *            the tokenizer state to set
689         * @param endTagExpectation
690         *            the expected end tag for transitioning back to normal
691         */
692        public void setStateAndEndTagExpectation(int specialTokenizerState,
693                ElementName endTagExpectation) {
694            this.stateSave = specialTokenizerState;
695            this.endTagExpectation = endTagExpectation;
696            endTagExpectationToArray();
697        }
698    
699        private void endTagExpectationToArray() {
700            switch (endTagExpectation.getGroup()) {
701                case TreeBuilder.TITLE:
702                    endTagExpectationAsArray = TITLE_ARR;
703                    return;
704                case TreeBuilder.SCRIPT:
705                    endTagExpectationAsArray = SCRIPT_ARR;
706                    return;
707                case TreeBuilder.STYLE:
708                    endTagExpectationAsArray = STYLE_ARR;
709                    return;
710                case TreeBuilder.PLAINTEXT:
711                    endTagExpectationAsArray = PLAINTEXT_ARR;
712                    return;
713                case TreeBuilder.XMP:
714                    endTagExpectationAsArray = XMP_ARR;
715                    return;
716                case TreeBuilder.TEXTAREA:
717                    endTagExpectationAsArray = TEXTAREA_ARR;
718                    return;
719                case TreeBuilder.IFRAME:
720                    endTagExpectationAsArray = IFRAME_ARR;
721                    return;
722                case TreeBuilder.NOEMBED:
723                    endTagExpectationAsArray = NOEMBED_ARR;
724                    return;
725                case TreeBuilder.NOSCRIPT:
726                    endTagExpectationAsArray = NOSCRIPT_ARR;
727                    return;
728                case TreeBuilder.NOFRAMES:
729                    endTagExpectationAsArray = NOFRAMES_ARR;
730                    return;
731                default:
732                    assert false: "Bad end tag expectation.";
733                    return;
734            }
735        }
736    
737        /**
738         * For C++ use only. Overwrites the line number that
            * <code>getLineNumber()</code> reports.
            * 
            * @param line
            *            the line number to store
739         */
740        public void setLineNumber(int line) {
741            this.line = line;
742        }
743    
744        // start Locator impl
745    
746        /**
            * Returns the stored line number.
            * 
            * @return the value of the <code>line</code> field
747         * @see org.xml.sax.Locator#getLineNumber()
748         */
749        @Inline public int getLineNumber() {
750            return line;
751        }
752    
753        // [NOCPP[
754    
755        /**
            * Always returns <code>-1</code>, the <code>Locator</code> value for
            * "not available".
            * 
            * @return <code>-1</code>
756         * @see org.xml.sax.Locator#getColumnNumber()
757         */
758        @Inline public int getColumnNumber() {
759            return -1;
760        }
761    
762        /**
            * Returns the public id recorded by <code>initLocation</code>.
            * 
            * @return the stored public id
763         * @see org.xml.sax.Locator#getPublicId()
764         */
765        public String getPublicId() {
766            return publicId;
767        }
768    
769        /**
            * Returns the system id recorded by <code>initLocation</code>.
            * 
            * @return the stored system id
770         * @see org.xml.sax.Locator#getSystemId()
771         */
772        public String getSystemId() {
773            return systemId;
774        }
775    
776        // end Locator impl
777    
778        // end public API
779    
           /**
            * Records that the meta boundary has been passed by setting
            * <code>metaBoundaryPassed</code> to <code>true</code>. Nothing in this
            * excerpt resets the flag.
            */
780        public void notifyAboutMetaBoundary() {
781            metaBoundaryPassed = true;
782        }
783    
           /**
            * Requests the HTML4-specific additional errors by setting
            * <code>html4</code> to <code>true</code>.
            */
784        void turnOnAdditionalHtml4Errors() {
785            html4 = true;
786        }
787    
788        // ]NOCPP]
789    
           /**
            * Supplies an attribute holder with no attributes. When
            * <code>newAttributesEachTime</code> is set, a new
            * <code>HtmlAttributes</code> is created with the current
            * <code>mappingLangToXmlLang</code> mode; otherwise the shared
            * <code>HtmlAttributes.EMPTY_ATTRIBUTES</code> is returned. Only the
            * shared-instance <code>return</code> lies outside the NOCPP markers.
            * 
            * @return an empty attribute holder
            */
790        HtmlAttributes emptyAttributes() {
791            // [NOCPP[
792            if (newAttributesEachTime) {
793                return new HtmlAttributes(mappingLangToXmlLang);
794            } else {
795                // ]NOCPP]
796                return HtmlAttributes.EMPTY_ATTRIBUTES;
797                // [NOCPP[
798            }
799            // ]NOCPP]
800        }
801    
           /**
            * Resets the smaller buffer so that it contains only <code>c</code>.
            * Writes to index 0 without a capacity check, so <code>strBuf</code>
            * must already have room for at least one <code>char</code>.
            * 
            * @param c
            *            the UTF-16 code unit that becomes the sole content
            */
802        @Inline private void clearStrBufAndAppend(char c) {
803            strBuf[0] = c;
804            strBufLen = 1;
805        }
806    
           /**
            * Empties the smaller buffer by resetting its length to zero. The
            * backing array is kept and not cleared.
            */
807        @Inline private void clearStrBuf() {
808            strBufLen = 0;
809        }
810    
811        /**
812         * Appends to the smaller buffer.
813         * 
814         * @param c
815         *            the UTF-16 code unit to append
816         */
817        private void appendStrBuf(char c) {
818            if (strBufLen == strBuf.length) {
819                char[] newBuf = new char[strBuf.length + Tokenizer.BUFFER_GROW_BY];
820                System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length);
821                strBuf = newBuf;
822            }
823            strBuf[strBufLen++] = c;
824        }
825    
826        /**
827         * The smaller buffer as a String. Currently only used for error reporting.
            * The string covers the first <code>strBufLen</code> <code>char</code>s
            * of <code>strBuf</code>.
828         * 
829         * <p>
830         * C++ memory note: The return value must be released.
831         * 
832         * @return the smaller buffer as a string
833         */
834        protected String strBufToString() {
835            return Portability.newStringFromBuffer(strBuf, 0, strBufLen);
836        }
837    
838        /**
839         * Stores the short buffer as a local name in <code>doctypeName</code>.
840         * Nothing is returned; the stored name is released in
841         * emitDoctypeToken().
842         */
844        private void strBufToDoctypeName() {
845            doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen,
846                    interner);
847        }
848    
849        /**
850         * Emits the smaller buffer as character tokens.
851         * 
852         * @throws SAXException
853         *             if the token handler threw
854         */
855        private void emitStrBuf() throws SAXException {
856            if (strBufLen > 0) {
857                tokenHandler.characters(strBuf, 0, strBufLen);
858            }
859        }
860    
           /**
            * Empties the larger buffer by resetting its length to zero. The
            * backing array is kept and not cleared.
            */
861        @Inline private void clearLongStrBuf() {
862            longStrBufLen = 0;
863        }
864    
           /**
            * Resets the larger buffer so that it contains only <code>c</code>.
            * Writes to index 0 without a capacity check, so
            * <code>longStrBuf</code> must already have room for at least one
            * <code>char</code>.
            * 
            * @param c
            *            the UTF-16 code unit that becomes the sole content
            */
865        @Inline private void clearLongStrBufAndAppend(char c) {
866            longStrBuf[0] = c;
867            longStrBufLen = 1;
868        }
869    
870        /**
871         * Appends to the larger buffer.
872         * 
873         * @param c
874         *            the UTF-16 code unit to append
875         */
876        private void appendLongStrBuf(char c) {
877            if (longStrBufLen == longStrBuf.length) {
878                char[] newBuf = new char[longStrBufLen + (longStrBufLen >> 1)];
879                System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
880                longStrBuf = newBuf;
881            }
882            longStrBuf[longStrBufLen++] = c;
883        }
884    
           /**
            * Appends a hyphen to the larger buffer after a preceding hyphen,
            * applying <code>commentPolicy</code>:
            * <ul>
            * <li><code>ALTER_INFOSET</code>: appends a space first, then continues
            * as for <code>ALLOW</code> (deliberate fall-through);</li>
            * <li><code>ALLOW</code>: issues a warning and appends the hyphen;</li>
            * <li><code>FATAL</code>: reports a fatal error and appends nothing
            * here.</li>
            * </ul>
            * Only the <code>appendLongStrBuf('-')</code> call lies outside the NOCPP
            * markers.
            * 
            * @throws SAXException
            *             declared because <code>warn</code> and <code>fatal</code>
            *             are called; NOTE(review): both are defined outside this
            *             excerpt
            */
885        @Inline private void appendSecondHyphenToBogusComment() throws SAXException {
886            // [NOCPP[
887            switch (commentPolicy) {
888                case ALTER_INFOSET:
889                    // detachLongStrBuf();
890                    appendLongStrBuf(' ');
891                    // FALLTHROUGH
892                case ALLOW:
893                    warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
894                    // ]NOCPP]
895                    appendLongStrBuf('-');
896                    // [NOCPP[
897                    break;
898                case FATAL:
899                    fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
900                    break;
901            }
902            // ]NOCPP]
903        }
904    
905        // [NOCPP[
           /**
            * Handles a trailing hyphen in a comment according to
            * <code>commentPolicy</code>:
            * <ul>
            * <li><code>ALTER_INFOSET</code>: appends a space to the larger buffer,
            * then continues as for <code>ALLOW</code> (deliberate
            * fall-through);</li>
            * <li><code>ALLOW</code>: issues a warning;</li>
            * <li><code>FATAL</code>: reports a fatal error.</li>
            * </ul>
            * 
            * @throws SAXException
            *             declared because <code>warn</code> and <code>fatal</code>
            *             are called; NOTE(review): both are defined outside this
            *             excerpt
            */
906        private void maybeAppendSpaceToBogusComment() throws SAXException {
907            switch (commentPolicy) {
908                case ALTER_INFOSET:
909                    // detachLongStrBuf();
910                    appendLongStrBuf(' ');
911                    // FALLTHROUGH
912                case ALLOW:
913                    warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
914                    break;
915                case FATAL:
916                    fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
917                    break;
918            }
919        }
920    
921        // ]NOCPP]
922    
    /**
     * Reports consecutive hyphens in a comment and appends the given code unit
     * to the larger buffer, applying the comment policy.
     * 
     * <p>
     * <code>errConsecutiveHyphens()</code> is always called first. Then,
     * depending on <code>commentPolicy</code>: <code>ALTER_INFOSET</code>
     * drops the last code unit of the larger buffer, appends a space and a
     * hyphen in its place and falls through to <code>ALLOW</code>, which
     * warns and appends <code>c</code>; <code>FATAL</code> calls
     * <code>fatal()</code> and does not append <code>c</code>.
     * 
     * <p>
     * NOTE(review): the dropped code unit is presumably the second of the two
     * hyphens, so that a space ends up between them; confirm against the
     * callers.
     * 
     * @param c
     *            the UTF-16 code unit to append
     * @throws SAXException
     *             if reporting fails or the policy is <code>FATAL</code>
     */
    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c)
            throws SAXException {
        errConsecutiveHyphens();
        // [NOCPP[
        switch (commentPolicy) {
            case ALTER_INFOSET:
                // detachLongStrBuf();
                longStrBufLen--;
                appendLongStrBuf(' ');
                appendLongStrBuf('-');
                // FALLTHROUGH
            case ALLOW:
                warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
                // ]NOCPP]
                appendLongStrBuf(c);
                // [NOCPP[
                break;
            case FATAL:
                fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
                break;
        }
        // ]NOCPP]
    }
946    
947        private void appendLongStrBuf(@NoLength char[] buffer, int offset, int length) {
948            int reqLen = longStrBufLen + length;
949            if (longStrBuf.length < reqLen) {
950                char[] newBuf = new char[reqLen + (reqLen >> 1)];
951                System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
952                longStrBuf = newBuf;
953            }
954            System.arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
955            longStrBufLen = reqLen;
956        }
957    
958        /**
959         * Append the contents of the smaller buffer to the larger one.
960         */
961        @Inline private void appendStrBufToLongStrBuf() {
962            appendLongStrBuf(strBuf, 0, strBufLen);
963        }
964    
965        /**
966         * The larger buffer as a string.
967         * 
968         * <p>
969         * C++ memory note: The return value must be released.
970         * 
971         * @return the larger buffer as a string
972         */
973        private String longStrBufToString() {
974            return Portability.newStringFromBuffer(longStrBuf, 0, longStrBufLen);
975        }
976    
977        /**
978         * Emits the current comment token.
979         * 
980         * @param pos
981         *            TODO
982         * 
983         * @throws SAXException
984         */
985        private void emitComment(int provisionalHyphens, int pos)
986                throws SAXException {
987            // [NOCPP[
988            if (wantsComments) {
989                // ]NOCPP]
990                // if (longStrBufOffset != -1) {
991                // tokenHandler.comment(buf, longStrBufOffset, longStrBufLen
992                // - provisionalHyphens);
993                // } else {
994                tokenHandler.comment(longStrBuf, 0, longStrBufLen
995                        - provisionalHyphens);
996                // }
997                // [NOCPP[
998            }
999            // ]NOCPP]
1000            cstart = pos + 1;
1001        }
1002    
1003        /**
1004         * Flushes coalesced character tokens.
1005         * 
1006         * @param buf
1007         *            TODO
1008         * @param pos
1009         *            TODO
1010         * 
1011         * @throws SAXException
1012         */
1013        protected void flushChars(@NoLength char[] buf, int pos)
1014                throws SAXException {
1015            if (pos > cstart) {
1016                tokenHandler.characters(buf, cstart, pos - cstart);
1017            }
1018            cstart = Integer.MAX_VALUE;
1019        }
1020    
1021        /**
1022         * Reports an condition that would make the infoset incompatible with XML
1023         * 1.0 as fatal.
1024         * 
1025         * @param message
1026         *            the message
1027         * @throws SAXException
1028         * @throws SAXParseException
1029         */
1030        public void fatal(String message) throws SAXException {
1031            SAXParseException spe = new SAXParseException(message, this);
1032            if (errorHandler != null) {
1033                errorHandler.fatalError(spe);
1034            }
1035            throw spe;
1036        }
1037    
1038        /**
1039         * Reports a Parse Error.
1040         * 
1041         * @param message
1042         *            the message
1043         * @throws SAXException
1044         */
1045        public void err(String message) throws SAXException {
1046            if (errorHandler == null) {
1047                return;
1048            }
1049            SAXParseException spe = new SAXParseException(message, this);
1050            errorHandler.error(spe);
1051        }
1052    
1053        public void errTreeBuilder(String message) throws SAXException {
1054            ErrorHandler eh = null;
1055            if (tokenHandler instanceof TreeBuilder<?>) {
1056                TreeBuilder<?> treeBuilder = (TreeBuilder<?>) tokenHandler;
1057                eh = treeBuilder.getErrorHandler();
1058            }
1059            if (eh == null) {
1060                eh = errorHandler;
1061            }
1062            if (eh == null) {
1063                return;
1064            }
1065            SAXParseException spe = new SAXParseException(message, this);
1066            eh.error(spe);
1067        }
1068    
1069        /**
1070         * Reports a warning
1071         * 
1072         * @param message
1073         *            the message
1074         * @throws SAXException
1075         */
1076        public void warn(String message) throws SAXException {
1077            if (errorHandler == null) {
1078                return;
1079            }
1080            SAXParseException spe = new SAXParseException(message, this);
1081            errorHandler.warning(spe);
1082        }
1083    
1084        /**
1085         * 
1086         */
1087        private void resetAttributes() {
1088            // [NOCPP[
1089            if (newAttributesEachTime) {
1090                // ]NOCPP]
1091                attributes = null;
1092                // [NOCPP[
1093            } else {
1094                attributes.clear(mappingLangToXmlLang);
1095            }
1096            // ]NOCPP]
1097        }
1098    
1099        private void strBufToElementNameString() {
1100            // if (strBufOffset != -1) {
1101            // return ElementName.elementNameByBuffer(buf, strBufOffset, strBufLen);
1102            // } else {
1103            tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
1104                    interner);
1105            // }
1106        }
1107    
1108        private int emitCurrentTagToken(boolean selfClosing, int pos)
1109                throws SAXException {
1110            cstart = pos + 1;
1111            maybeErrSlashInEndTag(selfClosing);
1112            stateSave = Tokenizer.DATA;
1113            HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
1114                    : attributes);
1115            if (endTag) {
1116                /*
1117                 * When an end tag token is emitted, the content model flag must be
1118                 * switched to the PCDATA state.
1119                 */
1120                maybeErrAttributesOnEndTag(attrs);
1121                tokenHandler.endTag(tagName);
1122                Portability.delete(attributes);
1123            } else {
1124                tokenHandler.startTag(tagName, attrs, selfClosing);
1125            }
1126            tagName.release();
1127            tagName = null;
1128            resetAttributes();
1129            /*
1130             * The token handler may have called setStateAndEndTagExpectation
1131             * and changed stateSave since the start of this method.
1132             */
1133            return stateSave;
1134        }
1135    
    /**
     * Finishes the attribute name that has been collected in the smaller
     * buffer.
     * 
     * <p>
     * The name is looked up from the buffer contents and stored in
     * <code>attributeName</code>, and the attribute holder is created if
     * there is none yet. If the holder already contains the name, a duplicate
     * attribute error is reported, the name is released and
     * <code>attributeName</code> is set to <code>null</code>. The methods
     * that add the attribute skip the addition when it is <code>null</code>.
     * 
     * @throws SAXException
     *             if error reporting throws
     */
    private void attributeNameComplete() throws SAXException {
        // if (strBufOffset != -1) {
        // attributeName = AttributeName.nameByBuffer(buf, strBufOffset,
        // strBufLen, namePolicy != XmlViolationPolicy.ALLOW);
        // } else {
        attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen
        // [NOCPP[
                , namePolicy != XmlViolationPolicy.ALLOW
                // ]NOCPP]
                , interner);
        // }

        // The holder is created lazily on the first attribute.
        if (attributes == null) {
            attributes = new HtmlAttributes(mappingLangToXmlLang);
        }

        /*
         * When the user agent leaves the attribute name state (and before
         * emitting the tag token, if appropriate), the complete attribute's
         * name must be compared to the other attributes on the same token; if
         * there is already an attribute on the token with the exact same name,
         * then this is a parse error and the new attribute must be dropped,
         * along with the value that gets associated with it (if any).
         */
        if (attributes.contains(attributeName)) {
            errDuplicateAttribute();
            attributeName.release();
            attributeName = null;
        }
    }
1166    
    /**
     * Adds the current attribute, which has no value, to the attribute
     * holder.
     * 
     * <p>
     * Nothing is added when <code>attributeName</code> is <code>null</code>.
     * Otherwise:
     * <ul>
     * <li>In HTML4 mode a boolean attribute gets its own local name as the
     * value when <code>html4ModeCompatibleWithXhtml1Schemata</code> is set
     * and the empty string when it is not. A non-boolean attribute is
     * reported as an error and gets the empty string.</li>
     * <li>Outside HTML4 mode a warning is issued for <code>src</code> and
     * <code>href</code>, and the attribute is added with an empty string.</li>
     * </ul>
     * After the addition <code>attributeName</code> is set to
     * <code>null</code>.
     * 
     * <p>
     * Independently of the addition, an error is reported for a
     * <code>charset</code> attribute on a <code>meta</code> element when
     * <code>metaBoundaryPassed</code> is set.
     * 
     * @throws SAXException
     *             if error reporting throws
     */
    private void addAttributeWithoutValue() throws SAXException {
        noteAttributeWithoutValue();

        // [NOCPP[
        // Identity comparisons, so a null attributeName is harmless here.
        if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
                && ElementName.META == tagName) {
            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
        }
        // ]NOCPP]
        if (attributeName != null) {
            // [NOCPP[
            if (html4) {
                if (attributeName.isBoolean()) {
                    if (html4ModeCompatibleWithXhtml1Schemata) {
                        attributes.addAttribute(attributeName,
                                attributeName.getLocal(AttributeName.HTML),
                                xmlnsPolicy);
                    } else {
                        attributes.addAttribute(attributeName, "", xmlnsPolicy);
                    }
                } else {
                    err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
                    attributes.addAttribute(attributeName, "", xmlnsPolicy);
                }
            } else {
                if (AttributeName.SRC == attributeName
                        || AttributeName.HREF == attributeName) {
                    warn("Attribute \u201C"
                            + attributeName.getLocal(AttributeName.HTML)
                            + "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
                }
                // ]NOCPP]
                attributes.addAttribute(attributeName,
                        Portability.newEmptyString()
                        // [NOCPP[
                        , xmlnsPolicy
                // ]NOCPP]
                );
                // [NOCPP[
            }
            // ]NOCPP]
            attributeName = null; // attributeName has been adopted by the
            // |attributes| object
        }
    }
1212    
    /**
     * Adds the current attribute to the attribute holder, using the contents
     * of the larger buffer as its value.
     * 
     * <p>
     * Nothing is added when <code>attributeName</code> is <code>null</code>.
     * On a start tag in HTML4 mode with
     * <code>html4ModeCompatibleWithXhtml1Schemata</code> set, the value of a
     * case-folded attribute is converted to ASCII lower case first. After the
     * addition <code>attributeName</code> is set to <code>null</code>.
     * 
     * <p>
     * Independently of the addition, an error is reported for a
     * <code>charset</code> attribute on a <code>meta</code> element when
     * <code>metaBoundaryPassed</code> is set.
     * 
     * @throws SAXException
     *             if error reporting throws
     */
    private void addAttributeWithValue() throws SAXException {
        // [NOCPP[
        // Identity comparisons, so a null attributeName is harmless here.
        if (metaBoundaryPassed && ElementName.META == tagName
                && AttributeName.CHARSET == attributeName) {
            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
        }
        // ]NOCPP]
        if (attributeName != null) {
            String val = longStrBufToString(); // Ownership transferred to
            // HtmlAttributes
            // [NOCPP[
            if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
                    && attributeName.isCaseFolded()) {
                val = newAsciiLowerCaseStringFromString(val);
            }
            // ]NOCPP]
            attributes.addAttribute(attributeName, val
            // [NOCPP[
                    , xmlnsPolicy
            // ]NOCPP]
            );
            attributeName = null; // attributeName has been adopted by the
            // |attributes| object
        }
    }
1238    
1239        // [NOCPP[
1240    
1241        private static String newAsciiLowerCaseStringFromString(String str) {
1242            if (str == null) {
1243                return null;
1244            }
1245            char[] buf = new char[str.length()];
1246            for (int i = 0; i < str.length(); i++) {
1247                char c = str.charAt(i);
1248                if (c >= 'A' && c <= 'Z') {
1249                    c += 0x20;
1250                }
1251                buf[i] = c;
1252            }
1253            return new String(buf);
1254        }
1255    
1256        protected void startErrorReporting() throws SAXException {
1257    
1258        }
1259    
1260        // ]NOCPP]
1261        
    /**
     * Starts tokenization.
     * 
     * <p>
     * Initializes the tokenizer, notifies the token handler that tokenization
     * is starting and then calls <code>startErrorReporting()</code> (the
     * latter sits in a NOCPP-marked region).
     * 
     * @throws SAXException
     *             if the token handler or <code>startErrorReporting()</code>
     *             throws
     */
    public void start() throws SAXException {
        initializeWithoutStarting();
        tokenHandler.startTokenization(this);
        // [NOCPP[
        startErrorReporting();
        // ]NOCPP]
    }
1269    
1270        public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
1271            int state = stateSave;
1272            int returnState = returnStateSave;
1273            char c = '\u0000';
1274            shouldSuspend = false;
1275            lastCR = false;
1276    
1277            int start = buffer.getStart();
1278            /**
1279             * The index of the last <code>char</code> read from <code>buf</code>.
1280             */
1281            int pos = start - 1;
1282    
1283            /**
1284             * The index of the first <code>char</code> in <code>buf</code> that is
1285             * part of a coalesced run of character tokens or
1286             * <code>Integer.MAX_VALUE</code> if there is not a current run being
1287             * coalesced.
1288             */
1289            switch (state) {
1290                case DATA:
1291                case RCDATA:
1292                case SCRIPT_DATA:
1293                case PLAINTEXT:
1294                case RAWTEXT:
1295                case CDATA_SECTION:
1296                case SCRIPT_DATA_ESCAPED:
1297                case SCRIPT_DATA_ESCAPE_START:
1298                case SCRIPT_DATA_ESCAPE_START_DASH:
1299                case SCRIPT_DATA_ESCAPED_DASH:
1300                case SCRIPT_DATA_ESCAPED_DASH_DASH:
1301                case SCRIPT_DATA_DOUBLE_ESCAPE_START:
1302                case SCRIPT_DATA_DOUBLE_ESCAPED:
1303                case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
1304                case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
1305                case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
1306                case SCRIPT_DATA_DOUBLE_ESCAPE_END:
1307                    cstart = start;
1308                    break;
1309                default:
1310                    cstart = Integer.MAX_VALUE;
1311                    break;
1312            }
1313    
1314            /**
1315             * The number of <code>char</code>s in <code>buf</code> that have
1316             * meaning. (The rest of the array is garbage and should not be
1317             * examined.)
1318             */
1319            pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
1320                    buffer.getEnd());
1321            if (pos == buffer.getEnd()) {
1322                // exiting due to end of buffer
1323                buffer.setStart(pos);
1324            } else {
1325                buffer.setStart(pos + 1);
1326            }
1327            return lastCR;
1328        }
1329    
1330        @SuppressWarnings("unused") private int stateLoop(int state, char c,
1331                int pos, @NoLength char[] buf, boolean reconsume, int returnState,
1332                int endPos) throws SAXException {
1333            /*
1334             * Idioms used in this code:
1335             * 
1336             * 
1337             * Consuming the next input character
1338             * 
1339             * To consume the next input character, the code does this: if (++pos ==
1340             * endPos) { break stateloop; } c = checkChar(buf, pos);
1341             * 
1342             * 
1343             * Staying in a state
1344             * 
1345             * When there's a state that the tokenizer may stay in over multiple
1346             * input characters, the state has a wrapper |for(;;)| loop and staying
1347             * in the state continues the loop.
1348             * 
1349             * 
1350             * Switching to another state
1351             * 
1352             * To switch to another state, the code sets the state variable to the
1353             * magic number of the new state. Then it either continues stateloop or
1354             * breaks out of the state's own wrapper loop if the target state is
1355             * right after the current state in source order. (This is a partial
1356             * workaround for Java's lack of goto.)
1357             * 
1358             * 
1359             * Reconsume support
1360             * 
1361             * The spec sometimes says that an input character is reconsumed in
1362             * another state. If a state can ever be entered so that an input
1363             * character can be reconsumed in it, the state's code starts with an
1364             * |if (reconsume)| that sets reconsume to false and skips over the
1365             * normal code for consuming a new character.
1366             * 
1367             * To reconsume the current character in another state, the code sets
1368             * |reconsume| to true and then switches to the other state.
1369             * 
1370             * 
1371             * Emitting character tokens
1372             * 
1373             * This method emits character tokens lazily. Whenever a new range of
1374             * character tokens starts, the field cstart must be set to the start
1375             * index of the range. The flushChars() method must be called at the end
1376             * of a range to flush it.
1377             * 
1378             * 
1379             * U+0000 handling
1380             * 
1381             * The various states have to handle the replacement of U+0000 with
1382             * U+FFFD. However, if U+0000 would be reconsumed in another state, the
1383             * replacement doesn't need to happen, because it's handled by the
1384             * reconsuming state.
1385             * 
1386             * 
1387             * LF handling
1388             * 
1389             * Every state needs to increment the line number upon LF unless the LF
1390             * gets reconsumed by another state which increments the line number.
1391             * 
1392             * 
1393             * CR handling
1394             * 
1395             * Every state needs to handle CR unless the CR gets reconsumed and is
1396             * handled by the reconsuming state. The CR needs to be handled as if it
         * were an LF, the lastCR field must be set to true and then this
1398             * method must return. The IO driver will then swallow the next
1399             * character if it is an LF to coalesce CRLF.
1400             */
1401            stateloop: for (;;) {
1402                switch (state) {
1403                    case DATA:
1404                        dataloop: for (;;) {
1405                            if (reconsume) {
1406                                reconsume = false;
1407                            } else {
1408                                if (++pos == endPos) {
1409                                    break stateloop;
1410                                }
1411                                c = checkChar(buf, pos);
1412                            }
1413                            switch (c) {
1414                                case '&':
1415                                    /*
1416                                     * U+0026 AMPERSAND (&) Switch to the character
1417                                     * reference in data state.
1418                                     */
1419                                    flushChars(buf, pos);
1420                                    clearStrBufAndAppend(c);
1421                                    setAdditionalAndRememberAmpersandLocation('\u0000');
1422                                    returnState = state;
1423                                    state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
1424                                    continue stateloop;
1425                                case '<':
1426                                    /*
1427                                     * U+003C LESS-THAN SIGN (<) Switch to the tag
1428                                     * open state.
1429                                     */
1430                                    flushChars(buf, pos);
1431    
1432                                    state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
1433                                    break dataloop; // FALL THROUGH continue
1434                                // stateloop;
1435                                case '\u0000':
1436                                    emitReplacementCharacter(buf, pos);
1437                                    continue;
1438                                case '\r':
1439                                    emitCarriageReturn(buf, pos);
1440                                    break stateloop;
1441                                case '\n':
1442                                    silentLineFeed();
1443                                default:
1444                                    /*
1445                                     * Anything else Emit the input character as a
1446                                     * character token.
1447                                     * 
1448                                     * Stay in the data state.
1449                                     */
1450                                    continue;
1451                            }
1452                        }
1453                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
1454                    case TAG_OPEN:
1455                        tagopenloop: for (;;) {
1456                            /*
1457                             * The behavior of this state depends on the content
1458                             * model flag.
1459                             */
1460                            if (++pos == endPos) {
1461                                break stateloop;
1462                            }
1463                            c = checkChar(buf, pos);
1464                            /*
1465                             * If the content model flag is set to the PCDATA state
1466                             * Consume the next input character:
1467                             */
1468                            if (c >= 'A' && c <= 'Z') {
1469                                /*
1470                                 * U+0041 LATIN CAPITAL LETTER A through to U+005A
1471                                 * LATIN CAPITAL LETTER Z Create a new start tag
1472                                 * token,
1473                                 */
1474                                endTag = false;
1475                                /*
1476                                 * set its tag name to the lowercase version of the
1477                                 * input character (add 0x0020 to the character's
1478                                 * code point),
1479                                 */
1480                                clearStrBufAndAppend((char) (c + 0x20));
1481                                /* then switch to the tag name state. */
1482                                state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
1483                                /*
1484                                 * (Don't emit the token yet; further details will
1485                                 * be filled in before it is emitted.)
1486                                 */
1487                                break tagopenloop;
1488                                // continue stateloop;
1489                            } else if (c >= 'a' && c <= 'z') {
1490                                /*
1491                                 * U+0061 LATIN SMALL LETTER A through to U+007A
1492                                 * LATIN SMALL LETTER Z Create a new start tag
1493                                 * token,
1494                                 */
1495                                endTag = false;
1496                                /*
1497                                 * set its tag name to the input character,
1498                                 */
1499                                clearStrBufAndAppend(c);
1500                                /* then switch to the tag name state. */
1501                                state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
1502                                /*
1503                                 * (Don't emit the token yet; further details will
1504                                 * be filled in before it is emitted.)
1505                                 */
1506                                break tagopenloop;
1507                                // continue stateloop;
1508                            }
1509                            switch (c) {
1510                                case '!':
1511                                    /*
1512                                     * U+0021 EXCLAMATION MARK (!) Switch to the
1513                                     * markup declaration open state.
1514                                     */
1515                                    state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
1516                                    continue stateloop;
1517                                case '/':
1518                                    /*
1519                                     * U+002F SOLIDUS (/) Switch to the close tag
1520                                     * open state.
1521                                     */
1522                                    state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
1523                                    continue stateloop;
1524                                case '?':
1525                                    /*
1526                                     * U+003F QUESTION MARK (?) Parse error.
1527                                     */
1528                                    errProcessingInstruction();
1529                                    /*
1530                                     * Switch to the bogus comment state.
1531                                     */
1532                                    clearLongStrBufAndAppend(c);
1533                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
1534                                    continue stateloop;
1535                                case '>':
1536                                    /*
1537                                     * U+003E GREATER-THAN SIGN (>) Parse error.
1538                                     */
1539                                    errLtGt();
1540                                    /*
1541                                     * Emit a U+003C LESS-THAN SIGN character token
1542                                     * and a U+003E GREATER-THAN SIGN character
1543                                     * token.
1544                                     */
1545                                    tokenHandler.characters(Tokenizer.LT_GT, 0, 2);
1546                                    /* Switch to the data state. */
1547                                    cstart = pos + 1;
1548                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
1549                                    continue stateloop;
1550                                default:
1551                                    /*
1552                                     * Anything else Parse error.
1553                                     */
1554                                    errBadCharAfterLt(c);
1555                                    /*
1556                                     * Emit a U+003C LESS-THAN SIGN character token
1557                                     */
1558                                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
1559                                    /*
1560                                     * and reconsume the current input character in
1561                                     * the data state.
1562                                     */
1563                                    cstart = pos;
1564                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
1565                                    reconsume = true;
1566                                    continue stateloop;
1567                            }
1568                        }
1569                        // FALL THROUGH DON'T REORDER
1570                    case TAG_NAME:
1571                        tagnameloop: for (;;) {
1572                            if (++pos == endPos) {
1573                                break stateloop;
1574                            }
1575                            c = checkChar(buf, pos);
1576                            /*
1577                             * Consume the next input character:
1578                             */
1579                            switch (c) {
1580                                case '\r':
1581                                    silentCarriageReturn();
1582                                    strBufToElementNameString();
1583                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
1584                                    break stateloop;
1585                                case '\n':
1586                                    silentLineFeed();
1587                                case ' ':
1588                                case '\t':
1589                                case '\u000C':
1590                                    /*
1591                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
1592                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
1593                                     * Switch to the before attribute name state.
1594                                     */
1595                                    strBufToElementNameString();
1596                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
1597                                    break tagnameloop;
1598                                // continue stateloop;
1599                                case '/':
1600                                    /*
1601                                     * U+002F SOLIDUS (/) Switch to the self-closing
1602                                     * start tag state.
1603                                     */
1604                                    strBufToElementNameString();
1605                                    state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
1606                                    continue stateloop;
1607                                case '>':
1608                                    /*
1609                                     * U+003E GREATER-THAN SIGN (>) Emit the current
1610                                     * tag token.
1611                                     */
1612                                    strBufToElementNameString();
1613                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
1614                                    if (shouldSuspend) {
1615                                        break stateloop;
1616                                    }
1617                                    /*
1618                                     * Switch to the data state.
1619                                     */
1620                                    continue stateloop;
1621                                case '\u0000':
1622                                    c = '\uFFFD';
1623                                    // fall thru
1624                                default:
1625                                    if (c >= 'A' && c <= 'Z') {
1626                                        /*
1627                                         * U+0041 LATIN CAPITAL LETTER A through to
1628                                         * U+005A LATIN CAPITAL LETTER Z Append the
1629                                         * lowercase version of the current input
1630                                         * character (add 0x0020 to the character's
1631                                         * code point) to the current tag token's
1632                                         * tag name.
1633                                         */
1634                                        c += 0x20;
1635                                    }
1636                                    /*
1637                                     * Anything else Append the current input
1638                                     * character to the current tag token's tag
1639                                     * name.
1640                                     */
1641                                    appendStrBuf(c);
1642                                    /*
1643                                     * Stay in the tag name state.
1644                                     */
1645                                    continue;
1646                            }
1647                        }
1648                        // FALLTHRU DON'T REORDER
1649                    case BEFORE_ATTRIBUTE_NAME:
1650                        beforeattributenameloop: for (;;) {
1651                            if (reconsume) {
1652                                reconsume = false;
1653                            } else {
1654                                if (++pos == endPos) {
1655                                    break stateloop;
1656                                }
1657                                c = checkChar(buf, pos);
1658                            }
1659                            /*
1660                             * Consume the next input character:
1661                             */
1662                            switch (c) {
1663                                case '\r':
1664                                    silentCarriageReturn();
1665                                    break stateloop;
1666                                case '\n':
1667                                    silentLineFeed();
1668                                    // fall thru
1669                                case ' ':
1670                                case '\t':
1671                                case '\u000C':
1672                                    /*
1673                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
1674                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
1675                                     * in the before attribute name state.
1676                                     */
1677                                    continue;
1678                                case '/':
1679                                    /*
1680                                     * U+002F SOLIDUS (/) Switch to the self-closing
1681                                     * start tag state.
1682                                     */
1683                                    state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
1684                                    continue stateloop;
1685                                case '>':
1686                                    /*
1687                                     * U+003E GREATER-THAN SIGN (>) Emit the current
1688                                     * tag token.
1689                                     */
1690                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
1691                                    if (shouldSuspend) {
1692                                        break stateloop;
1693                                    }
1694                                    /*
1695                                     * Switch to the data state.
1696                                     */
1697                                    continue stateloop;
1698                                case '\u0000':
1699                                    c = '\uFFFD';
1700                                    // fall thru
1701                                case '\"':
1702                                case '\'':
1703                                case '<':
1704                                case '=':
1705                                    /*
1706                                     * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
1707                                     * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
1708                                     * SIGN (=) Parse error.
1709                                     */
1710                                    errBadCharBeforeAttributeNameOrNull(c);
1711                                    /*
1712                                     * Treat it as per the "anything else" entry
1713                                     * below.
1714                                     */
1715                                default:
1716                                    /*
1717                                     * Anything else Start a new attribute in the
1718                                     * current tag token.
1719                                     */
1720                                    if (c >= 'A' && c <= 'Z') {
1721                                        /*
1722                                         * U+0041 LATIN CAPITAL LETTER A through to
1723                                         * U+005A LATIN CAPITAL LETTER Z Set that
1724                                         * attribute's name to the lowercase version
1725                                         * of the current input character (add
1726                                         * 0x0020 to the character's code point)
1727                                         */
1728                                        c += 0x20;
1729                                    }
1730                                    /*
1731                                     * Set that attribute's name to the current
1732                                     * input character,
1733                                     */
1734                                    clearStrBufAndAppend(c);
1735                                    /*
1736                                     * and its value to the empty string.
1737                                     */
1738                                    // Will do later.
1739                                    /*
1740                                     * Switch to the attribute name state.
1741                                     */
1742                                    state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
1743                                    break beforeattributenameloop;
1744                                // continue stateloop;
1745                            }
1746                        }
1747                        // FALLTHRU DON'T REORDER
1748                    case ATTRIBUTE_NAME:
1749                        attributenameloop: for (;;) {
1750                            if (++pos == endPos) {
1751                                break stateloop;
1752                            }
1753                            c = checkChar(buf, pos);
1754                            /*
1755                             * Consume the next input character:
1756                             */
1757                            switch (c) {
1758                                case '\r':
1759                                    silentCarriageReturn();
1760                                    attributeNameComplete();
1761                                    state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
1762                                    break stateloop;
1763                                case '\n':
1764                                    silentLineFeed();
1765                                    // fall thru
1766                                case ' ':
1767                                case '\t':
1768                                case '\u000C':
1769                                    /*
1770                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
1771                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
1772                                     * Switch to the after attribute name state.
1773                                     */
1774                                    attributeNameComplete();
1775                                    state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
1776                                    continue stateloop;
1777                                case '/':
1778                                    /*
1779                                     * U+002F SOLIDUS (/) Switch to the self-closing
1780                                     * start tag state.
1781                                     */
1782                                    attributeNameComplete();
1783                                    addAttributeWithoutValue();
1784                                    state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
1785                                    continue stateloop;
1786                                case '=':
1787                                    /*
1788                                     * U+003D EQUALS SIGN (=) Switch to the before
1789                                     * attribute value state.
1790                                     */
1791                                    attributeNameComplete();
1792                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
1793                                    break attributenameloop;
1794                                // continue stateloop;
1795                                case '>':
1796                                    /*
1797                                     * U+003E GREATER-THAN SIGN (>) Emit the current
1798                                     * tag token.
1799                                     */
1800                                    attributeNameComplete();
1801                                    addAttributeWithoutValue();
1802                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
1803                                    if (shouldSuspend) {
1804                                        break stateloop;
1805                                    }
1806                                    /*
1807                                     * Switch to the data state.
1808                                     */
1809                                    continue stateloop;
1810                                case '\u0000':
1811                                    c = '\uFFFD';
1812                                    // fall thru
1813                                case '\"':
1814                                case '\'':
1815                                case '<':
1816                                    /*
1817                                     * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
1818                                     * (') U+003C LESS-THAN SIGN (<) Parse error.
1819                                     */
1820                                    errQuoteOrLtInAttributeNameOrNull(c);
1821                                    /*
1822                                     * Treat it as per the "anything else" entry
1823                                     * below.
1824                                     */
1825                                default:
1826                                    if (c >= 'A' && c <= 'Z') {
1827                                        /*
1828                                         * U+0041 LATIN CAPITAL LETTER A through to
1829                                         * U+005A LATIN CAPITAL LETTER Z Append the
1830                                         * lowercase version of the current input
1831                                         * character (add 0x0020 to the character's
1832                                         * code point) to the current attribute's
1833                                         * name.
1834                                         */
1835                                        c += 0x20;
1836                                    }
1837                                    /*
1838                                     * Anything else Append the current input
1839                                     * character to the current attribute's name.
1840                                     */
1841                                    appendStrBuf(c);
1842                                    /*
1843                                     * Stay in the attribute name state.
1844                                     */
1845                                    continue;
1846                            }
1847                        }
1848                        // FALLTHRU DON'T REORDER
1849                    case BEFORE_ATTRIBUTE_VALUE:
1850                        beforeattributevalueloop: for (;;) {
1851                            if (++pos == endPos) {
1852                                break stateloop;
1853                            }
1854                            c = checkChar(buf, pos);
1855                            /*
1856                             * Consume the next input character:
1857                             */
1858                            switch (c) {
1859                                case '\r':
1860                                    silentCarriageReturn();
1861                                    break stateloop;
1862                                case '\n':
1863                                    silentLineFeed();
1864                                    // fall thru
1865                                case ' ':
1866                                case '\t':
1867                                case '\u000C':
1868                                    /*
1869                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
1870                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
1871                                     * in the before attribute value state.
1872                                     */
1873                                    continue;
1874                                case '"':
1875                                    /*
1876                                     * U+0022 QUOTATION MARK (") Switch to the
1877                                     * attribute value (double-quoted) state.
1878                                     */
1879                                    clearLongStrBuf();
1880                                    state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
1881                                    break beforeattributevalueloop;
1882                                // continue stateloop;
1883                                case '&':
1884                                    /*
1885                                     * U+0026 AMPERSAND (&) Switch to the attribute
1886                                     * value (unquoted) state and reconsume this
1887                                     * input character.
1888                                     */
1889                                    clearLongStrBuf();
1890                                    state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
1891                                    noteUnquotedAttributeValue();
1892                                    reconsume = true;
1893                                    continue stateloop;
1894                                case '\'':
1895                                    /*
1896                                     * U+0027 APOSTROPHE (') Switch to the attribute
1897                                     * value (single-quoted) state.
1898                                     */
1899                                    clearLongStrBuf();
1900                                    state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
1901                                    continue stateloop;
1902                                case '>':
1903                                    /*
1904                                     * U+003E GREATER-THAN SIGN (>) Parse error.
1905                                     */
1906                                    errAttributeValueMissing();
1907                                    /*
1908                                     * Emit the current tag token.
1909                                     */
1910                                    addAttributeWithoutValue();
1911                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
1912                                    if (shouldSuspend) {
1913                                        break stateloop;
1914                                    }
1915                                    /*
1916                                     * Switch to the data state.
1917                                     */
1918                                    continue stateloop;
1919                                case '\u0000':
1920                                    c = '\uFFFD';
1921                                    // fall thru
1922                                case '<':
1923                                case '=':
1924                                case '`':
1925                                    /*
1926                                     * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
1927                                     * (=) U+0060 GRAVE ACCENT (`)
1928                                     */
1929                                    errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
1930                                    /*
1931                                     * Treat it as per the "anything else" entry
1932                                     * below.
1933                                     */
1934                                default:
1935                                    // [NOCPP[
1936                                    errHtml4NonNameInUnquotedAttribute(c);
1937                                    // ]NOCPP]
1938                                    /*
1939                                     * Anything else Append the current input
1940                                     * character to the current attribute's value.
1941                                     */
1942                                    clearLongStrBufAndAppend(c);
1943                                    /*
1944                                     * Switch to the attribute value (unquoted)
1945                                     * state.
1946                                     */
1947    
1948                                    state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
1949                                    noteUnquotedAttributeValue();
1950                                    continue stateloop;
1951                            }
1952                        }
1953                        // FALLTHRU DON'T REORDER
1954                    case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
1955                        attributevaluedoublequotedloop: for (;;) {
1956                            if (reconsume) {
1957                                reconsume = false;
1958                            } else {
1959                                if (++pos == endPos) {
1960                                    break stateloop;
1961                                }
1962                                c = checkChar(buf, pos);
1963                            }
1964                            /*
1965                             * Consume the next input character:
1966                             */
1967                            switch (c) {
1968                                case '"':
1969                                    /*
1970                                     * U+0022 QUOTATION MARK (") Switch to the after
1971                                     * attribute value (quoted) state.
1972                                     */
1973                                    addAttributeWithValue();
1974    
1975                                    state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
1976                                    break attributevaluedoublequotedloop;
1977                                // continue stateloop;
1978                                case '&':
1979                                    /*
1980                                     * U+0026 AMPERSAND (&) Switch to the character
1981                                     * reference in attribute value state, with the
1982                                     * additional allowed character being U+0022
1983                                     * QUOTATION MARK (").
1984                                     */
1985                                    clearStrBufAndAppend(c);
1986                                    setAdditionalAndRememberAmpersandLocation('\"');
1987                                    returnState = state;
1988                                    state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
1989                                    continue stateloop;
1990                                case '\r':
1991                                    appendLongStrBufCarriageReturn();
1992                                    break stateloop;
1993                                case '\n':
1994                                    appendLongStrBufLineFeed();
1995                                    continue;
1996                                case '\u0000':
1997                                    c = '\uFFFD';
1998                                    // fall thru
1999                                default:
2000                                    /*
2001                                     * Anything else Append the current input
2002                                     * character to the current attribute's value.
2003                                     */
2004                                    appendLongStrBuf(c);
2005                                    /*
2006                                     * Stay in the attribute value (double-quoted)
2007                                     * state.
2008                                     */
2009                                    continue;
2010                            }
2011                        }
2012                        // FALLTHRU DON'T REORDER
2013                    case AFTER_ATTRIBUTE_VALUE_QUOTED:
2014                        afterattributevaluequotedloop: for (;;) {
2015                            if (++pos == endPos) {
2016                                break stateloop;
2017                            }
2018                            c = checkChar(buf, pos);
2019                            /*
2020                             * Consume the next input character:
2021                             */
2022                            switch (c) {
2023                                case '\r':
2024                                    silentCarriageReturn();
2025                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
2026                                    break stateloop;
2027                                case '\n':
2028                                    silentLineFeed();
2029                                    // fall thru
2030                                case ' ':
2031                                case '\t':
2032                                case '\u000C':
2033                                    /*
2034                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
2035                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
2036                                     * Switch to the before attribute name state.
2037                                     */
2038                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
2039                                    continue stateloop;
2040                                case '/':
2041                                    /*
2042                                     * U+002F SOLIDUS (/) Switch to the self-closing
2043                                     * start tag state.
2044                                     */
2045                                    state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
2046                                    break afterattributevaluequotedloop;
2047                                // continue stateloop;
2048                                case '>':
2049                                    /*
2050                                     * U+003E GREATER-THAN SIGN (>) Emit the current
2051                                     * tag token.
2052                                     */
2053                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
2054                                    if (shouldSuspend) {
2055                                        break stateloop;
2056                                    }
2057                                    /*
2058                                     * Switch to the data state.
2059                                     */
2060                                    continue stateloop;
2061                                default:
2062                                    /*
2063                                     * Anything else Parse error.
2064                                     */
2065                                    errNoSpaceBetweenAttributes();
2066                                    /*
2067                                     * Reconsume the character in the before
2068                                     * attribute name state.
2069                                     */
2070                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
2071                                    reconsume = true;
2072                                    continue stateloop;
2073                            }
2074                        }
2075                        // FALLTHRU DON'T REORDER
2076                    case SELF_CLOSING_START_TAG:
2077                        if (++pos == endPos) {
2078                            break stateloop;
2079                        }
2080                        c = checkChar(buf, pos);
2081                        /*
2082                         * Consume the next input character:
2083                         */
2084                        switch (c) {
2085                            case '>':
2086                                /*
2087                                 * U+003E GREATER-THAN SIGN (>) Set the self-closing
2088                                 * flag of the current tag token. Emit the current
2089                                 * tag token.
2090                                 */
2091                                // [NOCPP[
2092                                errHtml4XmlVoidSyntax();
2093                                // ]NOCPP]
2094                                state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos);
2095                                if (shouldSuspend) {
2096                                    break stateloop;
2097                                }
2098                                /*
2099                                 * Switch to the data state.
2100                                 */
2101                                continue stateloop;
2102                            default:
2103                                /* Anything else Parse error. */
2104                                errSlashNotFollowedByGt();
2105                                /*
2106                                 * Reconsume the character in the before attribute
2107                                 * name state.
2108                                 */
2109                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
2110                                reconsume = true;
2111                                continue stateloop;
2112                        }
2113                        // XXX reorder point
2114                    case ATTRIBUTE_VALUE_UNQUOTED:
2115                        for (;;) {
2116                            if (reconsume) {
2117                                reconsume = false;
2118                            } else {
2119                                if (++pos == endPos) {
2120                                    break stateloop;
2121                                }
2122                                c = checkChar(buf, pos);
2123                            }
2124                            /*
2125                             * Consume the next input character:
2126                             */
2127                            switch (c) {
2128                                case '\r':
2129                                    silentCarriageReturn();
2130                                    addAttributeWithValue();
2131                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
2132                                    break stateloop;
2133                                case '\n':
2134                                    silentLineFeed();
2135                                    // fall thru
2136                                case ' ':
2137                                case '\t':
2138                                case '\u000C':
2139                                    /*
2140                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
2141                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
2142                                     * Switch to the before attribute name state.
2143                                     */
2144                                    addAttributeWithValue();
2145                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
2146                                    continue stateloop;
2147                                case '&':
2148                                    /*
2149                                     * U+0026 AMPERSAND (&) Switch to the character
2150                                     * reference in attribute value state, with the
2151                                     * additional allowed character being U+003E
2152                                     * GREATER-THAN SIGN (>)
2153                                     */
2154                                    clearStrBufAndAppend(c);
2155                                    setAdditionalAndRememberAmpersandLocation('>');
2156                                    returnState = state;
2157                                    state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
2158                                    continue stateloop;
2159                                case '>':
2160                                    /*
2161                                     * U+003E GREATER-THAN SIGN (>) Emit the current
2162                                     * tag token.
2163                                     */
2164                                    addAttributeWithValue();
2165                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
2166                                    if (shouldSuspend) {
2167                                        break stateloop;
2168                                    }
2169                                    /*
2170                                     * Switch to the data state.
2171                                     */
2172                                    continue stateloop;
2173                                case '\u0000':
2174                                    c = '\uFFFD';
2175                                    // fall thru
2176                                case '<':
2177                                case '\"':
2178                                case '\'':
2179                                case '=':
2180                                case '`':
2181                                    /*
2182                                     * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
2183                                     * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
2184                                     * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
2185                                     */
2186                                    errUnquotedAttributeValOrNull(c);
2187                                    /*
2188                                     * Treat it as per the "anything else" entry
2189                                     * below.
2190                                     */
2191                                    // fall through
2192                                default:
2193                                    // [NOCPP]
2194                                    errHtml4NonNameInUnquotedAttribute(c);
2195                                    // ]NOCPP]
2196                                    /*
2197                                     * Anything else Append the current input
2198                                     * character to the current attribute's value.
2199                                     */
2200                                    appendLongStrBuf(c);
2201                                    /*
2202                                     * Stay in the attribute value (unquoted) state.
2203                                     */
2204                                    continue;
2205                            }
2206                        }
2207                        // XXX reorder point
2208                    case AFTER_ATTRIBUTE_NAME:
2209                        for (;;) {
2210                            if (++pos == endPos) {
2211                                break stateloop;
2212                            }
2213                            c = checkChar(buf, pos);
2214                            /*
2215                             * Consume the next input character:
2216                             */
2217                            switch (c) {
2218                                case '\r':
2219                                    silentCarriageReturn();
2220                                    break stateloop;
2221                                case '\n':
2222                                    silentLineFeed();
2223                                    // fall thru
2224                                case ' ':
2225                                case '\t':
2226                                case '\u000C':
2227                                    /*
2228                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
2229                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
2230                                     * in the after attribute name state.
2231                                     */
2232                                    continue;
2233                                case '/':
2234                                    /*
2235                                     * U+002F SOLIDUS (/) Switch to the self-closing
2236                                     * start tag state.
2237                                     */
2238                                    addAttributeWithoutValue();
2239                                    state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
2240                                    continue stateloop;
2241                                case '=':
2242                                    /*
2243                                     * U+003D EQUALS SIGN (=) Switch to the before
2244                                     * attribute value state.
2245                                     */
2246                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
2247                                    continue stateloop;
2248                                case '>':
2249                                    /*
2250                                     * U+003E GREATER-THAN SIGN (>) Emit the current
2251                                     * tag token.
2252                                     */
2253                                    addAttributeWithoutValue();
2254                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
2255                                    if (shouldSuspend) {
2256                                        break stateloop;
2257                                    }
2258                                    /*
2259                                     * Switch to the data state.
2260                                     */
2261                                    continue stateloop;
2262                                case '\u0000':
2263                                    c = '\uFFFD';
2264                                    // fall thru
2265                                case '\"':
2266                                case '\'':
2267                                case '<':
2268                                    errQuoteOrLtInAttributeNameOrNull(c);
2269                                    /*
2270                                     * Treat it as per the "anything else" entry
2271                                     * below.
2272                                     */
2273                                default:
2274                                    addAttributeWithoutValue();
2275                                    /*
2276                                     * Anything else Start a new attribute in the
2277                                     * current tag token.
2278                                     */
2279                                    if (c >= 'A' && c <= 'Z') {
2280                                        /*
2281                                         * U+0041 LATIN CAPITAL LETTER A through to
2282                                         * U+005A LATIN CAPITAL LETTER Z Set that
2283                                         * attribute's name to the lowercase version
2284                                         * of the current input character (add
2285                                         * 0x0020 to the character's code point)
2286                                         */
2287                                        c += 0x20;
2288                                    }
2289                                    /*
2290                                     * Set that attribute's name to the current
2291                                     * input character,
2292                                     */
2293                                    clearStrBufAndAppend(c);
2294                                    /*
2295                                     * and its value to the empty string.
2296                                     */
2297                                    // Will do later.
2298                                    /*
2299                                     * Switch to the attribute name state.
2300                                     */
2301                                    state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
2302                                    continue stateloop;
2303                            }
2304                        }
2305                        // XXX reorder point
2306                    case MARKUP_DECLARATION_OPEN:
2307                        markupdeclarationopenloop: for (;;) {
2308                            if (++pos == endPos) {
2309                                break stateloop;
2310                            }
2311                            c = checkChar(buf, pos);
2312                            /*
2313                             * If the next two characters are both U+002D
2314                             * HYPHEN-MINUS characters (-), consume those two
2315                             * characters, create a comment token whose data is the
2316                             * empty string, and switch to the comment start state.
2317                             * 
2318                             * Otherwise, if the next seven characters are an ASCII
2319                             * case-insensitive match for the word "DOCTYPE", then
2320                             * consume those characters and switch to the DOCTYPE
2321                             * state.
2322                             * 
2323                             * Otherwise, if the insertion mode is
2324                             * "in foreign content" and the current node is not an
2325                             * element in the HTML namespace and the next seven
2326                             * characters are a case-sensitive match for the string
2327                             * "[CDATA[" (the five uppercase letters "CDATA" with a
2328                             * U+005B LEFT SQUARE BRACKET character before and
2329                             * after), then consume those characters and switch to
2330                             * the CDATA section state.
2331                             * 
2332                             * Otherwise, this is a parse error. Switch to the bogus
2333                             * comment state. The next character that is consumed,
2334                             * if any, is the first character that will be in the
2335                             * comment.
2336                             */
2337                            switch (c) {
2338                                case '-':
2339                                    clearLongStrBufAndAppend(c);
2340                                    state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
2341                                    break markupdeclarationopenloop;
2342                                // continue stateloop;
2343                                case 'd':
2344                                case 'D':
2345                                    clearLongStrBufAndAppend(c);
2346                                    index = 0;
2347                                    state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
2348                                    continue stateloop;
2349                                case '[':
2350                                    if (tokenHandler.cdataSectionAllowed()) {
2351                                        clearLongStrBufAndAppend(c);
2352                                        index = 0;
2353                                        state = transition(state, Tokenizer.CDATA_START, reconsume, pos);
2354                                        continue stateloop;
2355                                    }
2356                                    // else fall through
2357                                default:
2358                                    errBogusComment();
2359                                    clearLongStrBuf();
2360                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
2361                                    reconsume = true;
2362                                    continue stateloop;
2363                            }
2364                        }
2365                        // FALLTHRU DON'T REORDER
2366                    case MARKUP_DECLARATION_HYPHEN:
2367                        markupdeclarationhyphenloop: for (;;) {
2368                            if (++pos == endPos) {
2369                                break stateloop;
2370                            }
2371                            c = checkChar(buf, pos);
2372                            switch (c) {
2373                                case '\u0000':
2374                                    break stateloop;
2375                                case '-':
2376                                    clearLongStrBuf();
2377                                    state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
2378                                    break markupdeclarationhyphenloop;
2379                                // continue stateloop;
2380                                default:
2381                                    errBogusComment();
2382                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
2383                                    reconsume = true;
2384                                    continue stateloop;
2385                            }
2386                        }
2387                        // FALLTHRU DON'T REORDER
2388                    case COMMENT_START:
2389                        commentstartloop: for (;;) {
2390                            if (++pos == endPos) {
2391                                break stateloop;
2392                            }
2393                            c = checkChar(buf, pos);
2394                            /*
2395                             * Comment start state
2396                             * 
2397                             * 
2398                             * Consume the next input character:
2399                             */
2400                            switch (c) {
2401                                case '-':
2402                                    /*
2403                                     * U+002D HYPHEN-MINUS (-) Switch to the comment
2404                                     * start dash state.
2405                                     */
2406                                    appendLongStrBuf(c);
2407                                    state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
2408                                    continue stateloop;
2409                                case '>':
2410                                    /*
2411                                     * U+003E GREATER-THAN SIGN (>) Parse error.
2412                                     */
2413                                    errPrematureEndOfComment();
2414                                    /* Emit the comment token. */
2415                                    emitComment(0, pos);
2416                                    /*
2417                                     * Switch to the data state.
2418                                     */
2419                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
2420                                    continue stateloop;
2421                                case '\r':
2422                                    appendLongStrBufCarriageReturn();
2423                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2424                                    break stateloop;
2425                                case '\n':
2426                                    appendLongStrBufLineFeed();
2427                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2428                                    break commentstartloop;
2429                                case '\u0000':
2430                                    c = '\uFFFD';
2431                                    // fall thru
2432                                default:
2433                                    /*
2434                                     * Anything else Append the input character to
2435                                     * the comment token's data.
2436                                     */
2437                                    appendLongStrBuf(c);
2438                                    /*
2439                                     * Switch to the comment state.
2440                                     */
2441                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2442                                    break commentstartloop;
2443                                // continue stateloop;
2444                            }
2445                        }
2446                        // FALLTHRU DON'T REORDER
2447                    case COMMENT:
2448                        commentloop: for (;;) {
2449                            if (++pos == endPos) {
2450                                break stateloop;
2451                            }
2452                            c = checkChar(buf, pos);
2453                            /*
2454                             * Comment state Consume the next input character:
2455                             */
2456                            switch (c) {
2457                                case '-':
2458                                    /*
2459                                     * U+002D HYPHEN-MINUS (-) Switch to the comment
2460                                     * end dash state
2461                                     */
2462                                    appendLongStrBuf(c);
2463                                    state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
2464                                    break commentloop;
2465                                // continue stateloop;
2466                                case '\r':
2467                                    appendLongStrBufCarriageReturn();
2468                                    break stateloop;
2469                                case '\n':
2470                                    appendLongStrBufLineFeed();
2471                                    continue;
2472                                case '\u0000':
2473                                    c = '\uFFFD';
2474                                    // fall thru
2475                                default:
2476                                    /*
2477                                     * Anything else Append the input character to
2478                                     * the comment token's data.
2479                                     */
2480                                    appendLongStrBuf(c);
2481                                    /*
2482                                     * Stay in the comment state.
2483                                     */
2484                                    continue;
2485                            }
2486                        }
2487                        // FALLTHRU DON'T REORDER
2488                    case COMMENT_END_DASH:
2489                        commentenddashloop: for (;;) {
2490                            if (++pos == endPos) {
2491                                break stateloop;
2492                            }
2493                            c = checkChar(buf, pos);
2494                            /*
2495                             * Comment end dash state Consume the next input
2496                             * character:
2497                             */
2498                            switch (c) {
2499                                case '-':
2500                                    /*
2501                                     * U+002D HYPHEN-MINUS (-) Switch to the comment
2502                                     * end state
2503                                     */
2504                                    appendLongStrBuf(c);
2505                                    state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
2506                                    break commentenddashloop;
2507                                // continue stateloop;
2508                                case '\r':
2509                                    appendLongStrBufCarriageReturn();
2510                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2511                                    break stateloop;
2512                                case '\n':
2513                                    appendLongStrBufLineFeed();
2514                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2515                                    continue stateloop;
2516                                case '\u0000':
2517                                    c = '\uFFFD';
2518                                    // fall thru
2519                                default:
2520                                    /*
2521                                     * Anything else Append a U+002D HYPHEN-MINUS
2522                                     * (-) character and the input character to the
2523                                     * comment token's data.
2524                                     */
2525                                    appendLongStrBuf(c);
2526                                    /*
2527                                     * Switch to the comment state.
2528                                     */
2529                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2530                                    continue stateloop;
2531                            }
2532                        }
2533                        // FALLTHRU DON'T REORDER
2534                    case COMMENT_END:
2535                        commentendloop: for (;;) {
2536                            if (++pos == endPos) {
2537                                break stateloop;
2538                            }
2539                            c = checkChar(buf, pos);
2540                            /*
2541                             * Comment end state Consume the next input
2542                             * character:
2543                             */
2544                            switch (c) {
2545                                case '>':
2546                                    /*
2547                                     * U+003E GREATER-THAN SIGN (>) Emit the comment
2548                                     * token.
2549                                     */
2550                                    emitComment(2, pos);
2551                                    /*
2552                                     * Switch to the data state.
2553                                     */
2554                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
2555                                    continue stateloop;
2556                                case '-':
2557                                    /* U+002D HYPHEN-MINUS (-) Parse error. */
2558                                    /*
2559                                     * Append a U+002D HYPHEN-MINUS (-) character to
2560                                     * the comment token's data.
2561                                     */
2562                                    adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
2563                                    /*
2564                                     * Stay in the comment end state.
2565                                     */
2566                                    continue;
2567                                case '\r':
2568                                    adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
2569                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2570                                    break stateloop;
2571                                case '\n':
2572                                    adjustDoubleHyphenAndAppendToLongStrBufLineFeed();
2573                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2574                                    continue stateloop;
2575                                case '!':
2576                                    errHyphenHyphenBang();
2577                                    appendLongStrBuf(c);
2578                                    state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
2579                                    continue stateloop;
2580                                case '\u0000':
2581                                    c = '\uFFFD';
2582                                    // fall thru
2583                                default:
2584                                    /*
2585                                     * Append two U+002D HYPHEN-MINUS (-) characters
2586                                     * and the input character to the comment
2587                                     * token's data.
2588                                     */
2589                                    adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
2590                                    /*
2591                                     * Switch to the comment state.
2592                                     */
2593                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2594                                    continue stateloop;
2595                            }
2596                        }
2597                        // XXX reorder point
2598                    case COMMENT_END_BANG:
2599                        for (;;) {
2600                            if (++pos == endPos) {
2601                                break stateloop;
2602                            }
2603                            c = checkChar(buf, pos);
2604                            /*
2605                             * Comment end bang state
2606                             * 
2607                             * Consume the next input character:
2608                             */
2609                            switch (c) {
2610                                case '>':
2611                                    /*
2612                                     * U+003E GREATER-THAN SIGN (>) Emit the comment
2613                                     * token.
2614                                     */
2615                                    emitComment(3, pos);
2616                                    /*
2617                                     * Switch to the data state.
2618                                     */
2619                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
2620                                    continue stateloop;
2621                                case '-':
2622                                    /*
2623                                     * Append two U+002D HYPHEN-MINUS (-) characters
2624                                     * and a U+0021 EXCLAMATION MARK (!) character
2625                                     * to the comment token's data.
2626                                     */
2627                                    appendLongStrBuf(c);
2628                                    /*
2629                                     * Switch to the comment end dash state.
2630                                     */
2631                                    state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
2632                                    continue stateloop;
2633                                case '\r':
2634                                    appendLongStrBufCarriageReturn();
2635                                    break stateloop;
2636                                case '\n':
2637                                    appendLongStrBufLineFeed();
2638                                    continue;
2639                                case '\u0000':
2640                                    c = '\uFFFD';
2641                                    // fall thru
2642                                default:
2643                                    /*
2644                                     * Anything else Append two U+002D HYPHEN-MINUS
2645                                     * (-) characters, a U+0021 EXCLAMATION MARK (!)
2646                                     * character, and the input character to the
2647                                     * comment token's data. Switch to the comment
2648                                     * state.
2649                                     */
2650                                    appendLongStrBuf(c);
2651                                    /*
2652                                     * Switch to the comment state.
2653                                     */
2654                                    state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2655                                    continue stateloop;
2656                            }
2657                        }
2658                        // XXX reorder point
2659                    case COMMENT_START_DASH:
2660                        if (++pos == endPos) {
2661                            break stateloop;
2662                        }
2663                        c = checkChar(buf, pos);
2664                        /*
2665                         * Comment start dash state
2666                         * 
2667                         * Consume the next input character:
2668                         */
2669                        switch (c) {
2670                            case '-':
2671                                /*
2672                                 * U+002D HYPHEN-MINUS (-) Switch to the comment end
2673                                 * state
2674                                 */
2675                                appendLongStrBuf(c);
2676                                state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
2677                                continue stateloop;
2678                            case '>':
2679                                errPrematureEndOfComment();
2680                                /* Emit the comment token. */
2681                                emitComment(1, pos);
2682                                /*
2683                                 * Switch to the data state.
2684                                 */
2685                                state = transition(state, Tokenizer.DATA, reconsume, pos);
2686                                continue stateloop;
2687                            case '\r':
2688                                appendLongStrBufCarriageReturn();
2689                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2690                                break stateloop;
2691                            case '\n':
2692                                appendLongStrBufLineFeed();
2693                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2694                                continue stateloop;
2695                            case '\u0000':
2696                                c = '\uFFFD';
2697                                // fall thru
2698                            default:
2699                                /*
2700                                 * Append a U+002D HYPHEN-MINUS character (-) and
2701                                 * the current input character to the comment
2702                                 * token's data.
2703                                 */
2704                                appendLongStrBuf(c);
2705                                /*
2706                                 * Switch to the comment state.
2707                                 */
2708                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2709                                continue stateloop;
2710                        }
2711                        // XXX reorder point
2712                    case CDATA_START:
2713                        for (;;) {
2714                            if (++pos == endPos) {
2715                                break stateloop;
2716                            }
2717                            c = checkChar(buf, pos);
2718                            if (index < 6) { // CDATA_LSQB.length
2719                                if (c == Tokenizer.CDATA_LSQB[index]) {
2720                                    appendLongStrBuf(c);
2721                                } else {
2722                                    errBogusComment();
2723                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
2724                                    reconsume = true;
2725                                    continue stateloop;
2726                                }
2727                                index++;
2728                                continue;
2729                            } else {
2730                                cstart = pos; // start coalescing
2731                                state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
2732                                reconsume = true;
2733                                break; // FALL THROUGH continue stateloop;
2734                            }
2735                        }
2736                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
2737                    case CDATA_SECTION:
2738                        cdatasectionloop: for (;;) {
2739                            if (reconsume) {
2740                                reconsume = false;
2741                            } else {
2742                                if (++pos == endPos) {
2743                                    break stateloop;
2744                                }
2745                                c = checkChar(buf, pos);
2746                            }
2747                            switch (c) {
2748                                case ']':
2749                                    flushChars(buf, pos);
2750                                    state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
2751                                    break cdatasectionloop; // FALL THROUGH
2752                                case '\u0000':
2753                                    emitReplacementCharacter(buf, pos);
2754                                    continue;
2755                                case '\r':
2756                                    emitCarriageReturn(buf, pos);
2757                                    break stateloop;
2758                                case '\n':
2759                                    silentLineFeed();
2760                                    // fall thru
2761                                default:
2762                                    continue;
2763                            }
2764                        }
2765                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
2766                    case CDATA_RSQB:
2767                        cdatarsqb: for (;;) {
2768                            if (++pos == endPos) {
2769                                break stateloop;
2770                            }
2771                            c = checkChar(buf, pos);
2772                            switch (c) {
2773                                case ']':
2774                                    state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
2775                                    break cdatarsqb;
2776                                default:
2777                                    tokenHandler.characters(Tokenizer.RSQB_RSQB, 0,
2778                                            1);
2779                                    cstart = pos;
2780                                    state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
2781                                    reconsume = true;
2782                                    continue stateloop;
2783                            }
2784                        }
2785                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
2786                    case CDATA_RSQB_RSQB:
2787                        if (++pos == endPos) {
2788                            break stateloop;
2789                        }
2790                        c = checkChar(buf, pos);
2791                        switch (c) {
2792                            case '>':
2793                                cstart = pos + 1;
2794                                state = transition(state, Tokenizer.DATA, reconsume, pos);
2795                                continue stateloop;
2796                            default:
2797                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
2798                                cstart = pos;
2799                                state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
2800                                reconsume = true;
2801                                continue stateloop;
2802    
2803                        }
2804                        // XXX reorder point
2805                    case ATTRIBUTE_VALUE_SINGLE_QUOTED:
2806                        attributevaluesinglequotedloop: for (;;) {
2807                            if (reconsume) {
2808                                reconsume = false;
2809                            } else {
2810                                if (++pos == endPos) {
2811                                    break stateloop;
2812                                }
2813                                c = checkChar(buf, pos);
2814                            }
2815                            /*
2816                             * Consume the next input character:
2817                             */
2818                            switch (c) {
2819                                case '\'':
2820                                    /*
2821                                     * U+0027 APOSTROPHE (') Switch to the after
2822                                     * attribute value (quoted) state.
2823                                     */
2824                                    addAttributeWithValue();
2825    
2826                                    state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
2827                                    continue stateloop;
2828                                case '&':
2829                                    /*
2830                                     * U+0026 AMPERSAND (&) Switch to the character
2831                                     * reference in attribute value state, with the
2832                                     * additional allowed character being U+0027
2833                                     * APOSTROPHE (').
2834                                     */
2835                                    clearStrBufAndAppend(c);
2836                                    setAdditionalAndRememberAmpersandLocation('\'');
2837                                    returnState = state;
2838                                    state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
2839                                    break attributevaluesinglequotedloop;
2840                                // continue stateloop;
2841                                case '\r':
2842                                    appendLongStrBufCarriageReturn();
2843                                    break stateloop;
2844                                case '\n':
2845                                    appendLongStrBufLineFeed();
2846                                    continue;
2847                                case '\u0000':
2848                                    c = '\uFFFD';
2849                                    // fall thru
2850                                default:
2851                                    /*
2852                                     * Anything else Append the current input
2853                                     * character to the current attribute's value.
2854                                     */
2855                                    appendLongStrBuf(c);
2856                                    /*
2857                                     * Stay in the attribute value (single-quoted)
2858                                     * state.
2859                                     */
2860                                    continue;
2861                            }
2862                        }
2863                        // FALLTHRU DON'T REORDER
2864                    case CONSUME_CHARACTER_REFERENCE:
2865                        if (++pos == endPos) {
2866                            break stateloop;
2867                        }
2868                        c = checkChar(buf, pos);
2869                        if (c == '\u0000') {
2870                            break stateloop;
2871                        }
2872                        /*
2873                         * Unlike the definition in the spec, this state does not
2874                         * return a value and never requires the caller to
2875                         * backtrack. This state takes care of emitting characters
2876                         * or appending to the current attribute value. It also
2877                         * takes care of that in the case when consuming the
2878                         * character reference fails.
2879                         */
2880                        /*
2881                         * This section defines how to consume a character
2882                         * reference. This definition is used when parsing character
2883                         * references in text and in attributes.
2884                         * 
2885                         * The behavior depends on the identity of the next
2886                         * character (the one immediately after the U+0026 AMPERSAND
2887                         * character):
2888                         */
2889                        switch (c) {
2890                            case ' ':
2891                            case '\t':
2892                            case '\n':
2893                            case '\r': // we'll reconsume!
2894                            case '\u000C':
2895                            case '<':
2896                            case '&':
2897                                emitOrAppendStrBuf(returnState);
2898                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
2899                                    cstart = pos;
2900                                }
2901                                state = transition(state, returnState, reconsume, pos);
2902                                reconsume = true;
2903                                continue stateloop;
2904                            case '#':
2905                                /*
2906                                 * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
2907                                 * SIGN.
2908                                 */
2909                                appendStrBuf('#');
2910                                state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
2911                                continue stateloop;
2912                            default:
2913                                if (c == additional) {
2914                                    emitOrAppendStrBuf(returnState);
2915                                    state = transition(state, returnState, reconsume, pos);
2916                                    reconsume = true;
2917                                    continue stateloop;
2918                                }
2919                                if (c >= 'a' && c <= 'z') {
2920                                    firstCharKey = c - 'a' + 26;
2921                                } else if (c >= 'A' && c <= 'Z') {
2922                                    firstCharKey = c - 'A';
2923                                } else {
2924                                    // No match
2925                                    /*
2926                                     * If no match can be made, then this is a parse
2927                                     * error.
2928                                     */
2929                                    errNoNamedCharacterMatch();
2930                                    emitOrAppendStrBuf(returnState);
2931                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
2932                                        cstart = pos;
2933                                    }
2934                                    state = transition(state, returnState, reconsume, pos);
2935                                    reconsume = true;
2936                                    continue stateloop;
2937                                }
2938                                // Didn't fail yet
2939                                appendStrBuf(c);
2940                                state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
2941                                // FALL THROUGH continue stateloop;
2942                        }
2943                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
2944                    case CHARACTER_REFERENCE_HILO_LOOKUP:
2945                        {
2946                            if (++pos == endPos) {
2947                                break stateloop;
2948                            }
2949                            c = checkChar(buf, pos);
2950                            if (c == '\u0000') {
2951                                break stateloop;
2952                            }
2953                            /*
2954                             * The data structure is as follows:
2955                             * 
2956                             * HILO_ACCEL is a two-dimensional int array whose major
2957                             * index corresponds to the second character of the
2958                             * character reference (code point as index) and the
2959                             * minor index corresponds to the first character of the
2960                             * character reference (packed so that A-Z runs from 0
2961                             * to 25 and a-z runs from 26 to 51). This layout makes
2962                             * it easier to use the sparseness of the data structure
2963                             * to omit parts of it: The second dimension of the
2964                             * table is null when no character reference starts with
2965                             * the character corresponding to that row.
2966                             * 
2967                             * The int value HILO_ACCEL (by these indices) is zero
2968                             * if there exists no character reference starting with
2969                             * that two-letter prefix. Otherwise, the value is an
2970                             * int that packs two shorts so that the higher short is
2971                             * the index of the highest character reference name
2972                             * with that prefix in NAMES and the lower short
2973                             * corresponds to the index of the lowest character
2974                             * reference name with that prefix. (It happens that the
2975                             * first two character reference names share their
2976                             * prefix so the packed int cannot be 0 by packing the
2977                             * two shorts.)
2978                             * 
2979                             * NAMES is an array of byte arrays where each byte
2980                             * array encodes the name of a character reference as
2981                             * ASCII. The names omit the first two letters of the
2982                             * name. (Since storing the first two letters would be
2983                             * redundant with the data contained in HILO_ACCEL.) The
2984                             * entries are lexically sorted.
2985                             * 
2986                             * For a given index in NAMES, the same index in VALUES
2987                             * contains the corresponding expansion as an array of
2988                             * two UTF-16 code units (either the character and
2989                             * U+0000 or a surrogate pair).
2990                             */
2991                            int hilo = 0;
2992                            if (c <= 'z') {
2993                                @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c];
2994                                if (row != null) {
2995                                    hilo = row[firstCharKey];
2996                                }
2997                            }
2998                            if (hilo == 0) {
2999                                /*
3000                                 * If no match can be made, then this is a parse
3001                                 * error.
3002                                 */
3003                                errNoNamedCharacterMatch();
3004                                emitOrAppendStrBuf(returnState);
3005                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3006                                    cstart = pos;
3007                                }
3008                                state = transition(state, returnState, reconsume, pos);
3009                                reconsume = true;
3010                                continue stateloop;
3011                            }
3012                            // Didn't fail yet
3013                            appendStrBuf(c);
3014                            lo = hilo & 0xFFFF;
3015                            hi = hilo >> 16;
3016                            entCol = -1;
3017                            candidate = -1;
3018                            strBufMark = 0;
3019                            state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
3020                            // FALL THROUGH continue stateloop;
3021                        }
3022                    case CHARACTER_REFERENCE_TAIL:
3023                        outer: for (;;) {
3024                            if (++pos == endPos) {
3025                                break stateloop;
3026                            }
3027                            c = checkChar(buf, pos);
3028                            if (c == '\u0000') {
3029                                break stateloop;
3030                            }
3031                            entCol++;
3032                            /*
3033                             * Consume the maximum number of characters possible,
3034                             * with the consumed characters matching one of the
3035                             * identifiers in the first column of the named
3036                             * character references table (in a case-sensitive
3037                             * manner).
3038                             */
3039                            loloop: for (;;) {
3040                                if (hi < lo) {
3041                                    break outer;
3042                                }
3043                                if (entCol == NamedCharacters.NAMES[lo].length()) {
3044                                    candidate = lo;
3045                                    strBufMark = strBufLen;
3046                                    lo++;
3047                                } else if (entCol > NamedCharacters.NAMES[lo].length()) {
3048                                    break outer;
3049                                } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
3050                                    lo++;
3051                                } else {
3052                                    break loloop;
3053                                }
3054                            }
3055    
3056                            hiloop: for (;;) {
3057                                if (hi < lo) {
3058                                    break outer;
3059                                }
3060                                if (entCol == NamedCharacters.NAMES[hi].length()) {
3061                                    break hiloop;
3062                                }
3063                                if (entCol > NamedCharacters.NAMES[hi].length()) {
3064                                    break outer;
3065                                } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
3066                                    hi--;
3067                                } else {
3068                                    break hiloop;
3069                                }
3070                            }
3071    
3072                            if (hi < lo) {
3073                                break outer;
3074                            }
3075                            appendStrBuf(c);
3076                            continue;
3077                        }
3078    
3079                        if (candidate == -1) {
3080                            // reconsume deals with CR, LF or nul
3081                            /*
3082                             * If no match can be made, then this is a parse error.
3083                             */
3084                            errNoNamedCharacterMatch();
3085                            emitOrAppendStrBuf(returnState);
3086                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3087                                cstart = pos;
3088                            }
3089                            state = transition(state, returnState, reconsume, pos);
3090                            reconsume = true;
3091                            continue stateloop;
3092                        } else {
3093                            // c can't be CR, LF or nul if we got here
3094                            @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
3095                            if (candidateName.length() == 0
3096                                    || candidateName.charAt(candidateName.length() - 1) != ';') {
3097                                /*
3098                                 * If the last character matched is not a U+003B
3099                                 * SEMICOLON (;), there is a parse error.
3100                                 */
3101                                if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
3102                                    /*
3103                                     * If the entity is being consumed as part of an
3104                                     * attribute, and the last character matched is
3105                                     * not a U+003B SEMICOLON (;),
3106                                     */
3107                                    char ch;
3108                                    if (strBufMark == strBufLen) {
3109                                        ch = c;
3110                                    } else {
3111                                        // if (strBufOffset != -1) {
3112                                        // ch = buf[strBufOffset + strBufMark];
3113                                        // } else {
3114                                        ch = strBuf[strBufMark];
3115                                        // }
3116                                    }
3117                                    if (ch == '=' || (ch >= '0' && ch <= '9')
3118                                            || (ch >= 'A' && ch <= 'Z')
3119                                            || (ch >= 'a' && ch <= 'z')) {
3120                                        /*
3121                                         * and the next character is either a U+003D
3122                                         * EQUALS SIGN character (=) or in the range
3123                                         * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
3124                                         * U+0041 LATIN CAPITAL LETTER A to U+005A
3125                                         * LATIN CAPITAL LETTER Z, or U+0061 LATIN
3126                                         * SMALL LETTER A to U+007A LATIN SMALL
3127                                         * LETTER Z, then, for historical reasons,
3128                                         * all the characters that were matched
3129                                         * after the U+0026 AMPERSAND (&) must be
3130                                         * unconsumed, and nothing is returned.
3131                                         */
3132                                        errNoNamedCharacterMatch();
3133                                        appendStrBufToLongStrBuf();
3134                                        state = transition(state, returnState, reconsume, pos);
3135                                        reconsume = true;
3136                                        continue stateloop;
3137                                    }
3138                                }
3139                                if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
3140                                    errUnescapedAmpersandInterpretedAsCharacterReference();
3141                                } else {
3142                                    errNotSemicolonTerminated();
3143                                }
3144                            }
3145    
3146                            /*
3147                             * Otherwise, return a character token for the character
3148                             * corresponding to the entity name (as given by the
3149                             * second column of the named character references
3150                             * table).
3151                             */
3152                            @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
3153                            if (
3154                            // [NOCPP[
3155                            val.length == 1
3156                            // ]NOCPP]
3157                            // CPPONLY: val[1] == 0
3158                            ) {
3159                                emitOrAppendOne(val, returnState);
3160                            } else {
3161                                emitOrAppendTwo(val, returnState);
3162                            }
3163                            // this is so complicated!
3164                            if (strBufMark < strBufLen) {
3165                                // if (strBufOffset != -1) {
3166                                // if ((returnState & (~1)) != 0) {
3167                                // for (int i = strBufMark; i < strBufLen; i++) {
3168                                // appendLongStrBuf(buf[strBufOffset + i]);
3169                                // }
3170                                // } else {
3171                                // tokenHandler.characters(buf, strBufOffset
3172                                // + strBufMark, strBufLen
3173                                // - strBufMark);
3174                                // }
3175                                // } else {
3176                                if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
3177                                    for (int i = strBufMark; i < strBufLen; i++) {
3178                                        appendLongStrBuf(strBuf[i]);
3179                                    }
3180                                } else {
3181                                    tokenHandler.characters(strBuf, strBufMark,
3182                                            strBufLen - strBufMark);
3183                                }
3184                                // }
3185                            }
3186                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3187                                cstart = pos;
3188                            }
3189                            state = transition(state, returnState, reconsume, pos);
3190                            reconsume = true;
3191                            continue stateloop;
3192                            /*
3193                             * If the markup contains I'm &notit; I tell you, the
3194                             * entity is parsed as "not", as in, I'm ¬it; I tell
3195                             * you. But if the markup was I'm &notin; I tell you,
3196                             * the entity would be parsed as "notin;", resulting in
3197                             * I'm ∉ I tell you.
3198                             */
3199                        }
3200                        // XXX reorder point
3201                    case CONSUME_NCR:
3202                        if (++pos == endPos) {
3203                            break stateloop;
3204                        }
3205                        c = checkChar(buf, pos);
3206                        prevValue = -1;
3207                        value = 0;
3208                        seenDigits = false;
3209                        /*
3210                         * The behavior further depends on the character after the
3211                         * U+0023 NUMBER SIGN:
3212                         */
3213                        switch (c) {
3214                            case 'x':
3215                            case 'X':
3216    
3217                                /*
3218                                 * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
3219                                 * LETTER X Consume the X.
3220                                 * 
3221                                 * Follow the steps below, but using the range of
3222                                 * characters U+0030 DIGIT ZERO through to U+0039
3223                                 * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
3224                                 * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
3225                                 * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
3226                                 * LETTER F (in other words, 0-9, A-F, a-f).
3227                                 * 
3228                                 * When it comes to interpreting the number,
3229                                 * interpret it as a hexadecimal number.
3230                                 */
3231                                appendStrBuf(c);
3232                                state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
3233                                continue stateloop;
3234                            default:
3235                                /*
3236                                 * Anything else Follow the steps below, but using
3237                                 * the range of characters U+0030 DIGIT ZERO through
3238                                 * to U+0039 DIGIT NINE (i.e. just 0-9).
3239                                 * 
3240                                 * When it comes to interpreting the number,
3241                                 * interpret it as a decimal number.
3242                                 */
3243                                state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
3244                                reconsume = true;
3245                                // FALL THROUGH continue stateloop;
3246                        }
3247                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
3248                    case DECIMAL_NRC_LOOP:
3249                        decimalloop: for (;;) {
3250                            if (reconsume) {
3251                                reconsume = false;
3252                            } else {
3253                                if (++pos == endPos) {
3254                                    break stateloop;
3255                                }
3256                                c = checkChar(buf, pos);
3257                            }
3258                            // Deal with overflow gracefully
3259                            if (value < prevValue) {
3260                                value = 0x110000; // Value above Unicode range but
3261                                // within int
3262                                // range
3263                            }
3264                            prevValue = value;
3265                            /*
3266                             * Consume as many characters as match the range of
3267                             * characters given above.
3268                             */
3269                            if (c >= '0' && c <= '9') {
3270                                seenDigits = true;
3271                                value *= 10;
3272                                value += c - '0';
3273                                continue;
3274                            } else if (c == ';') {
3275                                if (seenDigits) {
3276                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3277                                        cstart = pos + 1;
3278                                    }
3279                                    state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
3280                                    // FALL THROUGH continue stateloop;
3281                                    break decimalloop;
3282                                } else {
3283                                    errNoDigitsInNCR();
3284                                    appendStrBuf(';');
3285                                    emitOrAppendStrBuf(returnState);
3286                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3287                                        cstart = pos + 1;
3288                                    }
3289                                    state = transition(state, returnState, reconsume, pos);
3290                                    continue stateloop;
3291                                }
3292                            } else {
3293                                /*
3294                                 * If no characters match the range, then don't
3295                                 * consume any characters (and unconsume the U+0023
3296                                 * NUMBER SIGN character and, if appropriate, the X
3297                                 * character). This is a parse error; nothing is
3298                                 * returned.
3299                                 * 
3300                                 * Otherwise, if the next character is a U+003B
3301                                 * SEMICOLON, consume that too. If it isn't, there
3302                                 * is a parse error.
3303                                 */
3304                                if (!seenDigits) {
3305                                    errNoDigitsInNCR();
3306                                    emitOrAppendStrBuf(returnState);
3307                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3308                                        cstart = pos;
3309                                    }
3310                                    state = transition(state, returnState, reconsume, pos);
3311                                    reconsume = true;
3312                                    continue stateloop;
3313                                } else {
3314                                    errCharRefLacksSemicolon();
3315                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3316                                        cstart = pos;
3317                                    }
3318                                    state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
3319                                    reconsume = true;
3320                                    // FALL THROUGH continue stateloop;
3321                                    break decimalloop;
3322                                }
3323                            }
3324                        }
3325                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
3326                    case HANDLE_NCR_VALUE:
3327                        // WARNING previous state sets reconsume
3328                        // XXX inline this case if the method size can take it
3329                        handleNcrValue(returnState);
3330                        state = transition(state, returnState, reconsume, pos);
3331                        continue stateloop;
3332                        // XXX reorder point
3333                    case HEX_NCR_LOOP:
3334                        for (;;) {
3335                            if (++pos == endPos) {
3336                                break stateloop;
3337                            }
3338                            c = checkChar(buf, pos);
3339                            // Deal with overflow gracefully
3340                            if (value < prevValue) {
3341                                value = 0x110000; // Value above Unicode range but
3342                                // within int
3343                                // range
3344                            }
3345                            prevValue = value;
3346                            /*
3347                             * Consume as many characters as match the range of
3348                             * characters given above.
3349                             */
3350                            if (c >= '0' && c <= '9') {
3351                                seenDigits = true;
3352                                value *= 16;
3353                                value += c - '0';
3354                                continue;
3355                            } else if (c >= 'A' && c <= 'F') {
3356                                seenDigits = true;
3357                                value *= 16;
3358                                value += c - 'A' + 10;
3359                                continue;
3360                            } else if (c >= 'a' && c <= 'f') {
3361                                seenDigits = true;
3362                                value *= 16;
3363                                value += c - 'a' + 10;
3364                                continue;
3365                            } else if (c == ';') {
3366                                if (seenDigits) {
3367                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3368                                        cstart = pos + 1;
3369                                    }
3370                                    state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
3371                                    continue stateloop;
3372                                } else {
3373                                    errNoDigitsInNCR();
3374                                    appendStrBuf(';');
3375                                    emitOrAppendStrBuf(returnState);
3376                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3377                                        cstart = pos + 1;
3378                                    }
3379                                    state = transition(state, returnState, reconsume, pos);
3380                                    continue stateloop;
3381                                }
3382                            } else {
3383                                /*
3384                                 * If no characters match the range, then don't
3385                                 * consume any characters (and unconsume the U+0023
3386                                 * NUMBER SIGN character and, if appropriate, the X
3387                                 * character). This is a parse error; nothing is
3388                                 * returned.
3389                                 * 
3390                                 * Otherwise, if the next character is a U+003B
3391                                 * SEMICOLON, consume that too. If it isn't, there
3392                                 * is a parse error.
3393                                 */
3394                                if (!seenDigits) {
3395                                    errNoDigitsInNCR();
3396                                    emitOrAppendStrBuf(returnState);
3397                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3398                                        cstart = pos;
3399                                    }
3400                                    state = transition(state, returnState, reconsume, pos);
3401                                    reconsume = true;
3402                                    continue stateloop;
3403                                } else {
3404                                    errCharRefLacksSemicolon();
3405                                    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
3406                                        cstart = pos;
3407                                    }
3408                                    state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
3409                                    reconsume = true;
3410                                    continue stateloop;
3411                                }
3412                            }
3413                        }
3414                        // XXX reorder point
3415                    case PLAINTEXT:
3416                        plaintextloop: for (;;) {
3417                            if (reconsume) {
3418                                reconsume = false;
3419                            } else {
3420                                if (++pos == endPos) {
3421                                    break stateloop;
3422                                }
3423                                c = checkChar(buf, pos);
3424                            }
3425                            switch (c) {
3426                                case '\u0000':
3427                                    emitPlaintextReplacementCharacter(buf, pos);
3428                                    continue;
3429                                case '\r':
3430                                    emitCarriageReturn(buf, pos);
3431                                    break stateloop;
3432                                case '\n':
3433                                    silentLineFeed();
3434                                default:
3435                                    /*
3436                                     * Anything else Emit the current input
3437                                     * character as a character token. Stay in the
3438                                     * PLAINTEXT state.
3439                                     */
3440                                    continue;
3441                            }
3442                        }
3443                        // XXX reorder point
3444                    case CLOSE_TAG_OPEN:
3445                        if (++pos == endPos) {
3446                            break stateloop;
3447                        }
3448                        c = checkChar(buf, pos);
3449                        /*
3450                         * Otherwise, if the content model flag is set to the PCDATA
3451                         * state, or if the next few characters do match that tag
3452                         * name, consume the next input character:
3453                         */
3454                        switch (c) {
3455                            case '>':
3456                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
3457                                errLtSlashGt();
3458                                /*
3459                                 * Switch to the data state.
3460                                 */
3461                                cstart = pos + 1;
3462                                state = transition(state, Tokenizer.DATA, reconsume, pos);
3463                                continue stateloop;
3464                            case '\r':
3465                                silentCarriageReturn();
3466                                /* Anything else Parse error. */
3467                                errGarbageAfterLtSlash();
3468                                /*
3469                                 * Switch to the bogus comment state.
3470                                 */
3471                                clearLongStrBufAndAppend('\n');
3472                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
3473                                break stateloop;
3474                            case '\n':
3475                                silentLineFeed();
3476                                /* Anything else Parse error. */
3477                                errGarbageAfterLtSlash();
3478                                /*
3479                                 * Switch to the bogus comment state.
3480                                 */
3481                                clearLongStrBufAndAppend('\n');
3482                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
3483                                continue stateloop;
3484                            case '\u0000':
3485                                c = '\uFFFD';
3486                                // fall thru
3487                            default:
3488                                if (c >= 'A' && c <= 'Z') {
3489                                    c += 0x20;
3490                                }
3491                                if (c >= 'a' && c <= 'z') {
3492                                    /*
3493                                     * U+0061 LATIN SMALL LETTER A through to U+007A
3494                                     * LATIN SMALL LETTER Z Create a new end tag
3495                                     * token,
3496                                     */
3497                                    endTag = true;
3498                                    /*
3499                                     * set its tag name to the input character,
3500                                     */
3501                                    clearStrBufAndAppend(c);
3502                                    /*
3503                                     * then switch to the tag name state. (Don't
3504                                     * emit the token yet; further details will be
3505                                     * filled in before it is emitted.)
3506                                     */
3507                                    state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
3508                                    continue stateloop;
3509                                } else {
3510                                    /* Anything else Parse error. */
3511                                    errGarbageAfterLtSlash();
3512                                    /*
3513                                     * Switch to the bogus comment state.
3514                                     */
3515                                    clearLongStrBufAndAppend(c);
3516                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
3517                                    continue stateloop;
3518                                }
3519                        }
3520                        // XXX reorder point
3521                    case RCDATA:
3522                        rcdataloop: for (;;) {
3523                            if (reconsume) {
3524                                reconsume = false;
3525                            } else {
3526                                if (++pos == endPos) {
3527                                    break stateloop;
3528                                }
3529                                c = checkChar(buf, pos);
3530                            }
3531                            switch (c) {
3532                                case '&':
3533                                    /*
3534                                     * U+0026 AMPERSAND (&) Switch to the character
3535                                     * reference in RCDATA state.
3536                                     */
3537                                    flushChars(buf, pos);
3538                                    clearStrBufAndAppend(c);
3539                                    additional = '\u0000';
3540                                    returnState = state;
3541                                    state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
3542                                    continue stateloop;
3543                                case '<':
3544                                    /*
3545                                     * U+003C LESS-THAN SIGN (<) Switch to the
3546                                     * RCDATA less-than sign state.
3547                                     */
3548                                    flushChars(buf, pos);
3549    
3550                                    returnState = state;
3551                                    state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
3552                                    continue stateloop;
3553                                case '\u0000':
3554                                    emitReplacementCharacter(buf, pos);
3555                                    continue;
3556                                case '\r':
3557                                    emitCarriageReturn(buf, pos);
3558                                    break stateloop;
3559                                case '\n':
3560                                    silentLineFeed();
3561                                default:
3562                                    /*
3563                                     * Emit the current input character as a
3564                                     * character token. Stay in the RCDATA state.
3565                                     */
3566                                    continue;
3567                            }
3568                        }
3569                        // XXX reorder point
3570                    case RAWTEXT:
3571                        rawtextloop: for (;;) {
3572                            if (reconsume) {
3573                                reconsume = false;
3574                            } else {
3575                                if (++pos == endPos) {
3576                                    break stateloop;
3577                                }
3578                                c = checkChar(buf, pos);
3579                            }
3580                            switch (c) {
3581                                case '<':
3582                                    /*
3583                                     * U+003C LESS-THAN SIGN (<) Switch to the
3584                                     * RAWTEXT less-than sign state.
3585                                     */
3586                                    flushChars(buf, pos);
3587    
3588                                    returnState = state;
3589                                    state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
3590                                    break rawtextloop;
3591                                // FALL THRU continue stateloop;
3592                                case '\u0000':
3593                                    emitReplacementCharacter(buf, pos);
3594                                    continue;
3595                                case '\r':
3596                                    emitCarriageReturn(buf, pos);
3597                                    break stateloop;
3598                                case '\n':
3599                                    silentLineFeed();
3600                                default:
3601                                    /*
3602                                     * Emit the current input character as a
3603                                     * character token. Stay in the RAWTEXT state.
3604                                     */
3605                                    continue;
3606                            }
3607                        }
3608                        // XXX fallthru don't reorder
3609                    case RAWTEXT_RCDATA_LESS_THAN_SIGN:
3610                        rawtextrcdatalessthansignloop: for (;;) {
3611                            if (++pos == endPos) {
3612                                break stateloop;
3613                            }
3614                            c = checkChar(buf, pos);
3615                            switch (c) {
3616                                case '/':
3617                                    /*
3618                                     * U+002F SOLIDUS (/) Set the temporary buffer
3619                                     * to the empty string. Switch to the
3620                                     * NON_DATA_END_TAG_NAME state.
3621                                     */
3622                                    index = 0;
3623                                    clearStrBuf();
3624                                    state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
3625                                    break rawtextrcdatalessthansignloop;
3626                                // FALL THRU continue stateloop;
3627                                default:
3628                                    /*
3629                                     * Otherwise, emit a U+003C LESS-THAN SIGN
3630                                     * character token
3631                                     */
3632                                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
3633                                    /*
3634                                     * and reconsume the current input character in
3635                                     * the return state (returnState).
3636                                     */
3637                                    cstart = pos;
3638                                    state = transition(state, returnState, reconsume, pos);
3639                                    reconsume = true;
3640                                    continue stateloop;
3641                            }
3642                        }
3643                        // XXX fall thru. don't reorder.
3644                    case NON_DATA_END_TAG_NAME:
3645                        for (;;) {
3646                            if (++pos == endPos) {
3647                                break stateloop;
3648                            }
3649                            c = checkChar(buf, pos);
3650                            /*
3651                             * ASSERT! when entering this state, set index to 0 and
3652                             * call clearStrBuf() assert (contentModelElement !=
3653                             * null); Let's implement the above without lookahead.
3654                             * strBuf is the 'temporary buffer'.
3655                             */
3656                            if (index < endTagExpectationAsArray.length) {
3657                                char e = endTagExpectationAsArray[index];
3658                                char folded = c;
3659                                if (c >= 'A' && c <= 'Z') {
3660                                    folded += 0x20;
3661                                }
3662                                if (folded != e) {
3663                                    // [NOCPP[
3664                                    errHtml4LtSlashInRcdata(folded);
3665                                    // ]NOCPP]
3666                                    tokenHandler.characters(Tokenizer.LT_SOLIDUS,
3667                                            0, 2);
3668                                    emitStrBuf();
3669                                    cstart = pos;
3670                                    state = transition(state, returnState, reconsume, pos);
3671                                    reconsume = true;
3672                                    continue stateloop;
3673                                }
3674                                appendStrBuf(c);
3675                                index++;
3676                                continue;
3677                            } else {
3678                                endTag = true;
3679                                // XXX replace contentModelElement with different
3680                                // type
3681                                tagName = endTagExpectation;
3682                                switch (c) {
3683                                    case '\r':
3684                                        silentCarriageReturn();
3685                                        state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
3686                                        break stateloop;
3687                                    case '\n':
3688                                        silentLineFeed();
3689                                        // fall thru
3690                                    case ' ':
3691                                    case '\t':
3692                                    case '\u000C':
3693                                        /*
3694                                         * U+0009 CHARACTER TABULATION U+000A LINE
3695                                         * FEED (LF) U+000C FORM FEED (FF) U+0020
3696                                         * SPACE If the current end tag token is an
3697                                         * appropriate end tag token, then switch to
3698                                         * the before attribute name state.
3699                                         */
3700                                        state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
3701                                        continue stateloop;
3702                                    case '/':
3703                                        /*
3704                                         * U+002F SOLIDUS (/) If the current end tag
3705                                         * token is an appropriate end tag token,
3706                                         * then switch to the self-closing start tag
3707                                         * state.
3708                                         */
3709                                        state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
3710                                        continue stateloop;
3711                                    case '>':
3712                                        /*
3713                                         * U+003E GREATER-THAN SIGN (>) If the
3714                                         * current end tag token is an appropriate
3715                                         * end tag token, then emit the current tag
3716                                         * token and switch to the data state.
3717                                         */
3718                                        state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
3719                                        if (shouldSuspend) {
3720                                            break stateloop;
3721                                        }
3722                                        continue stateloop;
3723                                    default:
3724                                        /*
3725                                         * Emit a U+003C LESS-THAN SIGN character
3726                                         * token, a U+002F SOLIDUS character token,
3727                                         * a character token for each of the
3728                                         * characters in the temporary buffer (in
3729                                         * the order they were added to the buffer),
3730                                         * and reconsume the current input character
3731                                         * in the return state (returnState).
3732                                         */
3733                                        // [NOCPP[
3734                                        errWarnLtSlashInRcdata();
3735                                        // ]NOCPP]
3736                                        tokenHandler.characters(
3737                                                Tokenizer.LT_SOLIDUS, 0, 2);
3738                                        emitStrBuf();
3739                                        if (c == '\u0000') {
3740                                            emitReplacementCharacter(buf, pos);
3741                                        } else {
3742                                            cstart = pos; // don't drop the
3743                                            // character
3744                                        }
3745                                        state = transition(state, returnState, reconsume, pos);
3746                                        continue stateloop;
3747                                }
3748                            }
3749                        }
3750                        // XXX reorder point
3751                        // BEGIN HOTSPOT WORKAROUND
3752                    case BOGUS_COMMENT: // bogus comment state: collects comment data until '>' is seen
3753                        boguscommentloop: for (;;) {
3754                            if (reconsume) {
3755                                reconsume = false; // process the character already held in c without advancing pos
3756                            } else {
3757                                if (++pos == endPos) {
3758                                    break stateloop; // pos reached endPos: leave stateloop with state unchanged
3759                                }
3760                                c = checkChar(buf, pos);
3761                            }
3762                            /*
3763                             * Consume every character up to and including the first
3764                             * U+003E GREATER-THAN SIGN character (>) or the end of
3765                             * the file (EOF), whichever comes first. Emit a comment
3766                             * token whose data is the concatenation of all the
3767                             * characters starting from and including the character
3768                             * that caused the state machine to switch into the
3769                             * bogus comment state, up to and including the
3770                             * character immediately before the last consumed
3771                             * character (i.e. up to the character just before the
3772                             * U+003E or EOF character). (If the comment was started
3773                             * by the end of the file (EOF), the token is empty.)
3774                             * 
3775                             * Switch to the data state.
3776                             * 
3777                             * If the end of the file was reached, reconsume the EOF
3778                             * character.
3779                             */
3780                            switch (c) {
3781                                case '>':
3782                                    emitComment(0, pos); // '>' terminates the comment
3783                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
3784                                    continue stateloop;
3785                                case '-':
3786                                    appendLongStrBuf(c); // the hyphen itself is part of the comment data
3787                                    state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
3788                                    break boguscommentloop; // leaves the loop and falls through into case BOGUS_COMMENT_HYPHEN
3789                                case '\r':
3790                                    appendLongStrBufCarriageReturn();
3791                                    break stateloop; // CR leaves stateloop while staying in this state (LF, by contrast, keeps looping)
3792                                case '\n':
3793                                    appendLongStrBufLineFeed();
3794                                    continue;
3795                                case '\u0000':
3796                                    c = '\uFFFD'; // NUL is appended as U+FFFD instead
3797                                    // fall thru
3798                                default:
3799                                    appendLongStrBuf(c);
3800                                    continue;
3801                            }
3802                        }
3803                        // FALLTHRU DON'T REORDER
3804                    case BOGUS_COMMENT_HYPHEN: // entered from BOGUS_COMMENT right after a '-' was appended
3805                        boguscommenthyphenloop: for (;;) {
3806                            if (++pos == endPos) {
3807                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
3808                            }
3809                            c = checkChar(buf, pos);
3810                            switch (c) {
3811                                case '>':
3812                                    // [NOCPP[
3813                                    maybeAppendSpaceToBogusComment();
3814                                    // ]NOCPP]
3815                                    emitComment(0, pos); // '>' terminates the comment
3816                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
3817                                    continue stateloop;
3818                                case '-':
3819                                    appendSecondHyphenToBogusComment();
3820                                    continue boguscommenthyphenloop; // another '-': stay in this state
3821                                case '\r':
3822                                    appendLongStrBufCarriageReturn();
3823                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
3824                                    break stateloop; // CR leaves stateloop after switching back to BOGUS_COMMENT
3825                                case '\n':
3826                                    appendLongStrBufLineFeed();
3827                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
3828                                    continue stateloop; // BOGUS_COMMENT precedes this case, so re-dispatch through the switch
3829                                case '\u0000':
3830                                    c = '\uFFFD'; // NUL is appended as U+FFFD instead
3831                                    // fall thru
3832                                default:
3833                                    appendLongStrBuf(c);
3834                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
3835                                    continue stateloop; // any non-hyphen character returns to BOGUS_COMMENT
3836                            }
3837                        }
3838                        // XXX reorder point
3839                    case SCRIPT_DATA: // script data state: scans script text, reacting only to '<', NUL, CR and LF
3840                        scriptdataloop: for (;;) {
3841                            if (reconsume) {
3842                                reconsume = false; // process the character already held in c without advancing pos
3843                            } else {
3844                                if (++pos == endPos) {
3845                                    break stateloop; // pos reached endPos: leave stateloop with state unchanged
3846                                }
3847                                c = checkChar(buf, pos);
3848                            }
3849                            switch (c) {
3850                                case '<':
3851                                    /*
3852                                     * U+003C LESS-THAN SIGN (<) Switch to the
3853                                     * script data less-than sign state.
3854                                     */
3855                                    flushChars(buf, pos);
3856                                    returnState = state; // record the current state (SCRIPT_DATA) in returnState
3857                                    state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
3858                                    break scriptdataloop; // FALL THRU continue
3859                                // stateloop;
3860                                case '\u0000':
3861                                    emitReplacementCharacter(buf, pos);
3862                                    continue;
3863                                case '\r':
3864                                    emitCarriageReturn(buf, pos);
3865                                    break stateloop; // CR leaves stateloop while staying in this state
3866                                case '\n':
3867                                    silentLineFeed(); // no break: execution continues into default
3868                                default:
3869                                    /*
3870                                     * Anything else Emit the current input
3871                                     * character as a character token. Stay in the
3872                                     * script data state.
3873                                     */
3874                                    continue; // nothing is emitted here; presumably a later flushChars() covers the run - TODO confirm
3875                            }
3876                        }
3877                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
3878                    case SCRIPT_DATA_LESS_THAN_SIGN: // script data less-than sign state: decides what a '<' seen in script data starts
3879                        scriptdatalessthansignloop: for (;;) {
3880                            if (++pos == endPos) {
3881                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
3882                            }
3883                            c = checkChar(buf, pos);
3884                            switch (c) {
3885                                case '/':
3886                                    /*
3887                                     * U+002F SOLIDUS (/) Set the temporary buffer
3888                                     * to the empty string. Switch to the script
3889                                     * data end tag open state.
3890                                     */
3891                                    index = 0;
3892                                    clearStrBuf();
3893                                    state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
3894                                    continue stateloop; // target case is not the next one, so re-dispatch through the switch
3895                                case '!':
3896                                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1); // one char of LT_GT from offset 0 (presumably the pending '<')
3897                                    cstart = pos;
3898                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
3899                                    break scriptdatalessthansignloop; // FALL THRU
3900                                // continue
3901                                // stateloop;
3902                                default:
3903                                    /*
3904                                     * Otherwise, emit a U+003C LESS-THAN SIGN
3905                                     * character token
3906                                     */
3907                                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
3908                                    /*
3909                                     * and reconsume the current input character in
3910                                     * the script data state.
3911                                     */
3912                                    cstart = pos;
3913                                    reconsume = true; // set before transition() so the call is passed the flag actually in effect, as at the other reconsume sites
3914                                    state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
3915                                    continue stateloop;
3916                            }
3917                        }
3918                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
3919                    case SCRIPT_DATA_ESCAPE_START: // script data escape start state: entered after "<!" in script data
3920                        scriptdataescapestartloop: for (;;) {
3921                            if (++pos == endPos) {
3922                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
3923                            }
3924                            c = checkChar(buf, pos);
3925                            /*
3926                             * Consume the next input character:
3927                             */
3928                            switch (c) {
3929                                case '-':
3930                                    /*
3931                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
3932                                     * HYPHEN-MINUS character token. Switch to the
3933                                     * script data escape start dash state.
3934                                     */
3935                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
3936                                    break scriptdataescapestartloop; // FALL THRU
3937                                // continue
3938                                // stateloop;
3939                                default:
3940                                    /*
3941                                     * Anything else Reconsume the current input
3942                                     * character in the script data state.
3943                                     */
3944                                    reconsume = true; // set before transition() so the call is passed the flag actually in effect, as at the other reconsume sites
3945                                    state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
3946                                    continue stateloop;
3947                            }
3948                        }
3949                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
3950                    case SCRIPT_DATA_ESCAPE_START_DASH: // script data escape start dash state: entered after "<!-" in script data
3951                        scriptdataescapestartdashloop: for (;;) {
3952                            if (++pos == endPos) {
3953                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
3954                            }
3955                            c = checkChar(buf, pos);
3956                            /*
3957                             * Consume the next input character:
3958                             */
3959                            switch (c) {
3960                                case '-':
3961                                    /*
3962                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
3963                                     * HYPHEN-MINUS character token. Switch to the
3964                                     * script data escaped dash dash state.
3965                                     */
3966                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
3967                                    break scriptdataescapestartdashloop; // leaves the loop and falls through into the next case
3968                                // continue stateloop;
3969                                default:
3970                                    /*
3971                                     * Anything else Reconsume the current input
3972                                     * character in the script data state.
3973                                     */
3974                                    reconsume = true; // set before transition() so the call is passed the flag actually in effect, as at the other reconsume sites
3975                                    state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
3976                                    continue stateloop;
3977                            }
3978                        }
3979                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
3980                    case SCRIPT_DATA_ESCAPED_DASH_DASH: // script data escaped dash dash state: at least two consecutive '-' were just seen
3981                        scriptdataescapeddashdashloop: for (;;) {
3982                            if (++pos == endPos) {
3983                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
3984                            }
3985                            c = checkChar(buf, pos);
3986                            /*
3987                             * Consume the next input character:
3988                             */
3989                            switch (c) {
3990                                case '-':
3991                                    /*
3992                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
3993                                     * HYPHEN-MINUS character token. Stay in the
3994                                     * script data escaped dash dash state.
3995                                     */
3996                                    continue;
3997                                case '<':
3998                                    /*
3999                                     * U+003C LESS-THAN SIGN (<) Switch to the
4000                                     * script data escaped less-than sign state.
4001                                     */
4002                                    flushChars(buf, pos);
4003                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
4004                                    continue stateloop; // target case is not the next one, so re-dispatch through the switch
4005                                case '>':
4006                                    /*
4007                                     * U+003E GREATER-THAN SIGN (>) Emit a U+003E
4008                                     * GREATER-THAN SIGN character token. Switch to
4009                                     * the script data state.
4010                                     */
4011                                    state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
4012                                    continue stateloop;
4013                                case '\u0000':
4014                                    emitReplacementCharacter(buf, pos);
4015                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4016                                    break scriptdataescapeddashdashloop; // leaves the loop and falls through into case SCRIPT_DATA_ESCAPED
4017                                case '\r':
4018                                    emitCarriageReturn(buf, pos);
4019                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4020                                    break stateloop; // CR leaves stateloop after switching to SCRIPT_DATA_ESCAPED
4021                                case '\n':
4022                                    silentLineFeed(); // no break: execution continues into default
4023                                default:
4024                                    /*
4025                                     * Anything else Emit the current input
4026                                     * character as a character token. Switch to the
4027                                     * script data escaped state.
4028                                     */
4029                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4030                                    break scriptdataescapeddashdashloop; // leaves the loop and falls through into case SCRIPT_DATA_ESCAPED
4031                                // continue stateloop;
4032                            }
4033                        }
4034                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4035                    case SCRIPT_DATA_ESCAPED: // script data escaped state: scans escaped script text for '-', '<', NUL, CR and LF
4036                        scriptdataescapedloop: for (;;) {
4037                            if (reconsume) {
4038                                reconsume = false; // process the character already held in c without advancing pos
4039                            } else {
4040                                if (++pos == endPos) {
4041                                    break stateloop; // pos reached endPos: leave stateloop with state unchanged
4042                                }
4043                                c = checkChar(buf, pos);
4044                            }
4045                            /*
4046                             * Consume the next input character:
4047                             */
4048                            switch (c) {
4049                                case '-':
4050                                    /*
4051                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
4052                                     * HYPHEN-MINUS character token. Switch to the
4053                                     * script data escaped dash state.
4054                                     */
4055                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
4056                                    break scriptdataescapedloop; // FALL THRU
4057                                // continue
4058                                // stateloop;
4059                                case '<':
4060                                    /*
4061                                     * U+003C LESS-THAN SIGN (<) Switch to the
4062                                     * script data escaped less-than sign state.
4063                                     */
4064                                    flushChars(buf, pos);
4065                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
4066                                    continue stateloop; // target case is not the next one, so re-dispatch through the switch
4067                                case '\u0000':
4068                                    emitReplacementCharacter(buf, pos);
4069                                    continue;
4070                                case '\r':
4071                                    emitCarriageReturn(buf, pos);
4072                                    break stateloop; // CR leaves stateloop while staying in this state
4073                                case '\n':
4074                                    silentLineFeed(); // no break: execution continues into default
4075                                default:
4076                                    /*
4077                                     * Anything else Emit the current input
4078                                     * character as a character token. Stay in the
4079                                     * script data escaped state.
4080                                     */
4081                                    continue; // nothing is emitted here; presumably a later flushChars() covers the run - TODO confirm
4082                            }
4083                        }
4084                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4085                    case SCRIPT_DATA_ESCAPED_DASH: // script data escaped dash state: a single '-' was just seen in escaped script text
4086                        scriptdataescapeddashloop: for (;;) {
4087                            if (++pos == endPos) {
4088                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
4089                            }
4090                            c = checkChar(buf, pos);
4091                            /*
4092                             * Consume the next input character:
4093                             */
4094                            switch (c) {
4095                                case '-':
4096                                    /*
4097                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
4098                                     * HYPHEN-MINUS character token. Switch to the
4099                                     * script data escaped dash dash state.
4100                                     */
4101                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
4102                                    continue stateloop; // target case lies earlier in the switch, so re-dispatch
4103                                case '<':
4104                                    /*
4105                                     * U+003C LESS-THAN SIGN (<) Switch to the
4106                                     * script data escaped less-than sign state.
4107                                     */
4108                                    flushChars(buf, pos);
4109                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
4110                                    break scriptdataescapeddashloop; // leaves the loop and falls through into the next case
4111                                // continue stateloop;
4112                                case '\u0000':
4113                                    emitReplacementCharacter(buf, pos);
4114                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4115                                    continue stateloop;
4116                                case '\r':
4117                                    emitCarriageReturn(buf, pos);
4118                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4119                                    break stateloop; // CR leaves stateloop after switching to SCRIPT_DATA_ESCAPED
4120                                case '\n':
4121                                    silentLineFeed(); // no break: execution continues into default
4122                                default:
4123                                    /*
4124                                     * Anything else Emit the current input
4125                                     * character as a character token. Switch to the
4126                                     * script data escaped state.
4127                                     */
4128                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4129                                    continue stateloop; // reconsume is not set here, so the next state reads a fresh character
4130                            }
4131                        }
4132                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4133                    case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: // script data escaped less-than sign state: decides what a '<' in escaped script text starts
4134                        scriptdataescapedlessthanloop: for (;;) {
4135                            if (++pos == endPos) {
4136                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
4137                            }
4138                            c = checkChar(buf, pos);
4139                            /*
4140                             * Consume the next input character:
4141                             */
4142                            switch (c) {
4143                                case '/':
4144                                    /*
4145                                     * U+002F SOLIDUS (/) Set the temporary buffer
4146                                     * to the empty string. Switch to the script
4147                                     * data escaped end tag open state.
4148                                     */
4149                                    index = 0;
4150                                    clearStrBuf();
4151                                    returnState = Tokenizer.SCRIPT_DATA_ESCAPED; // returnState is set explicitly here, not copied from state
4152                                    state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
4153                                    continue stateloop;
4154                                case 'S':
4155                                case 's': // only 'S'/'s' are matched here, although the quoted spec text below speaks of any letter; other letters take the default branch
4156                                    /*
4157                                     * U+0041 LATIN CAPITAL LETTER A through to
4158                                     * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
4159                                     * LESS-THAN SIGN character token and the
4160                                     * current input character as a character token.
4161                                     */
4162                                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1); // one char of LT_GT from offset 0 (presumably the pending '<')
4163                                    cstart = pos;
4164                                    index = 1; // the next state compares following input against Tokenizer.SCRIPT_ARR starting at index 1
4165                                    /*
4166                                     * Set the temporary buffer to the empty string.
4167                                     * Append the lowercase version of the current
4168                                     * input character (add 0x0020 to the
4169                                     * character's code point) to the temporary
4170                                     * buffer. Switch to the script data double
4171                                     * escape start state.
4172                                     */
4173                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
4174                                    break scriptdataescapedlessthanloop; // leaves the loop and falls through into the next case
4175                                // continue stateloop;
4176                                default:
4177                                    /*
4178                                     * Anything else Emit a U+003C LESS-THAN SIGN
4179                                     * character token and reconsume the current
4180                                     * input character in the script data escaped
4181                                     * state.
4182                                     */
4183                                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
4184                                    cstart = pos;
4185                                    reconsume = true; // set before transition(), which receives the flag as an argument
4186                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4187                                    continue stateloop;
4188                            }
4189                        }
4190                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4191                    case SCRIPT_DATA_DOUBLE_ESCAPE_START: // script data double escape start state: matches input against Tokenizer.SCRIPT_ARR
4192                        scriptdatadoubleescapestartloop: for (;;) {
4193                            if (++pos == endPos) {
4194                                break stateloop; // pos reached endPos: leave stateloop with state unchanged
4195                            }
4196                            c = checkChar(buf, pos);
4197                            assert (index > 0); // the state that falls through into this one sets index to 1
4198                            if (index < 6) { // SCRIPT_ARR.length
4199                                char folded = c;
4200                                if (c >= 'A' && c <= 'Z') {
4201                                    folded += 0x20; // fold ASCII upper case to lower case before comparing
4202                                }
4203                                if (folded != Tokenizer.SCRIPT_ARR[index]) {
4204                                    reconsume = true; // mismatch: hand the current character back to SCRIPT_DATA_ESCAPED
4205                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4206                                    continue stateloop;
4207                                }
4208                                index++;
4209                                continue; // matched so far; read the next character
4210                            }
4211                            switch (c) { // reached only once index has reached 6; this character decides whether the match counts
4212                                case '\r':
4213                                    emitCarriageReturn(buf, pos);
4214                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4215                                    break stateloop; // CR leaves stateloop after switching to SCRIPT_DATA_DOUBLE_ESCAPED
4216                                case '\n':
4217                                    silentLineFeed(); // no break: execution continues into the shared branch below
4218                                case ' ':
4219                                case '\t':
4220                                case '\u000C':
4221                                case '/':
4222                                case '>':
4223                                    /*
4224                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4225                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
4226                                     * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
4227                                     * (>) Emit the current input character as a
4228                                     * character token. If the temporary buffer is
4229                                     * the string "script", then switch to the
4230                                     * script data double escaped state.
4231                                     */
4232                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4233                                    break scriptdatadoubleescapestartloop; // leaves the loop and falls through into the next case
4234                                // continue stateloop;
4235                                default:
4236                                    /*
4237                                     * Anything else Reconsume the current input
4238                                     * character in the script data escaped state.
4239                                     */
4240                                    reconsume = true; // set before transition(), which receives the flag as an argument
4241                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4242                                    continue stateloop;
4243                            }
4244                        }
4245                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4246                    case SCRIPT_DATA_DOUBLE_ESCAPED: // script data double escaped state: scans text for '-', '<', NUL, CR and LF
4247                        scriptdatadoubleescapedloop: for (;;) {
4248                            if (reconsume) {
4249                                reconsume = false; // process the character already held in c without advancing pos
4250                            } else {
4251                                if (++pos == endPos) {
4252                                    break stateloop; // pos reached endPos: leave stateloop with state unchanged
4253                                }
4254                                c = checkChar(buf, pos);
4255                            }
4256                            /*
4257                             * Consume the next input character:
4258                             */
4259                            switch (c) {
4260                                case '-':
4261                                    /*
4262                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
4263                                     * HYPHEN-MINUS character token. Switch to the
4264                                     * script data double escaped dash state.
4265                                     */
4266                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
4267                                    break scriptdatadoubleescapedloop; // FALL THRU
4268                                // continue
4269                                // stateloop;
4270                                case '<':
4271                                    /*
4272                                     * U+003C LESS-THAN SIGN (<) Emit a U+003C
4273                                     * LESS-THAN SIGN character token. Switch to the
4274                                     * script data double escaped less-than sign
4275                                     * state.
4276                                     */
4277                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); // no flushChars() call here, unlike the '<' branch of SCRIPT_DATA_ESCAPED
4278                                    continue stateloop; // target case is not the next one, so re-dispatch through the switch
4279                                case '\u0000':
4280                                    emitReplacementCharacter(buf, pos);
4281                                    continue;
4282                                case '\r':
4283                                    emitCarriageReturn(buf, pos);
4284                                    break stateloop; // CR leaves stateloop while staying in this state
4285                                case '\n':
4286                                    silentLineFeed(); // no break: execution continues into default
4287                                default:
4288                                    /*
4289                                     * Anything else Emit the current input
4290                                     * character as a character token. Stay in the
4291                                     * script data double escaped state.
4292                                     */
4293                                    continue; // nothing is emitted here; presumably a later flushChars() covers the run - TODO confirm
4294                            }
4295                        }
4296                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4297                    case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
4298                        scriptdatadoubleescapeddashloop: for (;;) {
4299                            if (++pos == endPos) {
4300                                break stateloop;
4301                            }
4302                            c = checkChar(buf, pos);
4303                            /*
4304                             * Consume the next input character:
4305                             */
4306                            switch (c) {
4307                                case '-':
4308                                    /*
4309                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
4310                                     * HYPHEN-MINUS character token. Switch to the
4311                                     * script data double escaped dash dash state.
4312                                     */
4313                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
4314                                    break scriptdatadoubleescapeddashloop;
4315                                // continue stateloop;
4316                                case '<':
4317                                    /*
4318                                     * U+003C LESS-THAN SIGN (<) Emit a U+003C
4319                                     * LESS-THAN SIGN character token. Switch to the
4320                                     * script data double escaped less-than sign
4321                                     * state.
4322                                     */
4323                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
4324                                    continue stateloop;
4325                                case '\u0000':
4326                                    emitReplacementCharacter(buf, pos);
4327                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4328                                    continue stateloop;
4329                                case '\r':
4330                                    emitCarriageReturn(buf, pos);
4331                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4332                                    break stateloop;
4333                                case '\n':
4334                                    silentLineFeed();
4335                                default:
4336                                    /*
4337                                     * Anything else Emit the current input
4338                                     * character as a character token. Switch to the
4339                                     * script data double escaped state.
4340                                     */
4341                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4342                                    continue stateloop;
4343                            }
4344                        }
4345                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4346                    case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
4347                        scriptdatadoubleescapeddashdashloop: for (;;) {
4348                            if (++pos == endPos) {
4349                                break stateloop;
4350                            }
4351                            c = checkChar(buf, pos);
4352                            /*
4353                             * Consume the next input character:
4354                             */
4355                            switch (c) {
4356                                case '-':
4357                                    /*
4358                                     * U+002D HYPHEN-MINUS (-) Emit a U+002D
4359                                     * HYPHEN-MINUS character token. Stay in the
4360                                     * script data double escaped dash dash state.
4361                                     */
4362                                    continue;
4363                                case '<':
4364                                    /*
4365                                     * U+003C LESS-THAN SIGN (<) Emit a U+003C
4366                                     * LESS-THAN SIGN character token. Switch to the
4367                                     * script data double escaped less-than sign
4368                                     * state.
4369                                     */
4370                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
4371                                    break scriptdatadoubleescapeddashdashloop;
4372                                case '>':
4373                                    /*
4374                                     * U+003E GREATER-THAN SIGN (>) Emit a U+003E
4375                                     * GREATER-THAN SIGN character token. Switch to
4376                                     * the script data state.
4377                                     */
4378                                    state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
4379                                    continue stateloop;
4380                                case '\u0000':
4381                                    emitReplacementCharacter(buf, pos);
4382                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4383                                    continue stateloop;
4384                                case '\r':
4385                                    emitCarriageReturn(buf, pos);
4386                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4387                                    break stateloop;
4388                                case '\n':
4389                                    silentLineFeed();
4390                                default:
4391                                    /*
4392                                     * Anything else Emit the current input
4393                                     * character as a character token. Switch to the
4394                                     * script data double escaped state.
4395                                     */
4396                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4397                                    continue stateloop;
4398                            }
4399                        }
4400                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4401                    case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
4402                        scriptdatadoubleescapedlessthanloop: for (;;) {
4403                            if (++pos == endPos) {
4404                                break stateloop;
4405                            }
4406                            c = checkChar(buf, pos);
4407                            /*
4408                             * Consume the next input character:
4409                             */
4410                            switch (c) {
4411                                case '/':
4412                                    /*
4413                                     * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
4414                                     * character token. Set the temporary buffer to
4415                                     * the empty string. Switch to the script data
4416                                     * double escape end state.
4417                                     */
4418                                    index = 0;
4419                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
4420                                    break scriptdatadoubleescapedlessthanloop;
4421                                default:
4422                                    /*
4423                                     * Anything else Reconsume the current input
4424                                     * character in the script data double escaped
4425                                     * state.
4426                                     */
4427                                    reconsume = true;
4428                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4429                                    continue stateloop;
4430                            }
4431                        }
4432                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
4433                    case SCRIPT_DATA_DOUBLE_ESCAPE_END:
4434                        scriptdatadoubleescapeendloop: for (;;) {
4435                            if (++pos == endPos) {
4436                                break stateloop;
4437                            }
4438                            c = checkChar(buf, pos);
4439                            if (index < 6) { // SCRIPT_ARR.length
4440                                char folded = c;
4441                                if (c >= 'A' && c <= 'Z') {
4442                                    folded += 0x20;
4443                                }
4444                                if (folded != Tokenizer.SCRIPT_ARR[index]) {
4445                                    reconsume = true;
4446                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4447                                    continue stateloop;
4448                                }
4449                                index++;
4450                                continue;
4451                            }
4452                            switch (c) {
4453                                case '\r':
4454                                    emitCarriageReturn(buf, pos);
4455                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4456                                    break stateloop;
4457                                case '\n':
4458                                    silentLineFeed();
4459                                case ' ':
4460                                case '\t':
4461                                case '\u000C':
4462                                case '/':
4463                                case '>':
4464                                    /*
4465                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4466                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
4467                                     * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
4468                                     * (>) Emit the current input character as a
4469                                     * character token. If the temporary buffer is
4470                                     * the string "script", then switch to the
4471                                     * script data escaped state.
4472                                     */
4473                                    state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
4474                                    continue stateloop;
4475                                default:
4476                                    /*
4477                                     * Reconsume the current input character in the
4478                                     * script data double escaped state.
4479                                     */
4480                                    reconsume = true;
4481                                    state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
4482                                    continue stateloop;
4483                            }
4484                        }
4485                        // XXX reorder point
4486                    case MARKUP_DECLARATION_OCTYPE:
4487                        markupdeclarationdoctypeloop: for (;;) {
4488                            if (++pos == endPos) {
4489                                break stateloop;
4490                            }
4491                            c = checkChar(buf, pos);
4492                            if (index < 6) { // OCTYPE.length
4493                                char folded = c;
4494                                if (c >= 'A' && c <= 'Z') {
4495                                    folded += 0x20;
4496                                }
4497                                if (folded == Tokenizer.OCTYPE[index]) {
4498                                    appendLongStrBuf(c);
4499                                } else {
4500                                    errBogusComment();
4501                                    state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
4502                                    reconsume = true;
4503                                    continue stateloop;
4504                                }
4505                                index++;
4506                                continue;
4507                            } else {
4508                                state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
4509                                reconsume = true;
4510                                break markupdeclarationdoctypeloop;
4511                                // continue stateloop;
4512                            }
4513                        }
4514                        // FALLTHRU DON'T REORDER
4515                    case DOCTYPE:
4516                        doctypeloop: for (;;) {
4517                            if (reconsume) {
4518                                reconsume = false;
4519                            } else {
4520                                if (++pos == endPos) {
4521                                    break stateloop;
4522                                }
4523                                c = checkChar(buf, pos);
4524                            }
4525                            initDoctypeFields();
4526                            /*
4527                             * Consume the next input character:
4528                             */
4529                            switch (c) {
4530                                case '\r':
4531                                    silentCarriageReturn();
4532                                    state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
4533                                    break stateloop;
4534                                case '\n':
4535                                    silentLineFeed();
4536                                    // fall thru
4537                                case ' ':
4538                                case '\t':
4539                                case '\u000C':
4540                                    /*
4541                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4542                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
4543                                     * Switch to the before DOCTYPE name state.
4544                                     */
4545                                    state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
4546                                    break doctypeloop;
4547                                // continue stateloop;
4548                                default:
4549                                    /*
4550                                     * Anything else Parse error.
4551                                     */
4552                                    errMissingSpaceBeforeDoctypeName();
4553                                    /*
4554                                     * Reconsume the current character in the before
4555                                     * DOCTYPE name state.
4556                                     */
4557                                    state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
4558                                    reconsume = true;
4559                                    break doctypeloop;
4560                                // continue stateloop;
4561                            }
4562                        }
4563                        // FALLTHRU DON'T REORDER
4564                    case BEFORE_DOCTYPE_NAME:
4565                        beforedoctypenameloop: for (;;) {
4566                            if (reconsume) {
4567                                reconsume = false;
4568                            } else {
4569                                if (++pos == endPos) {
4570                                    break stateloop;
4571                                }
4572                                c = checkChar(buf, pos);
4573                            }
4574                            /*
4575                             * Consume the next input character:
4576                             */
4577                            switch (c) {
4578                                case '\r':
4579                                    silentCarriageReturn();
4580                                    break stateloop;
4581                                case '\n':
4582                                    silentLineFeed();
4583                                    // fall thru
4584                                case ' ':
4585                                case '\t':
4586                                case '\u000C':
4587                                    /*
4588                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4589                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
4590                                     * in the before DOCTYPE name state.
4591                                     */
4592                                    continue;
4593                                case '>':
4594                                    /*
4595                                     * U+003E GREATER-THAN SIGN (>) Parse error.
4596                                     */
4597                                    errNamelessDoctype();
4598                                    /*
4599                                     * Create a new DOCTYPE token. Set its
4600                                     * force-quirks flag to on.
4601                                     */
4602                                    forceQuirks = true;
4603                                    /*
4604                                     * Emit the token.
4605                                     */
4606                                    emitDoctypeToken(pos);
4607                                    /*
4608                                     * Switch to the data state.
4609                                     */
4610                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
4611                                    continue stateloop;
4612                                case '\u0000':
4613                                    c = '\uFFFD';
4614                                    // fall thru
4615                                default:
4616                                    if (c >= 'A' && c <= 'Z') {
4617                                        /*
4618                                         * U+0041 LATIN CAPITAL LETTER A through to
4619                                         * U+005A LATIN CAPITAL LETTER Z Create a
4620                                         * new DOCTYPE token. Set the token's name
4621                                         * to the lowercase version of the input
4622                                         * character (add 0x0020 to the character's
4623                                         * code point).
4624                                         */
4625                                        c += 0x20;
4626                                    }
4627                                    /* Anything else Create a new DOCTYPE token. */
4628                                    /*
4629                                     * Set the token's name to the current
4630                                     * input character.
4631                                     */
4632                                    clearStrBufAndAppend(c);
4633                                    /*
4634                                     * Switch to the DOCTYPE name state.
4635                                     */
4636                                    state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
4637                                    break beforedoctypenameloop;
4638                                // continue stateloop;
4639                            }
4640                        }
4641                        // FALLTHRU DON'T REORDER
4642                    case DOCTYPE_NAME:
4643                        doctypenameloop: for (;;) {
4644                            if (++pos == endPos) {
4645                                break stateloop;
4646                            }
4647                            c = checkChar(buf, pos);
4648                            /*
4649                             * Consume the next input character:
4650                             */
4651                            switch (c) {
4652                                case '\r':
4653                                    silentCarriageReturn();
4654                                    strBufToDoctypeName();
4655                                    state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
4656                                    break stateloop;
4657                                case '\n':
4658                                    silentLineFeed();
4659                                    // fall thru
4660                                case ' ':
4661                                case '\t':
4662                                case '\u000C':
4663                                    /*
4664                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4665                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
4666                                     * Switch to the after DOCTYPE name state.
4667                                     */
4668                                    strBufToDoctypeName();
4669                                    state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
4670                                    break doctypenameloop;
4671                                // continue stateloop;
4672                                case '>':
4673                                    /*
4674                                     * U+003E GREATER-THAN SIGN (>) Emit the current
4675                                     * DOCTYPE token.
4676                                     */
4677                                    strBufToDoctypeName();
4678                                    emitDoctypeToken(pos);
4679                                    /*
4680                                     * Switch to the data state.
4681                                     */
4682                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
4683                                    continue stateloop;
4684                                case '\u0000':
4685                                    c = '\uFFFD';
4686                                    // fall thru
4687                                default:
4688                                    /*
4689                                     * U+0041 LATIN CAPITAL LETTER A through to
4690                                     * U+005A LATIN CAPITAL LETTER Z Append the
4691                                     * lowercase version of the input character (add
4692                                     * 0x0020 to the character's code point) to the
4693                                     * current DOCTYPE token's name.
4694                                     */
4695                                    if (c >= 'A' && c <= 'Z') {
4696                                        c += 0x0020;
4697                                    }
4698                                    /*
4699                                     * Anything else Append the current input
4700                                     * character to the current DOCTYPE token's
4701                                     * name.
4702                                     */
4703                                    appendStrBuf(c);
4704                                    /*
4705                                     * Stay in the DOCTYPE name state.
4706                                     */
4707                                    continue;
4708                            }
4709                        }
4710                        // FALLTHRU DON'T REORDER
4711                    case AFTER_DOCTYPE_NAME:
4712                        afterdoctypenameloop: for (;;) {
4713                            if (++pos == endPos) {
4714                                break stateloop;
4715                            }
4716                            c = checkChar(buf, pos);
4717                            /*
4718                             * Consume the next input character:
4719                             */
4720                            switch (c) {
4721                                case '\r':
4722                                    silentCarriageReturn();
4723                                    break stateloop;
4724                                case '\n':
4725                                    silentLineFeed();
4726                                    // fall thru
4727                                case ' ':
4728                                case '\t':
4729                                case '\u000C':
4730                                    /*
4731                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4732                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
4733                                     * in the after DOCTYPE name state.
4734                                     */
4735                                    continue;
4736                                case '>':
4737                                    /*
4738                                     * U+003E GREATER-THAN SIGN (>) Emit the current
4739                                     * DOCTYPE token.
4740                                     */
4741                                    emitDoctypeToken(pos);
4742                                    /*
4743                                     * Switch to the data state.
4744                                     */
4745                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
4746                                    continue stateloop;
4747                                case 'p':
4748                                case 'P':
4749                                    index = 0;
4750                                    state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
4751                                    break afterdoctypenameloop;
4752                                // continue stateloop;
4753                                case 's':
4754                                case 'S':
4755                                    index = 0;
4756                                    state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
4757                                    continue stateloop;
4758                                default:
4759                                    /*
4760                                     * Otherwise, this is a parse error.
4761                                     */
4762                                    bogusDoctype();
4763    
4764                                    /*
4765                                     * Set the DOCTYPE token's force-quirks flag to
4766                                     * on.
4767                                     */
4768                                    // done by bogusDoctype();
4769                                    /*
4770                                     * Switch to the bogus DOCTYPE state.
4771                                     */
4772                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
4773                                    continue stateloop;
4774                            }
4775                        }
4776                        // FALLTHRU DON'T REORDER
4777                    case DOCTYPE_UBLIC:
4778                        doctypeublicloop: for (;;) {
4779                            if (++pos == endPos) {
4780                                break stateloop;
4781                            }
4782                            c = checkChar(buf, pos);
4783                            /*
4784                             * If the six characters starting from the current input
4785                             * character are an ASCII case-insensitive match for the
4786                             * word "PUBLIC", then consume those characters and
4787                             * switch to the after DOCTYPE public keyword state.
4788                             */
4789                            if (index < 5) { // UBLIC.length
4790                                char folded = c;
4791                                if (c >= 'A' && c <= 'Z') {
4792                                    folded += 0x20;
4793                                }
4794                                if (folded != Tokenizer.UBLIC[index]) {
4795                                    bogusDoctype();
4796                                    // forceQuirks = true;
4797                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
4798                                    reconsume = true;
4799                                    continue stateloop;
4800                                }
4801                                index++;
4802                                continue;
4803                            } else {
4804                                state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
4805                                reconsume = true;
4806                                break doctypeublicloop;
4807                                // continue stateloop;
4808                            }
4809                        }
4810                        // FALLTHRU DON'T REORDER
4811                    case AFTER_DOCTYPE_PUBLIC_KEYWORD:
                            // Dispatches on the character that follows the PUBLIC keyword
                            // of a DOCTYPE: whitespace, an opening quote, '>' or anything
                            // else. The DOCTYPE_UBLIC case above falls through into this
                            // case with reconsume set to true.
4812                        afterdoctypepublickeywordloop: for (;;) {
                                // With reconsume set, the character already held in c is
                                // examined again; otherwise the next character is read, and
                                // stateloop is left when pos reaches endPos.
4813                            if (reconsume) {
4814                                reconsume = false;
4815                            } else {
4816                                if (++pos == endPos) {
4817                                    break stateloop;
4818                                }
4819                                c = checkChar(buf, pos);
4820                            }
4821                            /*
4822                             * Consume the next input character:
4823                             */
4824                            switch (c) {
4825                                case '\r':
4826                                    silentCarriageReturn();
                                        // The new state is stored before stateloop is left.
                                        // NOTE(review): presumably so that tokenization
                                        // resumes in that state later - confirm at the
                                        // code following stateloop.
4827                                    state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
4828                                    break stateloop;
4829                                case '\n':
4830                                    silentLineFeed();
4831                                    // fall thru
4832                                case ' ':
4833                                case '\t':
4834                                case '\u000C':
4835                                    /*
4836                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4837                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
4838                                     * Switch to the before DOCTYPE public
4839                                     * identifier state.
4840                                     */
4841                                    state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        // Leaving the labelled loop falls through into the
                                        // BEFORE_DOCTYPE_PUBLIC_IDENTIFIER case directly
                                        // below, which is why the case order matters.
4842                                    break afterdoctypepublickeywordloop;
4843                                // FALL THROUGH continue stateloop
4844                                case '"':
4845                                    /*
4846                                     * U+0022 QUOTATION MARK (") Parse Error.
4847                                     */
4848                                    errNoSpaceBetweenDoctypePublicKeywordAndQuote();
4849                                    /*
4850                                     * Set the DOCTYPE token's public identifier to
4851                                     * the empty string (not missing),
4852                                     */
4853                                    clearLongStrBuf();
4854                                    /*
4855                                     * then switch to the DOCTYPE public identifier
4856                                     * (double-quoted) state.
4857                                     */
4858                                    state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        // The target case is not the one directly below, so
                                        // control goes back to stateloop instead of falling
                                        // through.
4859                                    continue stateloop;
4860                                case '\'':
4861                                    /*
4862                                     * U+0027 APOSTROPHE (') Parse Error.
4863                                     */
4864                                    errNoSpaceBetweenDoctypePublicKeywordAndQuote();
4865                                    /*
4866                                     * Set the DOCTYPE token's public identifier to
4867                                     * the empty string (not missing),
4868                                     */
4869                                    clearLongStrBuf();
4870                                    /*
4871                                     * then switch to the DOCTYPE public identifier
4872                                     * (single-quoted) state.
4873                                     */
4874                                    state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
4875                                    continue stateloop;
4876                                case '>':
4877                                    /* U+003E GREATER-THAN SIGN (>) Parse error. */
4878                                    errExpectedPublicId();
4879                                    /*
4880                                     * Set the DOCTYPE token's force-quirks flag to
4881                                     * on.
4882                                     */
4883                                    forceQuirks = true;
4884                                    /*
4885                                     * Emit that DOCTYPE token.
4886                                     */
4887                                    emitDoctypeToken(pos);
4888                                    /*
4889                                     * Switch to the data state.
4890                                     */
4891                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
4892                                    continue stateloop;
4893                                default:
                                        // Any other character makes the DOCTYPE bogus; the
                                        // character itself is not reconsumed here.
4894                                    bogusDoctype();
4895                                    /*
4896                                     * Set the DOCTYPE token's force-quirks flag to
4897                                     * on.
4898                                     */
4899                                    // done by bogusDoctype();
4900                                    /*
4901                                     * Switch to the bogus DOCTYPE state.
4902                                     */
4903                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
4904                                    continue stateloop;
4905                            }
4906                        }
4907                        // FALLTHRU DON'T REORDER
4908                    case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
                            // Skips whitespace between the PUBLIC keyword and the public
                            // identifier, then dispatches on the first non-whitespace
                            // character. This case always reads a fresh character; it does
                            // not look at the reconsume flag.
4909                        beforedoctypepublicidentifierloop: for (;;) {
4910                            if (++pos == endPos) {
4911                                break stateloop;
4912                            }
4913                            c = checkChar(buf, pos);
4914                            /*
4915                             * Consume the next input character:
4916                             */
4917                            switch (c) {
4918                                case '\r':
4919                                    silentCarriageReturn();
                                        // state is left unchanged, so this state is still
                                        // the current one when stateloop is exited.
4920                                    break stateloop;
4921                                case '\n':
4922                                    silentLineFeed();
4923                                    // fall thru
4924                                case ' ':
4925                                case '\t':
4926                                case '\u000C':
4927                                    /*
4928                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
4929                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
4930                                     * in the before DOCTYPE public identifier
4931                                     * state.
4932                                     */
4933                                    continue;
4934                                case '"':
4935                                    /*
4936                                     * U+0022 QUOTATION MARK (") Set the DOCTYPE
4937                                     * token's public identifier to the empty string
4938                                     * (not missing),
4939                                     */
4940                                    clearLongStrBuf();
4941                                    /*
4942                                     * then switch to the DOCTYPE public identifier
4943                                     * (double-quoted) state.
4944                                     */
4945                                    state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        // Leaving the labelled loop falls through into the
                                        // double-quoted case directly below.
4946                                    break beforedoctypepublicidentifierloop;
4947                                // continue stateloop;
4948                                case '\'':
4949                                    /*
4950                                     * U+0027 APOSTROPHE (') Set the DOCTYPE token's
4951                                     * public identifier to the empty string (not
4952                                     * missing),
4953                                     */
4954                                    clearLongStrBuf();
4955                                    /*
4956                                     * then switch to the DOCTYPE public identifier
4957                                     * (single-quoted) state.
4958                                     */
4959                                    state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        // The single-quoted case is not adjacent, so control
                                        // returns to stateloop rather than falling through.
4960                                    continue stateloop;
4961                                case '>':
4962                                    /* U+003E GREATER-THAN SIGN (>) Parse error. */
4963                                    errExpectedPublicId();
4964                                    /*
4965                                     * Set the DOCTYPE token's force-quirks flag to
4966                                     * on.
4967                                     */
4968                                    forceQuirks = true;
4969                                    /*
4970                                     * Emit that DOCTYPE token.
4971                                     */
4972                                    emitDoctypeToken(pos);
4973                                    /*
4974                                     * Switch to the data state.
4975                                     */
4976                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
4977                                    continue stateloop;
4978                                default:
4979                                    bogusDoctype();
4980                                    /*
4981                                     * Set the DOCTYPE token's force-quirks flag to
4982                                     * on.
4983                                     */
4984                                    // done by bogusDoctype();
4985                                    /*
4986                                     * Switch to the bogus DOCTYPE state.
4987                                     */
4988                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
4989                                    continue stateloop;
4990                            }
4991                        }
4992                        // FALLTHRU DON'T REORDER
4993                    case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
                            // Accumulates the characters of a double-quoted public
                            // identifier in the long string buffer until the closing
                            // quote or a premature '>' is seen. The buffer is converted
                            // to publicIdentifier on either of those two exits.
4994                        doctypepublicidentifierdoublequotedloop: for (;;) {
4995                            if (++pos == endPos) {
4996                                break stateloop;
4997                            }
4998                            c = checkChar(buf, pos);
4999                            /*
5000                             * Consume the next input character:
5001                             */
5002                            switch (c) {
5003                                case '"':
5004                                    /*
5005                                     * U+0022 QUOTATION MARK (") Switch to the after
5006                                     * DOCTYPE public identifier state.
5007                                     */
5008                                    publicIdentifier = longStrBufToString();
5009                                    state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                        // Leaving the labelled loop falls through into the
                                        // AFTER_DOCTYPE_PUBLIC_IDENTIFIER case directly below.
5010                                    break doctypepublicidentifierdoublequotedloop;
5011                                // continue stateloop;
5012                                case '>':
5013                                    /*
5014                                     * U+003E GREATER-THAN SIGN (>) Parse error.
5015                                     */
5016                                    errGtInPublicId();
5017                                    /*
5018                                     * Set the DOCTYPE token's force-quirks flag to
5019                                     * on.
5020                                     */
5021                                    forceQuirks = true;
5022                                    /*
5023                                     * Emit that DOCTYPE token.
5024                                     */
                                        // Whatever was collected so far becomes the public
                                        // identifier of the emitted token.
5025                                    publicIdentifier = longStrBufToString();
5026                                    emitDoctypeToken(pos);
5027                                    /*
5028                                     * Switch to the data state.
5029                                     */
5030                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5031                                    continue stateloop;
5032                                case '\r':
                                        // A CR is appended through its dedicated helper and
                                        // stateloop is left with state unchanged.
5033                                    appendLongStrBufCarriageReturn();
5034                                    break stateloop;
5035                                case '\n':
5036                                    appendLongStrBufLineFeed();
5037                                    continue;
5038                                case '\u0000':
                                        // A NUL is replaced by U+FFFD before being appended
                                        // by the default branch.
5039                                    c = '\uFFFD';
5040                                    // fall thru
5041                                default:
5042                                    /*
5043                                     * Anything else Append the current input
5044                                     * character to the current DOCTYPE token's
5045                                     * public identifier.
5046                                     */
5047                                    appendLongStrBuf(c);
5048                                    /*
5049                                     * Stay in the DOCTYPE public identifier
5050                                     * (double-quoted) state.
5051                                     */
5052                                    continue;
5053                            }
5054                        }
5055                        // FALLTHRU DON'T REORDER
5056                    case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
                            // Dispatches on the character right after the closing quote of
                            // the public identifier: whitespace, '>', the opening quote of
                            // a system identifier (reported as an error because no space
                            // separates the two identifiers), or anything else.
5057                        afterdoctypepublicidentifierloop: for (;;) {
5058                            if (++pos == endPos) {
5059                                break stateloop;
5060                            }
5061                            c = checkChar(buf, pos);
5062                            /*
5063                             * Consume the next input character:
5064                             */
5065                            switch (c) {
5066                                case '\r':
5067                                    silentCarriageReturn();
                                        // The new state is stored before stateloop is left.
5068                                    state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
5069                                    break stateloop;
5070                                case '\n':
5071                                    silentLineFeed();
5072                                    // fall thru
5073                                case ' ':
5074                                case '\t':
5075                                case '\u000C':
5076                                    /*
5077                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
5078                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
5079                                     * Switch to the between DOCTYPE public and
5080                                     * system identifiers state.
5081                                     */
5082                                    state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
                                        // Leaving the labelled loop falls through into the
                                        // BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS case
                                        // directly below.
5083                                    break afterdoctypepublicidentifierloop;
5084                                // continue stateloop;
5085                                case '>':
5086                                    /*
5087                                     * U+003E GREATER-THAN SIGN (>) Emit the current
5088                                     * DOCTYPE token.
5089                                     */
5090                                    emitDoctypeToken(pos);
5091                                    /*
5092                                     * Switch to the data state.
5093                                     */
5094                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5095                                    continue stateloop;
5096                                case '"':
5097                                    /*
5098                                     * U+0022 QUOTATION MARK (") Parse error.
5099                                     */
5100                                    errNoSpaceBetweenPublicAndSystemIds();
5101                                    /*
5102                                     * Set the DOCTYPE token's system identifier to
5103                                     * the empty string (not missing),
5104                                     */
                                        // The long string buffer is cleared so that it can
                                        // collect the system identifier next.
5105                                    clearLongStrBuf();
5106                                    /*
5107                                     * then switch to the DOCTYPE system identifier
5108                                     * (double-quoted) state.
5109                                     */
5110                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
5111                                    continue stateloop;
5112                                case '\'':
5113                                    /*
5114                                     * U+0027 APOSTROPHE (') Parse error.
5115                                     */
5116                                    errNoSpaceBetweenPublicAndSystemIds();
5117                                    /*
5118                                     * Set the DOCTYPE token's system identifier to
5119                                     * the empty string (not missing),
5120                                     */
5121                                    clearLongStrBuf();
5122                                    /*
5123                                     * then switch to the DOCTYPE system identifier
5124                                     * (single-quoted) state.
5125                                     */
5126                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
5127                                    continue stateloop;
5128                                default:
5129                                    bogusDoctype();
5130                                    /*
5131                                     * Set the DOCTYPE token's force-quirks flag to
5132                                     * on.
5133                                     */
5134                                    // done by bogusDoctype();
5135                                    /*
5136                                     * Switch to the bogus DOCTYPE state.
5137                                     */
5138                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
5139                                    continue stateloop;
5140                            }
5141                        }
5142                        // FALLTHRU DON'T REORDER
5143                    case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
                            // Skips whitespace after the public identifier and dispatches
                            // on the first non-whitespace character: '>' ends the DOCTYPE,
                            // a quote starts the system identifier, anything else makes
                            // the DOCTYPE bogus.
5144                        betweendoctypepublicandsystemidentifiersloop: for (;;) {
5145                            if (++pos == endPos) {
5146                                break stateloop;
5147                            }
5148                            c = checkChar(buf, pos);
5149                            /*
5150                             * Consume the next input character:
5151                             */
5152                            switch (c) {
5153                                case '\r':
5154                                    silentCarriageReturn();
                                        // state is left unchanged when stateloop is exited.
5155                                    break stateloop;
5156                                case '\n':
5157                                    silentLineFeed();
5158                                    // fall thru
5159                                case ' ':
5160                                case '\t':
5161                                case '\u000C':
5162                                    /*
5163                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
5164                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
5165                                     * in the between DOCTYPE public and system
5166                                     * identifiers state.
5167                                     */
5168                                    continue;
5169                                case '>':
5170                                    /*
5171                                     * U+003E GREATER-THAN SIGN (>) Emit the current
5172                                     * DOCTYPE token.
5173                                     */
5174                                    emitDoctypeToken(pos);
5175                                    /*
5176                                     * Switch to the data state.
5177                                     */
5178                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5179                                    continue stateloop;
5180                                case '"':
5181                                    /*
5182                                     * U+0022 QUOTATION MARK (") Set the DOCTYPE
5183                                     * token's system identifier to the empty string
5184                                     * (not missing),
5185                                     */
5186                                    clearLongStrBuf();
5187                                    /*
5188                                     * then switch to the DOCTYPE system identifier
5189                                     * (double-quoted) state.
5190                                     */
5191                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                        // Leaving the labelled loop falls through into the
                                        // DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED case
                                        // directly below.
5192                                    break betweendoctypepublicandsystemidentifiersloop;
5193                                // continue stateloop;
5194                                case '\'':
5195                                    /*
5196                                     * U+0027 APOSTROPHE (') Set the DOCTYPE token's
5197                                     * system identifier to the empty string (not
5198                                     * missing),
5199                                     */
5200                                    clearLongStrBuf();
5201                                    /*
5202                                     * then switch to the DOCTYPE system identifier
5203                                     * (single-quoted) state.
5204                                     */
5205                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                        // The single-quoted case is not adjacent, so control
                                        // returns to stateloop rather than falling through.
5206                                    continue stateloop;
5207                                default:
5208                                    bogusDoctype();
5209                                    /*
5210                                     * Set the DOCTYPE token's force-quirks flag to
5211                                     * on.
5212                                     */
5213                                    // done by bogusDoctype();
5214                                    /*
5215                                     * Switch to the bogus DOCTYPE state.
5216                                     */
5217                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
5218                                    continue stateloop;
5219                            }
5220                        }
5221                        // FALLTHRU DON'T REORDER
5222                    case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
                            // Accumulates the characters of a double-quoted system
                            // identifier in the long string buffer until the closing
                            // quote or a premature '>' is seen. The buffer is converted
                            // to systemIdentifier on either of those two exits.
                            // NOTE(review): nothing in this loop breaks to the
                            // doctypesystemidentifierdoublequotedloop label; every exit is
                            // a break or continue of stateloop.
5223                        doctypesystemidentifierdoublequotedloop: for (;;) {
5224                            if (++pos == endPos) {
5225                                break stateloop;
5226                            }
5227                            c = checkChar(buf, pos);
5228                            /*
5229                             * Consume the next input character:
5230                             */
5231                            switch (c) {
5232                                case '"':
5233                                    /*
5234                                     * U+0022 QUOTATION MARK (") Switch to the after
5235                                     * DOCTYPE system identifier state.
5236                                     */
5237                                    systemIdentifier = longStrBufToString();
5238                                    state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                        // NOTE(review): the target case is directly below,
                                        // yet this goes back through stateloop instead of
                                        // breaking out of the labelled loop as the
                                        // public-identifier counterpart does - confirm
                                        // whether that difference is intentional.
5239                                    continue stateloop;
5240                                case '>':
5241                                    /*
5242                                     * U+003E GREATER-THAN SIGN (>) Parse error.
5243                                     */
5244                                    errGtInSystemId();
5245                                    /*
5246                                     * Set the DOCTYPE token's force-quirks flag to
5247                                     * on.
5248                                     */
5249                                    forceQuirks = true;
5250                                    /*
5251                                     * Emit that DOCTYPE token.
5252                                     */
                                        // Whatever was collected so far becomes the system
                                        // identifier of the emitted token.
5253                                    systemIdentifier = longStrBufToString();
5254                                    emitDoctypeToken(pos);
5255                                    /*
5256                                     * Switch to the data state.
5257                                     */
5258                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5259                                    continue stateloop;
5260                                case '\r':
                                        // A CR is appended through its dedicated helper and
                                        // stateloop is left with state unchanged.
5261                                    appendLongStrBufCarriageReturn();
5262                                    break stateloop;
5263                                case '\n':
5264                                    appendLongStrBufLineFeed();
5265                                    continue;
5266                                case '\u0000':
                                        // A NUL is replaced by U+FFFD before being appended
                                        // by the default branch.
5267                                    c = '\uFFFD';
5268                                    // fall thru
5269                                default:
5270                                    /*
5271                                     * Anything else Append the current input
5272                                     * character to the current DOCTYPE token's
5273                                     * system identifier.
5274                                     */
5275                                    appendLongStrBuf(c);
5276                                    /*
5277                                     * Stay in the DOCTYPE system identifier
5278                                     * (double-quoted) state.
5279                                     */
5280                                    continue;
5281                            }
5282                        }
5283                        // FALLTHRU DON'T REORDER
5284                    case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
                            // Skips whitespace after the system identifier. A '>' emits
                            // the DOCTYPE token; any other character sends the tokenizer
                            // to the bogus DOCTYPE state through
                            // bogusDoctypeWithoutQuirks() rather than bogusDoctype().
5285                        afterdoctypesystemidentifierloop: for (;;) {
5286                            if (++pos == endPos) {
5287                                break stateloop;
5288                            }
5289                            c = checkChar(buf, pos);
5290                            /*
5291                             * Consume the next input character:
5292                             */
5293                            switch (c) {
5294                                case '\r':
5295                                    silentCarriageReturn();
                                        // state is left unchanged when stateloop is exited.
5296                                    break stateloop;
5297                                case '\n':
5298                                    silentLineFeed();
5299                                    // fall thru
5300                                case ' ':
5301                                case '\t':
5302                                case '\u000C':
5303                                    /*
5304                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
5305                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
5306                                     * in the after DOCTYPE system identifier state.
5307                                     */
5308                                    continue;
5309                                case '>':
5310                                    /*
5311                                     * U+003E GREATER-THAN SIGN (>) Emit the current
5312                                     * DOCTYPE token.
5313                                     */
5314                                    emitDoctypeToken(pos);
5315                                    /*
5316                                     * Switch to the data state.
5317                                     */
5318                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5319                                    continue stateloop;
5320                                default:
5321                                    /*
5322                                     * Switch to the bogus DOCTYPE state. (This does
5323                                     * not set the DOCTYPE token's force-quirks flag
5324                                     * to on.)
5325                                     */
5326                                    bogusDoctypeWithoutQuirks();
5327                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
                                        // Leaving the labelled loop falls through into the
                                        // BOGUS_DOCTYPE case directly below.
5328                                    break afterdoctypesystemidentifierloop;
5329                                // continue stateloop;
5330                            }
5331                        }
5332                        // FALLTHRU DON'T REORDER
5333                    case BOGUS_DOCTYPE:
                            // Discards characters of a malformed DOCTYPE until '>' is
                            // found, then emits the DOCTYPE token and returns to the data
                            // state. The loop is unlabelled and only exits via break or
                            // continue of stateloop, so control never falls through into
                            // the case that follows.
5334                        for (;;) {
                                // Some paths arrive here with reconsume set (for example
                                // the keyword-mismatch branch of DOCTYPE_UBLIC); then the
                                // character already held in c is examined first.
5335                            if (reconsume) {
5336                                reconsume = false;
5337                            } else {
5338                                if (++pos == endPos) {
5339                                    break stateloop;
5340                                }
5341                                c = checkChar(buf, pos);
5342                            }
5343                            /*
5344                             * Consume the next input character:
5345                             */
5346                            switch (c) {
5347                                case '>':
5348                                    /*
5349                                     * U+003E GREATER-THAN SIGN (>) Emit that
5350                                     * DOCTYPE token.
5351                                     */
5352                                    emitDoctypeToken(pos);
5353                                    /*
5354                                     * Switch to the data state.
5355                                     */
5356                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5357                                    continue stateloop;
5358                                case '\r':
5359                                    silentCarriageReturn();
                                        // state is left unchanged when stateloop is exited.
5360                                    break stateloop;
5361                                case '\n':
5362                                    silentLineFeed();
5363                                    // fall thru
5364                                default:
5365                                    /*
5366                                     * Anything else Stay in the bogus DOCTYPE
5367                                     * state.
5368                                     */
5369                                    continue;
5370                            }
5371                        }
5372                        // XXX reorder point
5373                    case DOCTYPE_YSTEM:
5374                        doctypeystemloop: for (;;) {
5375                            if (++pos == endPos) {
5376                                break stateloop;
5377                            }
5378                            c = checkChar(buf, pos);
5379                            /*
5380                             * Otherwise, if the six characters starting from the
5381                             * current input character are an ASCII case-insensitive
5382                             * match for the word "SYSTEM", then consume those
5383                             * characters and switch to the before DOCTYPE system
5384                             * identifier state.
5385                             */
5386                            if (index < 5) { // YSTEM.length
5387                                char folded = c;
5388                                if (c >= 'A' && c <= 'Z') {
5389                                    folded += 0x20;
5390                                }
5391                                if (folded != Tokenizer.YSTEM[index]) {
5392                                    bogusDoctype();
5393                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
5394                                    reconsume = true;
5395                                    continue stateloop;
5396                                }
5397                                index++;
5398                                continue stateloop;
5399                            } else {
5400                                state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
5401                                reconsume = true;
5402                                break doctypeystemloop;
5403                                // continue stateloop;
5404                            }
5405                        }
5406                        // FALLTHRU DON'T REORDER
5407                    case AFTER_DOCTYPE_SYSTEM_KEYWORD:
5408                        afterdoctypesystemkeywordloop: for (;;) {
5409                            if (reconsume) {
5410                                reconsume = false;
5411                            } else {
5412                                if (++pos == endPos) {
5413                                    break stateloop;
5414                                }
5415                                c = checkChar(buf, pos);
5416                            }
5417                            /*
5418                             * Consume the next input character:
5419                             */
5420                            switch (c) {
5421                                case '\r':
5422                                    silentCarriageReturn();
5423                                    state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
5424                                    break stateloop;
5425                                case '\n':
5426                                    silentLineFeed();
5427                                    // fall thru
5428                                case ' ':
5429                                case '\t':
5430                                case '\u000C':
5431                                    /*
5432                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
5433                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE
5434                                     * Switch to the before DOCTYPE public
5435                                     * identifier state.
5436                                     */
5437                                    state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
5438                                    break afterdoctypesystemkeywordloop;
5439                                // FALL THROUGH continue stateloop
5440                                case '"':
5441                                    /*
5442                                     * U+0022 QUOTATION MARK (") Parse Error.
5443                                     */
5444                                    errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
5445                                    /*
5446                                     * Set the DOCTYPE token's system identifier to
5447                                     * the empty string (not missing),
5448                                     */
5449                                    clearLongStrBuf();
5450                                    /*
5451                                     * then switch to the DOCTYPE public identifier
5452                                     * (double-quoted) state.
5453                                     */
5454                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
5455                                    continue stateloop;
5456                                case '\'':
5457                                    /*
5458                                     * U+0027 APOSTROPHE (') Parse Error.
5459                                     */
5460                                    errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
5461                                    /*
5462                                     * Set the DOCTYPE token's public identifier to
5463                                     * the empty string (not missing),
5464                                     */
5465                                    clearLongStrBuf();
5466                                    /*
5467                                     * then switch to the DOCTYPE public identifier
5468                                     * (single-quoted) state.
5469                                     */
5470                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
5471                                    continue stateloop;
5472                                case '>':
5473                                    /* U+003E GREATER-THAN SIGN (>) Parse error. */
5474                                    errExpectedPublicId();
5475                                    /*
5476                                     * Set the DOCTYPE token's force-quirks flag to
5477                                     * on.
5478                                     */
5479                                    forceQuirks = true;
5480                                    /*
5481                                     * Emit that DOCTYPE token.
5482                                     */
5483                                    emitDoctypeToken(pos);
5484                                    /*
5485                                     * Switch to the data state.
5486                                     */
5487                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5488                                    continue stateloop;
5489                                default:
5490                                    bogusDoctype();
5491                                    /*
5492                                     * Set the DOCTYPE token's force-quirks flag to
5493                                     * on.
5494                                     */
5495                                    // done by bogusDoctype();
5496                                    /*
5497                                     * Switch to the bogus DOCTYPE state.
5498                                     */
5499                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
5500                                    continue stateloop;
5501                            }
5502                        }
5503                        // FALLTHRU DON'T REORDER
5504                    case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
5505                        beforedoctypesystemidentifierloop: for (;;) {
5506                            if (++pos == endPos) {
5507                                break stateloop;
5508                            }
5509                            c = checkChar(buf, pos);
5510                            /*
5511                             * Consume the next input character:
5512                             */
5513                            switch (c) {
5514                                case '\r':
5515                                    silentCarriageReturn();
5516                                    break stateloop;
5517                                case '\n':
5518                                    silentLineFeed();
5519                                    // fall thru
5520                                case ' ':
5521                                case '\t':
5522                                case '\u000C':
5523                                    /*
5524                                     * U+0009 CHARACTER TABULATION U+000A LINE FEED
5525                                     * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
5526                                     * in the before DOCTYPE system identifier
5527                                     * state.
5528                                     */
5529                                    continue;
5530                                case '"':
5531                                    /*
5532                                     * U+0022 QUOTATION MARK (") Set the DOCTYPE
5533                                     * token's system identifier to the empty string
5534                                     * (not missing),
5535                                     */
5536                                    clearLongStrBuf();
5537                                    /*
5538                                     * then switch to the DOCTYPE system identifier
5539                                     * (double-quoted) state.
5540                                     */
5541                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
5542                                    continue stateloop;
5543                                case '\'':
5544                                    /*
5545                                     * U+0027 APOSTROPHE (') Set the DOCTYPE token's
5546                                     * system identifier to the empty string (not
5547                                     * missing),
5548                                     */
5549                                    clearLongStrBuf();
5550                                    /*
5551                                     * then switch to the DOCTYPE system identifier
5552                                     * (single-quoted) state.
5553                                     */
5554                                    state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
5555                                    break beforedoctypesystemidentifierloop;
5556                                // continue stateloop;
5557                                case '>':
5558                                    /* U+003E GREATER-THAN SIGN (>) Parse error. */
5559                                    errExpectedSystemId();
5560                                    /*
5561                                     * Set the DOCTYPE token's force-quirks flag to
5562                                     * on.
5563                                     */
5564                                    forceQuirks = true;
5565                                    /*
5566                                     * Emit that DOCTYPE token.
5567                                     */
5568                                    emitDoctypeToken(pos);
5569                                    /*
5570                                     * Switch to the data state.
5571                                     */
5572                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5573                                    continue stateloop;
5574                                default:
5575                                    bogusDoctype();
5576                                    /*
5577                                     * Set the DOCTYPE token's force-quirks flag to
5578                                     * on.
5579                                     */
5580                                    // done by bogusDoctype();
5581                                    /*
5582                                     * Switch to the bogus DOCTYPE state.
5583                                     */
5584                                    state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
5585                                    continue stateloop;
5586                            }
5587                        }
5588                        // FALLTHRU DON'T REORDER
5589                    case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
5590                        for (;;) {
5591                            if (++pos == endPos) {
5592                                break stateloop;
5593                            }
5594                            c = checkChar(buf, pos);
5595                            /*
5596                             * Consume the next input character:
5597                             */
5598                            switch (c) {
5599                                case '\'':
5600                                    /*
5601                                     * U+0027 APOSTROPHE (') Switch to the after
5602                                     * DOCTYPE system identifier state.
5603                                     */
5604                                    systemIdentifier = longStrBufToString();
5605                                    state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
5606                                    continue stateloop;
5607                                case '>':
5608                                    errGtInSystemId();
5609                                    /*
5610                                     * Set the DOCTYPE token's force-quirks flag to
5611                                     * on.
5612                                     */
5613                                    forceQuirks = true;
5614                                    /*
5615                                     * Emit that DOCTYPE token.
5616                                     */
5617                                    systemIdentifier = longStrBufToString();
5618                                    emitDoctypeToken(pos);
5619                                    /*
5620                                     * Switch to the data state.
5621                                     */
5622                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5623                                    continue stateloop;
5624                                case '\r':
5625                                    appendLongStrBufCarriageReturn();
5626                                    break stateloop;
5627                                case '\n':
5628                                    appendLongStrBufLineFeed();
5629                                    continue;
5630                                case '\u0000':
5631                                    c = '\uFFFD';
5632                                    // fall thru
5633                                default:
5634                                    /*
5635                                     * Anything else Append the current input
5636                                     * character to the current DOCTYPE token's
5637                                     * system identifier.
5638                                     */
5639                                    appendLongStrBuf(c);
5640                                    /*
5641                                     * Stay in the DOCTYPE system identifier
5642                                     * (double-quoted) state.
5643                                     */
5644                                    continue;
5645                            }
5646                        }
5647                        // XXX reorder point
5648                    case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
5649                        for (;;) {
5650                            if (++pos == endPos) {
5651                                break stateloop;
5652                            }
5653                            c = checkChar(buf, pos);
5654                            /*
5655                             * Consume the next input character:
5656                             */
5657                            switch (c) {
5658                                case '\'':
5659                                    /*
5660                                     * U+0027 APOSTROPHE (') Switch to the after
5661                                     * DOCTYPE public identifier state.
5662                                     */
5663                                    publicIdentifier = longStrBufToString();
5664                                    state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
5665                                    continue stateloop;
5666                                case '>':
5667                                    errGtInPublicId();
5668                                    /*
5669                                     * Set the DOCTYPE token's force-quirks flag to
5670                                     * on.
5671                                     */
5672                                    forceQuirks = true;
5673                                    /*
5674                                     * Emit that DOCTYPE token.
5675                                     */
5676                                    publicIdentifier = longStrBufToString();
5677                                    emitDoctypeToken(pos);
5678                                    /*
5679                                     * Switch to the data state.
5680                                     */
5681                                    state = transition(state, Tokenizer.DATA, reconsume, pos);
5682                                    continue stateloop;
5683                                case '\r':
5684                                    appendLongStrBufCarriageReturn();
5685                                    break stateloop;
5686                                case '\n':
5687                                    appendLongStrBufLineFeed();
5688                                    continue;
5689                                case '\u0000':
5690                                    c = '\uFFFD';
5691                                    // fall thru
5692                                default:
5693                                    /*
5694                                     * Anything else Append the current input
5695                                     * character to the current DOCTYPE token's
5696                                     * public identifier.
5697                                     */
5698                                    appendLongStrBuf(c);
5699                                    /*
5700                                     * Stay in the DOCTYPE public identifier
5701                                     * (single-quoted) state.
5702                                     */
5703                                    continue;
5704                            }
5705                        }
5706                        // END HOTSPOT WORKAROUND
5707                }
5708            }
5709            flushChars(buf, pos);
5710            /*
5711             * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
5712             */
5713            // Save locals
5714            stateSave = state;
5715            returnStateSave = returnState;
5716            return pos;
5717        }
5718        
5719        // HOTSPOT WORKAROUND INSERTION POINT
5720        
5721        // [NOCPP[
5722        
5723        protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException {
5724            return to;
5725        }
5726    
5727        // ]NOCPP]
5728        
5729        private void initDoctypeFields() {
5730            doctypeName = "";
5731            if (systemIdentifier != null) {
5732                Portability.releaseString(systemIdentifier);
5733                systemIdentifier = null;
5734            }
5735            if (publicIdentifier != null) {
5736                Portability.releaseString(publicIdentifier);
5737                publicIdentifier = null;
5738            }
5739            forceQuirks = false;
5740        }
5741    
5742        @Inline private void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
5743                throws SAXException {
5744            silentCarriageReturn();
5745            adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
5746        }
5747    
5748        @Inline private void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
5749                throws SAXException {
5750            silentLineFeed();
5751            adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
5752        }
5753    
5754        @Inline private void appendLongStrBufLineFeed() {
5755            silentLineFeed();
5756            appendLongStrBuf('\n');
5757        }
5758    
5759        @Inline private void appendLongStrBufCarriageReturn() {
5760            silentCarriageReturn();
5761            appendLongStrBuf('\n');
5762        }
5763    
5764        @Inline protected void silentCarriageReturn() {
5765            ++line;
5766            lastCR = true;
5767        }
5768    
5769        @Inline protected void silentLineFeed() {
5770            ++line;
5771        }
5772    
5773        private void emitCarriageReturn(@NoLength char[] buf, int pos)
5774                throws SAXException {
5775            silentCarriageReturn();
5776            flushChars(buf, pos);
5777            tokenHandler.characters(Tokenizer.LF, 0, 1);
5778            cstart = Integer.MAX_VALUE;
5779        }
5780    
5781        private void emitReplacementCharacter(@NoLength char[] buf, int pos)
5782                throws SAXException {
5783            flushChars(buf, pos);
5784            tokenHandler.zeroOriginatingReplacementCharacter();
5785            cstart = pos + 1;
5786        }
5787    
5788        private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos)
5789                throws SAXException {
5790            flushChars(buf, pos);
5791            tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1);
5792            cstart = pos + 1;
5793        }
5794    
5795        private void setAdditionalAndRememberAmpersandLocation(char add) {
5796            additional = add;
5797            // [NOCPP[
5798            ampersandLocation = new LocatorImpl(this);
5799            // ]NOCPP]
5800        }
5801    
5802        private void bogusDoctype() throws SAXException {
5803            errBogusDoctype();
5804            forceQuirks = true;
5805        }
5806    
5807        private void bogusDoctypeWithoutQuirks() throws SAXException {
5808            errBogusDoctype();
5809            forceQuirks = false;
5810        }
5811    
5812        private void emitOrAppendStrBuf(int returnState) throws SAXException {
5813            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
5814                appendStrBufToLongStrBuf();
5815            } else {
5816                emitStrBuf();
5817            }
5818        }
5819    
5820        private void handleNcrValue(int returnState) throws SAXException {
5821            /*
5822             * If one or more characters match the range, then take them all and
5823             * interpret the string of characters as a number (either hexadecimal or
5824             * decimal as appropriate).
5825             */
5826            if (value <= 0xFFFF) {
5827                if (value >= 0x80 && value <= 0x9f) {
5828                    /*
5829                     * If that number is one of the numbers in the first column of
5830                     * the following table, then this is a parse error.
5831                     */
5832                    errNcrInC1Range();
5833                    /*
5834                     * Find the row with that number in the first column, and return
5835                     * a character token for the Unicode character given in the
5836                     * second column of that row.
5837                     */
5838                    @NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80];
5839                    emitOrAppendOne(val, returnState);
5840                    // [NOCPP[
5841                } else if (value == 0xC
5842                        && contentSpacePolicy != XmlViolationPolicy.ALLOW) {
5843                    if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) {
5844                        emitOrAppendOne(Tokenizer.SPACE, returnState);
5845                    } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) {
5846                        fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
5847                    }
5848                    // ]NOCPP]
5849                } else if (value == 0x0) {
5850                    errNcrZero();
5851                    emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
5852                } else if ((value & 0xF800) == 0xD800) {
5853                    errNcrSurrogate();
5854                    emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
5855                } else {
5856                    /*
5857                     * Otherwise, return a character token for the Unicode character
5858                     * whose code point is that number.
5859                     */
5860                    char ch = (char) value;
5861                    // [NOCPP[
5862                    if (value == 0x0D) {
5863                        errNcrCr();
5864                    } else if ((value <= 0x0008) || (value == 0x000B)
5865                            || (value >= 0x000E && value <= 0x001F)) {
5866                        ch = errNcrControlChar(ch);
5867                    } else if (value >= 0xFDD0 && value <= 0xFDEF) {
5868                        errNcrUnassigned();
5869                    } else if ((value & 0xFFFE) == 0xFFFE) {
5870                        ch = errNcrNonCharacter(ch);
5871                    } else if (value >= 0x007F && value <= 0x009F) {
5872                        errNcrControlChar();
5873                    } else {
5874                        maybeWarnPrivateUse(ch);
5875                    }
5876                    // ]NOCPP]
5877                    bmpChar[0] = ch;
5878                    emitOrAppendOne(bmpChar, returnState);
5879                }
5880            } else if (value <= 0x10FFFF) {
5881                // [NOCPP[
5882                maybeWarnPrivateUseAstral();
5883                if ((value & 0xFFFE) == 0xFFFE) {
5884                    errAstralNonCharacter(value);
5885                }
5886                // ]NOCPP]
5887                astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10));
5888                astralChar[1] = (char) (0xDC00 + (value & 0x3FF));
5889                emitOrAppendTwo(astralChar, returnState);
5890            } else {
5891                errNcrOutOfRange();
5892                emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
5893            }
5894        }
5895    
5896        public void eof() throws SAXException {
5897            int state = stateSave;
5898            int returnState = returnStateSave;
5899    
5900            eofloop: for (;;) {
5901                switch (state) {
5902                    case SCRIPT_DATA_LESS_THAN_SIGN:
5903                    case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
5904                        /*
5905                         * Otherwise, emit a U+003C LESS-THAN SIGN character token
5906                         */
5907                        tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
5908                        /*
5909                         * and reconsume the current input character in the data
5910                         * state.
5911                         */
5912                        break eofloop;
5913                    case TAG_OPEN:
5914                        /*
5915                         * The behavior of this state depends on the content model
5916                         * flag.
5917                         */
5918                        /*
5919                         * Anything else Parse error.
5920                         */
5921                        errEofAfterLt();
5922                        /*
5923                         * Emit a U+003C LESS-THAN SIGN character token
5924                         */
5925                        tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
5926                        /*
5927                         * and reconsume the current input character in the data
5928                         * state.
5929                         */
5930                        break eofloop;
5931                    case RAWTEXT_RCDATA_LESS_THAN_SIGN:
5932                        /*
5933                         * Emit a U+003C LESS-THAN SIGN character token
5934                         */
5935                        tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
5936                        /*
5937                         * and reconsume the current input character in the RCDATA
5938                         * state.
5939                         */
5940                        break eofloop;
5941                    case NON_DATA_END_TAG_NAME:
5942                        /*
5943                         * Emit a U+003C LESS-THAN SIGN character token, a U+002F
5944                         * SOLIDUS character token,
5945                         */
5946                        tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
5947                        /*
5948                         * a character token for each of the characters in the
5949                         * temporary buffer (in the order they were added to the
5950                         * buffer),
5951                         */
5952                        emitStrBuf();
5953                        /*
5954                         * and reconsume the current input character in the RCDATA
5955                         * state.
5956                         */
5957                        break eofloop;
5958                    case CLOSE_TAG_OPEN:
5959                        /* EOF Parse error. */
5960                        errEofAfterLt();
5961                        /*
5962                         * Emit a U+003C LESS-THAN SIGN character token and a U+002F
5963                         * SOLIDUS character token.
5964                         */
5965                        tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
5966                        /*
5967                         * Reconsume the EOF character in the data state.
5968                         */
5969                        break eofloop;
5970                    case TAG_NAME:
5971                        /*
5972                         * EOF Parse error.
5973                         */
5974                        errEofInTagName();
5975                        /*
5976                         * Reconsume the EOF character in the data state.
5977                         */
5978                        break eofloop;
5979                    case BEFORE_ATTRIBUTE_NAME:
5980                    case AFTER_ATTRIBUTE_VALUE_QUOTED:
5981                    case SELF_CLOSING_START_TAG:
5982                        /* EOF Parse error. */
5983                        errEofWithoutGt();
5984                        /*
5985                         * Reconsume the EOF character in the data state.
5986                         */
5987                        break eofloop;
5988                    case ATTRIBUTE_NAME:
5989                        /*
5990                         * EOF Parse error.
5991                         */
5992                        errEofInAttributeName();
5993                        /*
5994                         * Reconsume the EOF character in the data state.
5995                         */
5996                        break eofloop;
5997                    case AFTER_ATTRIBUTE_NAME:
5998                    case BEFORE_ATTRIBUTE_VALUE:
5999                        /* EOF Parse error. */
6000                        errEofWithoutGt();
6001                        /*
6002                         * Reconsume the EOF character in the data state.
6003                         */
6004                        break eofloop;
6005                    case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
6006                    case ATTRIBUTE_VALUE_SINGLE_QUOTED:
6007                    case ATTRIBUTE_VALUE_UNQUOTED:
6008                        /* EOF Parse error. */
6009                        errEofInAttributeValue();
6010                        /*
6011                         * Reconsume the EOF character in the data state.
6012                         */
6013                        break eofloop;
6014                    case BOGUS_COMMENT:
6015                        emitComment(0, 0);
6016                        break eofloop;
6017                    case BOGUS_COMMENT_HYPHEN:
6018                        // [NOCPP[
6019                        maybeAppendSpaceToBogusComment();
6020                        // ]NOCPP]
6021                        emitComment(0, 0);
6022                        break eofloop;
6023                    case MARKUP_DECLARATION_OPEN:
6024                        errBogusComment();
6025                        clearLongStrBuf();
6026                        emitComment(0, 0);
6027                        break eofloop;
6028                    case MARKUP_DECLARATION_HYPHEN:
6029                        errBogusComment();
6030                        emitComment(0, 0);
6031                        break eofloop;
6032                    case MARKUP_DECLARATION_OCTYPE:
6033                        if (index < 6) {
6034                            errBogusComment();
6035                            emitComment(0, 0);
6036                        } else {
6037                            /* EOF Parse error. */
6038                            errEofInDoctype();
6039                            /*
6040                             * Create a new DOCTYPE token. Set its force-quirks flag
6041                             * to on.
6042                             */
6043                            doctypeName = "";
6044                            if (systemIdentifier != null) {
6045                                Portability.releaseString(systemIdentifier);
6046                                systemIdentifier = null;
6047                            }
6048                            if (publicIdentifier != null) {
6049                                Portability.releaseString(publicIdentifier);
6050                                publicIdentifier = null;
6051                            }
6052                            forceQuirks = true;
6053                            /*
6054                             * Emit the token.
6055                             */
6056                            emitDoctypeToken(0);
6057                            /*
6058                             * Reconsume the EOF character in the data state.
6059                             */
6060                            break eofloop;
6061                        }
6062                        break eofloop;
6063                    case COMMENT_START:
6064                    case COMMENT:
6065                        /*
6066                         * EOF Parse error.
6067                         */
6068                        errEofInComment();
6069                        /* Emit the comment token. */
6070                        emitComment(0, 0);
6071                        /*
6072                         * Reconsume the EOF character in the data state.
6073                         */
6074                        break eofloop;
6075                    case COMMENT_END:
6076                        errEofInComment();
6077                        /* Emit the comment token. */
6078                        emitComment(2, 0);
6079                        /*
6080                         * Reconsume the EOF character in the data state.
6081                         */
6082                        break eofloop;
6083                    case COMMENT_END_DASH:
6084                    case COMMENT_START_DASH:
6085                        errEofInComment();
6086                        /* Emit the comment token. */
6087                        emitComment(1, 0);
6088                        /*
6089                         * Reconsume the EOF character in the data state.
6090                         */
6091                        break eofloop;
6092                    case COMMENT_END_BANG:
6093                        errEofInComment();
6094                        /* Emit the comment token. */
6095                        emitComment(3, 0);
6096                        /*
6097                         * Reconsume the EOF character in the data state.
6098                         */
6099                        break eofloop;
6100                    case DOCTYPE:
6101                    case BEFORE_DOCTYPE_NAME:
6102                        errEofInDoctype();
6103                        /*
6104                         * Create a new DOCTYPE token. Set its force-quirks flag to
6105                         * on.
6106                         */
6107                        forceQuirks = true;
6108                        /*
6109                         * Emit the token.
6110                         */
6111                        emitDoctypeToken(0);
6112                        /*
6113                         * Reconsume the EOF character in the data state.
6114                         */
6115                        break eofloop;
6116                    case DOCTYPE_NAME:
6117                        errEofInDoctype();
6118                        strBufToDoctypeName();
6119                        /*
6120                         * Set the DOCTYPE token's force-quirks flag to on.
6121                         */
6122                        forceQuirks = true;
6123                        /*
6124                         * Emit that DOCTYPE token.
6125                         */
6126                        emitDoctypeToken(0);
6127                        /*
6128                         * Reconsume the EOF character in the data state.
6129                         */
6130                        break eofloop;
6131                    case DOCTYPE_UBLIC:
6132                    case DOCTYPE_YSTEM:
6133                    case AFTER_DOCTYPE_NAME:
6134                    case AFTER_DOCTYPE_PUBLIC_KEYWORD:
6135                    case AFTER_DOCTYPE_SYSTEM_KEYWORD:
6136                    case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
6137                        errEofInDoctype();
6138                        /*
6139                         * Set the DOCTYPE token's force-quirks flag to on.
6140                         */
6141                        forceQuirks = true;
6142                        /*
6143                         * Emit that DOCTYPE token.
6144                         */
6145                        emitDoctypeToken(0);
6146                        /*
6147                         * Reconsume the EOF character in the data state.
6148                         */
6149                        break eofloop;
6150                    case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
6151                    case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
6152                        /* EOF Parse error. */
6153                        errEofInPublicId();
6154                        /*
6155                         * Set the DOCTYPE token's force-quirks flag to on.
6156                         */
6157                        forceQuirks = true;
6158                        /*
6159                         * Emit that DOCTYPE token.
6160                         */
6161                        publicIdentifier = longStrBufToString();
6162                        emitDoctypeToken(0);
6163                        /*
6164                         * Reconsume the EOF character in the data state.
6165                         */
6166                        break eofloop;
6167                    case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
6168                    case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
6169                    case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
6170                        errEofInDoctype();
6171                        /*
6172                         * Set the DOCTYPE token's force-quirks flag to on.
6173                         */
6174                        forceQuirks = true;
6175                        /*
6176                         * Emit that DOCTYPE token.
6177                         */
6178                        emitDoctypeToken(0);
6179                        /*
6180                         * Reconsume the EOF character in the data state.
6181                         */
6182                        break eofloop;
6183                    case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
6184                    case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
6185                        /* EOF Parse error. */
6186                        errEofInSystemId();
6187                        /*
6188                         * Set the DOCTYPE token's force-quirks flag to on.
6189                         */
6190                        forceQuirks = true;
6191                        /*
6192                         * Emit that DOCTYPE token.
6193                         */
6194                        systemIdentifier = longStrBufToString();
6195                        emitDoctypeToken(0);
6196                        /*
6197                         * Reconsume the EOF character in the data state.
6198                         */
6199                        break eofloop;
6200                    case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
6201                        errEofInDoctype();
6202                        /*
6203                         * Set the DOCTYPE token's force-quirks flag to on.
6204                         */
6205                        forceQuirks = true;
6206                        /*
6207                         * Emit that DOCTYPE token.
6208                         */
6209                        emitDoctypeToken(0);
6210                        /*
6211                         * Reconsume the EOF character in the data state.
6212                         */
6213                        break eofloop;
6214                    case BOGUS_DOCTYPE:
6215                        /*
6216                         * Emit that DOCTYPE token.
6217                         */
6218                        emitDoctypeToken(0);
6219                        /*
6220                         * Reconsume the EOF character in the data state.
6221                         */
6222                        break eofloop;
6223                    case CONSUME_CHARACTER_REFERENCE:
6224                        /*
                     * Unlike the definition in the spec, this state does not
6226                         * return a value and never requires the caller to
6227                         * backtrack. This state takes care of emitting characters
6228                         * or appending to the current attribute value. It also
6229                         * takes care of that in the case when consuming the entity
6230                         * fails.
6231                         */
6232                        /*
6233                         * This section defines how to consume an entity. This
6234                         * definition is used when parsing entities in text and in
6235                         * attributes.
6236                         * 
6237                         * The behavior depends on the identity of the next
6238                         * character (the one immediately after the U+0026 AMPERSAND
6239                         * character):
6240                         */
6241    
6242                        emitOrAppendStrBuf(returnState);
6243                        state = returnState;
6244                        continue;
6245                    case CHARACTER_REFERENCE_HILO_LOOKUP:
6246                        errNoNamedCharacterMatch();
6247                        emitOrAppendStrBuf(returnState);
6248                        state = returnState;
6249                        continue;
6250                    case CHARACTER_REFERENCE_TAIL:
6251                        outer: for (;;) {
6252                            char c = '\u0000';
6253                            entCol++;
6254                            /*
6255                             * Consume the maximum number of characters possible,
6256                             * with the consumed characters matching one of the
6257                             * identifiers in the first column of the named
6258                             * character references table (in a case-sensitive
6259                             * manner).
6260                             */
6261                            hiloop: for (;;) {
6262                                if (hi == -1) {
6263                                    break hiloop;
6264                                }
6265                                if (entCol == NamedCharacters.NAMES[hi].length()) {
6266                                    break hiloop;
6267                                }
6268                                if (entCol > NamedCharacters.NAMES[hi].length()) {
6269                                    break outer;
6270                                } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
6271                                    hi--;
6272                                } else {
6273                                    break hiloop;
6274                                }
6275                            }
6276    
6277                            loloop: for (;;) {
6278                                if (hi < lo) {
6279                                    break outer;
6280                                }
6281                                if (entCol == NamedCharacters.NAMES[lo].length()) {
6282                                    candidate = lo;
6283                                    strBufMark = strBufLen;
6284                                    lo++;
6285                                } else if (entCol > NamedCharacters.NAMES[lo].length()) {
6286                                    break outer;
6287                                } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
6288                                    lo++;
6289                                } else {
6290                                    break loloop;
6291                                }
6292                            }
6293                            if (hi < lo) {
6294                                break outer;
6295                            }
6296                            continue;
6297                        }
6298    
6299                        if (candidate == -1) {
6300                            /*
6301                             * If no match can be made, then this is a parse error.
6302                             */
6303                            errNoNamedCharacterMatch();
6304                            emitOrAppendStrBuf(returnState);
6305                            state = returnState;
6306                            continue eofloop;
6307                        } else {
6308                            @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
6309                            if (candidateName.length() == 0
6310                                    || candidateName.charAt(candidateName.length() - 1) != ';') {
6311                                /*
6312                                 * If the last character matched is not a U+003B
6313                                 * SEMICOLON (;), there is a parse error.
6314                                 */
6315                                if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
6316                                    /*
6317                                     * If the entity is being consumed as part of an
6318                                     * attribute, and the last character matched is
6319                                     * not a U+003B SEMICOLON (;),
6320                                     */
6321                                    char ch;
6322                                    if (strBufMark == strBufLen) {
6323                                        ch = '\u0000';
6324                                    } else {
6325                                        ch = strBuf[strBufMark];
6326                                    }
6327                                    if ((ch >= '0' && ch <= '9')
6328                                            || (ch >= 'A' && ch <= 'Z')
6329                                            || (ch >= 'a' && ch <= 'z')) {
6330                                        /*
6331                                         * and the next character is in the range
6332                                         * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
6333                                         * U+0041 LATIN CAPITAL LETTER A to U+005A
6334                                         * LATIN CAPITAL LETTER Z, or U+0061 LATIN
6335                                         * SMALL LETTER A to U+007A LATIN SMALL
6336                                         * LETTER Z, then, for historical reasons,
6337                                         * all the characters that were matched
6338                                         * after the U+0026 AMPERSAND (&) must be
6339                                         * unconsumed, and nothing is returned.
6340                                         */
6341                                        errNoNamedCharacterMatch();
6342                                        appendStrBufToLongStrBuf();
6343                                        state = returnState;
6344                                        continue eofloop;
6345                                    }
6346                                }
6347                                if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
6348                                    errUnescapedAmpersandInterpretedAsCharacterReference();
6349                                } else {
6350                                    errNotSemicolonTerminated();
6351                                }
6352                            }
6353    
6354                            /*
6355                             * Otherwise, return a character token for the character
6356                             * corresponding to the entity name (as given by the
6357                             * second column of the named character references
6358                             * table).
6359                             */
6360                            @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
6361                            if (
6362                            // [NOCPP[
6363                            val.length == 1
6364                            // ]NOCPP]
6365                            // CPPONLY: val[1] == 0
6366                            ) {
6367                                emitOrAppendOne(val, returnState);
6368                            } else {
6369                                emitOrAppendTwo(val, returnState);
6370                            }
6371                            // this is so complicated!
6372                            if (strBufMark < strBufLen) {
6373                                if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
6374                                    for (int i = strBufMark; i < strBufLen; i++) {
6375                                        appendLongStrBuf(strBuf[i]);
6376                                    }
6377                                } else {
6378                                    tokenHandler.characters(strBuf, strBufMark,
6379                                            strBufLen - strBufMark);
6380                                }
6381                            }
6382                            state = returnState;
6383                            continue eofloop;
6384                            /*
6385                             * If the markup contains I'm &notit; I tell you, the
6386                             * entity is parsed as "not", as in, I'm ¬it; I tell
6387                             * you. But if the markup was I'm &notin; I tell you,
6388                             * the entity would be parsed as "notin;", resulting in
6389                             * I'm ∉ I tell you.
6390                             */
6391                        }
6392                    case CONSUME_NCR:
6393                    case DECIMAL_NRC_LOOP:
6394                    case HEX_NCR_LOOP:
6395                        /*
6396                         * If no characters match the range, then don't consume any
6397                         * characters (and unconsume the U+0023 NUMBER SIGN
6398                         * character and, if appropriate, the X character). This is
6399                         * a parse error; nothing is returned.
6400                         * 
6401                         * Otherwise, if the next character is a U+003B SEMICOLON,
6402                         * consume that too. If it isn't, there is a parse error.
6403                         */
6404                        if (!seenDigits) {
6405                            errNoDigitsInNCR();
6406                            emitOrAppendStrBuf(returnState);
6407                            state = returnState;
6408                            continue;
6409                        } else {
6410                            errCharRefLacksSemicolon();
6411                        }
6412                        // WARNING previous state sets reconsume
6413                        handleNcrValue(returnState);
6414                        state = returnState;
6415                        continue;
6416                    case CDATA_RSQB:
6417                        tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
6418                        break eofloop;
6419                    case CDATA_RSQB_RSQB:
6420                        tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
6421                        break eofloop;
6422                    case DATA:
6423                    default:
6424                        break eofloop;
6425                }
6426            }
6427            // case DATA:
6428            /*
6429             * EOF Emit an end-of-file token.
6430             */
6431            tokenHandler.eof();
6432            return;
6433        }
6434    
6435        private void emitDoctypeToken(int pos) throws SAXException {
6436            cstart = pos + 1;
6437            tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
6438                    forceQuirks);
6439            // It is OK and sufficient to release these here, since
6440            // there's no way out of the doctype states than through paths
6441            // that call this method.
6442            doctypeName = null;
6443            Portability.releaseString(publicIdentifier);
6444            publicIdentifier = null;
6445            Portability.releaseString(systemIdentifier);
6446            systemIdentifier = null;
6447        }
6448    
    /**
     * Reads one character from the given buffer.
     * 
     * This implementation simply returns <code>buf[pos]</code> and does not
     * throw <code>SAXException</code> itself.
     * NOTE(review): the method is protected and declares the exception,
     * presumably so that an overriding subclass can inspect the character
     * and report problems -- confirm against the subclasses.
     * 
     * @param buf
     *            the buffer to read from
     * @param pos
     *            index into <code>buf</code> of the character to return
     * @return the character stored at <code>buf[pos]</code>
     * @throws SAXException
     *             declared in the signature; not thrown by this body
     */
    @Inline protected char checkChar(@NoLength char[] buf, int pos)
            throws SAXException {
        return buf[pos];
    }
6453    
6454        // [NOCPP[
6455    
    /**
     * Answers the "already complained about non-ASCII" query.
     * 
     * This implementation does not read any field: it unconditionally
     * answers <code>true</code>.
     * NOTE(review): presumably a subclass that really tracks the flag
     * overrides this -- confirm.
     * 
     * @return always <code>true</code> in this implementation
     */
    public boolean isAlreadyComplainedAboutNonAscii() {
        return true;
    }
6464    
6465        // ]NOCPP]
6466    
6467        public boolean internalEncodingDeclaration(String internalCharset)
6468                throws SAXException {
6469            if (encodingDeclarationHandler != null) {
6470                return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset);
6471            }
6472            return false;
6473        }
6474    
6475        /**
6476         * @param val
6477         * @throws SAXException
6478         */
6479        private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
6480                throws SAXException {
6481            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
6482                appendLongStrBuf(val[0]);
6483                appendLongStrBuf(val[1]);
6484            } else {
6485                tokenHandler.characters(val, 0, 2);
6486            }
6487        }
6488    
6489        private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
6490                throws SAXException {
6491            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
6492                appendLongStrBuf(val[0]);
6493            } else {
6494                tokenHandler.characters(val, 0, 1);
6495            }
6496        }
6497    
6498        public void end() throws SAXException {
6499            strBuf = null;
6500            longStrBuf = null;
6501            doctypeName = null;
6502            if (systemIdentifier != null) {
6503                Portability.releaseString(systemIdentifier);
6504                systemIdentifier = null;
6505            }
6506            if (publicIdentifier != null) {
6507                Portability.releaseString(publicIdentifier);
6508                publicIdentifier = null;
6509            }
6510            if (tagName != null) {
6511                tagName.release();
6512                tagName = null;
6513            }
6514            if (attributeName != null) {
6515                attributeName.release();
6516                attributeName = null;
6517            }
6518            tokenHandler.endTokenization();
6519            if (attributes != null) {
6520                attributes.clear(mappingLangToXmlLang);
6521                Portability.delete(attributes);
6522                attributes = null;
6523            }
6524        }
6525    
6526        public void requestSuspension() {
6527            shouldSuspend = true;
6528        }
6529    
6530        // [NOCPP[
6531        
6532        public void becomeConfident() {
6533            confident = true;
6534        }
6535    
    /**
     * Answers the "next character is on a new line" query.
     * 
     * This implementation does not read any field: it unconditionally
     * answers <code>false</code>.
     * NOTE(review): presumably a subclass that tracks source positions
     * overrides this -- confirm.
     * 
     * @return always <code>false</code> in this implementation
     */
    public boolean isNextCharOnNewLine() {
        return false;
    }
6544    
6545        public boolean isPrevCR() {
6546            return lastCR;
6547        }
6548    
    /**
     * Returns the line.
     * 
     * This implementation does not track a line number and unconditionally
     * returns <code>-1</code>.
     * NOTE(review): -1 presumably stands for "unknown", with a
     * position-tracking subclass supplying real values -- confirm.
     * 
     * @return always <code>-1</code> in this implementation
     */
    public int getLine() {
        return -1;
    }
6557    
    /**
     * Returns the col.
     * 
     * This implementation does not track a column number and
     * unconditionally returns <code>-1</code>.
     * NOTE(review): -1 presumably stands for "unknown", with a
     * position-tracking subclass supplying real values -- confirm.
     * 
     * @return always <code>-1</code> in this implementation
     */
    public int getCol() {
        return -1;
    }
6566    
6567        // ]NOCPP]
6568        
6569        public boolean isInDataState() {
6570            return (stateSave == DATA);
6571        }
6572    
6573        public void resetToDataState() {
6574            strBufLen = 0;
6575            longStrBufLen = 0;
6576            stateSave = Tokenizer.DATA;
6577            // line = 1; XXX line numbers
6578            lastCR = false;
6579            index = 0;
6580            forceQuirks = false;
6581            additional = '\u0000';
6582            entCol = -1;
6583            firstCharKey = -1;
6584            lo = 0;
6585            hi = 0; // will always be overwritten before use anyway
6586            candidate = -1;
6587            strBufMark = 0;
6588            prevValue = -1;
6589            value = 0;
6590            seenDigits = false;
6591            endTag = false;
6592            shouldSuspend = false;
6593            initDoctypeFields();
6594            if (tagName != null) {
6595                tagName.release();
6596                tagName = null;
6597            }
6598            if (attributeName != null) {
6599                attributeName.release();
6600                attributeName = null;
6601            }
6602            // [NOCPP[
6603            if (newAttributesEachTime) {
6604                // ]NOCPP]
6605                if (attributes != null) {
6606                    Portability.delete(attributes);
6607                    attributes = null;
6608                }
6609                // [NOCPP[
6610            }
6611            // ]NOCPP]
6612        }
6613    
6614        public void loadState(Tokenizer other) throws SAXException {
6615            strBufLen = other.strBufLen;
6616            if (strBufLen > strBuf.length) {
6617                strBuf = new char[strBufLen];
6618            }
6619            System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen);
6620    
6621            longStrBufLen = other.longStrBufLen;
6622            if (longStrBufLen > longStrBuf.length) {
6623                longStrBuf = new char[longStrBufLen];
6624            }
6625            System.arraycopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen);
6626    
6627            stateSave = other.stateSave;
6628            returnStateSave = other.returnStateSave;
6629            endTagExpectation = other.endTagExpectation;
6630            endTagExpectationAsArray = other.endTagExpectationAsArray;
6631            // line = 1; XXX line numbers
6632            lastCR = other.lastCR;
6633            index = other.index;
6634            forceQuirks = other.forceQuirks;
6635            additional = other.additional;
6636            entCol = other.entCol;
6637            firstCharKey = other.firstCharKey;
6638            lo = other.lo;
6639            hi = other.hi;
6640            candidate = other.candidate;
6641            strBufMark = other.strBufMark;
6642            prevValue = other.prevValue;
6643            value = other.value;
6644            seenDigits = other.seenDigits;
6645            endTag = other.endTag;
6646            shouldSuspend = false;
6647    
6648            if (other.doctypeName == null) {
6649                doctypeName = null;
6650            } else {
6651                doctypeName = Portability.newLocalFromLocal(other.doctypeName,
6652                        interner);
6653            }
6654    
6655            Portability.releaseString(systemIdentifier);
6656            if (other.systemIdentifier == null) {
6657                systemIdentifier = null;
6658            } else {
6659                systemIdentifier = Portability.newStringFromString(other.systemIdentifier);
6660            }
6661    
6662            Portability.releaseString(publicIdentifier);
6663            if (other.publicIdentifier == null) {
6664                publicIdentifier = null;
6665            } else {
6666                publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
6667            }
6668    
6669            if (tagName != null) {
6670                tagName.release();
6671            }
6672            if (other.tagName == null) {
6673                tagName = null;
6674            } else {
6675                tagName = other.tagName.cloneElementName(interner);
6676            }
6677    
6678            if (attributeName != null) {
6679                attributeName.release();
6680            }
6681            if (other.attributeName == null) {
6682                attributeName = null;
6683            } else {
6684                attributeName = other.attributeName.cloneAttributeName(interner);
6685            }
6686    
6687            if (attributes != null) {
6688                Portability.delete(attributes);
6689            }
6690            if (other.attributes == null) {
6691                attributes = null;
6692            } else {
6693                attributes = other.attributes.cloneAttributes(interner);
6694            }
6695        }
6696    
    /**
     * Resets the fields below to their initial values and then calls
     * <code>resetToDataState()</code>.
     *
     * <p>Concretely: <code>confident</code> is cleared, fresh 64-char and
     * 1024-char arrays are installed as <code>strBuf</code> and
     * <code>longStrBuf</code>, and <code>line</code> is set to 1. Inside the
     * <code>[NOCPP[ ... ]NOCPP]</code> region, <code>html4</code> and
     * <code>metaBoundaryPassed</code> are cleared,
     * <code>wantsComments</code> is refreshed from the token handler, and a
     * new <code>HtmlAttributes</code> is created when
     * <code>newAttributesEachTime</code> is false.
     *
     * @throws SAXException
     *             declared by the signature; presumably propagated from the
     *             token handler call or <code>resetToDataState()</code> --
     *             confirm
     */
    public void initializeWithoutStarting() throws SAXException {
        confident = false;
        strBuf = new char[64];
        longStrBuf = new char[1024];
        line = 1;
        // NOTE(review): the NOCPP markers presumably fence off Java-only code
        // for the C++ translator -- keep them and their contents intact.
        // [NOCPP[
        html4 = false;
        metaBoundaryPassed = false;
        wantsComments = tokenHandler.wantsComments();
        // A single attribute holder is allocated up front only when a new one
        // is not wanted each time.
        if (!newAttributesEachTime) {
            attributes = new HtmlAttributes(mappingLangToXmlLang);
        }
        // ]NOCPP]
        resetToDataState();
    }
6712    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for unexpected content after
     * "&lt;/" -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errGarbageAfterLtSlash() throws SAXException {
    }
6715    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for the sequence "&lt;/&gt;"
     * -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errLtSlashGt() throws SAXException {
    }
6718    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably a warning about "&lt;/" occurring in
     * RCDATA content -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errWarnLtSlashInRcdata() throws SAXException {
    }
6721    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably an HTML4-specific error about "&lt;/"
     * occurring in RCDATA content -- confirm at the call site.
     *
     * @param folded not read by this empty body (presumably a case-folded
     *        character -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errHtml4LtSlashInRcdata(char folded) throws SAXException {
    }
6724    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a character reference that
     * lacks its terminating semicolon -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errCharRefLacksSemicolon() throws SAXException {
    }
6727    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference that contains no digits -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNoDigitsInNCR() throws SAXException {
    }
6730    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for "&gt;" inside a doctype
     * system identifier -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errGtInSystemId() throws SAXException {
    }
6733    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for "&gt;" inside a doctype
     * public identifier -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errGtInPublicId() throws SAXException {
    }
6736    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a doctype that has no name
     * -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNamelessDoctype() throws SAXException {
    }
6739    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for consecutive hyphens
     * (likely inside a comment) -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errConsecutiveHyphens() throws SAXException {
    }
6742    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when a comment ends
     * prematurely -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errPrematureEndOfComment() throws SAXException {
    }
6745    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a bogus comment -- confirm
     * at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errBogusComment() throws SAXException {
    }
6748    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a problematic character,
     * possibly a null character, in an unquoted attribute value -- confirm
     * at the call site.
     *
     * @param c not read by this empty body (presumably the character in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
    }
6751    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a slash that is not
     * followed by "&gt;" -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errSlashNotFollowedByGt() throws SAXException {
    }
6754    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably an HTML4-specific error about XML-style
     * void element syntax -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errHtml4XmlVoidSyntax() throws SAXException {
    }
6757    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when attributes are not
     * separated by space -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNoSpaceBetweenAttributes() throws SAXException {
    }
6760    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably an HTML4-specific error about a non-name
     * character in an unquoted attribute value -- confirm at the call site.
     *
     * @param c not read by this empty body (presumably the character in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errHtml4NonNameInUnquotedAttribute(char c)
            throws SAXException {
    }
6764    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for "&lt;", "=", a grave
     * accent or a null character in an unquoted attribute value -- confirm
     * at the call site.
     *
     * @param c not read by this empty body (presumably the character in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
            throws SAXException {
    }
6768    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a missing attribute value
     * -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errAttributeValueMissing() throws SAXException {
    }
6771    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a bad character, possibly
     * a null character, before an attribute name -- confirm at the call
     * site.
     *
     * @param c not read by this empty body (presumably the character in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errBadCharBeforeAttributeNameOrNull(char c)
            throws SAXException {
    }
6775    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for "=" appearing before an
     * attribute name -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEqualsSignBeforeAttributeName() throws SAXException {
    }
6778    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a bad character following
     * "&lt;" -- confirm at the call site.
     *
     * @param c not read by this empty body (presumably the character in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errBadCharAfterLt(char c) throws SAXException {
    }
6781    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for the sequence "&lt;&gt;"
     * -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errLtGt() throws SAXException {
    }
6784    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when a processing instruction
     * is encountered -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errProcessingInstruction() throws SAXException {
    }
6787    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when an unescaped ampersand
     * ends up being interpreted as a character reference -- confirm at the
     * call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errUnescapedAmpersandInterpretedAsCharacterReference()
            throws SAXException {
    }
6791    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for something (likely a
     * character reference) not terminated by a semicolon -- confirm at the
     * call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNotSemicolonTerminated() throws SAXException {
    }
6794    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when no named character
     * reference matches -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNoNamedCharacterMatch() throws SAXException {
    }
6797    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a quote character before
     * an attribute name -- confirm at the call site.
     *
     * @param c not read by this empty body (presumably the character in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errQuoteBeforeAttributeName(char c) throws SAXException {
    }
6800    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a quote, "&lt;" or a null
     * character inside an attribute name -- confirm at the call site.
     *
     * @param c not read by this empty body (presumably the character in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errQuoteOrLtInAttributeNameOrNull(char c)
            throws SAXException {
    }
6804    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when a public identifier was
     * expected but not found -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errExpectedPublicId() throws SAXException {
    }
6807    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a bogus doctype -- confirm
     * at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errBogusDoctype() throws SAXException {
    }
6810    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably a chance to warn about an astral
     * private-use character -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void maybeWarnPrivateUseAstral() throws SAXException {
    }
6813    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably a chance to warn about a private-use
     * character -- confirm at the call site.
     *
     * @param ch not read by this empty body (presumably the character to
     *        examine -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void maybeWarnPrivateUse(char ch) throws SAXException {
    }
6816    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably a chance to report attributes present on
     * an end tag -- confirm at the call site.
     *
     * @param attrs not read by this empty body (presumably the end tag's
     *        attributes -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
            throws SAXException {
    }
6820    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably a chance to report a self-closing slash
     * on an end tag -- confirm at the call site.
     *
     * @param selfClosing not read by this empty body (presumably whether the
     *        tag carried a trailing slash -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void maybeErrSlashInEndTag(boolean selfClosing)
            throws SAXException {
    }
6824    
    /**
     * Hook that, in this class, only hands its argument back unchanged.
     * Going by the name, presumably signalled for a numeric character
     * reference that expands to a non-character -- confirm at the call site.
     *
     * @param ch the character passed in by the caller
     * @return <code>ch</code>, unmodified, in this implementation
     * @throws SAXException never from this body
     */
    protected char errNcrNonCharacter(char ch) throws SAXException {
        return ch;
    }
6828    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for an astral non-character
     * -- confirm at the call site.
     *
     * @param ch not read by this empty body (presumably the code point in
     *        question -- confirm)
     * @throws SAXException never from this empty body
     */
    protected void errAstralNonCharacter(int ch) throws SAXException {
    }
6831    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference that expands to a surrogate -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNcrSurrogate() throws SAXException {
    }
6834    
    /**
     * Hook that, in this class, only hands its argument back unchanged.
     * Going by the name, presumably signalled for a numeric character
     * reference that expands to a control character -- confirm at the call
     * site. A no-argument overload with the same name also exists in this
     * class.
     *
     * @param ch the character passed in by the caller
     * @return <code>ch</code>, unmodified, in this implementation
     * @throws SAXException never from this body
     */
    protected char errNcrControlChar(char ch) throws SAXException {
        return ch;
    }
6838    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference that expands to a carriage return -- confirm at the call
     * site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNcrCr() throws SAXException {
    }
6841    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference whose value lies in the C1 controls range -- confirm at the
     * call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNcrInC1Range() throws SAXException {
    }
6844    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside a
     * public identifier -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInPublicId() throws SAXException {
    }
6847    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside a
     * comment -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInComment() throws SAXException {
    }
6850    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside a
     * doctype -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInDoctype() throws SAXException {
    }
6853    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside an
     * attribute value -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInAttributeValue() throws SAXException {
    }
6856    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside an
     * attribute name -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInAttributeName() throws SAXException {
    }
6859    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends before a
     * closing "&gt;" was seen -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofWithoutGt() throws SAXException {
    }
6862    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside a tag
     * name -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInTagName() throws SAXException {
    }
6865    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside an end
     * tag -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInEndTag() throws SAXException {
    }
6868    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends right after
     * "&lt;" -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofAfterLt() throws SAXException {
    }
6871    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference whose value is out of range -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNcrOutOfRange() throws SAXException {
    }
6874    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference that expands to an unassigned code point -- confirm at the
     * call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNcrUnassigned() throws SAXException {
    }
6877    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a duplicate attribute --
     * confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errDuplicateAttribute() throws SAXException {
    }
6880    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when input ends inside a
     * system identifier -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errEofInSystemId() throws SAXException {
    }
6883    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when a system identifier was
     * expected but not found -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errExpectedSystemId() throws SAXException {
    }
6886    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when the space before the
     * doctype name is missing -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
    }
6889    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for the sequence "--!" --
     * confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errHyphenHyphenBang() throws SAXException {
    }
6892    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference that expands to a control character -- confirm at the call
     * site. An overload taking and returning a <code>char</code> also exists
     * in this class.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNcrControlChar() throws SAXException {
    }
6895    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled for a numeric character
     * reference whose value is zero -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNcrZero() throws SAXException {
    }
6898    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when no space separates the
     * doctype's SYSTEM keyword from the following quote -- confirm at the
     * call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
            throws SAXException {
    }
6902    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when no space separates the
     * public identifier from the system identifier -- confirm at the call
     * site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
    }
6905    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably signalled when no space separates the
     * doctype's PUBLIC keyword from the following quote -- confirm at the
     * call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
            throws SAXException {
    }
6909    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably a notification (rather than an error)
     * that an attribute without a value was seen -- confirm at the call
     * site.
     *
     * @throws SAXException never from this empty body
     */
    protected void noteAttributeWithoutValue() throws SAXException {
    }
6912    
    /**
     * Hook with an empty body in this class: does nothing unless overridden.
     * Going by the name, presumably a notification (rather than an error)
     * that an unquoted attribute value was seen -- confirm at the call site.
     *
     * @throws SAXException never from this empty body
     */
    protected void noteUnquotedAttributeValue() throws SAXException {
    }
6915    
6916        /**
6917         * Sets the encodingDeclarationHandler.
6918         * 
6919         * @param encodingDeclarationHandler
6920         *            the encodingDeclarationHandler to set
6921         */
6922        public void setEncodingDeclarationHandler(
6923                EncodingDeclarationHandler encodingDeclarationHandler) {
6924            this.encodingDeclarationHandler = encodingDeclarationHandler;
6925        }
6926        
    /**
     * Empty in the Java source. According to the note in the body, the
     * translator is expected to emit refcount tracing code in its place.
     */
    void destructor() {
        // The translator will write refcount tracing stuff here
    }
6930        
6931        // [NOCPP[
6932        
6933        /**
6934         * Sets an offset to be added to the position reported to 
6935         * <code>TransitionHandler</code>.
6936         * 
6937         * @param offset the offset
6938         */
6939        public void setTransitionBaseOffset(int offset) {
6940            
6941        }
6942        
6943        // ]NOCPP]
6944    
6945    }