001    /*
002     * Copyright (c) 2009-2010 Mozilla Foundation
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package nu.validator.htmlparser.impl;
024    
025    import nu.validator.htmlparser.annotation.Inline;
026    import nu.validator.htmlparser.annotation.NoLength;
027    import nu.validator.htmlparser.common.TokenHandler;
028    import nu.validator.htmlparser.common.TransitionHandler;
029    import nu.validator.htmlparser.common.XmlViolationPolicy;
030    
031    import java.util.HashMap;
032    
033    import org.xml.sax.SAXException;
034    import org.xml.sax.SAXParseException;
035    
036    public class ErrorReportingTokenizer extends Tokenizer {
037    
038        /**
039         * Magic value for UTF-16 operations.
040         */
041        private static final int SURROGATE_OFFSET = (0x10000 - (0xD800 << 10) - 0xDC00);
042    
043        /**
044         * The policy for non-space non-XML characters.
045         */
046        private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.ALTER_INFOSET;
047    
048        /**
049         * Used together with <code>nonAsciiProhibited</code>.
050         */
051        private boolean alreadyComplainedAboutNonAscii;
052    
053        /**
054         * Keeps track of PUA warnings.
055         */
056        private boolean alreadyWarnedAboutPrivateUseCharacters;
057    
058        /**
059         * The current line number in the current resource being parsed. (First line
060         * is 1.) Passed on as locator data.
061         */
062        private int line;
063    
064        private int linePrev;
065    
066        /**
067         * The current column number in the current resource being tokenized. (First
068         * column is 1, counted by UTF-16 code units.) Passed on as locator data.
069         */
070        private int col;
071    
072        private int colPrev;
073    
074        private boolean nextCharOnNewLine;
075    
076        private char prev;
077    
078        private HashMap<String, String> errorProfileMap = null;
079    
080        private TransitionHandler transitionHandler = null;
081    
082        private int transitionBaseOffset = 0;
083    
084        /**
085         * @param tokenHandler
086         * @param newAttributesEachTime
087         */
088        public ErrorReportingTokenizer(TokenHandler tokenHandler,
089                boolean newAttributesEachTime) {
090            super(tokenHandler, newAttributesEachTime);
091        }
092    
093        /**
094         * @param tokenHandler
095         */
096        public ErrorReportingTokenizer(TokenHandler tokenHandler) {
097            super(tokenHandler);
098        }
099    
100        /**
101         * @see org.xml.sax.Locator#getLineNumber()
102         */
103        public int getLineNumber() {
104            if (line > 0) {
105                return line;
106            } else {
107                return -1;
108            }
109        }
110    
111        /**
112         * @see org.xml.sax.Locator#getColumnNumber()
113         */
114        public int getColumnNumber() {
115            if (col > 0) {
116                return col;
117            } else {
118                return -1;
119            }
120        }
121    
122        /**
123         * Sets the contentNonXmlCharPolicy.
124         * 
125         * @param contentNonXmlCharPolicy
126         *            the contentNonXmlCharPolicy to set
127         */
128        public void setContentNonXmlCharPolicy(
129                XmlViolationPolicy contentNonXmlCharPolicy) {
130            this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
131        }
132    
133        /**
134         * Sets the errorProfile.
135         * 
136         * @param errorProfile
137         */
138        public void setErrorProfile(HashMap<String, String> errorProfileMap) {
139            this.errorProfileMap = errorProfileMap;
140        }
141    
142        /**
143         * Reports on an event based on profile selected.
144         * 
145         * @param profile
146         *            the profile this message belongs to
147         * @param message
148         *            the message itself
149         * @throws SAXException
150         */
151        public void note(String profile, String message) throws SAXException {
152            if (errorProfileMap == null)
153                return;
154            String level = errorProfileMap.get(profile);
155            if ("warn".equals(level)) {
156                warn(message);
157            } else if ("err".equals(level)) {
158                err(message);
159                // } else if ("info".equals(level)) {
160                // info(message);
161            }
162        }
163    
164        protected void startErrorReporting() throws SAXException {
165            alreadyComplainedAboutNonAscii = false;
166            line = linePrev = 0;
167            col = colPrev = 1;
168            nextCharOnNewLine = true;
169            prev = '\u0000';
170            alreadyWarnedAboutPrivateUseCharacters = false;
171            transitionBaseOffset = 0;
172        }
173    
174        @Inline protected void silentCarriageReturn() {
175            nextCharOnNewLine = true;
176            lastCR = true;
177        }
178    
179        @Inline protected void silentLineFeed() {
180            nextCharOnNewLine = true;
181        }
182    
183        /**
184         * Returns the line.
185         * 
186         * @return the line
187         */
188        public int getLine() {
189            return line;
190        }
191    
192        /**
193         * Returns the col.
194         * 
195         * @return the col
196         */
197        public int getCol() {
198            return col;
199        }
200    
201        /**
202         * Returns the nextCharOnNewLine.
203         * 
204         * @return the nextCharOnNewLine
205         */
206        public boolean isNextCharOnNewLine() {
207            return nextCharOnNewLine;
208        }
209    
210        private void complainAboutNonAscii() throws SAXException {
211            String encoding = null;
212            if (encodingDeclarationHandler != null) {
213                encoding = encodingDeclarationHandler.getCharacterEncoding();
214            }
215            if (encoding == null) {
216                err("The character encoding of the document was not explicit but the document contains non-ASCII.");
217            } else {
218                err("No explicit character encoding declaration has been seen yet (assumed \u201C"
219                        + encoding + "\u201D) but the document contains non-ASCII.");
220            }
221        }
222    
223        /**
224         * Returns the alreadyComplainedAboutNonAscii.
225         * 
226         * @return the alreadyComplainedAboutNonAscii
227         */
228        public boolean isAlreadyComplainedAboutNonAscii() {
229            return alreadyComplainedAboutNonAscii;
230        }
231    
232        /**
233         * Flushes coalesced character tokens.
234         * 
235         * @param buf
236         *            TODO
237         * @param pos
238         *            TODO
239         * 
240         * @throws SAXException
241         */
242        @Override protected void flushChars(char[] buf, int pos)
243                throws SAXException {
244            if (pos > cstart) {
245                int currLine = line;
246                int currCol = col;
247                line = linePrev;
248                col = colPrev;
249                tokenHandler.characters(buf, cstart, pos - cstart);
250                line = currLine;
251                col = currCol;
252            }
253            cstart = 0x7fffffff;
254        }
255    
256        @Override protected char checkChar(@NoLength char[] buf, int pos)
257                throws SAXException {
258            linePrev = line;
259            colPrev = col;
260            if (nextCharOnNewLine) {
261                line++;
262                col = 1;
263                nextCharOnNewLine = false;
264            } else {
265                col++;
266            }
267    
268            char c = buf[pos];
269            if (!confident && !alreadyComplainedAboutNonAscii && c > '\u007F') {
270                complainAboutNonAscii();
271                alreadyComplainedAboutNonAscii = true;
272            }
273            switch (c) {
274                case '\u0000':
275                    err("Saw U+0000 in stream.");
276                case '\t':
277                case '\r':
278                case '\n':
279                    break;
280                case '\u000C':
281                    if (contentNonXmlCharPolicy == XmlViolationPolicy.FATAL) {
282                        fatal("This document is not mappable to XML 1.0 without data loss due to "
283                                + toUPlusString(c)
284                                + " which is not a legal XML 1.0 character.");
285                    } else {
286                        if (contentNonXmlCharPolicy == XmlViolationPolicy.ALTER_INFOSET) {
287                            c = buf[pos] = ' ';
288                        }
289                        warn("This document is not mappable to XML 1.0 without data loss due to "
290                                + toUPlusString(c)
291                                + " which is not a legal XML 1.0 character.");
292                    }
293                    break;
294                default:
295                    if ((c & 0xFC00) == 0xDC00) {
296                        // Got a low surrogate. See if prev was high
297                        // surrogate
298                        if ((prev & 0xFC00) == 0xD800) {
299                            int intVal = (prev << 10) + c + SURROGATE_OFFSET;
300                            if ((intVal & 0xFFFE) == 0xFFFE) {
301                                err("Astral non-character.");
302                            }
303                            if (isAstralPrivateUse(intVal)) {
304                                warnAboutPrivateUseChar();
305                            }
306                        }
307                    } else if ((c < ' ' || ((c & 0xFFFE) == 0xFFFE))) {
308                        switch (contentNonXmlCharPolicy) {
309                            case FATAL:
310                                fatal("Forbidden code point " + toUPlusString(c)
311                                        + ".");
312                                break;
313                            case ALTER_INFOSET:
314                                c = buf[pos] = '\uFFFD';
315                                // fall through
316                            case ALLOW:
317                                err("Forbidden code point " + toUPlusString(c)
318                                        + ".");
319                        }
320                    } else if ((c >= '\u007F') && (c <= '\u009F')
321                            || (c >= '\uFDD0') && (c <= '\uFDEF')) {
322                        err("Forbidden code point " + toUPlusString(c) + ".");
323                    } else if (isPrivateUse(c)) {
324                        warnAboutPrivateUseChar();
325                    }
326            }
327            prev = c;
328            return c;
329        }
330    
331        /**
332         * @throws SAXException
333         * @see nu.validator.htmlparser.impl.Tokenizer#transition(int, int, boolean,
334         *      int)
335         */
336        @Override protected int transition(int from, int to, boolean reconsume,
337                int pos) throws SAXException {
338            if (transitionHandler != null) {
339                transitionHandler.transition(from, to, reconsume,
340                        transitionBaseOffset + pos);
341            }
342            return to;
343        }
344    
345        private String toUPlusString(int c) {
346            String hexString = Integer.toHexString(c);
347            switch (hexString.length()) {
348                case 1:
349                    return "U+000" + hexString;
350                case 2:
351                    return "U+00" + hexString;
352                case 3:
353                    return "U+0" + hexString;
354                default:
355                    return "U+" + hexString;
356            }
357        }
358    
359        /**
360         * Emits a warning about private use characters if the warning has not been
361         * emitted yet.
362         * 
363         * @throws SAXException
364         */
365        private void warnAboutPrivateUseChar() throws SAXException {
366            if (!alreadyWarnedAboutPrivateUseCharacters) {
367                warn("Document uses the Unicode Private Use Area(s), which should not be used in publicly exchanged documents. (Charmod C073)");
368                alreadyWarnedAboutPrivateUseCharacters = true;
369            }
370        }
371    
372        /**
373         * Tells if the argument is a BMP PUA character.
374         * 
375         * @param c
376         *            the UTF-16 code unit to check
377         * @return <code>true</code> if PUA character
378         */
379        private boolean isPrivateUse(char c) {
380            return c >= '\uE000' && c <= '\uF8FF';
381        }
382    
383        /**
384         * Tells if the argument is an astral PUA character.
385         * 
386         * @param c
387         *            the code point to check
388         * @return <code>true</code> if astral private use
389         */
390        private boolean isAstralPrivateUse(int c) {
391            return (c >= 0xF0000 && c <= 0xFFFFD)
392                    || (c >= 0x100000 && c <= 0x10FFFD);
393        }
394    
395        @Override protected void errGarbageAfterLtSlash() throws SAXException {
396            err("Garbage after \u201C</\u201D.");
397        }
398    
399        @Override protected void errLtSlashGt() throws SAXException {
400            err("Saw \u201C</>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C&lt;\u201D) or mistyped end tag.");
401        }
402    
403        @Override protected void errWarnLtSlashInRcdata() throws SAXException {
404            if (html4) {
405                err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA")
406                        + " element \u201C"
407                        + endTagExpectation
408                        + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)");
409            } else {
410                warn((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA")
411                        + " element \u201C"
412                        + endTagExpectation
413                        + "\u201D contained the string \u201C</\u201D, but this did not close the element.");
414            }
415        }
416    
417        @Override protected void errHtml4LtSlashInRcdata(char folded)
418                throws SAXException {
419            if (html4 && (index > 0 || (folded >= 'a' && folded <= 'z'))
420                    && ElementName.IFRAME != endTagExpectation) {
421                err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA")
422                        + " element \u201C"
423                        + endTagExpectation.name
424                        + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)");
425            }
426        }
427    
428        @Override protected void errCharRefLacksSemicolon() throws SAXException {
429            err("Character reference was not terminated by a semicolon.");
430        }
431    
432        @Override protected void errNoDigitsInNCR() throws SAXException {
433            err("No digits after \u201C" + strBufToString() + "\u201D.");
434        }
435    
436        @Override protected void errGtInSystemId() throws SAXException {
437            err("\u201C>\u201D in system identifier.");
438        }
439    
440        @Override protected void errGtInPublicId() throws SAXException {
441            err("\u201C>\u201D in public identifier.");
442        }
443    
444        @Override protected void errNamelessDoctype() throws SAXException {
445            err("Nameless doctype.");
446        }
447    
448        @Override protected void errConsecutiveHyphens() throws SAXException {
449            err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is.");
450        }
451    
452        @Override protected void errPrematureEndOfComment() throws SAXException {
453            err("Premature end of comment. Use \u201C-->\u201D to end a comment properly.");
454        }
455    
456        @Override protected void errBogusComment() throws SAXException {
457            err("Bogus comment.");
458        }
459    
460        @Override protected void errUnquotedAttributeValOrNull(char c)
461                throws SAXException {
462            switch (c) {
463                case '<':
464                    err("\u201C<\u201D in an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before.");
465                    return;
466                case '`':
467                    err("\u201C`\u201D in an unquoted attribute value. Probable cause: Using the wrong character as a quote.");
468                    return;
469                case '\uFFFD':
470                    return;
471                default:
472                    err("\u201C"
473                            + c
474                            + "\u201D in an unquoted attribute value. Probable causes: Attributes running together or a URL query string in an unquoted attribute value.");
475                    return;
476            }
477        }
478    
479        @Override protected void errSlashNotFollowedByGt() throws SAXException {
480            err("A slash was not immediate followed by \u201C>\u201D.");
481        }
482    
483        @Override protected void errHtml4XmlVoidSyntax() throws SAXException {
484            if (html4) {
485                err("The \u201C/>\u201D syntax on void elements is not allowed.  (This is an HTML4-only error.)");
486            }
487        }
488    
489        @Override protected void errNoSpaceBetweenAttributes() throws SAXException {
490            err("No space between attributes.");
491        }
492    
493        @Override protected void errHtml4NonNameInUnquotedAttribute(char c)
494                throws SAXException {
495            if (html4
496                    && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
497                            || (c >= '0' && c <= '9') || c == '.' || c == '-'
498                            || c == '_' || c == ':')) {
499                err("Non-name character in an unquoted attribute value. (This is an HTML4-only error.)");
500            }
501        }
502    
503        @Override protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(
504                char c) throws SAXException {
505            switch (c) {
506                case '=':
507                    err("\u201C=\u201D at the start of an unquoted attribute value. Probable cause: Stray duplicate equals sign.");
508                    return;
509                case '<':
510                    err("\u201C<\u201D at the start of an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before.");
511                    return;
512                case '`':
513                    err("\u201C`\u201D at the start of an unquoted attribute value. Probable cause: Using the wrong character as a quote.");
514                    return;
515            }
516        }
517    
518        @Override protected void errAttributeValueMissing() throws SAXException {
519            err("Attribute value missing.");
520        }
521    
522        @Override protected void errBadCharBeforeAttributeNameOrNull(char c)
523                throws SAXException {
524            if (c == '<') {
525                err("Saw \u201C<\u201D when expecting an attribute name. Probable cause: Missing \u201C>\u201D immediately before.");
526            } else if (c == '=') {
527                errEqualsSignBeforeAttributeName();
528            } else if (c != '\uFFFD') {
529                errQuoteBeforeAttributeName(c);
530            }
531        }
532    
533        @Override protected void errEqualsSignBeforeAttributeName()
534                throws SAXException {
535            err("Saw \u201C=\u201D when expecting an attribute name. Probable cause: Attribute name missing.");
536        }
537    
538        @Override protected void errBadCharAfterLt(char c) throws SAXException {
539            err("Bad character \u201C"
540                    + c
541                    + "\u201D after \u201C<\u201D. Probable cause: Unescaped \u201C<\u201D. Try escaping it as \u201C&lt;\u201D.");
542        }
543    
544        @Override protected void errLtGt() throws SAXException {
545            err("Saw \u201C<>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C&lt;\u201D) or mistyped start tag.");
546        }
547    
548        @Override protected void errProcessingInstruction() throws SAXException {
549            err("Saw \u201C<?\u201D. Probable cause: Attempt to use an XML processing instruction in HTML. (XML processing instructions are not supported in HTML.)");
550        }
551    
552        @Override protected void errUnescapedAmpersandInterpretedAsCharacterReference()
553                throws SAXException {
554            if (errorHandler == null) {
555                return;
556            }
557            SAXParseException spe = new SAXParseException(
558                    "The string following \u201C&\u201D was interpreted as a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
559                    ampersandLocation);
560            errorHandler.error(spe);
561        }
562    
563        @Override protected void errNotSemicolonTerminated() throws SAXException {
564            err("Named character reference was not terminated by a semicolon. (Or \u201C&\u201D should have been escaped as \u201C&amp;\u201D.)");
565        }
566    
567        @Override protected void errNoNamedCharacterMatch() throws SAXException {
568            if (errorHandler == null) {
569                return;
570            }
571            SAXParseException spe = new SAXParseException(
572                    "\u201C&\u201D did not start a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
573                    ampersandLocation);
574            errorHandler.error(spe);
575        }
576    
577        @Override protected void errQuoteBeforeAttributeName(char c)
578                throws SAXException {
579            err("Saw \u201C"
580                    + c
581                    + "\u201D when expecting an attribute name. Probable cause: \u201C=\u201D missing immediately before.");
582        }
583    
584        @Override protected void errQuoteOrLtInAttributeNameOrNull(char c)
585                throws SAXException {
586            if (c == '<') {
587                err("\u201C<\u201D in attribute name. Probable cause: \u201C>\u201D missing immediately before.");
588            } else if (c != '\uFFFD') {
589                err("Quote \u201C"
590                        + c
591                        + "\u201D in attribute name. Probable cause: Matching quote missing somewhere earlier.");
592            }
593        }
594    
595        @Override protected void errExpectedPublicId() throws SAXException {
596            err("Expected a public identifier but the doctype ended.");
597        }
598    
599        @Override protected void errBogusDoctype() throws SAXException {
600            err("Bogus doctype.");
601        }
602    
603        @Override protected void maybeWarnPrivateUseAstral() throws SAXException {
604            if (errorHandler != null && isAstralPrivateUse(value)) {
605                warnAboutPrivateUseChar();
606            }
607        }
608    
609        @Override protected void maybeWarnPrivateUse(char ch) throws SAXException {
610            if (errorHandler != null && isPrivateUse(ch)) {
611                warnAboutPrivateUseChar();
612            }
613        }
614    
615        @Override protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
616                throws SAXException {
617            if (attrs.getLength() != 0) {
618                /*
619                 * When an end tag token is emitted with attributes, that is a parse
620                 * error.
621                 */
622                err("End tag had attributes.");
623            }
624        }
625    
626        @Override protected void maybeErrSlashInEndTag(boolean selfClosing)
627                throws SAXException {
628            if (selfClosing && endTag) {
629                err("Stray \u201C/\u201D at the end of an end tag.");
630            }
631        }
632    
633        @Override protected char errNcrNonCharacter(char ch) throws SAXException {
634            switch (contentNonXmlCharPolicy) {
635                case FATAL:
636                    fatal("Character reference expands to a non-character ("
637                            + toUPlusString((char) value) + ").");
638                    break;
639                case ALTER_INFOSET:
640                    ch = '\uFFFD';
641                    // fall through
642                case ALLOW:
643                    err("Character reference expands to a non-character ("
644                            + toUPlusString((char) value) + ").");
645            }
646            return ch;
647        }
648    
649        /**
650         * @see nu.validator.htmlparser.impl.Tokenizer#errAstralNonCharacter(int)
651         */
652        @Override protected void errAstralNonCharacter(int ch) throws SAXException {
653            err("Character reference expands to an astral non-character ("
654                    + toUPlusString(value) + ").");
655        }
656    
657        @Override protected void errNcrSurrogate() throws SAXException {
658            err("Character reference expands to a surrogate.");
659        }
660    
661        @Override protected char errNcrControlChar(char ch) throws SAXException {
662            switch (contentNonXmlCharPolicy) {
663                case FATAL:
664                    fatal("Character reference expands to a control character ("
665                            + toUPlusString((char) value) + ").");
666                    break;
667                case ALTER_INFOSET:
668                    ch = '\uFFFD';
669                    // fall through
670                case ALLOW:
671                    err("Character reference expands to a control character ("
672                            + toUPlusString((char) value) + ").");
673            }
674            return ch;
675        }
676    
677        @Override protected void errNcrCr() throws SAXException {
678            err("A numeric character reference expanded to carriage return.");
679        }
680    
681        @Override protected void errNcrInC1Range() throws SAXException {
682            err("A numeric character reference expanded to the C1 controls range.");
683        }
684    
685        @Override protected void errEofInPublicId() throws SAXException {
686            err("End of file inside public identifier.");
687        }
688    
689        @Override protected void errEofInComment() throws SAXException {
690            err("End of file inside comment.");
691        }
692    
693        @Override protected void errEofInDoctype() throws SAXException {
694            err("End of file inside doctype.");
695        }
696    
697        @Override protected void errEofInAttributeValue() throws SAXException {
698            err("End of file reached when inside an attribute value. Ignoring tag.");
699        }
700    
701        @Override protected void errEofInAttributeName() throws SAXException {
702            err("End of file occurred in an attribute name. Ignoring tag.");
703        }
704    
705        @Override protected void errEofWithoutGt() throws SAXException {
706            err("Saw end of file without the previous tag ending with \u201C>\u201D. Ignoring tag.");
707        }
708    
709        @Override protected void errEofInTagName() throws SAXException {
710            err("End of file seen when looking for tag name. Ignoring tag.");
711        }
712    
713        @Override protected void errEofInEndTag() throws SAXException {
714            err("End of file inside end tag. Ignoring tag.");
715        }
716    
717        @Override protected void errEofAfterLt() throws SAXException {
718            err("End of file after \u201C<\u201D.");
719        }
720    
721        @Override protected void errNcrOutOfRange() throws SAXException {
722            err("Character reference outside the permissible Unicode range.");
723        }
724    
725        @Override protected void errNcrUnassigned() throws SAXException {
726            err("Character reference expands to a permanently unassigned code point.");
727        }
728    
729        @Override protected void errDuplicateAttribute() throws SAXException {
730            err("Duplicate attribute \u201C"
731                    + attributeName.getLocal(AttributeName.HTML) + "\u201D.");
732        }
733    
734        @Override protected void errEofInSystemId() throws SAXException {
735            err("End of file inside system identifier.");
736        }
737    
738        @Override protected void errExpectedSystemId() throws SAXException {
739            err("Expected a system identifier but the doctype ended.");
740        }
741    
742        @Override protected void errMissingSpaceBeforeDoctypeName()
743                throws SAXException {
744            err("Missing space before doctype name.");
745        }
746    
747        @Override protected void errHyphenHyphenBang() throws SAXException {
748            err("\u201C--!\u201D found in comment.");
749        }
750    
751        @Override protected void errNcrControlChar() throws SAXException {
752            err("Character reference expands to a control character ("
753                    + toUPlusString((char) value) + ").");
754        }
755    
756        @Override protected void errNcrZero() throws SAXException {
757            err("Character reference expands to zero.");
758        }
759    
760        @Override protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
761                throws SAXException {
762            err("No space between the doctype \u201CSYSTEM\u201D keyword and the quote.");
763        }
764    
765        @Override protected void errNoSpaceBetweenPublicAndSystemIds()
766                throws SAXException {
767            err("No space between the doctype public and system identifiers.");
768        }
769    
770        @Override protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
771                throws SAXException {
772            err("No space between the doctype \u201CPUBLIC\u201D keyword and the quote.");
773        }
774    
775        @Override protected void noteAttributeWithoutValue() throws SAXException {
776            note("xhtml2", "Attribute without value");
777        }
778    
779        @Override protected void noteUnquotedAttributeValue() throws SAXException {
780            note("xhtml1", "Unquoted attribute value.");
781        }
782    
783        /**
784         * Sets the transitionHandler.
785         * 
786         * @param transitionHandler
787         *            the transitionHandler to set
788         */
789        public void setTransitionHandler(TransitionHandler transitionHandler) {
790            this.transitionHandler = transitionHandler;
791        }
792    
793        /**
794         * Sets an offset to be added to the position reported to
795         * <code>TransitionHandler</code>.
796         * 
797         * @param offset
798         *            the offset
799         */
800        public void setTransitionBaseOffset(int offset) {
801            this.transitionBaseOffset = offset;
802        }
803    
804    }