001 /* 002 * Copyright (c) 2009-2010 Mozilla Foundation 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.impl; 024 025 import nu.validator.htmlparser.annotation.Inline; 026 import nu.validator.htmlparser.annotation.NoLength; 027 import nu.validator.htmlparser.common.TokenHandler; 028 import nu.validator.htmlparser.common.TransitionHandler; 029 import nu.validator.htmlparser.common.XmlViolationPolicy; 030 031 import java.util.HashMap; 032 033 import org.xml.sax.SAXException; 034 import org.xml.sax.SAXParseException; 035 036 public class ErrorReportingTokenizer extends Tokenizer { 037 038 /** 039 * Magic value for UTF-16 operations. 040 */ 041 private static final int SURROGATE_OFFSET = (0x10000 - (0xD800 << 10) - 0xDC00); 042 043 /** 044 * The policy for non-space non-XML characters. 045 */ 046 private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.ALTER_INFOSET; 047 048 /** 049 * Used together with <code>nonAsciiProhibited</code>. 050 */ 051 private boolean alreadyComplainedAboutNonAscii; 052 053 /** 054 * Keeps track of PUA warnings. 055 */ 056 private boolean alreadyWarnedAboutPrivateUseCharacters; 057 058 /** 059 * The current line number in the current resource being parsed. (First line 060 * is 1.) Passed on as locator data. 061 */ 062 private int line; 063 064 private int linePrev; 065 066 /** 067 * The current column number in the current resource being tokenized. (First 068 * column is 1, counted by UTF-16 code units.) Passed on as locator data. 069 */ 070 private int col; 071 072 private int colPrev; 073 074 private boolean nextCharOnNewLine; 075 076 private char prev; 077 078 private HashMap<String, String> errorProfileMap = null; 079 080 private TransitionHandler transitionHandler = null; 081 082 private int transitionBaseOffset = 0; 083 084 /** 085 * @param tokenHandler 086 * @param newAttributesEachTime 087 */ 088 public ErrorReportingTokenizer(TokenHandler tokenHandler, 089 boolean newAttributesEachTime) { 090 super(tokenHandler, newAttributesEachTime); 091 } 092 093 /** 094 * @param tokenHandler 095 */ 096 public ErrorReportingTokenizer(TokenHandler tokenHandler) { 097 super(tokenHandler); 098 } 099 100 /** 101 * @see org.xml.sax.Locator#getLineNumber() 102 */ 103 public int getLineNumber() { 104 if (line > 0) { 105 return line; 106 } else { 107 return -1; 108 } 109 } 110 111 /** 112 * @see org.xml.sax.Locator#getColumnNumber() 113 */ 114 public int getColumnNumber() { 115 if (col > 0) { 116 return col; 117 } else { 118 return -1; 119 } 120 } 121 122 /** 123 * Sets the contentNonXmlCharPolicy. 124 * 125 * @param contentNonXmlCharPolicy 126 * the contentNonXmlCharPolicy to set 127 */ 128 public void setContentNonXmlCharPolicy( 129 XmlViolationPolicy contentNonXmlCharPolicy) { 130 this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; 131 } 132 133 /** 134 * Sets the errorProfile. 135 * 136 * @param errorProfile 137 */ 138 public void setErrorProfile(HashMap<String, String> errorProfileMap) { 139 this.errorProfileMap = errorProfileMap; 140 } 141 142 /** 143 * Reports on an event based on profile selected. 144 * 145 * @param profile 146 * the profile this message belongs to 147 * @param message 148 * the message itself 149 * @throws SAXException 150 */ 151 public void note(String profile, String message) throws SAXException { 152 if (errorProfileMap == null) 153 return; 154 String level = errorProfileMap.get(profile); 155 if ("warn".equals(level)) { 156 warn(message); 157 } else if ("err".equals(level)) { 158 err(message); 159 // } else if ("info".equals(level)) { 160 // info(message); 161 } 162 } 163 164 protected void startErrorReporting() throws SAXException { 165 alreadyComplainedAboutNonAscii = false; 166 line = linePrev = 0; 167 col = colPrev = 1; 168 nextCharOnNewLine = true; 169 prev = '\u0000'; 170 alreadyWarnedAboutPrivateUseCharacters = false; 171 transitionBaseOffset = 0; 172 } 173 174 @Inline protected void silentCarriageReturn() { 175 nextCharOnNewLine = true; 176 lastCR = true; 177 } 178 179 @Inline protected void silentLineFeed() { 180 nextCharOnNewLine = true; 181 } 182 183 /** 184 * Returns the line. 185 * 186 * @return the line 187 */ 188 public int getLine() { 189 return line; 190 } 191 192 /** 193 * Returns the col. 194 * 195 * @return the col 196 */ 197 public int getCol() { 198 return col; 199 } 200 201 /** 202 * Returns the nextCharOnNewLine. 203 * 204 * @return the nextCharOnNewLine 205 */ 206 public boolean isNextCharOnNewLine() { 207 return nextCharOnNewLine; 208 } 209 210 private void complainAboutNonAscii() throws SAXException { 211 String encoding = null; 212 if (encodingDeclarationHandler != null) { 213 encoding = encodingDeclarationHandler.getCharacterEncoding(); 214 } 215 if (encoding == null) { 216 err("The character encoding of the document was not explicit but the document contains non-ASCII."); 217 } else { 218 err("No explicit character encoding declaration has been seen yet (assumed \u201C" 219 + encoding + "\u201D) but the document contains non-ASCII."); 220 } 221 } 222 223 /** 224 * Returns the alreadyComplainedAboutNonAscii. 225 * 226 * @return the alreadyComplainedAboutNonAscii 227 */ 228 public boolean isAlreadyComplainedAboutNonAscii() { 229 return alreadyComplainedAboutNonAscii; 230 } 231 232 /** 233 * Flushes coalesced character tokens. 234 * 235 * @param buf 236 * TODO 237 * @param pos 238 * TODO 239 * 240 * @throws SAXException 241 */ 242 @Override protected void flushChars(char[] buf, int pos) 243 throws SAXException { 244 if (pos > cstart) { 245 int currLine = line; 246 int currCol = col; 247 line = linePrev; 248 col = colPrev; 249 tokenHandler.characters(buf, cstart, pos - cstart); 250 line = currLine; 251 col = currCol; 252 } 253 cstart = 0x7fffffff; 254 } 255 256 @Override protected char checkChar(@NoLength char[] buf, int pos) 257 throws SAXException { 258 linePrev = line; 259 colPrev = col; 260 if (nextCharOnNewLine) { 261 line++; 262 col = 1; 263 nextCharOnNewLine = false; 264 } else { 265 col++; 266 } 267 268 char c = buf[pos]; 269 if (!confident && !alreadyComplainedAboutNonAscii && c > '\u007F') { 270 complainAboutNonAscii(); 271 alreadyComplainedAboutNonAscii = true; 272 } 273 switch (c) { 274 case '\u0000': 275 err("Saw U+0000 in stream."); 276 case '\t': 277 case '\r': 278 case '\n': 279 break; 280 case '\u000C': 281 if (contentNonXmlCharPolicy == XmlViolationPolicy.FATAL) { 282 fatal("This document is not mappable to XML 1.0 without data loss due to " 283 + toUPlusString(c) 284 + " which is not a legal XML 1.0 character."); 285 } else { 286 if (contentNonXmlCharPolicy == XmlViolationPolicy.ALTER_INFOSET) { 287 c = buf[pos] = ' '; 288 } 289 warn("This document is not mappable to XML 1.0 without data loss due to " 290 + toUPlusString(c) 291 + " which is not a legal XML 1.0 character."); 292 } 293 break; 294 default: 295 if ((c & 0xFC00) == 0xDC00) { 296 // Got a low surrogate. See if prev was high 297 // surrogate 298 if ((prev & 0xFC00) == 0xD800) { 299 int intVal = (prev << 10) + c + SURROGATE_OFFSET; 300 if ((intVal & 0xFFFE) == 0xFFFE) { 301 err("Astral non-character."); 302 } 303 if (isAstralPrivateUse(intVal)) { 304 warnAboutPrivateUseChar(); 305 } 306 } 307 } else if ((c < ' ' || ((c & 0xFFFE) == 0xFFFE))) { 308 switch (contentNonXmlCharPolicy) { 309 case FATAL: 310 fatal("Forbidden code point " + toUPlusString(c) 311 + "."); 312 break; 313 case ALTER_INFOSET: 314 c = buf[pos] = '\uFFFD'; 315 // fall through 316 case ALLOW: 317 err("Forbidden code point " + toUPlusString(c) 318 + "."); 319 } 320 } else if ((c >= '\u007F') && (c <= '\u009F') 321 || (c >= '\uFDD0') && (c <= '\uFDEF')) { 322 err("Forbidden code point " + toUPlusString(c) + "."); 323 } else if (isPrivateUse(c)) { 324 warnAboutPrivateUseChar(); 325 } 326 } 327 prev = c; 328 return c; 329 } 330 331 /** 332 * @throws SAXException 333 * @see nu.validator.htmlparser.impl.Tokenizer#transition(int, int, boolean, 334 * int) 335 */ 336 @Override protected int transition(int from, int to, boolean reconsume, 337 int pos) throws SAXException { 338 if (transitionHandler != null) { 339 transitionHandler.transition(from, to, reconsume, 340 transitionBaseOffset + pos); 341 } 342 return to; 343 } 344 345 private String toUPlusString(int c) { 346 String hexString = Integer.toHexString(c); 347 switch (hexString.length()) { 348 case 1: 349 return "U+000" + hexString; 350 case 2: 351 return "U+00" + hexString; 352 case 3: 353 return "U+0" + hexString; 354 default: 355 return "U+" + hexString; 356 } 357 } 358 359 /** 360 * Emits a warning about private use characters if the warning has not been 361 * emitted yet. 362 * 363 * @throws SAXException 364 */ 365 private void warnAboutPrivateUseChar() throws SAXException { 366 if (!alreadyWarnedAboutPrivateUseCharacters) { 367 warn("Document uses the Unicode Private Use Area(s), which should not be used in publicly exchanged documents. (Charmod C073)"); 368 alreadyWarnedAboutPrivateUseCharacters = true; 369 } 370 } 371 372 /** 373 * Tells if the argument is a BMP PUA character. 374 * 375 * @param c 376 * the UTF-16 code unit to check 377 * @return <code>true</code> if PUA character 378 */ 379 private boolean isPrivateUse(char c) { 380 return c >= '\uE000' && c <= '\uF8FF'; 381 } 382 383 /** 384 * Tells if the argument is an astral PUA character. 385 * 386 * @param c 387 * the code point to check 388 * @return <code>true</code> if astral private use 389 */ 390 private boolean isAstralPrivateUse(int c) { 391 return (c >= 0xF0000 && c <= 0xFFFFD) 392 || (c >= 0x100000 && c <= 0x10FFFD); 393 } 394 395 @Override protected void errGarbageAfterLtSlash() throws SAXException { 396 err("Garbage after \u201C</\u201D."); 397 } 398 399 @Override protected void errLtSlashGt() throws SAXException { 400 err("Saw \u201C</>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C<\u201D) or mistyped end tag."); 401 } 402 403 @Override protected void errWarnLtSlashInRcdata() throws SAXException { 404 if (html4) { 405 err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") 406 + " element \u201C" 407 + endTagExpectation 408 + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)"); 409 } else { 410 warn((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") 411 + " element \u201C" 412 + endTagExpectation 413 + "\u201D contained the string \u201C</\u201D, but this did not close the element."); 414 } 415 } 416 417 @Override protected void errHtml4LtSlashInRcdata(char folded) 418 throws SAXException { 419 if (html4 && (index > 0 || (folded >= 'a' && folded <= 'z')) 420 && ElementName.IFRAME != endTagExpectation) { 421 err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") 422 + " element \u201C" 423 + endTagExpectation.name 424 + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)"); 425 } 426 } 427 428 @Override protected void errCharRefLacksSemicolon() throws SAXException { 429 err("Character reference was not terminated by a semicolon."); 430 } 431 432 @Override protected void errNoDigitsInNCR() throws SAXException { 433 err("No digits after \u201C" + strBufToString() + "\u201D."); 434 } 435 436 @Override protected void errGtInSystemId() throws SAXException { 437 err("\u201C>\u201D in system identifier."); 438 } 439 440 @Override protected void errGtInPublicId() throws SAXException { 441 err("\u201C>\u201D in public identifier."); 442 } 443 444 @Override protected void errNamelessDoctype() throws SAXException { 445 err("Nameless doctype."); 446 } 447 448 @Override protected void errConsecutiveHyphens() throws SAXException { 449 err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is."); 450 } 451 452 @Override protected void errPrematureEndOfComment() throws SAXException { 453 err("Premature end of comment. Use \u201C-->\u201D to end a comment properly."); 454 } 455 456 @Override protected void errBogusComment() throws SAXException { 457 err("Bogus comment."); 458 } 459 460 @Override protected void errUnquotedAttributeValOrNull(char c) 461 throws SAXException { 462 switch (c) { 463 case '<': 464 err("\u201C<\u201D in an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before."); 465 return; 466 case '`': 467 err("\u201C`\u201D in an unquoted attribute value. Probable cause: Using the wrong character as a quote."); 468 return; 469 case '\uFFFD': 470 return; 471 default: 472 err("\u201C" 473 + c 474 + "\u201D in an unquoted attribute value. Probable causes: Attributes running together or a URL query string in an unquoted attribute value."); 475 return; 476 } 477 } 478 479 @Override protected void errSlashNotFollowedByGt() throws SAXException { 480 err("A slash was not immediate followed by \u201C>\u201D."); 481 } 482 483 @Override protected void errHtml4XmlVoidSyntax() throws SAXException { 484 if (html4) { 485 err("The \u201C/>\u201D syntax on void elements is not allowed. (This is an HTML4-only error.)"); 486 } 487 } 488 489 @Override protected void errNoSpaceBetweenAttributes() throws SAXException { 490 err("No space between attributes."); 491 } 492 493 @Override protected void errHtml4NonNameInUnquotedAttribute(char c) 494 throws SAXException { 495 if (html4 496 && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 497 || (c >= '0' && c <= '9') || c == '.' || c == '-' 498 || c == '_' || c == ':')) { 499 err("Non-name character in an unquoted attribute value. (This is an HTML4-only error.)"); 500 } 501 } 502 503 @Override protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull( 504 char c) throws SAXException { 505 switch (c) { 506 case '=': 507 err("\u201C=\u201D at the start of an unquoted attribute value. Probable cause: Stray duplicate equals sign."); 508 return; 509 case '<': 510 err("\u201C<\u201D at the start of an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before."); 511 return; 512 case '`': 513 err("\u201C`\u201D at the start of an unquoted attribute value. Probable cause: Using the wrong character as a quote."); 514 return; 515 } 516 } 517 518 @Override protected void errAttributeValueMissing() throws SAXException { 519 err("Attribute value missing."); 520 } 521 522 @Override protected void errBadCharBeforeAttributeNameOrNull(char c) 523 throws SAXException { 524 if (c == '<') { 525 err("Saw \u201C<\u201D when expecting an attribute name. Probable cause: Missing \u201C>\u201D immediately before."); 526 } else if (c == '=') { 527 errEqualsSignBeforeAttributeName(); 528 } else if (c != '\uFFFD') { 529 errQuoteBeforeAttributeName(c); 530 } 531 } 532 533 @Override protected void errEqualsSignBeforeAttributeName() 534 throws SAXException { 535 err("Saw \u201C=\u201D when expecting an attribute name. Probable cause: Attribute name missing."); 536 } 537 538 @Override protected void errBadCharAfterLt(char c) throws SAXException { 539 err("Bad character \u201C" 540 + c 541 + "\u201D after \u201C<\u201D. Probable cause: Unescaped \u201C<\u201D. Try escaping it as \u201C<\u201D."); 542 } 543 544 @Override protected void errLtGt() throws SAXException { 545 err("Saw \u201C<>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C<\u201D) or mistyped start tag."); 546 } 547 548 @Override protected void errProcessingInstruction() throws SAXException { 549 err("Saw \u201C<?\u201D. Probable cause: Attempt to use an XML processing instruction in HTML. (XML processing instructions are not supported in HTML.)"); 550 } 551 552 @Override protected void errUnescapedAmpersandInterpretedAsCharacterReference() 553 throws SAXException { 554 if (errorHandler == null) { 555 return; 556 } 557 SAXParseException spe = new SAXParseException( 558 "The string following \u201C&\u201D was interpreted as a character reference. (\u201C&\u201D probably should have been escaped as \u201C&\u201D.)", 559 ampersandLocation); 560 errorHandler.error(spe); 561 } 562 563 @Override protected void errNotSemicolonTerminated() throws SAXException { 564 err("Named character reference was not terminated by a semicolon. (Or \u201C&\u201D should have been escaped as \u201C&\u201D.)"); 565 } 566 567 @Override protected void errNoNamedCharacterMatch() throws SAXException { 568 if (errorHandler == null) { 569 return; 570 } 571 SAXParseException spe = new SAXParseException( 572 "\u201C&\u201D did not start a character reference. (\u201C&\u201D probably should have been escaped as \u201C&\u201D.)", 573 ampersandLocation); 574 errorHandler.error(spe); 575 } 576 577 @Override protected void errQuoteBeforeAttributeName(char c) 578 throws SAXException { 579 err("Saw \u201C" 580 + c 581 + "\u201D when expecting an attribute name. Probable cause: \u201C=\u201D missing immediately before."); 582 } 583 584 @Override protected void errQuoteOrLtInAttributeNameOrNull(char c) 585 throws SAXException { 586 if (c == '<') { 587 err("\u201C<\u201D in attribute name. Probable cause: \u201C>\u201D missing immediately before."); 588 } else if (c != '\uFFFD') { 589 err("Quote \u201C" 590 + c 591 + "\u201D in attribute name. Probable cause: Matching quote missing somewhere earlier."); 592 } 593 } 594 595 @Override protected void errExpectedPublicId() throws SAXException { 596 err("Expected a public identifier but the doctype ended."); 597 } 598 599 @Override protected void errBogusDoctype() throws SAXException { 600 err("Bogus doctype."); 601 } 602 603 @Override protected void maybeWarnPrivateUseAstral() throws SAXException { 604 if (errorHandler != null && isAstralPrivateUse(value)) { 605 warnAboutPrivateUseChar(); 606 } 607 } 608 609 @Override protected void maybeWarnPrivateUse(char ch) throws SAXException { 610 if (errorHandler != null && isPrivateUse(ch)) { 611 warnAboutPrivateUseChar(); 612 } 613 } 614 615 @Override protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) 616 throws SAXException { 617 if (attrs.getLength() != 0) { 618 /* 619 * When an end tag token is emitted with attributes, that is a parse 620 * error. 621 */ 622 err("End tag had attributes."); 623 } 624 } 625 626 @Override protected void maybeErrSlashInEndTag(boolean selfClosing) 627 throws SAXException { 628 if (selfClosing && endTag) { 629 err("Stray \u201C/\u201D at the end of an end tag."); 630 } 631 } 632 633 @Override protected char errNcrNonCharacter(char ch) throws SAXException { 634 switch (contentNonXmlCharPolicy) { 635 case FATAL: 636 fatal("Character reference expands to a non-character (" 637 + toUPlusString((char) value) + ")."); 638 break; 639 case ALTER_INFOSET: 640 ch = '\uFFFD'; 641 // fall through 642 case ALLOW: 643 err("Character reference expands to a non-character (" 644 + toUPlusString((char) value) + ")."); 645 } 646 return ch; 647 } 648 649 /** 650 * @see nu.validator.htmlparser.impl.Tokenizer#errAstralNonCharacter(int) 651 */ 652 @Override protected void errAstralNonCharacter(int ch) throws SAXException { 653 err("Character reference expands to an astral non-character (" 654 + toUPlusString(value) + ")."); 655 } 656 657 @Override protected void errNcrSurrogate() throws SAXException { 658 err("Character reference expands to a surrogate."); 659 } 660 661 @Override protected char errNcrControlChar(char ch) throws SAXException { 662 switch (contentNonXmlCharPolicy) { 663 case FATAL: 664 fatal("Character reference expands to a control character (" 665 + toUPlusString((char) value) + ")."); 666 break; 667 case ALTER_INFOSET: 668 ch = '\uFFFD'; 669 // fall through 670 case ALLOW: 671 err("Character reference expands to a control character (" 672 + toUPlusString((char) value) + ")."); 673 } 674 return ch; 675 } 676 677 @Override protected void errNcrCr() throws SAXException { 678 err("A numeric character reference expanded to carriage return."); 679 } 680 681 @Override protected void errNcrInC1Range() throws SAXException { 682 err("A numeric character reference expanded to the C1 controls range."); 683 } 684 685 @Override protected void errEofInPublicId() throws SAXException { 686 err("End of file inside public identifier."); 687 } 688 689 @Override protected void errEofInComment() throws SAXException { 690 err("End of file inside comment."); 691 } 692 693 @Override protected void errEofInDoctype() throws SAXException { 694 err("End of file inside doctype."); 695 } 696 697 @Override protected void errEofInAttributeValue() throws SAXException { 698 err("End of file reached when inside an attribute value. Ignoring tag."); 699 } 700 701 @Override protected void errEofInAttributeName() throws SAXException { 702 err("End of file occurred in an attribute name. Ignoring tag."); 703 } 704 705 @Override protected void errEofWithoutGt() throws SAXException { 706 err("Saw end of file without the previous tag ending with \u201C>\u201D. Ignoring tag."); 707 } 708 709 @Override protected void errEofInTagName() throws SAXException { 710 err("End of file seen when looking for tag name. Ignoring tag."); 711 } 712 713 @Override protected void errEofInEndTag() throws SAXException { 714 err("End of file inside end tag. Ignoring tag."); 715 } 716 717 @Override protected void errEofAfterLt() throws SAXException { 718 err("End of file after \u201C<\u201D."); 719 } 720 721 @Override protected void errNcrOutOfRange() throws SAXException { 722 err("Character reference outside the permissible Unicode range."); 723 } 724 725 @Override protected void errNcrUnassigned() throws SAXException { 726 err("Character reference expands to a permanently unassigned code point."); 727 } 728 729 @Override protected void errDuplicateAttribute() throws SAXException { 730 err("Duplicate attribute \u201C" 731 + attributeName.getLocal(AttributeName.HTML) + "\u201D."); 732 } 733 734 @Override protected void errEofInSystemId() throws SAXException { 735 err("End of file inside system identifier."); 736 } 737 738 @Override protected void errExpectedSystemId() throws SAXException { 739 err("Expected a system identifier but the doctype ended."); 740 } 741 742 @Override protected void errMissingSpaceBeforeDoctypeName() 743 throws SAXException { 744 err("Missing space before doctype name."); 745 } 746 747 @Override protected void errHyphenHyphenBang() throws SAXException { 748 err("\u201C--!\u201D found in comment."); 749 } 750 751 @Override protected void errNcrControlChar() throws SAXException { 752 err("Character reference expands to a control character (" 753 + toUPlusString((char) value) + ")."); 754 } 755 756 @Override protected void errNcrZero() throws SAXException { 757 err("Character reference expands to zero."); 758 } 759 760 @Override protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote() 761 throws SAXException { 762 err("No space between the doctype \u201CSYSTEM\u201D keyword and the quote."); 763 } 764 765 @Override protected void errNoSpaceBetweenPublicAndSystemIds() 766 throws SAXException { 767 err("No space between the doctype public and system identifiers."); 768 } 769 770 @Override protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote() 771 throws SAXException { 772 err("No space between the doctype \u201CPUBLIC\u201D keyword and the quote."); 773 } 774 775 @Override protected void noteAttributeWithoutValue() throws SAXException { 776 note("xhtml2", "Attribute without value"); 777 } 778 779 @Override protected void noteUnquotedAttributeValue() throws SAXException { 780 note("xhtml1", "Unquoted attribute value."); 781 } 782 783 /** 784 * Sets the transitionHandler. 785 * 786 * @param transitionHandler 787 * the transitionHandler to set 788 */ 789 public void setTransitionHandler(TransitionHandler transitionHandler) { 790 this.transitionHandler = transitionHandler; 791 } 792 793 /** 794 * Sets an offset to be added to the position reported to 795 * <code>TransitionHandler</code>. 796 * 797 * @param offset 798 * the offset 799 */ 800 public void setTransitionBaseOffset(int offset) { 801 this.transitionBaseOffset = offset; 802 } 803 804 }