001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * Copyright (c) 2007-2010 Mozilla Foundation 004 * 005 * Permission is hereby granted, free of charge, to any person obtaining a 006 * copy of this software and associated documentation files (the "Software"), 007 * to deal in the Software without restriction, including without limitation 008 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 009 * and/or sell copies of the Software, and to permit persons to whom the 010 * Software is furnished to do so, subject to the following conditions: 011 * 012 * The above copyright notice and this permission notice shall be included in 013 * all copies or substantial portions of the Software. 014 * 015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 021 * DEALINGS IN THE SOFTWARE. 
022 */ 023 024 package nu.validator.htmlparser.sax; 025 026 import java.io.IOException; 027 import java.net.MalformedURLException; 028 import java.net.URL; 029 import java.util.LinkedList; 030 import java.util.List; 031 import java.util.HashMap; 032 033 import nu.validator.htmlparser.common.CharacterHandler; 034 import nu.validator.htmlparser.common.DoctypeExpectation; 035 import nu.validator.htmlparser.common.DocumentModeHandler; 036 import nu.validator.htmlparser.common.Heuristics; 037 import nu.validator.htmlparser.common.TokenHandler; 038 import nu.validator.htmlparser.common.TransitionHandler; 039 import nu.validator.htmlparser.common.XmlViolationPolicy; 040 import nu.validator.htmlparser.impl.ErrorReportingTokenizer; 041 import nu.validator.htmlparser.impl.Tokenizer; 042 import nu.validator.htmlparser.impl.TreeBuilder; 043 import nu.validator.htmlparser.io.Driver; 044 import nu.validator.saxtree.Document; 045 import nu.validator.saxtree.DocumentFragment; 046 import nu.validator.saxtree.TreeParser; 047 048 import org.xml.sax.ContentHandler; 049 import org.xml.sax.DTDHandler; 050 import org.xml.sax.EntityResolver; 051 import org.xml.sax.ErrorHandler; 052 import org.xml.sax.InputSource; 053 import org.xml.sax.Locator; 054 import org.xml.sax.SAXException; 055 import org.xml.sax.SAXNotRecognizedException; 056 import org.xml.sax.SAXNotSupportedException; 057 import org.xml.sax.XMLReader; 058 import org.xml.sax.ext.LexicalHandler; 059 import org.xml.sax.helpers.DefaultHandler; 060 061 /** 062 * This class implements an HTML5 parser that exposes data through the SAX2 063 * interface. 064 * 065 * <p>By default, when using the constructor without arguments, the 066 * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible 067 * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general 068 * XML violation policy. 
To make the parser support non-conforming HTML fully 069 * per the HTML 5 spec while on the other hand potentially violating the SAX2 070 * API contract, set the general XML violation policy to <code>ALLOW</code>. 071 * It is possible to treat XML 1.0 infoset violations as fatal by setting 072 * the general XML violation policy to <code>FATAL</code>. 073 * 074 * <p>By default, this parser doesn't do true streaming but buffers everything 075 * first. The parser can be made truly streaming by calling 076 * <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This 077 * has the consequence that errors that require non-streamable recovery are 078 * treated as fatal. 079 * 080 * <p>By default, in order to make the parse events emulate the parse events 081 * for a DTDless XML document, the parser does not report the doctype through 082 * <code>LexicalHandler</code>. Doctype reporting through 083 * <code>LexicalHandler</code> can be turned on by calling 084 * <code>setReportingDoctype(true)</code>. 
085 * 086 * @version $Id$ 087 * @author hsivonen 088 */ 089 public class HtmlParser implements XMLReader { 090 091 private Driver driver = null; 092 093 private TreeBuilder<?> treeBuilder = null; 094 095 private SAXStreamer saxStreamer = null; // work around javac bug 096 097 private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug 098 099 private ContentHandler contentHandler = null; 100 101 private LexicalHandler lexicalHandler = null; 102 103 private DTDHandler dtdHandler = null; 104 105 private EntityResolver entityResolver = null; 106 107 private ErrorHandler errorHandler = null; 108 109 private DocumentModeHandler documentModeHandler = null; 110 111 private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; 112 113 private boolean checkingNormalization = false; 114 115 private boolean scriptingEnabled = false; 116 117 private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>(); 118 119 private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL; 120 121 private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL; 122 123 private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL; 124 125 private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL; 126 127 private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW; 128 129 private boolean html4ModeCompatibleWithXhtml1Schemata = false; 130 131 private boolean mappingLangToXmlLang = false; 132 133 private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL; 134 135 private boolean reportingDoctype = true; 136 137 private ErrorHandler treeBuilderErrorHandler = null; 138 139 private Heuristics heuristics = Heuristics.NONE; 140 141 private HashMap<String, String> errorProfileMap = null; 142 143 private TransitionHandler transitionHandler = null; 144 145 /** 146 * Instantiates the parser with a fatal XML violation policy. 
147 * 148 */ 149 public HtmlParser() { 150 this(XmlViolationPolicy.FATAL); 151 } 152 153 /** 154 * Instantiates the parser with a specific XML violation policy. 155 * @param xmlPolicy the policy 156 */ 157 public HtmlParser(XmlViolationPolicy xmlPolicy) { 158 setXmlPolicy(xmlPolicy); 159 } 160 161 private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) { 162 if (errorHandler == null && transitionHandler == null && 163 contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) { 164 return new Tokenizer(handler, newAttributesEachTime); 165 } 166 ErrorReportingTokenizer tokenizer = 167 new ErrorReportingTokenizer(handler, newAttributesEachTime); 168 tokenizer.setErrorProfile(errorProfileMap); 169 return tokenizer; 170 } 171 172 /** 173 * This class wraps different tree builders depending on configuration. This 174 * method does the work of hiding this from the user of the class. 175 */ 176 private void lazyInit() { 177 if (driver == null) { 178 if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) { 179 this.saxTreeBuilder = new SAXTreeBuilder(); 180 this.treeBuilder = this.saxTreeBuilder; 181 this.saxStreamer = null; 182 this.driver = new Driver(newTokenizer(treeBuilder, true)); 183 } else { 184 this.saxStreamer = new SAXStreamer(); 185 this.treeBuilder = this.saxStreamer; 186 this.saxTreeBuilder = null; 187 this.driver = new Driver(newTokenizer(treeBuilder, false)); 188 } 189 this.driver.setErrorHandler(errorHandler); 190 this.driver.setTransitionHandler(transitionHandler); 191 this.treeBuilder.setErrorHandler(treeBuilderErrorHandler); 192 this.driver.setCheckingNormalization(checkingNormalization); 193 this.driver.setCommentPolicy(commentPolicy); 194 this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); 195 this.driver.setContentSpacePolicy(contentSpacePolicy); 196 this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); 197 this.driver.setMappingLangToXmlLang(mappingLangToXmlLang); 198 
this.driver.setXmlnsPolicy(xmlnsPolicy); 199 this.driver.setHeuristics(heuristics); 200 for (CharacterHandler characterHandler : characterHandlers) { 201 this.driver.addCharacterHandler(characterHandler); 202 } 203 this.treeBuilder.setDoctypeExpectation(doctypeExpectation); 204 this.treeBuilder.setDocumentModeHandler(documentModeHandler); 205 this.treeBuilder.setIgnoringComments(lexicalHandler == null); 206 this.treeBuilder.setScriptingEnabled(scriptingEnabled); 207 this.treeBuilder.setReportingDoctype(reportingDoctype); 208 this.treeBuilder.setNamePolicy(namePolicy); 209 if (saxStreamer != null) { 210 saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler() 211 : contentHandler); 212 saxStreamer.setLexicalHandler(lexicalHandler); 213 driver.setAllowRewinding(false); 214 } 215 } 216 } 217 218 /** 219 * @see org.xml.sax.XMLReader#getContentHandler() 220 */ 221 public ContentHandler getContentHandler() { 222 return contentHandler; 223 } 224 225 /** 226 * @see org.xml.sax.XMLReader#getDTDHandler() 227 */ 228 public DTDHandler getDTDHandler() { 229 return dtdHandler; 230 } 231 232 /** 233 * @see org.xml.sax.XMLReader#getEntityResolver() 234 */ 235 public EntityResolver getEntityResolver() { 236 return entityResolver; 237 } 238 239 /** 240 * @see org.xml.sax.XMLReader#getErrorHandler() 241 */ 242 public ErrorHandler getErrorHandler() { 243 return errorHandler; 244 } 245 246 /** 247 * Exposes the configuration of the emulated XML parser as well as 248 * boolean-valued configuration without using non-<code>XMLReader</code> 249 * getters directly. 
*
     * <dl>
     * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/is-standalone</code></dt>
     * <dd><code>true</code></dd>
     * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/namespaces</code></dt>
     * <dd><code>true</code></dd>
     * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt>
     * <dd><code>true</code></dd>
     * <dt><code>http://xml.org/sax/features/string-interning</code></dt>
     * <dd><code>true</code></dd>
     * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
     * <dd><code>isCheckingNormalization</code></dd>
     * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/use-locator2</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/validation</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
     * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
     * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
     * <dd><code>isMappingLangToXmlLang</code></dd>
     *
<dt><code>http://validator.nu/features/scripting-enabled</code></dt> 287 * <dd><code>isScriptingEnabled</code></dd> 288 * </dl> 289 * 290 * @param name 291 * feature URI string 292 * @return a value per the list above 293 * @see org.xml.sax.XMLReader#getFeature(java.lang.String) 294 */ 295 public boolean getFeature(String name) throws SAXNotRecognizedException, 296 SAXNotSupportedException { 297 if ("http://xml.org/sax/features/external-general-entities".equals(name)) { 298 return false; 299 } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) { 300 return false; 301 } else if ("http://xml.org/sax/features/is-standalone".equals(name)) { 302 return true; 303 } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) { 304 return false; 305 } else if ("http://xml.org/sax/features/namespaces".equals(name)) { 306 return true; 307 } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) { 308 return false; 309 } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) { 310 return true; // default value--applicable scenario never happens 311 } else if ("http://xml.org/sax/features/string-interning".equals(name)) { 312 return true; 313 } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) { 314 return isCheckingNormalization(); // the checks aren't really per 315 // XML 1.1 316 } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) { 317 return false; 318 } else if ("http://xml.org/sax/features/use-locator2".equals(name)) { 319 return false; 320 } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) { 321 return false; 322 } else if ("http://xml.org/sax/features/validation".equals(name)) { 323 return false; 324 } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) { 325 return false; 326 } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) { 327 return false; 328 } else if 
("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) { 329 return isHtml4ModeCompatibleWithXhtml1Schemata(); 330 } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) { 331 return isMappingLangToXmlLang(); 332 } else if ("http://validator.nu/features/scripting-enabled".equals(name)) { 333 return isScriptingEnabled(); 334 } else { 335 throw new SAXNotRecognizedException(); 336 } 337 } 338 339 /** 340 * Allows <code>XMLReader</code>-level access to non-boolean valued 341 * getters. 342 * 343 * <p> 344 * The properties are mapped as follows: 345 * 346 * <dl> 347 * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt> 348 * <dd><code>"1.0"</code></dd> 349 * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt> 350 * <dd><code>getLexicalHandler</code></dd> 351 * <dt><code>http://validator.nu/properties/content-space-policy</code></dt> 352 * <dd><code>getContentSpacePolicy</code></dd> 353 * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt> 354 * <dd><code>getContentNonXmlCharPolicy</code></dd> 355 * <dt><code>http://validator.nu/properties/comment-policy</code></dt> 356 * <dd><code>getCommentPolicy</code></dd> 357 * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt> 358 * <dd><code>getXmlnsPolicy</code></dd> 359 * <dt><code>http://validator.nu/properties/name-policy</code></dt> 360 * <dd><code>getNamePolicy</code></dd> 361 * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt> 362 * <dd><code>getStreamabilityViolationPolicy</code></dd> 363 * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt> 364 * <dd><code>getDocumentModeHandler</code></dd> 365 * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt> 366 * <dd><code>getDoctypeExpectation</code></dd> 367 * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> 368 * </dl> 369 * 370 * 
@param name 371 * property URI string 372 * @return a value per the list above 373 * @see org.xml.sax.XMLReader#getProperty(java.lang.String) 374 */ 375 public Object getProperty(String name) throws SAXNotRecognizedException, 376 SAXNotSupportedException { 377 if ("http://xml.org/sax/properties/declaration-handler".equals(name)) { 378 throw new SAXNotSupportedException( 379 "This parser does not suppert DeclHandler."); 380 } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) { 381 return "1.0"; // Emulating an XML 1.1 parser is not supported. 382 } else if ("http://xml.org/sax/properties/dom-node".equals(name)) { 383 throw new SAXNotSupportedException( 384 "This parser does not walk the DOM."); 385 } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) { 386 return getLexicalHandler(); 387 } else if ("http://xml.org/sax/properties/xml-string".equals(name)) { 388 throw new SAXNotSupportedException( 389 "This parser does not expose the source as a string."); 390 } else if ("http://validator.nu/properties/content-space-policy".equals(name)) { 391 return getContentSpacePolicy(); 392 } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) { 393 return getContentNonXmlCharPolicy(); 394 } else if ("http://validator.nu/properties/comment-policy".equals(name)) { 395 return getCommentPolicy(); 396 } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) { 397 return getXmlnsPolicy(); 398 } else if ("http://validator.nu/properties/name-policy".equals(name)) { 399 return getNamePolicy(); 400 } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) { 401 return getStreamabilityViolationPolicy(); 402 } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) { 403 return getDocumentModeHandler(); 404 } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) { 405 return getDoctypeExpectation(); 406 } else if 
("http://validator.nu/properties/xml-policy".equals(name)) { 407 throw new SAXNotSupportedException( 408 "Cannot get a convenience setter."); 409 } else if ("http://validator.nu/properties/heuristics".equals(name)) { 410 return getHeuristics(); 411 } else { 412 throw new SAXNotRecognizedException(); 413 } 414 } 415 416 /** 417 * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource) 418 */ 419 public void parse(InputSource input) throws IOException, SAXException { 420 lazyInit(); 421 try { 422 treeBuilder.setFragmentContext(null); 423 tokenize(input); 424 } finally { 425 if (saxTreeBuilder != null) { 426 Document document = saxTreeBuilder.getDocument(); 427 if (document != null) { 428 new TreeParser(contentHandler, lexicalHandler).parse(document); 429 } 430 } 431 } 432 } 433 434 /** 435 * Parses a fragment. 436 * 437 * @param input the input to parse 438 * @param context the name of the context element 439 * @throws IOException 440 * @throws SAXException 441 */ 442 public void parseFragment(InputSource input, String context) 443 throws IOException, SAXException { 444 lazyInit(); 445 try { 446 treeBuilder.setFragmentContext(context.intern()); 447 tokenize(input); 448 } finally { 449 if (saxTreeBuilder != null) { 450 DocumentFragment fragment = saxTreeBuilder.getDocumentFragment(); 451 new TreeParser(contentHandler, lexicalHandler).parse(fragment); 452 } 453 } 454 } 455 456 /** 457 * @param is 458 * @throws SAXException 459 * @throws IOException 460 * @throws MalformedURLException 461 */ 462 private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException { 463 if (is == null) { 464 throw new IllegalArgumentException("Null input."); 465 } 466 if (is.getByteStream() == null && is.getCharacterStream() == null) { 467 String systemId = is.getSystemId(); 468 if (systemId == null) { 469 throw new IllegalArgumentException("No byte stream, no character stream nor URI."); 470 } 471 if (entityResolver != null) { 472 is = 
entityResolver.resolveEntity(is.getPublicId(), systemId); 473 } 474 if (is.getByteStream() == null || is.getCharacterStream() == null) { 475 is = new InputSource(); 476 is.setSystemId(systemId); 477 is.setByteStream(new URL(systemId).openStream()); 478 } 479 } 480 driver.tokenize(is); 481 } 482 483 /** 484 * @see org.xml.sax.XMLReader#parse(java.lang.String) 485 */ 486 public void parse(String systemId) throws IOException, SAXException { 487 parse(new InputSource(systemId)); 488 } 489 490 /** 491 * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler) 492 */ 493 public void setContentHandler(ContentHandler handler) { 494 contentHandler = handler; 495 if (saxStreamer != null) { 496 saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler() 497 : contentHandler); 498 } 499 } 500 501 /** 502 * Sets the lexical handler. 503 * @param handler the hander. 504 */ 505 public void setLexicalHandler(LexicalHandler handler) { 506 lexicalHandler = handler; 507 if (treeBuilder != null) { 508 treeBuilder.setIgnoringComments(handler == null); 509 if (saxStreamer != null) { 510 saxStreamer.setLexicalHandler(handler); 511 } 512 } 513 } 514 515 /** 516 * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler) 517 */ 518 public void setDTDHandler(DTDHandler handler) { 519 dtdHandler = handler; 520 } 521 522 /** 523 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) 524 */ 525 public void setEntityResolver(EntityResolver resolver) { 526 entityResolver = resolver; 527 } 528 529 /** 530 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) 531 */ 532 public void setErrorHandler(ErrorHandler handler) { 533 errorHandler = handler; 534 treeBuilderErrorHandler = handler; 535 driver = null; 536 } 537 538 public void setTransitionHandler(TransitionHandler handler) { 539 transitionHandler = handler; 540 driver = null; 541 } 542 543 /** 544 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) 
545 * @deprecated For Validator.nu internal use 546 */ 547 public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) { 548 treeBuilderErrorHandler = handler; 549 if (driver != null) { 550 treeBuilder.setErrorHandler(handler); 551 } 552 } 553 554 /** 555 * Sets a boolean feature without having to use non-<code>XMLReader</code> 556 * setters directly. 557 * 558 * <p> 559 * The supported features are: 560 * 561 * <dl> 562 * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> 563 * <dd><code>setCheckingNormalization</code></dd> 564 * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt> 565 * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd> 566 * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt> 567 * <dd><code>setMappingLangToXmlLang</code></dd> 568 * <dt><code>http://validator.nu/features/scripting-enabled</code></dt> 569 * <dd><code>setScriptingEnabled</code></dd> 570 * </dl> 571 * 572 * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean) 573 */ 574 public void setFeature(String name, boolean value) 575 throws SAXNotRecognizedException, SAXNotSupportedException { 576 if ("http://xml.org/sax/features/external-general-entities".equals(name)) { 577 if (value) { 578 throw new SAXNotSupportedException("Cannot set " + name + "."); 579 } 580 } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) { 581 if (value) { 582 throw new SAXNotSupportedException("Cannot set " + name + "."); 583 } 584 } else if ("http://xml.org/sax/features/is-standalone".equals(name)) { 585 if (!value) { 586 throw new SAXNotSupportedException("Cannot set " + name + "."); 587 } 588 } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) { 589 if (value) { 590 throw new SAXNotSupportedException("Cannot set " + name + "."); 591 } 592 } else if ("http://xml.org/sax/features/namespaces".equals(name)) { 593 if 
(!value) { 594 throw new SAXNotSupportedException("Cannot set " + name + "."); 595 } 596 } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) { 597 if (value) { 598 throw new SAXNotSupportedException("Cannot set " + name + "."); 599 } 600 } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) { 601 if (!value) { 602 throw new SAXNotSupportedException("Cannot set " + name + "."); 603 } 604 } else if ("http://xml.org/sax/features/string-interning".equals(name)) { 605 if (!value) { 606 throw new SAXNotSupportedException("Cannot set " + name + "."); 607 } 608 } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) { 609 setCheckingNormalization(value); 610 } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) { 611 if (value) { 612 throw new SAXNotSupportedException("Cannot set " + name + "."); 613 } 614 } else if ("http://xml.org/sax/features/use-locator2".equals(name)) { 615 if (value) { 616 throw new SAXNotSupportedException("Cannot set " + name + "."); 617 } 618 } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) { 619 if (value) { 620 throw new SAXNotSupportedException("Cannot set " + name + "."); 621 } 622 } else if ("http://xml.org/sax/features/validation".equals(name)) { 623 if (value) { 624 throw new SAXNotSupportedException("Cannot set " + name + "."); 625 } 626 } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) { 627 if (value) { 628 throw new SAXNotSupportedException("Cannot set " + name + "."); 629 } 630 } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) { 631 if (value) { 632 throw new SAXNotSupportedException("Cannot set " + name + "."); 633 } 634 } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) { 635 setHtml4ModeCompatibleWithXhtml1Schemata(value); 636 } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) { 637 setMappingLangToXmlLang(value); 
638 } else if ("http://validator.nu/features/scripting-enabled".equals(name)) { 639 setScriptingEnabled(value); 640 } else { 641 throw new SAXNotRecognizedException(); 642 } 643 } 644 645 /** 646 * Sets a non-boolean property without having to use non-<code>XMLReader</code> 647 * setters directly. 648 * 649 * <dl> 650 * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt> 651 * <dd><code>setLexicalHandler</code></dd> 652 * <dt><code>http://validator.nu/properties/content-space-policy</code></dt> 653 * <dd><code>setContentSpacePolicy</code></dd> 654 * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt> 655 * <dd><code>setContentNonXmlCharPolicy</code></dd> 656 * <dt><code>http://validator.nu/properties/comment-policy</code></dt> 657 * <dd><code>setCommentPolicy</code></dd> 658 * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt> 659 * <dd><code>setXmlnsPolicy</code></dd> 660 * <dt><code>http://validator.nu/properties/name-policy</code></dt> 661 * <dd><code>setNamePolicy</code></dd> 662 * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt> 663 * <dd><code>setStreamabilityViolationPolicy</code></dd> 664 * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt> 665 * <dd><code>setDocumentModeHandler</code></dd> 666 * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt> 667 * <dd><code>setDoctypeExpectation</code></dd> 668 * <dt><code>http://validator.nu/properties/xml-policy</code></dt> 669 * <dd><code>setXmlPolicy</code></dd> 670 * </dl> 671 * 672 * @see org.xml.sax.XMLReader#setProperty(java.lang.String, 673 * java.lang.Object) 674 */ 675 public void setProperty(String name, Object value) 676 throws SAXNotRecognizedException, SAXNotSupportedException { 677 if ("http://xml.org/sax/properties/declaration-handler".equals(name)) { 678 throw new SAXNotSupportedException( 679 "This parser does not suppert DeclHandler."); 680 } else if 
("http://xml.org/sax/properties/document-xml-version".equals(name)) { 681 throw new SAXNotSupportedException( 682 "Can't set document-xml-version."); 683 } else if ("http://xml.org/sax/properties/dom-node".equals(name)) { 684 throw new SAXNotSupportedException("Can't set dom-node."); 685 } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) { 686 setLexicalHandler((LexicalHandler) value); 687 } else if ("http://xml.org/sax/properties/xml-string".equals(name)) { 688 throw new SAXNotSupportedException("Can't set xml-string."); 689 } else if ("http://validator.nu/properties/content-space-policy".equals(name)) { 690 setContentSpacePolicy((XmlViolationPolicy) value); 691 } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) { 692 setContentNonXmlCharPolicy((XmlViolationPolicy) value); 693 } else if ("http://validator.nu/properties/comment-policy".equals(name)) { 694 setCommentPolicy((XmlViolationPolicy) value); 695 } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) { 696 setXmlnsPolicy((XmlViolationPolicy) value); 697 } else if ("http://validator.nu/properties/name-policy".equals(name)) { 698 setNamePolicy((XmlViolationPolicy) value); 699 } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) { 700 setStreamabilityViolationPolicy((XmlViolationPolicy) value); 701 } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) { 702 setDocumentModeHandler((DocumentModeHandler) value); 703 } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) { 704 setDoctypeExpectation((DoctypeExpectation) value); 705 } else if ("http://validator.nu/properties/xml-policy".equals(name)) { 706 setXmlPolicy((XmlViolationPolicy) value); 707 } else if ("http://validator.nu/properties/heuristics".equals(name)) { 708 setHeuristics((Heuristics) value); 709 } else { 710 throw new SAXNotRecognizedException(); 711 } 712 } 713 714 /** 715 * Indicates whether 
NFC normalization of source is being checked. 716 * @return <code>true</code> if NFC normalization of source is being checked. 717 * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() 718 */ 719 public boolean isCheckingNormalization() { 720 return checkingNormalization; 721 } 722 723 /** 724 * Toggles the checking of the NFC normalization of source. 725 * @param enable <code>true</code> to check normalization 726 * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) 727 */ 728 public void setCheckingNormalization(boolean enable) { 729 this.checkingNormalization = enable; 730 if (driver != null) { 731 driver.setCheckingNormalization(checkingNormalization); 732 } 733 } 734 735 /** 736 * Sets the policy for consecutive hyphens in comments. 737 * @param commentPolicy the policy 738 * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 739 */ 740 public void setCommentPolicy(XmlViolationPolicy commentPolicy) { 741 this.commentPolicy = commentPolicy; 742 if (driver != null) { 743 driver.setCommentPolicy(commentPolicy); 744 } 745 } 746 747 /** 748 * Sets the policy for non-XML characters except white space. 749 * @param contentNonXmlCharPolicy the policy 750 * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 751 */ 752 public void setContentNonXmlCharPolicy( 753 XmlViolationPolicy contentNonXmlCharPolicy) { 754 this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; 755 driver = null; 756 } 757 758 /** 759 * Sets the policy for non-XML white space. 
760 * @param contentSpacePolicy the policy 761 * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 762 */ 763 public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { 764 this.contentSpacePolicy = contentSpacePolicy; 765 if (driver != null) { 766 driver.setContentSpacePolicy(contentSpacePolicy); 767 } 768 } 769 770 /** 771 * Whether the parser considers scripting to be enabled for noscript treatment. 772 * 773 * @return <code>true</code> if enabled 774 * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled() 775 */ 776 public boolean isScriptingEnabled() { 777 return scriptingEnabled; 778 } 779 780 /** 781 * Sets whether the parser considers scripting to be enabled for noscript treatment. 782 * @param scriptingEnabled <code>true</code> to enable 783 * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) 784 */ 785 public void setScriptingEnabled(boolean scriptingEnabled) { 786 this.scriptingEnabled = scriptingEnabled; 787 if (treeBuilder != null) { 788 treeBuilder.setScriptingEnabled(scriptingEnabled); 789 } 790 } 791 792 /** 793 * Returns the doctype expectation. 794 * 795 * @return the doctypeExpectation 796 */ 797 public DoctypeExpectation getDoctypeExpectation() { 798 return doctypeExpectation; 799 } 800 801 /** 802 * Sets the doctype expectation. 803 * 804 * @param doctypeExpectation 805 * the doctypeExpectation to set 806 * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation) 807 */ 808 public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { 809 this.doctypeExpectation = doctypeExpectation; 810 if (treeBuilder != null) { 811 treeBuilder.setDoctypeExpectation(doctypeExpectation); 812 } 813 } 814 815 /** 816 * Returns the document mode handler. 
817 * 818 * @return the documentModeHandler 819 */ 820 public DocumentModeHandler getDocumentModeHandler() { 821 return documentModeHandler; 822 } 823 824 /** 825 * Sets the document mode handler. 826 * 827 * @param documentModeHandler 828 * the documentModeHandler to set 829 * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler) 830 */ 831 public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { 832 this.documentModeHandler = documentModeHandler; 833 } 834 835 /** 836 * Returns the streamabilityViolationPolicy. 837 * 838 * @return the streamabilityViolationPolicy 839 */ 840 public XmlViolationPolicy getStreamabilityViolationPolicy() { 841 return streamabilityViolationPolicy; 842 } 843 844 /** 845 * Sets the streamabilityViolationPolicy. 846 * 847 * @param streamabilityViolationPolicy 848 * the streamabilityViolationPolicy to set 849 */ 850 public void setStreamabilityViolationPolicy( 851 XmlViolationPolicy streamabilityViolationPolicy) { 852 this.streamabilityViolationPolicy = streamabilityViolationPolicy; 853 driver = null; 854 } 855 856 /** 857 * Whether the HTML 4 mode reports boolean attributes in a way that repeats 858 * the name in the value. 859 * @param html4ModeCompatibleWithXhtml1Schemata 860 */ 861 public void setHtml4ModeCompatibleWithXhtml1Schemata( 862 boolean html4ModeCompatibleWithXhtml1Schemata) { 863 this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; 864 if (driver != null) { 865 driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); 866 } 867 } 868 869 /** 870 * Returns the <code>Locator</code> during parse. 871 * @return the <code>Locator</code> 872 */ 873 public Locator getDocumentLocator() { 874 return driver.getDocumentLocator(); 875 } 876 877 /** 878 * Whether the HTML 4 mode reports boolean attributes in a way that repeats 879 * the name in the value. 
880 * 881 * @return the html4ModeCompatibleWithXhtml1Schemata 882 */ 883 public boolean isHtml4ModeCompatibleWithXhtml1Schemata() { 884 return html4ModeCompatibleWithXhtml1Schemata; 885 } 886 887 /** 888 * Whether <code>lang</code> is mapped to <code>xml:lang</code>. 889 * @param mappingLangToXmlLang 890 * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) 891 */ 892 public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { 893 this.mappingLangToXmlLang = mappingLangToXmlLang; 894 if (driver != null) { 895 driver.setMappingLangToXmlLang(mappingLangToXmlLang); 896 } 897 } 898 899 /** 900 * Whether <code>lang</code> is mapped to <code>xml:lang</code>. 901 * 902 * @return the mappingLangToXmlLang 903 */ 904 public boolean isMappingLangToXmlLang() { 905 return mappingLangToXmlLang; 906 } 907 908 /** 909 * Whether the <code>xmlns</code> attribute on the root element is 910 * passed to through. (FATAL not allowed.) 911 * @param xmlnsPolicy 912 * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 913 */ 914 public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { 915 if (xmlnsPolicy == XmlViolationPolicy.FATAL) { 916 throw new IllegalArgumentException("Can't use FATAL here."); 917 } 918 this.xmlnsPolicy = xmlnsPolicy; 919 if (driver != null) { 920 driver.setXmlnsPolicy(xmlnsPolicy); 921 } 922 } 923 924 /** 925 * Returns the xmlnsPolicy. 926 * 927 * @return the xmlnsPolicy 928 */ 929 public XmlViolationPolicy getXmlnsPolicy() { 930 return xmlnsPolicy; 931 } 932 933 /** 934 * Returns the lexicalHandler. 935 * 936 * @return the lexicalHandler 937 */ 938 public LexicalHandler getLexicalHandler() { 939 return lexicalHandler; 940 } 941 942 /** 943 * Returns the commentPolicy. 944 * 945 * @return the commentPolicy 946 */ 947 public XmlViolationPolicy getCommentPolicy() { 948 return commentPolicy; 949 } 950 951 /** 952 * Returns the contentNonXmlCharPolicy. 
953 * 954 * @return the contentNonXmlCharPolicy 955 */ 956 public XmlViolationPolicy getContentNonXmlCharPolicy() { 957 return contentNonXmlCharPolicy; 958 } 959 960 /** 961 * Returns the contentSpacePolicy. 962 * 963 * @return the contentSpacePolicy 964 */ 965 public XmlViolationPolicy getContentSpacePolicy() { 966 return contentSpacePolicy; 967 } 968 969 /** 970 * @param reportingDoctype 971 * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean) 972 */ 973 public void setReportingDoctype(boolean reportingDoctype) { 974 this.reportingDoctype = reportingDoctype; 975 if (treeBuilder != null) { 976 treeBuilder.setReportingDoctype(reportingDoctype); 977 } 978 } 979 980 /** 981 * Returns the reportingDoctype. 982 * 983 * @return the reportingDoctype 984 */ 985 public boolean isReportingDoctype() { 986 return reportingDoctype; 987 } 988 989 /** 990 * @param errorProfile 991 * @see nu.validator.htmlparser.impl.errorReportingTokenizer#setErrorProfile(set) 992 */ 993 public void setErrorProfile(HashMap<String, String> errorProfileMap) { 994 this.errorProfileMap = errorProfileMap; 995 } 996 997 /** 998 * The policy for non-NCName element and attribute names. 999 * @param namePolicy 1000 * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 1001 */ 1002 public void setNamePolicy(XmlViolationPolicy namePolicy) { 1003 this.namePolicy = namePolicy; 1004 if (driver != null) { 1005 driver.setNamePolicy(namePolicy); 1006 treeBuilder.setNamePolicy(namePolicy); 1007 } 1008 } 1009 1010 /** 1011 * Sets the encoding sniffing heuristics. 
1012 * 1013 * @param heuristics the heuristics to set 1014 * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) 1015 */ 1016 public void setHeuristics(Heuristics heuristics) { 1017 this.heuristics = heuristics; 1018 if (driver != null) { 1019 driver.setHeuristics(heuristics); 1020 } 1021 } 1022 1023 public Heuristics getHeuristics() { 1024 return this.heuristics; 1025 } 1026 1027 /** 1028 * This is a catch-all convenience method for setting name, xmlns, content space, 1029 * content non-XML char and comment policies in one go. This does not affect the 1030 * streamability policy or doctype reporting. 1031 * 1032 * @param xmlPolicy 1033 */ 1034 public void setXmlPolicy(XmlViolationPolicy xmlPolicy) { 1035 setNamePolicy(xmlPolicy); 1036 setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy); 1037 setContentSpacePolicy(xmlPolicy); 1038 setContentNonXmlCharPolicy(xmlPolicy); 1039 setCommentPolicy(xmlPolicy); 1040 } 1041 1042 /** 1043 * The policy for non-NCName element and attribute names. 1044 * 1045 * @return the namePolicy 1046 */ 1047 public XmlViolationPolicy getNamePolicy() { 1048 return namePolicy; 1049 } 1050 1051 /** 1052 * Does nothing. 1053 * @deprecated 1054 */ 1055 public void setBogusXmlnsPolicy( 1056 XmlViolationPolicy bogusXmlnsPolicy) { 1057 } 1058 1059 /** 1060 * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>. 1061 * @deprecated 1062 * @return <code>XmlViolationPolicy.ALTER_INFOSET</code> 1063 */ 1064 public XmlViolationPolicy getBogusXmlnsPolicy() { 1065 return XmlViolationPolicy.ALTER_INFOSET; 1066 } 1067 1068 public void addCharacterHandler(CharacterHandler characterHandler) { 1069 this.characterHandlers.add(characterHandler); 1070 if (driver != null) { 1071 driver.addCharacterHandler(characterHandler); 1072 } 1073 } 1074 }