001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * Copyright (c) 2007 Mozilla Foundation 004 * 005 * Permission is hereby granted, free of charge, to any person obtaining a 006 * copy of this software and associated documentation files (the "Software"), 007 * to deal in the Software without restriction, including without limitation 008 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 009 * and/or sell copies of the Software, and to permit persons to whom the 010 * Software is furnished to do so, subject to the following conditions: 011 * 012 * The above copyright notice and this permission notice shall be included in 013 * all copies or substantial portions of the Software. 014 * 015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 021 * DEALINGS IN THE SOFTWARE. 022 */ 023 024 package nu.validator.htmlparser.sax; 025 026 import java.io.IOException; 027 import java.net.MalformedURLException; 028 import java.net.URL; 029 import java.util.LinkedList; 030 import java.util.List; 031 032 import nu.validator.htmlparser.common.DoctypeExpectation; 033 import nu.validator.htmlparser.common.DocumentModeHandler; 034 import nu.validator.htmlparser.common.XmlViolationPolicy; 035 import nu.validator.htmlparser.impl.CharacterHandler; 036 import nu.validator.htmlparser.impl.Tokenizer; 037 import nu.validator.htmlparser.impl.TreeBuilder; 038 import nu.validator.saxtree.Document; 039 import nu.validator.saxtree.DocumentFragment; 040 import nu.validator.saxtree.TreeParser; 041 042 import org.xml.sax.ContentHandler; 043 import org.xml.sax.DTDHandler; 044 import org.xml.sax.EntityResolver; 045 import org.xml.sax.ErrorHandler; 046 import org.xml.sax.InputSource; 047 import org.xml.sax.Locator; 048 import org.xml.sax.SAXException; 049 import org.xml.sax.SAXNotRecognizedException; 050 import org.xml.sax.SAXNotSupportedException; 051 import org.xml.sax.XMLReader; 052 import org.xml.sax.ext.LexicalHandler; 053 import org.xml.sax.helpers.DefaultHandler; 054 055 /** 056 * This class implements an HTML5 parser that exposes data through the SAX2 057 * interface. 058 * 059 * <p>By default, when using the constructor without arguments, the 060 * this parser treats XML 1.0-incompatible infosets as fatal errors in 061 * order to adhere to the SAX2 API contract strictly. This corresponds to 062 * <code>FATAL</code> as the general XML violation policy. To make the parser 063 * support non-conforming HTML fully per the HTML 5 spec while on the other 064 * hand potentially violating the SAX2 API contract, set the general XML 065 * violation policy to <code>ALLOW</code>. Handling all input without fatal 066 * errors and without violating the SAX2 API contract is possible by setting 067 * the general XML violation policy to <code>ALTER_INFOSET</code>. <em>This 068 * makes the parser non-conforming</em> but is probably the most useful 069 * setting for most applications. 070 * 071 * <p>By default, this parser doesn't do true streaming but buffers everything 072 * first. The parser can be made truly streaming by calling 073 * <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This 074 * has the consequence that errors that require non-streamable recovery are 075 * treated as fatal. 076 * 077 * <p>By default, in order to make the parse events emulate the parse events 078 * for a DTDless XML document, the parser does not report the doctype through 079 * <code>LexicalHandler</code>. Doctype reporting through 080 * <code>LexicalHandler</code> can be turned on by calling 081 * <code>setReportingDoctype(true)</code>. 082 * 083 * @version $Id: HtmlParser.java 161 2007-10-02 09:10:00Z hsivonen $ 084 * @author hsivonen 085 */ 086 public class HtmlParser implements XMLReader { 087 088 private Tokenizer tokenizer = null; 089 090 private TreeBuilder<?> treeBuilder = null; 091 092 private SAXStreamer saxStreamer = null; // work around javac bug 093 094 private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug 095 096 private ContentHandler contentHandler = null; 097 098 private LexicalHandler lexicalHandler = null; 099 100 private DTDHandler dtdHandler = null; 101 102 private EntityResolver entityResolver = null; 103 104 private ErrorHandler errorHandler = null; 105 106 private DocumentModeHandler documentModeHandler = null; 107 108 private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; 109 110 private boolean checkingNormalization = false; 111 112 private boolean scriptingEnabled = false; 113 114 private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>(); 115 116 private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL; 117 118 private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL; 119 120 private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL; 121 122 private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL; 123 124 private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW; 125 126 private boolean html4ModeCompatibleWithXhtml1Schemata; 127 128 private boolean mappingLangToXmlLang; 129 130 private XmlViolationPolicy xmlnsPolicy; 131 132 private XmlViolationPolicy bogusXmlnsPolicy; 133 134 private boolean reportingDoctype = true; 135 136 private ErrorHandler treeBuilderErrorHandler; 137 138 /** 139 * Instantiates the parser with a fatal XML violation policy. 140 * 141 */ 142 public HtmlParser() { 143 this(XmlViolationPolicy.FATAL); 144 } 145 146 /** 147 * Instantiates the parser with a specific XML violation policy. 148 * @param xmlPolicy the policy 149 */ 150 public HtmlParser(XmlViolationPolicy xmlPolicy) { 151 setXmlPolicy(xmlPolicy); 152 } 153 154 /** 155 * This class wraps differnt tree builders depending on configuration. This 156 * method does the work of hiding this from the user of the class. 157 */ 158 private void lazyInit() { 159 if (tokenizer == null) { 160 if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) { 161 this.saxTreeBuilder = new SAXTreeBuilder(); 162 this.treeBuilder = this.saxTreeBuilder; 163 this.saxStreamer = null; 164 } else { 165 this.saxStreamer = new SAXStreamer(); 166 this.treeBuilder = this.saxStreamer; 167 this.saxTreeBuilder = null; 168 } 169 this.tokenizer = new Tokenizer(treeBuilder); 170 this.tokenizer.setErrorHandler(errorHandler); 171 this.treeBuilder.setErrorHandler(treeBuilderErrorHandler); 172 this.tokenizer.setCheckingNormalization(checkingNormalization); 173 this.tokenizer.setCommentPolicy(commentPolicy); 174 this.tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); 175 this.tokenizer.setContentSpacePolicy(contentSpacePolicy); 176 this.tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); 177 this.tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang); 178 this.tokenizer.setXmlnsPolicy(xmlnsPolicy); 179 for (CharacterHandler characterHandler : characterHandlers) { 180 this.tokenizer.addCharacterHandler(characterHandler); 181 } 182 this.treeBuilder.setDoctypeExpectation(doctypeExpectation); 183 this.treeBuilder.setDocumentModeHandler(documentModeHandler); 184 this.treeBuilder.setIgnoringComments(lexicalHandler == null); 185 this.treeBuilder.setScriptingEnabled(scriptingEnabled); 186 this.treeBuilder.setReportingDoctype(reportingDoctype); 187 if (saxStreamer != null) { 188 saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler() 189 : contentHandler); 190 saxStreamer.setLexicalHandler(lexicalHandler); 191 } 192 } 193 } 194 195 /** 196 * @see org.xml.sax.XMLReader#getContentHandler() 197 */ 198 public ContentHandler getContentHandler() { 199 return contentHandler; 200 } 201 202 /** 203 * @see org.xml.sax.XMLReader#getDTDHandler() 204 */ 205 public DTDHandler getDTDHandler() { 206 return dtdHandler; 207 } 208 209 /** 210 * @see org.xml.sax.XMLReader#getEntityResolver() 211 */ 212 public EntityResolver getEntityResolver() { 213 return entityResolver; 214 } 215 216 /** 217 * @see org.xml.sax.XMLReader#getErrorHandler() 218 */ 219 public ErrorHandler getErrorHandler() { 220 return errorHandler; 221 } 222 223 /** 224 * Exposes the configuration of the emulated XML parser as well as 225 * boolean-valued configuration without using non-<code>XMLReader</code> 226 * getters directly. 227 * 228 * <dl> 229 * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt> 230 * <dd><code>false</code></dd> 231 * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt> 232 * <dd><code>false</code></dd> 233 * <dt><code>http://xml.org/sax/features/is-standalone</code></dt> 234 * <dd><code>true</code></dd> 235 * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt> 236 * <dd><code>false</code></dd> 237 * <dt><code>http://xml.org/sax/features/namespaces</code></dt> 238 * <dd><code>true</code></dd> 239 * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt> 240 * <dd><code>false</code></dd> 241 * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt> 242 * <dd><code>true</code></dd> 243 * <dt><code>http://xml.org/sax/features/string-interning</code></dt> 244 * <dd><code>false</code></dd> 245 * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> 246 * <dd><code>isCheckingNormalization</code></dd> 247 * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt> 248 * <dd><code>false</code></dd> 249 * <dt><code>http://xml.org/sax/features/use-locator2</code></dt> 250 * <dd><code>false</code></dd> 251 * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt> 252 * <dd><code>false</code></dd> 253 * <dt><code>http://xml.org/sax/features/validation</code></dt> 254 * <dd><code>false</code></dd> 255 * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt> 256 * <dd><code>false</code></dd> 257 * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt> 258 * <dd><code>false</code></dd> 259 * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt> 260 * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd> 261 * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt> 262 * <dd><code>isMappingLangToXmlLang</code></dd> 263 * <dt><code>http://validator.nu/features/scripting-enabled</code></dt> 264 * <dd><code>isScriptingEnabled</code></dd> 265 * </dl> 266 * 267 * @param name 268 * feature URI string 269 * @return a value per the list above 270 * @see org.xml.sax.XMLReader#getFeature(java.lang.String) 271 */ 272 public boolean getFeature(String name) throws SAXNotRecognizedException, 273 SAXNotSupportedException { 274 if ("http://xml.org/sax/features/external-general-entities".equals(name)) { 275 return false; 276 } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) { 277 return false; 278 } else if ("http://xml.org/sax/features/is-standalone".equals(name)) { 279 return true; 280 } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) { 281 return false; 282 } else if ("http://xml.org/sax/features/namespaces".equals(name)) { 283 return true; 284 } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) { 285 return false; 286 } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) { 287 return true; // default value--applicable scenario never happens 288 } else if ("http://xml.org/sax/features/string-interning".equals(name)) { 289 return false; // XXX revisit 290 } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) { 291 return isCheckingNormalization(); // the checks aren't really per 292 // XML 1.1 293 } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) { 294 return false; 295 } else if ("http://xml.org/sax/features/use-locator2".equals(name)) { 296 return false; 297 } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) { 298 return false; 299 } else if ("http://xml.org/sax/features/validation".equals(name)) { 300 return false; 301 } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) { 302 return false; 303 } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) { 304 return false; 305 } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) { 306 return isHtml4ModeCompatibleWithXhtml1Schemata(); 307 } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) { 308 return isMappingLangToXmlLang(); 309 } else if ("http://validator.nu/features/scripting-enabled".equals(name)) { 310 return isScriptingEnabled(); 311 } else { 312 throw new SAXNotRecognizedException(); 313 } 314 } 315 316 /** 317 * Allows <code>XMLReader</code>-level access to non-boolean valued 318 * getters. 319 * 320 * <p> 321 * The properties are mapped as follows: 322 * 323 * <dl> 324 * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt> 325 * <dd><code>"1.0"</code></dd> 326 * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt> 327 * <dd><code>getLexicalHandler</code></dd> 328 * <dt><code>http://validator.nu/properties/content-space-policy</code></dt> 329 * <dd><code>getContentSpacePolicy</code></dd> 330 * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt> 331 * <dd><code>getContentNonXmlCharPolicy</code></dd> 332 * <dt><code>http://validator.nu/properties/comment-policy</code></dt> 333 * <dd><code>getCommentPolicy</code></dd> 334 * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt> 335 * <dd><code>getXmlnsPolicy</code></dd> 336 * <dt><code>http://validator.nu/properties/name-policy</code></dt> 337 * <dd><code>getNamePolicy</code></dd> 338 * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt> 339 * <dd><code>getStreamabilityViolationPolicy</code></dd> 340 * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt> 341 * <dd><code>getDocumentModeHandler</code></dd> 342 * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt> 343 * <dd><code>getDoctypeExpectation</code></dd> 344 * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> 345 * </dl> 346 * 347 * @param name 348 * property URI string 349 * @return a value per the list above 350 * @see org.xml.sax.XMLReader#getProperty(java.lang.String) 351 */ 352 public Object getProperty(String name) throws SAXNotRecognizedException, 353 SAXNotSupportedException { 354 if ("http://xml.org/sax/properties/declaration-handler".equals(name)) { 355 throw new SAXNotSupportedException( 356 "This parser does not suppert DeclHandler."); 357 } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) { 358 return "1.0"; // Emulating an XML 1.1 parser is not supported. 359 } else if ("http://xml.org/sax/properties/dom-node".equals(name)) { 360 throw new SAXNotSupportedException( 361 "This parser does not walk the DOM."); 362 } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) { 363 return getLexicalHandler(); 364 } else if ("http://xml.org/sax/properties/xml-string".equals(name)) { 365 throw new SAXNotSupportedException( 366 "This parser does not expose the source as a string."); 367 } else if ("http://validator.nu/properties/content-space-policy".equals(name)) { 368 return getContentSpacePolicy(); 369 } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) { 370 return getContentNonXmlCharPolicy(); 371 } else if ("http://validator.nu/properties/comment-policy".equals(name)) { 372 return getCommentPolicy(); 373 } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) { 374 return getXmlnsPolicy(); 375 } else if ("http://validator.nu/properties/name-policy".equals(name)) { 376 return getNamePolicy(); 377 } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) { 378 return getStreamabilityViolationPolicy(); 379 } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) { 380 return getDocumentModeHandler(); 381 } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) { 382 return getDoctypeExpectation(); 383 } else if ("http://validator.nu/properties/xml-policy".equals(name)) { 384 throw new SAXNotSupportedException( 385 "Cannot get a convenience setter."); 386 } else { 387 throw new SAXNotRecognizedException(); 388 } 389 } 390 391 /** 392 * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource) 393 */ 394 public void parse(InputSource input) throws IOException, SAXException { 395 lazyInit(); 396 try { 397 treeBuilder.setFragmentContext(null); 398 tokenize(input); 399 } finally { 400 if (saxTreeBuilder != null) { 401 Document document = saxTreeBuilder.getDocument(); 402 if (document != null) { 403 new TreeParser(contentHandler, lexicalHandler).parse(document); 404 } 405 } 406 } 407 } 408 409 /** 410 * Parser a fragment. 411 * 412 * @param input the input to parse 413 * @param context the name of the context element 414 * @throws IOException 415 * @throws SAXException 416 */ 417 public void parseFragment(InputSource input, String context) 418 throws IOException, SAXException { 419 lazyInit(); 420 try { 421 treeBuilder.setFragmentContext(context); 422 tokenize(input); 423 } finally { 424 if (saxTreeBuilder != null) { 425 DocumentFragment fragment = saxTreeBuilder.getDocumentFragment(); 426 new TreeParser(contentHandler, lexicalHandler).parse(fragment); 427 } 428 } 429 } 430 431 /** 432 * @param is 433 * @throws SAXException 434 * @throws IOException 435 * @throws MalformedURLException 436 */ 437 private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException { 438 if (is == null) { 439 throw new IllegalArgumentException("Null input."); 440 } 441 if (is.getByteStream() == null && is.getCharacterStream() == null) { 442 String systemId = is.getSystemId(); 443 if (systemId == null) { 444 throw new IllegalArgumentException("No byte stream, no character stream nor URI."); 445 } 446 if (entityResolver != null) { 447 is = entityResolver.resolveEntity(is.getPublicId(), systemId); 448 } 449 if (is.getByteStream() == null || is.getCharacterStream() == null) { 450 is = new InputSource(); 451 is.setSystemId(systemId); 452 is.setByteStream(new URL(systemId).openStream()); 453 } 454 } 455 tokenizer.tokenize(is); 456 } 457 458 /** 459 * @see org.xml.sax.XMLReader#parse(java.lang.String) 460 */ 461 public void parse(String systemId) throws IOException, SAXException { 462 parse(new InputSource(systemId)); 463 } 464 465 /** 466 * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler) 467 */ 468 public void setContentHandler(ContentHandler handler) { 469 contentHandler = handler; 470 if (saxStreamer != null) { 471 saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler() 472 : contentHandler); 473 } 474 } 475 476 /** 477 * Sets the lexical handler. 478 * @param handler the hander. 479 */ 480 public void setLexicalHandler(LexicalHandler handler) { 481 lexicalHandler = handler; 482 if (treeBuilder != null) { 483 treeBuilder.setIgnoringComments(handler == null); 484 if (saxStreamer != null) { 485 saxStreamer.setLexicalHandler(handler); 486 } 487 } 488 } 489 490 /** 491 * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler) 492 */ 493 public void setDTDHandler(DTDHandler handler) { 494 dtdHandler = handler; 495 } 496 497 /** 498 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) 499 */ 500 public void setEntityResolver(EntityResolver resolver) { 501 entityResolver = resolver; 502 } 503 504 /** 505 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) 506 */ 507 public void setErrorHandler(ErrorHandler handler) { 508 errorHandler = handler; 509 treeBuilderErrorHandler = handler; 510 if (tokenizer != null) { 511 tokenizer.setErrorHandler(handler); 512 treeBuilder.setErrorHandler(handler); 513 } 514 } 515 516 /** 517 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) 518 * @deprecated For Validator.nu internal use 519 */ 520 public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) { 521 treeBuilderErrorHandler = handler; 522 if (tokenizer != null) { 523 treeBuilder.setErrorHandler(handler); 524 } 525 } 526 527 /** 528 * Sets a boolean feature without having to use non-<code>XMLReader</code> 529 * setters directly. 530 * 531 * <p> 532 * The supported features are: 533 * 534 * <dl> 535 * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> 536 * <dd><code>setCheckingNormalization</code></dd> 537 * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt> 538 * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd> 539 * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt> 540 * <dd><code>setMappingLangToXmlLang</code></dd> 541 * <dt><code>http://validator.nu/features/scripting-enabled</code></dt> 542 * <dd><code>setScriptingEnabled</code></dd> 543 * </dl> 544 * 545 * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean) 546 */ 547 public void setFeature(String name, boolean value) 548 throws SAXNotRecognizedException, SAXNotSupportedException { 549 if ("http://xml.org/sax/features/external-general-entities".equals(name)) { 550 throw new SAXNotSupportedException("Cannot set " + name + "."); 551 } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) { 552 throw new SAXNotSupportedException("Cannot set " + name + "."); 553 } else if ("http://xml.org/sax/features/is-standalone".equals(name)) { 554 throw new SAXNotSupportedException("Cannot set " + name + "."); 555 } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) { 556 throw new SAXNotSupportedException("Cannot set " + name + "."); 557 } else if ("http://xml.org/sax/features/namespaces".equals(name)) { 558 throw new SAXNotSupportedException("Cannot set " + name + "."); 559 } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) { 560 throw new SAXNotSupportedException("Cannot set " + name + "."); 561 } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) { 562 throw new SAXNotSupportedException("Cannot set " + name + "."); 563 } else if ("http://xml.org/sax/features/string-interning".equals(name)) { 564 throw new SAXNotSupportedException("Cannot set " + name + "."); 565 } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) { 566 setCheckingNormalization(value); 567 } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) { 568 throw new SAXNotSupportedException("Cannot set " + name + "."); 569 } else if ("http://xml.org/sax/features/use-locator2".equals(name)) { 570 throw new SAXNotSupportedException("Cannot set " + name + "."); 571 } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) { 572 throw new SAXNotSupportedException("Cannot set " + name + "."); 573 } else if ("http://xml.org/sax/features/validation".equals(name)) { 574 throw new SAXNotSupportedException("Cannot set " + name + "."); 575 } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) { 576 throw new SAXNotSupportedException("Cannot set " + name + "."); 577 } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) { 578 throw new SAXNotSupportedException("Cannot set " + name + "."); 579 } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) { 580 setHtml4ModeCompatibleWithXhtml1Schemata(value); 581 } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) { 582 setMappingLangToXmlLang(value); 583 } else if ("http://validator.nu/features/scripting-enabled".equals(name)) { 584 setScriptingEnabled(value); 585 } else { 586 throw new SAXNotRecognizedException(); 587 } 588 } 589 590 /** 591 * Sets a non-boolean property without having to use non-<code>XMLReader</code> 592 * setters directly. 593 * 594 * <dl> 595 * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt> 596 * <dd><code>setLexicalHandler</code></dd> 597 * <dt><code>http://validator.nu/properties/content-space-policy</code></dt> 598 * <dd><code>setContentSpacePolicy</code></dd> 599 * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt> 600 * <dd><code>setContentNonXmlCharPolicy</code></dd> 601 * <dt><code>http://validator.nu/properties/comment-policy</code></dt> 602 * <dd><code>setCommentPolicy</code></dd> 603 * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt> 604 * <dd><code>setXmlnsPolicy</code></dd> 605 * <dt><code>http://validator.nu/properties/name-policy</code></dt> 606 * <dd><code>setNamePolicy</code></dd> 607 * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt> 608 * <dd><code>setStreamabilityViolationPolicy</code></dd> 609 * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt> 610 * <dd><code>setDocumentModeHandler</code></dd> 611 * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt> 612 * <dd><code>setDoctypeExpectation</code></dd> 613 * <dt><code>http://validator.nu/properties/xml-policy</code></dt> 614 * <dd><code>setXmlPolicy</code></dd> 615 * </dl> 616 * 617 * @see org.xml.sax.XMLReader#setProperty(java.lang.String, 618 * java.lang.Object) 619 */ 620 public void setProperty(String name, Object value) 621 throws SAXNotRecognizedException, SAXNotSupportedException { 622 if ("http://xml.org/sax/properties/declaration-handler".equals(name)) { 623 throw new SAXNotSupportedException( 624 "This parser does not suppert DeclHandler."); 625 } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) { 626 throw new SAXNotSupportedException( 627 "Can't set document-xml-version."); 628 } else if ("http://xml.org/sax/properties/dom-node".equals(name)) { 629 throw new SAXNotSupportedException("Can't set dom-node."); 630 } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) { 631 setLexicalHandler((LexicalHandler) value); 632 } else if ("http://xml.org/sax/properties/xml-string".equals(name)) { 633 throw new SAXNotSupportedException("Can't set xml-string."); 634 } else if ("http://validator.nu/properties/content-space-policy".equals(name)) { 635 setContentSpacePolicy((XmlViolationPolicy) value); 636 } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) { 637 setContentNonXmlCharPolicy((XmlViolationPolicy) value); 638 } else if ("http://validator.nu/properties/comment-policy".equals(name)) { 639 setCommentPolicy((XmlViolationPolicy) value); 640 } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) { 641 setXmlnsPolicy((XmlViolationPolicy) value); 642 } else if ("http://validator.nu/properties/name-policy".equals(name)) { 643 setNamePolicy((XmlViolationPolicy) value); 644 } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) { 645 setStreamabilityViolationPolicy((XmlViolationPolicy) value); 646 } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) { 647 setDocumentModeHandler((DocumentModeHandler) value); 648 } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) { 649 setDoctypeExpectation((DoctypeExpectation) value); 650 } else if ("http://validator.nu/properties/xml-policy".equals(name)) { 651 setXmlPolicy((XmlViolationPolicy) value); 652 } else { 653 throw new SAXNotRecognizedException(); 654 } 655 } 656 657 /** 658 * Indicates whether NFC normalization of source is being checked. 659 * @return <code>true</code> if NFC normalization of source is being checked. 660 * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() 661 */ 662 public boolean isCheckingNormalization() { 663 return checkingNormalization; 664 } 665 666 /** 667 * Toggles the checking of the NFC normalization of source. 668 * @param enable <code>true</code> to check normalization 669 * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) 670 */ 671 public void setCheckingNormalization(boolean enable) { 672 this.checkingNormalization = enable; 673 if (tokenizer != null) { 674 tokenizer.setCheckingNormalization(checkingNormalization); 675 } 676 } 677 678 /** 679 * Sets the policy for consecutive hyphens in comments. 680 * @param commentPolicy the policy 681 * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 682 */ 683 public void setCommentPolicy(XmlViolationPolicy commentPolicy) { 684 this.commentPolicy = commentPolicy; 685 if (tokenizer != null) { 686 tokenizer.setCommentPolicy(commentPolicy); 687 } 688 } 689 690 /** 691 * Sets the policy for non-XML characters except white space. 692 * @param contentNonXmlCharPolicy the policy 693 * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 694 */ 695 public void setContentNonXmlCharPolicy( 696 XmlViolationPolicy contentNonXmlCharPolicy) { 697 this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; 698 if (tokenizer != null) { 699 tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); 700 } 701 } 702 703 /** 704 * Sets the policy for non-XML white space. 705 * @param contentSpacePolicy the policy 706 * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 707 */ 708 public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { 709 this.contentSpacePolicy = contentSpacePolicy; 710 if (tokenizer != null) { 711 tokenizer.setContentSpacePolicy(contentSpacePolicy); 712 } 713 } 714 715 /** 716 * Whether the parser considers scripting to be enabled for noscript treatment. 717 * 718 * @return <code>true</code> if enabled 719 * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled() 720 */ 721 public boolean isScriptingEnabled() { 722 return scriptingEnabled; 723 } 724 725 /** 726 * Sets whether the parser considers scripting to be enabled for noscript treatment. 727 * @param scriptingEnabled <code>true</code> to enable 728 * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) 729 */ 730 public void setScriptingEnabled(boolean scriptingEnabled) { 731 this.scriptingEnabled = scriptingEnabled; 732 if (treeBuilder != null) { 733 treeBuilder.setScriptingEnabled(scriptingEnabled); 734 } 735 } 736 737 /** 738 * Returns the doctype expectation. 739 * 740 * @return the doctypeExpectation 741 */ 742 public DoctypeExpectation getDoctypeExpectation() { 743 return doctypeExpectation; 744 } 745 746 /** 747 * Sets the doctype expectation. 748 * 749 * @param doctypeExpectation 750 * the doctypeExpectation to set 751 * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation) 752 */ 753 public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { 754 this.doctypeExpectation = doctypeExpectation; 755 if (treeBuilder != null) { 756 treeBuilder.setDoctypeExpectation(doctypeExpectation); 757 } 758 } 759 760 /** 761 * Returns the document mode handler. 762 * 763 * @return the documentModeHandler 764 */ 765 public DocumentModeHandler getDocumentModeHandler() { 766 return documentModeHandler; 767 } 768 769 /** 770 * Sets the document mode handler. 771 * 772 * @param documentModeHandler 773 * the documentModeHandler to set 774 * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler) 775 */ 776 public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { 777 this.documentModeHandler = documentModeHandler; 778 } 779 780 /** 781 * Returns the streamabilityViolationPolicy. 782 * 783 * @return the streamabilityViolationPolicy 784 */ 785 public XmlViolationPolicy getStreamabilityViolationPolicy() { 786 return streamabilityViolationPolicy; 787 } 788 789 /** 790 * Sets the streamabilityViolationPolicy. 791 * 792 * @param streamabilityViolationPolicy 793 * the streamabilityViolationPolicy to set 794 */ 795 public void setStreamabilityViolationPolicy( 796 XmlViolationPolicy streamabilityViolationPolicy) { 797 this.streamabilityViolationPolicy = streamabilityViolationPolicy; 798 } 799 800 /** 801 * Whether the HTML 4 mode reports boolean attributes in a way that repeats 802 * the name in the value. 803 * @param html4ModeCompatibleWithXhtml1Schemata 804 */ 805 public void setHtml4ModeCompatibleWithXhtml1Schemata( 806 boolean html4ModeCompatibleWithXhtml1Schemata) { 807 this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; 808 if (tokenizer != null) { 809 tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); 810 } 811 } 812 813 /** 814 * Returns the <code>Locator</code> during parse. 815 * @return the <code>Locator</code> 816 */ 817 public Locator getDocumentLocator() { 818 return tokenizer; 819 } 820 821 /** 822 * Whether the HTML 4 mode reports boolean attributes in a way that repeats 823 * the name in the value. 824 * 825 * @return the html4ModeCompatibleWithXhtml1Schemata 826 */ 827 public boolean isHtml4ModeCompatibleWithXhtml1Schemata() { 828 return html4ModeCompatibleWithXhtml1Schemata; 829 } 830 831 /** 832 * Whether <code>lang</code> is mapped to <code>xml:lang</code>. 833 * @param mappingLangToXmlLang 834 * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) 835 */ 836 public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { 837 this.mappingLangToXmlLang = mappingLangToXmlLang; 838 if (tokenizer != null) { 839 tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang); 840 } 841 } 842 843 /** 844 * Whether <code>lang</code> is mapped to <code>xml:lang</code>. 845 * 846 * @return the mappingLangToXmlLang 847 */ 848 public boolean isMappingLangToXmlLang() { 849 return mappingLangToXmlLang; 850 } 851 852 /** 853 * Whether the <code>xmlns</code> attribute on the root element is 854 * passed to through. (FATAL not allowed.) 855 * @param xmlnsPolicy 856 * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 857 */ 858 public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { 859 if (xmlnsPolicy == XmlViolationPolicy.FATAL) { 860 throw new IllegalArgumentException("Can't use FATAL here."); 861 } 862 this.xmlnsPolicy = xmlnsPolicy; 863 if (tokenizer != null) { 864 tokenizer.setXmlnsPolicy(xmlnsPolicy); 865 } 866 } 867 868 /** 869 * Returns the xmlnsPolicy. 870 * 871 * @return the xmlnsPolicy 872 */ 873 public XmlViolationPolicy getXmlnsPolicy() { 874 return xmlnsPolicy; 875 } 876 877 /** 878 * Returns the lexicalHandler. 879 * 880 * @return the lexicalHandler 881 */ 882 public LexicalHandler getLexicalHandler() { 883 return lexicalHandler; 884 } 885 886 /** 887 * Returns the commentPolicy. 888 * 889 * @return the commentPolicy 890 */ 891 public XmlViolationPolicy getCommentPolicy() { 892 return commentPolicy; 893 } 894 895 /** 896 * Returns the contentNonXmlCharPolicy. 897 * 898 * @return the contentNonXmlCharPolicy 899 */ 900 public XmlViolationPolicy getContentNonXmlCharPolicy() { 901 return contentNonXmlCharPolicy; 902 } 903 904 /** 905 * Returns the contentSpacePolicy. 906 * 907 * @return the contentSpacePolicy 908 */ 909 public XmlViolationPolicy getContentSpacePolicy() { 910 return contentSpacePolicy; 911 } 912 913 /** 914 * @param reportingDoctype 915 * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean) 916 */ 917 public void setReportingDoctype(boolean reportingDoctype) { 918 this.reportingDoctype = reportingDoctype; 919 if (treeBuilder != null) { 920 treeBuilder.setReportingDoctype(reportingDoctype); 921 } 922 } 923 924 /** 925 * Returns the reportingDoctype. 926 * 927 * @return the reportingDoctype 928 */ 929 public boolean isReportingDoctype() { 930 return reportingDoctype; 931 } 932 933 /** 934 * The policy for non-NCName element and attribute names. 935 * @param namePolicy 936 * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 937 */ 938 public void setNamePolicy(XmlViolationPolicy namePolicy) { 939 this.namePolicy = namePolicy; 940 if (tokenizer != null) { 941 tokenizer.setNamePolicy(namePolicy); 942 } 943 } 944 945 /** 946 * This is a catch-all convenience method for setting name, xmlns, content space, 947 * content non-XML char and comment policies in one go. This does not affect the 948 * streamability policy or doctype reporting. 949 * 950 * @param xmlPolicy 951 */ 952 public void setXmlPolicy(XmlViolationPolicy xmlPolicy) { 953 setNamePolicy(xmlPolicy); 954 setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy); 955 setContentSpacePolicy(xmlPolicy); 956 setContentNonXmlCharPolicy(xmlPolicy); 957 setCommentPolicy(xmlPolicy); 958 setBogusXmlnsPolicy(xmlPolicy); 959 } 960 961 /** 962 * The policy for non-NCName element and attribute names. 963 * 964 * @return the namePolicy 965 */ 966 public XmlViolationPolicy getNamePolicy() { 967 return namePolicy; 968 } 969 970 /** 971 * Sets the policy for forbidden <code>xmlns</code> attributes. 972 * @param bogusXmlnsPolicy the policy 973 * @see nu.validator.htmlparser.impl.Tokenizer#setBogusXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 974 */ 975 public void setBogusXmlnsPolicy( 976 XmlViolationPolicy bogusXmlnsPolicy) { 977 this.bogusXmlnsPolicy = bogusXmlnsPolicy; 978 if (tokenizer != null) { 979 tokenizer.setBogusXmlnsPolicy(bogusXmlnsPolicy); 980 } 981 } 982 983 /** 984 * Returns the bogusXmlnsPolicy. 985 * 986 * @return the bogusXmlnsPolicy 987 */ 988 public XmlViolationPolicy getBogusXmlnsPolicy() { 989 return bogusXmlnsPolicy; 990 } 991 992 public void addCharacterHandler(CharacterHandler characterHandler) { 993 this.characterHandlers.add(characterHandler); 994 if (tokenizer != null) { 995 tokenizer.addCharacterHandler(characterHandler); 996 } 997 } 998 }