/*
 * Copyright (c) 2007 Henri Sivonen
 * Copyright (c) 2007-2008 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.htmlparser.dom;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import nu.validator.htmlparser.common.CharacterHandler;
import nu.validator.htmlparser.common.DoctypeExpectation;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.common.TransitionHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.io.Driver;

import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * This class implements an HTML5 parser that exposes data through the DOM
 * interface.
 *
 * <p>By default, when using the constructor without arguments, this parser
 * coerces XML 1.0-incompatible infosets into XML 1.0-compatible infosets.
 * This corresponds to <code>ALTER_INFOSET</code> as the general XML
 * violation policy. To make the parser support non-conforming HTML fully
 * per the HTML 5 spec while on the other hand potentially violating the SAX2
 * API contract, set the general XML violation policy to <code>ALLOW</code>.
 * This does not work with a standard DOM implementation.
 * It is possible to treat XML 1.0 infoset violations as fatal by setting
 * the general XML violation policy to <code>FATAL</code>.
 *
 * <p>The doctype is not represented in the tree.
 *
 * <p>The document mode is represented as user data <code>DocumentMode</code>
 * object with the key <code>nu.validator.document-mode</code> on the document
 * node.
 *
 * <p>The form pointer is also stored as user data with the key
 * <code>nu.validator.form-pointer</code>.
 *
 * <p>Implementation note: the {@link Driver} is created lazily on the first
 * parse. Setters whose value influences which tokenizer class is chosen
 * discard the current driver so that it is rebuilt on the next parse; the
 * other setters forward their value to the live driver when there is one.
 *
 * @version $Id$
 * @author hsivonen
 */
public class HtmlDocumentBuilder extends DocumentBuilder {

    /**
     * Returns the JAXP DOM implementation.
     *
     * @return the JAXP DOM implementation
     * @throws RuntimeException
     *             wrapping the <code>ParserConfigurationException</code> if
     *             JAXP cannot create a namespace-aware document builder
     */
    private static DOMImplementation jaxpDOMImplementation() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            throw new RuntimeException(e);
        }
        return builder.getDOMImplementation();
    }

    /**
     * The driver wrapping the tokenizer. <code>null</code> until
     * {@link #lazyInit()} has run, and again after any setter that
     * invalidates the tokenizer choice.
     */
    private Driver driver;

    /**
     * The tree builder.
     */
    private final DOMTreeBuilder treeBuilder;

    /**
     * The DOM impl.
     */
    private final DOMImplementation implementation;

    /**
     * The entity resolver.
     */
    private EntityResolver entityResolver;

    /**
     * The error handler handed to the driver; also one of the inputs to the
     * tokenizer choice in {@link #newTokenizer(TokenHandler, boolean)}.
     */
    private ErrorHandler errorHandler = null;

    private DocumentModeHandler documentModeHandler = null;

    private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;

    private boolean checkingNormalization = false;

    private boolean scriptingEnabled = false;

    /**
     * Character handlers registered so far; replayed onto every newly created
     * driver.
     */
    private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();

    // The policy initializers below are placeholders: every constructor ends
    // up calling setXmlPolicy(), which overwrites the name, xmlns, content
    // space, content non-XML char and comment policies.

    private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;

    private boolean html4ModeCompatibleWithXhtml1Schemata = false;

    private boolean mappingLangToXmlLang = false;

    private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;

    private boolean reportingDoctype = true;

    /**
     * The error handler handed to the tree builder when the driver is
     * (re)created.
     */
    private ErrorHandler treeBuilderErrorHandler = null;

    private Heuristics heuristics = Heuristics.NONE;

    private TransitionHandler transitionHandler = null;

    /**
     * Instantiates the document builder with a specific DOM
     * implementation and XML violation policy.
     *
     * @param implementation
     *            the DOM implementation
     * @param xmlPolicy the policy
     */
    public HtmlDocumentBuilder(DOMImplementation implementation,
            XmlViolationPolicy xmlPolicy) {
        this.implementation = implementation;
        this.treeBuilder = new DOMTreeBuilder(implementation);
        // The driver is deliberately left unset; see lazyInit().
        this.driver = null;
        setXmlPolicy(xmlPolicy);
    }

    /**
     * Instantiates the document builder with a specific DOM implementation
     * and the infoset-altering XML violation policy.
     *
     * @param implementation
     *            the DOM implementation
     */
    public HtmlDocumentBuilder(DOMImplementation implementation) {
        this(implementation, XmlViolationPolicy.ALTER_INFOSET);
    }

    /**
     * Instantiates the document builder with the JAXP DOM implementation
     * and the infoset-altering XML violation policy.
     */
    public HtmlDocumentBuilder() {
        this(XmlViolationPolicy.ALTER_INFOSET);
    }

    /**
     * Instantiates the document builder with the JAXP DOM implementation
     * and a specific XML violation policy.
     * @param xmlPolicy the policy
     */
    public HtmlDocumentBuilder(XmlViolationPolicy xmlPolicy) {
        this(jaxpDOMImplementation(), xmlPolicy);
    }

    /**
     * Creates the tokenizer for a new driver. The plain {@link Tokenizer} is
     * used only when there is no error handler, no transition handler and the
     * non-XML character policy is <code>ALLOW</code>; in every other case an
     * {@link ErrorReportingTokenizer} is used. Because the choice depends on
     * those three settings, their setters reset the driver.
     *
     * @param handler the token handler the tokenizer feeds
     * @param newAttributesEachTime passed through to the tokenizer constructor
     * @return the new tokenizer
     */
    private Tokenizer newTokenizer(TokenHandler handler,
            boolean newAttributesEachTime) {
        if (errorHandler == null && transitionHandler == null
                && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
            return new Tokenizer(handler, newAttributesEachTime);
        } else {
            return new ErrorReportingTokenizer(handler, newAttributesEachTime);
        }
    }

    /**
     * This class wraps different tree builders depending on configuration. This
     * method does the work of hiding this from the user of the class.
     *
     * <p>If there is no driver, one is created and the complete stored
     * configuration is pushed onto it and onto the tree builder. If a driver
     * already exists, nothing happens.
     */
    private void lazyInit() {
        if (driver == null) {
            this.driver = new Driver(newTokenizer(treeBuilder, false));
            this.driver.setErrorHandler(errorHandler);
            this.driver.setTransitionHandler(transitionHandler);
            this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
            this.driver.setCheckingNormalization(checkingNormalization);
            this.driver.setCommentPolicy(commentPolicy);
            this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
            this.driver.setContentSpacePolicy(contentSpacePolicy);
            this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
            this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
            this.driver.setXmlnsPolicy(xmlnsPolicy);
            // setNamePolicy() forwards to the driver only when one exists, so
            // a policy chosen before the first parse (including the one set by
            // the constructor) has to be applied here.
            this.driver.setNamePolicy(namePolicy);
            this.driver.setHeuristics(heuristics);
            for (CharacterHandler characterHandler : characterHandlers) {
                this.driver.addCharacterHandler(characterHandler);
            }
            this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
            this.treeBuilder.setDocumentModeHandler(documentModeHandler);
            this.treeBuilder.setScriptingEnabled(scriptingEnabled);
            this.treeBuilder.setReportingDoctype(reportingDoctype);
            this.treeBuilder.setNamePolicy(namePolicy);
        }
    }

    /**
     * Tokenizes the input source.
     *
     * <p>When the source carries neither a byte stream nor a character
     * stream, the entity resolver (if any) is consulted with the public and
     * system IDs. If that still yields no stream, the system ID is opened as
     * a URL.
     *
     * @param is the source
     * @throws IllegalArgumentException if <code>is</code> is <code>null</code>
     *             or has no byte stream, no character stream and no system ID
     * @throws SAXException if stuff goes wrong
     * @throws IOException if IO goes wrong
     * @throws MalformedURLException if the system ID is malformed and the entity resolver is <code>null</code>
     */
    private void tokenize(InputSource is) throws SAXException, IOException,
            MalformedURLException {
        if (is == null) {
            throw new IllegalArgumentException("Null input.");
        }
        if (is.getByteStream() == null && is.getCharacterStream() == null) {
            String systemId = is.getSystemId();
            if (systemId == null) {
                throw new IllegalArgumentException(
                        "No byte stream, no character stream nor URI.");
            }
            if (entityResolver != null) {
                // Per the SAX contract the resolver may return null to ask
                // the parser to open the system ID itself.
                InputSource resolved = entityResolver.resolveEntity(
                        is.getPublicId(), systemId);
                if (resolved != null) {
                    is = resolved;
                    // A resolver may redirect to another URI without
                    // supplying a stream.
                    if (is.getSystemId() != null) {
                        systemId = is.getSystemId();
                    }
                }
            }
            // Open the URL only when there is still no stream of either
            // kind; a stream supplied by the resolver must be used as is.
            if (is.getByteStream() == null && is.getCharacterStream() == null) {
                is = new InputSource();
                is.setSystemId(systemId);
                is.setByteStream(new URL(systemId).openStream());
            }
        }
        lazyInit();
        driver.tokenize(is);
    }

    /**
     * Returns the DOM implementation
     * @return the DOM implementation
     * @see javax.xml.parsers.DocumentBuilder#getDOMImplementation()
     */
    @Override public DOMImplementation getDOMImplementation() {
        return implementation;
    }

    /**
     * Returns <code>true</code>.
     * @return <code>true</code>
     * @see javax.xml.parsers.DocumentBuilder#isNamespaceAware()
     */
    @Override public boolean isNamespaceAware() {
        return true;
    }

    /**
     * Returns <code>false</code>
     * @return <code>false</code>
     * @see javax.xml.parsers.DocumentBuilder#isValidating()
     */
    @Override public boolean isValidating() {
        return false;
    }

    /**
     * For API compatibility. Asks the DOM implementation for a document with
     * no namespace URI, no qualified name and no doctype.
     *
     * @return the new document
     * @see javax.xml.parsers.DocumentBuilder#newDocument()
     */
    @Override public Document newDocument() {
        return implementation.createDocument(null, null, null);
    }

    /**
     * Parses a document from a SAX <code>InputSource</code>.
     * @param is the source
     * @return the doc
     * @throws SAXException if stuff goes wrong
     * @throws IOException if IO goes wrong
     * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource)
     */
    @Override public Document parse(InputSource is) throws SAXException,
            IOException {
        // A null fragment context selects whole-document parsing.
        treeBuilder.setFragmentContext(null);
        tokenize(is);
        return treeBuilder.getDocument();
    }

    /**
     * Parses a document fragment from a SAX <code>InputSource</code>.
     * @param is the source
     * @param context the context element name; must not be <code>null</code>
     * @return the doc
     * @throws IllegalArgumentException if <code>context</code> is <code>null</code>
     * @throws SAXException if stuff goes wrong
     * @throws IOException if IO goes wrong
     */
    public DocumentFragment parseFragment(InputSource is, String context)
            throws IOException, SAXException {
        if (context == null) {
            throw new IllegalArgumentException("Null context.");
        }
        treeBuilder.setFragmentContext(context.intern());
        tokenize(is);
        return treeBuilder.getDocumentFragment();
    }

    /**
     * Sets the entity resolver for URI-only inputs.
     * @param resolver the resolver
     * @see javax.xml.parsers.DocumentBuilder#setEntityResolver(org.xml.sax.EntityResolver)
     */
    @Override public void setEntityResolver(EntityResolver resolver) {
        this.entityResolver = resolver;
    }

    /**
     * Sets the error handler for both the tokenizer and the tree builder.
     *
     * <p>The handler is remembered and the driver is discarded, because
     * whether an error handler is present decides which tokenizer class is
     * used. The driver is rebuilt with the handler on the next parse.
     *
     * @param errorHandler the handler; <code>null</code> removes it
     * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler)
     */
    @Override public void setErrorHandler(ErrorHandler errorHandler) {
        this.errorHandler = errorHandler;
        this.treeBuilderErrorHandler = errorHandler;
        treeBuilder.setErrorHandler(errorHandler);
        driver = null;
    }

    /**
     * Sets the transition handler and discards the driver so that the
     * tokenizer is chosen again on the next parse.
     *
     * <p>The method name is misspelled; it is kept for backward
     * compatibility. {@link #setTransitionHandler(TransitionHandler)} does
     * the same thing.
     *
     * @param handler the handler; <code>null</code> removes it
     */
    public void setTransitionHander(TransitionHandler handler) {
        transitionHandler = handler;
        driver = null;
    }

    /**
     * Sets the transition handler and discards the driver so that the
     * tokenizer is chosen again on the next parse.
     *
     * @param handler the handler; <code>null</code> removes it
     */
    public void setTransitionHandler(TransitionHandler handler) {
        setTransitionHander(handler);
    }

    /**
     * Indicates whether NFC normalization of source is being checked.
     * @return <code>true</code> if NFC normalization of source is being checked.
     * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
     */
    public boolean isCheckingNormalization() {
        return checkingNormalization;
    }

    /**
     * Toggles the checking of the NFC normalization of source.
     * @param enable <code>true</code> to check normalization
     * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
     */
    public void setCheckingNormalization(boolean enable) {
        this.checkingNormalization = enable;
        if (driver != null) {
            driver.setCheckingNormalization(checkingNormalization);
        }
    }

    /**
     * Sets the policy for consecutive hyphens in comments.
     * @param commentPolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
        this.commentPolicy = commentPolicy;
        if (driver != null) {
            driver.setCommentPolicy(commentPolicy);
        }
    }

    /**
     * Sets the policy for non-XML characters except white space. The driver
     * is discarded because this policy takes part in the tokenizer choice.
     *
     * @param contentNonXmlCharPolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setContentNonXmlCharPolicy(
            XmlViolationPolicy contentNonXmlCharPolicy) {
        this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
        driver = null;
    }

    /**
     * Sets the policy for non-XML white space.
     * @param contentSpacePolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
        this.contentSpacePolicy = contentSpacePolicy;
        if (driver != null) {
            driver.setContentSpacePolicy(contentSpacePolicy);
        }
    }

    /**
     * Whether the parser considers scripting to be enabled for noscript treatment.
     *
     * @return <code>true</code> if enabled
     * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
     */
    public boolean isScriptingEnabled() {
        return scriptingEnabled;
    }

    /**
     * Sets whether the parser considers scripting to be enabled for noscript treatment.
     * @param scriptingEnabled <code>true</code> to enable
     * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
     */
    public void setScriptingEnabled(boolean scriptingEnabled) {
        this.scriptingEnabled = scriptingEnabled;
        treeBuilder.setScriptingEnabled(scriptingEnabled);
    }

    /**
     * Returns the doctype expectation.
     *
     * @return the doctypeExpectation
     */
    public DoctypeExpectation getDoctypeExpectation() {
        return doctypeExpectation;
    }

    /**
     * Sets the doctype expectation.
     *
     * @param doctypeExpectation
     *            the doctypeExpectation to set
     * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
     */
    public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
        this.doctypeExpectation = doctypeExpectation;
        treeBuilder.setDoctypeExpectation(doctypeExpectation);
    }

    /**
     * Returns the document mode handler.
     *
     * @return the documentModeHandler
     */
    public DocumentModeHandler getDocumentModeHandler() {
        return documentModeHandler;
    }

    /**
     * Sets the document mode handler.
     *
     * @param documentModeHandler
     *            the documentModeHandler to set
     * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
     */
    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
        this.documentModeHandler = documentModeHandler;
        // Forward immediately: once the driver exists lazyInit() does nothing,
        // so a handler set between two parses would otherwise be ignored.
        treeBuilder.setDocumentModeHandler(documentModeHandler);
    }

    /**
     * Returns the streamabilityViolationPolicy.
     *
     * @return the streamabilityViolationPolicy
     */
    public XmlViolationPolicy getStreamabilityViolationPolicy() {
        return streamabilityViolationPolicy;
    }

    /**
     * Sets the streamabilityViolationPolicy and discards the driver.
     *
     * <p>Within this class the stored value is only returned by
     * {@link #getStreamabilityViolationPolicy()}; it is not forwarded to the
     * driver or the tree builder.
     *
     * @param streamabilityViolationPolicy
     *            the streamabilityViolationPolicy to set
     */
    public void setStreamabilityViolationPolicy(
            XmlViolationPolicy streamabilityViolationPolicy) {
        this.streamabilityViolationPolicy = streamabilityViolationPolicy;
        driver = null;
    }

    /**
     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
     * the name in the value.
     * @param html4ModeCompatibleWithXhtml1Schemata
     */
    public void setHtml4ModeCompatibleWithXhtml1Schemata(
            boolean html4ModeCompatibleWithXhtml1Schemata) {
        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
        if (driver != null) {
            driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
        }
    }

    /**
     * Returns the <code>Locator</code> during parse.
     *
     * @return the <code>Locator</code>, or <code>null</code> when there is
     *         currently no driver (before the first parse, or after a setter
     *         that discards the driver)
     */
    public Locator getDocumentLocator() {
        return driver == null ? null : driver.getDocumentLocator();
    }

    /**
     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
     * the name in the value.
     *
     * @return the html4ModeCompatibleWithXhtml1Schemata
     */
    public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
        return html4ModeCompatibleWithXhtml1Schemata;
    }

    /**
     * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
     * @param mappingLangToXmlLang
     * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
     */
    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
        this.mappingLangToXmlLang = mappingLangToXmlLang;
        if (driver != null) {
            driver.setMappingLangToXmlLang(mappingLangToXmlLang);
        }
    }

    /**
     * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
     *
     * @return the mappingLangToXmlLang
     */
    public boolean isMappingLangToXmlLang() {
        return mappingLangToXmlLang;
    }

    /**
     * Whether the <code>xmlns</code> attribute on the root element is
     * passed to through. (FATAL not allowed.)
     * @param xmlnsPolicy
     * @throws IllegalArgumentException if the policy is <code>FATAL</code>
     * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
        if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
            throw new IllegalArgumentException("Can't use FATAL here.");
        }
        this.xmlnsPolicy = xmlnsPolicy;
        if (driver != null) {
            driver.setXmlnsPolicy(xmlnsPolicy);
        }
    }

    /**
     * Returns the xmlnsPolicy.
     *
     * @return the xmlnsPolicy
     */
    public XmlViolationPolicy getXmlnsPolicy() {
        return xmlnsPolicy;
    }

    /**
     * Returns the commentPolicy.
     *
     * @return the commentPolicy
     */
    public XmlViolationPolicy getCommentPolicy() {
        return commentPolicy;
    }

    /**
     * Returns the contentNonXmlCharPolicy.
     *
     * @return the contentNonXmlCharPolicy
     */
    public XmlViolationPolicy getContentNonXmlCharPolicy() {
        return contentNonXmlCharPolicy;
    }

    /**
     * Returns the contentSpacePolicy.
     *
     * @return the contentSpacePolicy
     */
    public XmlViolationPolicy getContentSpacePolicy() {
        return contentSpacePolicy;
    }

    /**
     * Sets whether the tree builder reports the doctype.
     *
     * @param reportingDoctype
     * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
     */
    public void setReportingDoctype(boolean reportingDoctype) {
        this.reportingDoctype = reportingDoctype;
        treeBuilder.setReportingDoctype(reportingDoctype);
    }

    /**
     * Returns the reportingDoctype.
     *
     * @return the reportingDoctype
     */
    public boolean isReportingDoctype() {
        return reportingDoctype;
    }

    /**
     * The policy for non-NCName element and attribute names.
     *
     * <p>The value is forwarded to the driver and the tree builder only when
     * a driver exists; otherwise it is applied when the driver is created.
     *
     * @param namePolicy
     * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setNamePolicy(XmlViolationPolicy namePolicy) {
        this.namePolicy = namePolicy;
        if (driver != null) {
            driver.setNamePolicy(namePolicy);
            treeBuilder.setNamePolicy(namePolicy);
        }
    }

    /**
     * Sets the encoding sniffing heuristics.
     *
     * @param heuristics the heuristics to set
     * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
     */
    public void setHeuristics(Heuristics heuristics) {
        this.heuristics = heuristics;
        if (driver != null) {
            driver.setHeuristics(heuristics);
        }
    }

    /**
     * Returns the encoding sniffing heuristics.
     *
     * @return the heuristics
     */
    public Heuristics getHeuristics() {
        return this.heuristics;
    }

    /**
     * This is a catch-all convenience method for setting name, xmlns, content space,
     * content non-XML char and comment policies in one go. This does not affect the
     * streamability policy or doctype reporting.
     *
     * <p>Because the xmlns policy does not accept <code>FATAL</code>,
     * <code>ALTER_INFOSET</code> is used for it when <code>xmlPolicy</code>
     * is <code>FATAL</code>.
     *
     * @param xmlPolicy
     */
    public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
        setNamePolicy(xmlPolicy);
        setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
        setContentSpacePolicy(xmlPolicy);
        setContentNonXmlCharPolicy(xmlPolicy);
        setCommentPolicy(xmlPolicy);
    }

    /**
     * The policy for non-NCName element and attribute names.
     *
     * @return the namePolicy
     */
    public XmlViolationPolicy getNamePolicy() {
        return namePolicy;
    }

    /**
     * Does nothing.
     * @deprecated
     */
    public void setBogusXmlnsPolicy(
            XmlViolationPolicy bogusXmlnsPolicy) {
    }

    /**
     * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
     * @deprecated
     * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
     */
    public XmlViolationPolicy getBogusXmlnsPolicy() {
        return XmlViolationPolicy.ALTER_INFOSET;
    }

    /**
     * Registers a character handler. It is added to the current driver, if
     * any, and to every driver created later.
     *
     * @param characterHandler the handler to add
     */
    public void addCharacterHandler(CharacterHandler characterHandler) {
        this.characterHandlers.add(characterHandler);
        if (driver != null) {
            driver.addCharacterHandler(characterHandler);
        }
    }

    /**
     * Sets whether comment nodes appear in the tree.
     * @param ignoreComments <code>true</code> to ignore comments
     * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
     */
    public void setIgnoringComments(boolean ignoreComments) {
        treeBuilder.setIgnoringComments(ignoreComments);
    }

}