001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * Copyright (c) 2007-2008 Mozilla Foundation 004 * 005 * Permission is hereby granted, free of charge, to any person obtaining a 006 * copy of this software and associated documentation files (the "Software"), 007 * to deal in the Software without restriction, including without limitation 008 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 009 * and/or sell copies of the Software, and to permit persons to whom the 010 * Software is furnished to do so, subject to the following conditions: 011 * 012 * The above copyright notice and this permission notice shall be included in 013 * all copies or substantial portions of the Software. 014 * 015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 021 * DEALINGS IN THE SOFTWARE. 022 */ 023 024 package nu.validator.htmlparser.xom; 025 026 import java.io.File; 027 import java.io.FileInputStream; 028 import java.io.IOException; 029 import java.io.InputStream; 030 import java.io.Reader; 031 import java.io.StringReader; 032 import java.net.MalformedURLException; 033 import java.net.URL; 034 import java.util.LinkedList; 035 import java.util.List; 036 037 import nu.validator.htmlparser.common.CharacterHandler; 038 import nu.validator.htmlparser.common.DoctypeExpectation; 039 import nu.validator.htmlparser.common.DocumentModeHandler; 040 import nu.validator.htmlparser.common.Heuristics; 041 import nu.validator.htmlparser.common.TokenHandler; 042 import nu.validator.htmlparser.common.TransitionHandler; 043 import nu.validator.htmlparser.common.XmlViolationPolicy; 044 import nu.validator.htmlparser.impl.ErrorReportingTokenizer; 045 import nu.validator.htmlparser.impl.Tokenizer; 046 import nu.validator.htmlparser.io.Driver; 047 import nu.xom.Builder; 048 import nu.xom.Document; 049 import nu.xom.Nodes; 050 import nu.xom.ParsingException; 051 import nu.xom.ValidityException; 052 053 import org.xml.sax.EntityResolver; 054 import org.xml.sax.ErrorHandler; 055 import org.xml.sax.InputSource; 056 import org.xml.sax.Locator; 057 import org.xml.sax.SAXException; 058 import org.xml.sax.SAXParseException; 059 060 /** 061 * This class implements an HTML5 parser that exposes data through the XOM 062 * interface. 063 * 064 * <p>By default, when using the constructor without arguments, the 065 * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible 066 * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general 067 * XML violation policy. It is possible to treat XML 1.0 infoset violations 068 * as fatal by setting the general XML violation policy to <code>FATAL</code>. 069 * 070 * <p>The doctype is not represented in the tree. 071 * 072 * <p>The document mode is represented via the <code>Mode</code> 073 * interface on the <code>Document</code> node if the node implements 074 * that interface (depends on the used node factory). 075 * 076 * <p>The form pointer is stored if the node factory supports storing it. 077 * 078 * <p>This package has its own node factory class because the official 079 * XOM node factory may return multiple nodes instead of one confusing 080 * the assumptions of the DOM-oriented HTML5 parsing algorithm. 081 * 082 * @version $Id$ 083 * @author hsivonen 084 */ 085 public class HtmlBuilder extends Builder { 086 087 private Driver driver; 088 089 private final XOMTreeBuilder treeBuilder; 090 091 private final SimpleNodeFactory simpleNodeFactory; 092 093 private EntityResolver entityResolver; 094 095 private ErrorHandler errorHandler = null; 096 097 private DocumentModeHandler documentModeHandler = null; 098 099 private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; 100 101 private boolean checkingNormalization = false; 102 103 private boolean scriptingEnabled = false; 104 105 private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>(); 106 107 private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL; 108 109 private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL; 110 111 private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL; 112 113 private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL; 114 115 private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW; 116 117 private boolean html4ModeCompatibleWithXhtml1Schemata = false; 118 119 private boolean mappingLangToXmlLang = false; 120 121 private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL; 122 123 private boolean reportingDoctype = true; 124 125 private ErrorHandler treeBuilderErrorHandler = null; 126 127 private Heuristics heuristics = Heuristics.NONE; 128 129 private TransitionHandler transitionHandler = null; 130 131 /** 132 * Constructor with default node factory and fatal XML violation policy. 133 */ 134 public HtmlBuilder() { 135 this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL); 136 } 137 138 /** 139 * Constructor with given node factory and fatal XML violation policy. 140 * @param nodeFactory the factory 141 */ 142 public HtmlBuilder(SimpleNodeFactory nodeFactory) { 143 this(nodeFactory, XmlViolationPolicy.FATAL); 144 } 145 146 /** 147 * Constructor with default node factory and given XML violation policy. 148 * @param xmlPolicy the policy 149 */ 150 public HtmlBuilder(XmlViolationPolicy xmlPolicy) { 151 this(new SimpleNodeFactory(), xmlPolicy); 152 } 153 154 /** 155 * Constructor with given node factory and given XML violation policy. 156 * @param nodeFactory the factory 157 * @param xmlPolicy the policy 158 */ 159 public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) { 160 super(); 161 this.simpleNodeFactory = nodeFactory; 162 this.treeBuilder = new XOMTreeBuilder(nodeFactory); 163 this.driver = null; 164 this.driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); 165 setXmlPolicy(xmlPolicy); 166 } 167 168 private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) { 169 if (errorHandler == null && transitionHandler == null 170 && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) { 171 return new Tokenizer(handler, newAttributesEachTime); 172 } else { 173 return new ErrorReportingTokenizer(handler, newAttributesEachTime); 174 } 175 } 176 177 /** 178 * This class wraps different tree builders depending on configuration. This 179 * method does the work of hiding this from the user of the class. 180 */ 181 private void lazyInit() { 182 if (driver == null) { 183 this.driver = new Driver(newTokenizer(treeBuilder, false)); 184 this.driver.setErrorHandler(errorHandler); 185 this.driver.setTransitionHandler(transitionHandler); 186 this.treeBuilder.setErrorHandler(treeBuilderErrorHandler); 187 this.driver.setCheckingNormalization(checkingNormalization); 188 this.driver.setCommentPolicy(commentPolicy); 189 this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); 190 this.driver.setContentSpacePolicy(contentSpacePolicy); 191 this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); 192 this.driver.setMappingLangToXmlLang(mappingLangToXmlLang); 193 this.driver.setXmlnsPolicy(xmlnsPolicy); 194 this.driver.setHeuristics(heuristics); 195 for (CharacterHandler characterHandler : characterHandlers) { 196 this.driver.addCharacterHandler(characterHandler); 197 } 198 this.treeBuilder.setDoctypeExpectation(doctypeExpectation); 199 this.treeBuilder.setDocumentModeHandler(documentModeHandler); 200 this.treeBuilder.setScriptingEnabled(scriptingEnabled); 201 this.treeBuilder.setReportingDoctype(reportingDoctype); 202 this.treeBuilder.setNamePolicy(namePolicy); 203 } 204 } 205 206 207 private void tokenize(InputSource is) throws ParsingException, IOException, 208 MalformedURLException { 209 try { 210 if (is == null) { 211 throw new IllegalArgumentException("Null input."); 212 } 213 if (is.getByteStream() == null && is.getCharacterStream() == null) { 214 String systemId = is.getSystemId(); 215 if (systemId == null) { 216 throw new IllegalArgumentException( 217 "No byte stream, no character stream nor URI."); 218 } 219 if (entityResolver != null) { 220 is = entityResolver.resolveEntity(is.getPublicId(), 221 systemId); 222 } 223 if (is.getByteStream() == null 224 || is.getCharacterStream() == null) { 225 is = new InputSource(); 226 is.setSystemId(systemId); 227 is.setByteStream(new URL(systemId).openStream()); 228 } 229 } 230 driver.tokenize(is); 231 } catch (SAXParseException e) { 232 throw new ParsingException(e.getMessage(), e.getSystemId(), e.getLineNumber(), 233 e.getColumnNumber(), e); 234 } catch (SAXException e) { 235 throw new ParsingException(e.getMessage(), e); 236 } 237 } 238 239 /** 240 * Parse from SAX <code>InputSource</code>. 241 * @param is the <code>InputSource</code> 242 * @return the document 243 * @throws ParsingException in case of an XML violation 244 * @throws IOException if IO goes wrang 245 */ 246 public Document build(InputSource is) throws ParsingException, IOException { 247 lazyInit(); 248 treeBuilder.setFragmentContext(null); 249 tokenize(is); 250 return treeBuilder.getDocument(); 251 } 252 253 /** 254 * Parse a fragment from SAX <code>InputSource</code>. 255 * @param is the <code>InputSource</code> 256 * @param context the name of the context element 257 * @return the fragment 258 * @throws ParsingException in case of an XML violation 259 * @throws IOException if IO goes wrang 260 */ 261 public Nodes buildFragment(InputSource is, String context) 262 throws IOException, ParsingException { 263 lazyInit(); 264 treeBuilder.setFragmentContext(context.intern()); 265 tokenize(is); 266 return treeBuilder.getDocumentFragment(); 267 } 268 269 270 /** 271 * Parse from <code>File</code>. 272 * @param file the file 273 * @return the document 274 * @throws ParsingException in case of an XML violation 275 * @throws IOException if IO goes wrang 276 * @see nu.xom.Builder#build(java.io.File) 277 */ 278 @Override 279 public Document build(File file) throws ParsingException, 280 ValidityException, IOException { 281 return build(new FileInputStream(file), file.toURI().toASCIIString()); 282 } 283 284 /** 285 * Parse from <code>InputStream</code>. 286 * @param stream the stream 287 * @param uri the base URI 288 * @return the document 289 * @throws ParsingException in case of an XML violation 290 * @throws IOException if IO goes wrang 291 * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String) 292 */ 293 @Override 294 public Document build(InputStream stream, String uri) 295 throws ParsingException, ValidityException, IOException { 296 InputSource is = new InputSource(stream); 297 is.setSystemId(uri); 298 return build(is); 299 } 300 301 /** 302 * Parse from <code>InputStream</code>. 303 * @param stream the stream 304 * @return the document 305 * @throws ParsingException in case of an XML violation 306 * @throws IOException if IO goes wrang 307 * @see nu.xom.Builder#build(java.io.InputStream) 308 */ 309 @Override 310 public Document build(InputStream stream) throws ParsingException, 311 ValidityException, IOException { 312 return build(new InputSource(stream)); 313 } 314 315 /** 316 * Parse from <code>Reader</code>. 317 * @param stream the reader 318 * @param uri the base URI 319 * @return the document 320 * @throws ParsingException in case of an XML violation 321 * @throws IOException if IO goes wrang 322 * @see nu.xom.Builder#build(java.io.Reader, java.lang.String) 323 */ 324 @Override 325 public Document build(Reader stream, String uri) throws ParsingException, 326 ValidityException, IOException { 327 InputSource is = new InputSource(stream); 328 is.setSystemId(uri); 329 return build(is); 330 } 331 332 /** 333 * Parse from <code>Reader</code>. 334 * @param stream the reader 335 * @return the document 336 * @throws ParsingException in case of an XML violation 337 * @throws IOException if IO goes wrang 338 * @see nu.xom.Builder#build(java.io.Reader) 339 */ 340 @Override 341 public Document build(Reader stream) throws ParsingException, 342 ValidityException, IOException { 343 return build(new InputSource(stream)); 344 } 345 346 /** 347 * Parse from <code>String</code>. 348 * @param content the HTML source as string 349 * @param uri the base URI 350 * @return the document 351 * @throws ParsingException in case of an XML violation 352 * @throws IOException if IO goes wrang 353 * @see nu.xom.Builder#build(java.lang.String, java.lang.String) 354 */ 355 @Override 356 public Document build(String content, String uri) throws ParsingException, 357 ValidityException, IOException { 358 return build(new StringReader(content), uri); 359 } 360 361 /** 362 * Parse from URI. 363 * @param uri the URI of the document 364 * @return the document 365 * @throws ParsingException in case of an XML violation 366 * @throws IOException if IO goes wrang 367 * @see nu.xom.Builder#build(java.lang.String) 368 */ 369 @Override 370 public Document build(String uri) throws ParsingException, 371 ValidityException, IOException { 372 return build(new InputSource(uri)); 373 } 374 375 /** 376 * Gets the node factory 377 */ 378 public SimpleNodeFactory getSimpleNodeFactory() { 379 return simpleNodeFactory; 380 } 381 382 /** 383 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) 384 */ 385 public void setEntityResolver(EntityResolver resolver) { 386 entityResolver = resolver; 387 } 388 389 /** 390 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) 391 */ 392 public void setErrorHandler(ErrorHandler handler) { 393 errorHandler = handler; 394 treeBuilderErrorHandler = handler; 395 driver = null; 396 } 397 398 public void setTransitionHander(TransitionHandler handler) { 399 transitionHandler = handler; 400 driver = null; 401 } 402 403 /** 404 * Indicates whether NFC normalization of source is being checked. 405 * @return <code>true</code> if NFC normalization of source is being checked. 406 * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() 407 */ 408 public boolean isCheckingNormalization() { 409 return checkingNormalization; 410 } 411 412 /** 413 * Toggles the checking of the NFC normalization of source. 414 * @param enable <code>true</code> to check normalization 415 * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) 416 */ 417 public void setCheckingNormalization(boolean enable) { 418 this.checkingNormalization = enable; 419 if (driver != null) { 420 driver.setCheckingNormalization(checkingNormalization); 421 } 422 } 423 424 /** 425 * Sets the policy for consecutive hyphens in comments. 426 * @param commentPolicy the policy 427 * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 428 */ 429 public void setCommentPolicy(XmlViolationPolicy commentPolicy) { 430 this.commentPolicy = commentPolicy; 431 if (driver != null) { 432 driver.setCommentPolicy(commentPolicy); 433 } 434 } 435 436 /** 437 * Sets the policy for non-XML characters except white space. 438 * @param contentNonXmlCharPolicy the policy 439 * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 440 */ 441 public void setContentNonXmlCharPolicy( 442 XmlViolationPolicy contentNonXmlCharPolicy) { 443 this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; 444 driver = null; 445 } 446 447 /** 448 * Sets the policy for non-XML white space. 449 * @param contentSpacePolicy the policy 450 * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 451 */ 452 public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { 453 this.contentSpacePolicy = contentSpacePolicy; 454 if (driver != null) { 455 driver.setContentSpacePolicy(contentSpacePolicy); 456 } 457 } 458 459 /** 460 * Whether the parser considers scripting to be enabled for noscript treatment. 461 * 462 * @return <code>true</code> if enabled 463 * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled() 464 */ 465 public boolean isScriptingEnabled() { 466 return scriptingEnabled; 467 } 468 469 /** 470 * Sets whether the parser considers scripting to be enabled for noscript treatment. 471 * @param scriptingEnabled <code>true</code> to enable 472 * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) 473 */ 474 public void setScriptingEnabled(boolean scriptingEnabled) { 475 this.scriptingEnabled = scriptingEnabled; 476 if (treeBuilder != null) { 477 treeBuilder.setScriptingEnabled(scriptingEnabled); 478 } 479 } 480 481 /** 482 * Returns the doctype expectation. 483 * 484 * @return the doctypeExpectation 485 */ 486 public DoctypeExpectation getDoctypeExpectation() { 487 return doctypeExpectation; 488 } 489 490 /** 491 * Sets the doctype expectation. 492 * 493 * @param doctypeExpectation 494 * the doctypeExpectation to set 495 * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation) 496 */ 497 public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { 498 this.doctypeExpectation = doctypeExpectation; 499 if (treeBuilder != null) { 500 treeBuilder.setDoctypeExpectation(doctypeExpectation); 501 } 502 } 503 504 /** 505 * Returns the document mode handler. 506 * 507 * @return the documentModeHandler 508 */ 509 public DocumentModeHandler getDocumentModeHandler() { 510 return documentModeHandler; 511 } 512 513 /** 514 * Sets the document mode handler. 515 * 516 * @param documentModeHandler 517 * the documentModeHandler to set 518 * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler) 519 */ 520 public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { 521 this.documentModeHandler = documentModeHandler; 522 } 523 524 /** 525 * Returns the streamabilityViolationPolicy. 526 * 527 * @return the streamabilityViolationPolicy 528 */ 529 public XmlViolationPolicy getStreamabilityViolationPolicy() { 530 return streamabilityViolationPolicy; 531 } 532 533 /** 534 * Sets the streamabilityViolationPolicy. 535 * 536 * @param streamabilityViolationPolicy 537 * the streamabilityViolationPolicy to set 538 */ 539 public void setStreamabilityViolationPolicy( 540 XmlViolationPolicy streamabilityViolationPolicy) { 541 this.streamabilityViolationPolicy = streamabilityViolationPolicy; 542 driver = null; 543 } 544 545 /** 546 * Whether the HTML 4 mode reports boolean attributes in a way that repeats 547 * the name in the value. 548 * @param html4ModeCompatibleWithXhtml1Schemata 549 */ 550 public void setHtml4ModeCompatibleWithXhtml1Schemata( 551 boolean html4ModeCompatibleWithXhtml1Schemata) { 552 this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; 553 if (driver != null) { 554 driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); 555 } 556 } 557 558 /** 559 * Returns the <code>Locator</code> during parse. 560 * @return the <code>Locator</code> 561 */ 562 public Locator getDocumentLocator() { 563 return driver.getDocumentLocator(); 564 } 565 566 /** 567 * Whether the HTML 4 mode reports boolean attributes in a way that repeats 568 * the name in the value. 569 * 570 * @return the html4ModeCompatibleWithXhtml1Schemata 571 */ 572 public boolean isHtml4ModeCompatibleWithXhtml1Schemata() { 573 return html4ModeCompatibleWithXhtml1Schemata; 574 } 575 576 /** 577 * Whether <code>lang</code> is mapped to <code>xml:lang</code>. 578 * @param mappingLangToXmlLang 579 * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) 580 */ 581 public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { 582 this.mappingLangToXmlLang = mappingLangToXmlLang; 583 if (driver != null) { 584 driver.setMappingLangToXmlLang(mappingLangToXmlLang); 585 } 586 } 587 588 /** 589 * Whether <code>lang</code> is mapped to <code>xml:lang</code>. 590 * 591 * @return the mappingLangToXmlLang 592 */ 593 public boolean isMappingLangToXmlLang() { 594 return mappingLangToXmlLang; 595 } 596 597 /** 598 * Whether the <code>xmlns</code> attribute on the root element is 599 * passed to through. (FATAL not allowed.) 600 * @param xmlnsPolicy 601 * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 602 */ 603 public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { 604 if (xmlnsPolicy == XmlViolationPolicy.FATAL) { 605 throw new IllegalArgumentException("Can't use FATAL here."); 606 } 607 this.xmlnsPolicy = xmlnsPolicy; 608 if (driver != null) { 609 driver.setXmlnsPolicy(xmlnsPolicy); 610 } 611 } 612 613 /** 614 * Returns the xmlnsPolicy. 615 * 616 * @return the xmlnsPolicy 617 */ 618 public XmlViolationPolicy getXmlnsPolicy() { 619 return xmlnsPolicy; 620 } 621 622 /** 623 * Returns the commentPolicy. 624 * 625 * @return the commentPolicy 626 */ 627 public XmlViolationPolicy getCommentPolicy() { 628 return commentPolicy; 629 } 630 631 /** 632 * Returns the contentNonXmlCharPolicy. 633 * 634 * @return the contentNonXmlCharPolicy 635 */ 636 public XmlViolationPolicy getContentNonXmlCharPolicy() { 637 return contentNonXmlCharPolicy; 638 } 639 640 /** 641 * Returns the contentSpacePolicy. 642 * 643 * @return the contentSpacePolicy 644 */ 645 public XmlViolationPolicy getContentSpacePolicy() { 646 return contentSpacePolicy; 647 } 648 649 /** 650 * @param reportingDoctype 651 * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean) 652 */ 653 public void setReportingDoctype(boolean reportingDoctype) { 654 this.reportingDoctype = reportingDoctype; 655 if (treeBuilder != null) { 656 treeBuilder.setReportingDoctype(reportingDoctype); 657 } 658 } 659 660 /** 661 * Returns the reportingDoctype. 662 * 663 * @return the reportingDoctype 664 */ 665 public boolean isReportingDoctype() { 666 return reportingDoctype; 667 } 668 669 /** 670 * The policy for non-NCName element and attribute names. 671 * @param namePolicy 672 * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) 673 */ 674 public void setNamePolicy(XmlViolationPolicy namePolicy) { 675 this.namePolicy = namePolicy; 676 if (driver != null) { 677 driver.setNamePolicy(namePolicy); 678 treeBuilder.setNamePolicy(namePolicy); 679 } 680 } 681 682 /** 683 * Sets the encoding sniffing heuristics. 684 * 685 * @param heuristics the heuristics to set 686 * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) 687 */ 688 public void setHeuristics(Heuristics heuristics) { 689 this.heuristics = heuristics; 690 if (driver != null) { 691 driver.setHeuristics(heuristics); 692 } 693 } 694 695 public Heuristics getHeuristics() { 696 return this.heuristics; 697 } 698 699 /** 700 * This is a catch-all convenience method for setting name, xmlns, content space, 701 * content non-XML char and comment policies in one go. This does not affect the 702 * streamability policy or doctype reporting. 703 * 704 * @param xmlPolicy 705 */ 706 public void setXmlPolicy(XmlViolationPolicy xmlPolicy) { 707 setNamePolicy(xmlPolicy); 708 setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy); 709 setContentSpacePolicy(xmlPolicy); 710 setContentNonXmlCharPolicy(xmlPolicy); 711 setCommentPolicy(xmlPolicy); 712 } 713 714 /** 715 * The policy for non-NCName element and attribute names. 716 * 717 * @return the namePolicy 718 */ 719 public XmlViolationPolicy getNamePolicy() { 720 return namePolicy; 721 } 722 723 /** 724 * Does nothing. 725 * @deprecated 726 */ 727 public void setBogusXmlnsPolicy( 728 XmlViolationPolicy bogusXmlnsPolicy) { 729 } 730 731 /** 732 * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>. 733 * @deprecated 734 * @return <code>XmlViolationPolicy.ALTER_INFOSET</code> 735 */ 736 public XmlViolationPolicy getBogusXmlnsPolicy() { 737 return XmlViolationPolicy.ALTER_INFOSET; 738 } 739 740 public void addCharacterHandler(CharacterHandler characterHandler) { 741 this.characterHandlers.add(characterHandler); 742 if (driver != null) { 743 driver.addCharacterHandler(characterHandler); 744 } 745 } 746 747 748 /** 749 * Sets whether comment nodes appear in the tree. 750 * @param ignoreComments <code>true</code> to ignore comments 751 * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) 752 */ 753 public void setIgnoringComments(boolean ignoreComments) { 754 treeBuilder.setIgnoringComments(ignoreComments); 755 } 756 757 }