001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 * Copyright (c) 2007-2008 Mozilla Foundation
004 *
005 * Permission is hereby granted, free of charge, to any person obtaining a
006 * copy of this software and associated documentation files (the "Software"),
007 * to deal in the Software without restriction, including without limitation
008 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
009 * and/or sell copies of the Software, and to permit persons to whom the
010 * Software is furnished to do so, subject to the following conditions:
011 *
012 * The above copyright notice and this permission notice shall be included in
013 * all copies or substantial portions of the Software.
014 *
015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
021 * DEALINGS IN THE SOFTWARE.
022 */
023
024 package nu.validator.htmlparser.xom;
025
026 import java.io.File;
027 import java.io.FileInputStream;
028 import java.io.IOException;
029 import java.io.InputStream;
030 import java.io.Reader;
031 import java.io.StringReader;
032 import java.net.MalformedURLException;
033 import java.net.URL;
034 import java.util.LinkedList;
035 import java.util.List;
036
037 import nu.validator.htmlparser.common.CharacterHandler;
038 import nu.validator.htmlparser.common.DoctypeExpectation;
039 import nu.validator.htmlparser.common.DocumentModeHandler;
040 import nu.validator.htmlparser.common.Heuristics;
041 import nu.validator.htmlparser.common.TokenHandler;
042 import nu.validator.htmlparser.common.TransitionHandler;
043 import nu.validator.htmlparser.common.XmlViolationPolicy;
044 import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
045 import nu.validator.htmlparser.impl.Tokenizer;
046 import nu.validator.htmlparser.io.Driver;
047 import nu.xom.Builder;
048 import nu.xom.Document;
049 import nu.xom.Nodes;
050 import nu.xom.ParsingException;
051 import nu.xom.ValidityException;
052
053 import org.xml.sax.EntityResolver;
054 import org.xml.sax.ErrorHandler;
055 import org.xml.sax.InputSource;
056 import org.xml.sax.Locator;
057 import org.xml.sax.SAXException;
058 import org.xml.sax.SAXParseException;
059
060 /**
061 * This class implements an HTML5 parser that exposes data through the XOM
062 * interface.
063 *
064 * <p>By default, when using the constructor without arguments, the
065 * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible
066 * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general
067 * XML violation policy. It is possible to treat XML 1.0 infoset violations
068 * as fatal by setting the general XML violation policy to <code>FATAL</code>.
069 *
070 * <p>The doctype is not represented in the tree.
071 *
072 * <p>The document mode is represented via the <code>Mode</code>
073 * interface on the <code>Document</code> node if the node implements
074 * that interface (depends on the used node factory).
075 *
076 * <p>The form pointer is stored if the node factory supports storing it.
077 *
078 * <p>This package has its own node factory class because the official
079 * XOM node factory may return multiple nodes instead of one confusing
080 * the assumptions of the DOM-oriented HTML5 parsing algorithm.
081 *
082 * @version $Id$
083 * @author hsivonen
084 */
085 public class HtmlBuilder extends Builder {
086
087 private Driver driver;
088
089 private final XOMTreeBuilder treeBuilder;
090
091 private final SimpleNodeFactory simpleNodeFactory;
092
093 private EntityResolver entityResolver;
094
095 private ErrorHandler errorHandler = null;
096
097 private DocumentModeHandler documentModeHandler = null;
098
099 private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
100
101 private boolean checkingNormalization = false;
102
103 private boolean scriptingEnabled = false;
104
105 private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
106
107 private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
108
109 private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
110
111 private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
112
113 private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
114
115 private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
116
117 private boolean html4ModeCompatibleWithXhtml1Schemata = false;
118
119 private boolean mappingLangToXmlLang = false;
120
121 private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
122
123 private boolean reportingDoctype = true;
124
125 private ErrorHandler treeBuilderErrorHandler = null;
126
127 private Heuristics heuristics = Heuristics.NONE;
128
129 private TransitionHandler transitionHandler = null;
130
131 /**
132 * Constructor with default node factory and fatal XML violation policy.
133 */
134 public HtmlBuilder() {
135 this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL);
136 }
137
138 /**
139 * Constructor with given node factory and fatal XML violation policy.
140 * @param nodeFactory the factory
141 */
142 public HtmlBuilder(SimpleNodeFactory nodeFactory) {
143 this(nodeFactory, XmlViolationPolicy.FATAL);
144 }
145
146 /**
147 * Constructor with default node factory and given XML violation policy.
148 * @param xmlPolicy the policy
149 */
150 public HtmlBuilder(XmlViolationPolicy xmlPolicy) {
151 this(new SimpleNodeFactory(), xmlPolicy);
152 }
153
154 /**
155 * Constructor with given node factory and given XML violation policy.
156 * @param nodeFactory the factory
157 * @param xmlPolicy the policy
158 */
159 public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) {
160 super();
161 this.simpleNodeFactory = nodeFactory;
162 this.treeBuilder = new XOMTreeBuilder(nodeFactory);
163 this.driver = null;
164 this.driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
165 setXmlPolicy(xmlPolicy);
166 }
167
168 private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
169 if (errorHandler == null && transitionHandler == null
170 && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
171 return new Tokenizer(handler, newAttributesEachTime);
172 } else {
173 return new ErrorReportingTokenizer(handler, newAttributesEachTime);
174 }
175 }
176
177 /**
178 * This class wraps different tree builders depending on configuration. This
179 * method does the work of hiding this from the user of the class.
180 */
181 private void lazyInit() {
182 if (driver == null) {
183 this.driver = new Driver(newTokenizer(treeBuilder, false));
184 this.driver.setErrorHandler(errorHandler);
185 this.driver.setTransitionHandler(transitionHandler);
186 this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
187 this.driver.setCheckingNormalization(checkingNormalization);
188 this.driver.setCommentPolicy(commentPolicy);
189 this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
190 this.driver.setContentSpacePolicy(contentSpacePolicy);
191 this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
192 this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
193 this.driver.setXmlnsPolicy(xmlnsPolicy);
194 this.driver.setHeuristics(heuristics);
195 for (CharacterHandler characterHandler : characterHandlers) {
196 this.driver.addCharacterHandler(characterHandler);
197 }
198 this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
199 this.treeBuilder.setDocumentModeHandler(documentModeHandler);
200 this.treeBuilder.setScriptingEnabled(scriptingEnabled);
201 this.treeBuilder.setReportingDoctype(reportingDoctype);
202 this.treeBuilder.setNamePolicy(namePolicy);
203 }
204 }
205
206
207 private void tokenize(InputSource is) throws ParsingException, IOException,
208 MalformedURLException {
209 try {
210 if (is == null) {
211 throw new IllegalArgumentException("Null input.");
212 }
213 if (is.getByteStream() == null && is.getCharacterStream() == null) {
214 String systemId = is.getSystemId();
215 if (systemId == null) {
216 throw new IllegalArgumentException(
217 "No byte stream, no character stream nor URI.");
218 }
219 if (entityResolver != null) {
220 is = entityResolver.resolveEntity(is.getPublicId(),
221 systemId);
222 }
223 if (is.getByteStream() == null
224 || is.getCharacterStream() == null) {
225 is = new InputSource();
226 is.setSystemId(systemId);
227 is.setByteStream(new URL(systemId).openStream());
228 }
229 }
230 driver.tokenize(is);
231 } catch (SAXParseException e) {
232 throw new ParsingException(e.getMessage(), e.getSystemId(), e.getLineNumber(),
233 e.getColumnNumber(), e);
234 } catch (SAXException e) {
235 throw new ParsingException(e.getMessage(), e);
236 }
237 }
238
239 /**
240 * Parse from SAX <code>InputSource</code>.
241 * @param is the <code>InputSource</code>
242 * @return the document
243 * @throws ParsingException in case of an XML violation
244 * @throws IOException if IO goes wrang
245 */
246 public Document build(InputSource is) throws ParsingException, IOException {
247 lazyInit();
248 treeBuilder.setFragmentContext(null);
249 tokenize(is);
250 return treeBuilder.getDocument();
251 }
252
253 /**
254 * Parse a fragment from SAX <code>InputSource</code>.
255 * @param is the <code>InputSource</code>
256 * @param context the name of the context element
257 * @return the fragment
258 * @throws ParsingException in case of an XML violation
259 * @throws IOException if IO goes wrang
260 */
261 public Nodes buildFragment(InputSource is, String context)
262 throws IOException, ParsingException {
263 lazyInit();
264 treeBuilder.setFragmentContext(context.intern());
265 tokenize(is);
266 return treeBuilder.getDocumentFragment();
267 }
268
269
270 /**
271 * Parse from <code>File</code>.
272 * @param file the file
273 * @return the document
274 * @throws ParsingException in case of an XML violation
275 * @throws IOException if IO goes wrang
276 * @see nu.xom.Builder#build(java.io.File)
277 */
278 @Override
279 public Document build(File file) throws ParsingException,
280 ValidityException, IOException {
281 return build(new FileInputStream(file), file.toURI().toASCIIString());
282 }
283
284 /**
285 * Parse from <code>InputStream</code>.
286 * @param stream the stream
287 * @param uri the base URI
288 * @return the document
289 * @throws ParsingException in case of an XML violation
290 * @throws IOException if IO goes wrang
291 * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String)
292 */
293 @Override
294 public Document build(InputStream stream, String uri)
295 throws ParsingException, ValidityException, IOException {
296 InputSource is = new InputSource(stream);
297 is.setSystemId(uri);
298 return build(is);
299 }
300
301 /**
302 * Parse from <code>InputStream</code>.
303 * @param stream the stream
304 * @return the document
305 * @throws ParsingException in case of an XML violation
306 * @throws IOException if IO goes wrang
307 * @see nu.xom.Builder#build(java.io.InputStream)
308 */
309 @Override
310 public Document build(InputStream stream) throws ParsingException,
311 ValidityException, IOException {
312 return build(new InputSource(stream));
313 }
314
315 /**
316 * Parse from <code>Reader</code>.
317 * @param stream the reader
318 * @param uri the base URI
319 * @return the document
320 * @throws ParsingException in case of an XML violation
321 * @throws IOException if IO goes wrang
322 * @see nu.xom.Builder#build(java.io.Reader, java.lang.String)
323 */
324 @Override
325 public Document build(Reader stream, String uri) throws ParsingException,
326 ValidityException, IOException {
327 InputSource is = new InputSource(stream);
328 is.setSystemId(uri);
329 return build(is);
330 }
331
332 /**
333 * Parse from <code>Reader</code>.
334 * @param stream the reader
335 * @return the document
336 * @throws ParsingException in case of an XML violation
337 * @throws IOException if IO goes wrang
338 * @see nu.xom.Builder#build(java.io.Reader)
339 */
340 @Override
341 public Document build(Reader stream) throws ParsingException,
342 ValidityException, IOException {
343 return build(new InputSource(stream));
344 }
345
346 /**
347 * Parse from <code>String</code>.
348 * @param content the HTML source as string
349 * @param uri the base URI
350 * @return the document
351 * @throws ParsingException in case of an XML violation
352 * @throws IOException if IO goes wrang
353 * @see nu.xom.Builder#build(java.lang.String, java.lang.String)
354 */
355 @Override
356 public Document build(String content, String uri) throws ParsingException,
357 ValidityException, IOException {
358 return build(new StringReader(content), uri);
359 }
360
361 /**
362 * Parse from URI.
363 * @param uri the URI of the document
364 * @return the document
365 * @throws ParsingException in case of an XML violation
366 * @throws IOException if IO goes wrang
367 * @see nu.xom.Builder#build(java.lang.String)
368 */
369 @Override
370 public Document build(String uri) throws ParsingException,
371 ValidityException, IOException {
372 return build(new InputSource(uri));
373 }
374
375 /**
376 * Gets the node factory
377 */
378 public SimpleNodeFactory getSimpleNodeFactory() {
379 return simpleNodeFactory;
380 }
381
382 /**
383 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
384 */
385 public void setEntityResolver(EntityResolver resolver) {
386 entityResolver = resolver;
387 }
388
389 /**
390 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
391 */
392 public void setErrorHandler(ErrorHandler handler) {
393 errorHandler = handler;
394 treeBuilderErrorHandler = handler;
395 driver = null;
396 }
397
398 public void setTransitionHander(TransitionHandler handler) {
399 transitionHandler = handler;
400 driver = null;
401 }
402
403 /**
404 * Indicates whether NFC normalization of source is being checked.
405 * @return <code>true</code> if NFC normalization of source is being checked.
406 * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
407 */
408 public boolean isCheckingNormalization() {
409 return checkingNormalization;
410 }
411
412 /**
413 * Toggles the checking of the NFC normalization of source.
414 * @param enable <code>true</code> to check normalization
415 * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
416 */
417 public void setCheckingNormalization(boolean enable) {
418 this.checkingNormalization = enable;
419 if (driver != null) {
420 driver.setCheckingNormalization(checkingNormalization);
421 }
422 }
423
424 /**
425 * Sets the policy for consecutive hyphens in comments.
426 * @param commentPolicy the policy
427 * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
428 */
429 public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
430 this.commentPolicy = commentPolicy;
431 if (driver != null) {
432 driver.setCommentPolicy(commentPolicy);
433 }
434 }
435
436 /**
437 * Sets the policy for non-XML characters except white space.
438 * @param contentNonXmlCharPolicy the policy
439 * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
440 */
441 public void setContentNonXmlCharPolicy(
442 XmlViolationPolicy contentNonXmlCharPolicy) {
443 this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
444 driver = null;
445 }
446
447 /**
448 * Sets the policy for non-XML white space.
449 * @param contentSpacePolicy the policy
450 * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
451 */
452 public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
453 this.contentSpacePolicy = contentSpacePolicy;
454 if (driver != null) {
455 driver.setContentSpacePolicy(contentSpacePolicy);
456 }
457 }
458
459 /**
460 * Whether the parser considers scripting to be enabled for noscript treatment.
461 *
462 * @return <code>true</code> if enabled
463 * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
464 */
465 public boolean isScriptingEnabled() {
466 return scriptingEnabled;
467 }
468
469 /**
470 * Sets whether the parser considers scripting to be enabled for noscript treatment.
471 * @param scriptingEnabled <code>true</code> to enable
472 * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
473 */
474 public void setScriptingEnabled(boolean scriptingEnabled) {
475 this.scriptingEnabled = scriptingEnabled;
476 if (treeBuilder != null) {
477 treeBuilder.setScriptingEnabled(scriptingEnabled);
478 }
479 }
480
481 /**
482 * Returns the doctype expectation.
483 *
484 * @return the doctypeExpectation
485 */
486 public DoctypeExpectation getDoctypeExpectation() {
487 return doctypeExpectation;
488 }
489
490 /**
491 * Sets the doctype expectation.
492 *
493 * @param doctypeExpectation
494 * the doctypeExpectation to set
495 * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
496 */
497 public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
498 this.doctypeExpectation = doctypeExpectation;
499 if (treeBuilder != null) {
500 treeBuilder.setDoctypeExpectation(doctypeExpectation);
501 }
502 }
503
504 /**
505 * Returns the document mode handler.
506 *
507 * @return the documentModeHandler
508 */
509 public DocumentModeHandler getDocumentModeHandler() {
510 return documentModeHandler;
511 }
512
513 /**
514 * Sets the document mode handler.
515 *
516 * @param documentModeHandler
517 * the documentModeHandler to set
518 * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
519 */
520 public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
521 this.documentModeHandler = documentModeHandler;
522 }
523
524 /**
525 * Returns the streamabilityViolationPolicy.
526 *
527 * @return the streamabilityViolationPolicy
528 */
529 public XmlViolationPolicy getStreamabilityViolationPolicy() {
530 return streamabilityViolationPolicy;
531 }
532
533 /**
534 * Sets the streamabilityViolationPolicy.
535 *
536 * @param streamabilityViolationPolicy
537 * the streamabilityViolationPolicy to set
538 */
539 public void setStreamabilityViolationPolicy(
540 XmlViolationPolicy streamabilityViolationPolicy) {
541 this.streamabilityViolationPolicy = streamabilityViolationPolicy;
542 driver = null;
543 }
544
545 /**
546 * Whether the HTML 4 mode reports boolean attributes in a way that repeats
547 * the name in the value.
548 * @param html4ModeCompatibleWithXhtml1Schemata
549 */
550 public void setHtml4ModeCompatibleWithXhtml1Schemata(
551 boolean html4ModeCompatibleWithXhtml1Schemata) {
552 this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
553 if (driver != null) {
554 driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
555 }
556 }
557
558 /**
559 * Returns the <code>Locator</code> during parse.
560 * @return the <code>Locator</code>
561 */
562 public Locator getDocumentLocator() {
563 return driver.getDocumentLocator();
564 }
565
566 /**
567 * Whether the HTML 4 mode reports boolean attributes in a way that repeats
568 * the name in the value.
569 *
570 * @return the html4ModeCompatibleWithXhtml1Schemata
571 */
572 public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
573 return html4ModeCompatibleWithXhtml1Schemata;
574 }
575
576 /**
577 * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
578 * @param mappingLangToXmlLang
579 * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
580 */
581 public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
582 this.mappingLangToXmlLang = mappingLangToXmlLang;
583 if (driver != null) {
584 driver.setMappingLangToXmlLang(mappingLangToXmlLang);
585 }
586 }
587
588 /**
589 * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
590 *
591 * @return the mappingLangToXmlLang
592 */
593 public boolean isMappingLangToXmlLang() {
594 return mappingLangToXmlLang;
595 }
596
597 /**
598 * Whether the <code>xmlns</code> attribute on the root element is
599 * passed to through. (FATAL not allowed.)
600 * @param xmlnsPolicy
601 * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
602 */
603 public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
604 if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
605 throw new IllegalArgumentException("Can't use FATAL here.");
606 }
607 this.xmlnsPolicy = xmlnsPolicy;
608 if (driver != null) {
609 driver.setXmlnsPolicy(xmlnsPolicy);
610 }
611 }
612
613 /**
614 * Returns the xmlnsPolicy.
615 *
616 * @return the xmlnsPolicy
617 */
618 public XmlViolationPolicy getXmlnsPolicy() {
619 return xmlnsPolicy;
620 }
621
622 /**
623 * Returns the commentPolicy.
624 *
625 * @return the commentPolicy
626 */
627 public XmlViolationPolicy getCommentPolicy() {
628 return commentPolicy;
629 }
630
631 /**
632 * Returns the contentNonXmlCharPolicy.
633 *
634 * @return the contentNonXmlCharPolicy
635 */
636 public XmlViolationPolicy getContentNonXmlCharPolicy() {
637 return contentNonXmlCharPolicy;
638 }
639
640 /**
641 * Returns the contentSpacePolicy.
642 *
643 * @return the contentSpacePolicy
644 */
645 public XmlViolationPolicy getContentSpacePolicy() {
646 return contentSpacePolicy;
647 }
648
649 /**
650 * @param reportingDoctype
651 * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
652 */
653 public void setReportingDoctype(boolean reportingDoctype) {
654 this.reportingDoctype = reportingDoctype;
655 if (treeBuilder != null) {
656 treeBuilder.setReportingDoctype(reportingDoctype);
657 }
658 }
659
660 /**
661 * Returns the reportingDoctype.
662 *
663 * @return the reportingDoctype
664 */
665 public boolean isReportingDoctype() {
666 return reportingDoctype;
667 }
668
669 /**
670 * The policy for non-NCName element and attribute names.
671 * @param namePolicy
672 * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
673 */
674 public void setNamePolicy(XmlViolationPolicy namePolicy) {
675 this.namePolicy = namePolicy;
676 if (driver != null) {
677 driver.setNamePolicy(namePolicy);
678 treeBuilder.setNamePolicy(namePolicy);
679 }
680 }
681
682 /**
683 * Sets the encoding sniffing heuristics.
684 *
685 * @param heuristics the heuristics to set
686 * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
687 */
688 public void setHeuristics(Heuristics heuristics) {
689 this.heuristics = heuristics;
690 if (driver != null) {
691 driver.setHeuristics(heuristics);
692 }
693 }
694
695 public Heuristics getHeuristics() {
696 return this.heuristics;
697 }
698
699 /**
700 * This is a catch-all convenience method for setting name, xmlns, content space,
701 * content non-XML char and comment policies in one go. This does not affect the
702 * streamability policy or doctype reporting.
703 *
704 * @param xmlPolicy
705 */
706 public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
707 setNamePolicy(xmlPolicy);
708 setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
709 setContentSpacePolicy(xmlPolicy);
710 setContentNonXmlCharPolicy(xmlPolicy);
711 setCommentPolicy(xmlPolicy);
712 }
713
714 /**
715 * The policy for non-NCName element and attribute names.
716 *
717 * @return the namePolicy
718 */
719 public XmlViolationPolicy getNamePolicy() {
720 return namePolicy;
721 }
722
723 /**
724 * Does nothing.
725 * @deprecated
726 */
727 public void setBogusXmlnsPolicy(
728 XmlViolationPolicy bogusXmlnsPolicy) {
729 }
730
731 /**
732 * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
733 * @deprecated
734 * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
735 */
736 public XmlViolationPolicy getBogusXmlnsPolicy() {
737 return XmlViolationPolicy.ALTER_INFOSET;
738 }
739
740 public void addCharacterHandler(CharacterHandler characterHandler) {
741 this.characterHandlers.add(characterHandler);
742 if (driver != null) {
743 driver.addCharacterHandler(characterHandler);
744 }
745 }
746
747
748 /**
749 * Sets whether comment nodes appear in the tree.
750 * @param ignoreComments <code>true</code> to ignore comments
751 * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
752 */
753 public void setIgnoringComments(boolean ignoreComments) {
754 treeBuilder.setIgnoringComments(ignoreComments);
755 }
756
757 }