001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     * Copyright (c) 2007-2008 Mozilla Foundation
004     *
005     * Permission is hereby granted, free of charge, to any person obtaining a 
006     * copy of this software and associated documentation files (the "Software"), 
007     * to deal in the Software without restriction, including without limitation 
008     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
009     * and/or sell copies of the Software, and to permit persons to whom the 
010     * Software is furnished to do so, subject to the following conditions:
011     *
012     * The above copyright notice and this permission notice shall be included in 
013     * all copies or substantial portions of the Software.
014     *
015     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
016     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
017     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
018     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
019     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
020     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
021     * DEALINGS IN THE SOFTWARE.
022     */
023    
024    package nu.validator.htmlparser.xom;
025    
026    import java.io.File;
027    import java.io.FileInputStream;
028    import java.io.IOException;
029    import java.io.InputStream;
030    import java.io.Reader;
031    import java.io.StringReader;
032    import java.net.MalformedURLException;
033    import java.net.URL;
034    import java.util.LinkedList;
035    import java.util.List;
036    
037    import nu.validator.htmlparser.common.CharacterHandler;
038    import nu.validator.htmlparser.common.DoctypeExpectation;
039    import nu.validator.htmlparser.common.DocumentModeHandler;
040    import nu.validator.htmlparser.common.Heuristics;
041    import nu.validator.htmlparser.common.TokenHandler;
042    import nu.validator.htmlparser.common.TransitionHandler;
043    import nu.validator.htmlparser.common.XmlViolationPolicy;
044    import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
045    import nu.validator.htmlparser.impl.Tokenizer;
046    import nu.validator.htmlparser.io.Driver;
047    import nu.xom.Builder;
048    import nu.xom.Document;
049    import nu.xom.Nodes;
050    import nu.xom.ParsingException;
051    import nu.xom.ValidityException;
052    
053    import org.xml.sax.EntityResolver;
054    import org.xml.sax.ErrorHandler;
055    import org.xml.sax.InputSource;
056    import org.xml.sax.Locator;
057    import org.xml.sax.SAXException;
058    import org.xml.sax.SAXParseException;
059    
060    /**
061     * This class implements an HTML5 parser that exposes data through the XOM 
062     * interface. 
063     * 
064     * <p>By default, when using the constructor without arguments, the 
065     * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible
066     * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general 
067     * XML violation policy. It is possible to treat XML 1.0 infoset violations 
068     * as fatal by setting the general XML violation policy to <code>FATAL</code>. 
069     * 
070     * <p>The doctype is not represented in the tree.
071     * 
072     * <p>The document mode is represented via the <code>Mode</code> 
073     * interface on the <code>Document</code> node if the node implements 
074     * that interface (depends on the used node factory).
075     * 
076     * <p>The form pointer is stored if the node factory supports storing it.
077     * 
078     * <p>This package has its own node factory class because the official 
079     * XOM node factory may return multiple nodes instead of one confusing 
080     * the assumptions of the DOM-oriented HTML5 parsing algorithm.
081     * 
082     * @version $Id$
083     * @author hsivonen
084     */
085    public class HtmlBuilder extends Builder {
086    
087        private Driver driver;
088    
089        private final XOMTreeBuilder treeBuilder;
090    
091        private final SimpleNodeFactory simpleNodeFactory;
092    
093        private EntityResolver entityResolver;
094    
095        private ErrorHandler errorHandler = null;
096    
097        private DocumentModeHandler documentModeHandler = null;
098    
099        private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
100    
101        private boolean checkingNormalization = false;
102    
103        private boolean scriptingEnabled = false;
104    
105        private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
106        
107        private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
108    
109        private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
110    
111        private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
112    
113        private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
114    
115        private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
116        
117        private boolean html4ModeCompatibleWithXhtml1Schemata = false;
118    
119        private boolean mappingLangToXmlLang = false;
120    
121        private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
122        
123        private boolean reportingDoctype = true;
124    
125        private ErrorHandler treeBuilderErrorHandler = null;
126    
127        private Heuristics heuristics = Heuristics.NONE;
128    
129        private TransitionHandler transitionHandler = null;
130        
131        /**
132         * Constructor with default node factory and fatal XML violation policy.
133         */
134        public HtmlBuilder() {
135            this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL);
136        }
137        
138        /**
139         * Constructor with given node factory and fatal XML violation policy.
140         * @param nodeFactory the factory
141         */
142        public HtmlBuilder(SimpleNodeFactory nodeFactory) {
143            this(nodeFactory, XmlViolationPolicy.FATAL);
144        }
145    
146        /**
147         * Constructor with default node factory and given XML violation policy.
148         * @param xmlPolicy the policy
149         */
150        public HtmlBuilder(XmlViolationPolicy xmlPolicy) {
151            this(new SimpleNodeFactory(), xmlPolicy);
152        }
153        
154        /**
155         * Constructor with given node factory and given XML violation policy.
156         * @param nodeFactory the factory
157         * @param xmlPolicy the policy
158         */
159        public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) {
160            super();
161            this.simpleNodeFactory = nodeFactory;
162            this.treeBuilder = new XOMTreeBuilder(nodeFactory);
163            this.driver = null;
164            this.driver.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
165            setXmlPolicy(xmlPolicy);
166        }
167    
168        private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
169            if (errorHandler == null && transitionHandler == null
170                    && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
171                return new Tokenizer(handler, newAttributesEachTime);
172            } else {
173                return new ErrorReportingTokenizer(handler, newAttributesEachTime);
174            }
175       }
176        
177        /**
178         * This class wraps different tree builders depending on configuration. This 
179         * method does the work of hiding this from the user of the class.
180         */
181        private void lazyInit() {
182            if (driver == null) {
183                this.driver = new Driver(newTokenizer(treeBuilder, false));
184                this.driver.setErrorHandler(errorHandler);
185                this.driver.setTransitionHandler(transitionHandler);
186                this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
187                this.driver.setCheckingNormalization(checkingNormalization);
188                this.driver.setCommentPolicy(commentPolicy);
189                this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
190                this.driver.setContentSpacePolicy(contentSpacePolicy);
191                this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
192                this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
193                this.driver.setXmlnsPolicy(xmlnsPolicy);
194                this.driver.setHeuristics(heuristics);
195                for (CharacterHandler characterHandler : characterHandlers) {
196                    this.driver.addCharacterHandler(characterHandler);
197                }
198                this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
199                this.treeBuilder.setDocumentModeHandler(documentModeHandler);
200                this.treeBuilder.setScriptingEnabled(scriptingEnabled);
201                this.treeBuilder.setReportingDoctype(reportingDoctype);
202                this.treeBuilder.setNamePolicy(namePolicy);
203            }
204        }
205    
206        
207        private void tokenize(InputSource is) throws ParsingException, IOException,
208                MalformedURLException {
209            try {
210                if (is == null) {
211                    throw new IllegalArgumentException("Null input.");
212                }
213                if (is.getByteStream() == null && is.getCharacterStream() == null) {
214                    String systemId = is.getSystemId();
215                    if (systemId == null) {
216                        throw new IllegalArgumentException(
217                                "No byte stream, no character stream nor URI.");
218                    }
219                    if (entityResolver != null) {
220                        is = entityResolver.resolveEntity(is.getPublicId(),
221                                systemId);
222                    }
223                    if (is.getByteStream() == null
224                            || is.getCharacterStream() == null) {
225                        is = new InputSource();
226                        is.setSystemId(systemId);
227                        is.setByteStream(new URL(systemId).openStream());
228                    }
229                }
230                driver.tokenize(is);
231            } catch (SAXParseException e) {
232                throw new ParsingException(e.getMessage(), e.getSystemId(), e.getLineNumber(),
233                        e.getColumnNumber(), e);
234            } catch (SAXException e) {
235                throw new ParsingException(e.getMessage(), e);
236            }
237        }
238    
239        /**
240         * Parse from SAX <code>InputSource</code>.
241         * @param is the <code>InputSource</code>
242         * @return the document
243         * @throws ParsingException in case of an XML violation
244         * @throws IOException if IO goes wrang
245         */
246        public Document build(InputSource is) throws ParsingException, IOException {
247            lazyInit();
248            treeBuilder.setFragmentContext(null);
249            tokenize(is);
250            return treeBuilder.getDocument();
251        }
252    
253        /**
254         * Parse a fragment from SAX <code>InputSource</code>.
255         * @param is the <code>InputSource</code>
256         * @param context the name of the context element
257         * @return the fragment
258         * @throws ParsingException in case of an XML violation
259         * @throws IOException if IO goes wrang
260         */
261        public Nodes buildFragment(InputSource is, String context)
262                throws IOException, ParsingException {
263            lazyInit();
264            treeBuilder.setFragmentContext(context.intern());
265            tokenize(is);
266            return treeBuilder.getDocumentFragment();
267        }
268    
269        
270        /**
271         * Parse from <code>File</code>.
272         * @param file the file
273         * @return the document
274         * @throws ParsingException in case of an XML violation
275         * @throws IOException if IO goes wrang
276         * @see nu.xom.Builder#build(java.io.File)
277         */
278        @Override
279        public Document build(File file) throws ParsingException,
280                ValidityException, IOException {
281            return build(new FileInputStream(file), file.toURI().toASCIIString());
282        }
283    
284        /**
285         * Parse from <code>InputStream</code>.
286         * @param stream the stream
287         * @param uri the base URI
288         * @return the document
289         * @throws ParsingException in case of an XML violation
290         * @throws IOException if IO goes wrang
291         * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String)
292         */
293        @Override
294        public Document build(InputStream stream, String uri)
295                throws ParsingException, ValidityException, IOException {
296            InputSource is = new InputSource(stream);
297            is.setSystemId(uri);
298            return build(is);
299        }
300    
301        /**
302         * Parse from <code>InputStream</code>.
303         * @param stream the stream
304         * @return the document
305         * @throws ParsingException in case of an XML violation
306         * @throws IOException if IO goes wrang
307         * @see nu.xom.Builder#build(java.io.InputStream)
308         */
309        @Override
310        public Document build(InputStream stream) throws ParsingException,
311                ValidityException, IOException {
312            return build(new InputSource(stream));
313        }
314    
315        /**
316         * Parse from <code>Reader</code>.
317         * @param stream the reader
318         * @param uri the base URI
319         * @return the document
320         * @throws ParsingException in case of an XML violation
321         * @throws IOException if IO goes wrang
322         * @see nu.xom.Builder#build(java.io.Reader, java.lang.String)
323         */
324        @Override
325        public Document build(Reader stream, String uri) throws ParsingException,
326                ValidityException, IOException {
327            InputSource is = new InputSource(stream);
328            is.setSystemId(uri);
329            return build(is);
330        }
331    
332        /**
333         * Parse from <code>Reader</code>.
334         * @param stream the reader
335         * @return the document
336         * @throws ParsingException in case of an XML violation
337         * @throws IOException if IO goes wrang
338         * @see nu.xom.Builder#build(java.io.Reader)
339         */
340        @Override
341        public Document build(Reader stream) throws ParsingException,
342                ValidityException, IOException {
343            return build(new InputSource(stream));
344        }
345    
346        /**
347         * Parse from <code>String</code>.
348         * @param content the HTML source as string
349         * @param uri the base URI
350         * @return the document
351         * @throws ParsingException in case of an XML violation
352         * @throws IOException if IO goes wrang
353         * @see nu.xom.Builder#build(java.lang.String, java.lang.String)
354         */
355        @Override
356        public Document build(String content, String uri) throws ParsingException,
357                ValidityException, IOException {
358            return build(new StringReader(content), uri);
359        }
360    
361        /**
362         * Parse from URI.
363         * @param uri the URI of the document
364         * @return the document
365         * @throws ParsingException in case of an XML violation
366         * @throws IOException if IO goes wrang
367         * @see nu.xom.Builder#build(java.lang.String)
368         */
369        @Override
370        public Document build(String uri) throws ParsingException,
371                ValidityException, IOException {
372            return build(new InputSource(uri));
373        }
374    
375        /**
376         * Gets the node factory
377         */
378        public SimpleNodeFactory getSimpleNodeFactory() {
379            return simpleNodeFactory;
380        }
381    
382        /**
383         * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
384         */
385        public void setEntityResolver(EntityResolver resolver) {
386            entityResolver = resolver;
387        }
388    
389        /**
390         * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
391         */
392        public void setErrorHandler(ErrorHandler handler) {
393            errorHandler = handler;
394            treeBuilderErrorHandler = handler;
395            driver = null;
396        }
397        
398        public void setTransitionHander(TransitionHandler handler) {
399            transitionHandler = handler;
400            driver = null;
401        }
402    
403        /**
404         * Indicates whether NFC normalization of source is being checked.
405         * @return <code>true</code> if NFC normalization of source is being checked.
406         * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
407         */
408        public boolean isCheckingNormalization() {
409            return checkingNormalization;
410        }
411    
412        /**
413         * Toggles the checking of the NFC normalization of source.
414         * @param enable <code>true</code> to check normalization
415         * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
416         */
417        public void setCheckingNormalization(boolean enable) {
418            this.checkingNormalization = enable;
419            if (driver != null) {
420                driver.setCheckingNormalization(checkingNormalization);
421            }
422        }
423    
424        /**
425         * Sets the policy for consecutive hyphens in comments.
426         * @param commentPolicy the policy
427         * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
428         */
429        public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
430            this.commentPolicy = commentPolicy;
431            if (driver != null) {
432                driver.setCommentPolicy(commentPolicy);
433            }
434        }
435    
436        /**
437         * Sets the policy for non-XML characters except white space.
438         * @param contentNonXmlCharPolicy the policy
439         * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
440         */
441        public void setContentNonXmlCharPolicy(
442                XmlViolationPolicy contentNonXmlCharPolicy) {
443            this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
444            driver = null;
445        }
446    
447        /**
448         * Sets the policy for non-XML white space.
449         * @param contentSpacePolicy the policy
450         * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
451         */
452        public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
453            this.contentSpacePolicy = contentSpacePolicy;
454            if (driver != null) {
455                driver.setContentSpacePolicy(contentSpacePolicy);
456            }
457        }
458    
459        /**
460         * Whether the parser considers scripting to be enabled for noscript treatment.
461         * 
462         * @return <code>true</code> if enabled
463         * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
464         */
465        public boolean isScriptingEnabled() {
466            return scriptingEnabled;
467        }
468    
469        /**
470         * Sets whether the parser considers scripting to be enabled for noscript treatment.
471         * @param scriptingEnabled <code>true</code> to enable
472         * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
473         */
474        public void setScriptingEnabled(boolean scriptingEnabled) {
475            this.scriptingEnabled = scriptingEnabled;
476            if (treeBuilder != null) {
477                treeBuilder.setScriptingEnabled(scriptingEnabled);
478            }
479        }
480    
481        /**
482         * Returns the doctype expectation.
483         * 
484         * @return the doctypeExpectation
485         */
486        public DoctypeExpectation getDoctypeExpectation() {
487            return doctypeExpectation;
488        }
489    
490        /**
491         * Sets the doctype expectation.
492         * 
493         * @param doctypeExpectation
494         *            the doctypeExpectation to set
495         * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
496         */
497        public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
498            this.doctypeExpectation = doctypeExpectation;
499            if (treeBuilder != null) {
500                treeBuilder.setDoctypeExpectation(doctypeExpectation);
501            }
502        }
503    
504        /**
505         * Returns the document mode handler.
506         * 
507         * @return the documentModeHandler
508         */
509        public DocumentModeHandler getDocumentModeHandler() {
510            return documentModeHandler;
511        }
512    
513        /**
514         * Sets the document mode handler.
515         * 
516         * @param documentModeHandler
517         *            the documentModeHandler to set
518         * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
519         */
520        public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
521            this.documentModeHandler = documentModeHandler;
522        }
523    
524        /**
525         * Returns the streamabilityViolationPolicy.
526         * 
527         * @return the streamabilityViolationPolicy
528         */
529        public XmlViolationPolicy getStreamabilityViolationPolicy() {
530            return streamabilityViolationPolicy;
531        }
532    
533        /**
534         * Sets the streamabilityViolationPolicy.
535         * 
536         * @param streamabilityViolationPolicy
537         *            the streamabilityViolationPolicy to set
538         */
539        public void setStreamabilityViolationPolicy(
540                XmlViolationPolicy streamabilityViolationPolicy) {
541            this.streamabilityViolationPolicy = streamabilityViolationPolicy;
542            driver = null;
543        }
544    
545        /**
546         * Whether the HTML 4 mode reports boolean attributes in a way that repeats
547         * the name in the value.
548         * @param html4ModeCompatibleWithXhtml1Schemata
549         */
550        public void setHtml4ModeCompatibleWithXhtml1Schemata(
551                boolean html4ModeCompatibleWithXhtml1Schemata) {
552            this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
553            if (driver != null) {
554                driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
555            }
556        }
557    
558        /**
559         * Returns the <code>Locator</code> during parse.
560         * @return the <code>Locator</code>
561         */
562        public Locator getDocumentLocator() {
563            return driver.getDocumentLocator();
564        }
565    
566        /**
567         * Whether the HTML 4 mode reports boolean attributes in a way that repeats
568         * the name in the value.
569         * 
570         * @return the html4ModeCompatibleWithXhtml1Schemata
571         */
572        public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
573            return html4ModeCompatibleWithXhtml1Schemata;
574        }
575    
576        /**
577         * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
578         * @param mappingLangToXmlLang
579         * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
580         */
581        public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
582            this.mappingLangToXmlLang = mappingLangToXmlLang;
583            if (driver != null) {
584                driver.setMappingLangToXmlLang(mappingLangToXmlLang);
585            }
586        }
587    
588        /**
589         * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
590         * 
591         * @return the mappingLangToXmlLang
592         */
593        public boolean isMappingLangToXmlLang() {
594            return mappingLangToXmlLang;
595        }
596    
597        /**
598         * Whether the <code>xmlns</code> attribute on the root element is 
599         * passed to through. (FATAL not allowed.)
600         * @param xmlnsPolicy
601         * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
602         */
603        public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
604            if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
605                throw new IllegalArgumentException("Can't use FATAL here.");
606            }
607            this.xmlnsPolicy = xmlnsPolicy;
608            if (driver != null) {
609                driver.setXmlnsPolicy(xmlnsPolicy);
610            }
611        }
612    
613        /**
614         * Returns the xmlnsPolicy.
615         * 
616         * @return the xmlnsPolicy
617         */
618        public XmlViolationPolicy getXmlnsPolicy() {
619            return xmlnsPolicy;
620        }
621    
622        /**
623         * Returns the commentPolicy.
624         * 
625         * @return the commentPolicy
626         */
627        public XmlViolationPolicy getCommentPolicy() {
628            return commentPolicy;
629        }
630    
631        /**
632         * Returns the contentNonXmlCharPolicy.
633         * 
634         * @return the contentNonXmlCharPolicy
635         */
636        public XmlViolationPolicy getContentNonXmlCharPolicy() {
637            return contentNonXmlCharPolicy;
638        }
639    
640        /**
641         * Returns the contentSpacePolicy.
642         * 
643         * @return the contentSpacePolicy
644         */
645        public XmlViolationPolicy getContentSpacePolicy() {
646            return contentSpacePolicy;
647        }
648    
649        /**
650         * @param reportingDoctype
651         * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
652         */
653        public void setReportingDoctype(boolean reportingDoctype) {
654            this.reportingDoctype = reportingDoctype;
655            if (treeBuilder != null) {
656                treeBuilder.setReportingDoctype(reportingDoctype);
657            }
658        }
659    
660        /**
661         * Returns the reportingDoctype.
662         * 
663         * @return the reportingDoctype
664         */
665        public boolean isReportingDoctype() {
666            return reportingDoctype;
667        }
668    
669        /**
670         * The policy for non-NCName element and attribute names.
671         * @param namePolicy
672         * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
673         */
674        public void setNamePolicy(XmlViolationPolicy namePolicy) {
675            this.namePolicy = namePolicy;
676            if (driver != null) {
677                driver.setNamePolicy(namePolicy);
678                treeBuilder.setNamePolicy(namePolicy);
679            }
680        }
681        
682        /**
683         * Sets the encoding sniffing heuristics.
684         * 
685         * @param heuristics the heuristics to set
686         * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
687         */
688        public void setHeuristics(Heuristics heuristics) {
689            this.heuristics = heuristics;
690            if (driver != null) {
691                driver.setHeuristics(heuristics);
692            }
693        }
694        
695        public Heuristics getHeuristics() {
696            return this.heuristics;
697        }
698    
699        /**
700         * This is a catch-all convenience method for setting name, xmlns, content space, 
701         * content non-XML char and comment policies in one go. This does not affect the 
702         * streamability policy or doctype reporting.
703         * 
704         * @param xmlPolicy
705         */
706        public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
707            setNamePolicy(xmlPolicy);
708            setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
709            setContentSpacePolicy(xmlPolicy);
710            setContentNonXmlCharPolicy(xmlPolicy);
711            setCommentPolicy(xmlPolicy);
712        }
713    
714        /**
715         * The policy for non-NCName element and attribute names.
716         * 
717         * @return the namePolicy
718         */
719        public XmlViolationPolicy getNamePolicy() {
720            return namePolicy;
721        }
722    
723        /**
724         * Does nothing.
725         * @deprecated
726         */
727        public void setBogusXmlnsPolicy(
728                XmlViolationPolicy bogusXmlnsPolicy) {
729        }
730    
731        /**
732         * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
733         * @deprecated
734         * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
735         */
736        public XmlViolationPolicy getBogusXmlnsPolicy() {
737            return XmlViolationPolicy.ALTER_INFOSET;
738        }
739        
740        public void addCharacterHandler(CharacterHandler characterHandler) {
741            this.characterHandlers.add(characterHandler);
742            if (driver != null) {
743                driver.addCharacterHandler(characterHandler);
744            }
745        }
746    
747        
748        /**
749         * Sets whether comment nodes appear in the tree.
750         * @param ignoreComments <code>true</code> to ignore comments
751         * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
752         */
753        public void setIgnoringComments(boolean ignoreComments) {
754            treeBuilder.setIgnoringComments(ignoreComments);
755        }
756    
757    }