001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     * Copyright (c) 2007-2008 Mozilla Foundation
004     *
005     * Permission is hereby granted, free of charge, to any person obtaining a 
006     * copy of this software and associated documentation files (the "Software"), 
007     * to deal in the Software without restriction, including without limitation 
008     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
009     * and/or sell copies of the Software, and to permit persons to whom the 
010     * Software is furnished to do so, subject to the following conditions:
011     *
012     * The above copyright notice and this permission notice shall be included in 
013     * all copies or substantial portions of the Software.
014     *
015     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
016     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
017     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
018     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
019     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
020     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
021     * DEALINGS IN THE SOFTWARE.
022     */
023    
024    package nu.validator.htmlparser.dom;
025    
026    import java.io.IOException;
027    import java.net.MalformedURLException;
028    import java.net.URL;
029    import java.util.LinkedList;
030    import java.util.List;
031    
032    import javax.xml.parsers.DocumentBuilder;
033    import javax.xml.parsers.DocumentBuilderFactory;
034    import javax.xml.parsers.ParserConfigurationException;
035    
036    import nu.validator.htmlparser.common.CharacterHandler;
037    import nu.validator.htmlparser.common.DoctypeExpectation;
038    import nu.validator.htmlparser.common.DocumentModeHandler;
039    import nu.validator.htmlparser.common.Heuristics;
040    import nu.validator.htmlparser.common.TokenHandler;
041    import nu.validator.htmlparser.common.TransitionHandler;
042    import nu.validator.htmlparser.common.XmlViolationPolicy;
043    import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
044    import nu.validator.htmlparser.impl.Tokenizer;
045    import nu.validator.htmlparser.io.Driver;
046    
047    import org.w3c.dom.DOMImplementation;
048    import org.w3c.dom.Document;
049    import org.w3c.dom.DocumentFragment;
050    import org.xml.sax.EntityResolver;
051    import org.xml.sax.ErrorHandler;
052    import org.xml.sax.InputSource;
053    import org.xml.sax.Locator;
054    import org.xml.sax.SAXException;
055    
056    /**
057     * This class implements an HTML5 parser that exposes data through the DOM 
058     * interface. 
059     * 
060     * <p>By default, when using the constructor without arguments, the 
061     * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible
062     * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general 
063     * XML violation policy. To make the parser support non-conforming HTML fully 
064     * per the HTML 5 spec while on the other hand potentially violating the SAX2 
065     * API contract, set the general XML violation policy to <code>ALLOW</code>. 
066     * This does not work with a standard DOM implementation.
067     * It is possible to treat XML 1.0 infoset violations as fatal by setting 
068     * the general XML violation policy to <code>FATAL</code>. 
069     * 
070     * <p>The doctype is not represented in the tree.
071     * 
072     * <p>The document mode is represented as user data <code>DocumentMode</code> 
073     * object with the key <code>nu.validator.document-mode</code> on the document 
074     * node. 
075     * 
076     * <p>The form pointer is also stored as user data with the key 
077     * <code>nu.validator.form-pointer</code>.
078     * 
079     * @version $Id$
080     * @author hsivonen
081     */
082    public class HtmlDocumentBuilder extends DocumentBuilder {
083    
084        /**
085         * Returns the JAXP DOM implementation.
086         * 
087         * @return the JAXP DOM implementation
088         */
089        private static DOMImplementation jaxpDOMImplementation() {
090            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
091            factory.setNamespaceAware(true);
092            DocumentBuilder builder;
093            try {
094                builder = factory.newDocumentBuilder();
095            } catch (ParserConfigurationException e) {
096                throw new RuntimeException(e);
097            }
098            return builder.getDOMImplementation();
099        }
100    
101        /**
102         * The tokenizer.
103         */
104        private Driver driver;
105    
106        /**
107         * The tree builder.
108         */
109        private final DOMTreeBuilder treeBuilder;
110    
111        /**
112         * The DOM impl.
113         */
114        private final DOMImplementation implementation;
115    
116        /**
117         * The entity resolver.
118         */
119        private EntityResolver entityResolver;
120    
121        private ErrorHandler errorHandler = null;
122        
123        private DocumentModeHandler documentModeHandler = null;
124    
125        private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
126    
127        private boolean checkingNormalization = false;
128    
129        private boolean scriptingEnabled = false;
130    
131        private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
132        
133        private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
134    
135        private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
136    
137        private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
138    
139        private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
140    
141        private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
142        
143        private boolean html4ModeCompatibleWithXhtml1Schemata = false;
144    
145        private boolean mappingLangToXmlLang = false;
146    
147        private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
148        
149        private boolean reportingDoctype = true;
150    
151        private ErrorHandler treeBuilderErrorHandler = null;
152    
153        private Heuristics heuristics = Heuristics.NONE;
154    
155        private TransitionHandler transitionHandler = null;
156    
157        /**
158         * Instantiates the document builder with a specific DOM 
159         * implementation and XML violation policy.
160         * 
161         * @param implementation
162         *            the DOM implementation
163         *            @param xmlPolicy the policy
164         */
165        public HtmlDocumentBuilder(DOMImplementation implementation,
166                XmlViolationPolicy xmlPolicy) {
167            this.implementation = implementation;
168            this.treeBuilder = new DOMTreeBuilder(implementation);
169            this.driver = null;
170            setXmlPolicy(xmlPolicy);
171        }
172    
173        /**
174         * Instantiates the document builder with a specific DOM implementation 
175         * and the infoset-altering XML violation policy.
176         * 
177         * @param implementation
178         *            the DOM implementation
179         */
180        public HtmlDocumentBuilder(DOMImplementation implementation) {
181            this(implementation, XmlViolationPolicy.ALTER_INFOSET);
182        }
183    
184        /**
185         * Instantiates the document builder with the JAXP DOM implementation 
186         * and the infoset-altering XML violation policy.
187         */
188        public HtmlDocumentBuilder() {
189            this(XmlViolationPolicy.ALTER_INFOSET);
190        }
191    
192        /**
193         * Instantiates the document builder with the JAXP DOM implementation 
194         * and a specific XML violation policy.
195         *            @param xmlPolicy the policy
196         */
197        public HtmlDocumentBuilder(XmlViolationPolicy xmlPolicy) {
198            this(jaxpDOMImplementation(), xmlPolicy);
199        }
200    
201    
202        private Tokenizer newTokenizer(TokenHandler handler,
203                boolean newAttributesEachTime) {
204            if (errorHandler == null && transitionHandler == null
205                    && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
206                return new Tokenizer(handler, newAttributesEachTime);
207            } else {
208                return new ErrorReportingTokenizer(handler, newAttributesEachTime);
209            }
210        }
211        
212        /**
213         * This class wraps different tree builders depending on configuration. This 
214         * method does the work of hiding this from the user of the class.
215         */
216        private void lazyInit() {
217            if (driver == null) {
218                this.driver = new Driver(newTokenizer(treeBuilder, false));
219                this.driver.setErrorHandler(errorHandler);
220                this.driver.setTransitionHandler(transitionHandler);
221                this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
222                this.driver.setCheckingNormalization(checkingNormalization);
223                this.driver.setCommentPolicy(commentPolicy);
224                this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
225                this.driver.setContentSpacePolicy(contentSpacePolicy);
226                this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
227                this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
228                this.driver.setXmlnsPolicy(xmlnsPolicy);
229                this.driver.setHeuristics(heuristics);
230                for (CharacterHandler characterHandler : characterHandlers) {
231                    this.driver.addCharacterHandler(characterHandler);
232                }
233                this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
234                this.treeBuilder.setDocumentModeHandler(documentModeHandler);
235                this.treeBuilder.setScriptingEnabled(scriptingEnabled);
236                this.treeBuilder.setReportingDoctype(reportingDoctype);
237                this.treeBuilder.setNamePolicy(namePolicy);
238            }
239        }
240        
241        /**
242         * Tokenizes the input source.
243         * 
244         * @param is the source
245         * @throws SAXException if stuff goes wrong
246         * @throws IOException if IO goes wrong
247         * @throws MalformedURLException if the system ID is malformed and the entity resolver is <code>null</code>
248         */
249        private void tokenize(InputSource is) throws SAXException, IOException,
250                MalformedURLException {
251            if (is == null) {
252                throw new IllegalArgumentException("Null input.");
253            }
254            if (is.getByteStream() == null && is.getCharacterStream() == null) {
255                String systemId = is.getSystemId();
256                if (systemId == null) {
257                    throw new IllegalArgumentException(
258                            "No byte stream, no character stream nor URI.");
259                }
260                if (entityResolver != null) {
261                    is = entityResolver.resolveEntity(is.getPublicId(), systemId);
262                }
263                if (is.getByteStream() == null || is.getCharacterStream() == null) {
264                    is = new InputSource();
265                    is.setSystemId(systemId);
266                    is.setByteStream(new URL(systemId).openStream());
267                }
268            }
269            if (driver == null) lazyInit();
270            driver.tokenize(is);
271        }
272        
273        /**
274         * Returns the DOM implementation
275         * @return the DOM implementation
276         * @see javax.xml.parsers.DocumentBuilder#getDOMImplementation()
277         */
278        @Override public DOMImplementation getDOMImplementation() {
279            return implementation;
280        }
281    
282        /**
283         * Returns <code>true</code>.
284         * @return <code>true</code>
285         * @see javax.xml.parsers.DocumentBuilder#isNamespaceAware()
286         */
287        @Override public boolean isNamespaceAware() {
288            return true;
289        }
290    
291        /**
292         * Returns <code>false</code>
293         * @return <code>false</code>
294         * @see javax.xml.parsers.DocumentBuilder#isValidating()
295         */
296        @Override public boolean isValidating() {
297            return false;
298        }
299    
300        /**
301         * For API compatibility.
302         * @see javax.xml.parsers.DocumentBuilder#newDocument()
303         */
304        @Override public Document newDocument() {
305            return implementation.createDocument(null, null, null);
306        }
307    
308        /**
309         * Parses a document from a SAX <code>InputSource</code>.
310         * @param is the source
311         * @return the doc
312         * @throws SAXException if stuff goes wrong
313         * @throws IOException if IO goes wrong
314         * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource)
315         */
316        @Override public Document parse(InputSource is) throws SAXException,
317                IOException {
318            treeBuilder.setFragmentContext(null);
319            tokenize(is);
320            return treeBuilder.getDocument();
321        }
322    
323        /**
324         * Parses a document fragment from a SAX <code>InputSource</code>.
325         * @param is the source
326         * @param context the context element name
327         * @return the doc
328         * @throws SAXException if stuff goes wrong
329         * @throws IOException if IO goes wrong
330         */
331        public DocumentFragment parseFragment(InputSource is, String context)
332                throws IOException, SAXException {
333            treeBuilder.setFragmentContext(context.intern());
334            tokenize(is);
335            return treeBuilder.getDocumentFragment();
336        }
337    
338        /**
339         * Sets the entity resolver for URI-only inputs.
340         * @param resolver the resolver
341         * @see javax.xml.parsers.DocumentBuilder#setEntityResolver(org.xml.sax.EntityResolver)
342         */
343        @Override public void setEntityResolver(EntityResolver resolver) {
344            this.entityResolver = resolver;
345        }
346    
347        /**
348         * Sets the error handler.
349         * @param errorHandler the handler
350         * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler)
351         */
352        @Override public void setErrorHandler(ErrorHandler errorHandler) {
353            treeBuilder.setErrorHandler(errorHandler);
354            driver.setErrorHandler(errorHandler);
355        }
356    
357        public void setTransitionHander(TransitionHandler handler) {
358            transitionHandler = handler;
359            driver = null;
360        }
361        
362        /**
363         * Indicates whether NFC normalization of source is being checked.
364         * @return <code>true</code> if NFC normalization of source is being checked.
365         * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
366         */
367        public boolean isCheckingNormalization() {
368            return checkingNormalization;
369        }
370    
371        /**
372         * Toggles the checking of the NFC normalization of source.
373         * @param enable <code>true</code> to check normalization
374         * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
375         */
376        public void setCheckingNormalization(boolean enable) {
377            this.checkingNormalization = enable;
378            if (driver != null) {
379                driver.setCheckingNormalization(checkingNormalization);
380            }
381        }
382    
383        /**
384         * Sets the policy for consecutive hyphens in comments.
385         * @param commentPolicy the policy
386         * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
387         */
388        public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
389            this.commentPolicy = commentPolicy;
390            if (driver != null) {
391                driver.setCommentPolicy(commentPolicy);
392            }
393        }
394    
395        /**
396         * Sets the policy for non-XML characters except white space.
397         * @param contentNonXmlCharPolicy the policy
398         * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
399         */
400        public void setContentNonXmlCharPolicy(
401                XmlViolationPolicy contentNonXmlCharPolicy) {
402            this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
403            driver = null;
404        }
405    
406        /**
407         * Sets the policy for non-XML white space.
408         * @param contentSpacePolicy the policy
409         * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
410         */
411        public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
412            this.contentSpacePolicy = contentSpacePolicy;
413            if (driver != null) {
414                driver.setContentSpacePolicy(contentSpacePolicy);
415            }
416        }
417    
418        /**
419         * Whether the parser considers scripting to be enabled for noscript treatment.
420         * 
421         * @return <code>true</code> if enabled
422         * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
423         */
424        public boolean isScriptingEnabled() {
425            return scriptingEnabled;
426        }
427    
428        /**
429         * Sets whether the parser considers scripting to be enabled for noscript treatment.
430         * @param scriptingEnabled <code>true</code> to enable
431         * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
432         */
433        public void setScriptingEnabled(boolean scriptingEnabled) {
434            this.scriptingEnabled = scriptingEnabled;
435            if (treeBuilder != null) {
436                treeBuilder.setScriptingEnabled(scriptingEnabled);
437            }
438        }
439    
440        /**
441         * Returns the doctype expectation.
442         * 
443         * @return the doctypeExpectation
444         */
445        public DoctypeExpectation getDoctypeExpectation() {
446            return doctypeExpectation;
447        }
448    
449        /**
450         * Sets the doctype expectation.
451         * 
452         * @param doctypeExpectation
453         *            the doctypeExpectation to set
454         * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
455         */
456        public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
457            this.doctypeExpectation = doctypeExpectation;
458            if (treeBuilder != null) {
459                treeBuilder.setDoctypeExpectation(doctypeExpectation);
460            }
461        }
462    
463        /**
464         * Returns the document mode handler.
465         * 
466         * @return the documentModeHandler
467         */
468        public DocumentModeHandler getDocumentModeHandler() {
469            return documentModeHandler;
470        }
471    
472        /**
473         * Sets the document mode handler.
474         * 
475         * @param documentModeHandler
476         *            the documentModeHandler to set
477         * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
478         */
479        public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
480            this.documentModeHandler = documentModeHandler;
481        }
482    
483        /**
484         * Returns the streamabilityViolationPolicy.
485         * 
486         * @return the streamabilityViolationPolicy
487         */
488        public XmlViolationPolicy getStreamabilityViolationPolicy() {
489            return streamabilityViolationPolicy;
490        }
491    
492        /**
493         * Sets the streamabilityViolationPolicy.
494         * 
495         * @param streamabilityViolationPolicy
496         *            the streamabilityViolationPolicy to set
497         */
498        public void setStreamabilityViolationPolicy(
499                XmlViolationPolicy streamabilityViolationPolicy) {
500            this.streamabilityViolationPolicy = streamabilityViolationPolicy;
501            driver = null;
502        }
503    
504        /**
505         * Whether the HTML 4 mode reports boolean attributes in a way that repeats
506         * the name in the value.
507         * @param html4ModeCompatibleWithXhtml1Schemata
508         */
509        public void setHtml4ModeCompatibleWithXhtml1Schemata(
510                boolean html4ModeCompatibleWithXhtml1Schemata) {
511            this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
512            if (driver != null) {
513                driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
514            }
515        }
516    
517        /**
518         * Returns the <code>Locator</code> during parse.
519         * @return the <code>Locator</code>
520         */
521        public Locator getDocumentLocator() {
522            return driver.getDocumentLocator();
523        }
524    
525        /**
526         * Whether the HTML 4 mode reports boolean attributes in a way that repeats
527         * the name in the value.
528         * 
529         * @return the html4ModeCompatibleWithXhtml1Schemata
530         */
531        public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
532            return html4ModeCompatibleWithXhtml1Schemata;
533        }
534    
535        /**
536         * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
537         * @param mappingLangToXmlLang
538         * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
539         */
540        public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
541            this.mappingLangToXmlLang = mappingLangToXmlLang;
542            if (driver != null) {
543                driver.setMappingLangToXmlLang(mappingLangToXmlLang);
544            }
545        }
546    
547        /**
548         * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
549         * 
550         * @return the mappingLangToXmlLang
551         */
552        public boolean isMappingLangToXmlLang() {
553            return mappingLangToXmlLang;
554        }
555    
556        /**
557         * Whether the <code>xmlns</code> attribute on the root element is 
558         * passed to through. (FATAL not allowed.)
559         * @param xmlnsPolicy
560         * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
561         */
562        public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
563            if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
564                throw new IllegalArgumentException("Can't use FATAL here.");
565            }
566            this.xmlnsPolicy = xmlnsPolicy;
567            if (driver != null) {
568                driver.setXmlnsPolicy(xmlnsPolicy);
569            }
570        }
571    
572        /**
573         * Returns the xmlnsPolicy.
574         * 
575         * @return the xmlnsPolicy
576         */
577        public XmlViolationPolicy getXmlnsPolicy() {
578            return xmlnsPolicy;
579        }
580    
581        /**
582         * Returns the commentPolicy.
583         * 
584         * @return the commentPolicy
585         */
586        public XmlViolationPolicy getCommentPolicy() {
587            return commentPolicy;
588        }
589    
590        /**
591         * Returns the contentNonXmlCharPolicy.
592         * 
593         * @return the contentNonXmlCharPolicy
594         */
595        public XmlViolationPolicy getContentNonXmlCharPolicy() {
596            return contentNonXmlCharPolicy;
597        }
598    
599        /**
600         * Returns the contentSpacePolicy.
601         * 
602         * @return the contentSpacePolicy
603         */
604        public XmlViolationPolicy getContentSpacePolicy() {
605            return contentSpacePolicy;
606        }
607    
608        /**
609         * @param reportingDoctype
610         * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
611         */
612        public void setReportingDoctype(boolean reportingDoctype) {
613            this.reportingDoctype = reportingDoctype;
614            if (treeBuilder != null) {
615                treeBuilder.setReportingDoctype(reportingDoctype);
616            }
617        }
618    
619        /**
620         * Returns the reportingDoctype.
621         * 
622         * @return the reportingDoctype
623         */
624        public boolean isReportingDoctype() {
625            return reportingDoctype;
626        }
627    
628        /**
629         * The policy for non-NCName element and attribute names.
630         * @param namePolicy
631         * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
632         */
633        public void setNamePolicy(XmlViolationPolicy namePolicy) {
634            this.namePolicy = namePolicy;
635            if (driver != null) {
636                driver.setNamePolicy(namePolicy);
637                treeBuilder.setNamePolicy(namePolicy);
638            }
639        }
640        
641        /**
642         * Sets the encoding sniffing heuristics.
643         * 
644         * @param heuristics the heuristics to set
645         * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
646         */
647        public void setHeuristics(Heuristics heuristics) {
648            this.heuristics = heuristics;
649            if (driver != null) {
650                driver.setHeuristics(heuristics);
651            }
652        }
653        
654        public Heuristics getHeuristics() {
655            return this.heuristics;
656        }
657    
658        /**
659         * This is a catch-all convenience method for setting name, xmlns, content space, 
660         * content non-XML char and comment policies in one go. This does not affect the 
661         * streamability policy or doctype reporting.
662         * 
663         * @param xmlPolicy
664         */
665        public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
666            setNamePolicy(xmlPolicy);
667            setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
668            setContentSpacePolicy(xmlPolicy);
669            setContentNonXmlCharPolicy(xmlPolicy);
670            setCommentPolicy(xmlPolicy);
671        }
672    
673        /**
674         * The policy for non-NCName element and attribute names.
675         * 
676         * @return the namePolicy
677         */
678        public XmlViolationPolicy getNamePolicy() {
679            return namePolicy;
680        }
681    
682        /**
683         * Does nothing.
684         * @deprecated
685         */
686        public void setBogusXmlnsPolicy(
687                XmlViolationPolicy bogusXmlnsPolicy) {
688        }
689    
690        /**
691         * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
692         * @deprecated
693         * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
694         */
695        public XmlViolationPolicy getBogusXmlnsPolicy() {
696            return XmlViolationPolicy.ALTER_INFOSET;
697        }
698        
699        public void addCharacterHandler(CharacterHandler characterHandler) {
700            this.characterHandlers.add(characterHandler);
701            if (driver != null) {
702                driver.addCharacterHandler(characterHandler);
703            }
704        }
705    
706        
707        /**
708         * Sets whether comment nodes appear in the tree.
709         * @param ignoreComments <code>true</code> to ignore comments
710         * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean)
711         */
712        public void setIgnoringComments(boolean ignoreComments) {
713            treeBuilder.setIgnoringComments(ignoreComments);
714        }
715    
716    }