001    /*
002     * Copyright (c) 2005, 2006 Henri Sivonen
003     * Copyright (c) 2007 Mozilla Foundation
004     *
005     * Permission is hereby granted, free of charge, to any person obtaining a 
006     * copy of this software and associated documentation files (the "Software"), 
007     * to deal in the Software without restriction, including without limitation 
008     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
009     * and/or sell copies of the Software, and to permit persons to whom the 
010     * Software is furnished to do so, subject to the following conditions:
011     *
012     * The above copyright notice and this permission notice shall be included in 
013     * all copies or substantial portions of the Software.
014     *
015     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
016     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
017     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
018     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
019     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
020     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
021     * DEALINGS IN THE SOFTWARE.
022     */
023    
024    package nu.validator.servlet;
025    
026    import java.io.BufferedReader;
027    import java.io.File;
028    import java.io.FileInputStream;
029    import java.io.IOException;
030    import java.io.InputStreamReader;
031    import java.io.OutputStream;
032    import java.net.MalformedURLException;
033    import java.util.Arrays;
034    import java.util.HashMap;
035    import java.util.HashSet;
036    import java.util.Iterator;
037    import java.util.LinkedList;
038    import java.util.List;
039    import java.util.Map;
040    import java.util.Properties;
041    import java.util.Set;
042    import java.util.SortedMap;
043    import java.util.TreeMap;
044    import java.util.regex.Matcher;
045    import java.util.regex.Pattern;
046    
047    import javax.servlet.ServletException;
048    import javax.servlet.http.HttpServletRequest;
049    import javax.servlet.http.HttpServletResponse;
050    
051    import nu.validator.gnu.xml.aelfred2.SAXDriver;
052    import nu.validator.htmlparser.common.DoctypeExpectation;
053    import nu.validator.htmlparser.common.DocumentMode;
054    import nu.validator.htmlparser.common.DocumentModeHandler;
055    import nu.validator.htmlparser.common.XmlViolationPolicy;
056    import nu.validator.htmlparser.sax.HtmlParser;
057    import nu.validator.messages.JsonMessageEmitter;
058    import nu.validator.messages.MessageEmitterAdapter;
059    import nu.validator.messages.TextMessageEmitter;
060    import nu.validator.messages.XhtmlMessageEmitter;
061    import nu.validator.messages.XmlMessageEmitter;
062    import nu.validator.source.SourceCode;
063    import nu.validator.xml.AttributesImpl;
064    import nu.validator.xml.CharacterUtil;
065    import nu.validator.xml.ContentTypeParser;
066    import nu.validator.xml.HtmlSerializer;
067    import nu.validator.xml.IdFilter;
068    import nu.validator.xml.LocalCacheEntityResolver;
069    import nu.validator.xml.NullEntityResolver;
070    import nu.validator.xml.PrudentHttpEntityResolver;
071    import nu.validator.xml.SystemErrErrorHandler;
072    import nu.validator.xml.TypedInputSource;
073    import nu.validator.xml.WiretapXMLReaderWrapper;
074    import nu.validator.xml.XhtmlSaxEmitter;
075    
076    import org.apache.log4j.Logger;
077    import org.apache.xml.serializer.Method;
078    import org.apache.xml.serializer.OutputPropertiesFactory;
079    import org.apache.xml.serializer.Serializer;
080    import org.apache.xml.serializer.SerializerFactory;
081    import org.whattf.checker.DebugChecker;
082    import org.whattf.checker.NormalizationChecker;
083    import org.whattf.checker.SignificantInlineChecker;
084    import org.whattf.checker.TextContentChecker;
085    import org.whattf.checker.UsemapChecker;
086    import org.whattf.checker.jing.CheckerValidator;
087    import org.whattf.checker.table.TableChecker;
088    import org.xml.sax.ContentHandler;
089    import org.xml.sax.EntityResolver;
090    import org.xml.sax.ErrorHandler;
091    import org.xml.sax.Locator;
092    import org.xml.sax.SAXException;
093    import org.xml.sax.SAXNotRecognizedException;
094    import org.xml.sax.SAXNotSupportedException;
095    import org.xml.sax.SAXParseException;
096    import org.xml.sax.XMLReader;
097    import org.xml.sax.ext.LexicalHandler;
098    
099    import com.hp.hpl.jena.iri.IRI;
100    import com.hp.hpl.jena.iri.IRIException;
101    import com.hp.hpl.jena.iri.IRIFactory;
102    import com.ibm.icu.text.Normalizer;
103    import com.thaiopensource.relaxng.impl.CombineValidator;
104    import com.thaiopensource.util.PropertyMap;
105    import com.thaiopensource.util.PropertyMapBuilder;
106    import com.thaiopensource.validate.IncorrectSchemaException;
107    import com.thaiopensource.validate.Schema;
108    import com.thaiopensource.validate.SchemaReader;
109    import com.thaiopensource.validate.ValidateProperty;
110    import com.thaiopensource.validate.Validator;
111    import com.thaiopensource.validate.auto.AutoSchemaReader;
112    import com.thaiopensource.validate.rng.CompactSchemaReader;
113    import com.thaiopensource.validate.rng.RngProperty;
114    
115    
116    /**
117     * @version $Id: VerifierServletTransaction.java,v 1.10 2005/07/24 07:32:48
118     *          hsivonen Exp $
119     * @author hsivonen
120     */
121    class VerifierServletTransaction implements DocumentModeHandler {
122    
123        private enum OutputFormat {
124            HTML, XHTML, TEXT, XML, JSON, RELAXED, SOAP, UNICORN, EMACS
125        }
126    
127        private static final Logger log4j = Logger.getLogger(VerifierServletTransaction.class);
128    
129        private static final Pattern SPACE = Pattern.compile("\\s+");
130        
131        private static final Pattern JS_IDENTIFIER = Pattern.compile("[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$][\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}]*");
132    
133        private static final String[] JS_RESERVED_WORDS = { "abstract", "boolean",
134                "break", "byte", "case", "catch", "char", "class", "const",
135                "continue", "debugger", "default", "delete", "do", "double",
136                "else", "enum", "export", "extends", "final", "finally", "float",
137                "for", "function", "goto", "if", "implements", "import", "in",
138                "instanceof", "int", "interface", "long", "native", "new",
139                "package", "private", "protected", "public", "return", "short",
140                "static", "super", "switch", "synchronized", "this", "throw",
141                "throws", "transient", "try", "typeof", "var", "void", "volatile",
142                "while", "with" };
143    
144        protected static final int HTML5_SCHEMA = 3;
145    
146        protected static final int XHTML1STRICT_SCHEMA = 2;
147    
148        protected static final int XHTML1TRANSITIONAL_SCHEMA = 1;
149    
150        protected static final int XHTML5_SCHEMA = 7;
151    
152        private static final char[] SERVICE_TITLE = "Validator.nu ".toCharArray();
153    
154        private static final char[] TWO_POINT_OH_BETA = "2.1 Gamma".toCharArray();
155    
156        private static final char[] RESULTS_TITLE = "Validation results for ".toCharArray();
157    
158        private static final char[] FOR = " for ".toCharArray();
159    
160        private static final Map pathMap = new HashMap();
161    
162        private static int[] presetDoctypes;
163    
164        private static String[] presetLabels;
165    
166        private static String[] presetUrls;
167    
168        private static String[] presetNamespaces;
169    
170        private static final String[] KNOWN_CONTENT_TYPES = {
171                "application/atom+xml", "application/docbook+xml",
172                "application/xhtml+xml", "application/xv+xml" };
173    
174        private static final String[] NAMESPACES_FOR_KNOWN_CONTENT_TYPES = {
175                "http://www.w3.org/2005/Atom", "http://docbook.org/ns/docbook",
176                "http://www.w3.org/1999/xhtml", "http://www.w3.org/1999/xhtml" };
177    
178        private static final String[] ALL_CHECKERS = {
179                "http://hsivonen.iki.fi/checkers/table/",
180                "http://hsivonen.iki.fi/checkers/nfc/",
181                "http://hsivonen.iki.fi/checkers/significant-inline/",
182                "http://hsivonen.iki.fi/checkers/text-content/",
183                "http://n.validator.nu/checkers/usemap/"};
184    
185        private static final String[] ALL_CHECKERS_HTML4 = {
186                "http://hsivonen.iki.fi/checkers/table/",
187                "http://hsivonen.iki.fi/checkers/nfc/" };
188    
189        private long start = System.currentTimeMillis();
190    
191        private final HttpServletRequest request;
192    
193        private final HttpServletResponse response;
194    
195        private IRIFactory iriFactory;
196    
197        protected String document = null;
198    
199        private ParserMode parser = ParserMode.AUTO;
200    
201        private boolean laxType = false;
202    
203        protected ContentHandler contentHandler;
204    
205        protected XhtmlSaxEmitter emitter;
206    
207        protected MessageEmitterAdapter errorHandler;
208    
209        private AttributesImpl attrs = new AttributesImpl();
210    
211        private OutputStream out;
212    
213        private PropertyMap jingPropertyMap;
214    
215        protected LocalCacheEntityResolver entityResolver;
216    
217        private static long lastModified;
218    
219        private static String[] preloadedSchemaUrls;
220    
221        private static Schema[] preloadedSchemas;
222    
223        private String schemaUrls = null;
224    
225        protected Validator validator = null;
226    
227        private BufferingRootNamespaceSniffer bufferingRootNamespaceSniffer = null;
228    
229        private String contentType = null;
230    
231        protected HtmlParser htmlParser = null;
232    
233        protected SAXDriver xmlParser = null;
234        
235        protected XMLReader reader;
236    
237        protected TypedInputSource documentInput;
238    
239        protected PrudentHttpEntityResolver httpRes;
240    
241        protected ContentTypeParser contentTypeParser;
242    
243        private Set<String> loadedValidatorUrls = new HashSet<String>();
244    
245        private boolean checkNormalization = false;
246    
247        private boolean rootNamespaceSeen = false;
248    
249        private OutputFormat outputFormat;
250    
251        private String postContentType;
252    
253        private boolean methodIsGet;
254    
255        private SourceCode sourceCode = new SourceCode();
256    
257        private boolean showSource;
258    
259        static {
260            try {
261                log4j.debug("Starting static initializer.");
262    
263                String presetPath = System.getProperty("nu.validator.servlet.presetconfpath");
264                File presetFile = new File(presetPath);
265                lastModified = presetFile.lastModified();
266                BufferedReader r = new BufferedReader(new InputStreamReader(
267                        new FileInputStream(presetFile), "UTF-8"));
268                String line;
269                List<String> doctypes = new LinkedList<String>();
270                List<String> namespaces = new LinkedList<String>();
271                List<String> labels = new LinkedList<String>();
272                List<String> urls = new LinkedList<String>();
273    
274                log4j.debug("Starting to loop over config file lines.");
275    
276                while ((line = r.readLine()) != null) {
277                    if ("".equals(line.trim())) {
278                        break;
279                    }
280                    String s[] = line.split("\t");
281                    doctypes.add(s[0]);
282                    namespaces.add(s[1]);
283                    labels.add(s[2]);
284                    urls.add(s[3]);
285                }
286    
287                log4j.debug("Finished reading config.");
288    
289                String[] presetDoctypesAsStrings = doctypes.toArray(new String[0]);
290                presetNamespaces = namespaces.toArray(new String[0]);
291                presetLabels = labels.toArray(new String[0]);
292                presetUrls = urls.toArray(new String[0]);
293    
294                log4j.debug("Converted config to arrays.");
295    
296                for (int i = 0; i < presetNamespaces.length; i++) {
297                    String str = presetNamespaces[i];
298                    if ("-".equals(str)) {
299                        presetNamespaces[i] = null;
300                    } else {
301                        presetNamespaces[i] = presetNamespaces[i].intern();
302                    }
303                }
304    
305                log4j.debug("Prepared namespace array.");
306    
307                presetDoctypes = new int[presetDoctypesAsStrings.length];
308                for (int i = 0; i < presetDoctypesAsStrings.length; i++) {
309                    presetDoctypes[i] = Integer.parseInt(presetDoctypesAsStrings[i]);
310                }
311    
312                log4j.debug("Parsed doctype numbers into ints.");
313    
314                String prefix = System.getProperty("nu.validator.servlet.cachepathprefix");
315    
316                log4j.debug("The cache path prefix is: " + prefix);
317    
318                String cacheConfPath = System.getProperty("nu.validator.servlet.cacheconfpath");
319    
320                log4j.debug("The cache config path is: " + cacheConfPath);
321    
322                r = new BufferedReader(new InputStreamReader(new FileInputStream(
323                        cacheConfPath), "UTF-8"));
324                while ((line = r.readLine()) != null) {
325                    if ("".equals(line.trim())) {
326                        break;
327                    }
328                    String s[] = line.split("\t");
329                    pathMap.put(s[0], prefix + s[1]);
330                }
331    
332                log4j.debug("Cache config read.");
333    
334                ErrorHandler eh = new SystemErrErrorHandler();
335                LocalCacheEntityResolver er = new LocalCacheEntityResolver(pathMap,
336                        new NullEntityResolver());
337                er.setAllowRnc(true);
338                PropertyMapBuilder pmb = new PropertyMapBuilder();
339                pmb.put(ValidateProperty.ERROR_HANDLER, eh);
340                pmb.put(ValidateProperty.ENTITY_RESOLVER, er);
341                pmb.put(ValidateProperty.XML_READER_CREATOR,
342                        new VerifierServletXMLReaderCreator(eh, er));
343                RngProperty.CHECK_ID_IDREF.add(pmb);
344                PropertyMap pMap = pmb.toPropertyMap();
345    
346                log4j.debug("Parsing set up. Starting to read schemas.");
347    
348                SortedMap<String, Schema> schemaMap = new TreeMap<String, Schema>();
349                for (int i = 0; i < presetUrls.length; i++) {
350                    String[] urls1 = SPACE.split(presetUrls[i]);
351                    for (int j = 0; j < urls1.length; j++) {
352                        String url = urls1[j];
353                        if (schemaMap.get(url) == null && !isCheckerUrl(url)) {
354                            Schema sch = schemaByUrl(url, er, pMap);
355                            schemaMap.put(url, sch);
356                        }
357                    }
358                }
359    
360                log4j.debug("Schemas read.");
361    
362                preloadedSchemaUrls = new String[schemaMap.size()];
363                preloadedSchemas = new Schema[schemaMap.size()];
364                int i = 0;
365                for (Iterator iter = schemaMap.entrySet().iterator(); iter.hasNext();) {
366                    Map.Entry entry = (Map.Entry) iter.next();
367                    preloadedSchemaUrls[i] = entry.getKey().toString().intern();
368                    preloadedSchemas[i] = (Schema) entry.getValue();
369                    i++;
370                }
371    
372                log4j.debug("Initialization complete.");
373            } catch (Exception e) {
374                throw new RuntimeException(e);
375            }
376        }
377    
378        protected static String scrub(String s) {
379            return Normalizer.normalize(
380                    CharacterUtil.prudentlyScrubCharacterData(s), Normalizer.NFC);
381        }
382    
383        private static boolean isCheckerUrl(String url) {
384            if ("http://hsivonen.iki.fi/checkers/all/".equals(url)) {
385                return true;
386            } else if ("http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) {
387                return true;
388            }
389            for (int i = 0; i < ALL_CHECKERS.length; i++) {
390                if (ALL_CHECKERS[i].equals(url)) {
391                    return true;
392                }
393            }
394            return false;
395        }
396    
397        /**
398         * @param request
399         * @param response
400         */
401        VerifierServletTransaction(HttpServletRequest request,
402                HttpServletResponse response) {
403            this.request = request;
404            this.response = response;
405            this.iriFactory = IRIFactory.iriImplementation();
406        }
407    
408        protected boolean willValidate() {
409            if (methodIsGet) {
410                return document != null;
411            } else { // POST
412                return true;
413            }
414        }
415    
416        void service() throws ServletException, IOException {
417            this.methodIsGet = "GET".equals(request.getMethod())
418                    || "HEAD".equals(request.getMethod());
419    
420            this.out = response.getOutputStream();
421    
422            request.setCharacterEncoding("utf-8");
423    
424            if (!methodIsGet) {
425                postContentType = request.getContentType();
426                if (postContentType == null) {
427                    response.sendError(HttpServletResponse.SC_BAD_REQUEST,
428                            "Content-Type missing");
429                    return;
430                } else if (postContentType.trim().toLowerCase().startsWith("application/x-www-form-urlencoded")) {
431                    response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE,
432                    "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
433                    return;                
434                }
435            }
436    
437            String outFormat = request.getParameter("out");
438            if (outFormat == null) {
439                outputFormat = OutputFormat.HTML;
440            } else {
441                if ("html".equals(outFormat)) {
442                    outputFormat = OutputFormat.HTML;
443                } else if ("xhtml".equals(outFormat)) {
444                    outputFormat = OutputFormat.XHTML;
445                } else if ("text".equals(outFormat)) {
446                    outputFormat = OutputFormat.TEXT;
447                } else if ("xml".equals(outFormat)) {
448                    outputFormat = OutputFormat.XML;
449                } else if ("json".equals(outFormat)) {
450                    outputFormat = OutputFormat.JSON;
451                } else {
452                    response.sendError(HttpServletResponse.SC_BAD_REQUEST,
453                            "Unsupported output format");
454                    return;
455                }
456            }
457    
458            if (!methodIsGet) {
459                document = scrubUrl(request.getHeader("Content-Location"));
460            }
461            if (document == null) {
462                document = scrubUrl(request.getParameter("doc"));
463            }
464    
465            document = ("".equals(document)) ? null : document;
466    
467            String callback = null;
468            if (outputFormat == OutputFormat.JSON) {
469                callback = request.getParameter("callback");
470                if (callback != null) {
471                    Matcher m = JS_IDENTIFIER.matcher(callback);
472                    if (m.matches()) {
473                        if (Arrays.binarySearch(JS_RESERVED_WORDS, callback) >= 0) {
474                            response.sendError(HttpServletResponse.SC_BAD_REQUEST,
475                                    "Callback is a reserved word.");
476                            return;
477                        }
478                    } else {
479                        response.sendError(HttpServletResponse.SC_BAD_REQUEST,
480                                "Callback is not a valid ECMA 262 IdentifierName.");
481                        return;
482                    }
483                }
484            }
485    
486            String methodCheck = request.getHeader("Method-Check");
487            
488            if (willValidate()) {
489                response.setDateHeader("Expires", 0);
490                response.setHeader("Cache-Control", "no-cache");
491            } else if (methodCheck != null) {
492                // XXX revisit if anne changes the access-control stuff to use OPTIONS
493                response.setStatus(HttpServletResponse.SC_NO_CONTENT);
494                response.setHeader("Allow", "POST");
495                return;
496            } else if (outputFormat == OutputFormat.HTML
497                        || outputFormat == OutputFormat.XHTML) {
498                    response.setDateHeader("Last-Modified", lastModified);
499                } else {
500                    response.sendError(HttpServletResponse.SC_BAD_REQUEST,
501                            "No input document");
502                    return;
503                }
504    
505            setup();
506    
507            showSource = (request.getParameter("showsource") != null);
508            
509            try {
510                if (outputFormat == OutputFormat.HTML
511                        || outputFormat == OutputFormat.XHTML) {
512                    if (outputFormat == OutputFormat.HTML) {
513                        response.setContentType("text/html; charset=utf-8");
514                        contentHandler = new HtmlSerializer(out,
515                                HtmlSerializer.DOCTYPE_HTML5, false, "UTF-8");
516                    } else {
517                        response.setContentType("application/xhtml+xml");
518                        Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML);
519                        Serializer ser = SerializerFactory.getSerializer(props);
520                        ser.setOutputStream(out);
521                        contentHandler = ser.asContentHandler();
522                    }
523                    emitter = new XhtmlSaxEmitter(contentHandler);
524                    errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
525                            new XhtmlMessageEmitter(contentHandler));
526                    PageEmitter.emit(contentHandler, this);
527                } else {
528                    if (outputFormat == OutputFormat.TEXT) {
529                        response.setContentType("text/plain; charset=utf-8");
530                        errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
531                                new TextMessageEmitter(out));
532                    } else if (outputFormat == OutputFormat.XML) {
533                        response.setContentType("application/xml");
534                        Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML);
535                        Serializer ser = SerializerFactory.getSerializer(props);
536                        ser.setOutputStream(out);
537                        errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
538                                new XmlMessageEmitter(ser.asContentHandler()));
539                    } else if (outputFormat == OutputFormat.JSON) {
540                        if (callback == null) {
541                            response.setContentType("application/json");
542                        } else {
543                            response.setContentType("application/javascript");
544                         }
545                        errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
546                                new JsonMessageEmitter(
547                                        new nu.validator.json.Serializer(out),
548                                        callback));
549                    } else {
550                        throw new RuntimeException("Unreachable.");
551                    }
552                    validate();
553                }
554            } catch (SAXException e) {
555                throw new ServletException(e);
556            }
557        }
558    
559        /**
560         * @throws ServletException
561         */
562        protected void setup() throws ServletException {
563            String preset = request.getParameter("preset");
564    
565            if (preset != null && !"".equals(preset)) {
566                schemaUrls = preset;
567            } else {
568                schemaUrls = request.getParameter("schema");
569            }
570            if (schemaUrls == null) {
571                schemaUrls = "";
572            }
573    
574            String parserStr = request.getParameter("parser");
575    
576            if ("html".equals(parserStr)) {
577                parser = ParserMode.HTML_AUTO;
578            } else if ("xmldtd".equals(parserStr)) {
579                parser = ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION;
580            } else if ("xml".equals(parserStr)) {
581                parser = ParserMode.XML_NO_EXTERNAL_ENTITIES;
582            } else if ("html5".equals(parserStr)) {
583                parser = ParserMode.HTML;
584            } else if ("html4".equals(parserStr)) {
585                parser = ParserMode.HTML401_STRICT;
586            } else if ("html4tr".equals(parserStr)) {
587                parser = ParserMode.HTML401_TRANSITIONAL;
588            } // else auto
589    
590            laxType = (request.getParameter("laxtype") != null);
591        }
592    
593        private boolean isHtmlUnsafePreset() {
594            if ("".equals(schemaUrls)) {
595                return false;
596            }
597            boolean preset = false;
598            for (int i = 0; i < presetUrls.length; i++) {
599                if (presetUrls[i].equals(schemaUrls)) {
600                    preset = true;
601                    break;
602                }
603            }
604            if (!preset) {
605                return false;
606            }
607            return !(schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-basic.rng")
608                    || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-strict.rng")
609                    || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-strict-wcag.rng")
610                    || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-transitional.rng")
611                    || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-transitional-wcag.rng") || schemaUrls.startsWith("http://syntax.whattf.org/relaxng/html5full.rnc"));
612    
613        }
614    
615        /**
616         * @throws SAXException
617         */
618        @SuppressWarnings("deprecation")
619        void validate() throws SAXException {
620            if (!willValidate()) {
621                return;
622            }
623            try {
624                out.flush();
625            } catch (IOException e1) {
626                throw new SAXException(e1);
627            }
628            httpRes = new PrudentHttpEntityResolver(2048 * 1024, laxType,
629                    errorHandler);
630            contentTypeParser = new ContentTypeParser(errorHandler, laxType);
631            entityResolver = new LocalCacheEntityResolver(pathMap, httpRes);
632            setAllowRnc(true);
633            boolean stats = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
634            try {
635                this.errorHandler.start(document);
636                PropertyMapBuilder pmb = new PropertyMapBuilder();
637                pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
638                pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
639                pmb.put(ValidateProperty.XML_READER_CREATOR,
640                        new VerifierServletXMLReaderCreator(errorHandler,
641                                entityResolver));
642                RngProperty.CHECK_ID_IDREF.add(pmb);
643                jingPropertyMap = pmb.toPropertyMap();
644    
645                tryToSetupValidator();
646    
647                setAllowRnc(false);
648    
649                loadDocAndSetupParser();
650    
651                reader.setErrorHandler(errorHandler);
652                // XXX set xml:id filter separately
653                contentType = documentInput.getType();
654                sourceCode.initialize(documentInput);
655                if (validator == null) {
656                    checkNormalization = true;
657                }
658                if (checkNormalization) {
659                    reader.setFeature(
660                            "http://xml.org/sax/features/unicode-normalization-checking",
661                            true);
662                }
663                WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(
664                        reader);
665                ContentHandler recorder = sourceCode.getLocationRecorder();
666                wiretap.setWiretapContentHander(recorder);
667                wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
668                reader = wiretap;
669                if (htmlParser != null) {
670                    htmlParser.addCharacterHandler(sourceCode);
671                    htmlParser.setMappingLangToXmlLang(true);
672                    htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
673                    htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
674                } else if (xmlParser != null) {
675                    xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
676                } else {
677                    throw new RuntimeException("Bug. Unreachable.");
678                }
679                reader.parse(documentInput);
680            } catch (SAXException e) {
681                log4j.debug("SAXException", e);
682            } catch (IOException e) {
683                stats = false;
684                log4j.info("IOException", e);
685                errorHandler.ioError(e);
686            } catch (IncorrectSchemaException e) {
687                log4j.debug("IncorrectSchemaException", e);
688                errorHandler.schemaError(e);
689            } catch (RuntimeException e) {
690                stats = false;
691                log4j.error("RuntimeException, doc: " + document + " schema: "
692                        + schemaUrls + " lax: " + laxType, e);
693                errorHandler.internalError(
694                        e,
695                        "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
696            } catch (Error e) {
697                stats = false;
698                log4j.error("Error, doc: " + document + " schema: " + schemaUrls
699                        + " lax: " + laxType, e);
700                errorHandler.internalError(
701                        e,
702                        "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
703            } finally {
704                errorHandler.end(successMessage(), failureMessage());
705            }
706            if (stats) {
707                StatsEmitter.emit(contentHandler, this);
708            }
709        }
710    
    /**
     * Supplies the success text that is handed to
     * {@code errorHandler.end(...)} as its first argument.
     *
     * @return the fixed English success message
     * @throws SAXException
     *             declared in the signature; this implementation itself
     *             does not throw
     */
    protected String successMessage() throws SAXException {
        return "The document validates according to the specified schema(s).";
    }
718    
    /**
     * Supplies the failure text that is handed to
     * {@code errorHandler.end(...)} as its second argument.
     *
     * @return the fixed English failure message
     * @throws SAXException
     *             declared in the signature; this implementation itself
     *             does not throw
     */
    protected String failureMessage() throws SAXException {
        return "There were errors.";
    }
722    
    /**
     * Builds the validator for the schema list in {@code schemaUrls} and
     * stores it in the {@code validator} field.
     * <p>
     * The work is delegated to {@code validatorByUrls}, so the field ends up
     * {@code null} when that method produces no validator.
     * 
     * @throws SAXException
     *             propagated from {@code validatorByUrls}
     * @throws IOException
     *             propagated from {@code validatorByUrls}
     * @throws IncorrectSchemaException
     *             propagated from {@code validatorByUrls}
     */
    protected void tryToSetupValidator() throws SAXException, IOException,
            IncorrectSchemaException {
        validator = validatorByUrls(schemaUrls);
    }
732    
733        /**
734         * @throws SAXException
735         * @throws IOException
736         * @throws IncorrectSchemaException
737         * @throws SAXNotRecognizedException
738         * @throws SAXNotSupportedException
739         */
740        protected void loadDocAndSetupParser() throws SAXException, IOException,
741                IncorrectSchemaException, SAXNotRecognizedException,
742                SAXNotSupportedException {
743            switch (parser) {
744                case HTML_AUTO:
745                case HTML:
746                case HTML401_STRICT:
747                case HTML401_TRANSITIONAL:
748                    if (isHtmlUnsafePreset()) {
749                        String message = "The chosen preset schema is not appropriate for HTML.";
750                        SAXException se = new SAXException(message);
751                        errorHandler.schemaError(se);
752                        throw se;
753                    }
754                    setAllowGenericXml(false);
755                    setAllowHtml(true);
756                    setAcceptAllKnownXmlTypes(false);
757                    setAllowXhtml(false);
758                    loadDocumentInput();
759                    newHtmlParser();
760                    DoctypeExpectation doctypeExpectation;
761                    int schemaId;
762                    switch (parser) {
763                        case HTML:
764                            doctypeExpectation = DoctypeExpectation.HTML;
765                            schemaId = HTML5_SCHEMA;
766                            break;
767                        case HTML401_STRICT:
768                            doctypeExpectation = DoctypeExpectation.HTML401_STRICT;
769                            schemaId = XHTML1STRICT_SCHEMA;
770                            break;
771                        case HTML401_TRANSITIONAL:
772                            doctypeExpectation = DoctypeExpectation.HTML401_TRANSITIONAL;
773                            schemaId = XHTML1TRANSITIONAL_SCHEMA;
774                            break;
775                        default:
776                            doctypeExpectation = DoctypeExpectation.AUTO;
777                            schemaId = 0;
778                            break;
779                    }
780                    htmlParser.setDoctypeExpectation(doctypeExpectation);
781                    htmlParser.setDocumentModeHandler(this);
782                    reader = htmlParser;
783                    if (validator == null) {
784                        validator = validatorByDoctype(schemaId);
785                    }
786                    if (validator != null) {
787                        reader.setContentHandler(validator.getContentHandler());
788                    }
789                    break;
790                case XML_NO_EXTERNAL_ENTITIES:
791                case XML_EXTERNAL_ENTITIES_NO_VALIDATION:
792                    setAllowGenericXml(true);
793                    setAllowHtml(false);
794                    setAcceptAllKnownXmlTypes(true);
795                    setAllowXhtml(true);
796                    loadDocumentInput();
797                    setupXmlParser();
798                    break;
799                default:
800                    setAllowGenericXml(true);
801                    setAllowHtml(true);
802                    setAcceptAllKnownXmlTypes(true);
803                    setAllowXhtml(true);
804                    loadDocumentInput();
805                    if ("text/html".equals(documentInput.getType())) {
806                        if (isHtmlUnsafePreset()) {
807                            String message = "The Content-Type was \u201Ctext/html\u201D, but the chosen preset schema is not appropriate for HTML.";
808                            SAXException se = new SAXException(message);
809                            errorHandler.schemaError(se);
810                            throw se;
811                        }
812                        errorHandler.info("The Content-Type was \u201Ctext/html\u201D. Using the HTML parser.");
813                        newHtmlParser();
814                        htmlParser.setDoctypeExpectation(DoctypeExpectation.AUTO);
815                        htmlParser.setDocumentModeHandler(this);
816                        reader = htmlParser;
817                        if (validator != null) {
818                            reader.setContentHandler(validator.getContentHandler());
819                        }
820                    } else {
821                        errorHandler.info("The Content-Type was \u201C"
822                                + documentInput.getType()
823                                + "\u201D. Using the XML parser (not resolving external entities).");
824                        setupXmlParser();
825                    }
826                    break;
827            }
828        }
829    
    /**
     * Creates a new {@code HtmlParser}, stores it in the {@code htmlParser}
     * field and applies the settings used for every HTML parse: streamability
     * violation policy {@code FATAL}, xmlns policy {@code ALTER_INFOSET},
     * lang-to-xml:lang mapping enabled, and HTML4 mode compatible with the
     * XHTML1 schemata enabled.
     */
    protected void newHtmlParser() {
        htmlParser = new HtmlParser();
        htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
        htmlParser.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
        htmlParser.setMappingLangToXmlLang(true);
        htmlParser.setHtml4ModeCompatibleWithXhtml1Schemata(true);
    }
840    
841        protected Validator validatorByDoctype(int schemaId) throws SAXException,
842                IOException, IncorrectSchemaException {
843            if (schemaId == 0) {
844                return null;
845            }
846            for (int i = 0; i < presetDoctypes.length; i++) {
847                if (presetDoctypes[i] == schemaId) {
848                    return validatorByUrls(presetUrls[i]);
849                }
850            }
851            throw new RuntimeException("Doctype mappings not initialized properly.");
852        }
853    
854        /**
855         * @param entityResolver2
856         * @return
857         * @throws SAXNotRecognizedException
858         * @throws SAXNotSupportedException
859         */
860        protected void setupXmlParser() throws SAXNotRecognizedException,
861                SAXNotSupportedException {
862            xmlParser = new SAXDriver();
863            xmlParser.setCharacterHandler(sourceCode);
864            reader = new IdFilter(xmlParser);
865            reader.setFeature(
866                    "http://xml.org/sax/features/string-interning",
867                    true);
868            reader.setFeature(
869                    "http://xml.org/sax/features/external-general-entities",
870                    parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION);
871            reader.setFeature(
872                    "http://xml.org/sax/features/external-parameter-entities",
873                    parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION);
874            if (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION) {
875                reader.setEntityResolver(entityResolver);
876            } else {
877                reader.setEntityResolver(new NullEntityResolver());
878            }
879            if (validator == null) {
880                bufferingRootNamespaceSniffer = new BufferingRootNamespaceSniffer(
881                        this);
882                reader.setContentHandler(bufferingRootNamespaceSniffer);
883            } else {
884                reader.setContentHandler(new RootNamespaceSniffer(this,
885                        validator.getContentHandler()));
886                reader.setDTDHandler(validator.getDTDHandler());
887            }
888        }
889    
890        /**
891         * @param validator
892         * @return
893         * @throws SAXException
894         * @throws IOException
895         * @throws IncorrectSchemaException
896         */
897        private Validator validatorByUrls(String schemaList) throws SAXException,
898                IOException, IncorrectSchemaException {
899            Validator validator = null;
900            String[] schemas = SPACE.split(schemaList);
901            for (int i = schemas.length - 1; i > -1; i--) {
902                String url = schemas[i];
903                if ("http://hsivonen.iki.fi/checkers/all/".equals(url)) {
904                    for (int j = 0; j < ALL_CHECKERS.length; j++) {
905                        validator = combineValidatorByUrl(validator,
906                                ALL_CHECKERS[j]);
907                    }
908                } else if ("http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) {
909                    for (int j = 0; j < ALL_CHECKERS_HTML4.length; j++) {
910                        validator = combineValidatorByUrl(validator,
911                                ALL_CHECKERS_HTML4[j]);
912                    }
913                } else {
914                    validator = combineValidatorByUrl(validator, url);
915                }
916            }
917            return validator;
918        }
919    
920        /**
921         * @param validator
922         * @param url
923         * @return
924         * @throws SAXException
925         * @throws IOException
926         * @throws IncorrectSchemaException
927         */
928        private Validator combineValidatorByUrl(Validator validator, String url)
929                throws SAXException, IOException, IncorrectSchemaException {
930            if (!"".equals(url)) {
931                Validator v = validatorByUrl(url);
932                if (validator == null) {
933                    validator = v;
934                } else {
935                    validator = new CombineValidator(v, validator);
936                }
937            }
938            return validator;
939        }
940    
941        /**
942         * @param url
943         * @return
944         * @throws SAXException
945         * @throws IOException
946         * @throws IncorrectSchemaException
947         */
948        private Validator validatorByUrl(String url) throws SAXException,
949                IOException, IncorrectSchemaException {
950            if (loadedValidatorUrls.contains(url)) {
951                return null;
952            }
953            loadedValidatorUrls.add(url);
954            if ("http://hsivonen.iki.fi/checkers/table/".equals(url)) {
955                return new CheckerValidator(new TableChecker(), jingPropertyMap);
956            } else if ("http://hsivonen.iki.fi/checkers/nfc/".equals(url)) {
957                this.checkNormalization = true;
958                return new CheckerValidator(new NormalizationChecker(),
959                        jingPropertyMap);
960            } else if ("http://hsivonen.iki.fi/checkers/significant-inline/".equals(url)) {
961                return new CheckerValidator(new SignificantInlineChecker(),
962                        jingPropertyMap);
963            } else if ("http://hsivonen.iki.fi/checkers/debug/".equals(url)) {
964                return new CheckerValidator(new DebugChecker(), jingPropertyMap);
965            } else if ("http://hsivonen.iki.fi/checkers/text-content/".equals(url)) {
966                return new CheckerValidator(new TextContentChecker(),
967                        jingPropertyMap);
968            } else if ("http://n.validator.nu/checkers/usemap/".equals(url)) {
969                return new CheckerValidator(new UsemapChecker(), jingPropertyMap);
970            }
971            Schema sch = schemaByUrl(url);
972            Validator validator = sch.createValidator(jingPropertyMap);
973            return validator;
974        }
975    
976        /**
977         * @param url
978         * @return
979         * @throws SAXException
980         * @throws IOException
981         * @throws IncorrectSchemaException
982         */
983        private Schema schemaByUrl(String url) throws SAXException, IOException,
984                IncorrectSchemaException {
985            int i = Arrays.binarySearch(preloadedSchemaUrls, url);
986            if (i > -1) {
987                return preloadedSchemas[i];
988            }
989    
990            TypedInputSource schemaInput = (TypedInputSource) entityResolver.resolveEntity(
991                    null, url);
992            SchemaReader sr = null;
993            if ("application/relax-ng-compact-syntax".equals(schemaInput.getType())) {
994                sr = CompactSchemaReader.getInstance();
995            } else {
996                sr = new AutoSchemaReader();
997            }
998            Schema sch = sr.createSchema(schemaInput, jingPropertyMap);
999            return sch;
1000        }
1001    
1002        /**
1003         * @param url
1004         * @return
1005         * @throws SAXException
1006         * @throws IOException
1007         * @throws IncorrectSchemaException
1008         */
1009        private static Schema schemaByUrl(String url, EntityResolver resolver,
1010                PropertyMap pMap) throws SAXException, IOException,
1011                IncorrectSchemaException {
1012            log4j.debug("Will load schema: " + url);
1013            TypedInputSource schemaInput = (TypedInputSource) resolver.resolveEntity(
1014                    null, url);
1015            SchemaReader sr = null;
1016            if ("application/relax-ng-compact-syntax".equals(schemaInput.getType())) {
1017                sr = CompactSchemaReader.getInstance();
1018            } else {
1019                sr = new AutoSchemaReader();
1020            }
1021            Schema sch = sr.createSchema(schemaInput, pMap);
1022            return sch;
1023        }
1024    
1025        /**
1026         * @throws SAXException
1027         */
1028        void emitTitle(boolean markupAllowed) throws SAXException {
1029            if (willValidate()) {
1030                emitter.characters(RESULTS_TITLE);
1031                if (document != null) {
1032                    emitter.characters(FOR);
1033                    emitter.characters(scrub(document));
1034                }
1035            } else {
1036                emitter.characters(SERVICE_TITLE);
1037                if (markupAllowed) {
1038                    emitter.startElement("span");
1039                    emitter.characters(TWO_POINT_OH_BETA);
1040                    emitter.endElement("span");
1041                }
1042            }
1043        }
1044    
1045        void emitForm() throws SAXException {
1046            attrs.clear();
1047            attrs.addAttribute("method", "get");
1048    //        attrs.addAttribute("method", "post");
1049    //        attrs.addAttribute("enctype", "multipart/form-data");
1050            attrs.addAttribute("action", request.getRequestURL().toString());
1051            attrs.addAttribute("onsubmit", "formSubmission()");
1052            emitter.startElement("form", attrs);
1053            emitFormContent();
1054            emitter.endElement("form");
1055        }
1056    
    /**
     * Emits the content of the form by delegating to
     * {@code FormEmitter.emit} with this transaction and its
     * {@code contentHandler}. Called from {@code emitForm()} between the
     * start and end of the {@code form} element.
     * 
     * @throws SAXException
     *             propagated from {@code FormEmitter.emit}
     */
    protected void emitFormContent() throws SAXException {
        FormEmitter.emit(contentHandler, this);
    }
1063    
1064        void emitSchemaField() throws SAXException {
1065            attrs.clear();
1066            attrs.addAttribute("name", "schema");
1067            attrs.addAttribute("id", "schema");
1068            attrs.addAttribute("onchange", "schemaChanged();");
1069            attrs.addAttribute("pattern", "(?:https?://.+(?:\\s+https?://.+)*)?");
1070            attrs.addAttribute(
1071                    "title",
1072                    "The schema field takes zero or more space-separated absolute IRIs (http or https only) of the schemas that the document is to be validated against. (When left blank, the service will attempt to pick schemas automatically.)");
1073            if (schemaUrls != null) {
1074                attrs.addAttribute("value", scrub(schemaUrls));
1075            }
1076            emitter.startElement("input", attrs);
1077            emitter.endElement("input");
1078        }
1079    
1080        void emitDocField() throws SAXException {
1081            attrs.clear();
1082            attrs.addAttribute("type", "url");
1083            attrs.addAttribute("name", "doc");
1084            attrs.addAttribute("id", "doc");
1085            attrs.addAttribute("pattern", "(?:https?://.+)?");
1086            attrs.addAttribute(
1087                    "title",
1088                    // XXX drop last sentence for html5 facet
1089                    "The document field takes the absolute IRI (http or https only) of the document to be checked. (The document field can also be left blank in order to bookmark settings.)");
1090            if (document != null) {
1091                attrs.addAttribute("value", scrub(document));
1092            }
1093            emitter.startElement("input", attrs);
1094            emitter.endElement("input");
1095        }
1096    
1097        private String scrubUrl(String urlStr) {
1098            if (urlStr == null) {
1099                return null;
1100            }
1101    
1102            try {
1103                IRI iri = iriFactory.construct(urlStr);
1104                return iri.toASCIIString();
1105            } catch (IRIException e) {
1106                return null;
1107            } catch (MalformedURLException e) {
1108                return null;
1109            }
1110        }
1111    
    /**
     * Intentionally emits nothing: this implementation has an empty body.
     * 
     * @throws SAXException
     *             declared in the signature; never thrown by this
     *             implementation
     */
    void emitSchemaDuration() throws SAXException {
    }
1118    
    /**
     * Intentionally emits nothing: this implementation has an empty body.
     * 
     * @throws SAXException
     *             declared in the signature; never thrown by this
     *             implementation
     */
    void emitDocDuration() throws SAXException {
    }
1125    
1126        /**
1127         * @throws SAXException
1128         * 
1129         */
1130        void emitTotalDuration() throws SAXException {
1131            emitter.characters("" + (System.currentTimeMillis() - start));
1132        }
1133    
1134        /**
1135         * @throws SAXException
1136         * 
1137         */
1138        void emitPresetOptions() throws SAXException {
1139            for (int i = 0; i < presetUrls.length; i++) {
1140                emitter.option(presetLabels[i], presetUrls[i], false);
1141            }
1142        }
1143    
1144        /**
1145         * @throws SAXException
1146         * 
1147         */
1148        void emitParserOptions() throws SAXException {
1149            emitter.option("Automatically from Content-Type", "",
1150                    (parser == ParserMode.AUTO));
1151            emitter.option("XML; don\u2019t load external entities", "xml",
1152                    (parser == ParserMode.XML_NO_EXTERNAL_ENTITIES));
1153            emitter.option("XML; load external entities", "xmldtd",
1154                    (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION));
1155            emitter.option("HTML; flavor from doctype", "html",
1156                    (parser == ParserMode.HTML_AUTO));
1157            emitter.option("HTML5", "html5", (parser == ParserMode.HTML));
1158            emitter.option("HTML 4.01 Strict", "html4",
1159                    (parser == ParserMode.HTML401_STRICT));
1160            emitter.option("HTML 4.01 Transitional", "html4tr",
1161                    (parser == ParserMode.HTML401_TRANSITIONAL));
1162        }
1163    
    /**
     * Emits the {@code laxtype} checkbox with the value {@code yes}; the
     * current {@code laxType} flag is passed as the third argument of
     * {@code emitter.checkbox}.
     * 
     * @throws SAXException
     *             propagated from {@code emitter}
     */
    void emitLaxTypeField() throws SAXException {
        emitter.checkbox("laxtype", "yes", laxType);
    }
1171    
    /**
     * Emits the {@code showsource} checkbox with the value {@code yes}; the
     * current {@code showSource} flag is passed as the third argument of
     * {@code emitter.checkbox}.
     * 
     * @throws SAXException
     *             propagated from {@code emitter}
     */
    void emitShowSourceField() throws SAXException {
        emitter.checkbox("showsource", "yes", showSource);
    }
1179        
1180        void rootNamespace(String namespace, Locator locator) throws SAXException {
1181            if (validator == null) {
1182                int index = -1;
1183                for (int i = 0; i < presetNamespaces.length; i++) {
1184                    if (namespace.equals(presetNamespaces[i])) {
1185                        index = i;
1186                        break;
1187                    }
1188                }
1189                if (index == -1) {
1190                    String message = "Cannot find preset schema for namespace: \u201C"
1191                            + namespace + "\u201D.";
1192                    SAXException se = new SAXException(message);
1193                    errorHandler.schemaError(se);
1194                    throw se;
1195                }
1196                String label = presetLabels[index];
1197                String urls = presetUrls[index];
1198                errorHandler.info("Using the preset for " + label
1199                        + " based on the root namespace.");
1200                try {
1201                    validator = validatorByUrls(urls);
1202                } catch (IOException ioe) {
1203                    // At this point the schema comes from memory.
1204                    throw new RuntimeException(ioe);
1205                } catch (IncorrectSchemaException e) {
1206                    // At this point the schema comes from memory.
1207                    throw new RuntimeException(e);
1208                }
1209                if (bufferingRootNamespaceSniffer == null) {
1210                    throw new RuntimeException(
1211                            "Bug! bufferingRootNamespaceSniffer was null.");
1212                }
1213                bufferingRootNamespaceSniffer.setContentHandler(validator.getContentHandler());
1214            }
1215    
1216            if (!rootNamespaceSeen) {
1217                rootNamespaceSeen = true;
1218                if (contentType != null) {
1219                    int i;
1220                    if ((i = Arrays.binarySearch(KNOWN_CONTENT_TYPES, contentType)) > -1) {
1221                        if (!NAMESPACES_FOR_KNOWN_CONTENT_TYPES[i].equals(namespace)) {
1222                            String message = "\u201C"
1223                                    + contentType
1224                                    + "\u201D is not an appropriate Content-Type for a document whose root namespace is \u201C"
1225                                    + namespace + "\u201D.";
1226                            SAXParseException spe = new SAXParseException(message,
1227                                    locator);
1228                            errorHandler.warning(spe);
1229                        }
1230                    }
1231                }
1232            }
1233        }
1234    
1235        public void documentMode(DocumentMode mode, String publicIdentifier,
1236                String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
1237                throws SAXException {
1238            if (validator == null) {
1239                try {
1240                    if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicIdentifier)) {
1241                        errorHandler.info("XHTML 1.0 Transitional doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for XHTML 1.0 Transitional."
1242                                + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled."
1243                                        : ""));
1244                        validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
1245                    } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) {
1246                        errorHandler.info("XHTML 1.0 Strict doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for XHTML 1.0 Strict."
1247                                + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled."
1248                                        : ""));
1249                        validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
1250                    } else if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
1251                        errorHandler.info("HTML 4.01 Transitional doctype seen. Using the schema for XHTML 1.0 Transitional."
1252                                + (html4SpecificAdditionalErrorChecks ? ""
1253                                        : " HTML4-specific tokenization errors are not enabled."));
1254                        validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
1255                    } else if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
1256                        errorHandler.info("HTML 4.01 Strict doctype seen. Using the schema for XHTML 1.0 Strict."
1257                                + (html4SpecificAdditionalErrorChecks ? ""
1258                                        : " HTML4-specific tokenization errors are not enabled."));
1259                        validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
1260                    } else if ("-//W3C//DTD HTML 4.0 Transitional//EN".equals(publicIdentifier)) {
1261                        errorHandler.info("Legacy HTML 4.0 Transitional doctype seen.  Please consider using HTML 4.01 Transitional instead. Proceeding anyway for your convenience with the schema for XHTML 1.0 Transitional."
1262                                + (html4SpecificAdditionalErrorChecks ? ""
1263                                        : " HTML4-specific tokenization errors are not enabled."));
1264                        validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
1265                    } else if ("-//W3C//DTD HTML 4.0//EN".equals(publicIdentifier)) {
1266                        errorHandler.info("Legacy HTML 4.0 Strict doctype seen. Please consider using HTML 4.01 instead. Proceeding anyway for your convenience with the schema for XHTML 1.0 Strict."
1267                                + (html4SpecificAdditionalErrorChecks ? ""
1268                                        : " HTML4-specific tokenization errors are not enabled."));
1269                        validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
1270                    } else {
1271                        errorHandler.info("Using the schema for HTML5."
1272                                + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled."
1273                                        : ""));
1274                        validator = validatorByDoctype(HTML5_SCHEMA);
1275                    }
1276                } catch (IOException ioe) {
1277                    // At this point the schema comes from memory.
1278                    throw new RuntimeException(ioe);
1279                } catch (IncorrectSchemaException e) {
1280                    // At this point the schema comes from memory.
1281                    throw new RuntimeException(e);
1282                }
1283                ContentHandler ch = validator.getContentHandler();
1284                ch.setDocumentLocator(htmlParser.getDocumentLocator());
1285                ch.startDocument();
1286                reader.setContentHandler(ch);
1287            } else {
1288                if (html4SpecificAdditionalErrorChecks) {
1289                    errorHandler.info("HTML4-specific tokenization errors are enabled.");
1290                }
1291            }
1292        }
1293    
    /**
     * Forwards the "accept all known XML types" setting to both
     * {@code contentTypeParser} and {@code httpRes}, so the two stay in
     * step.
     * 
     * @param acceptAllKnownXmlTypes
     *            the value passed on unchanged to both objects
     * @see nu.validator.xml.ContentTypeParser#setAcceptAllKnownXmlTypes(boolean)
     */
    protected void setAcceptAllKnownXmlTypes(boolean acceptAllKnownXmlTypes) {
        contentTypeParser.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
        httpRes.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
    }
1302    
    /**
     * Forwards the "allow generic XML" setting to both
     * {@code contentTypeParser} and {@code httpRes}, so the two stay in
     * step.
     * 
     * @param allowGenericXml
     *            the value passed on unchanged to both objects
     * @see nu.validator.xml.ContentTypeParser#setAllowGenericXml(boolean)
     */
    protected void setAllowGenericXml(boolean allowGenericXml) {
        contentTypeParser.setAllowGenericXml(allowGenericXml);
        httpRes.setAllowGenericXml(allowGenericXml);
    }
1311    
    /**
     * Forwards the "allow HTML" setting to both {@code contentTypeParser}
     * and {@code httpRes}, so the two stay in step.
     * 
     * @param allowHtml
     *            the value passed on unchanged to both objects
     * @see nu.validator.xml.ContentTypeParser#setAllowHtml(boolean)
     */
    protected void setAllowHtml(boolean allowHtml) {
        contentTypeParser.setAllowHtml(allowHtml);
        httpRes.setAllowHtml(allowHtml);
    }
1320    
    /**
     * Forwards the "allow RNC" setting to {@code contentTypeParser},
     * {@code httpRes} and {@code entityResolver}. Unlike the other
     * {@code setAllow...} methods of this class, the entity resolver is
     * updated as well.
     * 
     * @param allowRnc
     *            the value passed on unchanged to all three objects
     * @see nu.validator.xml.ContentTypeParser#setAllowRnc(boolean)
     */
    protected void setAllowRnc(boolean allowRnc) {
        contentTypeParser.setAllowRnc(allowRnc);
        httpRes.setAllowRnc(allowRnc);
        entityResolver.setAllowRnc(allowRnc);
    }
1330    
    /**
     * Forwards the "allow XHTML" setting to both {@code contentTypeParser}
     * and {@code httpRes}, so the two stay in step.
     * 
     * @param allowXhtml
     *            the value passed on unchanged to both objects
     * @see nu.validator.xml.ContentTypeParser#setAllowXhtml(boolean)
     */
    protected void setAllowXhtml(boolean allowXhtml) {
        contentTypeParser.setAllowXhtml(allowXhtml);
        httpRes.setAllowXhtml(allowXhtml);
    }
1339    
1340        /**
1341         * @throws SAXException
1342         * @throws IOException
1343         */
1344        protected void loadDocumentInput() throws SAXException, IOException {
1345            if (methodIsGet) {
1346                documentInput = (TypedInputSource) entityResolver.resolveEntity(
1347                        null, document);
1348            } else { // POST
1349                documentInput = contentTypeParser.buildTypedInputSource(document,
1350                        null, postContentType);
1351                documentInput.setByteStream(request.getInputStream());
1352            }
1353        }
1354    }