001 /*
002 * Copyright (c) 2005, 2006 Henri Sivonen
003 * Copyright (c) 2007 Mozilla Foundation
004 *
005 * Permission is hereby granted, free of charge, to any person obtaining a
006 * copy of this software and associated documentation files (the "Software"),
007 * to deal in the Software without restriction, including without limitation
008 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
009 * and/or sell copies of the Software, and to permit persons to whom the
010 * Software is furnished to do so, subject to the following conditions:
011 *
012 * The above copyright notice and this permission notice shall be included in
013 * all copies or substantial portions of the Software.
014 *
015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
021 * DEALINGS IN THE SOFTWARE.
022 */
023
024 package nu.validator.servlet;
025
026 import java.io.BufferedReader;
027 import java.io.File;
028 import java.io.FileInputStream;
029 import java.io.IOException;
030 import java.io.InputStreamReader;
031 import java.io.OutputStream;
032 import java.net.MalformedURLException;
033 import java.util.Arrays;
034 import java.util.HashMap;
035 import java.util.HashSet;
036 import java.util.Iterator;
037 import java.util.LinkedList;
038 import java.util.List;
039 import java.util.Map;
040 import java.util.Properties;
041 import java.util.Set;
042 import java.util.SortedMap;
043 import java.util.TreeMap;
044 import java.util.regex.Matcher;
045 import java.util.regex.Pattern;
046
047 import javax.servlet.ServletException;
048 import javax.servlet.http.HttpServletRequest;
049 import javax.servlet.http.HttpServletResponse;
050
051 import nu.validator.gnu.xml.aelfred2.SAXDriver;
052 import nu.validator.htmlparser.common.DoctypeExpectation;
053 import nu.validator.htmlparser.common.DocumentMode;
054 import nu.validator.htmlparser.common.DocumentModeHandler;
055 import nu.validator.htmlparser.common.XmlViolationPolicy;
056 import nu.validator.htmlparser.sax.HtmlParser;
057 import nu.validator.messages.JsonMessageEmitter;
058 import nu.validator.messages.MessageEmitterAdapter;
059 import nu.validator.messages.TextMessageEmitter;
060 import nu.validator.messages.XhtmlMessageEmitter;
061 import nu.validator.messages.XmlMessageEmitter;
062 import nu.validator.source.SourceCode;
063 import nu.validator.xml.AttributesImpl;
064 import nu.validator.xml.CharacterUtil;
065 import nu.validator.xml.ContentTypeParser;
066 import nu.validator.xml.HtmlSerializer;
067 import nu.validator.xml.IdFilter;
068 import nu.validator.xml.LocalCacheEntityResolver;
069 import nu.validator.xml.NullEntityResolver;
070 import nu.validator.xml.PrudentHttpEntityResolver;
071 import nu.validator.xml.SystemErrErrorHandler;
072 import nu.validator.xml.TypedInputSource;
073 import nu.validator.xml.WiretapXMLReaderWrapper;
074 import nu.validator.xml.XhtmlSaxEmitter;
075
076 import org.apache.log4j.Logger;
077 import org.apache.xml.serializer.Method;
078 import org.apache.xml.serializer.OutputPropertiesFactory;
079 import org.apache.xml.serializer.Serializer;
080 import org.apache.xml.serializer.SerializerFactory;
081 import org.whattf.checker.DebugChecker;
082 import org.whattf.checker.NormalizationChecker;
083 import org.whattf.checker.SignificantInlineChecker;
084 import org.whattf.checker.TextContentChecker;
085 import org.whattf.checker.UsemapChecker;
086 import org.whattf.checker.jing.CheckerValidator;
087 import org.whattf.checker.table.TableChecker;
088 import org.xml.sax.ContentHandler;
089 import org.xml.sax.EntityResolver;
090 import org.xml.sax.ErrorHandler;
091 import org.xml.sax.Locator;
092 import org.xml.sax.SAXException;
093 import org.xml.sax.SAXNotRecognizedException;
094 import org.xml.sax.SAXNotSupportedException;
095 import org.xml.sax.SAXParseException;
096 import org.xml.sax.XMLReader;
097 import org.xml.sax.ext.LexicalHandler;
098
099 import com.hp.hpl.jena.iri.IRI;
100 import com.hp.hpl.jena.iri.IRIException;
101 import com.hp.hpl.jena.iri.IRIFactory;
102 import com.ibm.icu.text.Normalizer;
103 import com.thaiopensource.relaxng.impl.CombineValidator;
104 import com.thaiopensource.util.PropertyMap;
105 import com.thaiopensource.util.PropertyMapBuilder;
106 import com.thaiopensource.validate.IncorrectSchemaException;
107 import com.thaiopensource.validate.Schema;
108 import com.thaiopensource.validate.SchemaReader;
109 import com.thaiopensource.validate.ValidateProperty;
110 import com.thaiopensource.validate.Validator;
111 import com.thaiopensource.validate.auto.AutoSchemaReader;
112 import com.thaiopensource.validate.rng.CompactSchemaReader;
113 import com.thaiopensource.validate.rng.RngProperty;
114
115
116 /**
117 * @version $Id: VerifierServletTransaction.java,v 1.10 2005/07/24 07:32:48
118 * hsivonen Exp $
119 * @author hsivonen
120 */
121 class VerifierServletTransaction implements DocumentModeHandler {
122
123 private enum OutputFormat {
124 HTML, XHTML, TEXT, XML, JSON, RELAXED, SOAP, UNICORN, EMACS
125 }
126
/** log4j logger shared by all transactions of this class. */
127 private static final Logger log4j = Logger.getLogger(VerifierServletTransaction.class);
128
/** One or more whitespace characters; used to split space-separated schema URL lists. */
129 private static final Pattern SPACE = Pattern.compile("\\s+");
130
/**
 * Pattern the JSON "callback" request parameter must match in full
 * (checked with Matcher.matches() in service()): a letter, letter-number,
 * underscore or dollar sign, followed by any number of those plus
 * combining marks, decimal digits and connector punctuation.
 */
131 private static final Pattern JS_IDENTIFIER = Pattern.compile("[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$][\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}]*");
132
133 private static final String[] JS_RESERVED_WORDS = { "abstract", "boolean",
134 "break", "byte", "case", "catch", "char", "class", "const",
135 "continue", "debugger", "default", "delete", "do", "double",
136 "else", "enum", "export", "extends", "final", "finally", "float",
137 "for", "function", "goto", "if", "implements", "import", "in",
138 "instanceof", "int", "interface", "long", "native", "new",
139 "package", "private", "protected", "public", "return", "short",
140 "static", "super", "switch", "synchronized", "this", "throw",
141 "throws", "transient", "try", "typeof", "var", "void", "volatile",
142 "while", "with" };
143
// Schema ids. validatorByDoctype() matches them against presetDoctypes;
// loadDocAndSetupParser() picks one when an HTML parser mode is forced.
144 protected static final int HTML5_SCHEMA = 3;
145
146 protected static final int XHTML1STRICT_SCHEMA = 2;
147
148 protected static final int XHTML1TRANSITIONAL_SCHEMA = 1;
149
// Not referenced in the visible part of this file.
150 protected static final int XHTML5_SCHEMA = 7;
151
// Text fragments kept as char arrays. They are not used in the visible part
// of this file; presumably they are emitted into the result page (confirm).
152 private static final char[] SERVICE_TITLE = "Validator.nu ".toCharArray();
153
// NOTE(review): the constant's name no longer matches its value.
154 private static final char[] TWO_POINT_OH_BETA = "2.1 Gamma".toCharArray();
155
156 private static final char[] RESULTS_TITLE = "Validation results for ".toCharArray();
157
158 private static final char[] FOR = " for ".toCharArray();
159
// Filled by the static initializer from the cache config file: first
// tab-separated column -> cache path prefix + second column. Passed to
// LocalCacheEntityResolver.
160 private static final Map pathMap = new HashMap();
161
// The four preset* arrays are index-parallel; each entry comes from one line
// of the preset config file read by the static initializer.
// Column 1, parsed as int.
162 private static int[] presetDoctypes;
163
// Column 3.
164 private static String[] presetLabels;
165
// Column 4: a whitespace-separated list of schema/checker URLs.
166 private static String[] presetUrls;
167
// Column 2; "-" is stored as null, other values are interned.
168 private static String[] presetNamespaces;
169
// NOTE(review): appears to be index-parallel with
// NAMESPACES_FOR_KNOWN_CONTENT_TYPES (same length); not used in the visible
// part of this file, so confirm against the callers.
170 private static final String[] KNOWN_CONTENT_TYPES = {
171 "application/atom+xml", "application/docbook+xml",
172 "application/xhtml+xml", "application/xv+xml" };
173
174 private static final String[] NAMESPACES_FOR_KNOWN_CONTENT_TYPES = {
175 "http://www.w3.org/2005/Atom", "http://docbook.org/ns/docbook",
176 "http://www.w3.org/1999/xhtml", "http://www.w3.org/1999/xhtml" };
177
// Checker URLs that "http://hsivonen.iki.fi/checkers/all/" expands to in
// validatorByUrls(); also consulted by isCheckerUrl().
178 private static final String[] ALL_CHECKERS = {
179 "http://hsivonen.iki.fi/checkers/table/",
180 "http://hsivonen.iki.fi/checkers/nfc/",
181 "http://hsivonen.iki.fi/checkers/significant-inline/",
182 "http://hsivonen.iki.fi/checkers/text-content/",
183 "http://n.validator.nu/checkers/usemap/"};
184
// Checker URLs that "http://hsivonen.iki.fi/checkers/all-html4/" expands to
// in validatorByUrls().
185 private static final String[] ALL_CHECKERS_HTML4 = {
186 "http://hsivonen.iki.fi/checkers/table/",
187 "http://hsivonen.iki.fi/checkers/nfc/" };
188
// System.currentTimeMillis() at the time this transaction object was created.
189 private long start = System.currentTimeMillis();
190
191 private final HttpServletRequest request;
192
193 private final HttpServletResponse response;
194
// Assigned in the constructor from IRIFactory.iriImplementation().
195 private IRIFactory iriFactory;
196
// Address of the document to validate. service() takes it from the
// Content-Location header (non-GET requests) or the "doc" parameter and
// stores null when it is absent or empty.
197 protected String document = null;
198
// Parser mode chosen by setup() from the "parser" parameter; AUTO otherwise.
199 private ParserMode parser = ParserMode.AUTO;
200
// True when the "laxtype" request parameter is present (see setup()).
201 private boolean laxType = false;
202
// SAX sink for the HTML/XHTML result page; assigned in service().
203 protected ContentHandler contentHandler;
204
205 protected XhtmlSaxEmitter emitter;
206
// Receives validation messages; created in service() for the chosen output format.
207 protected MessageEmitterAdapter errorHandler;
208
// Not used in the visible part of this file.
209 private AttributesImpl attrs = new AttributesImpl();
210
// The servlet response output stream, obtained in service().
211 private OutputStream out;
212
// Jing property map (error handler, entity resolver, XML reader creator,
// ID/IDREF checking) built in validate().
213 private PropertyMap jingPropertyMap;
214
// Resolver backed by pathMap that falls back to httpRes; created in validate().
215 protected LocalCacheEntityResolver entityResolver;
216
// Last-modified time of the preset config file; sent as the Last-Modified
// header when service() renders the page without validating.
217 private static long lastModified;
218
// Schema URLs preloaded by the static initializer, in sorted order (they come
// out of a TreeMap), index-parallel with preloadedSchemas. schemaByUrl()
// relies on the ordering for Arrays.binarySearch.
219 private static String[] preloadedSchemaUrls;
220
221 private static Schema[] preloadedSchemas;
222
// Whitespace-separated schema URL list from the "preset" or "schema"
// parameter; setup() stores "" when neither is given.
223 private String schemaUrls = null;
224
// Combined validator for the requested schemas; may remain null.
225 protected Validator validator = null;
226
// Installed as the XML content handler by setupXmlParser() when no validator
// is known yet.
227 private BufferingRootNamespaceSniffer bufferingRootNamespaceSniffer = null;
228
// Value of documentInput.getType(), recorded in validate().
229 private String contentType = null;
230
// Exactly one of htmlParser / xmlParser is expected to be non-null once
// loadDocAndSetupParser() has run; validate() treats both being null as a bug.
231 protected HtmlParser htmlParser = null;
232
233 protected SAXDriver xmlParser = null;
234
// The reader that validate() finally calls parse() on (wrapped in a wiretap there).
235 protected XMLReader reader;
236
// The document being validated; assigned outside the visible part of this
// file (presumably by loadDocumentInput() -- confirm).
237 protected TypedInputSource documentInput;
238
239 protected PrudentHttpEntityResolver httpRes;
240
241 protected ContentTypeParser contentTypeParser;
242
// URLs already turned into validators during this transaction;
// validatorByUrl() returns null for a URL that is already in the set.
243 private Set<String> loadedValidatorUrls = new HashSet<String>();
244
// Set when the NFC checker is requested or when no validator is configured;
// makes validate() enable the reader's unicode-normalization-checking feature.
245 private boolean checkNormalization = false;
246
// Not used in the visible part of this file.
247 private boolean rootNamespaceSeen = false;
248
// Output format selected from the "out" request parameter in service().
249 private OutputFormat outputFormat;
250
// Content-Type of a non-GET request, as reported by the servlet request.
251 private String postContentType;
252
// True for GET and HEAD requests.
253 private boolean methodIsGet;
254
255 private SourceCode sourceCode = new SourceCode();
256
// True when the "showsource" request parameter is present.
257 private boolean showSource;
258
259 static {
260 try {
261 log4j.debug("Starting static initializer.");
262
263 String presetPath = System.getProperty("nu.validator.servlet.presetconfpath");
264 File presetFile = new File(presetPath);
265 lastModified = presetFile.lastModified();
266 BufferedReader r = new BufferedReader(new InputStreamReader(
267 new FileInputStream(presetFile), "UTF-8"));
268 String line;
269 List<String> doctypes = new LinkedList<String>();
270 List<String> namespaces = new LinkedList<String>();
271 List<String> labels = new LinkedList<String>();
272 List<String> urls = new LinkedList<String>();
273
274 log4j.debug("Starting to loop over config file lines.");
275
276 while ((line = r.readLine()) != null) {
277 if ("".equals(line.trim())) {
278 break;
279 }
280 String s[] = line.split("\t");
281 doctypes.add(s[0]);
282 namespaces.add(s[1]);
283 labels.add(s[2]);
284 urls.add(s[3]);
285 }
286
287 log4j.debug("Finished reading config.");
288
289 String[] presetDoctypesAsStrings = doctypes.toArray(new String[0]);
290 presetNamespaces = namespaces.toArray(new String[0]);
291 presetLabels = labels.toArray(new String[0]);
292 presetUrls = urls.toArray(new String[0]);
293
294 log4j.debug("Converted config to arrays.");
295
296 for (int i = 0; i < presetNamespaces.length; i++) {
297 String str = presetNamespaces[i];
298 if ("-".equals(str)) {
299 presetNamespaces[i] = null;
300 } else {
301 presetNamespaces[i] = presetNamespaces[i].intern();
302 }
303 }
304
305 log4j.debug("Prepared namespace array.");
306
307 presetDoctypes = new int[presetDoctypesAsStrings.length];
308 for (int i = 0; i < presetDoctypesAsStrings.length; i++) {
309 presetDoctypes[i] = Integer.parseInt(presetDoctypesAsStrings[i]);
310 }
311
312 log4j.debug("Parsed doctype numbers into ints.");
313
314 String prefix = System.getProperty("nu.validator.servlet.cachepathprefix");
315
316 log4j.debug("The cache path prefix is: " + prefix);
317
318 String cacheConfPath = System.getProperty("nu.validator.servlet.cacheconfpath");
319
320 log4j.debug("The cache config path is: " + cacheConfPath);
321
322 r = new BufferedReader(new InputStreamReader(new FileInputStream(
323 cacheConfPath), "UTF-8"));
324 while ((line = r.readLine()) != null) {
325 if ("".equals(line.trim())) {
326 break;
327 }
328 String s[] = line.split("\t");
329 pathMap.put(s[0], prefix + s[1]);
330 }
331
332 log4j.debug("Cache config read.");
333
334 ErrorHandler eh = new SystemErrErrorHandler();
335 LocalCacheEntityResolver er = new LocalCacheEntityResolver(pathMap,
336 new NullEntityResolver());
337 er.setAllowRnc(true);
338 PropertyMapBuilder pmb = new PropertyMapBuilder();
339 pmb.put(ValidateProperty.ERROR_HANDLER, eh);
340 pmb.put(ValidateProperty.ENTITY_RESOLVER, er);
341 pmb.put(ValidateProperty.XML_READER_CREATOR,
342 new VerifierServletXMLReaderCreator(eh, er));
343 RngProperty.CHECK_ID_IDREF.add(pmb);
344 PropertyMap pMap = pmb.toPropertyMap();
345
346 log4j.debug("Parsing set up. Starting to read schemas.");
347
348 SortedMap<String, Schema> schemaMap = new TreeMap<String, Schema>();
349 for (int i = 0; i < presetUrls.length; i++) {
350 String[] urls1 = SPACE.split(presetUrls[i]);
351 for (int j = 0; j < urls1.length; j++) {
352 String url = urls1[j];
353 if (schemaMap.get(url) == null && !isCheckerUrl(url)) {
354 Schema sch = schemaByUrl(url, er, pMap);
355 schemaMap.put(url, sch);
356 }
357 }
358 }
359
360 log4j.debug("Schemas read.");
361
362 preloadedSchemaUrls = new String[schemaMap.size()];
363 preloadedSchemas = new Schema[schemaMap.size()];
364 int i = 0;
365 for (Iterator iter = schemaMap.entrySet().iterator(); iter.hasNext();) {
366 Map.Entry entry = (Map.Entry) iter.next();
367 preloadedSchemaUrls[i] = entry.getKey().toString().intern();
368 preloadedSchemas[i] = (Schema) entry.getValue();
369 i++;
370 }
371
372 log4j.debug("Initialization complete.");
373 } catch (Exception e) {
374 throw new RuntimeException(e);
375 }
376 }
377
378 protected static String scrub(String s) {
379 return Normalizer.normalize(
380 CharacterUtil.prudentlyScrubCharacterData(s), Normalizer.NFC);
381 }
382
383 private static boolean isCheckerUrl(String url) {
384 if ("http://hsivonen.iki.fi/checkers/all/".equals(url)) {
385 return true;
386 } else if ("http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) {
387 return true;
388 }
389 for (int i = 0; i < ALL_CHECKERS.length; i++) {
390 if (ALL_CHECKERS[i].equals(url)) {
391 return true;
392 }
393 }
394 return false;
395 }
396
/**
 * Creates a transaction bound to one request/response pair and obtains the
 * IRI factory from IRIFactory.iriImplementation().
 *
 * @param request the servlet request to read parameters and headers from
 * @param response the servlet response the result is written to
 */
VerifierServletTransaction(HttpServletRequest request,
        HttpServletResponse response) {
    this.iriFactory = IRIFactory.iriImplementation();
    this.response = response;
    this.request = request;
}
407
408 protected boolean willValidate() {
409 if (methodIsGet) {
410 return document != null;
411 } else { // POST
412 return true;
413 }
414 }
415
416 void service() throws ServletException, IOException {
417 this.methodIsGet = "GET".equals(request.getMethod())
418 || "HEAD".equals(request.getMethod());
419
420 this.out = response.getOutputStream();
421
422 request.setCharacterEncoding("utf-8");
423
424 if (!methodIsGet) {
425 postContentType = request.getContentType();
426 if (postContentType == null) {
427 response.sendError(HttpServletResponse.SC_BAD_REQUEST,
428 "Content-Type missing");
429 return;
430 } else if (postContentType.trim().toLowerCase().startsWith("application/x-www-form-urlencoded")) {
431 response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE,
432 "application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
433 return;
434 }
435 }
436
437 String outFormat = request.getParameter("out");
438 if (outFormat == null) {
439 outputFormat = OutputFormat.HTML;
440 } else {
441 if ("html".equals(outFormat)) {
442 outputFormat = OutputFormat.HTML;
443 } else if ("xhtml".equals(outFormat)) {
444 outputFormat = OutputFormat.XHTML;
445 } else if ("text".equals(outFormat)) {
446 outputFormat = OutputFormat.TEXT;
447 } else if ("xml".equals(outFormat)) {
448 outputFormat = OutputFormat.XML;
449 } else if ("json".equals(outFormat)) {
450 outputFormat = OutputFormat.JSON;
451 } else {
452 response.sendError(HttpServletResponse.SC_BAD_REQUEST,
453 "Unsupported output format");
454 return;
455 }
456 }
457
458 if (!methodIsGet) {
459 document = scrubUrl(request.getHeader("Content-Location"));
460 }
461 if (document == null) {
462 document = scrubUrl(request.getParameter("doc"));
463 }
464
465 document = ("".equals(document)) ? null : document;
466
467 String callback = null;
468 if (outputFormat == OutputFormat.JSON) {
469 callback = request.getParameter("callback");
470 if (callback != null) {
471 Matcher m = JS_IDENTIFIER.matcher(callback);
472 if (m.matches()) {
473 if (Arrays.binarySearch(JS_RESERVED_WORDS, callback) >= 0) {
474 response.sendError(HttpServletResponse.SC_BAD_REQUEST,
475 "Callback is a reserved word.");
476 return;
477 }
478 } else {
479 response.sendError(HttpServletResponse.SC_BAD_REQUEST,
480 "Callback is not a valid ECMA 262 IdentifierName.");
481 return;
482 }
483 }
484 }
485
486 String methodCheck = request.getHeader("Method-Check");
487
488 if (willValidate()) {
489 response.setDateHeader("Expires", 0);
490 response.setHeader("Cache-Control", "no-cache");
491 } else if (methodCheck != null) {
492 // XXX revisit if anne changes the access-control stuff to use OPTIONS
493 response.setStatus(HttpServletResponse.SC_NO_CONTENT);
494 response.setHeader("Allow", "POST");
495 return;
496 } else if (outputFormat == OutputFormat.HTML
497 || outputFormat == OutputFormat.XHTML) {
498 response.setDateHeader("Last-Modified", lastModified);
499 } else {
500 response.sendError(HttpServletResponse.SC_BAD_REQUEST,
501 "No input document");
502 return;
503 }
504
505 setup();
506
507 showSource = (request.getParameter("showsource") != null);
508
509 try {
510 if (outputFormat == OutputFormat.HTML
511 || outputFormat == OutputFormat.XHTML) {
512 if (outputFormat == OutputFormat.HTML) {
513 response.setContentType("text/html; charset=utf-8");
514 contentHandler = new HtmlSerializer(out,
515 HtmlSerializer.DOCTYPE_HTML5, false, "UTF-8");
516 } else {
517 response.setContentType("application/xhtml+xml");
518 Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML);
519 Serializer ser = SerializerFactory.getSerializer(props);
520 ser.setOutputStream(out);
521 contentHandler = ser.asContentHandler();
522 }
523 emitter = new XhtmlSaxEmitter(contentHandler);
524 errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
525 new XhtmlMessageEmitter(contentHandler));
526 PageEmitter.emit(contentHandler, this);
527 } else {
528 if (outputFormat == OutputFormat.TEXT) {
529 response.setContentType("text/plain; charset=utf-8");
530 errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
531 new TextMessageEmitter(out));
532 } else if (outputFormat == OutputFormat.XML) {
533 response.setContentType("application/xml");
534 Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML);
535 Serializer ser = SerializerFactory.getSerializer(props);
536 ser.setOutputStream(out);
537 errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
538 new XmlMessageEmitter(ser.asContentHandler()));
539 } else if (outputFormat == OutputFormat.JSON) {
540 if (callback == null) {
541 response.setContentType("application/json");
542 } else {
543 response.setContentType("application/javascript");
544 }
545 errorHandler = new MessageEmitterAdapter(sourceCode, showSource,
546 new JsonMessageEmitter(
547 new nu.validator.json.Serializer(out),
548 callback));
549 } else {
550 throw new RuntimeException("Unreachable.");
551 }
552 validate();
553 }
554 } catch (SAXException e) {
555 throw new ServletException(e);
556 }
557 }
558
559 /**
560 * @throws ServletException
561 */
562 protected void setup() throws ServletException {
563 String preset = request.getParameter("preset");
564
565 if (preset != null && !"".equals(preset)) {
566 schemaUrls = preset;
567 } else {
568 schemaUrls = request.getParameter("schema");
569 }
570 if (schemaUrls == null) {
571 schemaUrls = "";
572 }
573
574 String parserStr = request.getParameter("parser");
575
576 if ("html".equals(parserStr)) {
577 parser = ParserMode.HTML_AUTO;
578 } else if ("xmldtd".equals(parserStr)) {
579 parser = ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION;
580 } else if ("xml".equals(parserStr)) {
581 parser = ParserMode.XML_NO_EXTERNAL_ENTITIES;
582 } else if ("html5".equals(parserStr)) {
583 parser = ParserMode.HTML;
584 } else if ("html4".equals(parserStr)) {
585 parser = ParserMode.HTML401_STRICT;
586 } else if ("html4tr".equals(parserStr)) {
587 parser = ParserMode.HTML401_TRANSITIONAL;
588 } // else auto
589
590 laxType = (request.getParameter("laxtype") != null);
591 }
592
593 private boolean isHtmlUnsafePreset() {
594 if ("".equals(schemaUrls)) {
595 return false;
596 }
597 boolean preset = false;
598 for (int i = 0; i < presetUrls.length; i++) {
599 if (presetUrls[i].equals(schemaUrls)) {
600 preset = true;
601 break;
602 }
603 }
604 if (!preset) {
605 return false;
606 }
607 return !(schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-basic.rng")
608 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-strict.rng")
609 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-strict-wcag.rng")
610 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-transitional.rng")
611 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-transitional-wcag.rng") || schemaUrls.startsWith("http://syntax.whattf.org/relaxng/html5full.rnc"));
612
613 }
614
615 /**
616 * @throws SAXException
617 */
618 @SuppressWarnings("deprecation")
619 void validate() throws SAXException {
620 if (!willValidate()) {
621 return;
622 }
623 try {
624 out.flush();
625 } catch (IOException e1) {
626 throw new SAXException(e1);
627 }
628 httpRes = new PrudentHttpEntityResolver(2048 * 1024, laxType,
629 errorHandler);
630 contentTypeParser = new ContentTypeParser(errorHandler, laxType);
631 entityResolver = new LocalCacheEntityResolver(pathMap, httpRes);
632 setAllowRnc(true);
633 boolean stats = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML);
634 try {
635 this.errorHandler.start(document);
636 PropertyMapBuilder pmb = new PropertyMapBuilder();
637 pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
638 pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
639 pmb.put(ValidateProperty.XML_READER_CREATOR,
640 new VerifierServletXMLReaderCreator(errorHandler,
641 entityResolver));
642 RngProperty.CHECK_ID_IDREF.add(pmb);
643 jingPropertyMap = pmb.toPropertyMap();
644
645 tryToSetupValidator();
646
647 setAllowRnc(false);
648
649 loadDocAndSetupParser();
650
651 reader.setErrorHandler(errorHandler);
652 // XXX set xml:id filter separately
653 contentType = documentInput.getType();
654 sourceCode.initialize(documentInput);
655 if (validator == null) {
656 checkNormalization = true;
657 }
658 if (checkNormalization) {
659 reader.setFeature(
660 "http://xml.org/sax/features/unicode-normalization-checking",
661 true);
662 }
663 WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(
664 reader);
665 ContentHandler recorder = sourceCode.getLocationRecorder();
666 wiretap.setWiretapContentHander(recorder);
667 wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
668 reader = wiretap;
669 if (htmlParser != null) {
670 htmlParser.addCharacterHandler(sourceCode);
671 htmlParser.setMappingLangToXmlLang(true);
672 htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
673 htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
674 } else if (xmlParser != null) {
675 xmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
676 } else {
677 throw new RuntimeException("Bug. Unreachable.");
678 }
679 reader.parse(documentInput);
680 } catch (SAXException e) {
681 log4j.debug("SAXException", e);
682 } catch (IOException e) {
683 stats = false;
684 log4j.info("IOException", e);
685 errorHandler.ioError(e);
686 } catch (IncorrectSchemaException e) {
687 log4j.debug("IncorrectSchemaException", e);
688 errorHandler.schemaError(e);
689 } catch (RuntimeException e) {
690 stats = false;
691 log4j.error("RuntimeException, doc: " + document + " schema: "
692 + schemaUrls + " lax: " + laxType, e);
693 errorHandler.internalError(
694 e,
695 "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
696 } catch (Error e) {
697 stats = false;
698 log4j.error("Error, doc: " + document + " schema: " + schemaUrls
699 + " lax: " + laxType, e);
700 errorHandler.internalError(
701 e,
702 "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified.");
703 } finally {
704 errorHandler.end(successMessage(), failureMessage());
705 }
706 if (stats) {
707 StatsEmitter.emit(contentHandler, this);
708 }
709 }
710
711 /**
712 * @return
713 * @throws SAXException
714 */
715 protected String successMessage() throws SAXException {
716 return "The document validates according to the specified schema(s).";
717 }
718
719 protected String failureMessage() throws SAXException {
720 return "There were errors.";
721 }
722
723 /**
724 * @throws SAXException
725 * @throws IOException
726 * @throws IncorrectSchemaException
727 */
728 protected void tryToSetupValidator() throws SAXException, IOException,
729 IncorrectSchemaException {
730 validator = validatorByUrls(schemaUrls);
731 }
732
733 /**
734 * @throws SAXException
735 * @throws IOException
736 * @throws IncorrectSchemaException
737 * @throws SAXNotRecognizedException
738 * @throws SAXNotSupportedException
739 */
740 protected void loadDocAndSetupParser() throws SAXException, IOException,
741 IncorrectSchemaException, SAXNotRecognizedException,
742 SAXNotSupportedException {
743 switch (parser) {
744 case HTML_AUTO:
745 case HTML:
746 case HTML401_STRICT:
747 case HTML401_TRANSITIONAL:
748 if (isHtmlUnsafePreset()) {
749 String message = "The chosen preset schema is not appropriate for HTML.";
750 SAXException se = new SAXException(message);
751 errorHandler.schemaError(se);
752 throw se;
753 }
754 setAllowGenericXml(false);
755 setAllowHtml(true);
756 setAcceptAllKnownXmlTypes(false);
757 setAllowXhtml(false);
758 loadDocumentInput();
759 newHtmlParser();
760 DoctypeExpectation doctypeExpectation;
761 int schemaId;
762 switch (parser) {
763 case HTML:
764 doctypeExpectation = DoctypeExpectation.HTML;
765 schemaId = HTML5_SCHEMA;
766 break;
767 case HTML401_STRICT:
768 doctypeExpectation = DoctypeExpectation.HTML401_STRICT;
769 schemaId = XHTML1STRICT_SCHEMA;
770 break;
771 case HTML401_TRANSITIONAL:
772 doctypeExpectation = DoctypeExpectation.HTML401_TRANSITIONAL;
773 schemaId = XHTML1TRANSITIONAL_SCHEMA;
774 break;
775 default:
776 doctypeExpectation = DoctypeExpectation.AUTO;
777 schemaId = 0;
778 break;
779 }
780 htmlParser.setDoctypeExpectation(doctypeExpectation);
781 htmlParser.setDocumentModeHandler(this);
782 reader = htmlParser;
783 if (validator == null) {
784 validator = validatorByDoctype(schemaId);
785 }
786 if (validator != null) {
787 reader.setContentHandler(validator.getContentHandler());
788 }
789 break;
790 case XML_NO_EXTERNAL_ENTITIES:
791 case XML_EXTERNAL_ENTITIES_NO_VALIDATION:
792 setAllowGenericXml(true);
793 setAllowHtml(false);
794 setAcceptAllKnownXmlTypes(true);
795 setAllowXhtml(true);
796 loadDocumentInput();
797 setupXmlParser();
798 break;
799 default:
800 setAllowGenericXml(true);
801 setAllowHtml(true);
802 setAcceptAllKnownXmlTypes(true);
803 setAllowXhtml(true);
804 loadDocumentInput();
805 if ("text/html".equals(documentInput.getType())) {
806 if (isHtmlUnsafePreset()) {
807 String message = "The Content-Type was \u201Ctext/html\u201D, but the chosen preset schema is not appropriate for HTML.";
808 SAXException se = new SAXException(message);
809 errorHandler.schemaError(se);
810 throw se;
811 }
812 errorHandler.info("The Content-Type was \u201Ctext/html\u201D. Using the HTML parser.");
813 newHtmlParser();
814 htmlParser.setDoctypeExpectation(DoctypeExpectation.AUTO);
815 htmlParser.setDocumentModeHandler(this);
816 reader = htmlParser;
817 if (validator != null) {
818 reader.setContentHandler(validator.getContentHandler());
819 }
820 } else {
821 errorHandler.info("The Content-Type was \u201C"
822 + documentInput.getType()
823 + "\u201D. Using the XML parser (not resolving external entities).");
824 setupXmlParser();
825 }
826 break;
827 }
828 }
829
830 /**
831 *
832 */
833 protected void newHtmlParser() {
834 htmlParser = new HtmlParser();
835 htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
836 htmlParser.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
837 htmlParser.setMappingLangToXmlLang(true);
838 htmlParser.setHtml4ModeCompatibleWithXhtml1Schemata(true);
839 }
840
841 protected Validator validatorByDoctype(int schemaId) throws SAXException,
842 IOException, IncorrectSchemaException {
843 if (schemaId == 0) {
844 return null;
845 }
846 for (int i = 0; i < presetDoctypes.length; i++) {
847 if (presetDoctypes[i] == schemaId) {
848 return validatorByUrls(presetUrls[i]);
849 }
850 }
851 throw new RuntimeException("Doctype mappings not initialized properly.");
852 }
853
854 /**
855 * @param entityResolver2
856 * @return
857 * @throws SAXNotRecognizedException
858 * @throws SAXNotSupportedException
859 */
860 protected void setupXmlParser() throws SAXNotRecognizedException,
861 SAXNotSupportedException {
862 xmlParser = new SAXDriver();
863 xmlParser.setCharacterHandler(sourceCode);
864 reader = new IdFilter(xmlParser);
865 reader.setFeature(
866 "http://xml.org/sax/features/string-interning",
867 true);
868 reader.setFeature(
869 "http://xml.org/sax/features/external-general-entities",
870 parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION);
871 reader.setFeature(
872 "http://xml.org/sax/features/external-parameter-entities",
873 parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION);
874 if (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION) {
875 reader.setEntityResolver(entityResolver);
876 } else {
877 reader.setEntityResolver(new NullEntityResolver());
878 }
879 if (validator == null) {
880 bufferingRootNamespaceSniffer = new BufferingRootNamespaceSniffer(
881 this);
882 reader.setContentHandler(bufferingRootNamespaceSniffer);
883 } else {
884 reader.setContentHandler(new RootNamespaceSniffer(this,
885 validator.getContentHandler()));
886 reader.setDTDHandler(validator.getDTDHandler());
887 }
888 }
889
890 /**
891 * @param validator
892 * @return
893 * @throws SAXException
894 * @throws IOException
895 * @throws IncorrectSchemaException
896 */
897 private Validator validatorByUrls(String schemaList) throws SAXException,
898 IOException, IncorrectSchemaException {
899 Validator validator = null;
900 String[] schemas = SPACE.split(schemaList);
901 for (int i = schemas.length - 1; i > -1; i--) {
902 String url = schemas[i];
903 if ("http://hsivonen.iki.fi/checkers/all/".equals(url)) {
904 for (int j = 0; j < ALL_CHECKERS.length; j++) {
905 validator = combineValidatorByUrl(validator,
906 ALL_CHECKERS[j]);
907 }
908 } else if ("http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) {
909 for (int j = 0; j < ALL_CHECKERS_HTML4.length; j++) {
910 validator = combineValidatorByUrl(validator,
911 ALL_CHECKERS_HTML4[j]);
912 }
913 } else {
914 validator = combineValidatorByUrl(validator, url);
915 }
916 }
917 return validator;
918 }
919
920 /**
921 * @param validator
922 * @param url
923 * @return
924 * @throws SAXException
925 * @throws IOException
926 * @throws IncorrectSchemaException
927 */
928 private Validator combineValidatorByUrl(Validator validator, String url)
929 throws SAXException, IOException, IncorrectSchemaException {
930 if (!"".equals(url)) {
931 Validator v = validatorByUrl(url);
932 if (validator == null) {
933 validator = v;
934 } else {
935 validator = new CombineValidator(v, validator);
936 }
937 }
938 return validator;
939 }
940
941 /**
942 * @param url
943 * @return
944 * @throws SAXException
945 * @throws IOException
946 * @throws IncorrectSchemaException
947 */
948 private Validator validatorByUrl(String url) throws SAXException,
949 IOException, IncorrectSchemaException {
950 if (loadedValidatorUrls.contains(url)) {
951 return null;
952 }
953 loadedValidatorUrls.add(url);
954 if ("http://hsivonen.iki.fi/checkers/table/".equals(url)) {
955 return new CheckerValidator(new TableChecker(), jingPropertyMap);
956 } else if ("http://hsivonen.iki.fi/checkers/nfc/".equals(url)) {
957 this.checkNormalization = true;
958 return new CheckerValidator(new NormalizationChecker(),
959 jingPropertyMap);
960 } else if ("http://hsivonen.iki.fi/checkers/significant-inline/".equals(url)) {
961 return new CheckerValidator(new SignificantInlineChecker(),
962 jingPropertyMap);
963 } else if ("http://hsivonen.iki.fi/checkers/debug/".equals(url)) {
964 return new CheckerValidator(new DebugChecker(), jingPropertyMap);
965 } else if ("http://hsivonen.iki.fi/checkers/text-content/".equals(url)) {
966 return new CheckerValidator(new TextContentChecker(),
967 jingPropertyMap);
968 } else if ("http://n.validator.nu/checkers/usemap/".equals(url)) {
969 return new CheckerValidator(new UsemapChecker(), jingPropertyMap);
970 }
971 Schema sch = schemaByUrl(url);
972 Validator validator = sch.createValidator(jingPropertyMap);
973 return validator;
974 }
975
976 /**
977 * @param url
978 * @return
979 * @throws SAXException
980 * @throws IOException
981 * @throws IncorrectSchemaException
982 */
983 private Schema schemaByUrl(String url) throws SAXException, IOException,
984 IncorrectSchemaException {
985 int i = Arrays.binarySearch(preloadedSchemaUrls, url);
986 if (i > -1) {
987 return preloadedSchemas[i];
988 }
989
990 TypedInputSource schemaInput = (TypedInputSource) entityResolver.resolveEntity(
991 null, url);
992 SchemaReader sr = null;
993 if ("application/relax-ng-compact-syntax".equals(schemaInput.getType())) {
994 sr = CompactSchemaReader.getInstance();
995 } else {
996 sr = new AutoSchemaReader();
997 }
998 Schema sch = sr.createSchema(schemaInput, jingPropertyMap);
999 return sch;
1000 }
1001
1002 /**
1003 * @param url
1004 * @return
1005 * @throws SAXException
1006 * @throws IOException
1007 * @throws IncorrectSchemaException
1008 */
1009 private static Schema schemaByUrl(String url, EntityResolver resolver,
1010 PropertyMap pMap) throws SAXException, IOException,
1011 IncorrectSchemaException {
1012 log4j.debug("Will load schema: " + url);
1013 TypedInputSource schemaInput = (TypedInputSource) resolver.resolveEntity(
1014 null, url);
1015 SchemaReader sr = null;
1016 if ("application/relax-ng-compact-syntax".equals(schemaInput.getType())) {
1017 sr = CompactSchemaReader.getInstance();
1018 } else {
1019 sr = new AutoSchemaReader();
1020 }
1021 Schema sch = sr.createSchema(schemaInput, pMap);
1022 return sch;
1023 }
1024
1025 /**
1026 * @throws SAXException
1027 */
1028 void emitTitle(boolean markupAllowed) throws SAXException {
1029 if (willValidate()) {
1030 emitter.characters(RESULTS_TITLE);
1031 if (document != null) {
1032 emitter.characters(FOR);
1033 emitter.characters(scrub(document));
1034 }
1035 } else {
1036 emitter.characters(SERVICE_TITLE);
1037 if (markupAllowed) {
1038 emitter.startElement("span");
1039 emitter.characters(TWO_POINT_OH_BETA);
1040 emitter.endElement("span");
1041 }
1042 }
1043 }
1044
1045 void emitForm() throws SAXException {
1046 attrs.clear();
1047 attrs.addAttribute("method", "get");
1048 // attrs.addAttribute("method", "post");
1049 // attrs.addAttribute("enctype", "multipart/form-data");
1050 attrs.addAttribute("action", request.getRequestURL().toString());
1051 attrs.addAttribute("onsubmit", "formSubmission()");
1052 emitter.startElement("form", attrs);
1053 emitFormContent();
1054 emitter.endElement("form");
1055 }
1056
1057 /**
1058 * @throws SAXException
1059 */
1060 protected void emitFormContent() throws SAXException {
1061 FormEmitter.emit(contentHandler, this);
1062 }
1063
1064 void emitSchemaField() throws SAXException {
1065 attrs.clear();
1066 attrs.addAttribute("name", "schema");
1067 attrs.addAttribute("id", "schema");
1068 attrs.addAttribute("onchange", "schemaChanged();");
1069 attrs.addAttribute("pattern", "(?:https?://.+(?:\\s+https?://.+)*)?");
1070 attrs.addAttribute(
1071 "title",
1072 "The schema field takes zero or more space-separated absolute IRIs (http or https only) of the schemas that the document is to be validated against. (When left blank, the service will attempt to pick schemas automatically.)");
1073 if (schemaUrls != null) {
1074 attrs.addAttribute("value", scrub(schemaUrls));
1075 }
1076 emitter.startElement("input", attrs);
1077 emitter.endElement("input");
1078 }
1079
1080 void emitDocField() throws SAXException {
1081 attrs.clear();
1082 attrs.addAttribute("type", "url");
1083 attrs.addAttribute("name", "doc");
1084 attrs.addAttribute("id", "doc");
1085 attrs.addAttribute("pattern", "(?:https?://.+)?");
1086 attrs.addAttribute(
1087 "title",
1088 // XXX drop last sentence for html5 facet
1089 "The document field takes the absolute IRI (http or https only) of the document to be checked. (The document field can also be left blank in order to bookmark settings.)");
1090 if (document != null) {
1091 attrs.addAttribute("value", scrub(document));
1092 }
1093 emitter.startElement("input", attrs);
1094 emitter.endElement("input");
1095 }
1096
1097 private String scrubUrl(String urlStr) {
1098 if (urlStr == null) {
1099 return null;
1100 }
1101
1102 try {
1103 IRI iri = iriFactory.construct(urlStr);
1104 return iri.toASCIIString();
1105 } catch (IRIException e) {
1106 return null;
1107 } catch (MalformedURLException e) {
1108 return null;
1109 }
1110 }
1111
1112 /**
1113 * @throws SAXException
1114 *
1115 */
1116 void emitSchemaDuration() throws SAXException {
1117 }
1118
1119 /**
1120 * @throws SAXException
1121 *
1122 */
1123 void emitDocDuration() throws SAXException {
1124 }
1125
1126 /**
1127 * @throws SAXException
1128 *
1129 */
1130 void emitTotalDuration() throws SAXException {
1131 emitter.characters("" + (System.currentTimeMillis() - start));
1132 }
1133
1134 /**
1135 * @throws SAXException
1136 *
1137 */
1138 void emitPresetOptions() throws SAXException {
1139 for (int i = 0; i < presetUrls.length; i++) {
1140 emitter.option(presetLabels[i], presetUrls[i], false);
1141 }
1142 }
1143
1144 /**
1145 * @throws SAXException
1146 *
1147 */
1148 void emitParserOptions() throws SAXException {
1149 emitter.option("Automatically from Content-Type", "",
1150 (parser == ParserMode.AUTO));
1151 emitter.option("XML; don\u2019t load external entities", "xml",
1152 (parser == ParserMode.XML_NO_EXTERNAL_ENTITIES));
1153 emitter.option("XML; load external entities", "xmldtd",
1154 (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION));
1155 emitter.option("HTML; flavor from doctype", "html",
1156 (parser == ParserMode.HTML_AUTO));
1157 emitter.option("HTML5", "html5", (parser == ParserMode.HTML));
1158 emitter.option("HTML 4.01 Strict", "html4",
1159 (parser == ParserMode.HTML401_STRICT));
1160 emitter.option("HTML 4.01 Transitional", "html4tr",
1161 (parser == ParserMode.HTML401_TRANSITIONAL));
1162 }
1163
1164 /**
1165 * @throws SAXException
1166 *
1167 */
1168 void emitLaxTypeField() throws SAXException {
1169 emitter.checkbox("laxtype", "yes", laxType);
1170 }
1171
1172 /**
1173 * @throws SAXException
1174 *
1175 */
1176 void emitShowSourceField() throws SAXException {
1177 emitter.checkbox("showsource", "yes", showSource);
1178 }
1179
1180 void rootNamespace(String namespace, Locator locator) throws SAXException {
1181 if (validator == null) {
1182 int index = -1;
1183 for (int i = 0; i < presetNamespaces.length; i++) {
1184 if (namespace.equals(presetNamespaces[i])) {
1185 index = i;
1186 break;
1187 }
1188 }
1189 if (index == -1) {
1190 String message = "Cannot find preset schema for namespace: \u201C"
1191 + namespace + "\u201D.";
1192 SAXException se = new SAXException(message);
1193 errorHandler.schemaError(se);
1194 throw se;
1195 }
1196 String label = presetLabels[index];
1197 String urls = presetUrls[index];
1198 errorHandler.info("Using the preset for " + label
1199 + " based on the root namespace.");
1200 try {
1201 validator = validatorByUrls(urls);
1202 } catch (IOException ioe) {
1203 // At this point the schema comes from memory.
1204 throw new RuntimeException(ioe);
1205 } catch (IncorrectSchemaException e) {
1206 // At this point the schema comes from memory.
1207 throw new RuntimeException(e);
1208 }
1209 if (bufferingRootNamespaceSniffer == null) {
1210 throw new RuntimeException(
1211 "Bug! bufferingRootNamespaceSniffer was null.");
1212 }
1213 bufferingRootNamespaceSniffer.setContentHandler(validator.getContentHandler());
1214 }
1215
1216 if (!rootNamespaceSeen) {
1217 rootNamespaceSeen = true;
1218 if (contentType != null) {
1219 int i;
1220 if ((i = Arrays.binarySearch(KNOWN_CONTENT_TYPES, contentType)) > -1) {
1221 if (!NAMESPACES_FOR_KNOWN_CONTENT_TYPES[i].equals(namespace)) {
1222 String message = "\u201C"
1223 + contentType
1224 + "\u201D is not an appropriate Content-Type for a document whose root namespace is \u201C"
1225 + namespace + "\u201D.";
1226 SAXParseException spe = new SAXParseException(message,
1227 locator);
1228 errorHandler.warning(spe);
1229 }
1230 }
1231 }
1232 }
1233 }
1234
1235 public void documentMode(DocumentMode mode, String publicIdentifier,
1236 String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
1237 throws SAXException {
1238 if (validator == null) {
1239 try {
1240 if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicIdentifier)) {
1241 errorHandler.info("XHTML 1.0 Transitional doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for XHTML 1.0 Transitional."
1242 + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled."
1243 : ""));
1244 validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
1245 } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) {
1246 errorHandler.info("XHTML 1.0 Strict doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for XHTML 1.0 Strict."
1247 + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled."
1248 : ""));
1249 validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
1250 } else if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
1251 errorHandler.info("HTML 4.01 Transitional doctype seen. Using the schema for XHTML 1.0 Transitional."
1252 + (html4SpecificAdditionalErrorChecks ? ""
1253 : " HTML4-specific tokenization errors are not enabled."));
1254 validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
1255 } else if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
1256 errorHandler.info("HTML 4.01 Strict doctype seen. Using the schema for XHTML 1.0 Strict."
1257 + (html4SpecificAdditionalErrorChecks ? ""
1258 : " HTML4-specific tokenization errors are not enabled."));
1259 validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
1260 } else if ("-//W3C//DTD HTML 4.0 Transitional//EN".equals(publicIdentifier)) {
1261 errorHandler.info("Legacy HTML 4.0 Transitional doctype seen. Please consider using HTML 4.01 Transitional instead. Proceeding anyway for your convenience with the schema for XHTML 1.0 Transitional."
1262 + (html4SpecificAdditionalErrorChecks ? ""
1263 : " HTML4-specific tokenization errors are not enabled."));
1264 validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA);
1265 } else if ("-//W3C//DTD HTML 4.0//EN".equals(publicIdentifier)) {
1266 errorHandler.info("Legacy HTML 4.0 Strict doctype seen. Please consider using HTML 4.01 instead. Proceeding anyway for your convenience with the schema for XHTML 1.0 Strict."
1267 + (html4SpecificAdditionalErrorChecks ? ""
1268 : " HTML4-specific tokenization errors are not enabled."));
1269 validator = validatorByDoctype(XHTML1STRICT_SCHEMA);
1270 } else {
1271 errorHandler.info("Using the schema for HTML5."
1272 + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled."
1273 : ""));
1274 validator = validatorByDoctype(HTML5_SCHEMA);
1275 }
1276 } catch (IOException ioe) {
1277 // At this point the schema comes from memory.
1278 throw new RuntimeException(ioe);
1279 } catch (IncorrectSchemaException e) {
1280 // At this point the schema comes from memory.
1281 throw new RuntimeException(e);
1282 }
1283 ContentHandler ch = validator.getContentHandler();
1284 ch.setDocumentLocator(htmlParser.getDocumentLocator());
1285 ch.startDocument();
1286 reader.setContentHandler(ch);
1287 } else {
1288 if (html4SpecificAdditionalErrorChecks) {
1289 errorHandler.info("HTML4-specific tokenization errors are enabled.");
1290 }
1291 }
1292 }
1293
1294 /**
1295 * @param acceptAllKnownXmlTypes
1296 * @see nu.validator.xml.ContentTypeParser#setAcceptAllKnownXmlTypes(boolean)
1297 */
1298 protected void setAcceptAllKnownXmlTypes(boolean acceptAllKnownXmlTypes) {
1299 contentTypeParser.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
1300 httpRes.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
1301 }
1302
1303 /**
1304 * @param allowGenericXml
1305 * @see nu.validator.xml.ContentTypeParser#setAllowGenericXml(boolean)
1306 */
1307 protected void setAllowGenericXml(boolean allowGenericXml) {
1308 contentTypeParser.setAllowGenericXml(allowGenericXml);
1309 httpRes.setAllowGenericXml(allowGenericXml);
1310 }
1311
1312 /**
1313 * @param allowHtml
1314 * @see nu.validator.xml.ContentTypeParser#setAllowHtml(boolean)
1315 */
1316 protected void setAllowHtml(boolean allowHtml) {
1317 contentTypeParser.setAllowHtml(allowHtml);
1318 httpRes.setAllowHtml(allowHtml);
1319 }
1320
1321 /**
1322 * @param allowRnc
1323 * @see nu.validator.xml.ContentTypeParser#setAllowRnc(boolean)
1324 */
1325 protected void setAllowRnc(boolean allowRnc) {
1326 contentTypeParser.setAllowRnc(allowRnc);
1327 httpRes.setAllowRnc(allowRnc);
1328 entityResolver.setAllowRnc(allowRnc);
1329 }
1330
1331 /**
1332 * @param allowXhtml
1333 * @see nu.validator.xml.ContentTypeParser#setAllowXhtml(boolean)
1334 */
1335 protected void setAllowXhtml(boolean allowXhtml) {
1336 contentTypeParser.setAllowXhtml(allowXhtml);
1337 httpRes.setAllowXhtml(allowXhtml);
1338 }
1339
1340 /**
1341 * @throws SAXException
1342 * @throws IOException
1343 */
1344 protected void loadDocumentInput() throws SAXException, IOException {
1345 if (methodIsGet) {
1346 documentInput = (TypedInputSource) entityResolver.resolveEntity(
1347 null, document);
1348 } else { // POST
1349 documentInput = contentTypeParser.buildTypedInputSource(document,
1350 null, postContentType);
1351 documentInput.setByteStream(request.getInputStream());
1352 }
1353 }
1354 }