/*
 * Copyright (c) 2005, 2006 Henri Sivonen
 * Copyright (c) 2007 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.servlet;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import nu.validator.gnu.xml.aelfred2.SAXDriver;
import nu.validator.htmlparser.common.DoctypeExpectation;
import nu.validator.htmlparser.common.DocumentMode;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.messages.JsonMessageEmitter;
import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.messages.TextMessageEmitter;
import nu.validator.messages.XhtmlMessageEmitter;
import nu.validator.messages.XmlMessageEmitter;
import nu.validator.source.SourceCode;
import nu.validator.xml.AttributesImpl;
import nu.validator.xml.CharacterUtil;
import nu.validator.xml.ContentTypeParser;
import nu.validator.xml.HtmlSerializer;
import nu.validator.xml.IdFilter;
import nu.validator.xml.LocalCacheEntityResolver;
import nu.validator.xml.NullEntityResolver;
import nu.validator.xml.PrudentHttpEntityResolver;
import nu.validator.xml.SystemErrErrorHandler;
import nu.validator.xml.TypedInputSource;
import nu.validator.xml.WiretapXMLReaderWrapper;
import nu.validator.xml.XhtmlSaxEmitter;

import org.apache.log4j.Logger;
import org.apache.xml.serializer.Method;
import org.apache.xml.serializer.OutputPropertiesFactory;
import org.apache.xml.serializer.Serializer;
import org.apache.xml.serializer.SerializerFactory;
import org.whattf.checker.DebugChecker;
import org.whattf.checker.NormalizationChecker;
import org.whattf.checker.SignificantInlineChecker;
import org.whattf.checker.TextContentChecker;
import org.whattf.checker.UsemapChecker;
import org.whattf.checker.jing.CheckerValidator;
import org.whattf.checker.table.TableChecker;
import org.xml.sax.ContentHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;

import com.hp.hpl.jena.iri.IRI;
import com.hp.hpl.jena.iri.IRIException;
import com.hp.hpl.jena.iri.IRIFactory;
import com.ibm.icu.text.Normalizer;
import com.thaiopensource.relaxng.impl.CombineValidator;
import com.thaiopensource.util.PropertyMap;
import com.thaiopensource.util.PropertyMapBuilder;
import com.thaiopensource.validate.IncorrectSchemaException;
import com.thaiopensource.validate.Schema;
import com.thaiopensource.validate.SchemaReader;
import com.thaiopensource.validate.ValidateProperty;
import com.thaiopensource.validate.Validator;
import com.thaiopensource.validate.auto.AutoSchemaReader;
import com.thaiopensource.validate.rng.CompactSchemaReader;
import com.thaiopensource.validate.rng.RngProperty;


/**
 * Holds the state of a single validation request: the servlet request and
 * response it was created for, the chosen schemas/parser/output format, and
 * the parser and validator objects built while serving it.
 *
 * @version $Id: VerifierServletTransaction.java,v 1.10 2005/07/24 07:32:48
 *          hsivonen Exp $
 * @author hsivonen
 */
class VerifierServletTransaction implements DocumentModeHandler {

    /** Output formats; only some of them are selectable via the "out" request parameter. */
    private enum OutputFormat {
        HTML, XHTML, TEXT, XML, JSON, RELAXED, SOAP, UNICORN, EMACS
    }

    private static final Logger log4j = Logger.getLogger(VerifierServletTransaction.class);

    /** One or more whitespace characters; separates the URLs in a schema list. */
    private static final Pattern SPACE = Pattern.compile("\\s+");

    /** Shape a JSON "callback" parameter must match in full to be accepted. */
    private static final Pattern JS_IDENTIFIER = Pattern.compile("[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$][\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}]*");

    /**
     * Names rejected as JSON callbacks. The array is looked up with
     * {@link Arrays#binarySearch(Object[], Object)}, so it MUST stay sorted
     * in natural String order when entries are added.
     */
    private static final String[] JS_RESERVED_WORDS = { "abstract", "boolean",
            "break", "byte", "case", "catch", "char", "class", "const",
            "continue", "debugger", "default", "delete", "do", "double",
            "else", "enum", "export", "extends", "final", "finally", "float",
            "for", "function", "goto", "if", "implements", "import", "in",
            "instanceof", "int", "interface", "long", "native", "new",
            "package", "private", "protected", "public", "return", "short",
            "static", "super", "switch", "synchronized", "this", "throw",
            "throws", "transient", "try", "typeof", "var", "void", "volatile",
            "while", "with" };

    // Schema ids; they are compared against the numeric first column of the
    // preset configuration file (see presetDoctypes).

    protected static final int HTML5_SCHEMA = 3;

    protected static final int XHTML1STRICT_SCHEMA = 2;

    protected static final int XHTML1TRANSITIONAL_SCHEMA = 1;

    protected static final int XHTML5_SCHEMA = 7;

    /** Page title prefix used when no validation is performed. */
    private static final char[] SERVICE_TITLE = "Validator.nu ".toCharArray();

    /** Version label shown after the service title (the name is historical; the value is "2.1 Gamma"). */
    private static final char[] TWO_POINT_OH_BETA = "2.1 Gamma".toCharArray();

    /**
     * Page title prefix used when validating. It deliberately carries no
     * trailing "for": the title code appends {@link #FOR} and the document
     * address itself, and the previous value ("Validation results for ")
     * produced "Validation results for  for &lt;address&gt;".
     */
    private static final char[] RESULTS_TITLE = "Validation results".toCharArray();

    /** Joiner between the results title and the document address. */
    private static final char[] FOR = " for ".toCharArray();

    /** Maps a public URL to a local cache path; filled once from the cache configuration file. */
    private static final Map pathMap = new HashMap();

    // The four preset* arrays are parallel: index i describes one line of the
    // preset configuration file (doctype id, namespace, label, schema URLs).

    private static int[] presetDoctypes;

    private static String[] presetLabels;

    private static String[] presetUrls;

    private static String[] presetNamespaces;

    // NOTE(review): the two arrays below look index-aligned (content type ->
    // root namespace); their use is not visible in this part of the file.

    private static final String[] KNOWN_CONTENT_TYPES = {
            "application/atom+xml", "application/docbook+xml",
            "application/xhtml+xml", "application/xv+xml" };

    private static final String[] NAMESPACES_FOR_KNOWN_CONTENT_TYPES = {
            "http://www.w3.org/2005/Atom", "http://docbook.org/ns/docbook",
            "http://www.w3.org/1999/xhtml", "http://www.w3.org/1999/xhtml" };

    /** Checkers that the pseudo-URL http://hsivonen.iki.fi/checkers/all/ expands to. */
    private static final String[] ALL_CHECKERS = {
            "http://hsivonen.iki.fi/checkers/table/",
            "http://hsivonen.iki.fi/checkers/nfc/",
            "http://hsivonen.iki.fi/checkers/significant-inline/",
            "http://hsivonen.iki.fi/checkers/text-content/",
            "http://n.validator.nu/checkers/usemap/"};

    /** Checkers that the pseudo-URL http://hsivonen.iki.fi/checkers/all-html4/ expands to. */
    private static final String[] ALL_CHECKERS_HTML4 = {
            "http://hsivonen.iki.fi/checkers/table/",
            "http://hsivonen.iki.fi/checkers/nfc/" };

    /** Wall-clock time at which this transaction object was created. */
    private long start = System.currentTimeMillis();

    private final HttpServletRequest request;

    private final HttpServletResponse response;

    private IRIFactory iriFactory;

    /** Address of the document to check, or null when none was supplied. */
    protected String document = null;

    private ParserMode parser = ParserMode.AUTO;

    /** True when the "laxtype" request parameter is present. */
    private boolean laxType = false;

    protected ContentHandler contentHandler;

    protected XhtmlSaxEmitter emitter;

    protected MessageEmitterAdapter errorHandler;

    /** Scratch attribute list reused by the form-emitting methods. */
    private AttributesImpl attrs = new AttributesImpl();

    private OutputStream out;

    private PropertyMap jingPropertyMap;

    protected LocalCacheEntityResolver entityResolver;

    /** Modification time of the preset configuration file, read at class load. */
    private static long lastModified;

    // Parallel arrays, sorted by URL, of the schemas parsed at class load.

    private static String[] preloadedSchemaUrls;

    private static Schema[] preloadedSchemas;

    /** Whitespace-separated schema URL list for this request ("" when none was given). */
    private String schemaUrls = null;

    protected Validator validator = null;

    private BufferingRootNamespaceSniffer bufferingRootNamespaceSniffer = null;

    private String contentType = null;

    protected HtmlParser htmlParser = null;

    protected SAXDriver xmlParser = null;

235 protected XMLReader reader; 236 237 protected TypedInputSource documentInput; 238 239 protected PrudentHttpEntityResolver httpRes; 240 241 protected ContentTypeParser contentTypeParser; 242 243 private Set<String> loadedValidatorUrls = new HashSet<String>(); 244 245 private boolean checkNormalization = false; 246 247 private boolean rootNamespaceSeen = false; 248 249 private OutputFormat outputFormat; 250 251 private String postContentType; 252 253 private boolean methodIsGet; 254 255 private SourceCode sourceCode = new SourceCode(); 256 257 private boolean showSource; 258 259 static { 260 try { 261 log4j.debug("Starting static initializer."); 262 263 String presetPath = System.getProperty("nu.validator.servlet.presetconfpath"); 264 File presetFile = new File(presetPath); 265 lastModified = presetFile.lastModified(); 266 BufferedReader r = new BufferedReader(new InputStreamReader( 267 new FileInputStream(presetFile), "UTF-8")); 268 String line; 269 List<String> doctypes = new LinkedList<String>(); 270 List<String> namespaces = new LinkedList<String>(); 271 List<String> labels = new LinkedList<String>(); 272 List<String> urls = new LinkedList<String>(); 273 274 log4j.debug("Starting to loop over config file lines."); 275 276 while ((line = r.readLine()) != null) { 277 if ("".equals(line.trim())) { 278 break; 279 } 280 String s[] = line.split("\t"); 281 doctypes.add(s[0]); 282 namespaces.add(s[1]); 283 labels.add(s[2]); 284 urls.add(s[3]); 285 } 286 287 log4j.debug("Finished reading config."); 288 289 String[] presetDoctypesAsStrings = doctypes.toArray(new String[0]); 290 presetNamespaces = namespaces.toArray(new String[0]); 291 presetLabels = labels.toArray(new String[0]); 292 presetUrls = urls.toArray(new String[0]); 293 294 log4j.debug("Converted config to arrays."); 295 296 for (int i = 0; i < presetNamespaces.length; i++) { 297 String str = presetNamespaces[i]; 298 if ("-".equals(str)) { 299 presetNamespaces[i] = null; 300 } else { 301 presetNamespaces[i] = 
presetNamespaces[i].intern(); 302 } 303 } 304 305 log4j.debug("Prepared namespace array."); 306 307 presetDoctypes = new int[presetDoctypesAsStrings.length]; 308 for (int i = 0; i < presetDoctypesAsStrings.length; i++) { 309 presetDoctypes[i] = Integer.parseInt(presetDoctypesAsStrings[i]); 310 } 311 312 log4j.debug("Parsed doctype numbers into ints."); 313 314 String prefix = System.getProperty("nu.validator.servlet.cachepathprefix"); 315 316 log4j.debug("The cache path prefix is: " + prefix); 317 318 String cacheConfPath = System.getProperty("nu.validator.servlet.cacheconfpath"); 319 320 log4j.debug("The cache config path is: " + cacheConfPath); 321 322 r = new BufferedReader(new InputStreamReader(new FileInputStream( 323 cacheConfPath), "UTF-8")); 324 while ((line = r.readLine()) != null) { 325 if ("".equals(line.trim())) { 326 break; 327 } 328 String s[] = line.split("\t"); 329 pathMap.put(s[0], prefix + s[1]); 330 } 331 332 log4j.debug("Cache config read."); 333 334 ErrorHandler eh = new SystemErrErrorHandler(); 335 LocalCacheEntityResolver er = new LocalCacheEntityResolver(pathMap, 336 new NullEntityResolver()); 337 er.setAllowRnc(true); 338 PropertyMapBuilder pmb = new PropertyMapBuilder(); 339 pmb.put(ValidateProperty.ERROR_HANDLER, eh); 340 pmb.put(ValidateProperty.ENTITY_RESOLVER, er); 341 pmb.put(ValidateProperty.XML_READER_CREATOR, 342 new VerifierServletXMLReaderCreator(eh, er)); 343 RngProperty.CHECK_ID_IDREF.add(pmb); 344 PropertyMap pMap = pmb.toPropertyMap(); 345 346 log4j.debug("Parsing set up. 
Starting to read schemas."); 347 348 SortedMap<String, Schema> schemaMap = new TreeMap<String, Schema>(); 349 for (int i = 0; i < presetUrls.length; i++) { 350 String[] urls1 = SPACE.split(presetUrls[i]); 351 for (int j = 0; j < urls1.length; j++) { 352 String url = urls1[j]; 353 if (schemaMap.get(url) == null && !isCheckerUrl(url)) { 354 Schema sch = schemaByUrl(url, er, pMap); 355 schemaMap.put(url, sch); 356 } 357 } 358 } 359 360 log4j.debug("Schemas read."); 361 362 preloadedSchemaUrls = new String[schemaMap.size()]; 363 preloadedSchemas = new Schema[schemaMap.size()]; 364 int i = 0; 365 for (Iterator iter = schemaMap.entrySet().iterator(); iter.hasNext();) { 366 Map.Entry entry = (Map.Entry) iter.next(); 367 preloadedSchemaUrls[i] = entry.getKey().toString().intern(); 368 preloadedSchemas[i] = (Schema) entry.getValue(); 369 i++; 370 } 371 372 log4j.debug("Initialization complete."); 373 } catch (Exception e) { 374 throw new RuntimeException(e); 375 } 376 } 377 378 protected static String scrub(String s) { 379 return Normalizer.normalize( 380 CharacterUtil.prudentlyScrubCharacterData(s), Normalizer.NFC); 381 } 382 383 private static boolean isCheckerUrl(String url) { 384 if ("http://hsivonen.iki.fi/checkers/all/".equals(url)) { 385 return true; 386 } else if ("http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) { 387 return true; 388 } 389 for (int i = 0; i < ALL_CHECKERS.length; i++) { 390 if (ALL_CHECKERS[i].equals(url)) { 391 return true; 392 } 393 } 394 return false; 395 } 396 397 /** 398 * @param request 399 * @param response 400 */ 401 VerifierServletTransaction(HttpServletRequest request, 402 HttpServletResponse response) { 403 this.request = request; 404 this.response = response; 405 this.iriFactory = IRIFactory.iriImplementation(); 406 } 407 408 protected boolean willValidate() { 409 if (methodIsGet) { 410 return document != null; 411 } else { // POST 412 return true; 413 } 414 } 415 416 void service() throws ServletException, IOException { 417 
this.methodIsGet = "GET".equals(request.getMethod()) 418 || "HEAD".equals(request.getMethod()); 419 420 this.out = response.getOutputStream(); 421 422 request.setCharacterEncoding("utf-8"); 423 424 if (!methodIsGet) { 425 postContentType = request.getContentType(); 426 if (postContentType == null) { 427 response.sendError(HttpServletResponse.SC_BAD_REQUEST, 428 "Content-Type missing"); 429 return; 430 } else if (postContentType.trim().toLowerCase().startsWith("application/x-www-form-urlencoded")) { 431 response.sendError(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, 432 "application/x-www-form-urlencoded not supported. Please use multipart/form-data."); 433 return; 434 } 435 } 436 437 String outFormat = request.getParameter("out"); 438 if (outFormat == null) { 439 outputFormat = OutputFormat.HTML; 440 } else { 441 if ("html".equals(outFormat)) { 442 outputFormat = OutputFormat.HTML; 443 } else if ("xhtml".equals(outFormat)) { 444 outputFormat = OutputFormat.XHTML; 445 } else if ("text".equals(outFormat)) { 446 outputFormat = OutputFormat.TEXT; 447 } else if ("xml".equals(outFormat)) { 448 outputFormat = OutputFormat.XML; 449 } else if ("json".equals(outFormat)) { 450 outputFormat = OutputFormat.JSON; 451 } else { 452 response.sendError(HttpServletResponse.SC_BAD_REQUEST, 453 "Unsupported output format"); 454 return; 455 } 456 } 457 458 if (!methodIsGet) { 459 document = scrubUrl(request.getHeader("Content-Location")); 460 } 461 if (document == null) { 462 document = scrubUrl(request.getParameter("doc")); 463 } 464 465 document = ("".equals(document)) ? 
null : document; 466 467 String callback = null; 468 if (outputFormat == OutputFormat.JSON) { 469 callback = request.getParameter("callback"); 470 if (callback != null) { 471 Matcher m = JS_IDENTIFIER.matcher(callback); 472 if (m.matches()) { 473 if (Arrays.binarySearch(JS_RESERVED_WORDS, callback) >= 0) { 474 response.sendError(HttpServletResponse.SC_BAD_REQUEST, 475 "Callback is a reserved word."); 476 return; 477 } 478 } else { 479 response.sendError(HttpServletResponse.SC_BAD_REQUEST, 480 "Callback is not a valid ECMA 262 IdentifierName."); 481 return; 482 } 483 } 484 } 485 486 String methodCheck = request.getHeader("Method-Check"); 487 488 if (willValidate()) { 489 response.setDateHeader("Expires", 0); 490 response.setHeader("Cache-Control", "no-cache"); 491 } else if (methodCheck != null) { 492 // XXX revisit if anne changes the access-control stuff to use OPTIONS 493 response.setStatus(HttpServletResponse.SC_NO_CONTENT); 494 response.setHeader("Allow", "POST"); 495 return; 496 } else if (outputFormat == OutputFormat.HTML 497 || outputFormat == OutputFormat.XHTML) { 498 response.setDateHeader("Last-Modified", lastModified); 499 } else { 500 response.sendError(HttpServletResponse.SC_BAD_REQUEST, 501 "No input document"); 502 return; 503 } 504 505 setup(); 506 507 showSource = (request.getParameter("showsource") != null); 508 509 try { 510 if (outputFormat == OutputFormat.HTML 511 || outputFormat == OutputFormat.XHTML) { 512 if (outputFormat == OutputFormat.HTML) { 513 response.setContentType("text/html; charset=utf-8"); 514 contentHandler = new HtmlSerializer(out, 515 HtmlSerializer.DOCTYPE_HTML5, false, "UTF-8"); 516 } else { 517 response.setContentType("application/xhtml+xml"); 518 Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML); 519 Serializer ser = SerializerFactory.getSerializer(props); 520 ser.setOutputStream(out); 521 contentHandler = ser.asContentHandler(); 522 } 523 emitter = new XhtmlSaxEmitter(contentHandler); 524 
errorHandler = new MessageEmitterAdapter(sourceCode, showSource, 525 new XhtmlMessageEmitter(contentHandler)); 526 PageEmitter.emit(contentHandler, this); 527 } else { 528 if (outputFormat == OutputFormat.TEXT) { 529 response.setContentType("text/plain; charset=utf-8"); 530 errorHandler = new MessageEmitterAdapter(sourceCode, showSource, 531 new TextMessageEmitter(out)); 532 } else if (outputFormat == OutputFormat.XML) { 533 response.setContentType("application/xml"); 534 Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML); 535 Serializer ser = SerializerFactory.getSerializer(props); 536 ser.setOutputStream(out); 537 errorHandler = new MessageEmitterAdapter(sourceCode, showSource, 538 new XmlMessageEmitter(ser.asContentHandler())); 539 } else if (outputFormat == OutputFormat.JSON) { 540 if (callback == null) { 541 response.setContentType("application/json"); 542 } else { 543 response.setContentType("application/javascript"); 544 } 545 errorHandler = new MessageEmitterAdapter(sourceCode, showSource, 546 new JsonMessageEmitter( 547 new nu.validator.json.Serializer(out), 548 callback)); 549 } else { 550 throw new RuntimeException("Unreachable."); 551 } 552 validate(); 553 } 554 } catch (SAXException e) { 555 throw new ServletException(e); 556 } 557 } 558 559 /** 560 * @throws ServletException 561 */ 562 protected void setup() throws ServletException { 563 String preset = request.getParameter("preset"); 564 565 if (preset != null && !"".equals(preset)) { 566 schemaUrls = preset; 567 } else { 568 schemaUrls = request.getParameter("schema"); 569 } 570 if (schemaUrls == null) { 571 schemaUrls = ""; 572 } 573 574 String parserStr = request.getParameter("parser"); 575 576 if ("html".equals(parserStr)) { 577 parser = ParserMode.HTML_AUTO; 578 } else if ("xmldtd".equals(parserStr)) { 579 parser = ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION; 580 } else if ("xml".equals(parserStr)) { 581 parser = ParserMode.XML_NO_EXTERNAL_ENTITIES; 582 } else 
if ("html5".equals(parserStr)) { 583 parser = ParserMode.HTML; 584 } else if ("html4".equals(parserStr)) { 585 parser = ParserMode.HTML401_STRICT; 586 } else if ("html4tr".equals(parserStr)) { 587 parser = ParserMode.HTML401_TRANSITIONAL; 588 } // else auto 589 590 laxType = (request.getParameter("laxtype") != null); 591 } 592 593 private boolean isHtmlUnsafePreset() { 594 if ("".equals(schemaUrls)) { 595 return false; 596 } 597 boolean preset = false; 598 for (int i = 0; i < presetUrls.length; i++) { 599 if (presetUrls[i].equals(schemaUrls)) { 600 preset = true; 601 break; 602 } 603 } 604 if (!preset) { 605 return false; 606 } 607 return !(schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-basic.rng") 608 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-strict.rng") 609 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-strict-wcag.rng") 610 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-transitional.rng") 611 || schemaUrls.startsWith("http://hsivonen.iki.fi/xhtml-schema/xhtml-transitional-wcag.rng") || schemaUrls.startsWith("http://syntax.whattf.org/relaxng/html5full.rnc")); 612 613 } 614 615 /** 616 * @throws SAXException 617 */ 618 @SuppressWarnings("deprecation") 619 void validate() throws SAXException { 620 if (!willValidate()) { 621 return; 622 } 623 try { 624 out.flush(); 625 } catch (IOException e1) { 626 throw new SAXException(e1); 627 } 628 httpRes = new PrudentHttpEntityResolver(2048 * 1024, laxType, 629 errorHandler); 630 contentTypeParser = new ContentTypeParser(errorHandler, laxType); 631 entityResolver = new LocalCacheEntityResolver(pathMap, httpRes); 632 setAllowRnc(true); 633 boolean stats = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML); 634 try { 635 this.errorHandler.start(document); 636 PropertyMapBuilder pmb = new PropertyMapBuilder(); 637 pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler); 638 pmb.put(ValidateProperty.ENTITY_RESOLVER, 
entityResolver); 639 pmb.put(ValidateProperty.XML_READER_CREATOR, 640 new VerifierServletXMLReaderCreator(errorHandler, 641 entityResolver)); 642 RngProperty.CHECK_ID_IDREF.add(pmb); 643 jingPropertyMap = pmb.toPropertyMap(); 644 645 tryToSetupValidator(); 646 647 setAllowRnc(false); 648 649 loadDocAndSetupParser(); 650 651 reader.setErrorHandler(errorHandler); 652 // XXX set xml:id filter separately 653 contentType = documentInput.getType(); 654 sourceCode.initialize(documentInput); 655 if (validator == null) { 656 checkNormalization = true; 657 } 658 if (checkNormalization) { 659 reader.setFeature( 660 "http://xml.org/sax/features/unicode-normalization-checking", 661 true); 662 } 663 WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper( 664 reader); 665 ContentHandler recorder = sourceCode.getLocationRecorder(); 666 wiretap.setWiretapContentHander(recorder); 667 wiretap.setWiretapLexicalHandler((LexicalHandler) recorder); 668 reader = wiretap; 669 if (htmlParser != null) { 670 htmlParser.addCharacterHandler(sourceCode); 671 htmlParser.setMappingLangToXmlLang(true); 672 htmlParser.setErrorHandler(errorHandler.getExactErrorHandler()); 673 htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler); 674 } else if (xmlParser != null) { 675 xmlParser.setErrorHandler(errorHandler.getExactErrorHandler()); 676 } else { 677 throw new RuntimeException("Bug. Unreachable."); 678 } 679 reader.parse(documentInput); 680 } catch (SAXException e) { 681 log4j.debug("SAXException", e); 682 } catch (IOException e) { 683 stats = false; 684 log4j.info("IOException", e); 685 errorHandler.ioError(e); 686 } catch (IncorrectSchemaException e) { 687 log4j.debug("IncorrectSchemaException", e); 688 errorHandler.schemaError(e); 689 } catch (RuntimeException e) { 690 stats = false; 691 log4j.error("RuntimeException, doc: " + document + " schema: " 692 + schemaUrls + " lax: " + laxType, e); 693 errorHandler.internalError( 694 e, 695 "Oops. That was not supposed to happen. 
A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified."); 696 } catch (Error e) { 697 stats = false; 698 log4j.error("Error, doc: " + document + " schema: " + schemaUrls 699 + " lax: " + laxType, e); 700 errorHandler.internalError( 701 e, 702 "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified."); 703 } finally { 704 errorHandler.end(successMessage(), failureMessage()); 705 } 706 if (stats) { 707 StatsEmitter.emit(contentHandler, this); 708 } 709 } 710 711 /** 712 * @return 713 * @throws SAXException 714 */ 715 protected String successMessage() throws SAXException { 716 return "The document validates according to the specified schema(s)."; 717 } 718 719 protected String failureMessage() throws SAXException { 720 return "There were errors."; 721 } 722 723 /** 724 * @throws SAXException 725 * @throws IOException 726 * @throws IncorrectSchemaException 727 */ 728 protected void tryToSetupValidator() throws SAXException, IOException, 729 IncorrectSchemaException { 730 validator = validatorByUrls(schemaUrls); 731 } 732 733 /** 734 * @throws SAXException 735 * @throws IOException 736 * @throws IncorrectSchemaException 737 * @throws SAXNotRecognizedException 738 * @throws SAXNotSupportedException 739 */ 740 protected void loadDocAndSetupParser() throws SAXException, IOException, 741 IncorrectSchemaException, SAXNotRecognizedException, 742 SAXNotSupportedException { 743 switch (parser) { 744 case HTML_AUTO: 745 case HTML: 746 case HTML401_STRICT: 747 case HTML401_TRANSITIONAL: 748 if (isHtmlUnsafePreset()) { 749 String message = "The chosen preset schema is not appropriate for HTML."; 750 SAXException se = new SAXException(message); 751 errorHandler.schemaError(se); 752 throw se; 753 } 754 setAllowGenericXml(false); 755 setAllowHtml(true); 756 setAcceptAllKnownXmlTypes(false); 757 setAllowXhtml(false); 758 loadDocumentInput(); 759 
newHtmlParser(); 760 DoctypeExpectation doctypeExpectation; 761 int schemaId; 762 switch (parser) { 763 case HTML: 764 doctypeExpectation = DoctypeExpectation.HTML; 765 schemaId = HTML5_SCHEMA; 766 break; 767 case HTML401_STRICT: 768 doctypeExpectation = DoctypeExpectation.HTML401_STRICT; 769 schemaId = XHTML1STRICT_SCHEMA; 770 break; 771 case HTML401_TRANSITIONAL: 772 doctypeExpectation = DoctypeExpectation.HTML401_TRANSITIONAL; 773 schemaId = XHTML1TRANSITIONAL_SCHEMA; 774 break; 775 default: 776 doctypeExpectation = DoctypeExpectation.AUTO; 777 schemaId = 0; 778 break; 779 } 780 htmlParser.setDoctypeExpectation(doctypeExpectation); 781 htmlParser.setDocumentModeHandler(this); 782 reader = htmlParser; 783 if (validator == null) { 784 validator = validatorByDoctype(schemaId); 785 } 786 if (validator != null) { 787 reader.setContentHandler(validator.getContentHandler()); 788 } 789 break; 790 case XML_NO_EXTERNAL_ENTITIES: 791 case XML_EXTERNAL_ENTITIES_NO_VALIDATION: 792 setAllowGenericXml(true); 793 setAllowHtml(false); 794 setAcceptAllKnownXmlTypes(true); 795 setAllowXhtml(true); 796 loadDocumentInput(); 797 setupXmlParser(); 798 break; 799 default: 800 setAllowGenericXml(true); 801 setAllowHtml(true); 802 setAcceptAllKnownXmlTypes(true); 803 setAllowXhtml(true); 804 loadDocumentInput(); 805 if ("text/html".equals(documentInput.getType())) { 806 if (isHtmlUnsafePreset()) { 807 String message = "The Content-Type was \u201Ctext/html\u201D, but the chosen preset schema is not appropriate for HTML."; 808 SAXException se = new SAXException(message); 809 errorHandler.schemaError(se); 810 throw se; 811 } 812 errorHandler.info("The Content-Type was \u201Ctext/html\u201D. 
Using the HTML parser."); 813 newHtmlParser(); 814 htmlParser.setDoctypeExpectation(DoctypeExpectation.AUTO); 815 htmlParser.setDocumentModeHandler(this); 816 reader = htmlParser; 817 if (validator != null) { 818 reader.setContentHandler(validator.getContentHandler()); 819 } 820 } else { 821 errorHandler.info("The Content-Type was \u201C" 822 + documentInput.getType() 823 + "\u201D. Using the XML parser (not resolving external entities)."); 824 setupXmlParser(); 825 } 826 break; 827 } 828 } 829 830 /** 831 * 832 */ 833 protected void newHtmlParser() { 834 htmlParser = new HtmlParser(); 835 htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL); 836 htmlParser.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); 837 htmlParser.setMappingLangToXmlLang(true); 838 htmlParser.setHtml4ModeCompatibleWithXhtml1Schemata(true); 839 } 840 841 protected Validator validatorByDoctype(int schemaId) throws SAXException, 842 IOException, IncorrectSchemaException { 843 if (schemaId == 0) { 844 return null; 845 } 846 for (int i = 0; i < presetDoctypes.length; i++) { 847 if (presetDoctypes[i] == schemaId) { 848 return validatorByUrls(presetUrls[i]); 849 } 850 } 851 throw new RuntimeException("Doctype mappings not initialized properly."); 852 } 853 854 /** 855 * @param entityResolver2 856 * @return 857 * @throws SAXNotRecognizedException 858 * @throws SAXNotSupportedException 859 */ 860 protected void setupXmlParser() throws SAXNotRecognizedException, 861 SAXNotSupportedException { 862 xmlParser = new SAXDriver(); 863 xmlParser.setCharacterHandler(sourceCode); 864 reader = new IdFilter(xmlParser); 865 reader.setFeature( 866 "http://xml.org/sax/features/string-interning", 867 true); 868 reader.setFeature( 869 "http://xml.org/sax/features/external-general-entities", 870 parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION); 871 reader.setFeature( 872 "http://xml.org/sax/features/external-parameter-entities", 873 parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION); 
874 if (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION) { 875 reader.setEntityResolver(entityResolver); 876 } else { 877 reader.setEntityResolver(new NullEntityResolver()); 878 } 879 if (validator == null) { 880 bufferingRootNamespaceSniffer = new BufferingRootNamespaceSniffer( 881 this); 882 reader.setContentHandler(bufferingRootNamespaceSniffer); 883 } else { 884 reader.setContentHandler(new RootNamespaceSniffer(this, 885 validator.getContentHandler())); 886 reader.setDTDHandler(validator.getDTDHandler()); 887 } 888 } 889 890 /** 891 * @param validator 892 * @return 893 * @throws SAXException 894 * @throws IOException 895 * @throws IncorrectSchemaException 896 */ 897 private Validator validatorByUrls(String schemaList) throws SAXException, 898 IOException, IncorrectSchemaException { 899 Validator validator = null; 900 String[] schemas = SPACE.split(schemaList); 901 for (int i = schemas.length - 1; i > -1; i--) { 902 String url = schemas[i]; 903 if ("http://hsivonen.iki.fi/checkers/all/".equals(url)) { 904 for (int j = 0; j < ALL_CHECKERS.length; j++) { 905 validator = combineValidatorByUrl(validator, 906 ALL_CHECKERS[j]); 907 } 908 } else if ("http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) { 909 for (int j = 0; j < ALL_CHECKERS_HTML4.length; j++) { 910 validator = combineValidatorByUrl(validator, 911 ALL_CHECKERS_HTML4[j]); 912 } 913 } else { 914 validator = combineValidatorByUrl(validator, url); 915 } 916 } 917 return validator; 918 } 919 920 /** 921 * @param validator 922 * @param url 923 * @return 924 * @throws SAXException 925 * @throws IOException 926 * @throws IncorrectSchemaException 927 */ 928 private Validator combineValidatorByUrl(Validator validator, String url) 929 throws SAXException, IOException, IncorrectSchemaException { 930 if (!"".equals(url)) { 931 Validator v = validatorByUrl(url); 932 if (validator == null) { 933 validator = v; 934 } else { 935 validator = new CombineValidator(v, validator); 936 } 937 } 938 return 
validator; 939 } 940 941 /** 942 * @param url 943 * @return 944 * @throws SAXException 945 * @throws IOException 946 * @throws IncorrectSchemaException 947 */ 948 private Validator validatorByUrl(String url) throws SAXException, 949 IOException, IncorrectSchemaException { 950 if (loadedValidatorUrls.contains(url)) { 951 return null; 952 } 953 loadedValidatorUrls.add(url); 954 if ("http://hsivonen.iki.fi/checkers/table/".equals(url)) { 955 return new CheckerValidator(new TableChecker(), jingPropertyMap); 956 } else if ("http://hsivonen.iki.fi/checkers/nfc/".equals(url)) { 957 this.checkNormalization = true; 958 return new CheckerValidator(new NormalizationChecker(), 959 jingPropertyMap); 960 } else if ("http://hsivonen.iki.fi/checkers/significant-inline/".equals(url)) { 961 return new CheckerValidator(new SignificantInlineChecker(), 962 jingPropertyMap); 963 } else if ("http://hsivonen.iki.fi/checkers/debug/".equals(url)) { 964 return new CheckerValidator(new DebugChecker(), jingPropertyMap); 965 } else if ("http://hsivonen.iki.fi/checkers/text-content/".equals(url)) { 966 return new CheckerValidator(new TextContentChecker(), 967 jingPropertyMap); 968 } else if ("http://n.validator.nu/checkers/usemap/".equals(url)) { 969 return new CheckerValidator(new UsemapChecker(), jingPropertyMap); 970 } 971 Schema sch = schemaByUrl(url); 972 Validator validator = sch.createValidator(jingPropertyMap); 973 return validator; 974 } 975 976 /** 977 * @param url 978 * @return 979 * @throws SAXException 980 * @throws IOException 981 * @throws IncorrectSchemaException 982 */ 983 private Schema schemaByUrl(String url) throws SAXException, IOException, 984 IncorrectSchemaException { 985 int i = Arrays.binarySearch(preloadedSchemaUrls, url); 986 if (i > -1) { 987 return preloadedSchemas[i]; 988 } 989 990 TypedInputSource schemaInput = (TypedInputSource) entityResolver.resolveEntity( 991 null, url); 992 SchemaReader sr = null; 993 if 
("application/relax-ng-compact-syntax".equals(schemaInput.getType())) { 994 sr = CompactSchemaReader.getInstance(); 995 } else { 996 sr = new AutoSchemaReader(); 997 } 998 Schema sch = sr.createSchema(schemaInput, jingPropertyMap); 999 return sch; 1000 } 1001 1002 /** 1003 * @param url 1004 * @return 1005 * @throws SAXException 1006 * @throws IOException 1007 * @throws IncorrectSchemaException 1008 */ 1009 private static Schema schemaByUrl(String url, EntityResolver resolver, 1010 PropertyMap pMap) throws SAXException, IOException, 1011 IncorrectSchemaException { 1012 log4j.debug("Will load schema: " + url); 1013 TypedInputSource schemaInput = (TypedInputSource) resolver.resolveEntity( 1014 null, url); 1015 SchemaReader sr = null; 1016 if ("application/relax-ng-compact-syntax".equals(schemaInput.getType())) { 1017 sr = CompactSchemaReader.getInstance(); 1018 } else { 1019 sr = new AutoSchemaReader(); 1020 } 1021 Schema sch = sr.createSchema(schemaInput, pMap); 1022 return sch; 1023 } 1024 1025 /** 1026 * @throws SAXException 1027 */ 1028 void emitTitle(boolean markupAllowed) throws SAXException { 1029 if (willValidate()) { 1030 emitter.characters(RESULTS_TITLE); 1031 if (document != null) { 1032 emitter.characters(FOR); 1033 emitter.characters(scrub(document)); 1034 } 1035 } else { 1036 emitter.characters(SERVICE_TITLE); 1037 if (markupAllowed) { 1038 emitter.startElement("span"); 1039 emitter.characters(TWO_POINT_OH_BETA); 1040 emitter.endElement("span"); 1041 } 1042 } 1043 } 1044 1045 void emitForm() throws SAXException { 1046 attrs.clear(); 1047 attrs.addAttribute("method", "get"); 1048 // attrs.addAttribute("method", "post"); 1049 // attrs.addAttribute("enctype", "multipart/form-data"); 1050 attrs.addAttribute("action", request.getRequestURL().toString()); 1051 attrs.addAttribute("onsubmit", "formSubmission()"); 1052 emitter.startElement("form", attrs); 1053 emitFormContent(); 1054 emitter.endElement("form"); 1055 } 1056 1057 /** 1058 * @throws SAXException 
1059 */ 1060 protected void emitFormContent() throws SAXException { 1061 FormEmitter.emit(contentHandler, this); 1062 } 1063 1064 void emitSchemaField() throws SAXException { 1065 attrs.clear(); 1066 attrs.addAttribute("name", "schema"); 1067 attrs.addAttribute("id", "schema"); 1068 attrs.addAttribute("onchange", "schemaChanged();"); 1069 attrs.addAttribute("pattern", "(?:https?://.+(?:\\s+https?://.+)*)?"); 1070 attrs.addAttribute( 1071 "title", 1072 "The schema field takes zero or more space-separated absolute IRIs (http or https only) of the schemas that the document is to be validated against. (When left blank, the service will attempt to pick schemas automatically.)"); 1073 if (schemaUrls != null) { 1074 attrs.addAttribute("value", scrub(schemaUrls)); 1075 } 1076 emitter.startElement("input", attrs); 1077 emitter.endElement("input"); 1078 } 1079 1080 void emitDocField() throws SAXException { 1081 attrs.clear(); 1082 attrs.addAttribute("type", "url"); 1083 attrs.addAttribute("name", "doc"); 1084 attrs.addAttribute("id", "doc"); 1085 attrs.addAttribute("pattern", "(?:https?://.+)?"); 1086 attrs.addAttribute( 1087 "title", 1088 // XXX drop last sentence for html5 facet 1089 "The document field takes the absolute IRI (http or https only) of the document to be checked. 
(The document field can also be left blank in order to bookmark settings.)"); 1090 if (document != null) { 1091 attrs.addAttribute("value", scrub(document)); 1092 } 1093 emitter.startElement("input", attrs); 1094 emitter.endElement("input"); 1095 } 1096 1097 private String scrubUrl(String urlStr) { 1098 if (urlStr == null) { 1099 return null; 1100 } 1101 1102 try { 1103 IRI iri = iriFactory.construct(urlStr); 1104 return iri.toASCIIString(); 1105 } catch (IRIException e) { 1106 return null; 1107 } catch (MalformedURLException e) { 1108 return null; 1109 } 1110 } 1111 1112 /** 1113 * @throws SAXException 1114 * 1115 */ 1116 void emitSchemaDuration() throws SAXException { 1117 } 1118 1119 /** 1120 * @throws SAXException 1121 * 1122 */ 1123 void emitDocDuration() throws SAXException { 1124 } 1125 1126 /** 1127 * @throws SAXException 1128 * 1129 */ 1130 void emitTotalDuration() throws SAXException { 1131 emitter.characters("" + (System.currentTimeMillis() - start)); 1132 } 1133 1134 /** 1135 * @throws SAXException 1136 * 1137 */ 1138 void emitPresetOptions() throws SAXException { 1139 for (int i = 0; i < presetUrls.length; i++) { 1140 emitter.option(presetLabels[i], presetUrls[i], false); 1141 } 1142 } 1143 1144 /** 1145 * @throws SAXException 1146 * 1147 */ 1148 void emitParserOptions() throws SAXException { 1149 emitter.option("Automatically from Content-Type", "", 1150 (parser == ParserMode.AUTO)); 1151 emitter.option("XML; don\u2019t load external entities", "xml", 1152 (parser == ParserMode.XML_NO_EXTERNAL_ENTITIES)); 1153 emitter.option("XML; load external entities", "xmldtd", 1154 (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION)); 1155 emitter.option("HTML; flavor from doctype", "html", 1156 (parser == ParserMode.HTML_AUTO)); 1157 emitter.option("HTML5", "html5", (parser == ParserMode.HTML)); 1158 emitter.option("HTML 4.01 Strict", "html4", 1159 (parser == ParserMode.HTML401_STRICT)); 1160 emitter.option("HTML 4.01 Transitional", "html4tr", 1161 
(parser == ParserMode.HTML401_TRANSITIONAL)); 1162 } 1163 1164 /** 1165 * @throws SAXException 1166 * 1167 */ 1168 void emitLaxTypeField() throws SAXException { 1169 emitter.checkbox("laxtype", "yes", laxType); 1170 } 1171 1172 /** 1173 * @throws SAXException 1174 * 1175 */ 1176 void emitShowSourceField() throws SAXException { 1177 emitter.checkbox("showsource", "yes", showSource); 1178 } 1179 1180 void rootNamespace(String namespace, Locator locator) throws SAXException { 1181 if (validator == null) { 1182 int index = -1; 1183 for (int i = 0; i < presetNamespaces.length; i++) { 1184 if (namespace.equals(presetNamespaces[i])) { 1185 index = i; 1186 break; 1187 } 1188 } 1189 if (index == -1) { 1190 String message = "Cannot find preset schema for namespace: \u201C" 1191 + namespace + "\u201D."; 1192 SAXException se = new SAXException(message); 1193 errorHandler.schemaError(se); 1194 throw se; 1195 } 1196 String label = presetLabels[index]; 1197 String urls = presetUrls[index]; 1198 errorHandler.info("Using the preset for " + label 1199 + " based on the root namespace."); 1200 try { 1201 validator = validatorByUrls(urls); 1202 } catch (IOException ioe) { 1203 // At this point the schema comes from memory. 1204 throw new RuntimeException(ioe); 1205 } catch (IncorrectSchemaException e) { 1206 // At this point the schema comes from memory. 1207 throw new RuntimeException(e); 1208 } 1209 if (bufferingRootNamespaceSniffer == null) { 1210 throw new RuntimeException( 1211 "Bug! 
bufferingRootNamespaceSniffer was null."); 1212 } 1213 bufferingRootNamespaceSniffer.setContentHandler(validator.getContentHandler()); 1214 } 1215 1216 if (!rootNamespaceSeen) { 1217 rootNamespaceSeen = true; 1218 if (contentType != null) { 1219 int i; 1220 if ((i = Arrays.binarySearch(KNOWN_CONTENT_TYPES, contentType)) > -1) { 1221 if (!NAMESPACES_FOR_KNOWN_CONTENT_TYPES[i].equals(namespace)) { 1222 String message = "\u201C" 1223 + contentType 1224 + "\u201D is not an appropriate Content-Type for a document whose root namespace is \u201C" 1225 + namespace + "\u201D."; 1226 SAXParseException spe = new SAXParseException(message, 1227 locator); 1228 errorHandler.warning(spe); 1229 } 1230 } 1231 } 1232 } 1233 } 1234 1235 public void documentMode(DocumentMode mode, String publicIdentifier, 1236 String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) 1237 throws SAXException { 1238 if (validator == null) { 1239 try { 1240 if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicIdentifier)) { 1241 errorHandler.info("XHTML 1.0 Transitional doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for XHTML 1.0 Transitional." 1242 + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled." 1243 : "")); 1244 validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA); 1245 } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) { 1246 errorHandler.info("XHTML 1.0 Strict doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for XHTML 1.0 Strict." 1247 + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled." 
1248 : "")); 1249 validator = validatorByDoctype(XHTML1STRICT_SCHEMA); 1250 } else if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) { 1251 errorHandler.info("HTML 4.01 Transitional doctype seen. Using the schema for XHTML 1.0 Transitional." 1252 + (html4SpecificAdditionalErrorChecks ? "" 1253 : " HTML4-specific tokenization errors are not enabled.")); 1254 validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA); 1255 } else if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) { 1256 errorHandler.info("HTML 4.01 Strict doctype seen. Using the schema for XHTML 1.0 Strict." 1257 + (html4SpecificAdditionalErrorChecks ? "" 1258 : " HTML4-specific tokenization errors are not enabled.")); 1259 validator = validatorByDoctype(XHTML1STRICT_SCHEMA); 1260 } else if ("-//W3C//DTD HTML 4.0 Transitional//EN".equals(publicIdentifier)) { 1261 errorHandler.info("Legacy HTML 4.0 Transitional doctype seen. Please consider using HTML 4.01 Transitional instead. Proceeding anyway for your convenience with the schema for XHTML 1.0 Transitional." 1262 + (html4SpecificAdditionalErrorChecks ? "" 1263 : " HTML4-specific tokenization errors are not enabled.")); 1264 validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA); 1265 } else if ("-//W3C//DTD HTML 4.0//EN".equals(publicIdentifier)) { 1266 errorHandler.info("Legacy HTML 4.0 Strict doctype seen. Please consider using HTML 4.01 instead. Proceeding anyway for your convenience with the schema for XHTML 1.0 Strict." 1267 + (html4SpecificAdditionalErrorChecks ? "" 1268 : " HTML4-specific tokenization errors are not enabled.")); 1269 validator = validatorByDoctype(XHTML1STRICT_SCHEMA); 1270 } else { 1271 errorHandler.info("Using the schema for HTML5." 1272 + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled." 1273 : "")); 1274 validator = validatorByDoctype(HTML5_SCHEMA); 1275 } 1276 } catch (IOException ioe) { 1277 // At this point the schema comes from memory. 
1278 throw new RuntimeException(ioe); 1279 } catch (IncorrectSchemaException e) { 1280 // At this point the schema comes from memory. 1281 throw new RuntimeException(e); 1282 } 1283 ContentHandler ch = validator.getContentHandler(); 1284 ch.setDocumentLocator(htmlParser.getDocumentLocator()); 1285 ch.startDocument(); 1286 reader.setContentHandler(ch); 1287 } else { 1288 if (html4SpecificAdditionalErrorChecks) { 1289 errorHandler.info("HTML4-specific tokenization errors are enabled."); 1290 } 1291 } 1292 } 1293 1294 /** 1295 * @param acceptAllKnownXmlTypes 1296 * @see nu.validator.xml.ContentTypeParser#setAcceptAllKnownXmlTypes(boolean) 1297 */ 1298 protected void setAcceptAllKnownXmlTypes(boolean acceptAllKnownXmlTypes) { 1299 contentTypeParser.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes); 1300 httpRes.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes); 1301 } 1302 1303 /** 1304 * @param allowGenericXml 1305 * @see nu.validator.xml.ContentTypeParser#setAllowGenericXml(boolean) 1306 */ 1307 protected void setAllowGenericXml(boolean allowGenericXml) { 1308 contentTypeParser.setAllowGenericXml(allowGenericXml); 1309 httpRes.setAllowGenericXml(allowGenericXml); 1310 } 1311 1312 /** 1313 * @param allowHtml 1314 * @see nu.validator.xml.ContentTypeParser#setAllowHtml(boolean) 1315 */ 1316 protected void setAllowHtml(boolean allowHtml) { 1317 contentTypeParser.setAllowHtml(allowHtml); 1318 httpRes.setAllowHtml(allowHtml); 1319 } 1320 1321 /** 1322 * @param allowRnc 1323 * @see nu.validator.xml.ContentTypeParser#setAllowRnc(boolean) 1324 */ 1325 protected void setAllowRnc(boolean allowRnc) { 1326 contentTypeParser.setAllowRnc(allowRnc); 1327 httpRes.setAllowRnc(allowRnc); 1328 entityResolver.setAllowRnc(allowRnc); 1329 } 1330 1331 /** 1332 * @param allowXhtml 1333 * @see nu.validator.xml.ContentTypeParser#setAllowXhtml(boolean) 1334 */ 1335 protected void setAllowXhtml(boolean allowXhtml) { 1336 contentTypeParser.setAllowXhtml(allowXhtml); 1337 
httpRes.setAllowXhtml(allowXhtml); 1338 } 1339 1340 /** 1341 * @throws SAXException 1342 * @throws IOException 1343 */ 1344 protected void loadDocumentInput() throws SAXException, IOException { 1345 if (methodIsGet) { 1346 documentInput = (TypedInputSource) entityResolver.resolveEntity( 1347 null, document); 1348 } else { // POST 1349 documentInput = contentTypeParser.buildTypedInputSource(document, 1350 null, postContentType); 1351 documentInput.setByteStream(request.getInputStream()); 1352 } 1353 } 1354 }