001 package nu.validator.servlet;
002
003 import java.io.IOException;
004 import java.io.OutputStreamWriter;
005 import java.io.Writer;
006 import java.net.MalformedURLException;
007
008 import javax.servlet.http.HttpServletRequest;
009 import javax.servlet.http.HttpServletResponse;
010
011 import nu.validator.gnu.xml.aelfred2.SAXDriver;
012 import nu.validator.htmlparser.test.ListErrorHandler;
013 import nu.validator.htmlparser.test.TreeDumpContentHandler;
014 import nu.validator.xml.NullEntityResolver;
015 import nu.validator.xml.PrudentHttpEntityResolver;
016 import nu.validator.xml.TypedInputSource;
017
018 import org.xml.sax.SAXException;
019 import org.xml.sax.XMLReader;
020
021 import com.hp.hpl.jena.iri.IRIException;
022 import com.hp.hpl.jena.iri.IRIFactory;
023
024
025 public class ParseTreePrinter {
026
027 private static final String FORM_HTML = "<!DOCTYPE html><title>Parse Tree Dump</title><form><p><input type='url' name='doc' id='doc' pattern='(?:https?://.+)?'> <input name='submit' value='Print Tree' type='submit' id='submit'></form>";
028
029 private final HttpServletRequest request;
030
031 private final HttpServletResponse response;
032
033 /**
034 * @param request
035 * @param response
036 */
037 public ParseTreePrinter(final HttpServletRequest request,
038 final HttpServletResponse response) {
039 this.request = request;
040 this.response = response;
041 }
042
043 private String scrubUrl(String urlStr) {
044 if (urlStr == null) {
045 return null;
046 }
047 try {
048 return IRIFactory.iriImplementation().construct(urlStr).toASCIIString();
049 } catch (IRIException e) {
050 return null;
051 } catch (MalformedURLException e) {
052 return null;
053 }
054 }
055
056 public void service() throws IOException {
057 String document = scrubUrl(request.getParameter("doc"));
058 document = ("".equals(document)) ? null : document;
059 Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8");
060 if (document == null) {
061 response.setContentType("text/html; charset=utf-8");
062 writer.write(FORM_HTML);
063 writer.flush();
064 writer.close();
065 return;
066 } else {
067 response.setContentType("text/plain; charset=utf-8");
068 try {
069 PrudentHttpEntityResolver httpRes = new PrudentHttpEntityResolver(
070 2048 * 1024, false, null);
071 httpRes.setAllowGenericXml(false);
072 httpRes.setAcceptAllKnownXmlTypes(false);
073 httpRes.setAllowHtml(true);
074 httpRes.setAllowXhtml(true);
075 TypedInputSource documentInput = (TypedInputSource) httpRes.resolveEntity(
076 null, document);
077 String type = documentInput.getType();
078 XMLReader parser;
079 if ("text/html".equals(type)) {
080 writer.write("HTML parser\n\n#document\n");
081 parser = new nu.validator.htmlparser.sax.HtmlParser();
082 } else if ("application/xhtml+xml".equals(type)) {
083 writer.write("XML parser\n\n#document\n");
084 parser = new SAXDriver();
085 parser.setFeature(
086 "http://xml.org/sax/features/external-general-entities",
087 false);
088 parser.setFeature(
089 "http://xml.org/sax/features/external-parameter-entities",
090 false);
091 parser.setEntityResolver(new NullEntityResolver());
092 } else {
093 writer.write("Unsupported content type.\n");
094 writer.flush();
095 writer.close();
096 return;
097 }
098 TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
099 ListErrorHandler listErrorHandler = new ListErrorHandler();
100 parser.setContentHandler(treeDumpContentHandler);
101 parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
102 parser.setErrorHandler(listErrorHandler);
103 parser.parse(documentInput);
104 writer.write("#errors\n");
105 for (String err : listErrorHandler.getErrors()) {
106 writer.write(err);
107 writer.write('\n');
108 }
109 } catch (SAXException e) {
110 writer.write("Exception:\n");
111 writer.write(e.getMessage());
112 writer.write("\n");
113 } catch (IOException e) {
114 writer.write("Exception:\n");
115 writer.write(e.getMessage());
116 writer.write("\n");
117 } finally {
118 writer.flush();
119 writer.close();
120 }
121 }
122 }
123
124 }