001    package nu.validator.servlet;
002    
003    import java.io.IOException;
004    import java.io.OutputStreamWriter;
005    import java.io.Writer;
006    import java.net.MalformedURLException;
007    
008    import javax.servlet.http.HttpServletRequest;
009    import javax.servlet.http.HttpServletResponse;
010    
011    import nu.validator.gnu.xml.aelfred2.SAXDriver;
012    import nu.validator.htmlparser.test.ListErrorHandler;
013    import nu.validator.htmlparser.test.TreeDumpContentHandler;
014    import nu.validator.xml.NullEntityResolver;
015    import nu.validator.xml.PrudentHttpEntityResolver;
016    import nu.validator.xml.TypedInputSource;
017    
018    import org.xml.sax.SAXException;
019    import org.xml.sax.XMLReader;
020    
021    import com.hp.hpl.jena.iri.IRIException;
022    import com.hp.hpl.jena.iri.IRIFactory;
023    
024    
025    public class ParseTreePrinter {
026        
027        private static final String FORM_HTML = "<!DOCTYPE html><title>Parse Tree Dump</title><form><p><input type='url' name='doc' id='doc' pattern='(?:https?://.+)?'> <input name='submit' value='Print Tree' type='submit' id='submit'></form>";
028        
029        private final HttpServletRequest request;
030    
031        private final HttpServletResponse response;
032    
033        /**
034         * @param request
035         * @param response
036         */
037        public ParseTreePrinter(final HttpServletRequest request,
038                final HttpServletResponse response) {
039            this.request = request;
040            this.response = response;
041        }
042    
043        private String scrubUrl(String urlStr) {
044            if (urlStr == null) {
045                return null;
046            }
047            try {
048                return IRIFactory.iriImplementation().construct(urlStr).toASCIIString();
049            } catch (IRIException e) {
050                return null;
051            } catch (MalformedURLException e) {
052                return null;
053            }
054        }
055    
056        public void service() throws IOException {
057            String document = scrubUrl(request.getParameter("doc"));
058            document = ("".equals(document)) ? null : document;
059            Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8");
060            if (document == null) {
061                response.setContentType("text/html; charset=utf-8");
062                writer.write(FORM_HTML);
063                writer.flush();
064                writer.close();
065                return;
066            } else {
067                response.setContentType("text/plain; charset=utf-8");
068                try {
069                PrudentHttpEntityResolver httpRes = new PrudentHttpEntityResolver(
070                        2048 * 1024, false, null);
071                httpRes.setAllowGenericXml(false);
072                httpRes.setAcceptAllKnownXmlTypes(false);
073                httpRes.setAllowHtml(true);
074                httpRes.setAllowXhtml(true);
075                TypedInputSource documentInput = (TypedInputSource) httpRes.resolveEntity(
076                        null, document);
077                String type = documentInput.getType();
078                XMLReader parser;
079                if ("text/html".equals(type)) {
080                    writer.write("HTML parser\n\n#document\n");
081                    parser = new nu.validator.htmlparser.sax.HtmlParser();
082                } else if ("application/xhtml+xml".equals(type)) {
083                    writer.write("XML parser\n\n#document\n");
084                    parser = new SAXDriver();
085                    parser.setFeature(
086                            "http://xml.org/sax/features/external-general-entities",
087                            false);
088                    parser.setFeature(
089                            "http://xml.org/sax/features/external-parameter-entities",
090                            false);
091                    parser.setEntityResolver(new NullEntityResolver());
092                } else {
093                    writer.write("Unsupported content type.\n");
094                    writer.flush();
095                    writer.close();
096                    return;
097                }
098                TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
099                ListErrorHandler listErrorHandler = new ListErrorHandler();
100                parser.setContentHandler(treeDumpContentHandler);
101                parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
102                parser.setErrorHandler(listErrorHandler);
103                parser.parse(documentInput);
104                writer.write("#errors\n");
105                for (String err : listErrorHandler.getErrors()) {
106                    writer.write(err);
107                    writer.write('\n');
108                }
109                } catch (SAXException e) {
110                    writer.write("Exception:\n");
111                    writer.write(e.getMessage());
112                    writer.write("\n");
113                } catch (IOException e) {
114                    writer.write("Exception:\n");
115                    writer.write(e.getMessage());
116                    writer.write("\n");
117                } finally {
118                    writer.flush();
119                    writer.close();
120                }
121            }
122        }
123    
124    }