001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * Copyright (c) 2007 Mozilla Foundation 004 * 005 * Permission is hereby granted, free of charge, to any person obtaining a 006 * copy of this software and associated documentation files (the "Software"), 007 * to deal in the Software without restriction, including without limitation 008 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 009 * and/or sell copies of the Software, and to permit persons to whom the 010 * Software is furnished to do so, subject to the following conditions: 011 * 012 * The above copyright notice and this permission notice shall be included in 013 * all copies or substantial portions of the Software. 014 * 015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 021 * DEALINGS IN THE SOFTWARE. 022 */ 023 024 package nu.validator.htmlparser.tools; 025 026 import java.io.File; 027 import java.io.FileOutputStream; 028 import java.io.IOException; 029 import java.net.MalformedURLException; 030 import java.util.Properties; 031 032 import javax.xml.parsers.DocumentBuilder; 033 import javax.xml.parsers.DocumentBuilderFactory; 034 import javax.xml.parsers.ParserConfigurationException; 035 import javax.xml.parsers.SAXParserFactory; 036 import javax.xml.transform.Templates; 037 import javax.xml.transform.Transformer; 038 import javax.xml.transform.TransformerException; 039 import javax.xml.transform.TransformerFactory; 040 import javax.xml.transform.dom.DOMSource; 041 import javax.xml.transform.sax.SAXResult; 042 import javax.xml.transform.sax.SAXTransformerFactory; 043 import javax.xml.transform.sax.TemplatesHandler; 044 import javax.xml.transform.sax.TransformerHandler; 045 046 import nu.validator.htmlparser.common.XmlViolationPolicy; 047 import nu.validator.htmlparser.dom.HtmlDocumentBuilder; 048 import nu.validator.htmlparser.sax.HtmlParser; 049 import nu.validator.htmlparser.sax.HtmlSerializer; 050 import nu.validator.htmlparser.test.SystemErrErrorHandler; 051 052 import org.apache.xml.serializer.Method; 053 import org.apache.xml.serializer.OutputPropertiesFactory; 054 import org.apache.xml.serializer.Serializer; 055 import org.apache.xml.serializer.SerializerFactory; 056 import org.w3c.dom.Document; 057 import org.xml.sax.ContentHandler; 058 import org.xml.sax.SAXException; 059 import org.xml.sax.XMLReader; 060 import org.xml.sax.ext.LexicalHandler; 061 062 public class XSLT4HTML5 { 063 064 private enum Mode { 065 STREAMING_SAX, BUFFERED_SAX, DOM, 066 } 067 068 private static final String TEMPLATE = "--template="; 069 070 private static final String INPUT_HTML = "--input-html="; 071 072 private static final String INPUT_XML = "--input-xml="; 073 074 private static final String OUTPUT_HTML = "--output-html="; 075 076 private static final String OUTPUT_XML = "--output-xml="; 077 078 private static final String MODE = "--mode="; 079 080 /** 081 * @param args 082 * @throws ParserConfigurationException 083 * @throws SAXException 084 * @throws IOException 085 * @throws MalformedURLException 086 * @throws TransformerException 087 */ 088 public static void main(String[] args) throws SAXException, 089 ParserConfigurationException, MalformedURLException, IOException, TransformerException { 090 if (args.length == 0) { 091 System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]"); 092 System.exit(0); 093 } 094 String template = null; 095 String input = null; 096 boolean inputHtml = false; 097 String output = null; 098 boolean outputHtml = false; 099 Mode mode = null; 100 for (int i = 0; i < args.length; i++) { 101 String arg = args[i]; 102 if (arg.startsWith(TEMPLATE)) { 103 if (template == null) { 104 template = arg.substring(TEMPLATE.length()); 105 } else { 106 System.err.println("Tried to set template twice."); 107 System.exit(1); 108 } 109 } else if (arg.startsWith(INPUT_HTML)) { 110 if (input == null) { 111 input = arg.substring(INPUT_HTML.length()); 112 inputHtml = true; 113 } else { 114 System.err.println("Tried to set input twice."); 115 System.exit(2); 116 } 117 } else if (arg.startsWith(INPUT_XML)) { 118 if (input == null) { 119 input = arg.substring(INPUT_XML.length()); 120 inputHtml = false; 121 } else { 122 System.err.println("Tried to set input twice."); 123 System.exit(2); 124 } 125 } else if (arg.startsWith(OUTPUT_HTML)) { 126 if (output == null) { 127 output = arg.substring(OUTPUT_HTML.length()); 128 outputHtml = true; 129 } else { 130 System.err.println("Tried to set output twice."); 131 System.exit(3); 132 } 133 } else if (arg.startsWith(OUTPUT_XML)) { 134 if (output == null) { 135 output = arg.substring(OUTPUT_XML.length()); 136 outputHtml = false; 137 } else { 138 System.err.println("Tried to set output twice."); 139 System.exit(3); 140 } 141 } else if (arg.startsWith(MODE)) { 142 if (mode == null) { 143 String modeStr = arg.substring(MODE.length()); 144 if ("dom".equals(modeStr)) { 145 mode = Mode.DOM; 146 } else if ("sax-buffered".equals(modeStr)) { 147 mode = Mode.BUFFERED_SAX; 148 } else if ("sax-streaming".equals(modeStr)) { 149 mode = Mode.STREAMING_SAX; 150 } else { 151 System.err.println("Unrecognized mode."); 152 System.exit(5); 153 } 154 } else { 155 System.err.println("Tried to set mode twice."); 156 System.exit(4); 157 } 158 } 159 } 160 161 if (template == null) { 162 System.err.println("No template specified."); 163 System.exit(6); 164 } 165 if (input == null) { 166 System.err.println("No input specified."); 167 System.exit(7); 168 } 169 if (output == null) { 170 System.err.println("No output specified."); 171 System.exit(8); 172 } 173 if (mode == null) { 174 mode = Mode.BUFFERED_SAX; 175 } 176 177 SystemErrErrorHandler errorHandler = new SystemErrErrorHandler(); 178 179 SAXParserFactory factory = SAXParserFactory.newInstance(); 180 factory.setNamespaceAware(true); 181 factory.setValidating(false); 182 XMLReader reader = factory.newSAXParser().getXMLReader(); 183 reader.setErrorHandler(errorHandler); 184 185 SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance(); 186 transformerFactory.setErrorListener(errorHandler); 187 TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler(); 188 reader.setContentHandler(templatesHandler); 189 reader.parse(new File(template).toURI().toASCIIString()); 190 191 Templates templates = templatesHandler.getTemplates(); 192 193 FileOutputStream outputStream = new FileOutputStream(output); 194 ContentHandler serializer; 195 if (outputHtml) { 196 serializer = new HtmlSerializer(outputStream); 197 } else { 198 Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML); 199 Serializer ser = SerializerFactory.getSerializer(props); 200 ser.setOutputStream(outputStream); 201 serializer = ser.asContentHandler(); 202 } 203 SAXResult result = new SAXResult(new XmlnsDropper(serializer)); 204 result.setLexicalHandler((LexicalHandler) serializer); 205 206 if (mode == Mode.DOM) { 207 Document inputDoc; 208 DocumentBuilder builder; 209 if (inputHtml) { 210 builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET); 211 } else { 212 DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); 213 factory.setNamespaceAware(true); 214 try { 215 builder = builderFactory.newDocumentBuilder(); 216 } catch (ParserConfigurationException e) { 217 throw new RuntimeException(e); 218 } 219 } 220 inputDoc = builder.parse(new File(input)); 221 DOMSource inputSource = new DOMSource(inputDoc, 222 new File(input).toURI().toASCIIString()); 223 Transformer transformer = templates.newTransformer(); 224 transformer.setErrorListener(errorHandler); 225 transformer.transform(inputSource, result); 226 } else { 227 if (inputHtml) { 228 reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); 229 if (mode == Mode.STREAMING_SAX) { 230 reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL); 231 } 232 } 233 TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates); 234 transformerHandler.setResult(result); 235 reader.setErrorHandler(errorHandler); 236 reader.setContentHandler(transformerHandler); 237 reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler); 238 reader.parse(new File(input).toURI().toASCIIString()); 239 } 240 outputStream.flush(); 241 outputStream.close(); 242 } 243 244 }