001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 * Copyright (c) 2007 Mozilla Foundation
004 *
005 * Permission is hereby granted, free of charge, to any person obtaining a
006 * copy of this software and associated documentation files (the "Software"),
007 * to deal in the Software without restriction, including without limitation
008 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
009 * and/or sell copies of the Software, and to permit persons to whom the
010 * Software is furnished to do so, subject to the following conditions:
011 *
012 * The above copyright notice and this permission notice shall be included in
013 * all copies or substantial portions of the Software.
014 *
015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
021 * DEALINGS IN THE SOFTWARE.
022 */
023
024 package nu.validator.htmlparser.tools;
025
026 import java.io.File;
027 import java.io.FileOutputStream;
028 import java.io.IOException;
029 import java.net.MalformedURLException;
030 import java.util.Properties;
031
032 import javax.xml.parsers.DocumentBuilder;
033 import javax.xml.parsers.DocumentBuilderFactory;
034 import javax.xml.parsers.ParserConfigurationException;
035 import javax.xml.parsers.SAXParserFactory;
036 import javax.xml.transform.Templates;
037 import javax.xml.transform.Transformer;
038 import javax.xml.transform.TransformerException;
039 import javax.xml.transform.TransformerFactory;
040 import javax.xml.transform.dom.DOMSource;
041 import javax.xml.transform.sax.SAXResult;
042 import javax.xml.transform.sax.SAXTransformerFactory;
043 import javax.xml.transform.sax.TemplatesHandler;
044 import javax.xml.transform.sax.TransformerHandler;
045
046 import nu.validator.htmlparser.common.XmlViolationPolicy;
047 import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
048 import nu.validator.htmlparser.sax.HtmlParser;
049 import nu.validator.htmlparser.sax.HtmlSerializer;
050 import nu.validator.htmlparser.test.SystemErrErrorHandler;
051
052 import org.apache.xml.serializer.Method;
053 import org.apache.xml.serializer.OutputPropertiesFactory;
054 import org.apache.xml.serializer.Serializer;
055 import org.apache.xml.serializer.SerializerFactory;
056 import org.w3c.dom.Document;
057 import org.xml.sax.ContentHandler;
058 import org.xml.sax.SAXException;
059 import org.xml.sax.XMLReader;
060 import org.xml.sax.ext.LexicalHandler;
061
062 public class XSLT4HTML5 {
063
064 private enum Mode {
065 STREAMING_SAX, BUFFERED_SAX, DOM,
066 }
067
068 private static final String TEMPLATE = "--template=";
069
070 private static final String INPUT_HTML = "--input-html=";
071
072 private static final String INPUT_XML = "--input-xml=";
073
074 private static final String OUTPUT_HTML = "--output-html=";
075
076 private static final String OUTPUT_XML = "--output-xml=";
077
078 private static final String MODE = "--mode=";
079
080 /**
081 * @param args
082 * @throws ParserConfigurationException
083 * @throws SAXException
084 * @throws IOException
085 * @throws MalformedURLException
086 * @throws TransformerException
087 */
088 public static void main(String[] args) throws SAXException,
089 ParserConfigurationException, MalformedURLException, IOException, TransformerException {
090 if (args.length == 0) {
091 System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
092 System.exit(0);
093 }
094 String template = null;
095 String input = null;
096 boolean inputHtml = false;
097 String output = null;
098 boolean outputHtml = false;
099 Mode mode = null;
100 for (int i = 0; i < args.length; i++) {
101 String arg = args[i];
102 if (arg.startsWith(TEMPLATE)) {
103 if (template == null) {
104 template = arg.substring(TEMPLATE.length());
105 } else {
106 System.err.println("Tried to set template twice.");
107 System.exit(1);
108 }
109 } else if (arg.startsWith(INPUT_HTML)) {
110 if (input == null) {
111 input = arg.substring(INPUT_HTML.length());
112 inputHtml = true;
113 } else {
114 System.err.println("Tried to set input twice.");
115 System.exit(2);
116 }
117 } else if (arg.startsWith(INPUT_XML)) {
118 if (input == null) {
119 input = arg.substring(INPUT_XML.length());
120 inputHtml = false;
121 } else {
122 System.err.println("Tried to set input twice.");
123 System.exit(2);
124 }
125 } else if (arg.startsWith(OUTPUT_HTML)) {
126 if (output == null) {
127 output = arg.substring(OUTPUT_HTML.length());
128 outputHtml = true;
129 } else {
130 System.err.println("Tried to set output twice.");
131 System.exit(3);
132 }
133 } else if (arg.startsWith(OUTPUT_XML)) {
134 if (output == null) {
135 output = arg.substring(OUTPUT_XML.length());
136 outputHtml = false;
137 } else {
138 System.err.println("Tried to set output twice.");
139 System.exit(3);
140 }
141 } else if (arg.startsWith(MODE)) {
142 if (mode == null) {
143 String modeStr = arg.substring(MODE.length());
144 if ("dom".equals(modeStr)) {
145 mode = Mode.DOM;
146 } else if ("sax-buffered".equals(modeStr)) {
147 mode = Mode.BUFFERED_SAX;
148 } else if ("sax-streaming".equals(modeStr)) {
149 mode = Mode.STREAMING_SAX;
150 } else {
151 System.err.println("Unrecognized mode.");
152 System.exit(5);
153 }
154 } else {
155 System.err.println("Tried to set mode twice.");
156 System.exit(4);
157 }
158 }
159 }
160
161 if (template == null) {
162 System.err.println("No template specified.");
163 System.exit(6);
164 }
165 if (input == null) {
166 System.err.println("No input specified.");
167 System.exit(7);
168 }
169 if (output == null) {
170 System.err.println("No output specified.");
171 System.exit(8);
172 }
173 if (mode == null) {
174 mode = Mode.BUFFERED_SAX;
175 }
176
177 SystemErrErrorHandler errorHandler = new SystemErrErrorHandler();
178
179 SAXParserFactory factory = SAXParserFactory.newInstance();
180 factory.setNamespaceAware(true);
181 factory.setValidating(false);
182 XMLReader reader = factory.newSAXParser().getXMLReader();
183 reader.setErrorHandler(errorHandler);
184
185 SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
186 transformerFactory.setErrorListener(errorHandler);
187 TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler();
188 reader.setContentHandler(templatesHandler);
189 reader.parse(new File(template).toURI().toASCIIString());
190
191 Templates templates = templatesHandler.getTemplates();
192
193 FileOutputStream outputStream = new FileOutputStream(output);
194 ContentHandler serializer;
195 if (outputHtml) {
196 serializer = new HtmlSerializer(outputStream);
197 } else {
198 Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML);
199 Serializer ser = SerializerFactory.getSerializer(props);
200 ser.setOutputStream(outputStream);
201 serializer = ser.asContentHandler();
202 }
203 SAXResult result = new SAXResult(new XmlnsDropper(serializer));
204 result.setLexicalHandler((LexicalHandler) serializer);
205
206 if (mode == Mode.DOM) {
207 Document inputDoc;
208 DocumentBuilder builder;
209 if (inputHtml) {
210 builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET);
211 } else {
212 DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
213 factory.setNamespaceAware(true);
214 try {
215 builder = builderFactory.newDocumentBuilder();
216 } catch (ParserConfigurationException e) {
217 throw new RuntimeException(e);
218 }
219 }
220 inputDoc = builder.parse(new File(input));
221 DOMSource inputSource = new DOMSource(inputDoc,
222 new File(input).toURI().toASCIIString());
223 Transformer transformer = templates.newTransformer();
224 transformer.setErrorListener(errorHandler);
225 transformer.transform(inputSource, result);
226 } else {
227 if (inputHtml) {
228 reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
229 if (mode == Mode.STREAMING_SAX) {
230 reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL);
231 }
232 }
233 TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates);
234 transformerHandler.setResult(result);
235 reader.setErrorHandler(errorHandler);
236 reader.setContentHandler(transformerHandler);
237 reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler);
238 reader.parse(new File(input).toURI().toASCIIString());
239 }
240 outputStream.flush();
241 outputStream.close();
242 }
243
244 }