001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     * Copyright (c) 2007 Mozilla Foundation
004     *
005     * Permission is hereby granted, free of charge, to any person obtaining a 
006     * copy of this software and associated documentation files (the "Software"), 
007     * to deal in the Software without restriction, including without limitation 
008     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
009     * and/or sell copies of the Software, and to permit persons to whom the 
010     * Software is furnished to do so, subject to the following conditions:
011     *
012     * The above copyright notice and this permission notice shall be included in 
013     * all copies or substantial portions of the Software.
014     *
015     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
016     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
017     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
018     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
019     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
020     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
021     * DEALINGS IN THE SOFTWARE.
022     */
023    
024    package nu.validator.htmlparser.tools;
025    
026    import java.io.File;
027    import java.io.FileOutputStream;
028    import java.io.IOException;
029    import java.net.MalformedURLException;
030    import java.util.Properties;
031    
032    import javax.xml.parsers.DocumentBuilder;
033    import javax.xml.parsers.DocumentBuilderFactory;
034    import javax.xml.parsers.ParserConfigurationException;
035    import javax.xml.parsers.SAXParserFactory;
036    import javax.xml.transform.Templates;
037    import javax.xml.transform.Transformer;
038    import javax.xml.transform.TransformerException;
039    import javax.xml.transform.TransformerFactory;
040    import javax.xml.transform.dom.DOMSource;
041    import javax.xml.transform.sax.SAXResult;
042    import javax.xml.transform.sax.SAXTransformerFactory;
043    import javax.xml.transform.sax.TemplatesHandler;
044    import javax.xml.transform.sax.TransformerHandler;
045    
046    import nu.validator.htmlparser.common.XmlViolationPolicy;
047    import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
048    import nu.validator.htmlparser.sax.HtmlParser;
049    import nu.validator.htmlparser.sax.HtmlSerializer;
050    import nu.validator.htmlparser.test.SystemErrErrorHandler;
051    
052    import org.apache.xml.serializer.Method;
053    import org.apache.xml.serializer.OutputPropertiesFactory;
054    import org.apache.xml.serializer.Serializer;
055    import org.apache.xml.serializer.SerializerFactory;
056    import org.w3c.dom.Document;
057    import org.xml.sax.ContentHandler;
058    import org.xml.sax.SAXException;
059    import org.xml.sax.XMLReader;
060    import org.xml.sax.ext.LexicalHandler;
061    
062    public class XSLT4HTML5 {
063    
064        private enum Mode {
065            STREAMING_SAX, BUFFERED_SAX, DOM,
066        }
067    
068        private static final String TEMPLATE = "--template=";
069    
070        private static final String INPUT_HTML = "--input-html=";
071    
072        private static final String INPUT_XML = "--input-xml=";
073    
074        private static final String OUTPUT_HTML = "--output-html=";
075    
076        private static final String OUTPUT_XML = "--output-xml=";
077    
078        private static final String MODE = "--mode=";
079    
080        /**
081         * @param args
082         * @throws ParserConfigurationException 
083         * @throws SAXException 
084         * @throws IOException 
085         * @throws MalformedURLException 
086         * @throws TransformerException 
087         */
088        public static void main(String[] args) throws SAXException,
089                ParserConfigurationException, MalformedURLException, IOException, TransformerException {
090            if (args.length == 0) {
091                System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
092                System.exit(0);
093            }
094            String template = null;
095            String input = null;
096            boolean inputHtml = false;
097            String output = null;
098            boolean outputHtml = false;
099            Mode mode = null;
100            for (int i = 0; i < args.length; i++) {
101                String arg = args[i];
102                if (arg.startsWith(TEMPLATE)) {
103                    if (template == null) {
104                        template = arg.substring(TEMPLATE.length());
105                    } else {
106                        System.err.println("Tried to set template twice.");
107                        System.exit(1);
108                    }
109                } else if (arg.startsWith(INPUT_HTML)) {
110                    if (input == null) {
111                        input = arg.substring(INPUT_HTML.length());
112                        inputHtml = true;
113                    } else {
114                        System.err.println("Tried to set input twice.");
115                        System.exit(2);
116                    }
117                } else if (arg.startsWith(INPUT_XML)) {
118                    if (input == null) {
119                        input = arg.substring(INPUT_XML.length());
120                        inputHtml = false;
121                    } else {
122                        System.err.println("Tried to set input twice.");
123                        System.exit(2);
124                    }
125                } else if (arg.startsWith(OUTPUT_HTML)) {
126                    if (output == null) {
127                        output = arg.substring(OUTPUT_HTML.length());
128                        outputHtml = true;
129                    } else {
130                        System.err.println("Tried to set output twice.");
131                        System.exit(3);
132                    }
133                } else if (arg.startsWith(OUTPUT_XML)) {
134                    if (output == null) {
135                        output = arg.substring(OUTPUT_XML.length());
136                        outputHtml = false;
137                    } else {
138                        System.err.println("Tried to set output twice.");
139                        System.exit(3);
140                    }
141                } else if (arg.startsWith(MODE)) {
142                    if (mode == null) {
143                        String modeStr = arg.substring(MODE.length());
144                        if ("dom".equals(modeStr)) {
145                            mode = Mode.DOM;
146                        } else if ("sax-buffered".equals(modeStr)) {
147                            mode = Mode.BUFFERED_SAX;
148                        } else if ("sax-streaming".equals(modeStr)) {
149                            mode = Mode.STREAMING_SAX;
150                        } else {
151                            System.err.println("Unrecognized mode.");
152                            System.exit(5);
153                        }
154                    } else {
155                        System.err.println("Tried to set mode twice.");
156                        System.exit(4);
157                    }
158                }
159            }
160    
161            if (template == null) {
162                System.err.println("No template specified.");
163                System.exit(6);
164            }
165            if (input == null) {
166                System.err.println("No input specified.");
167                System.exit(7);
168            }
169            if (output == null) {
170                System.err.println("No output specified.");
171                System.exit(8);
172            }
173            if (mode == null) {
174                mode = Mode.BUFFERED_SAX;
175            }
176            
177            SystemErrErrorHandler errorHandler = new SystemErrErrorHandler();
178    
179            SAXParserFactory factory = SAXParserFactory.newInstance();
180            factory.setNamespaceAware(true);
181            factory.setValidating(false);
182            XMLReader reader = factory.newSAXParser().getXMLReader();
183            reader.setErrorHandler(errorHandler);
184    
185            SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
186            transformerFactory.setErrorListener(errorHandler);
187            TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler();
188            reader.setContentHandler(templatesHandler);
189            reader.parse(new File(template).toURI().toASCIIString());
190    
191            Templates templates = templatesHandler.getTemplates();
192    
193            FileOutputStream outputStream = new FileOutputStream(output);
194            ContentHandler serializer;
195            if (outputHtml) {
196                serializer = new HtmlSerializer(outputStream);
197            } else {
198                Properties props = OutputPropertiesFactory.getDefaultMethodProperties(Method.XML);
199                Serializer ser = SerializerFactory.getSerializer(props);
200                ser.setOutputStream(outputStream);
201                serializer = ser.asContentHandler();
202            }
203            SAXResult result = new SAXResult(new XmlnsDropper(serializer));
204            result.setLexicalHandler((LexicalHandler) serializer);
205    
206            if (mode == Mode.DOM) {
207                Document inputDoc;
208                DocumentBuilder builder;
209                if (inputHtml) {
210                    builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET);
211                } else {
212                    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
213                    factory.setNamespaceAware(true);
214                    try {
215                        builder = builderFactory.newDocumentBuilder();
216                    } catch (ParserConfigurationException e) {
217                        throw new RuntimeException(e);
218                    }
219                }
220                inputDoc = builder.parse(new File(input));
221                DOMSource inputSource = new DOMSource(inputDoc,
222                        new File(input).toURI().toASCIIString());
223                Transformer transformer = templates.newTransformer();
224                transformer.setErrorListener(errorHandler);
225                transformer.transform(inputSource, result);
226            } else {
227                if (inputHtml) {
228                    reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
229                    if (mode == Mode.STREAMING_SAX) {
230                        reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL);
231                    }
232                }
233                TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates);
234                transformerHandler.setResult(result);
235                reader.setErrorHandler(errorHandler);
236                reader.setContentHandler(transformerHandler);
237                reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler);
238                reader.parse(new File(input).toURI().toASCIIString());
239            }
240            outputStream.flush();
241            outputStream.close();
242        }
243    
244    }