001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package nu.validator.htmlparser.test;
024    
025    import java.io.File;
026    import java.io.FileInputStream;
027    import java.io.IOException;
028    import java.io.OutputStreamWriter;
029    import java.io.Writer;
030    
031    import nu.validator.htmlparser.impl.TokenHandler;
032    import nu.validator.htmlparser.impl.Tokenizer;
033    
034    import org.xml.sax.Attributes;
035    import org.xml.sax.ErrorHandler;
036    import org.xml.sax.InputSource;
037    import org.xml.sax.SAXException;
038    import org.xml.sax.SAXParseException;
039    
040    public class TokenPrinter implements TokenHandler, ErrorHandler {
041    
042        private final Writer writer;
043        
044        public void characters(char[] buf, int start, int length)
045                throws SAXException {
046            try {
047            boolean lineStarted = true;
048            writer.write('-');
049            for (int i = start; i < start + length; i++) {
050                if (!lineStarted) {
051                    writer.write("\n-");                
052                    lineStarted = true;
053                }
054                char c = buf[i];
055                if (c == '\n') {
056                    writer.write("\\n");                                
057                    lineStarted = false;                
058                } else {
059                    writer.write(c);                
060                }
061            }
062            writer.write('\n');
063            } catch (IOException e) {
064                throw new SAXException(e);
065            }
066        }
067    
068        public void comment(char[] buf, int length) throws SAXException {
069            try {
070                writer.write('!');
071                writer.write(buf, 0, length);
072                writer.write('\n');
073            } catch (IOException e) {
074                throw new SAXException(e);
075            }
076        }
077    
078        public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean correct) throws SAXException {
079            try {
080                writer.write('D');
081                writer.write(name);
082                writer.write(' ');
083                writer.write("" + correct);
084                writer.write('\n');
085            } catch (IOException e) {
086                throw new SAXException(e);
087            }
088        }
089    
090        public void endTag(String name, Attributes attributes) throws SAXException {
091            try {
092                writer.write(')');
093                writer.write(name);
094                writer.write('\n');
095                for (int i = 0; i < attributes.getLength(); i++) {
096                    writer.write('A');
097                    writer.write(attributes.getQName(i));
098                    writer.write(' ');
099                    writer.write(attributes.getValue(i));
100                    writer.write('\n');                
101                }
102            } catch (IOException e) {
103                throw new SAXException(e);
104            }
105        }
106    
107        public void eof() throws SAXException {
108        try {
109            writer.write("E\n");
110            writer.flush();
111            writer.close();
112        } catch (IOException e) {
113            throw new SAXException(e);
114        }        
115        }
116    
117        public void start(Tokenizer self) throws SAXException {
118    
119        }
120    
121        public void startTag(String name, Attributes attributes)
122                throws SAXException {
123            try {
124                writer.write('(');
125                writer.write(name);
126                writer.write('\n');
127                for (int i = 0; i < attributes.getLength(); i++) {
128                    writer.write('A');
129                    writer.write(attributes.getQName(i));
130                    writer.write(' ');
131                    writer.write(attributes.getValue(i));
132                    writer.write('\n');                
133                }
134            } catch (IOException e) {
135                throw new SAXException(e);
136            }
137        }
138    
139        public boolean wantsComments() throws SAXException {
140            return true;
141        }
142    
143        public static void main(String[] args) throws SAXException, IOException {
144            TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8")); 
145            Tokenizer tokenizer = new Tokenizer(printer);
146            tokenizer.setErrorHandler(printer);
147            File file = new File(args[0]);
148            InputSource is = new InputSource(new FileInputStream(file));
149            is.setSystemId(file.toURI().toASCIIString());
150            tokenizer.tokenize(is);
151        }
152    
153        /**
154         * @param writer
155         */
156        public TokenPrinter(final Writer writer) {
157            this.writer = writer;
158        }
159    
160        public void error(SAXParseException exception) throws SAXException {
161            try {
162                writer.write("R ");
163                writer.write(exception.getMessage());
164                writer.write("\n");
165            } catch (IOException e) {
166                throw new SAXException(e);
167            }        
168        }
169    
170        public void fatalError(SAXParseException exception) throws SAXException {
171            try {
172                writer.write("F ");
173                writer.write(exception.getMessage());
174                writer.write("\n");
175            } catch (IOException e) {
176                throw new SAXException(e);
177            }        
178        }
179    
180        public void warning(SAXParseException exception) throws SAXException {
181            try {
182                writer.write("W ");
183                writer.write(exception.getMessage());
184                writer.write("\n");
185            } catch (IOException e) {
186                throw new SAXException(e);
187            }        
188        }
189        
190    }