001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package nu.validator.htmlparser.test;
024
025 import java.io.File;
026 import java.io.FileInputStream;
027 import java.io.IOException;
028 import java.io.OutputStreamWriter;
029 import java.io.Writer;
030
031 import nu.validator.htmlparser.impl.TokenHandler;
032 import nu.validator.htmlparser.impl.Tokenizer;
033
034 import org.xml.sax.Attributes;
035 import org.xml.sax.ErrorHandler;
036 import org.xml.sax.InputSource;
037 import org.xml.sax.SAXException;
038 import org.xml.sax.SAXParseException;
039
040 public class TokenPrinter implements TokenHandler, ErrorHandler {
041
042 private final Writer writer;
043
044 public void characters(char[] buf, int start, int length)
045 throws SAXException {
046 try {
047 boolean lineStarted = true;
048 writer.write('-');
049 for (int i = start; i < start + length; i++) {
050 if (!lineStarted) {
051 writer.write("\n-");
052 lineStarted = true;
053 }
054 char c = buf[i];
055 if (c == '\n') {
056 writer.write("\\n");
057 lineStarted = false;
058 } else {
059 writer.write(c);
060 }
061 }
062 writer.write('\n');
063 } catch (IOException e) {
064 throw new SAXException(e);
065 }
066 }
067
068 public void comment(char[] buf, int length) throws SAXException {
069 try {
070 writer.write('!');
071 writer.write(buf, 0, length);
072 writer.write('\n');
073 } catch (IOException e) {
074 throw new SAXException(e);
075 }
076 }
077
078 public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean correct) throws SAXException {
079 try {
080 writer.write('D');
081 writer.write(name);
082 writer.write(' ');
083 writer.write("" + correct);
084 writer.write('\n');
085 } catch (IOException e) {
086 throw new SAXException(e);
087 }
088 }
089
090 public void endTag(String name, Attributes attributes) throws SAXException {
091 try {
092 writer.write(')');
093 writer.write(name);
094 writer.write('\n');
095 for (int i = 0; i < attributes.getLength(); i++) {
096 writer.write('A');
097 writer.write(attributes.getQName(i));
098 writer.write(' ');
099 writer.write(attributes.getValue(i));
100 writer.write('\n');
101 }
102 } catch (IOException e) {
103 throw new SAXException(e);
104 }
105 }
106
107 public void eof() throws SAXException {
108 try {
109 writer.write("E\n");
110 writer.flush();
111 writer.close();
112 } catch (IOException e) {
113 throw new SAXException(e);
114 }
115 }
116
117 public void start(Tokenizer self) throws SAXException {
118
119 }
120
121 public void startTag(String name, Attributes attributes)
122 throws SAXException {
123 try {
124 writer.write('(');
125 writer.write(name);
126 writer.write('\n');
127 for (int i = 0; i < attributes.getLength(); i++) {
128 writer.write('A');
129 writer.write(attributes.getQName(i));
130 writer.write(' ');
131 writer.write(attributes.getValue(i));
132 writer.write('\n');
133 }
134 } catch (IOException e) {
135 throw new SAXException(e);
136 }
137 }
138
139 public boolean wantsComments() throws SAXException {
140 return true;
141 }
142
143 public static void main(String[] args) throws SAXException, IOException {
144 TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8"));
145 Tokenizer tokenizer = new Tokenizer(printer);
146 tokenizer.setErrorHandler(printer);
147 File file = new File(args[0]);
148 InputSource is = new InputSource(new FileInputStream(file));
149 is.setSystemId(file.toURI().toASCIIString());
150 tokenizer.tokenize(is);
151 }
152
153 /**
154 * @param writer
155 */
156 public TokenPrinter(final Writer writer) {
157 this.writer = writer;
158 }
159
160 public void error(SAXParseException exception) throws SAXException {
161 try {
162 writer.write("R ");
163 writer.write(exception.getMessage());
164 writer.write("\n");
165 } catch (IOException e) {
166 throw new SAXException(e);
167 }
168 }
169
170 public void fatalError(SAXParseException exception) throws SAXException {
171 try {
172 writer.write("F ");
173 writer.write(exception.getMessage());
174 writer.write("\n");
175 } catch (IOException e) {
176 throw new SAXException(e);
177 }
178 }
179
180 public void warning(SAXParseException exception) throws SAXException {
181 try {
182 writer.write("W ");
183 writer.write(exception.getMessage());
184 writer.write("\n");
185 } catch (IOException e) {
186 throw new SAXException(e);
187 }
188 }
189
190 }