001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.test; 024 025 import java.io.File; 026 import java.io.FileInputStream; 027 import java.io.IOException; 028 import java.io.OutputStreamWriter; 029 import java.io.Writer; 030 031 import nu.validator.htmlparser.impl.TokenHandler; 032 import nu.validator.htmlparser.impl.Tokenizer; 033 034 import org.xml.sax.Attributes; 035 import org.xml.sax.ErrorHandler; 036 import org.xml.sax.InputSource; 037 import org.xml.sax.SAXException; 038 import org.xml.sax.SAXParseException; 039 040 public class TokenPrinter implements TokenHandler, ErrorHandler { 041 042 private final Writer writer; 043 044 public void characters(char[] buf, int start, int length) 045 throws SAXException { 046 try { 047 boolean lineStarted = true; 048 writer.write('-'); 049 for (int i = start; i < start + length; i++) { 050 if (!lineStarted) { 051 writer.write("\n-"); 052 lineStarted = true; 053 } 054 char c = buf[i]; 055 if (c == '\n') { 056 writer.write("\\n"); 057 lineStarted = false; 058 } else { 059 writer.write(c); 060 } 061 } 062 writer.write('\n'); 063 } catch (IOException e) { 064 throw new SAXException(e); 065 } 066 } 067 068 public void comment(char[] buf, int length) throws SAXException { 069 try { 070 writer.write('!'); 071 writer.write(buf, 0, length); 072 writer.write('\n'); 073 } catch (IOException e) { 074 throw new SAXException(e); 075 } 076 } 077 078 public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean correct) throws SAXException { 079 try { 080 writer.write('D'); 081 writer.write(name); 082 writer.write(' '); 083 writer.write("" + correct); 084 writer.write('\n'); 085 } catch (IOException e) { 086 throw new SAXException(e); 087 } 088 } 089 090 public void endTag(String name, Attributes attributes) throws SAXException { 091 try { 092 writer.write(')'); 093 writer.write(name); 094 writer.write('\n'); 095 for (int i = 0; i < attributes.getLength(); i++) { 096 writer.write('A'); 097 writer.write(attributes.getQName(i)); 098 writer.write(' '); 099 writer.write(attributes.getValue(i)); 100 writer.write('\n'); 101 } 102 } catch (IOException e) { 103 throw new SAXException(e); 104 } 105 } 106 107 public void eof() throws SAXException { 108 try { 109 writer.write("E\n"); 110 writer.flush(); 111 writer.close(); 112 } catch (IOException e) { 113 throw new SAXException(e); 114 } 115 } 116 117 public void start(Tokenizer self) throws SAXException { 118 119 } 120 121 public void startTag(String name, Attributes attributes) 122 throws SAXException { 123 try { 124 writer.write('('); 125 writer.write(name); 126 writer.write('\n'); 127 for (int i = 0; i < attributes.getLength(); i++) { 128 writer.write('A'); 129 writer.write(attributes.getQName(i)); 130 writer.write(' '); 131 writer.write(attributes.getValue(i)); 132 writer.write('\n'); 133 } 134 } catch (IOException e) { 135 throw new SAXException(e); 136 } 137 } 138 139 public boolean wantsComments() throws SAXException { 140 return true; 141 } 142 143 public static void main(String[] args) throws SAXException, IOException { 144 TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8")); 145 Tokenizer tokenizer = new Tokenizer(printer); 146 tokenizer.setErrorHandler(printer); 147 File file = new File(args[0]); 148 InputSource is = new InputSource(new FileInputStream(file)); 149 is.setSystemId(file.toURI().toASCIIString()); 150 tokenizer.tokenize(is); 151 } 152 153 /** 154 * @param writer 155 */ 156 public TokenPrinter(final Writer writer) { 157 this.writer = writer; 158 } 159 160 public void error(SAXParseException exception) throws SAXException { 161 try { 162 writer.write("R "); 163 writer.write(exception.getMessage()); 164 writer.write("\n"); 165 } catch (IOException e) { 166 throw new SAXException(e); 167 } 168 } 169 170 public void fatalError(SAXParseException exception) throws SAXException { 171 try { 172 writer.write("F "); 173 writer.write(exception.getMessage()); 174 writer.write("\n"); 175 } catch (IOException e) { 176 throw new SAXException(e); 177 } 178 } 179 180 public void warning(SAXParseException exception) throws SAXException { 181 try { 182 writer.write("W "); 183 writer.write(exception.getMessage()); 184 writer.write("\n"); 185 } catch (IOException e) { 186 throw new SAXException(e); 187 } 188 } 189 190 }