001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.sax; 024 025 import java.io.IOException; 026 import java.io.OutputStream; 027 import java.io.OutputStreamWriter; 028 import java.io.UnsupportedEncodingException; 029 import java.io.Writer; 030 import java.util.Arrays; 031 032 import org.xml.sax.Attributes; 033 import org.xml.sax.ContentHandler; 034 import org.xml.sax.Locator; 035 import org.xml.sax.SAXException; 036 import org.xml.sax.ext.LexicalHandler; 037 038 public class HtmlSerializer implements ContentHandler, LexicalHandler { 039 040 private static final String[] VOID_ELEMENTS = { "area", "base", "basefont", 041 "bgsound", "br", "col", "embed", "frame", "hr", "img", "input", 042 "link", "meta", "param", "spacer", "wbr" }; 043 044 private static final String[] NON_ESCAPING = {"iframe", 045 "noembed", 046 "noframes", 047 "noscript", 048 "plaintext", 049 "script", 050 "style", 051 "xmp" 052 }; 053 054 private static Writer wrap(OutputStream out) { 055 try { 056 return new OutputStreamWriter(out, "UTF-8"); 057 } catch (UnsupportedEncodingException e) { 058 throw new RuntimeException(e); 059 } 060 } 061 062 private int ignoreLevel = 0; 063 064 private int escapeLevel = 0; 065 066 private final Writer writer; 067 068 public HtmlSerializer(OutputStream out) { 069 this(wrap(out)); 070 } 071 072 public HtmlSerializer(Writer out) { 073 this.writer = out; 074 } 075 076 public void characters(char[] ch, int start, int length) throws SAXException { 077 try { 078 if (escapeLevel > 0) { 079 writer.write(ch, start, length); 080 } else { 081 for (int i = start; i < start + length; i++) { 082 char c = ch[i]; 083 switch (c) { 084 case '<': 085 writer.write("<"); 086 break; 087 case '>': 088 writer.write(">"); 089 break; 090 case '&': 091 writer.write("&"); 092 break; 093 default: 094 writer.write(c); 095 break; 096 } 097 } 098 } 099 } catch (IOException e) { 100 throw new SAXException(e); 101 } 102 } 103 104 public void endDocument() throws SAXException { 105 try { 106 writer.flush(); 107 writer.close(); 108 } catch (IOException e) { 109 throw new SAXException(e); 110 } 111 } 112 113 public void endElement(String uri, String localName, String qName) throws SAXException { 114 if (escapeLevel > 0) { 115 escapeLevel--; 116 } 117 if (ignoreLevel > 0) { 118 ignoreLevel--; 119 } else { 120 try { 121 writer.write('<'); 122 writer.write('/'); 123 writer.write(localName); 124 writer.write('>'); 125 } catch (IOException e) { 126 throw new SAXException(e); 127 } 128 } 129 } 130 131 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { 132 characters(ch, start, length); 133 } 134 135 public void processingInstruction(String target, String data) throws SAXException { 136 } 137 138 public void setDocumentLocator(Locator locator) { 139 } 140 141 public void startDocument() throws SAXException { 142 try { 143 writer.write("<!DOCTYPE html>\n"); 144 } catch (IOException e) { 145 throw new SAXException(e); 146 } 147 } 148 149 public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { 150 if (escapeLevel > 0) { 151 escapeLevel++; 152 } 153 if (ignoreLevel > 0 || !"http://www.w3.org/1999/xhtml".equals(uri)) { 154 ignoreLevel++; 155 return; 156 } 157 try { 158 writer.write('<'); 159 writer.write(localName); 160 for (int i = 0; i < atts.getLength(); i++) { 161 writer.write(' '); 162 writer.write(atts.getLocalName(i)); // XXX xml:lang 163 writer.write('='); 164 writer.write('"'); 165 String val = atts.getValue(i); 166 for (int j = 0; j < val.length(); j++) { 167 char c = val.charAt(j); 168 switch (c) { 169 case '"': 170 writer.write("""); 171 break; 172 case '<': 173 writer.write("<"); 174 break; 175 case '>': 176 writer.write(">"); 177 break; 178 case '&': 179 writer.write("&"); 180 break; 181 default: 182 writer.write(c); 183 break; 184 } 185 } 186 writer.write('"'); 187 } 188 writer.write('>'); 189 if (Arrays.binarySearch(VOID_ELEMENTS, localName) > -1) { 190 ignoreLevel++; 191 return; 192 } 193 if ("pre".equals(localName) || "textarea".equals(localName)) { 194 writer.write('\n'); 195 } 196 if (escapeLevel == 0 && Arrays.binarySearch(NON_ESCAPING, localName) > -1) { 197 escapeLevel = 1; 198 } 199 } catch (IOException e) { 200 throw new SAXException(e); 201 } 202 } 203 204 public void comment(char[] ch, int start, int length) throws SAXException { 205 if (ignoreLevel > 0) { 206 return; 207 } 208 try { 209 writer.write("<!--"); 210 writer.write(ch, start, length); 211 writer.write("-->"); 212 } catch (IOException e) { 213 throw new SAXException(e); 214 } 215 } 216 217 public void endCDATA() throws SAXException { 218 } 219 220 public void endDTD() throws SAXException { 221 } 222 223 public void endEntity(String name) throws SAXException { 224 } 225 226 public void startCDATA() throws SAXException { 227 } 228 229 public void startDTD(String name, String publicId, String systemId) throws SAXException { 230 } 231 232 public void startEntity(String name) throws SAXException { 233 } 234 235 public void startPrefixMapping(String prefix, String uri) throws SAXException { 236 } 237 238 public void endPrefixMapping(String prefix) throws SAXException { 239 } 240 241 public void skippedEntity(String name) throws SAXException { 242 } 243 244 }