001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package nu.validator.htmlparser.sax;
024
025 import java.io.IOException;
026 import java.io.OutputStream;
027 import java.io.OutputStreamWriter;
028 import java.io.UnsupportedEncodingException;
029 import java.io.Writer;
030 import java.util.Arrays;
031
032 import org.xml.sax.Attributes;
033 import org.xml.sax.ContentHandler;
034 import org.xml.sax.Locator;
035 import org.xml.sax.SAXException;
036 import org.xml.sax.ext.LexicalHandler;
037
/**
 * Writes a stream of SAX events to a {@link Writer} as HTML text.
 *
 * <p>Only elements whose namespace URI is the XHTML namespace are written.
 * An element in any other namespace is skipped together with the start and
 * end tags of everything nested inside it. Character data is escaped unless
 * it occurs inside one of the {@link #NON_ESCAPING} elements, whose content
 * is written verbatim.
 *
 * <p>An instance keeps per-document nesting counters and closes its writer
 * in {@link #endDocument()}.
 */
public class HtmlSerializer implements ContentHandler, LexicalHandler {

    /**
     * Elements that are written without an end tag. Must stay sorted because
     * it is searched with {@link Arrays#binarySearch(Object[], Object)}.
     */
    private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
            "bgsound", "br", "col", "embed", "frame", "hr", "img", "input",
            "link", "meta", "param", "spacer", "wbr" };

    /**
     * Elements whose text content is written without escaping. Must stay
     * sorted because it is searched with
     * {@link Arrays#binarySearch(Object[], Object)}.
     */
    private static final String[] NON_ESCAPING = { "iframe", "noembed",
            "noframes", "noscript", "plaintext", "script", "style", "xmp" };

    /** Namespace URI an element must have in order to be serialized. */
    private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";

    /**
     * Wraps a byte stream in a UTF-8 encoding writer.
     *
     * @param out the stream to write the encoded bytes to
     * @return a writer that encodes to {@code out} as UTF-8
     * @throws RuntimeException if the runtime reports UTF-8 as unsupported
     */
    private static Writer wrap(OutputStream out) {
        try {
            return new OutputStreamWriter(out, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Number of currently open elements whose end tag must not be written:
     * skipped (non-XHTML) elements, everything nested in them, and void
     * elements.
     */
    private int ignoreLevel = 0;

    /**
     * Element nesting depth counted from the innermost open
     * {@link #NON_ESCAPING} element; zero when text must be escaped.
     */
    private int escapeLevel = 0;

    private final Writer writer;

    /**
     * Creates a serializer that writes UTF-8 encoded output to a stream.
     *
     * @param out the destination stream
     */
    public HtmlSerializer(OutputStream out) {
        this(wrap(out));
    }

    /**
     * Creates a serializer that writes to the given writer.
     *
     * @param out the destination writer
     */
    public HtmlSerializer(Writer out) {
        this.writer = out;
    }

    /**
     * Writes character data. Inside a non-escaping element the characters
     * are copied as-is; otherwise {@code <}, {@code >} and {@code &} are
     * replaced with {@code &lt;}, {@code &gt;} and {@code &amp;}.
     *
     * <p>Note that this method does not consult {@code ignoreLevel}, so text
     * inside a skipped element is still written.
     *
     * @param ch the buffer holding the characters
     * @param start index of the first character to write
     * @param length number of characters to write
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        try {
            if (escapeLevel > 0) {
                writer.write(ch, start, length);
            } else {
                for (int i = start; i < start + length; i++) {
                    char c = ch[i];
                    switch (c) {
                        case '<':
                            writer.write("&lt;");
                            break;
                        case '>':
                            writer.write("&gt;");
                            break;
                        case '&':
                            writer.write("&amp;");
                            break;
                        default:
                            writer.write(c);
                            break;
                    }
                }
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Flushes and closes the underlying writer.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void endDocument() throws SAXException {
        try {
            writer.flush();
            writer.close();
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes the end tag {@code </localName>} unless the element is being
     * ignored (skipped namespace, nested in a skipped element, or void), and
     * unwinds the nesting counters raised by the matching
     * {@link #startElement}.
     *
     * @param uri namespace URI of the element (not consulted here)
     * @param localName local name written in the end tag
     * @param qName qualified name (not used)
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (escapeLevel > 0) {
            escapeLevel--;
        }
        if (ignoreLevel > 0) {
            // Balances the increment made in startElement; no tag is written.
            ignoreLevel--;
        } else {
            try {
                writer.write('<');
                writer.write('/');
                writer.write(localName);
                writer.write('>');
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }
    }

    /**
     * Treats ignorable whitespace exactly like character data.
     *
     * @param ch the buffer holding the characters
     * @param start index of the first character to write
     * @param length number of characters to write
     * @throws SAXException if {@link #characters} fails
     */
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        characters(ch, start, length);
    }

    /** Does nothing; processing instructions are not serialized. */
    public void processingInstruction(String target, String data) throws SAXException {
    }

    /** Does nothing; the locator is not used. */
    public void setDocumentLocator(Locator locator) {
    }

    /**
     * Writes the {@code <!DOCTYPE html>} line that starts the output.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void startDocument() throws SAXException {
        try {
            writer.write("<!DOCTYPE html>\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes a start tag with its attributes for an XHTML-namespace element.
     *
     * <p>Elements in other namespaces, and any element started while an
     * ignored element is open, only raise {@code ignoreLevel}. Attribute
     * values are written double-quoted with {@code "}, {@code <}, {@code >}
     * and {@code &} replaced by {@code &quot;}, {@code &lt;}, {@code &gt;}
     * and {@code &amp;}.
     *
     * @param uri namespace URI of the element
     * @param localName local name written in the start tag
     * @param qName qualified name (not used)
     * @param atts attributes of the element
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        if (escapeLevel > 0) {
            // Track depth inside a non-escaping element so that the matching
            // endElement calls bring the counter back to zero at its end tag.
            escapeLevel++;
        }
        if (ignoreLevel > 0 || !XHTML_NS.equals(uri)) {
            ignoreLevel++;
            return;
        }
        try {
            writer.write('<');
            writer.write(localName);
            for (int i = 0; i < atts.getLength(); i++) {
                writer.write(' ');
                // XXX xml:lang -- only the local name is written, so the
                // prefix of a namespaced attribute is not reproduced.
                writer.write(atts.getLocalName(i));
                writer.write('=');
                writer.write('"');
                String val = atts.getValue(i);
                for (int j = 0; j < val.length(); j++) {
                    char c = val.charAt(j);
                    switch (c) {
                        case '"':
                            writer.write("&quot;");
                            break;
                        case '<':
                            writer.write("&lt;");
                            break;
                        case '>':
                            writer.write("&gt;");
                            break;
                        case '&':
                            writer.write("&amp;");
                            break;
                        default:
                            writer.write(c);
                            break;
                    }
                }
                writer.write('"');
            }
            writer.write('>');
            if (Arrays.binarySearch(VOID_ELEMENTS, localName) >= 0) {
                // Void element: make the matching endElement write nothing.
                ignoreLevel++;
                return;
            }
            if ("pre".equals(localName) || "textarea".equals(localName)) {
                // Extra newline after the start tag of pre/textarea.
                writer.write('\n');
            }
            if (escapeLevel == 0 && Arrays.binarySearch(NON_ESCAPING, localName) >= 0) {
                escapeLevel = 1;
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes a comment as {@code <!--...-->} unless an ignored element is
     * currently open. The comment text is written without modification.
     *
     * @param ch the buffer holding the comment text
     * @param start index of the first character of the comment
     * @param length number of characters in the comment
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void comment(char[] ch, int start, int length) throws SAXException {
        if (ignoreLevel > 0) {
            return;
        }
        try {
            writer.write("<!--");
            writer.write(ch, start, length);
            writer.write("-->");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Does nothing. */
    public void endCDATA() throws SAXException {
    }

    /** Does nothing. */
    public void endDTD() throws SAXException {
    }

    /** Does nothing. */
    public void endEntity(String name) throws SAXException {
    }

    /** Does nothing. */
    public void startCDATA() throws SAXException {
    }

    /** Does nothing; the doctype is always written by {@link #startDocument()}. */
    public void startDTD(String name, String publicId, String systemId) throws SAXException {
    }

    /** Does nothing. */
    public void startEntity(String name) throws SAXException {
    }

    /** Does nothing. */
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    /** Does nothing. */
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /** Does nothing. */
    public void skippedEntity(String name) throws SAXException {
    }

}