001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package nu.validator.htmlparser.test;
024
025 import java.io.FileInputStream;
026 import java.io.IOException;
027 import java.io.InputStream;
028 import java.io.InputStreamReader;
029 import java.io.OutputStreamWriter;
030 import java.io.PrintWriter;
031 import java.io.StringReader;
032 import java.io.UnsupportedEncodingException;
033 import java.io.Writer;
034
035 import nu.validator.htmlparser.common.XmlViolationPolicy;
036 import nu.validator.htmlparser.impl.ContentModelFlag;
037 import nu.validator.htmlparser.impl.Tokenizer;
038
039 import org.xml.sax.InputSource;
040 import org.xml.sax.SAXException;
041
042 import antlr.RecognitionException;
043 import antlr.TokenStreamException;
044
045 import com.sdicons.json.model.JSONArray;
046 import com.sdicons.json.model.JSONObject;
047 import com.sdicons.json.model.JSONString;
048 import com.sdicons.json.model.JSONValue;
049 import com.sdicons.json.parser.JSONParser;
050
051 public class TokenizerTester {
052
053 private static JSONString PLAINTEXT = new JSONString("PLAINTEXT");
054
055 private static JSONString PCDATA = new JSONString("PCDATA");
056
057 private static JSONString RCDATA = new JSONString("RCDATA");
058
059 private static JSONString CDATA = new JSONString("CDATA");
060
061 private static boolean jsonDeepEquals(JSONValue one, JSONValue other) {
062 if (one.isSimple()) {
063 return one.equals(other);
064 } else if (one.isArray()) {
065 if (other.isArray()) {
066 JSONArray oneArr = (JSONArray) one;
067 JSONArray otherArr = (JSONArray) other;
068 return oneArr.getValue().equals(otherArr.getValue());
069 } else {
070 return false;
071 }
072 } else if (one.isObject()) {
073 if (other.isObject()) {
074 JSONObject oneObject = (JSONObject) one;
075 JSONObject otherObject = (JSONObject) other;
076 return oneObject.getValue().equals(otherObject.getValue());
077 } else {
078 return false;
079 }
080 } else {
081 throw new RuntimeException("Should never happen.");
082 }
083 }
084
085 private JSONArray tests;
086
087 private final JSONArrayTokenHandler tokenHandler;
088
089 private final Tokenizer tokenizer;
090
091 private final Writer writer;
092
093 private TokenizerTester(InputStream stream) throws TokenStreamException,
094 RecognitionException, UnsupportedEncodingException {
095 tokenHandler = new JSONArrayTokenHandler();
096 tokenizer = new Tokenizer(tokenHandler);
097 tokenizer.setErrorHandler(tokenHandler);
098 writer = new OutputStreamWriter(System.out, "UTF-8");
099 JSONParser jsonParser = new JSONParser(new InputStreamReader(stream,
100 "UTF-8"));
101 JSONObject obj = (JSONObject) jsonParser.nextValue();
102 tests = (JSONArray) obj.get("tests");
103 if (tests == null) {
104 tests = (JSONArray) obj.get("xmlViolationTests");
105 tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
106 tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
107 tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
108 tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
109 }
110 }
111
112 private void runTests() throws SAXException, IOException {
113 for (JSONValue val : tests.getValue()) {
114 runTest((JSONObject) val);
115 }
116 writer.flush();
117 }
118
119 private void runTest(JSONObject test) throws SAXException, IOException {
120 String inputString = ((JSONString) test.get("input")).getValue();
121 JSONArray expectedTokens = (JSONArray) test.get("output");
122 String description = ((JSONString) test.get("description")).getValue();
123 JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag"));
124 String lastStartTag = lastStartTagJSON == null ? null
125 : lastStartTagJSON.getValue();
126 JSONArray contentModelFlags = (JSONArray) test.get("contentModelFlags");
127 if (contentModelFlags == null) {
128 runTestInner(inputString, expectedTokens, description,
129 ContentModelFlag.PCDATA, null);
130 } else {
131 for (JSONValue value : contentModelFlags.getValue()) {
132 if (PCDATA.equals(value)) {
133 runTestInner(inputString, expectedTokens, description,
134 ContentModelFlag.PCDATA, lastStartTag);
135 } else if (CDATA.equals(value)) {
136 runTestInner(inputString, expectedTokens, description,
137 ContentModelFlag.CDATA, lastStartTag);
138 } else if (RCDATA.equals(value)) {
139 runTestInner(inputString, expectedTokens, description,
140 ContentModelFlag.RCDATA, lastStartTag);
141 } else if (PLAINTEXT.equals(value)) {
142 runTestInner(inputString, expectedTokens, description,
143 ContentModelFlag.PLAINTEXT, lastStartTag);
144 } else {
145 throw new RuntimeException("Broken test data.");
146 }
147 }
148 }
149 }
150
151 /**
152 * @param contentModelElement
153 * @param contentModelFlag
154 * @param test
155 * @throws SAXException
156 * @throws IOException
157 */
158 private void runTestInner(String inputString, JSONArray expectedTokens,
159 String description, ContentModelFlag contentModelFlag,
160 String contentModelElement) throws SAXException, IOException {
161 tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement);
162 InputSource is = new InputSource(new StringReader(inputString));
163 try {
164 tokenizer.tokenize(is);
165 JSONArray actualTokens = tokenHandler.getArray();
166 if (jsonDeepEquals(actualTokens, expectedTokens)) {
167 writer.write("Success\n");
168 } else {
169 writer.write("Failure\n");
170 writer.write(description);
171 writer.write("\nInput:\n");
172 writer.write(inputString);
173 writer.write("\nExpected tokens:\n");
174 writer.write(expectedTokens.render(false));
175 writer.write("\nActual tokens:\n");
176 writer.write(actualTokens.render(false));
177 writer.write("\n");
178 }
179 } catch (Throwable t) {
180 writer.write("Failure\n");
181 writer.write(description);
182 writer.write("\nInput:\n");
183 writer.write(inputString);
184 writer.write("\n");
185 t.printStackTrace(new PrintWriter(writer, false));
186 }
187 }
188
189 /**
190 * @param args
191 * @throws RecognitionException
192 * @throws TokenStreamException
193 * @throws IOException
194 * @throws SAXException
195 */
196 public static void main(String[] args) throws TokenStreamException,
197 RecognitionException, SAXException, IOException {
198 for (int i = 0; i < args.length; i++) {
199 TokenizerTester tester = new TokenizerTester(new FileInputStream(
200 args[i]));
201 tester.runTests();
202 }
203 }
204
205 }