001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.test; 024 025 import java.io.FileInputStream; 026 import java.io.IOException; 027 import java.io.InputStream; 028 import java.io.InputStreamReader; 029 import java.io.OutputStreamWriter; 030 import java.io.PrintWriter; 031 import java.io.StringReader; 032 import java.io.UnsupportedEncodingException; 033 import java.io.Writer; 034 035 import nu.validator.htmlparser.common.XmlViolationPolicy; 036 import nu.validator.htmlparser.impl.ContentModelFlag; 037 import nu.validator.htmlparser.impl.Tokenizer; 038 039 import org.xml.sax.InputSource; 040 import org.xml.sax.SAXException; 041 042 import antlr.RecognitionException; 043 import antlr.TokenStreamException; 044 045 import com.sdicons.json.model.JSONArray; 046 import com.sdicons.json.model.JSONObject; 047 import com.sdicons.json.model.JSONString; 048 import com.sdicons.json.model.JSONValue; 049 import com.sdicons.json.parser.JSONParser; 050 051 public class TokenizerTester { 052 053 private static JSONString PLAINTEXT = new JSONString("PLAINTEXT"); 054 055 private static JSONString PCDATA = new JSONString("PCDATA"); 056 057 private static JSONString RCDATA = new JSONString("RCDATA"); 058 059 private static JSONString CDATA = new JSONString("CDATA"); 060 061 private static boolean jsonDeepEquals(JSONValue one, JSONValue other) { 062 if (one.isSimple()) { 063 return one.equals(other); 064 } else if (one.isArray()) { 065 if (other.isArray()) { 066 JSONArray oneArr = (JSONArray) one; 067 JSONArray otherArr = (JSONArray) other; 068 return oneArr.getValue().equals(otherArr.getValue()); 069 } else { 070 return false; 071 } 072 } else if (one.isObject()) { 073 if (other.isObject()) { 074 JSONObject oneObject = (JSONObject) one; 075 JSONObject otherObject = (JSONObject) other; 076 return oneObject.getValue().equals(otherObject.getValue()); 077 } else { 078 return false; 079 } 080 } else { 081 throw new RuntimeException("Should never happen."); 082 } 083 } 084 085 private JSONArray tests; 086 087 private final JSONArrayTokenHandler tokenHandler; 088 089 private final Tokenizer tokenizer; 090 091 private final Writer writer; 092 093 private TokenizerTester(InputStream stream) throws TokenStreamException, 094 RecognitionException, UnsupportedEncodingException { 095 tokenHandler = new JSONArrayTokenHandler(); 096 tokenizer = new Tokenizer(tokenHandler); 097 tokenizer.setErrorHandler(tokenHandler); 098 writer = new OutputStreamWriter(System.out, "UTF-8"); 099 JSONParser jsonParser = new JSONParser(new InputStreamReader(stream, 100 "UTF-8")); 101 JSONObject obj = (JSONObject) jsonParser.nextValue(); 102 tests = (JSONArray) obj.get("tests"); 103 if (tests == null) { 104 tests = (JSONArray) obj.get("xmlViolationTests"); 105 tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET); 106 tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET); 107 tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); 108 tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); 109 } 110 } 111 112 private void runTests() throws SAXException, IOException { 113 for (JSONValue val : tests.getValue()) { 114 runTest((JSONObject) val); 115 } 116 writer.flush(); 117 } 118 119 private void runTest(JSONObject test) throws SAXException, IOException { 120 String inputString = ((JSONString) test.get("input")).getValue(); 121 JSONArray expectedTokens = (JSONArray) test.get("output"); 122 String description = ((JSONString) test.get("description")).getValue(); 123 JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag")); 124 String lastStartTag = lastStartTagJSON == null ? null 125 : lastStartTagJSON.getValue(); 126 JSONArray contentModelFlags = (JSONArray) test.get("contentModelFlags"); 127 if (contentModelFlags == null) { 128 runTestInner(inputString, expectedTokens, description, 129 ContentModelFlag.PCDATA, null); 130 } else { 131 for (JSONValue value : contentModelFlags.getValue()) { 132 if (PCDATA.equals(value)) { 133 runTestInner(inputString, expectedTokens, description, 134 ContentModelFlag.PCDATA, lastStartTag); 135 } else if (CDATA.equals(value)) { 136 runTestInner(inputString, expectedTokens, description, 137 ContentModelFlag.CDATA, lastStartTag); 138 } else if (RCDATA.equals(value)) { 139 runTestInner(inputString, expectedTokens, description, 140 ContentModelFlag.RCDATA, lastStartTag); 141 } else if (PLAINTEXT.equals(value)) { 142 runTestInner(inputString, expectedTokens, description, 143 ContentModelFlag.PLAINTEXT, lastStartTag); 144 } else { 145 throw new RuntimeException("Broken test data."); 146 } 147 } 148 } 149 } 150 151 /** 152 * @param contentModelElement 153 * @param contentModelFlag 154 * @param test 155 * @throws SAXException 156 * @throws IOException 157 */ 158 private void runTestInner(String inputString, JSONArray expectedTokens, 159 String description, ContentModelFlag contentModelFlag, 160 String contentModelElement) throws SAXException, IOException { 161 tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement); 162 InputSource is = new InputSource(new StringReader(inputString)); 163 try { 164 tokenizer.tokenize(is); 165 JSONArray actualTokens = tokenHandler.getArray(); 166 if (jsonDeepEquals(actualTokens, expectedTokens)) { 167 writer.write("Success\n"); 168 } else { 169 writer.write("Failure\n"); 170 writer.write(description); 171 writer.write("\nInput:\n"); 172 writer.write(inputString); 173 writer.write("\nExpected tokens:\n"); 174 writer.write(expectedTokens.render(false)); 175 writer.write("\nActual tokens:\n"); 176 writer.write(actualTokens.render(false)); 177 writer.write("\n"); 178 } 179 } catch (Throwable t) { 180 writer.write("Failure\n"); 181 writer.write(description); 182 writer.write("\nInput:\n"); 183 writer.write(inputString); 184 writer.write("\n"); 185 t.printStackTrace(new PrintWriter(writer, false)); 186 } 187 } 188 189 /** 190 * @param args 191 * @throws RecognitionException 192 * @throws TokenStreamException 193 * @throws IOException 194 * @throws SAXException 195 */ 196 public static void main(String[] args) throws TokenStreamException, 197 RecognitionException, SAXException, IOException { 198 for (int i = 0; i < args.length; i++) { 199 TokenizerTester tester = new TokenizerTester(new FileInputStream( 200 args[i])); 201 tester.runTests(); 202 } 203 } 204 205 }