001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package nu.validator.htmlparser.test;
024    
025    import java.io.FileInputStream;
026    import java.io.IOException;
027    import java.io.InputStream;
028    import java.io.InputStreamReader;
029    import java.io.OutputStreamWriter;
030    import java.io.PrintWriter;
031    import java.io.StringReader;
032    import java.io.UnsupportedEncodingException;
033    import java.io.Writer;
034    
035    import nu.validator.htmlparser.common.XmlViolationPolicy;
036    import nu.validator.htmlparser.impl.ContentModelFlag;
037    import nu.validator.htmlparser.impl.Tokenizer;
038    
039    import org.xml.sax.InputSource;
040    import org.xml.sax.SAXException;
041    
042    import antlr.RecognitionException;
043    import antlr.TokenStreamException;
044    
045    import com.sdicons.json.model.JSONArray;
046    import com.sdicons.json.model.JSONObject;
047    import com.sdicons.json.model.JSONString;
048    import com.sdicons.json.model.JSONValue;
049    import com.sdicons.json.parser.JSONParser;
050    
051    public class TokenizerTester {
052    
053        private static JSONString PLAINTEXT = new JSONString("PLAINTEXT");
054    
055        private static JSONString PCDATA = new JSONString("PCDATA");
056    
057        private static JSONString RCDATA = new JSONString("RCDATA");
058    
059        private static JSONString CDATA = new JSONString("CDATA");
060    
061        private static boolean jsonDeepEquals(JSONValue one, JSONValue other) {
062            if (one.isSimple()) {
063                return one.equals(other);
064            } else if (one.isArray()) {
065                if (other.isArray()) {
066                    JSONArray oneArr = (JSONArray) one;
067                    JSONArray otherArr = (JSONArray) other;
068                    return oneArr.getValue().equals(otherArr.getValue());
069                } else {
070                    return false;
071                }
072            } else if (one.isObject()) {
073                if (other.isObject()) {
074                    JSONObject oneObject = (JSONObject) one;
075                    JSONObject otherObject = (JSONObject) other;
076                    return oneObject.getValue().equals(otherObject.getValue());
077                } else {
078                    return false;
079                }
080            } else {
081                throw new RuntimeException("Should never happen.");
082            }
083        }
084    
085        private JSONArray tests;
086    
087        private final JSONArrayTokenHandler tokenHandler;
088    
089        private final Tokenizer tokenizer;
090    
091        private final Writer writer;
092    
093        private TokenizerTester(InputStream stream) throws TokenStreamException,
094                RecognitionException, UnsupportedEncodingException {
095            tokenHandler = new JSONArrayTokenHandler();
096            tokenizer = new Tokenizer(tokenHandler);
097            tokenizer.setErrorHandler(tokenHandler);
098            writer = new OutputStreamWriter(System.out, "UTF-8");
099            JSONParser jsonParser = new JSONParser(new InputStreamReader(stream,
100                    "UTF-8"));
101            JSONObject obj = (JSONObject) jsonParser.nextValue();
102            tests = (JSONArray) obj.get("tests");
103            if (tests == null) {
104                tests = (JSONArray) obj.get("xmlViolationTests");
105                tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET);
106                tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET);
107                tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET);
108                tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
109            }
110        }
111    
112        private void runTests() throws SAXException, IOException {
113            for (JSONValue val : tests.getValue()) {
114                runTest((JSONObject) val);
115            }
116            writer.flush();
117        }
118    
119        private void runTest(JSONObject test) throws SAXException, IOException {
120            String inputString = ((JSONString) test.get("input")).getValue();
121            JSONArray expectedTokens = (JSONArray) test.get("output");
122            String description = ((JSONString) test.get("description")).getValue();
123            JSONString lastStartTagJSON = ((JSONString) test.get("lastStartTag"));
124            String lastStartTag = lastStartTagJSON == null ? null
125                    : lastStartTagJSON.getValue();
126            JSONArray contentModelFlags = (JSONArray) test.get("contentModelFlags");
127            if (contentModelFlags == null) {
128                runTestInner(inputString, expectedTokens, description,
129                        ContentModelFlag.PCDATA, null);
130            } else {
131                for (JSONValue value : contentModelFlags.getValue()) {
132                    if (PCDATA.equals(value)) {
133                        runTestInner(inputString, expectedTokens, description,
134                                ContentModelFlag.PCDATA, lastStartTag);
135                    } else if (CDATA.equals(value)) {
136                        runTestInner(inputString, expectedTokens, description,
137                                ContentModelFlag.CDATA, lastStartTag);
138                    } else if (RCDATA.equals(value)) {
139                        runTestInner(inputString, expectedTokens, description,
140                                ContentModelFlag.RCDATA, lastStartTag);
141                    } else if (PLAINTEXT.equals(value)) {
142                        runTestInner(inputString, expectedTokens, description,
143                                ContentModelFlag.PLAINTEXT, lastStartTag);
144                    } else {
145                        throw new RuntimeException("Broken test data.");
146                    }
147                }
148            }
149        }
150    
151        /**
152         * @param contentModelElement
153         * @param contentModelFlag
154         * @param test
155         * @throws SAXException
156         * @throws IOException
157         */
158        private void runTestInner(String inputString, JSONArray expectedTokens,
159                String description, ContentModelFlag contentModelFlag,
160                String contentModelElement) throws SAXException, IOException {
161            tokenHandler.setContentModelFlag(contentModelFlag, contentModelElement);
162            InputSource is = new InputSource(new StringReader(inputString));
163            try {
164                tokenizer.tokenize(is);
165                JSONArray actualTokens = tokenHandler.getArray();
166                if (jsonDeepEquals(actualTokens, expectedTokens)) {
167                    writer.write("Success\n");
168                } else {
169                    writer.write("Failure\n");
170                    writer.write(description);
171                    writer.write("\nInput:\n");
172                    writer.write(inputString);
173                    writer.write("\nExpected tokens:\n");
174                    writer.write(expectedTokens.render(false));
175                    writer.write("\nActual tokens:\n");
176                    writer.write(actualTokens.render(false));
177                    writer.write("\n");
178                }
179            } catch (Throwable t) {
180                writer.write("Failure\n");
181                writer.write(description);
182                writer.write("\nInput:\n");
183                writer.write(inputString);
184                writer.write("\n");
185                t.printStackTrace(new PrintWriter(writer, false));
186            }
187        }
188    
189        /**
190         * @param args
191         * @throws RecognitionException
192         * @throws TokenStreamException
193         * @throws IOException
194         * @throws SAXException
195         */
196        public static void main(String[] args) throws TokenStreamException,
197                RecognitionException, SAXException, IOException {
198            for (int i = 0; i < args.length; i++) {
199                TokenizerTester tester = new TokenizerTester(new FileInputStream(
200                        args[i]));
201                tester.runTests();
202            }
203        }
204    
205    }