001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package nu.validator.htmlparser.test;
024    
025    import java.io.ByteArrayInputStream;
026    import java.io.IOException;
027    import java.nio.ByteBuffer;
028    import java.nio.CharBuffer;
029    import java.nio.charset.Charset;
030    import java.nio.charset.CharsetEncoder;
031    import java.nio.charset.CodingErrorAction;
032    
033    import nu.validator.htmlparser.impl.HtmlInputStreamReader;
034    
035    import org.xml.sax.ErrorHandler;
036    import org.xml.sax.SAXException;
037    
038    public class DecoderLoopTester {
039        
040        private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
041    
042        private static final int NUMBER_OR_ASTRAL_CHARS = 24500;
043        
044        private void runTest(int padding) throws SAXException, IOException {
045           Charset utf8 = Charset.forName("UTF-8");
046           char[] charArr = new char[1 + padding + 2 * NUMBER_OR_ASTRAL_CHARS];
047           byte[] byteArr;
048           int i = 0;
049           charArr[i++] = '\uFEFF';
050           for (int j = 0; j < padding; j++) {
051               charArr[i++] = 'x';           
052           }
053           for (int j = 0; j < NUMBER_OR_ASTRAL_CHARS; j++) {
054                int value = 0x10000 + j;
055                charArr[i++] = (char) (LEAD_OFFSET + (value >> 10));
056                charArr[i++] = (char) (0xDC00 + (value & 0x3FF));
057    //            charArr[i++] = 'y';
058    //            charArr[i++] = 'z';
059    
060           }
061           CharBuffer charBuffer = CharBuffer.wrap(charArr);
062           CharsetEncoder enc = utf8.newEncoder();
063           enc.onMalformedInput(CodingErrorAction.REPORT);
064           enc.onUnmappableCharacter(CodingErrorAction.REPORT);
065           ByteBuffer byteBuffer = enc.encode(charBuffer);
066           byteArr = new byte[byteBuffer.limit()];
067           byteBuffer.get(byteArr);
068           
069           ErrorHandler eh = new SystemErrErrorHandler();
070           compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null), padding, charArr, byteArr);
071           compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8.newDecoder()), padding, charArr, byteArr);
072        }
073    
074        /**
075         * @param padding
076         * @param charArr
077         * @param byteArr
078         * @throws SAXException
079         * @throws IOException
080         */
081        private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException {
082               char[] readBuffer = new char[2048];
083               int offset = 0;
084               int num = 0;
085               int readNum = 0;
086               while ((num = reader.read(readBuffer)) != -1) {
087                   for (int j = 0; j < num; j++) {
088                       System.out.println(offset + j);
089                       if (readBuffer[j] != charArr[offset + j]) {
090                           throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum);
091                       }
092                   }
093                   offset += num;
094                   readNum++;
095               }
096        }
097        
098        void runTests() throws SAXException, IOException {
099            for (int i = 0; i < 4; i++) {
100                runTest(i);
101            }
102        }
103        
104        /**
105         * @param args
106         * @throws IOException 
107         * @throws SAXException 
108         */
109        public static void main(String[] args) throws IOException, SAXException {
110            new DecoderLoopTester().runTests();
111        }
112    
113    }