001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package nu.validator.htmlparser.test;
024
025 import java.io.ByteArrayInputStream;
026 import java.io.IOException;
027 import java.nio.ByteBuffer;
028 import java.nio.CharBuffer;
029 import java.nio.charset.Charset;
030 import java.nio.charset.CharsetEncoder;
031 import java.nio.charset.CodingErrorAction;
032
033 import nu.validator.htmlparser.impl.HtmlInputStreamReader;
034
035 import org.xml.sax.ErrorHandler;
036 import org.xml.sax.SAXException;
037
038 public class DecoderLoopTester {
039
040 private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
041
042 private static final int NUMBER_OR_ASTRAL_CHARS = 24500;
043
044 private void runTest(int padding) throws SAXException, IOException {
045 Charset utf8 = Charset.forName("UTF-8");
046 char[] charArr = new char[1 + padding + 2 * NUMBER_OR_ASTRAL_CHARS];
047 byte[] byteArr;
048 int i = 0;
049 charArr[i++] = '\uFEFF';
050 for (int j = 0; j < padding; j++) {
051 charArr[i++] = 'x';
052 }
053 for (int j = 0; j < NUMBER_OR_ASTRAL_CHARS; j++) {
054 int value = 0x10000 + j;
055 charArr[i++] = (char) (LEAD_OFFSET + (value >> 10));
056 charArr[i++] = (char) (0xDC00 + (value & 0x3FF));
057 // charArr[i++] = 'y';
058 // charArr[i++] = 'z';
059
060 }
061 CharBuffer charBuffer = CharBuffer.wrap(charArr);
062 CharsetEncoder enc = utf8.newEncoder();
063 enc.onMalformedInput(CodingErrorAction.REPORT);
064 enc.onUnmappableCharacter(CodingErrorAction.REPORT);
065 ByteBuffer byteBuffer = enc.encode(charBuffer);
066 byteArr = new byte[byteBuffer.limit()];
067 byteBuffer.get(byteArr);
068
069 ErrorHandler eh = new SystemErrErrorHandler();
070 compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null), padding, charArr, byteArr);
071 compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8.newDecoder()), padding, charArr, byteArr);
072 }
073
074 /**
075 * @param padding
076 * @param charArr
077 * @param byteArr
078 * @throws SAXException
079 * @throws IOException
080 */
081 private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException {
082 char[] readBuffer = new char[2048];
083 int offset = 0;
084 int num = 0;
085 int readNum = 0;
086 while ((num = reader.read(readBuffer)) != -1) {
087 for (int j = 0; j < num; j++) {
088 System.out.println(offset + j);
089 if (readBuffer[j] != charArr[offset + j]) {
090 throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum);
091 }
092 }
093 offset += num;
094 readNum++;
095 }
096 }
097
098 void runTests() throws SAXException, IOException {
099 for (int i = 0; i < 4; i++) {
100 runTest(i);
101 }
102 }
103
104 /**
105 * @param args
106 * @throws IOException
107 * @throws SAXException
108 */
109 public static void main(String[] args) throws IOException, SAXException {
110 new DecoderLoopTester().runTests();
111 }
112
113 }