001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.test; 024 025 import java.io.ByteArrayInputStream; 026 import java.io.IOException; 027 import java.nio.ByteBuffer; 028 import java.nio.CharBuffer; 029 import java.nio.charset.Charset; 030 import java.nio.charset.CharsetEncoder; 031 import java.nio.charset.CodingErrorAction; 032 033 import nu.validator.htmlparser.impl.HtmlInputStreamReader; 034 035 import org.xml.sax.ErrorHandler; 036 import org.xml.sax.SAXException; 037 038 public class DecoderLoopTester { 039 040 private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); 041 042 private static final int NUMBER_OR_ASTRAL_CHARS = 24500; 043 044 private void runTest(int padding) throws SAXException, IOException { 045 Charset utf8 = Charset.forName("UTF-8"); 046 char[] charArr = new char[1 + padding + 2 * NUMBER_OR_ASTRAL_CHARS]; 047 byte[] byteArr; 048 int i = 0; 049 charArr[i++] = '\uFEFF'; 050 for (int j = 0; j < padding; j++) { 051 charArr[i++] = 'x'; 052 } 053 for (int j = 0; j < NUMBER_OR_ASTRAL_CHARS; j++) { 054 int value = 0x10000 + j; 055 charArr[i++] = (char) (LEAD_OFFSET + (value >> 10)); 056 charArr[i++] = (char) (0xDC00 + (value & 0x3FF)); 057 // charArr[i++] = 'y'; 058 // charArr[i++] = 'z'; 059 060 } 061 CharBuffer charBuffer = CharBuffer.wrap(charArr); 062 CharsetEncoder enc = utf8.newEncoder(); 063 enc.onMalformedInput(CodingErrorAction.REPORT); 064 enc.onUnmappableCharacter(CodingErrorAction.REPORT); 065 ByteBuffer byteBuffer = enc.encode(charBuffer); 066 byteArr = new byte[byteBuffer.limit()]; 067 byteBuffer.get(byteArr); 068 069 ErrorHandler eh = new SystemErrErrorHandler(); 070 compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null), padding, charArr, byteArr); 071 compare(new HtmlInputStreamReader(new ByteArrayInputStream(byteArr), eh, null, null, utf8.newDecoder()), padding, charArr, byteArr); 072 } 073 074 /** 075 * @param padding 076 * @param charArr 077 * @param byteArr 078 * @throws SAXException 079 * @throws IOException 080 */ 081 private void compare(HtmlInputStreamReader reader, int padding, char[] charArr, byte[] byteArr) throws SAXException, IOException { 082 char[] readBuffer = new char[2048]; 083 int offset = 0; 084 int num = 0; 085 int readNum = 0; 086 while ((num = reader.read(readBuffer)) != -1) { 087 for (int j = 0; j < num; j++) { 088 System.out.println(offset + j); 089 if (readBuffer[j] != charArr[offset + j]) { 090 throw new RuntimeException("Test failed. Char: " + Integer.toHexString(readBuffer[j]) + " j: " + j + " readNum: " + readNum); 091 } 092 } 093 offset += num; 094 readNum++; 095 } 096 } 097 098 void runTests() throws SAXException, IOException { 099 for (int i = 0; i < 4; i++) { 100 runTest(i); 101 } 102 } 103 104 /** 105 * @param args 106 * @throws IOException 107 * @throws SAXException 108 */ 109 public static void main(String[] args) throws IOException, SAXException { 110 new DecoderLoopTester().runTests(); 111 } 112 113 }