001 package com.thaiopensource.relaxng.parse.compact; 002 003 import com.thaiopensource.relaxng.parse.BuildException; 004 import com.thaiopensource.relaxng.parse.IllegalSchemaException; 005 import com.thaiopensource.relaxng.parse.IncludedGrammar; 006 import com.thaiopensource.relaxng.parse.Parseable; 007 import com.thaiopensource.relaxng.parse.ParsedPattern; 008 import com.thaiopensource.relaxng.parse.SchemaBuilder; 009 import com.thaiopensource.relaxng.parse.Scope; 010 import com.thaiopensource.xml.util.EncodingMap; 011 012 import org.xml.sax.EntityResolver; 013 import org.xml.sax.ErrorHandler; 014 import org.xml.sax.InputSource; 015 import org.xml.sax.SAXException; 016 017 import java.io.IOException; 018 import java.io.InputStream; 019 import java.io.InputStreamReader; 020 import java.io.PushbackInputStream; 021 import java.io.Reader; 022 import java.net.URL; 023 024 public class CompactParseable implements Parseable { 025 private final InputSource in; 026 private final ErrorHandler eh; 027 private final EntityResolver er; 028 029 public CompactParseable(InputSource in, ErrorHandler eh, EntityResolver er) { 030 this.in = in; 031 this.eh = eh; 032 this.er = er; 033 } 034 035 public ParsedPattern parse(SchemaBuilder sb, Scope scope) throws BuildException, IllegalSchemaException { 036 return new CompactSyntax(makeReader(in, er), in.getSystemId(), sb, eh).parse(scope); 037 } 038 039 public ParsedPattern parseInclude(String uri, SchemaBuilder sb, IncludedGrammar g) 040 throws BuildException, IllegalSchemaException { 041 InputSource tem = new InputSource(uri); 042 tem.setEncoding(in.getEncoding()); 043 return new CompactSyntax(makeReader(tem, er), uri, sb, eh).parseInclude(g); 044 } 045 046 public ParsedPattern parseExternal(String uri, SchemaBuilder sb, Scope scope) 047 throws BuildException, IllegalSchemaException { 048 InputSource tem = new InputSource(uri); 049 tem.setEncoding(in.getEncoding()); 050 return new CompactSyntax(makeReader(tem, er), uri, sb, eh).parse(scope); 051 } 052 053 private static final String UTF8 = EncodingMap.getJavaName("UTF-8"); 054 private static final String UTF16 = EncodingMap.getJavaName("UTF-16"); 055 056 private static Reader makeReader(InputSource is, EntityResolver er) throws BuildException { 057 try { 058 Reader r = is.getCharacterStream(); 059 if (r == null) { 060 InputStream in = is.getByteStream(); 061 String encoding = is.getEncoding(); 062 if (in == null) { 063 String systemId = is.getSystemId(); 064 if (er == null) { 065 in = new URL(systemId).openStream(); 066 } else { 067 InputSource is2 = er.resolveEntity(null, systemId); 068 if (is2 == null) { 069 throw new IOException("Unable to resolve entity by system ID."); 070 } 071 r = is2.getCharacterStream(); 072 if (r == null) { 073 in = is2.getByteStream(); 074 encoding = is2.getEncoding(); 075 } 076 } 077 } 078 if (encoding == null) { 079 PushbackInputStream pb = new PushbackInputStream(in, 2); 080 encoding = detectEncoding(pb); 081 in = pb; 082 } 083 r = new InputStreamReader(in, encoding); 084 } 085 return r; 086 } 087 catch (IOException e) { 088 throw new BuildException(e); 089 } 090 catch (SAXException e) { 091 throw new BuildException(e); 092 } 093 } 094 095 static private String detectEncoding(PushbackInputStream in) throws IOException { 096 String encoding = UTF8; 097 int b1 = in.read(); 098 if (b1 != -1) { 099 int b2 = in.read(); 100 if (b2 != -1) { 101 in.unread(b2); 102 if ((b1 == 0xFF && b2 == 0xFE) || (b1 == 0xFE && b2 == 0xFF)) 103 encoding = UTF16; 104 } 105 in.unread(b1); 106 } 107 return encoding; 108 } 109 }