001    package com.thaiopensource.relaxng.parse.compact;
002    
003    import com.thaiopensource.relaxng.parse.BuildException;
004    import com.thaiopensource.relaxng.parse.IllegalSchemaException;
005    import com.thaiopensource.relaxng.parse.IncludedGrammar;
006    import com.thaiopensource.relaxng.parse.Parseable;
007    import com.thaiopensource.relaxng.parse.ParsedPattern;
008    import com.thaiopensource.relaxng.parse.SchemaBuilder;
009    import com.thaiopensource.relaxng.parse.Scope;
010    import com.thaiopensource.xml.util.EncodingMap;
011    
012    import org.xml.sax.EntityResolver;
013    import org.xml.sax.ErrorHandler;
014    import org.xml.sax.InputSource;
015    import org.xml.sax.SAXException;
016    
017    import java.io.IOException;
018    import java.io.InputStream;
019    import java.io.InputStreamReader;
020    import java.io.PushbackInputStream;
021    import java.io.Reader;
022    import java.net.URL;
023    
024    public class CompactParseable implements Parseable {
025      private final InputSource in;
026      private final ErrorHandler eh;
027      private final EntityResolver er;
028    
029      public CompactParseable(InputSource in, ErrorHandler eh, EntityResolver er) {
030        this.in = in;
031        this.eh = eh;
032        this.er = er;
033      }
034    
035      public ParsedPattern parse(SchemaBuilder sb, Scope scope) throws BuildException, IllegalSchemaException {
036        return new CompactSyntax(makeReader(in, er), in.getSystemId(), sb, eh).parse(scope);
037      }
038    
039      public ParsedPattern parseInclude(String uri, SchemaBuilder sb, IncludedGrammar g)
040              throws BuildException, IllegalSchemaException {
041        InputSource tem = new InputSource(uri);
042        tem.setEncoding(in.getEncoding());
043        return new CompactSyntax(makeReader(tem, er), uri, sb, eh).parseInclude(g);
044      }
045    
046      public ParsedPattern parseExternal(String uri, SchemaBuilder sb, Scope scope)
047              throws BuildException, IllegalSchemaException {
048        InputSource tem = new InputSource(uri);
049        tem.setEncoding(in.getEncoding());
050        return new CompactSyntax(makeReader(tem, er), uri, sb, eh).parse(scope);
051      }
052    
053      private static final String UTF8 = EncodingMap.getJavaName("UTF-8");
054      private static final String UTF16 = EncodingMap.getJavaName("UTF-16");
055    
056      private static Reader makeReader(InputSource is, EntityResolver er) throws BuildException {
057        try {
058          Reader r = is.getCharacterStream();
059          if (r == null) {
060            InputStream in = is.getByteStream();
061            String encoding = is.getEncoding();
062            if (in == null) {
063              String systemId = is.getSystemId();
064              if (er == null) {
065                in = new URL(systemId).openStream();
066              } else {
067                InputSource is2 = er.resolveEntity(null, systemId);
068                if (is2 == null) {
069                  throw new IOException("Unable to resolve entity by system ID.");
070                }
071                r = is2.getCharacterStream();
072                if (r == null) {
073                  in = is2.getByteStream();
074                  encoding = is2.getEncoding();
075                }
076              }
077            }
078            if (encoding == null) {
079              PushbackInputStream pb = new PushbackInputStream(in, 2);
080              encoding = detectEncoding(pb);
081              in = pb;
082            }
083            r = new InputStreamReader(in, encoding);
084          }
085          return r;
086        }
087        catch (IOException e) {
088          throw new BuildException(e);
089        }
090        catch (SAXException e) {
091          throw new BuildException(e);        
092        }
093      }
094    
095      static private String detectEncoding(PushbackInputStream in) throws IOException {
096        String encoding = UTF8;
097        int b1 = in.read();
098        if (b1 != -1) {
099          int b2 = in.read();
100          if (b2 != -1) {
101            in.unread(b2);
102            if ((b1 == 0xFF && b2 == 0xFE) || (b1 == 0xFE && b2 == 0xFF))
103              encoding = UTF16;
104          }
105          in.unread(b1);
106        }
107        return encoding;
108      }
109    }