nu.validator.htmlparser.io
Class Driver

java.lang.Object
  extended by nu.validator.htmlparser.io.Driver
All Implemented Interfaces:
EncodingDeclarationHandler

public class Driver
extends Object
implements EncodingDeclarationHandler


Nested Class Summary
private  class Driver.ReparseException
           
 
Field Summary
private  boolean allowRewinding
           
private  Encoding characterEncoding
           
private  CharacterHandler[] characterHandlers
          Used for NFC checking if non-null, source code capture, etc.
private  Confidence confidence
           
private  Heuristics heuristics
           
private  Reader reader
          The input UTF-16 code unit stream.
private  RewindableInputStream rewindableInputStream
          The reference to the rewindable byte stream.
private  boolean swallowBom
           
private  Tokenizer tokenizer
           
 
Constructor Summary
Driver(TokenHandler tokenHandler)
           
Driver(TokenHandler tokenHandler, boolean newAttributesEachTime)
           
 
Method Summary
 void addCharacterHandler(CharacterHandler characterHandler)
           
private  void becomeConfident()
           
(package private)  void dontSwallowBom()
           
protected  Encoding encodingFromExternalDeclaration(String encoding)
          Initializes a decoder from an external encoding declaration.
 String getCharacterEncoding()
           
 Locator getDocumentLocator()
           
 void internalEncodingDeclaration(String internalCharset)
           
 boolean isAllowRewinding()
          Returns the allowRewinding.
 boolean isCheckingNormalization()
          Query if checking normalization.
(package private)  void notifyAboutMetaBoundary()
           
private  void runStates()
           
 void setAllowRewinding(boolean allowRewinding)
          Sets the allowRewinding.
 void setCheckingNormalization(boolean enable)
          Turns NFC checking on or off.
 void setCommentPolicy(XmlViolationPolicy commentPolicy)
           
 void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
           
 void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
           
 void setEncoding(Encoding encoding, Confidence confidence)
           
 void setErrorHandler(ErrorHandler eh)
           
 void setHeuristics(Heuristics heuristics)
          Sets the encoding sniffing heuristics.
 void setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
           
 void setMappingLangToXmlLang(boolean mappingLangToXmlLang)
           
 void setNamePolicy(XmlViolationPolicy namePolicy)
           
 void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
           
 void tokenize(InputSource is)
          Runs the tokenization.
protected  void warnWithoutLocation(String message)
          Reports a warning without line/column location information.
protected  Encoding whineAboutEncodingAndReturnActual(String encoding, Encoding cs)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

reader

private Reader reader
The input UTF-16 code unit stream. If a byte stream was given, this object is an instance of HtmlInputStreamReader.


rewindableInputStream

private RewindableInputStream rewindableInputStream
The reference to the rewindable byte stream. null if prohibited or no longer needed.


swallowBom

private boolean swallowBom

characterEncoding

private Encoding characterEncoding

allowRewinding

private boolean allowRewinding

heuristics

private Heuristics heuristics

tokenizer

private final Tokenizer tokenizer

confidence

private Confidence confidence

characterHandlers

private CharacterHandler[] characterHandlers
Used for NFC checking if non-null, source code capture, etc.

Constructor Detail

Driver

public Driver(TokenHandler tokenHandler)

Driver

public Driver(TokenHandler tokenHandler,
              boolean newAttributesEachTime)
Method Detail

isAllowRewinding

public boolean isAllowRewinding()
Returns the allowRewinding.

Returns:
the allowRewinding

setAllowRewinding

public void setAllowRewinding(boolean allowRewinding)
Sets the allowRewinding.

Parameters:
allowRewinding - the allowRewinding to set

setCheckingNormalization

public void setCheckingNormalization(boolean enable)
Turns NFC checking on or off.

Parameters:
enable - true to turn NFC checking on, false to turn it off

addCharacterHandler

public void addCharacterHandler(CharacterHandler characterHandler)

isCheckingNormalization

public boolean isCheckingNormalization()
Query if checking normalization.

Returns:
true if normalization checking is on

tokenize

public void tokenize(InputSource is)
              throws SAXException,
                     IOException
Runs the tokenization. This is the main entry point.

Parameters:
is - the input source
Throws:
SAXException - on fatal error (if configured to treat XML violations as fatal) or if the token handler threw
IOException - if the stream threw

dontSwallowBom

void dontSwallowBom()

runStates

private void runStates()
                throws SAXException,
                       IOException
Throws:
SAXException
IOException

setEncoding

public void setEncoding(Encoding encoding,
                        Confidence confidence)

internalEncodingDeclaration

public void internalEncodingDeclaration(String internalCharset)
                                 throws SAXException
Specified by:
internalEncodingDeclaration in interface EncodingDeclarationHandler
Throws:
SAXException

becomeConfident

private void becomeConfident()

setHeuristics

public void setHeuristics(Heuristics heuristics)
Sets the encoding sniffing heuristics.

Parameters:
heuristics - the heuristics to set

warnWithoutLocation

protected void warnWithoutLocation(String message)
                            throws SAXException
Reports a warning without line/column location information.

Parameters:
message - the message
Throws:
SAXException

encodingFromExternalDeclaration

protected Encoding encodingFromExternalDeclaration(String encoding)
                                            throws SAXException
Initializes a decoder from an external encoding declaration.

Throws:
SAXException

whineAboutEncodingAndReturnActual

protected Encoding whineAboutEncodingAndReturnActual(String encoding,
                                                     Encoding cs)
                                              throws SAXException
Parameters:
encoding -
cs -
Returns:
Throws:
SAXException

notifyAboutMetaBoundary

void notifyAboutMetaBoundary()

setCommentPolicy

public void setCommentPolicy(XmlViolationPolicy commentPolicy)
Parameters:
commentPolicy -
See Also:
Tokenizer.setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)

setContentNonXmlCharPolicy

public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Parameters:
contentNonXmlCharPolicy -
See Also:
Tokenizer.setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)

setContentSpacePolicy

public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
Parameters:
contentSpacePolicy -
See Also:
Tokenizer.setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)

setErrorHandler

public void setErrorHandler(ErrorHandler eh)
Parameters:
eh -
See Also:
Tokenizer.setErrorHandler(org.xml.sax.ErrorHandler)

setHtml4ModeCompatibleWithXhtml1Schemata

public void setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
Parameters:
html4ModeCompatibleWithXhtml1Schemata -
See Also:
Tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(boolean)

setMappingLangToXmlLang

public void setMappingLangToXmlLang(boolean mappingLangToXmlLang)
Parameters:
mappingLangToXmlLang -
See Also:
Tokenizer.setMappingLangToXmlLang(boolean)

setNamePolicy

public void setNamePolicy(XmlViolationPolicy namePolicy)
Parameters:
namePolicy -
See Also:
Tokenizer.setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)

setXmlnsPolicy

public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
Parameters:
xmlnsPolicy -
See Also:
Tokenizer.setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)

getCharacterEncoding

public String getCharacterEncoding()
                            throws SAXException
Specified by:
getCharacterEncoding in interface EncodingDeclarationHandler
Throws:
SAXException

getDocumentLocator

public Locator getDocumentLocator()