nu.validator.htmlparser.impl
Class Tokenizer

java.lang.Object
  extended by nu.validator.htmlparser.impl.Tokenizer
All Implemented Interfaces:
org.xml.sax.Locator
Direct Known Subclasses:
ErrorReportingTokenizer

public class Tokenizer
extends java.lang.Object
implements org.xml.sax.Locator

An implementation of the tokenization algorithm specified at http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html. This class implements the Locator interface. This is not an incidental implementation detail: users of this class are encouraged to make use of the Locator nature. By default, the tokenizer may report data that XML 1.0 bans. The tokenizer can be configured to treat these conditions as fatal or to coerce the infoset to something that XML 1.0 allows.

Version:
$Id$
Author:
hsivonen

Field Summary
static int AFTER_ATTRIBUTE_NAME
           
static int AFTER_ATTRIBUTE_VALUE_QUOTED
           
static int AFTER_DOCTYPE_NAME
           
static int AFTER_DOCTYPE_PUBLIC_IDENTIFIER
           
static int AFTER_DOCTYPE_PUBLIC_KEYWORD
           
static int AFTER_DOCTYPE_SYSTEM_IDENTIFIER
           
static int AFTER_DOCTYPE_SYSTEM_KEYWORD
           
protected  LocatorImpl ampersandLocation
           
static int ATTRIBUTE_NAME
           
static int ATTRIBUTE_VALUE_DOUBLE_QUOTED
           
static int ATTRIBUTE_VALUE_SINGLE_QUOTED
           
static int ATTRIBUTE_VALUE_UNQUOTED
           
protected  AttributeName attributeName
          The current attribute name.
static int BEFORE_ATTRIBUTE_NAME
           
static int BEFORE_ATTRIBUTE_VALUE
           
static int BEFORE_DOCTYPE_NAME
           
static int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
           
static int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
           
static int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
           
static int BOGUS_COMMENT
           
static int BOGUS_COMMENT_HYPHEN
           
static int BOGUS_DOCTYPE
           
static int CDATA_RSQB
           
static int CDATA_RSQB_RSQB
           
static int CDATA_SECTION
           
static int CDATA_START
           
static int CHARACTER_REFERENCE_HILO_LOOKUP
           
static int CHARACTER_REFERENCE_TAIL
           
static int CLOSE_TAG_OPEN
           
static int COMMENT
           
static int COMMENT_END
           
static int COMMENT_END_BANG
           
static int COMMENT_END_DASH
           
static int COMMENT_START
           
static int COMMENT_START_DASH
           
protected  boolean confident
           
static int CONSUME_CHARACTER_REFERENCE
           
static int CONSUME_NCR
           
protected  int cstart
           
static int DATA
           
static int DECIMAL_NRC_LOOP
           
static int DOCTYPE
           
static int DOCTYPE_NAME
           
static int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
           
static int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
           
static int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
           
static int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
           
static int DOCTYPE_UBLIC
           
static int DOCTYPE_YSTEM
           
protected  EncodingDeclarationHandler encodingDeclarationHandler
           
protected  boolean endTag
          true if tokenizing an end tag
protected  ElementName endTagExpectation
          The element whose end tag closes the current CDATA or RCDATA element.
protected  org.xml.sax.ErrorHandler errorHandler
          The error handler.
static int HANDLE_NCR_VALUE
           
static int HANDLE_NCR_VALUE_RECONSUME
           
static int HEX_NCR_LOOP
           
protected  boolean html4
          true when HTML4-specific additional errors are requested.
protected  int index
           
protected  boolean lastCR
          Whether the previous char read was CR.
static int MARKUP_DECLARATION_HYPHEN
           
static int MARKUP_DECLARATION_OCTYPE
           
static int MARKUP_DECLARATION_OPEN
           
static int NON_DATA_END_TAG_NAME
           
static int PLAINTEXT
           
static int RAWTEXT
           
static int RAWTEXT_RCDATA_LESS_THAN_SIGN
           
static int RCDATA
           
static int SCRIPT_DATA
           
static int SCRIPT_DATA_DOUBLE_ESCAPE_END
           
static int SCRIPT_DATA_DOUBLE_ESCAPE_START
           
static int SCRIPT_DATA_DOUBLE_ESCAPED
           
static int SCRIPT_DATA_DOUBLE_ESCAPED_DASH
           
static int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
           
static int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
           
static int SCRIPT_DATA_ESCAPE_START
           
static int SCRIPT_DATA_ESCAPE_START_DASH
           
static int SCRIPT_DATA_ESCAPED
           
static int SCRIPT_DATA_ESCAPED_DASH
           
static int SCRIPT_DATA_ESCAPED_DASH_DASH
           
static int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
           
static int SCRIPT_DATA_LESS_THAN_SIGN
           
static int SELF_CLOSING_START_TAG
           
protected  int stateSave
           
static int TAG_NAME
           
static int TAG_OPEN
           
protected  TokenHandler tokenHandler
          The token handler.
protected  int value
           
 
Constructor Summary
Tokenizer(TokenHandler tokenHandler)
          The constructor.
Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime)
           
 
Method Summary
 void becomeConfident()
           
protected  char checkChar(char[] buf, int pos)
           
 void end()
           
 void eof()
           
 void err(java.lang.String message)
          Reports a Parse Error.
protected  void errAstralNonCharacter(int ch)
           
protected  void errAttributeValueMissing()
           
protected  void errBadCharAfterLt(char c)
           
protected  void errBadCharBeforeAttributeNameOrNull(char c)
           
protected  void errBogusComment()
           
protected  void errBogusDoctype()
           
protected  void errCharRefLacksSemicolon()
           
protected  void errConsecutiveHyphens()
           
protected  void errDuplicateAttribute()
           
protected  void errEofAfterLt()
           
protected  void errEofInAttributeName()
           
protected  void errEofInAttributeValue()
           
protected  void errEofInComment()
           
protected  void errEofInDoctype()
           
protected  void errEofInEndTag()
           
protected  void errEofInPublicId()
           
protected  void errEofInSystemId()
           
protected  void errEofInTagName()
           
protected  void errEofWithoutGt()
           
protected  void errEqualsSignBeforeAttributeName()
           
protected  void errExpectedPublicId()
           
protected  void errExpectedSystemId()
           
protected  void errGarbageAfterLtSlash()
           
protected  void errGtInPublicId()
           
protected  void errGtInSystemId()
           
protected  void errHtml4LtSlashInRcdata(char folded)
           
protected  void errHtml4NonNameInUnquotedAttribute(char c)
           
protected  void errHtml4XmlVoidSyntax()
           
protected  void errHyphenHyphenBang()
           
protected  void errLtGt()
           
protected  void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
           
protected  void errLtSlashGt()
           
protected  void errMissingSpaceBeforeDoctypeName()
           
protected  void errNamelessDoctype()
           
protected  void errNcrControlChar()
           
protected  char errNcrControlChar(char ch)
           
protected  void errNcrCr()
           
protected  void errNcrInC1Range()
           
protected  char errNcrNonCharacter(char ch)
           
protected  void errNcrOutOfRange()
           
protected  void errNcrSurrogate()
           
protected  void errNcrUnassigned()
           
protected  void errNcrZero()
           
protected  void errNoDigitsInNCR()
           
protected  void errNoNamedCharacterMatch()
           
protected  void errNoSpaceBetweenAttributes()
           
protected  void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
           
protected  void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
           
protected  void errNoSpaceBetweenPublicAndSystemIds()
           
protected  void errNotSemicolonTerminated()
           
protected  void errPrematureEndOfComment()
           
protected  void errProcessingInstruction()
           
protected  void errQuoteBeforeAttributeName(char c)
           
protected  void errQuoteOrLtInAttributeNameOrNull(char c)
           
protected  void errSlashNotFollowedByGt()
           
 void errTreeBuilder(java.lang.String message)
           
protected  void errUnescapedAmpersandInterpretedAsCharacterReference()
           
protected  void errUnquotedAttributeValOrNull(char c)
           
protected  void errWarnLtSlashInRcdata()
           
 void fatal(java.lang.String message)
          Reports a condition that would make the infoset incompatible with XML 1.0 as fatal.
protected  void flushChars(char[] buf, int pos)
          Flushes coalesced character tokens.
 int getCol()
          Returns the col.
 int getColumnNumber()
           
 org.xml.sax.ErrorHandler getErrorHandler()
           
 int getLine()
          Returns the line.
 int getLineNumber()
           
 java.lang.String getPublicId()
           
 java.lang.String getSystemId()
           
 void initializeWithoutStarting()
           
 void initLocation(java.lang.String newPublicId, java.lang.String newSystemId)
           
 boolean internalEncodingDeclaration(java.lang.String internalCharset)
           
 boolean isAlreadyComplainedAboutNonAscii()
          Returns the alreadyComplainedAboutNonAscii.
 boolean isInDataState()
           
 boolean isMappingLangToXmlLang()
          Returns the mappingLangToXmlLang.
 boolean isNextCharOnNewLine()
          Returns the nextCharOnNewLine.
 boolean isPrevCR()
           
 void loadState(Tokenizer other)
           
protected  void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
           
protected  void maybeErrSlashInEndTag(boolean selfClosing)
           
protected  void maybeWarnPrivateUse(char ch)
           
protected  void maybeWarnPrivateUseAstral()
           
protected  void noteAttributeWithoutValue()
           
protected  void noteUnquotedAttributeValue()
           
 void notifyAboutMetaBoundary()
           
 void requestSuspension()
           
 void resetToDataState()
           
 void setCommentPolicy(XmlViolationPolicy commentPolicy)
          Sets the commentPolicy.
 void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
          Sets the contentNonXmlCharPolicy.
 void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
          Sets the contentSpacePolicy.
 void setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
          Sets the encodingDeclarationHandler.
 void setErrorHandler(org.xml.sax.ErrorHandler eh)
          Sets the error handler.
 void setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
          Sets the html4ModeCompatibleWithXhtml1Schemata.
 void setInterner(Interner interner)
           
 void setLineNumber(int line)
          For C++ use only.
 void setMappingLangToXmlLang(boolean mappingLangToXmlLang)
          Sets the mappingLangToXmlLang.
 void setNamePolicy(XmlViolationPolicy namePolicy)
           
 void setStateAndEndTagExpectation(int specialTokenizerState, ElementName endTagExpectation)
          Sets the tokenizer state and the associated element name.
 void setStateAndEndTagExpectation(int specialTokenizerState, java.lang.String endTagExpectation)
          Sets the tokenizer state and the associated element name.
 void setTransitionBaseOffset(int offset)
          Sets an offset to be added to the position reported to TransitionHandler.
 void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
          Sets the xmlnsPolicy.
protected  void silentCarriageReturn()
           
protected  void silentLineFeed()
           
 void start()
           
protected  void startErrorReporting()
           
protected  java.lang.String strBufToString()
          The smaller buffer as a String.
 boolean tokenizeBuffer(UTF16Buffer buffer)
           
protected  int transition(int from, int to, boolean reconsume, int pos)
           
 void warn(java.lang.String message)
          Reports a warning.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

DATA

public static final int DATA
See Also:
Constant Field Values

RCDATA

public static final int RCDATA
See Also:
Constant Field Values

SCRIPT_DATA

public static final int SCRIPT_DATA
See Also:
Constant Field Values

RAWTEXT

public static final int RAWTEXT
See Also:
Constant Field Values

SCRIPT_DATA_ESCAPED

public static final int SCRIPT_DATA_ESCAPED
See Also:
Constant Field Values

ATTRIBUTE_VALUE_DOUBLE_QUOTED

public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
See Also:
Constant Field Values

ATTRIBUTE_VALUE_SINGLE_QUOTED

public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
See Also:
Constant Field Values

ATTRIBUTE_VALUE_UNQUOTED

public static final int ATTRIBUTE_VALUE_UNQUOTED
See Also:
Constant Field Values

PLAINTEXT

public static final int PLAINTEXT
See Also:
Constant Field Values

TAG_OPEN

public static final int TAG_OPEN
See Also:
Constant Field Values

CLOSE_TAG_OPEN

public static final int CLOSE_TAG_OPEN
See Also:
Constant Field Values

TAG_NAME

public static final int TAG_NAME
See Also:
Constant Field Values

BEFORE_ATTRIBUTE_NAME

public static final int BEFORE_ATTRIBUTE_NAME
See Also:
Constant Field Values

ATTRIBUTE_NAME

public static final int ATTRIBUTE_NAME
See Also:
Constant Field Values

AFTER_ATTRIBUTE_NAME

public static final int AFTER_ATTRIBUTE_NAME
See Also:
Constant Field Values

BEFORE_ATTRIBUTE_VALUE

public static final int BEFORE_ATTRIBUTE_VALUE
See Also:
Constant Field Values

AFTER_ATTRIBUTE_VALUE_QUOTED

public static final int AFTER_ATTRIBUTE_VALUE_QUOTED
See Also:
Constant Field Values

BOGUS_COMMENT

public static final int BOGUS_COMMENT
See Also:
Constant Field Values

MARKUP_DECLARATION_OPEN

public static final int MARKUP_DECLARATION_OPEN
See Also:
Constant Field Values

DOCTYPE

public static final int DOCTYPE
See Also:
Constant Field Values

BEFORE_DOCTYPE_NAME

public static final int BEFORE_DOCTYPE_NAME
See Also:
Constant Field Values

DOCTYPE_NAME

public static final int DOCTYPE_NAME
See Also:
Constant Field Values

AFTER_DOCTYPE_NAME

public static final int AFTER_DOCTYPE_NAME
See Also:
Constant Field Values

BEFORE_DOCTYPE_PUBLIC_IDENTIFIER

public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
See Also:
Constant Field Values

DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED

public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
See Also:
Constant Field Values

DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED

public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
See Also:
Constant Field Values

AFTER_DOCTYPE_PUBLIC_IDENTIFIER

public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER
See Also:
Constant Field Values

BEFORE_DOCTYPE_SYSTEM_IDENTIFIER

public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
See Also:
Constant Field Values

DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED

public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
See Also:
Constant Field Values

DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED

public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
See Also:
Constant Field Values

AFTER_DOCTYPE_SYSTEM_IDENTIFIER

public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER
See Also:
Constant Field Values

BOGUS_DOCTYPE

public static final int BOGUS_DOCTYPE
See Also:
Constant Field Values

COMMENT_START

public static final int COMMENT_START
See Also:
Constant Field Values

COMMENT_START_DASH

public static final int COMMENT_START_DASH
See Also:
Constant Field Values

COMMENT

public static final int COMMENT
See Also:
Constant Field Values

COMMENT_END_DASH

public static final int COMMENT_END_DASH
See Also:
Constant Field Values

COMMENT_END

public static final int COMMENT_END
See Also:
Constant Field Values

COMMENT_END_BANG

public static final int COMMENT_END_BANG
See Also:
Constant Field Values

NON_DATA_END_TAG_NAME

public static final int NON_DATA_END_TAG_NAME
See Also:
Constant Field Values

MARKUP_DECLARATION_HYPHEN

public static final int MARKUP_DECLARATION_HYPHEN
See Also:
Constant Field Values

MARKUP_DECLARATION_OCTYPE

public static final int MARKUP_DECLARATION_OCTYPE
See Also:
Constant Field Values

DOCTYPE_UBLIC

public static final int DOCTYPE_UBLIC
See Also:
Constant Field Values

DOCTYPE_YSTEM

public static final int DOCTYPE_YSTEM
See Also:
Constant Field Values

AFTER_DOCTYPE_PUBLIC_KEYWORD

public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD
See Also:
Constant Field Values

BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS

public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
See Also:
Constant Field Values

AFTER_DOCTYPE_SYSTEM_KEYWORD

public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD
See Also:
Constant Field Values

CONSUME_CHARACTER_REFERENCE

public static final int CONSUME_CHARACTER_REFERENCE
See Also:
Constant Field Values

CONSUME_NCR

public static final int CONSUME_NCR
See Also:
Constant Field Values

CHARACTER_REFERENCE_TAIL

public static final int CHARACTER_REFERENCE_TAIL
See Also:
Constant Field Values

HEX_NCR_LOOP

public static final int HEX_NCR_LOOP
See Also:
Constant Field Values

DECIMAL_NRC_LOOP

public static final int DECIMAL_NRC_LOOP
See Also:
Constant Field Values

HANDLE_NCR_VALUE

public static final int HANDLE_NCR_VALUE
See Also:
Constant Field Values

HANDLE_NCR_VALUE_RECONSUME

public static final int HANDLE_NCR_VALUE_RECONSUME
See Also:
Constant Field Values

CHARACTER_REFERENCE_HILO_LOOKUP

public static final int CHARACTER_REFERENCE_HILO_LOOKUP
See Also:
Constant Field Values

SELF_CLOSING_START_TAG

public static final int SELF_CLOSING_START_TAG
See Also:
Constant Field Values

CDATA_START

public static final int CDATA_START
See Also:
Constant Field Values

CDATA_SECTION

public static final int CDATA_SECTION
See Also:
Constant Field Values

CDATA_RSQB

public static final int CDATA_RSQB
See Also:
Constant Field Values

CDATA_RSQB_RSQB

public static final int CDATA_RSQB_RSQB
See Also:
Constant Field Values

SCRIPT_DATA_LESS_THAN_SIGN

public static final int SCRIPT_DATA_LESS_THAN_SIGN
See Also:
Constant Field Values

SCRIPT_DATA_ESCAPE_START

public static final int SCRIPT_DATA_ESCAPE_START
See Also:
Constant Field Values

SCRIPT_DATA_ESCAPE_START_DASH

public static final int SCRIPT_DATA_ESCAPE_START_DASH
See Also:
Constant Field Values

SCRIPT_DATA_ESCAPED_DASH

public static final int SCRIPT_DATA_ESCAPED_DASH
See Also:
Constant Field Values

SCRIPT_DATA_ESCAPED_DASH_DASH

public static final int SCRIPT_DATA_ESCAPED_DASH_DASH
See Also:
Constant Field Values

BOGUS_COMMENT_HYPHEN

public static final int BOGUS_COMMENT_HYPHEN
See Also:
Constant Field Values

RAWTEXT_RCDATA_LESS_THAN_SIGN

public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN
See Also:
Constant Field Values

SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN

public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
See Also:
Constant Field Values

SCRIPT_DATA_DOUBLE_ESCAPE_START

public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START
See Also:
Constant Field Values

SCRIPT_DATA_DOUBLE_ESCAPED

public static final int SCRIPT_DATA_DOUBLE_ESCAPED
See Also:
Constant Field Values

SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN

public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
See Also:
Constant Field Values

SCRIPT_DATA_DOUBLE_ESCAPED_DASH

public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH
See Also:
Constant Field Values

SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH

public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
See Also:
Constant Field Values

SCRIPT_DATA_DOUBLE_ESCAPE_END

public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END
See Also:
Constant Field Values

tokenHandler

protected final TokenHandler tokenHandler
The token handler.


encodingDeclarationHandler

protected EncodingDeclarationHandler encodingDeclarationHandler

errorHandler

protected org.xml.sax.ErrorHandler errorHandler
The error handler.


lastCR

protected boolean lastCR
Whether the previous char read was CR.


stateSave

protected int stateSave

index

protected int index

value

protected int value

cstart

protected int cstart

endTagExpectation

protected ElementName endTagExpectation
The element whose end tag closes the current CDATA or RCDATA element.


endTag

protected boolean endTag
true if tokenizing an end tag


attributeName

protected AttributeName attributeName
The current attribute name.


html4

protected boolean html4
true when HTML4-specific additional errors are requested.


confident

protected boolean confident

ampersandLocation

protected LocatorImpl ampersandLocation
Constructor Detail

Tokenizer

public Tokenizer(TokenHandler tokenHandler,
                 boolean newAttributesEachTime)

Tokenizer

public Tokenizer(TokenHandler tokenHandler)
The constructor.

Parameters:
tokenHandler - the handler for receiving tokens
Method Detail

setInterner

public void setInterner(Interner interner)

initLocation

public void initLocation(java.lang.String newPublicId,
                         java.lang.String newSystemId)

isMappingLangToXmlLang

public boolean isMappingLangToXmlLang()
Returns the mappingLangToXmlLang.

Returns:
the mappingLangToXmlLang

setMappingLangToXmlLang

public void setMappingLangToXmlLang(boolean mappingLangToXmlLang)
Sets the mappingLangToXmlLang.

Parameters:
mappingLangToXmlLang - the mappingLangToXmlLang to set

setErrorHandler

public void setErrorHandler(org.xml.sax.ErrorHandler eh)
Sets the error handler.

See Also:
XMLReader.setErrorHandler(org.xml.sax.ErrorHandler)

getErrorHandler

public org.xml.sax.ErrorHandler getErrorHandler()

setCommentPolicy

public void setCommentPolicy(XmlViolationPolicy commentPolicy)
Sets the commentPolicy.

Parameters:
commentPolicy - the commentPolicy to set

setContentNonXmlCharPolicy

public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Sets the contentNonXmlCharPolicy.

Parameters:
contentNonXmlCharPolicy - the contentNonXmlCharPolicy to set

setContentSpacePolicy

public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
Sets the contentSpacePolicy.

Parameters:
contentSpacePolicy - the contentSpacePolicy to set

setXmlnsPolicy

public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
Sets the xmlnsPolicy.

Parameters:
xmlnsPolicy - the xmlnsPolicy to set

setNamePolicy

public void setNamePolicy(XmlViolationPolicy namePolicy)

setHtml4ModeCompatibleWithXhtml1Schemata

public void setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
Sets the html4ModeCompatibleWithXhtml1Schemata.

Parameters:
html4ModeCompatibleWithXhtml1Schemata - the html4ModeCompatibleWithXhtml1Schemata to set

setStateAndEndTagExpectation

public void setStateAndEndTagExpectation(int specialTokenizerState,
                                         java.lang.String endTagExpectation)
Sets the tokenizer state and the associated element name. This should only ever be used to put the tokenizer into one of the states that have a special end tag expectation.

Parameters:
specialTokenizerState - the tokenizer state to set
endTagExpectation - the expected end tag for transitioning back to normal

setStateAndEndTagExpectation

public void setStateAndEndTagExpectation(int specialTokenizerState,
                                         ElementName endTagExpectation)
Sets the tokenizer state and the associated element name. This should only ever be used to put the tokenizer into one of the states that have a special end tag expectation.

Parameters:
specialTokenizerState - the tokenizer state to set
endTagExpectation - the expected end tag for transitioning back to normal

setLineNumber

public void setLineNumber(int line)
For C++ use only.


getLineNumber

public int getLineNumber()
Specified by:
getLineNumber in interface org.xml.sax.Locator
See Also:
Locator.getLineNumber()

getColumnNumber

public int getColumnNumber()
Specified by:
getColumnNumber in interface org.xml.sax.Locator
See Also:
Locator.getColumnNumber()

getPublicId

public java.lang.String getPublicId()
Specified by:
getPublicId in interface org.xml.sax.Locator
See Also:
Locator.getPublicId()

getSystemId

public java.lang.String getSystemId()
Specified by:
getSystemId in interface org.xml.sax.Locator
See Also:
Locator.getSystemId()

notifyAboutMetaBoundary

public void notifyAboutMetaBoundary()

strBufToString

protected java.lang.String strBufToString()
The smaller buffer as a String. Currently only used for error reporting.

C++ memory note: The return value must be released.

Returns:
the smaller buffer as a string

flushChars

protected void flushChars(char[] buf,
                          int pos)
                   throws org.xml.sax.SAXException
Flushes coalesced character tokens.

Parameters:
buf - TODO
pos - TODO
Throws:
org.xml.sax.SAXException

fatal

public void fatal(java.lang.String message)
           throws org.xml.sax.SAXException
Reports a condition that would make the infoset incompatible with XML 1.0 as fatal.

Parameters:
message - the message
Throws:
org.xml.sax.SAXException
org.xml.sax.SAXParseException

err

public void err(java.lang.String message)
         throws org.xml.sax.SAXException
Reports a Parse Error.

Parameters:
message - the message
Throws:
org.xml.sax.SAXException

errTreeBuilder

public void errTreeBuilder(java.lang.String message)
                    throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

warn

public void warn(java.lang.String message)
          throws org.xml.sax.SAXException
Reports a warning.

Parameters:
message - the message
Throws:
org.xml.sax.SAXException

startErrorReporting

protected void startErrorReporting()
                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

start

public void start()
           throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

tokenizeBuffer

public boolean tokenizeBuffer(UTF16Buffer buffer)
                       throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

transition

protected int transition(int from,
                         int to,
                         boolean reconsume,
                         int pos)
                  throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

silentCarriageReturn

protected void silentCarriageReturn()

silentLineFeed

protected void silentLineFeed()

eof

public void eof()
         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

checkChar

protected char checkChar(char[] buf,
                         int pos)
                  throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

isAlreadyComplainedAboutNonAscii

public boolean isAlreadyComplainedAboutNonAscii()
Returns the alreadyComplainedAboutNonAscii.

Returns:
the alreadyComplainedAboutNonAscii

internalEncodingDeclaration

public boolean internalEncodingDeclaration(java.lang.String internalCharset)
                                    throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

end

public void end()
         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

requestSuspension

public void requestSuspension()

becomeConfident

public void becomeConfident()

isNextCharOnNewLine

public boolean isNextCharOnNewLine()
Returns the nextCharOnNewLine.

Returns:
the nextCharOnNewLine

isPrevCR

public boolean isPrevCR()

getLine

public int getLine()
Returns the line.

Returns:
the line

getCol

public int getCol()
Returns the col.

Returns:
the col

isInDataState

public boolean isInDataState()

resetToDataState

public void resetToDataState()

loadState

public void loadState(Tokenizer other)
               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

initializeWithoutStarting

public void initializeWithoutStarting()
                               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errGarbageAfterLtSlash

protected void errGarbageAfterLtSlash()
                               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errLtSlashGt

protected void errLtSlashGt()
                     throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errWarnLtSlashInRcdata

protected void errWarnLtSlashInRcdata()
                               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errHtml4LtSlashInRcdata

protected void errHtml4LtSlashInRcdata(char folded)
                                throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errCharRefLacksSemicolon

protected void errCharRefLacksSemicolon()
                                 throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNoDigitsInNCR

protected void errNoDigitsInNCR()
                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errGtInSystemId

protected void errGtInSystemId()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errGtInPublicId

protected void errGtInPublicId()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNamelessDoctype

protected void errNamelessDoctype()
                           throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errConsecutiveHyphens

protected void errConsecutiveHyphens()
                              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errPrematureEndOfComment

protected void errPrematureEndOfComment()
                                 throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errBogusComment

protected void errBogusComment()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errUnquotedAttributeValOrNull

protected void errUnquotedAttributeValOrNull(char c)
                                      throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errSlashNotFollowedByGt

protected void errSlashNotFollowedByGt()
                                throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errHtml4XmlVoidSyntax

protected void errHtml4XmlVoidSyntax()
                              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNoSpaceBetweenAttributes

protected void errNoSpaceBetweenAttributes()
                                    throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errHtml4NonNameInUnquotedAttribute

protected void errHtml4NonNameInUnquotedAttribute(char c)
                                           throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errLtOrEqualsOrGraveInUnquotedAttributeOrNull

protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
                                                      throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errAttributeValueMissing

protected void errAttributeValueMissing()
                                 throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errBadCharBeforeAttributeNameOrNull

protected void errBadCharBeforeAttributeNameOrNull(char c)
                                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEqualsSignBeforeAttributeName

protected void errEqualsSignBeforeAttributeName()
                                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errBadCharAfterLt

protected void errBadCharAfterLt(char c)
                          throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errLtGt

protected void errLtGt()
                throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errProcessingInstruction

protected void errProcessingInstruction()
                                 throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errUnescapedAmpersandInterpretedAsCharacterReference

protected void errUnescapedAmpersandInterpretedAsCharacterReference()
                                                             throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNotSemicolonTerminated

protected void errNotSemicolonTerminated()
                                  throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNoNamedCharacterMatch

protected void errNoNamedCharacterMatch()
                                 throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errQuoteBeforeAttributeName

protected void errQuoteBeforeAttributeName(char c)
                                    throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errQuoteOrLtInAttributeNameOrNull

protected void errQuoteOrLtInAttributeNameOrNull(char c)
                                          throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errExpectedPublicId

protected void errExpectedPublicId()
                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errBogusDoctype

protected void errBogusDoctype()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

maybeWarnPrivateUseAstral

protected void maybeWarnPrivateUseAstral()
                                  throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

maybeWarnPrivateUse

protected void maybeWarnPrivateUse(char ch)
                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

maybeErrAttributesOnEndTag

protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
                                   throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

maybeErrSlashInEndTag

protected void maybeErrSlashInEndTag(boolean selfClosing)
                              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrNonCharacter

protected char errNcrNonCharacter(char ch)
                           throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errAstralNonCharacter

protected void errAstralNonCharacter(int ch)
                              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrSurrogate

protected void errNcrSurrogate()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrControlChar

protected char errNcrControlChar(char ch)
                          throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrCr

protected void errNcrCr()
                 throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrInC1Range

protected void errNcrInC1Range()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInPublicId

protected void errEofInPublicId()
                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInComment

protected void errEofInComment()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInDoctype

protected void errEofInDoctype()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInAttributeValue

protected void errEofInAttributeValue()
                               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInAttributeName

protected void errEofInAttributeName()
                              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofWithoutGt

protected void errEofWithoutGt()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInTagName

protected void errEofInTagName()
                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInEndTag

protected void errEofInEndTag()
                       throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofAfterLt

protected void errEofAfterLt()
                      throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrOutOfRange

protected void errNcrOutOfRange()
                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrUnassigned

protected void errNcrUnassigned()
                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errDuplicateAttribute

protected void errDuplicateAttribute()
                              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errEofInSystemId

protected void errEofInSystemId()
                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errExpectedSystemId

protected void errExpectedSystemId()
                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errMissingSpaceBeforeDoctypeName

protected void errMissingSpaceBeforeDoctypeName()
                                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errHyphenHyphenBang

protected void errHyphenHyphenBang()
                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrControlChar

protected void errNcrControlChar()
                          throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNcrZero

protected void errNcrZero()
                   throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNoSpaceBetweenDoctypeSystemKeywordAndQuote

protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
                                                      throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNoSpaceBetweenPublicAndSystemIds

protected void errNoSpaceBetweenPublicAndSystemIds()
                                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

errNoSpaceBetweenDoctypePublicKeywordAndQuote

protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
                                                      throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

noteAttributeWithoutValue

protected void noteAttributeWithoutValue()
                                  throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

noteUnquotedAttributeValue

protected void noteUnquotedAttributeValue()
                                   throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

setEncodingDeclarationHandler

public void setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
Sets the EncodingDeclarationHandler for this tokenizer.

Parameters:
encodingDeclarationHandler - the encodingDeclarationHandler to set

setTransitionBaseOffset

public void setTransitionBaseOffset(int offset)
Sets an offset that is added to the position reported to the TransitionHandler.

Parameters:
offset - the offset to add to the position reported to the TransitionHandler