nu.validator.htmlparser.impl
Class TreeBuilder<T>

java.lang.Object
  extended by nu.validator.htmlparser.impl.TreeBuilder<T>
All Implemented Interfaces:
TokenHandler, TreeBuilderState<T>
Direct Known Subclasses:
CoalescingTreeBuilder

public abstract class TreeBuilder<T>
extends java.lang.Object
implements TokenHandler, TreeBuilderState<T>


Field Summary
protected  char[] charBuffer
           
protected  int charBufferLen
           
protected  org.xml.sax.ErrorHandler errorHandler
           
protected  Tokenizer tokenizer
           
 
Constructor Summary
protected TreeBuilder()
           
 
Method Summary
protected  void accumulateCharacters(char[] buf, int start, int length)
           
protected abstract  void addAttributesToElement(T element, HtmlAttributes attributes)
           
protected abstract  void appendCharacters(T parent, char[] buf, int start, int length)
           
protected abstract  void appendChildrenToNewParent(T oldParent, T newParent)
           
protected abstract  void appendComment(T parent, char[] buf, int start, int length)
           
protected abstract  void appendCommentToDocument(char[] buf, int start, int length)
           
protected  void appendDoctypeToDocument(java.lang.String name, java.lang.String publicIdentifier, java.lang.String systemIdentifier)
           
protected abstract  void appendElement(T child, T newParent)
           
protected abstract  void appendIsindexPrompt(T parent)
           
 boolean cdataSectionAllowed()
          Checks if the CDATA sections are allowed.
 void characters(char[] buf, int start, int length)
          Receive character tokens.
 void comment(char[] buf, int start, int length)
          Receive a comment token.
protected abstract  T createElement(java.lang.String ns, java.lang.String name, HtmlAttributes attributes)
           
protected  T createElement(java.lang.String ns, java.lang.String name, HtmlAttributes attributes, T form)
           
protected abstract  T createHtmlElementSetAsRoot(HtmlAttributes attributes)
           
protected  T currentNode()
           
protected abstract  void detachFromParent(T element)
           
 void doctype(java.lang.String name, java.lang.String publicIdentifier, java.lang.String systemIdentifier, boolean forceQuirks)
          Receive a doctype token.
protected  void documentMode(DocumentMode m, java.lang.String publicIdentifier, java.lang.String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
           
protected  void elementPopped(java.lang.String ns, java.lang.String name, T node)
           
protected  void elementPushed(java.lang.String ns, java.lang.String name, T node)
           
protected  void end()
           
 void endTag(ElementName elementName)
          Receive an end tag token.
 void endTokenization()
          Performs final cleanup.
 void eof()
          The end-of-file token.
static java.lang.String extractCharsetFromContent(java.lang.String attributeValue)
           C++ memory note: The return value must be released.
protected  void fatal()
          Reports a condition that would make the infoset incompatible with XML 1.0 as fatal.
protected  void fatal(java.lang.Exception e)
           
 void flushCharacters()
          Flushes the pending characters.
 T getDeepTreeSurrogateParent()
          Returns the deepTreeSurrogateParent.
 org.xml.sax.ErrorHandler getErrorHandler()
          Returns the errorHandler.
 T getFormPointer()
          Returns the formPointer.
 T getHeadPointer()
          Returns the headPointer.
 nu.validator.htmlparser.impl.StackNode<T>[] getListOfActiveFormattingElements()
          Returns the listOfActiveFormattingElements.
 int getListOfActiveFormattingElementsLength()
          Returns the length of the list of active formatting elements.
 int getMode()
          Returns the mode.
 int getOriginalMode()
          Returns the originalMode.
 nu.validator.htmlparser.impl.StackNode<T>[] getStack()
          Returns the stack.
 int getStackLength()
          Returns the length of the stack.
protected abstract  boolean hasChildren(T element)
           
protected abstract  void insertFosterParentedCharacters(char[] buf, int start, int length, T table, T stackParent)
           
protected abstract  void insertFosterParentedChild(T child, T table, T stackParent)
           
 boolean isFramesetOk()
          Returns the framesetOk.
 boolean isNeedToDropLF()
          Returns the needToDropLF.
 boolean isQuirks()
          Returns the quirks.
 boolean isScriptingEnabled()
          Returns the scriptingEnabled.
 void loadState(TreeBuilderState<T> snapshot, Interner interner)
           
protected  void markMalformedIfScript(T elt)
           
 TreeBuilderState<T> newSnapshot()
          Creates a comparable snapshot of the tree builder state.
protected  void requestSuspension()
           
 void setDoctypeExpectation(DoctypeExpectation doctypeExpectation)
          Sets the doctypeExpectation.
 void setDocumentModeHandler(DocumentModeHandler documentModeHandler)
          Sets the documentModeHandler.
 void setErrorHandler(org.xml.sax.ErrorHandler errorHandler)
          Sets the errorHandler.
 void setFragmentContext(java.lang.String context)
          The argument MUST be an interned string or null.
 void setFragmentContext(java.lang.String context, java.lang.String ns, T node, boolean quirks)
          The context argument MUST be an interned string or null.
 void setIgnoringComments(boolean ignoreComments)
           
 void setNamePolicy(XmlViolationPolicy namePolicy)
           
 void setReportingDoctype(boolean reportingDoctype)
          Sets the reportingDoctype.
 void setScriptingEnabled(boolean scriptingEnabled)
          Sets the scriptingEnabled.
 boolean snapshotMatches(TreeBuilderState<T> snapshot)
           
protected  void start(boolean fragmentMode)
           
 void startTag(ElementName elementName, HtmlAttributes attributes, boolean selfClosing)
          Receive a start tag token.
 void startTokenization(Tokenizer self)
          This method is called at the start of tokenization before any other methods on this interface are called.
 boolean wantsComments()
          If this handler implementation cares about comments, return true.
 void zeroOriginatingReplacementCharacter()
          Reports a U+0000 that's being turned into a U+FFFD.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

tokenizer

protected Tokenizer tokenizer

errorHandler

protected org.xml.sax.ErrorHandler errorHandler

charBuffer

protected char[] charBuffer

charBufferLen

protected int charBufferLen
Constructor Detail

TreeBuilder

protected TreeBuilder()
Method Detail

fatal

protected void fatal()
              throws org.xml.sax.SAXException
Reports a condition that would make the infoset incompatible with XML 1.0 as fatal.

Throws:
org.xml.sax.SAXException
org.xml.sax.SAXParseException

fatal

protected final void fatal(java.lang.Exception e)
                    throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

startTokenization

public final void startTokenization(Tokenizer self)
                             throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
This method is called at the start of tokenization before any other methods on this interface are called. Implementations should hold the reference to the Tokenizer in order to set the content model flag and in order to be able to query for Locator data.

Specified by:
startTokenization in interface TokenHandler
Parameters:
self - the Tokenizer.
Throws:
org.xml.sax.SAXException - if something went wrong

doctype

public final void doctype(java.lang.String name,
                          java.lang.String publicIdentifier,
                          java.lang.String systemIdentifier,
                          boolean forceQuirks)
                   throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Receive a doctype token.

Specified by:
doctype in interface TokenHandler
Parameters:
name - the name
publicIdentifier - the public id
systemIdentifier - the system id
forceQuirks - whether the token is correct
Throws:
org.xml.sax.SAXException - if something went wrong

comment

public final void comment(char[] buf,
                          int start,
                          int length)
                   throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Receive a comment token. The data is junk if wantsComments() returned false.

Specified by:
comment in interface TokenHandler
Parameters:
buf - a buffer holding the data
start - the offset into the buffer
length - the number of code units to read
Throws:
org.xml.sax.SAXException - if something went wrong

characters

public final void characters(char[] buf,
                             int start,
                             int length)
                      throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Receive character tokens. This method has the same semantics as the SAX method of the same name.

Specified by:
characters in interface TokenHandler
Parameters:
buf - a buffer holding the data
start - offset into the buffer
length - the number of code units to read
Throws:
org.xml.sax.SAXException - if something went wrong
See Also:
TokenHandler.characters(char[], int, int)

zeroOriginatingReplacementCharacter

public void zeroOriginatingReplacementCharacter()
                                         throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Reports a U+0000 that's being turned into a U+FFFD.

Specified by:
zeroOriginatingReplacementCharacter in interface TokenHandler
Throws:
org.xml.sax.SAXException - if something went wrong
See Also:
TokenHandler.zeroOriginatingReplacementCharacter()

eof

public final void eof()
               throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
The end-of-file token.

Specified by:
eof in interface TokenHandler
Throws:
org.xml.sax.SAXException - if something went wrong

endTokenization

public final void endTokenization()
                           throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Performs final cleanup.

Specified by:
endTokenization in interface TokenHandler
Throws:
org.xml.sax.SAXException - if something went wrong
See Also:
TokenHandler.endTokenization()

startTag

public final void startTag(ElementName elementName,
                           HtmlAttributes attributes,
                           boolean selfClosing)
                    throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Receive a start tag token.

Specified by:
startTag in interface TokenHandler
Parameters:
elementName - the tag name
attributes - the attributes
selfClosing - whether the start tag token is self-closing
Throws:
org.xml.sax.SAXException - if something went wrong

extractCharsetFromContent

public static java.lang.String extractCharsetFromContent(java.lang.String attributeValue)

C++ memory note: The return value must be released.

Returns:
Throws:
org.xml.sax.SAXException
StopSniffingException

endTag

public final void endTag(ElementName elementName)
                  throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Receive an end tag token.

Specified by:
endTag in interface TokenHandler
Parameters:
elementName - the tag name
Throws:
org.xml.sax.SAXException - if something went wrong

accumulateCharacters

protected void accumulateCharacters(char[] buf,
                                    int start,
                                    int length)
                             throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

requestSuspension

protected final void requestSuspension()

createElement

protected abstract T createElement(java.lang.String ns,
                                   java.lang.String name,
                                   HtmlAttributes attributes)
                            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

createElement

protected T createElement(java.lang.String ns,
                          java.lang.String name,
                          HtmlAttributes attributes,
                          T form)
                   throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

createHtmlElementSetAsRoot

protected abstract T createHtmlElementSetAsRoot(HtmlAttributes attributes)
                                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

detachFromParent

protected abstract void detachFromParent(T element)
                                  throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

hasChildren

protected abstract boolean hasChildren(T element)
                                throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

appendElement

protected abstract void appendElement(T child,
                                      T newParent)
                               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

appendChildrenToNewParent

protected abstract void appendChildrenToNewParent(T oldParent,
                                                  T newParent)
                                           throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

insertFosterParentedChild

protected abstract void insertFosterParentedChild(T child,
                                                  T table,
                                                  T stackParent)
                                           throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

insertFosterParentedCharacters

protected abstract void insertFosterParentedCharacters(char[] buf,
                                                       int start,
                                                       int length,
                                                       T table,
                                                       T stackParent)
                                                throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

appendCharacters

protected abstract void appendCharacters(T parent,
                                         char[] buf,
                                         int start,
                                         int length)
                                  throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

appendIsindexPrompt

protected abstract void appendIsindexPrompt(T parent)
                                     throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

appendComment

protected abstract void appendComment(T parent,
                                      char[] buf,
                                      int start,
                                      int length)
                               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

appendCommentToDocument

protected abstract void appendCommentToDocument(char[] buf,
                                                int start,
                                                int length)
                                         throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

addAttributesToElement

protected abstract void addAttributesToElement(T element,
                                               HtmlAttributes attributes)
                                        throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

markMalformedIfScript

protected void markMalformedIfScript(T elt)
                              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

start

protected void start(boolean fragmentMode)
              throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

end

protected void end()
            throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

appendDoctypeToDocument

protected void appendDoctypeToDocument(java.lang.String name,
                                       java.lang.String publicIdentifier,
                                       java.lang.String systemIdentifier)
                                throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

elementPushed

protected void elementPushed(java.lang.String ns,
                             java.lang.String name,
                             T node)
                      throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

elementPopped

protected void elementPopped(java.lang.String ns,
                             java.lang.String name,
                             T node)
                      throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

documentMode

protected void documentMode(DocumentMode m,
                            java.lang.String publicIdentifier,
                            java.lang.String systemIdentifier,
                            boolean html4SpecificAdditionalErrorChecks)
                     throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

wantsComments

public boolean wantsComments()
Description copied from interface: TokenHandler
If this handler implementation cares about comments, return true. If not, return false.

Specified by:
wantsComments in interface TokenHandler
Returns:
whether this handler wants comments
See Also:
TokenHandler.wantsComments()

setIgnoringComments

public void setIgnoringComments(boolean ignoreComments)

setErrorHandler

public final void setErrorHandler(org.xml.sax.ErrorHandler errorHandler)
Sets the errorHandler.

Parameters:
errorHandler - the errorHandler to set

getErrorHandler

public org.xml.sax.ErrorHandler getErrorHandler()
Returns the errorHandler.

Returns:
the errorHandler

setFragmentContext

public final void setFragmentContext(java.lang.String context)
The argument MUST be an interned string or null.

Parameters:
context - the fragment context; MUST be an interned string or null

cdataSectionAllowed

public boolean cdataSectionAllowed()
                            throws org.xml.sax.SAXException
Description copied from interface: TokenHandler
Checks if the CDATA sections are allowed.

Specified by:
cdataSectionAllowed in interface TokenHandler
Returns:
true if CDATA sections are allowed
Throws:
org.xml.sax.SAXException - if something went wrong
See Also:
TokenHandler.cdataSectionAllowed()

setFragmentContext

public final void setFragmentContext(java.lang.String context,
                                     java.lang.String ns,
                                     T node,
                                     boolean quirks)
The context argument MUST be an interned string or null.

Parameters:
context - the fragment context; MUST be an interned string or null

currentNode

protected final T currentNode()

isScriptingEnabled

public boolean isScriptingEnabled()
Returns the scriptingEnabled.

Returns:
the scriptingEnabled

setScriptingEnabled

public void setScriptingEnabled(boolean scriptingEnabled)
Sets the scriptingEnabled.

Parameters:
scriptingEnabled - the scriptingEnabled to set

setDoctypeExpectation

public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation)
Sets the doctypeExpectation.

Parameters:
doctypeExpectation - the doctypeExpectation to set

setNamePolicy

public void setNamePolicy(XmlViolationPolicy namePolicy)

setDocumentModeHandler

public void setDocumentModeHandler(DocumentModeHandler documentModeHandler)
Sets the documentModeHandler.

Parameters:
documentModeHandler - the documentModeHandler to set

setReportingDoctype

public void setReportingDoctype(boolean reportingDoctype)
Sets the reportingDoctype.

Parameters:
reportingDoctype - the reportingDoctype to set

flushCharacters

public final void flushCharacters()
                           throws org.xml.sax.SAXException
Flushes the pending characters. Public for document.write use cases only.

Throws:
org.xml.sax.SAXException

newSnapshot

public TreeBuilderState<T> newSnapshot()
                                throws org.xml.sax.SAXException
Creates a comparable snapshot of the tree builder state. Snapshot creation is only supported immediately after a script end tag has been processed. In C++ the caller is responsible for calling delete on the returned object.

Returns:
a snapshot.
Throws:
org.xml.sax.SAXException

snapshotMatches

public boolean snapshotMatches(TreeBuilderState<T> snapshot)

loadState

public void loadState(TreeBuilderState<T> snapshot,
                      Interner interner)
               throws org.xml.sax.SAXException
Throws:
org.xml.sax.SAXException

getFormPointer

public T getFormPointer()
Description copied from interface: TreeBuilderState
Returns the formPointer.

Specified by:
getFormPointer in interface TreeBuilderState<T>
Returns:
the formPointer
See Also:
TreeBuilderState.getFormPointer()

getHeadPointer

public T getHeadPointer()
Returns the headPointer.

Specified by:
getHeadPointer in interface TreeBuilderState<T>
Returns:
the headPointer

getDeepTreeSurrogateParent

public T getDeepTreeSurrogateParent()
Returns the deepTreeSurrogateParent.

Specified by:
getDeepTreeSurrogateParent in interface TreeBuilderState<T>
Returns:
the deepTreeSurrogateParent

getListOfActiveFormattingElements

public nu.validator.htmlparser.impl.StackNode<T>[] getListOfActiveFormattingElements()
Description copied from interface: TreeBuilderState
Returns the listOfActiveFormattingElements.

Specified by:
getListOfActiveFormattingElements in interface TreeBuilderState<T>
Returns:
the listOfActiveFormattingElements
See Also:
TreeBuilderState.getListOfActiveFormattingElements()

getStack

public nu.validator.htmlparser.impl.StackNode<T>[] getStack()
Description copied from interface: TreeBuilderState
Returns the stack.

Specified by:
getStack in interface TreeBuilderState<T>
Returns:
the stack
See Also:
TreeBuilderState.getStack()

getMode

public int getMode()
Returns the mode.

Specified by:
getMode in interface TreeBuilderState<T>
Returns:
the mode

getOriginalMode

public int getOriginalMode()
Returns the originalMode.

Specified by:
getOriginalMode in interface TreeBuilderState<T>
Returns:
the originalMode

isFramesetOk

public boolean isFramesetOk()
Returns the framesetOk.

Specified by:
isFramesetOk in interface TreeBuilderState<T>
Returns:
the framesetOk

isNeedToDropLF

public boolean isNeedToDropLF()
Returns the needToDropLF.

Specified by:
isNeedToDropLF in interface TreeBuilderState<T>
Returns:
the needToDropLF

isQuirks

public boolean isQuirks()
Returns the quirks.

Specified by:
isQuirks in interface TreeBuilderState<T>
Returns:
the quirks

getListOfActiveFormattingElementsLength

public int getListOfActiveFormattingElementsLength()
Description copied from interface: TreeBuilderState
Returns the length of the list of active formatting elements.

Specified by:
getListOfActiveFormattingElementsLength in interface TreeBuilderState<T>
Returns:
the length of the list of active formatting elements.
See Also:
TreeBuilderState.getListOfActiveFormattingElementsLength()

getStackLength

public int getStackLength()
Description copied from interface: TreeBuilderState
Returns the length of the stack.

Specified by:
getStackLength in interface TreeBuilderState<T>
Returns:
the length of the stack.
See Also:
TreeBuilderState.getStackLength()