001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package nu.validator.htmlparser.impl;
024
025 import org.xml.sax.Attributes;
026 import org.xml.sax.SAXException;
027
028 /**
029 * <code>Tokenizer</code> reports tokens through this interface.
030 *
031 * @version $Id: TokenHandler.java 150 2007-08-16 19:21:25Z hsivonen $
032 * @author hsivonen
033 */
034 public interface TokenHandler {
035
036 /**
037 * This method is called at the start of tokenization before any other
038 * methods on this interface are called. Implementations should hold
039 * the reference to the <code>Tokenizer</code> in order to set the
040 * content model flag and in order to be able to query for
041 * <code>Locator</code> data.
042 *
043 * @param self the <code>Tokenizer</code>.
044 * @throws SAXException if something went wrong
045 */
046 public void start(Tokenizer self) throws SAXException;
047
048 /**
049 * If this handler implementation cares about comments, return <code>true</code>.
050 * If not, return <code>false</code>.
051 *
052 * @return whether this handler wants comments
053 * @throws SAXException if something went wrong
054 */
055 public boolean wantsComments() throws SAXException;
056
057 /**
058 * Receive a doctype token.
059 *
060 * @param name the name
061 * @param publicIdentifier the public id
062 * @param systemIdentifier the system id
063 * @param correct whether the token is correct
064 * @throws SAXException if something went wrong
065 */
066 public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean correct) throws SAXException;
067
068 /**
069 * Receive a start tag token.
070 *
071 * @param name the tag name
072 * @param attributes the attributes
073 * @throws SAXException if something went wrong
074 */
075 public void startTag(String name, Attributes attributes) throws SAXException;
076
077 /**
078 * Receive an end tag token.
079 *
080 * @param name the tag name
081 * @param attributes the attributes
082 * @throws SAXException if something went wrong
083 */
084 public void endTag(String name, Attributes attributes) throws SAXException;
085
086 /**
087 * Receive a comment token. The data is junk if the <code>wantsComments()</code>
088 * returned <code>false</code>.
089 *
090 * @param buf a buffer holding the data
091 * @param length the number of code units to read
092 * @throws SAXException if something went wrong
093 */
094 public void comment(char[] buf, int length) throws SAXException;
095
096 /**
097 * Receive character tokens. This method has the same semantics as
098 * the SAX method of the same name.
099 *
100 * @param buf a buffer holding the data
101 * @param start offset into the buffer
102 * @param length the number of code units to read
103 * @throws SAXException if something went wrong
104 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
105 */
106 public void characters(char[] buf, int start, int length) throws SAXException;
107
108 /**
109 * The end-of-file token.
110 *
111 * @throws SAXException if something went wrong
112 */
113 public void eof() throws SAXException;
114
115 }