001 /*
002 * Copyright (c) 2007 Henri Sivonen
003 * Copyright (c) 2008-2010 Mozilla Foundation
004 *
005 * Permission is hereby granted, free of charge, to any person obtaining a
006 * copy of this software and associated documentation files (the "Software"),
007 * to deal in the Software without restriction, including without limitation
008 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
009 * and/or sell copies of the Software, and to permit persons to whom the
010 * Software is furnished to do so, subject to the following conditions:
011 *
012 * The above copyright notice and this permission notice shall be included in
013 * all copies or substantial portions of the Software.
014 *
015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
021 * DEALINGS IN THE SOFTWARE.
022 */
023
024 package nu.validator.htmlparser.common;
025
026 import nu.validator.htmlparser.annotation.Const;
027 import nu.validator.htmlparser.annotation.NoLength;
028 import nu.validator.htmlparser.impl.ElementName;
029 import nu.validator.htmlparser.impl.HtmlAttributes;
030 import nu.validator.htmlparser.impl.Tokenizer;
031
032 import org.xml.sax.SAXException;
033
034 /**
035 * <code>Tokenizer</code> reports tokens through this interface.
036 *
037 * @version $Id$
038 * @author hsivonen
039 */
040 public interface TokenHandler {
041
042 /**
043 * This method is called at the start of tokenization before any other
044 * methods on this interface are called. Implementations should hold the
045 * reference to the <code>Tokenizer</code> in order to set the content
046 * model flag and in order to be able to query for <code>Locator</code>
047 * data.
048 *
049 * @param self
050 * the <code>Tokenizer</code>.
051 * @throws SAXException
052 * if something went wrong
053 */
054 public void startTokenization(Tokenizer self) throws SAXException;
055
056 /**
057 * If this handler implementation cares about comments, return
058 * <code>true</code>. If not, return <code>false</code>.
059 *
060 * @return whether this handler wants comments
061 * @throws SAXException
062 * if something went wrong
063 */
064 public boolean wantsComments() throws SAXException;
065
066 /**
067 * Receive a doctype token.
068 *
069 * @param name
070 * the name
071 * @param publicIdentifier
072 * the public id
073 * @param systemIdentifier
074 * the system id
075 * @param forceQuirks
076 * whether the token is correct
077 * @throws SAXException
078 * if something went wrong
079 */
080 public void doctype(String name, String publicIdentifier,
081 String systemIdentifier, boolean forceQuirks) throws SAXException;
082
083 /**
084 * Receive a start tag token.
085 *
086 * @param eltName
087 * the tag name
088 * @param attributes
089 * the attributes
090 * @param selfClosing
091 * TODO
092 * @throws SAXException
093 * if something went wrong
094 */
095 public void startTag(ElementName eltName, HtmlAttributes attributes,
096 boolean selfClosing) throws SAXException;
097
098 /**
099 * Receive an end tag token.
100 *
101 * @param eltName
102 * the tag name
103 * @throws SAXException
104 * if something went wrong
105 */
106 public void endTag(ElementName eltName) throws SAXException;
107
108 /**
109 * Receive a comment token. The data is junk if the
110 * <code>wantsComments()</code> returned <code>false</code>.
111 *
112 * @param buf
113 * a buffer holding the data
114 * @param start the offset into the buffer
115 * @param length
116 * the number of code units to read
117 * @throws SAXException
118 * if something went wrong
119 */
120 public void comment(@NoLength char[] buf, int start, int length) throws SAXException;
121
122 /**
123 * Receive character tokens. This method has the same semantics as the SAX
124 * method of the same name.
125 *
126 * @param buf
127 * a buffer holding the data
128 * @param start
129 * offset into the buffer
130 * @param length
131 * the number of code units to read
132 * @throws SAXException
133 * if something went wrong
134 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
135 */
136 public void characters(@Const @NoLength char[] buf, int start, int length)
137 throws SAXException;
138
139 /**
140 * Reports a U+0000 that's being turned into a U+FFFD.
141 *
142 * @throws SAXException
143 * if something went wrong
144 */
145 public void zeroOriginatingReplacementCharacter() throws SAXException;
146
147 /**
148 * The end-of-file token.
149 *
150 * @throws SAXException
151 * if something went wrong
152 */
153 public void eof() throws SAXException;
154
155 /**
156 * The perform final cleanup.
157 *
158 * @throws SAXException
159 * if something went wrong
160 */
161 public void endTokenization() throws SAXException;
162
163 /**
164 * Checks if the CDATA sections are allowed.
165 *
166 * @return <code>true</code> if CDATA sections are allowed
167 * @throws SAXException
168 * if something went wrong
169 */
170 public boolean cdataSectionAllowed() throws SAXException;
171 }