001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     * Copyright (c) 2008-2010 Mozilla Foundation
004     *
005     * Permission is hereby granted, free of charge, to any person obtaining a 
006     * copy of this software and associated documentation files (the "Software"), 
007     * to deal in the Software without restriction, including without limitation 
008     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
009     * and/or sell copies of the Software, and to permit persons to whom the 
010     * Software is furnished to do so, subject to the following conditions:
011     *
012     * The above copyright notice and this permission notice shall be included in 
013     * all copies or substantial portions of the Software.
014     *
015     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
016     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
017     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
018     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
019     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
020     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
021     * DEALINGS IN THE SOFTWARE.
022     */
023    
024    package nu.validator.htmlparser.common;
025    
026    import nu.validator.htmlparser.annotation.Const;
027    import nu.validator.htmlparser.annotation.NoLength;
028    import nu.validator.htmlparser.impl.ElementName;
029    import nu.validator.htmlparser.impl.HtmlAttributes;
030    import nu.validator.htmlparser.impl.Tokenizer;
031    
032    import org.xml.sax.SAXException;
033    
034    /**
035     * <code>Tokenizer</code> reports tokens through this interface.
036     * 
037     * @version $Id$
038     * @author hsivonen
039     */
040    public interface TokenHandler {
041    
042        /**
043         * Called once at the start of tokenization, before any other method
044         * on this interface is called. Implementations should hold on to the
045         * <code>Tokenizer</code> reference in order to set the content model
046         * flag and in order to be able to query for <code>Locator</code>
047         * data.
048         *
049         * @param self
050         *            the <code>Tokenizer</code> calling this handler
051         * @throws SAXException
052         *             if something went wrong
053         */
054        void startTokenization(Tokenizer self) throws SAXException;
055    
056        /**
057         * Tells the caller whether this handler implementation cares about
058         * comments.
059         *
060         * @return <code>true</code> if this handler wants comments
061         * @throws SAXException
062         *             if something went wrong
063         */
064        boolean wantsComments() throws SAXException;
065    
066        /**
067         * Receives a doctype token.
068         *
069         * @param name the doctype name
070         * @param publicIdentifier the public identifier
071         * @param systemIdentifier the system identifier
072         * @param forceQuirks the force-quirks flag of the doctype token.
073         *            NOTE(review): in the HTML tokenization spec this flag is
074         *            set for a malformed doctype, so <code>true</code>
075         *            presumably means the token is <em>not</em> correct;
076         *            confirm against <code>Tokenizer</code>
077         * @throws SAXException
078         *             if something went wrong
079         */
080        void doctype(String name, String publicIdentifier,
081                String systemIdentifier, boolean forceQuirks) throws SAXException;
082    
083        /**
084         * Receives a start tag token.
085         *
086         * @param eltName the element name of the tag
087         * @param attributes the attributes of the tag
088         * @param selfClosing the self-closing flag of the tag.
089         *            NOTE(review): presumably <code>true</code> when the tag
090         *            was written with the trailing-solidus syntax; confirm
091         *            against <code>Tokenizer</code>
092         * @throws SAXException
093         *             if something went wrong
094         */
095        void startTag(ElementName eltName, HtmlAttributes attributes,
096                boolean selfClosing) throws SAXException;
097    
098        /**
099         * Receives an end tag token.
100         *
101         * @param eltName
102         *            the element name of the tag
103         * @throws SAXException
104         *             if something went wrong
105         */
106        void endTag(ElementName eltName) throws SAXException;
107    
108        /**
109         * Receives a comment token. The data is junk if
110         * <code>wantsComments()</code> returned <code>false</code>.
111         *
112         * @param buf
113         *            a buffer holding the data
114         * @param start
115         *            the offset into the buffer
116         * @param length the number of code units to read
117         * @throws SAXException
118         *             if something went wrong
119         */
120        void comment(@NoLength char[] buf, int start, int length) throws SAXException;
121    
122        /**
123         * Receives character tokens. The semantics are the same as those of
124         * the SAX method of the same name.
125         *
126         * @param buf
127         *            a buffer holding the data
128         * @param start
129         *            the offset into the buffer
130         * @param length
131         *            the number of code units to read
132         * @throws SAXException
133         *             if something went wrong
134         * @see org.xml.sax.ContentHandler#characters(char[], int, int)
135         */
136        void characters(@Const @NoLength char[] buf, int start, int length)
137                throws SAXException;
138    
139        /**
140         * Reports that a U+0000 is being turned into a U+FFFD.
141         *
142         * @throws SAXException
143         *             if something went wrong
144         */
145        void zeroOriginatingReplacementCharacter() throws SAXException;
146    
147        /**
148         * Receives the end-of-file token.
149         *
150         * @throws SAXException
151         *             if something went wrong
152         */
153        void eof() throws SAXException;
154    
155        /**
156         * Performs final cleanup at the end of tokenization.
157         *
158         * @throws SAXException
159         *             if something went wrong
160         */
161        void endTokenization() throws SAXException;
162    
163        /**
164         * Checks whether CDATA sections are allowed.
165         *
166         * @return <code>true</code> if CDATA sections are allowed
167         * @throws SAXException
168         *             if something went wrong
169         */
170        boolean cdataSectionAllowed() throws SAXException;
171    }