001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package nu.validator.htmlparser.impl;
024    
025    import org.xml.sax.Attributes;
026    import org.xml.sax.SAXException;
027    
028    /**
029     * <code>Tokenizer</code> reports tokens through this interface.
030     * 
031     * @version $Id: TokenHandler.java 150 2007-08-16 19:21:25Z hsivonen $
032     * @author hsivonen
033     */
034    public interface TokenHandler {
035    
036        /**
037         * This method is called at the start of tokenization before any other 
038         * methods on this interface are called. Implementations should hold 
039         * the reference to the <code>Tokenizer</code> in order to set the 
040         * content model flag and in order to be able to query for 
041         * <code>Locator</code> data.
042         * 
043         * @param self the <code>Tokenizer</code>.
044         * @throws SAXException if something went wrong
045         */
046        public void start(Tokenizer self) throws SAXException;
047    
048        /**
049         * If this handler implementation cares about comments, return <code>true</code>.
050         * If not, return <code>false</code>.
051         * 
052         * @return whether this handler wants comments
053         * @throws SAXException if something went wrong
054         */
055        public boolean wantsComments() throws SAXException;
056        
057        /**
058         * Receive a doctype token.
059         * 
060         * @param name the name
061         * @param publicIdentifier the public id
062         * @param systemIdentifier the system id
063         * @param correct whether the token is correct
064         * @throws SAXException if something went wrong
065         */
066        public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean correct) throws SAXException;
067    
068        /**
069         * Receive a start tag token.
070         * 
071         * @param name the tag name
072         * @param attributes the attributes
073         * @throws SAXException if something went wrong
074         */
075        public void startTag(String name, Attributes attributes) throws SAXException;
076        
077        /**
078         * Receive an end tag token.
079         * 
080         * @param name the tag name
081         * @param attributes the attributes
082         * @throws SAXException if something went wrong
083         */
084        public void endTag(String name, Attributes attributes) throws SAXException;
085        
086        /**
087         * Receive a comment token. The data is junk if the <code>wantsComments()</code> 
088         * returned <code>false</code>.
089         * 
090         * @param buf a buffer holding the data
091         * @param length the number of code units to read
092         * @throws SAXException if something went wrong
093         */
094        public void comment(char[] buf, int length) throws SAXException;
095        
096        /**
097         * Receive character tokens. This method has the same semantics as 
098         * the SAX method of the same name.
099         * 
100         * @param buf a buffer holding the data
101         * @param start offset into the buffer
102         * @param length the number of code units to read
103         * @throws SAXException if something went wrong
104         * @see org.xml.sax.ContentHandler#characters(char[], int, int)
105         */
106        public void characters(char[] buf, int start, int length) throws SAXException;
107        
108        /**
109         * The end-of-file token.
110         * 
111         * @throws SAXException if something went wrong
112         */
113        public void eof() throws SAXException;
114        
115    }