001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.impl; 024 025 import org.xml.sax.Attributes; 026 import org.xml.sax.SAXException; 027 028 /** 029 * <code>Tokenizer</code> reports tokens through this interface. 030 * 031 * @version $Id: TokenHandler.java 150 2007-08-16 19:21:25Z hsivonen $ 032 * @author hsivonen 033 */ 034 public interface TokenHandler { 035 036 /** 037 * This method is called at the start of tokenization before any other 038 * methods on this interface are called. Implementations should hold 039 * the reference to the <code>Tokenizer</code> in order to set the 040 * content model flag and in order to be able to query for 041 * <code>Locator</code> data. 042 * 043 * @param self the <code>Tokenizer</code>. 044 * @throws SAXException if something went wrong 045 */ 046 public void start(Tokenizer self) throws SAXException; 047 048 /** 049 * If this handler implementation cares about comments, return <code>true</code>. 050 * If not, return <code>false</code>. 051 * 052 * @return whether this handler wants comments 053 * @throws SAXException if something went wrong 054 */ 055 public boolean wantsComments() throws SAXException; 056 057 /** 058 * Receive a doctype token. 059 * 060 * @param name the name 061 * @param publicIdentifier the public id 062 * @param systemIdentifier the system id 063 * @param correct whether the token is correct 064 * @throws SAXException if something went wrong 065 */ 066 public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean correct) throws SAXException; 067 068 /** 069 * Receive a start tag token. 070 * 071 * @param name the tag name 072 * @param attributes the attributes 073 * @throws SAXException if something went wrong 074 */ 075 public void startTag(String name, Attributes attributes) throws SAXException; 076 077 /** 078 * Receive an end tag token. 079 * 080 * @param name the tag name 081 * @param attributes the attributes 082 * @throws SAXException if something went wrong 083 */ 084 public void endTag(String name, Attributes attributes) throws SAXException; 085 086 /** 087 * Receive a comment token. The data is junk if the <code>wantsComments()</code> 088 * returned <code>false</code>. 089 * 090 * @param buf a buffer holding the data 091 * @param length the number of code units to read 092 * @throws SAXException if something went wrong 093 */ 094 public void comment(char[] buf, int length) throws SAXException; 095 096 /** 097 * Receive character tokens. This method has the same semantics as 098 * the SAX method of the same name. 099 * 100 * @param buf a buffer holding the data 101 * @param start offset into the buffer 102 * @param length the number of code units to read 103 * @throws SAXException if something went wrong 104 * @see org.xml.sax.ContentHandler#characters(char[], int, int) 105 */ 106 public void characters(char[] buf, int start, int length) throws SAXException; 107 108 /** 109 * The end-of-file token. 110 * 111 * @throws SAXException if something went wrong 112 */ 113 public void eof() throws SAXException; 114 115 }