001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * Copyright (c) 2008-2010 Mozilla Foundation 004 * 005 * Permission is hereby granted, free of charge, to any person obtaining a 006 * copy of this software and associated documentation files (the "Software"), 007 * to deal in the Software without restriction, including without limitation 008 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 009 * and/or sell copies of the Software, and to permit persons to whom the 010 * Software is furnished to do so, subject to the following conditions: 011 * 012 * The above copyright notice and this permission notice shall be included in 013 * all copies or substantial portions of the Software. 014 * 015 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 016 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 017 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 018 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 019 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 020 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 021 * DEALINGS IN THE SOFTWARE. 022 */ 023 024 package nu.validator.htmlparser.common; 025 026 import nu.validator.htmlparser.annotation.Const; 027 import nu.validator.htmlparser.annotation.NoLength; 028 import nu.validator.htmlparser.impl.ElementName; 029 import nu.validator.htmlparser.impl.HtmlAttributes; 030 import nu.validator.htmlparser.impl.Tokenizer; 031 032 import org.xml.sax.SAXException; 033 034 /** 035 * <code>Tokenizer</code> reports tokens through this interface. 036 * 037 * @version $Id$ 038 * @author hsivonen 039 */ 040 public interface TokenHandler { 041 042 /** 043 * This method is called at the start of tokenization before any other 044 * methods on this interface are called. Implementations should hold the 045 * reference to the <code>Tokenizer</code> in order to set the content 046 * model flag and in order to be able to query for <code>Locator</code> 047 * data. 048 * 049 * @param self 050 * the <code>Tokenizer</code>. 051 * @throws SAXException 052 * if something went wrong 053 */ 054 public void startTokenization(Tokenizer self) throws SAXException; 055 056 /** 057 * If this handler implementation cares about comments, return 058 * <code>true</code>. If not, return <code>false</code>. 059 * 060 * @return whether this handler wants comments 061 * @throws SAXException 062 * if something went wrong 063 */ 064 public boolean wantsComments() throws SAXException; 065 066 /** 067 * Receive a doctype token. 068 * 069 * @param name 070 * the name 071 * @param publicIdentifier 072 * the public id 073 * @param systemIdentifier 074 * the system id 075 * @param forceQuirks 076 * whether the token is correct 077 * @throws SAXException 078 * if something went wrong 079 */ 080 public void doctype(String name, String publicIdentifier, 081 String systemIdentifier, boolean forceQuirks) throws SAXException; 082 083 /** 084 * Receive a start tag token. 085 * 086 * @param eltName 087 * the tag name 088 * @param attributes 089 * the attributes 090 * @param selfClosing 091 * TODO 092 * @throws SAXException 093 * if something went wrong 094 */ 095 public void startTag(ElementName eltName, HtmlAttributes attributes, 096 boolean selfClosing) throws SAXException; 097 098 /** 099 * Receive an end tag token. 100 * 101 * @param eltName 102 * the tag name 103 * @throws SAXException 104 * if something went wrong 105 */ 106 public void endTag(ElementName eltName) throws SAXException; 107 108 /** 109 * Receive a comment token. The data is junk if the 110 * <code>wantsComments()</code> returned <code>false</code>. 111 * 112 * @param buf 113 * a buffer holding the data 114 * @param start the offset into the buffer 115 * @param length 116 * the number of code units to read 117 * @throws SAXException 118 * if something went wrong 119 */ 120 public void comment(@NoLength char[] buf, int start, int length) throws SAXException; 121 122 /** 123 * Receive character tokens. This method has the same semantics as the SAX 124 * method of the same name. 125 * 126 * @param buf 127 * a buffer holding the data 128 * @param start 129 * offset into the buffer 130 * @param length 131 * the number of code units to read 132 * @throws SAXException 133 * if something went wrong 134 * @see org.xml.sax.ContentHandler#characters(char[], int, int) 135 */ 136 public void characters(@Const @NoLength char[] buf, int start, int length) 137 throws SAXException; 138 139 /** 140 * Reports a U+0000 that's being turned into a U+FFFD. 141 * 142 * @throws SAXException 143 * if something went wrong 144 */ 145 public void zeroOriginatingReplacementCharacter() throws SAXException; 146 147 /** 148 * The end-of-file token. 149 * 150 * @throws SAXException 151 * if something went wrong 152 */ 153 public void eof() throws SAXException; 154 155 /** 156 * The perform final cleanup. 157 * 158 * @throws SAXException 159 * if something went wrong 160 */ 161 public void endTokenization() throws SAXException; 162 163 /** 164 * Checks if the CDATA sections are allowed. 165 * 166 * @return <code>true</code> if CDATA sections are allowed 167 * @throws SAXException 168 * if something went wrong 169 */ 170 public boolean cdataSectionAllowed() throws SAXException; 171 }