001 /*
002 * Copyright (c) 2007 Mozilla Foundation
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package nu.validator.spec.html5;
024
025 import java.io.IOException;
026 import java.util.HashMap;
027 import java.util.Map;
028 import java.util.regex.Matcher;
029 import java.util.regex.Pattern;
030
031 import nu.validator.htmlparser.common.XmlViolationPolicy;
032 import nu.validator.htmlparser.sax.HtmlParser;
033 import nu.validator.saxtree.DocumentFragment;
034 import nu.validator.saxtree.TreeBuilder;
035 import nu.validator.spec.Spec;
036 import nu.validator.xml.AttributesImpl;
037 import nu.validator.xml.EmptyAttributes;
038
039 import org.xml.sax.Attributes;
040 import org.xml.sax.ContentHandler;
041 import org.xml.sax.InputSource;
042 import org.xml.sax.Locator;
043 import org.xml.sax.SAXException;
044
045 import com.thaiopensource.xml.util.Name;
046
047 public class Html5SpecBuilder implements ContentHandler {
048
049 private static final String NS = "http://www.w3.org/1999/xhtml";
050
051 private static final String SPEC_URI = "http://www.whatwg.org/specs/web-apps/current-work/";
052
053 private static final Pattern ELEMENT = Pattern.compile("^.*element\\s*$");
054
055 private static final Pattern CONTEXT = Pattern.compile("^\\s*Contexts\\s+in\\s+which\\s+this\\s+element\\s+may\\s+be\\s+used:\\s*");
056
057 private static final Pattern CONTENT_MODEL = Pattern.compile("^\\s*Content\\s+model:\\s*$");
058
059 private static final Pattern ATTRIBUTES = Pattern.compile("^\\s*Element-specific\\s+attributes:\\s*$");
060
061 private enum State {
062 AWAITING_HEADING,
063 IN_H4,
064 IN_CODE_IN_H4,
065 AWAITING_ELEMENT_DL,
066 IN_ELEMENT_DL_START,
067 IN_CONTEXT_DT,
068 CAPTURING_CONTEXT_DDS,
069 IN_CONTENT_MODEL_DT,
070 CAPTURING_CONTENT_MODEL_DDS,
071 IN_ATTRIBUTES_DT,
072 CAPTURING_ATTRIBUTES_DDS
073 }
074
075 private State state = State.AWAITING_HEADING;
076
077 private int captureDepth = 0;
078
079 private String currentId;
080
081 private StringBuilder nameText = new StringBuilder();
082
083 private StringBuilder referenceText = new StringBuilder();
084
085 private TreeBuilder fragmentBuilder;
086
087 private Name currentName;
088
089 private Map<Name, String> urisByElement = new HashMap<Name, String>();
090
091 private Map<Name, DocumentFragment> contextsByElement = new HashMap<Name, DocumentFragment>();
092
093 private Map<Name, DocumentFragment> contentModelsByElement = new HashMap<Name, DocumentFragment>();
094
095 private Map<Name, DocumentFragment> attributesByElement = new HashMap<Name, DocumentFragment>();
096
097 public static Spec parseSpec(InputSource in) throws IOException, SAXException {
098 HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
099 Html5SpecBuilder handler = new Html5SpecBuilder();
100 parser.setContentHandler(handler);
101 parser.parse(in);
102 return handler.buildSpec();
103 }
104
105 public static void main(String[] args) throws IOException, SAXException {
106 parseSpec(new InputSource("http://www.whatwg.org/specs/web-apps/current-work/"));
107 }
108
109 private Spec buildSpec() {
110 return new Spec(urisByElement, contextsByElement, contentModelsByElement, attributesByElement);
111 }
112
113 /**
114 *
115 */
116 private Html5SpecBuilder() {
117 super();
118 }
119
120 public void characters(char[] ch, int start, int length) throws SAXException {
121 switch(state) {
122 case AWAITING_HEADING:
123 break;
124 case IN_H4:
125 referenceText.append(ch, start, length);
126 break;
127 case IN_CODE_IN_H4:
128 nameText.append(ch, start, length);
129 break;
130 case AWAITING_ELEMENT_DL:
131 break;
132 case IN_ELEMENT_DL_START:
133 break;
134 case IN_CONTEXT_DT:
135 case IN_CONTENT_MODEL_DT:
136 case IN_ATTRIBUTES_DT:
137 referenceText.append(ch, start, length);
138 break;
139 case CAPTURING_CONTEXT_DDS:
140 case CAPTURING_CONTENT_MODEL_DDS:
141 case CAPTURING_ATTRIBUTES_DDS:
142 fragmentBuilder.characters(ch, start, length);
143 break;
144 }
145 }
146
147 public void endDocument() throws SAXException {
148 switch(state) {
149 case AWAITING_HEADING:
150 // XXX finish
151 break;
152 case IN_H4:
153 case IN_CODE_IN_H4:
154 case AWAITING_ELEMENT_DL:
155 case IN_ELEMENT_DL_START:
156 case IN_CONTEXT_DT:
157 case IN_CONTENT_MODEL_DT:
158 case IN_ATTRIBUTES_DT:
159 case CAPTURING_CONTEXT_DDS:
160 case CAPTURING_CONTENT_MODEL_DDS:
161 case CAPTURING_ATTRIBUTES_DDS:
162 throw new SAXException(
163 "Malformed spec: Wrong state for document end.");
164 }
165 }
166
167 public void endElement(String uri, String localName, String qName) throws SAXException {
168 switch(state) {
169 case AWAITING_HEADING:
170 break;
171 case IN_H4:
172 if ("h4" == localName && NS == uri) {
173 Matcher m = ELEMENT.matcher(referenceText);
174 if (m.matches()) {
175 String ln = nameText.toString().intern();
176 if ("" == ln) {
177 throw new SAXException(
178 "Malformed spec: no element currentName.");
179 }
180 if (currentId == null) {
181 throw new SAXException(
182 "Malformed spec: no element id.");
183 }
184 currentName = new Name(NS, ln);
185 urisByElement.put(currentName, SPEC_URI + "#" + currentId);
186 state = State.AWAITING_ELEMENT_DL;
187 } else {
188 currentId = null;
189 nameText.setLength(0);
190 state = State.AWAITING_HEADING;
191 }
192 }
193 break;
194 case IN_CODE_IN_H4:
195 if ("code" == localName && NS == uri) {
196 state = State.IN_H4;
197 }
198 break;
199 case AWAITING_ELEMENT_DL:
200 break;
201 case IN_ELEMENT_DL_START:
202 throw new SAXException(
203 "Malformed spec: no children in element dl.");
204 case IN_CONTEXT_DT:
205 if ("dt" == localName && NS == uri) {
206 Matcher m = CONTEXT.matcher(referenceText);
207 if (m.matches()) {
208 state = State.CAPTURING_CONTEXT_DDS;
209 captureDepth = 0;
210 fragmentBuilder = new TreeBuilder(true, true);
211 } else {
212 throw new SAXException(
213 "Malformed spec: Expected dt to be context dt but it was not.");
214 }
215 }
216 break;
217 case IN_CONTENT_MODEL_DT:
218 if ("dt" == localName && NS == uri) {
219 Matcher m = CONTENT_MODEL.matcher(referenceText);
220 if (m.matches()) {
221 state = State.CAPTURING_CONTENT_MODEL_DDS;
222 captureDepth = 0;
223 fragmentBuilder = new TreeBuilder(true, true);
224 } else {
225 throw new SAXException(
226 "Malformed spec: Expected dt to be context dt but it was not.");
227 }
228 }
229 break;
230 case IN_ATTRIBUTES_DT:
231 if ("dt" == localName && NS == uri) {
232 Matcher m = ATTRIBUTES.matcher(referenceText);
233 if (m.matches()) {
234 state = State.CAPTURING_ATTRIBUTES_DDS;
235 captureDepth = 0;
236 fragmentBuilder = new TreeBuilder(true, true);
237 } else {
238 throw new SAXException(
239 "Malformed spec: Expected dt to be context dt but it was not.");
240 }
241 }
242 break;
243 case CAPTURING_CONTEXT_DDS:
244 case CAPTURING_CONTENT_MODEL_DDS:
245 case CAPTURING_ATTRIBUTES_DDS:
246 if (captureDepth == 0) {
247 throw new SAXException(
248 "Malformed spec: Did not see following dt when capturing dds.");
249 }
250 captureDepth--;
251 fragmentBuilder.endElement(uri, localName, qName);
252 break;
253 }
254 }
255
256 public void endPrefixMapping(String prefix) throws SAXException {
257 }
258
259 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
260 }
261
262 public void processingInstruction(String target, String data) throws SAXException {
263 }
264
265 public void setDocumentLocator(Locator locator) {
266 }
267
268 public void skippedEntity(String name) throws SAXException {
269 }
270
271 public void startDocument() throws SAXException {
272 // TODO Auto-generated method stub
273
274 }
275
276 public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
277 switch(state) {
278 case AWAITING_HEADING:
279 if ("h4" == localName && NS == uri) {
280 referenceText.setLength(0);
281 currentId = null;
282 state = State.IN_H4;
283 }
284 break;
285 case IN_H4:
286 if ("code" == localName && NS == uri) {
287 nameText.setLength(0);
288 state = State.IN_CODE_IN_H4;
289 } else if ("dfn" == localName && NS == uri) {
290 currentId = atts.getValue("", "id");
291 }
292 break;
293 case IN_CODE_IN_H4:
294 break;
295 case AWAITING_ELEMENT_DL:
296 if ("dl" == localName && NS == uri && "element".equals(atts.getValue("", "class"))) {
297 state = State.IN_ELEMENT_DL_START;
298 }
299 break;
300 case IN_ELEMENT_DL_START:
301 if ("dt" == localName && NS == uri) {
302 referenceText.setLength(0);
303 state = State.IN_CONTEXT_DT;
304 } else {
305 throw new SAXException(
306 "Malformed spec: Expected dt in dl.");
307 }
308 break;
309 case IN_CONTEXT_DT:
310 case IN_CONTENT_MODEL_DT:
311 case IN_ATTRIBUTES_DT:
312 throw new SAXException(
313 "Malformed spec: Not expecting children in dts.");
314 case CAPTURING_CONTEXT_DDS:
315 case CAPTURING_CONTENT_MODEL_DDS:
316 case CAPTURING_ATTRIBUTES_DDS:
317 if ("dt" == localName && NS == uri && captureDepth == 0) {
318 DocumentFragment fragment = (DocumentFragment) fragmentBuilder.getRoot();
319 fragmentBuilder = null;
320 referenceText.setLength(0);
321 if (state == State.CAPTURING_CONTEXT_DDS) {
322 contextsByElement.put(currentName, fragment);
323 state = State.IN_CONTENT_MODEL_DT;
324 } else if (state == State.CAPTURING_CONTENT_MODEL_DDS) {
325 contentModelsByElement.put(currentName, fragment);
326 state = State.IN_ATTRIBUTES_DT;
327 } else {
328 attributesByElement.put(currentName, fragment);
329 state = State.AWAITING_HEADING;
330 }
331 } else {
332 captureDepth++;
333 String href = null;
334 if ("a" == localName && NS == uri && (href = atts.getValue("", "href")) != null) {
335 if (href.startsWith("#")) {
336 href = SPEC_URI + href;
337 }
338 AttributesImpl attributesImpl = new AttributesImpl();
339 attributesImpl.addAttribute("href", href);
340 fragmentBuilder.startElement(uri, localName, qName, attributesImpl);
341 } else {
342 fragmentBuilder.startElement(uri, localName, qName, EmptyAttributes.EMPTY_ATTRIBUTES);
343 }
344 }
345 break;
346 }
347 }
348
349 public void startPrefixMapping(String prefix, String uri) throws SAXException {
350 }
351
352 }