001 /* 002 * Copyright (c) 2007 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.sax; 024 025 import nu.validator.htmlparser.common.XmlViolationPolicy; 026 import nu.validator.htmlparser.impl.AttributesImpl; 027 import nu.validator.htmlparser.impl.TreeBuilder; 028 import nu.validator.saxtree.Characters; 029 import nu.validator.saxtree.Comment; 030 import nu.validator.saxtree.DTD; 031 import nu.validator.saxtree.Document; 032 import nu.validator.saxtree.DocumentFragment; 033 import nu.validator.saxtree.Element; 034 import nu.validator.saxtree.NodeType; 035 import nu.validator.saxtree.ParentNode; 036 import nu.validator.saxtree.TreeParser; 037 038 import org.xml.sax.Attributes; 039 import org.xml.sax.ContentHandler; 040 import org.xml.sax.SAXException; 041 import org.xml.sax.ext.LexicalHandler; 042 043 class SAXTreeBuilder extends TreeBuilder<Element> { 044 045 private Document document; 046 047 SAXTreeBuilder() { 048 super(XmlViolationPolicy.ALLOW, false); 049 } 050 051 @Override 052 protected void appendComment(Element parent, char[] buf, int start, int length) { 053 parent.appendChild(new Comment(tokenizer, buf, start, length)); 054 } 055 056 @Override 057 protected void appendCommentToDocument(char[] buf, int start, int length) { 058 document.appendChild(new Comment(tokenizer, buf, start, length)); 059 } 060 061 @Override 062 protected void appendCharacters(Element parent, char[] buf, int start, int length) { 063 parent.appendChild(new Characters(tokenizer, buf, start, length)); 064 } 065 066 @Override 067 protected void detachFromParent(Element element) { 068 element.detach(); 069 } 070 071 @Override 072 protected boolean hasChildren(Element element) { 073 return element.getFirstChild() != null; 074 } 075 076 @Override 077 protected Element shallowClone(Element element) { 078 Element newElt = new Element(element, element.getUri(), element.getLocalName(), element.getQName(), element.getAttributes(), true, element.getPrefixMappings()); 079 newElt.copyEndLocator(element); 080 return newElt; 081 } 082 083 @Override 084 protected void detachFromParentAndAppendToNewParent(Element child, Element newParent) { 085 newParent.appendChild(child); 086 } 087 088 @Override 089 protected Element createHtmlElementSetAsRoot(Attributes attributes) { 090 Element newElt = new Element(tokenizer, "http://www.w3.org/1999/xhtml", "html", "html", attributes, true, null); 091 document.appendChild(newElt); 092 return newElt; 093 } 094 095 @Override 096 protected void insertBefore(Element child, Element sibling, Element parent) { 097 parent.insertBefore(child, sibling); 098 } 099 100 @Override 101 protected Element parentElementFor(Element child) { 102 ParentNode parent = child.getParentNode(); 103 if (parent == null) { 104 return null; 105 } 106 if (parent.getNodeType() == NodeType.ELEMENT) { 107 return (Element) parent; 108 } 109 return null; 110 } 111 112 @Override 113 protected void addAttributesToElement(Element element, Attributes attributes) { 114 AttributesImpl existingAttrs = (AttributesImpl) element.getAttributes(); 115 for (int i = 0; i < attributes.getLength(); i++) { 116 String qName = attributes.getQName(i); 117 if (existingAttrs.getIndex(qName) < 0) { 118 existingAttrs.addAttribute(qName, attributes.getValue(i)); 119 } 120 } 121 } 122 123 /** 124 * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String) 125 */ 126 @Override 127 protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) { 128 DTD dtd = new DTD(tokenizer, name, publicIdentifier, systemIdentifier); 129 dtd.setEndLocator(tokenizer); 130 document.appendChild(dtd); 131 } 132 133 /** 134 * Returns the document. 135 * 136 * @return the document 137 */ 138 Document getDocument() { 139 Document rv = document; 140 document = null; 141 return rv; 142 } 143 144 DocumentFragment getDocumentFragment() { 145 DocumentFragment rv = new DocumentFragment(); 146 rv.appendChildren(document.getFirstChild()); 147 document = null; 148 return rv; 149 } 150 151 /** 152 * @throws SAXException 153 * @see nu.validator.htmlparser.impl.TreeBuilder#end() 154 */ 155 @Override 156 protected void end() throws SAXException { 157 document.setEndLocator(tokenizer); 158 } 159 160 /** 161 * @see nu.validator.htmlparser.impl.TreeBuilder#start() 162 */ 163 @Override 164 protected void start(boolean fragment) { 165 document = new Document(tokenizer); 166 } 167 168 @Override 169 protected void appendChildrenToNewParent(Element oldParent, Element newParent) throws SAXException { 170 newParent.appendChildren(oldParent); 171 } 172 173 @Override 174 protected Element createElement(String name, Attributes attributes) throws SAXException { 175 return new Element(tokenizer, "http://www.w3.org/1999/xhtml", name, name, attributes, true, null); 176 } 177 178 @Override 179 protected void insertCharactersBefore(char[] buf, int start, int length, Element sibling, Element parent) throws SAXException { 180 parent.insertBefore(new Characters(tokenizer, buf, start, length), sibling); 181 } 182 183 }