/*
 * Copyright (c) 2007 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.source;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import nu.validator.htmlparser.impl.CharacterHandler;
import nu.validator.xml.TypedInputSource;

import org.apache.log4j.Logger;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Stores the characters of a document split into lines, remembers
 * locations and error positions, and emits source extracts (optionally with
 * character and range highlights) to a <code>SourceHandler</code>.
 */
public final class SourceCode implements CharacterHandler {
    private static final Logger log4j = Logger.getLogger(SourceCode.class);

    /** Zero-length array passed to <code>toArray</code> to select the result type. */
    private static final Location[] SOURCE_LOCATION_ARRAY_TYPE = new Location[0];

    /** Buffer size used when the input source does not report a length. */
    private static final int DEFAULT_EXPECTED_LENGTH = 2048;

    private String uri;

    private String type;

    private String encoding;

    /** Size of the character buffer allocated for the first line. */
    private int expectedLength;

    /** Recorded locations; iteration order is descending. */
    private final SortedSet<Location> reverseSortedLocations = new TreeSet<Location>(Collections.reverseOrder());

    /** Locations highlighted as single characters by {@link #emitSource}. */
    private final SortedSet<Location> exactErrors = new TreeSet<Location>();

    /** Last locations of the ranges highlighted by {@link #emitSource}. */
    private final SortedSet<Location> rangeLasts = new TreeSet<Location>();

    /** One-based numbers of the lines reported through {@link #lineError}. */
    private final SortedSet<Integer> oneBasedLineErrors = new TreeSet<Integer>();

    private final List<Line> lines = new ArrayList<Line>();

    /** The line that incoming characters are appended to. */
    private Line currentLine = null;

    /**
     * Whether the previously seen character was a CR. Kept as a field so
     * that a CRLF pair split across two {@link #characters} calls still
     * counts as a single line break.
     */
    private boolean prevWasCr = false;

    private final LocationRecorder locationRecorder;

    public SourceCode() {
        this.locationRecorder = new LocationRecorder(this);
    }

    /**
     * Captures the system id, encoding and (when available) the type and
     * length of the input source.
     *
     * @param inputSource
     *            the source about to be read; if it is a
     *            <code>TypedInputSource</code> its type and length are used,
     *            otherwise the type is <code>null</code> and a default
     *            expected length is assumed
     */
    public void initialize(InputSource inputSource) {
        this.uri = inputSource.getSystemId();
        this.encoding = inputSource.getEncoding();
        if (inputSource instanceof TypedInputSource) {
            TypedInputSource typedInputSource = (TypedInputSource) inputSource;
            int length = typedInputSource.getLength();
            // -1 is treated as "length not known".
            expectedLength = (length == -1) ? DEFAULT_EXPECTED_LENGTH : length;
            this.type = typedInputSource.getType();
        } else {
            expectedLength = DEFAULT_EXPECTED_LENGTH;
            this.type = null;
        }
    }

    /**
     * Appends characters to the stored source, starting a new line at each
     * CR, LF or CRLF. The line break characters themselves are not stored.
     *
     * @param ch
     *            the buffer holding the characters
     * @param start
     *            index of the first character to consume
     * @param length
     *            number of characters to consume
     * @throws SAXException
     *             declared by the implemented interface
     */
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        // s is the start of the run that has not been handed to the line yet.
        int s = start;
        int end = start + length;
        for (int i = start; i < end; i++) {
            char c = ch[i];
            switch (c) {
                case '\r':
                    if (s < i) {
                        currentLine.characters(ch, s, i - s);
                    }
                    newLine();
                    s = i + 1;
                    prevWasCr = true;
                    break;
                case '\n':
                    // An LF right after a CR belongs to the same line break.
                    if (!prevWasCr) {
                        if (s < i) {
                            currentLine.characters(ch, s, i - s);
                        }
                        newLine();
                    }
                    s = i + 1;
                    prevWasCr = false;
                    break;
                default:
                    prevWasCr = false;
                    break;
            }
        }
        if (s < end) {
            currentLine.characters(ch, s, end - s);
        }
    }

    /**
     * Starts a new current line and appends it to the list of lines. The
     * first line gets a fresh buffer of <code>expectedLength</code> chars;
     * each later line reuses the buffer reported by the previous line and
     * begins right after that line's content.
     */
    private void newLine() {
        int offset;
        char[] buffer;
        if (currentLine == null) {
            offset = 0;
            buffer = new char[expectedLength];
        } else {
            offset = currentLine.getOffset() + currentLine.getBufferLength();
            buffer = currentLine.getBuffer();
        }
        currentLine = new Line(buffer, offset);
        lines.add(currentLine);
    }

    /**
     * Finishes the source. A trailing empty line is dropped.
     *
     * @throws SAXException
     *             declared by the implemented interface
     */
    public void end() throws SAXException {
        if (currentLine != null && currentLine.getBufferLength() == 0) {
            // Theoretical impurity with line separators vs. terminators
            lines.remove(lines.size() - 1);
            currentLine = null;
        }
    }

    /**
     * Resets the recorded locations and the stored lines and opens the
     * first line. The remembered exact, range and line errors are not
     * cleared here.
     *
     * @throws SAXException
     *             declared by the implemented interface
     */
    public void start() throws SAXException {
        reverseSortedLocations.clear();
        lines.clear();
        currentLine = null;
        newLine();
        prevWasCr = false;
    }

    /**
     * Records a location given in one-based coordinates.
     *
     * @param oneBasedLine
     *            the one-based line number
     * @param oneBasedColumn
     *            the one-based column number
     */
    void addLocatorLocation(int oneBasedLine, int oneBasedColumn) {
        // Avoid building the message string when debug output is off.
        if (log4j.isDebugEnabled()) {
            log4j.debug(oneBasedLine + ", " + oneBasedColumn);
        }
        reverseSortedLocations.add(newLocatorLocation(oneBasedLine,
                oneBasedColumn));
    }

    /**
     * Remembers an exact error location and emits an extract around it with
     * the erroneous character highlighted. The extract runs from
     * <code>location.step(-15)</code> to <code>location.step(6)</code>.
     *
     * @param location
     *            the location of the erroneous character
     * @param extractHandler
     *            the handler receiving the extract
     * @throws SAXException
     *             if the handler throws
     */
    public void exactError(Location location, SourceHandler extractHandler)
            throws SAXException {
        exactErrors.add(location);
        Location start = location.step(-15);
        Location end = location.step(6);
        extractHandler.startSource(type, encoding);
        emitContent(start, location, extractHandler);
        extractHandler.startCharHilite(location.getLine() + 1,
                location.getColumn() + 1);
        emitCharacter(location, extractHandler);
        extractHandler.endCharHilite();
        Location afterError = location.next();
        emitContent(afterError, end, extractHandler);
        extractHandler.endSource();
    }

    /**
     * Remembers an exact error location without emitting an extract.
     * Locations with a negative line or column are ignored.
     *
     * @param location
     *            the location of the erroneous character
     */
    public void rememberExactError(Location location) {
        if (location.getColumn() < 0 || location.getLine() < 0) {
            return;
        }
        exactErrors.add(location);
    }

    /**
     * Remembers the last location of an erroneous range and emits an
     * extract with the range highlighted. The extract runs from
     * <code>rangeStart.step(-10)</code> to six steps past the end of the
     * range.
     *
     * @param rangeStart
     *            the first location of the range (inclusive)
     * @param rangeLast
     *            the last location of the range (inclusive)
     * @param extractHandler
     *            the handler receiving the extract
     * @throws SAXException
     *             if the handler throws
     */
    public void rangeEndError(Location rangeStart, Location rangeLast,
            SourceHandler extractHandler) throws SAXException {
        reverseSortedLocations.add(rangeLast);
        rangeLasts.add(rangeLast);
        Location endRange = rangeLast.next();
        Location start = rangeStart.step(-10);
        Location end = endRange.step(6);
        extractHandler.startSource(type, encoding);
        emitContent(start, rangeStart, extractHandler);
        extractHandler.startRange(rangeLast.getLine() + 1,
                rangeLast.getColumn() + 1);
        emitContent(rangeStart, endRange, extractHandler);
        extractHandler.endRange();
        emitContent(endRange, end, extractHandler);
        extractHandler.endSource();
    }

    /**
     * Finds the start of the range that ends at the given location.
     *
     * @param rangeLast
     *            the last location of the range
     * @return the location following the closest recorded location that
     *         sorts before <code>rangeLast</code>, or line 0, column 0 if
     *         no recorded location sorts before it
     */
    public Location rangeStartForRangeLast(Location rangeLast) {
        // Descending iteration: the first smaller element is the closest one.
        for (Location loc : reverseSortedLocations) {
            if (loc.compareTo(rangeLast) < 0) {
                return loc.next();
            }
        }
        return new Location(this, 0, 0);
    }

    /**
     * Remembers an erroneous line and emits that whole line as an extract.
     *
     * @param oneBasedLine
     *            the one-based number of the line; must refer to a stored
     *            line
     * @param extractHandler
     *            the handler receiving the extract
     * @throws SAXException
     *             if the handler throws
     */
    @SuppressWarnings("boxing")
    public void lineError(int oneBasedLine, SourceHandler extractHandler)
            throws SAXException {
        oneBasedLineErrors.add(oneBasedLine);
        Line line = lines.get(oneBasedLine - 1);
        extractHandler.startSource(type, encoding);
        extractHandler.characters(line.getBuffer(), line.getOffset(),
                line.getBufferLength());
        extractHandler.endSource();
    }

    /**
     * Tells whether a location refers to a stored line and does not lie
     * past the end of that line.
     *
     * @param location
     *            the location to check
     * @return <code>false</code> if the line index is negative or not
     *         smaller than the number of stored lines, or if the column is
     *         greater than the length of the line; <code>true</code>
     *         otherwise (the column equal to the line length is accepted,
     *         and negative columns are not rejected here)
     */
    public boolean isWithinKnownSource(Location location) {
        int lineIndex = location.getLine();
        // A negative index would make lines.get() throw instead of answering.
        if (lineIndex < 0 || lineIndex >= lines.size()) {
            return false;
        }
        Line line = lines.get(lineIndex);
        return line.getBufferLength() >= location.getColumn();
    }

    /**
     * Tells whether a one-based line number refers to a stored line.
     *
     * @param oneBasedLine
     *            the one-based line number
     * @return <code>true</code> if the number is at least 1 and at most the
     *         number of stored lines
     */
    public boolean isWithinKnownSource(int oneBasedLine) {
        return oneBasedLine >= 1 && oneBasedLine <= lines.size();
    }

    /**
     * Returns a stored line.
     *
     * @param line
     *            the zero-based line index
     * @return the line at that index
     */
    Line getLine(int line) {
        return lines.get(line);
    }

    /**
     * Returns the number of stored lines.
     *
     * @return the number of stored lines
     */
    int getNumberOfLines() {
        return lines.size();
    }

    /**
     * Emits the single character at a location. When the column equals the
     * length of the line, a line break is emitted instead.
     *
     * @param location
     *            the location of the character
     * @param handler
     *            the handler receiving the character
     * @throws SAXException
     *             if the handler throws
     */
    void emitCharacter(Location location, SourceHandler handler)
            throws SAXException {
        Line line = getLine(location.getLine());
        int col = location.getColumn();
        if (col == line.getBufferLength()) {
            handler.newLine();
        } else {
            handler.characters(line.getBuffer(), line.getOffset() + col, 1);
        }
    }

    /**
     * Emits content from a location (inclusive) until a location
     * (exclusive). Nothing is emitted if <code>from</code> does not sort
     * before <code>until</code>.
     *
     * @param from
     *            the first location to emit
     * @param until
     *            the location at which emitting stops
     * @param handler
     *            the handler receiving the content
     * @throws SAXException
     *             if the handler throws
     */
    void emitContent(Location from, Location until, SourceHandler handler)
            throws SAXException {
        if (from.compareTo(until) >= 0) {
            return;
        }
        int fromLine = from.getLine();
        int untilLine = until.getLine();
        Line line = getLine(fromLine);
        if (fromLine == untilLine) {
            handler.characters(line.getBuffer(), line.getOffset()
                    + from.getColumn(), until.getColumn() - from.getColumn());
        } else {
            // first line
            int length = line.getBufferLength() - from.getColumn();
            if (length > 0) {
                handler.characters(line.getBuffer(), line.getOffset()
                        + from.getColumn(), length);
            }
            // No line break is emitted after the last stored line.
            if (fromLine + 1 != lines.size()) {
                handler.newLine();
            }
            // lines in between
            int wholeLine = fromLine + 1;
            while (wholeLine < untilLine) {
                line = getLine(wholeLine);
                handler.characters(line.getBuffer(), line.getOffset(),
                        line.getBufferLength());
                wholeLine++;
                if (wholeLine != lines.size()) {
                    handler.newLine();
                }
            }
            // last line
            int untilCol = until.getColumn();
            if (untilCol > 0) {
                line = getLine(untilLine);
                handler.characters(line.getBuffer(), line.getOffset(), untilCol);
            }
        }
    }

    /**
     * Emits the whole stored source with all remembered exact errors
     * highlighted as characters, all remembered ranges highlighted as
     * ranges, and the remembered line errors passed to the handler.
     * <code>endSource()</code> is called on the handler even if emitting
     * fails.
     *
     * @param handler
     *            the handler receiving the source
     * @throws SAXException
     *             if the handler throws
     */
    public void emitSource(SourceHandler handler) throws SAXException {
        List<Range> ranges = new LinkedList<Range>();
        // Descending array, scanned from its end (smallest location) upwards
        // while the range-lasts are visited in ascending order.
        Location[] locations = reverseSortedLocations.toArray(SOURCE_LOCATION_ARRAY_TYPE);
        int i = locations.length - 1;
        for (Location loc : rangeLasts) {
            while (i >= 0 && locations[i].compareTo(loc) < 0) {
                i--;
            }
            Location start;
            if (i == locations.length - 1) {
                // No recorded location sorts before loc: the range is empty.
                // NOTE(review): rangeStartForRangeLast falls back to line 0,
                // column 0 in the analogous case - confirm the difference is
                // intentional.
                start = loc.next();
            } else {
                // locations[i + 1] is the closest location sorting before loc.
                start = locations[i + 1].next();
            }
            Location end = loc.next();
            ranges.add(new Range(start, end, loc));
        }
        try {
            handler.startSource(type, encoding);
            handler.setLineErrors(oneBasedLineErrors);
            Iterator<Range> rangeIter = ranges.iterator();
            Iterator<Location> exactIter = exactErrors.iterator();
            Location previousLocation = new Location(this, 0, 0);
            Location exact = null;
            Location rangeStart = null;
            Location rangeEnd = null;
            Location rangeLoc = null;
            if (exactIter.hasNext()) {
                exact = exactIter.next();
            }
            if (rangeIter.hasNext()) {
                Range r = rangeIter.next();
                rangeStart = r.getStart();
                rangeEnd = r.getEnd();
                rangeLoc = r.getLoc();
            }
            // rangeStart is nulled once the range has been opened; rangeEnd
            // stays set until the range has been closed.
            while (exact != null || rangeEnd != null) {
                if (exact != null
                        && (rangeStart == null || exact.compareTo(rangeStart) < 0)
                        && (rangeEnd == null || exact.compareTo(rangeEnd) < 0)) {
                    // exact first?
                    emitContent(previousLocation, exact, handler);
                    handler.startCharHilite(exact.getLine() + 1,
                            exact.getColumn() + 1);
                    emitCharacter(exact, handler);
                    handler.endCharHilite();
                    previousLocation = exact.next();

                    if (exactIter.hasNext()) {
                        exact = exactIter.next();
                    } else {
                        exact = null;
                    }
                } else if (rangeStart != null) {
                    // range start first?
                    emitContent(previousLocation, rangeStart, handler);
                    handler.startRange(rangeLoc.getLine() + 1,
                            rangeLoc.getColumn() + 1);
                    previousLocation = rangeStart;
                    rangeStart = null;
                } else {
                    // range end first?
                    emitContent(previousLocation, rangeEnd, handler);
                    handler.endRange();
                    previousLocation = rangeEnd;
                    rangeEnd = null;

                    if (rangeIter.hasNext()) {
                        Range r = rangeIter.next();
                        rangeStart = r.getStart();
                        rangeEnd = r.getEnd();
                        rangeLoc = r.getLoc();
                    }
                }
            }
            // Emit whatever follows the last highlight.
            emitContent(previousLocation, new Location(this, lines.size(), 0),
                    handler);
        } finally {
            handler.endSource();
        }
    }

    /**
     * Returns the uri.
     *
     * @return the uri
     */
    public String getUri() {
        return uri;
    }

    /**
     * Returns the locationRecorder. The returned object is guaranteed to also
     * implement <code>LexicalHandler</code>.
     *
     * @return the locationRecorder
     */
    public ContentHandler getLocationRecorder() {
        return locationRecorder;
    }

    /**
     * Creates a location in this source from one-based coordinates without
     * recording it.
     *
     * @param oneBasedLine
     *            the one-based line number
     * @param oneBasedColumn
     *            the one-based column number
     * @return the corresponding zero-based location
     */
    public Location newLocatorLocation(int oneBasedLine, int oneBasedColumn) {
        return new Location(this, oneBasedLine - 1, oneBasedColumn - 1);
    }
}