001 /*
002 * Copyright (c) 2007 Mozilla Foundation
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package nu.validator.source;
024
025 import java.util.ArrayList;
026 import java.util.Collections;
027 import java.util.Iterator;
028 import java.util.LinkedList;
029 import java.util.List;
030 import java.util.SortedSet;
031 import java.util.TreeSet;
032
033 import nu.validator.htmlparser.impl.CharacterHandler;
034 import nu.validator.xml.TypedInputSource;
035
036 import org.apache.log4j.Logger;
037 import org.xml.sax.ContentHandler;
038 import org.xml.sax.InputSource;
039 import org.xml.sax.SAXException;
040
041 public final class SourceCode implements CharacterHandler {
042 private static final Logger log4j = Logger.getLogger(SourceCode.class);
043
044 private static Location[] SOURCE_LOCATION_ARRAY_TYPE = new Location[0];
045
046 private String uri;
047
048 private String type;
049
050 private String encoding;
051
052 private int expectedLength;
053
054 private final SortedSet<Location> reverseSortedLocations = new TreeSet<Location>(Collections.reverseOrder());
055
056 private final SortedSet<Location> exactErrors = new TreeSet<Location>();
057
058 private final SortedSet<Location> rangeLasts = new TreeSet<Location>();
059
060 private final SortedSet<Integer> oneBasedLineErrors = new TreeSet<Integer>();
061
062 private final List<Line> lines = new ArrayList<Line>();
063
064 private Line currentLine = null;
065
066 private boolean prevWasCr = false;
067
068 private final LocationRecorder locationRecorder;
069
/**
 * Creates an empty source record and the location recorder that is
 * constructed with this instance.
 */
public SourceCode() {
    locationRecorder = new LocationRecorder(this);
}
073
074 public void initialize(InputSource inputSource) {
075 this.uri = inputSource.getSystemId();
076 this.encoding = inputSource.getEncoding();
077 if (inputSource instanceof TypedInputSource) {
078 TypedInputSource typedInputSource = (TypedInputSource) inputSource;
079 int length = typedInputSource.getLength();
080 if (length == -1) {
081 expectedLength = 2048;
082 } else {
083 expectedLength = length;
084 }
085 this.type = typedInputSource.getType();
086 } else {
087 expectedLength = 2048;
088 this.type = null;
089 }
090 }
091
092 public void characters(char[] ch, int start, int length)
093 throws SAXException {
094 int s = start;
095 int end = start + length;
096 for (int i = start; i < end; i++) {
097 char c = ch[i];
098 switch (c) {
099 case '\r':
100 if (s < i) {
101 currentLine.characters(ch, s, i - s);
102 }
103 newLine();
104 s = i + 1;
105 prevWasCr = true;
106 break;
107 case '\n':
108 if (!prevWasCr) {
109 if (s < i) {
110 currentLine.characters(ch, s, i - s);
111 }
112 newLine();
113 }
114 s = i + 1;
115 prevWasCr = false;
116 break;
117 default:
118 prevWasCr = false;
119 break;
120 }
121 }
122 if (s < end) {
123 currentLine.characters(ch, s, end - s);
124 }
125 }
126
127 private void newLine() {
128 int offset;
129 char[] buffer;
130 if (currentLine == null) {
131 offset = 0;
132 buffer = new char[expectedLength];
133 } else {
134 offset = currentLine.getOffset() + currentLine.getBufferLength();
135 buffer = currentLine.getBuffer();
136 }
137 currentLine = new Line(buffer, offset);
138 lines.add(currentLine);
139 }
140
141 public void end() throws SAXException {
142 if (currentLine != null && currentLine.getBufferLength() == 0) {
143 // Theoretical impurity with line separators vs. terminators
144 lines.remove(lines.size() - 1);
145 currentLine = null;
146 }
147 }
148
149 public void start() throws SAXException {
150 reverseSortedLocations.clear();
151 lines.clear();
152 currentLine = null;
153 newLine();
154 prevWasCr = false;
155 }
156
157 void addLocatorLocation(int oneBasedLine, int oneBasedColumn) {
158 log4j.debug(oneBasedLine + ", " + oneBasedColumn);
159 reverseSortedLocations.add(new Location(this, oneBasedLine - 1,
160 oneBasedColumn - 1));
161 }
162
163 public void exactError(Location location, SourceHandler extractHandler)
164 throws SAXException {
165 exactErrors.add(location);
166 Location start = location.step(-15);
167 Location end = location.step(6);
168 extractHandler.startSource(type, encoding);
169 emitContent(start, location, extractHandler);
170 extractHandler.startCharHilite(location.getLine() + 1,
171 location.getColumn() + 1);
172 emitCharacter(location, extractHandler);
173 extractHandler.endCharHilite();
174 location = location.next();
175 emitContent(location, end, extractHandler);
176 extractHandler.endSource();
177 }
178
179 public void rememberExactError(Location location) {
180 if (location.getColumn() < 0 || location.getLine() < 0) {
181 return;
182 }
183 exactErrors.add(location);
184 }
185
186 public void rangeEndError(Location rangeStart, Location rangeLast,
187 SourceHandler extractHandler) throws SAXException {
188 reverseSortedLocations.add(rangeLast);
189 rangeLasts.add(rangeLast);
190 Location endRange = rangeLast.next();
191 Location start = rangeStart.step(-10);
192 Location end = endRange.step(6);
193 extractHandler.startSource(type, encoding);
194 emitContent(start, rangeStart, extractHandler);
195 extractHandler.startRange(rangeLast.getLine() + 1,
196 rangeLast.getColumn() + 1);
197 emitContent(rangeStart, endRange, extractHandler);
198 extractHandler.endRange();
199 emitContent(endRange, end, extractHandler);
200 extractHandler.endSource();
201 }
202
203 /**
204 * @param rangeLast
205 * @return
206 */
207 public Location rangeStartForRangeLast(Location rangeLast) {
208 for (Location loc : reverseSortedLocations) {
209 if (loc.compareTo(rangeLast) < 0) {
210 return loc.next();
211 }
212 }
213 return new Location(this, 0, 0);
214 }
215
216 @SuppressWarnings("boxing")
217 public void lineError(int oneBasedLine, SourceHandler extractHandler)
218 throws SAXException {
219 oneBasedLineErrors.add(oneBasedLine);
220 Line line = lines.get(oneBasedLine - 1);
221 extractHandler.startSource(type, encoding);
222 extractHandler.characters(line.getBuffer(), line.getOffset(),
223 line.getBufferLength());
224 extractHandler.endSource();
225 }
226
227 public boolean isWithinKnownSource(Location location) {
228 if (location.getLine() >= lines.size()) {
229 return false;
230 }
231 Line line = lines.get(location.getLine());
232 if (line.getBufferLength() >= location.getColumn()) {
233 return true;
234 } else {
235 return false;
236 }
237 }
238
239 public boolean isWithinKnownSource(int oneBasedLine) {
240 return !(oneBasedLine > lines.size());
241 }
242
243 Line getLine(int line) {
244 return lines.get(line);
245 }
246
247 int getNumberOfLines() {
248 return lines.size();
249 }
250
251 void emitCharacter(Location location, SourceHandler handler)
252 throws SAXException {
253 Line line = getLine(location.getLine());
254 int col = location.getColumn();
255 if (col == line.getBufferLength()) {
256 handler.newLine();
257 } else {
258 handler.characters(line.getBuffer(), line.getOffset() + col, 1);
259 }
260 }
261
262 /**
263 * Emits content between from a location (inclusive) until a location
264 * (exclusive).
265 *
266 * @param from
267 * @param until
268 * @param handler
269 * @throws SAXException
270 */
271 void emitContent(Location from, Location until, SourceHandler handler)
272 throws SAXException {
273 if (from.compareTo(until) >= 0) {
274 return;
275 }
276 int fromLine = from.getLine();
277 int untilLine = until.getLine();
278 Line line = getLine(fromLine);
279 if (fromLine == untilLine) {
280 handler.characters(line.getBuffer(), line.getOffset()
281 + from.getColumn(), until.getColumn() - from.getColumn());
282 } else {
283 // first line
284 int length = line.getBufferLength() - from.getColumn();
285 if (length > 0) {
286 handler.characters(line.getBuffer(), line.getOffset()
287 + from.getColumn(), length);
288 }
289 if (fromLine + 1 != lines.size()) {
290 handler.newLine();
291 }
292 // lines in between
293 int wholeLine = fromLine + 1;
294 while (wholeLine < untilLine) {
295 line = getLine(wholeLine);
296 handler.characters(line.getBuffer(), line.getOffset(),
297 line.getBufferLength());
298 wholeLine++;
299 if (wholeLine != lines.size()) {
300 handler.newLine();
301 }
302 }
303 // last line
304 int untilCol = until.getColumn();
305 if (untilCol > 0) {
306 line = getLine(untilLine);
307 handler.characters(line.getBuffer(), line.getOffset(), untilCol);
308 }
309 }
310 }
311
312 public void emitSource(SourceHandler handler) throws SAXException {
313 List<Range> ranges = new LinkedList<Range>();
314 Location[] locations = reverseSortedLocations.toArray(SOURCE_LOCATION_ARRAY_TYPE);
315 int i = locations.length - 1;
316 for (Location loc : rangeLasts) {
317 while (i >= 0 && locations[i].compareTo(loc) < 0) {
318 i--;
319 }
320 Location start;
321 if (i == locations.length - 1) {
322 start = loc.next();
323 } else {
324 start = locations[i + 1].next();
325 }
326 Location end = loc.next();
327 ranges.add(new Range(start, end, loc));
328 }
329 try {
330 handler.startSource(type, encoding);
331 handler.setLineErrors(oneBasedLineErrors);
332 Iterator<Range> rangeIter = ranges.iterator();
333 Iterator<Location> exactIter = exactErrors.iterator();
334 Location previousLocation = new Location(this, 0, 0);
335 Location exact = null;
336 Location rangeStart = null;
337 Location rangeEnd = null;
338 Location rangeLoc = null;
339 if (exactIter.hasNext()) {
340 exact = exactIter.next();
341 }
342 if (rangeIter.hasNext()) {
343 Range r = rangeIter.next();
344 rangeStart = r.getStart();
345 rangeEnd = r.getEnd();
346 rangeLoc = r.getLoc();
347 }
348 while (exact != null || rangeEnd != null) {
349 if (exact != null
350 && (rangeStart == null || exact.compareTo(rangeStart) < 0)
351 && (rangeEnd == null || exact.compareTo(rangeEnd) < 0)) { // exact
352 // first?
353 emitContent(previousLocation, exact, handler);
354 handler.startCharHilite(exact.getLine() + 1,
355 exact.getColumn() + 1);
356 emitCharacter(exact, handler);
357 handler.endCharHilite();
358 previousLocation = exact.next();
359
360 if (exactIter.hasNext()) {
361 exact = exactIter.next();
362 } else {
363 exact = null;
364 }
365 } else if (rangeStart != null) { // range start first?
366 emitContent(previousLocation, rangeStart, handler);
367 handler.startRange(rangeLoc.getLine() + 1,
368 rangeLoc.getColumn() + 1);
369 previousLocation = rangeStart;
370 rangeStart = null;
371 } else { // range end first?
372 emitContent(previousLocation, rangeEnd, handler);
373 handler.endRange();
374 previousLocation = rangeEnd;
375 rangeEnd = null;
376
377 if (rangeIter.hasNext()) {
378 Range r = rangeIter.next();
379 rangeStart = r.getStart();
380 rangeEnd = r.getEnd();
381 rangeLoc = r.getLoc();
382 } else {
383 rangeEnd = null;
384 }
385 }
386 }
387 emitContent(previousLocation, new Location(this, lines.size(), 0),
388 handler);
389 } finally {
390 handler.endSource();
391 }
392 }
393
394 /**
395 * Returns the uri.
396 *
397 * @return the uri
398 */
399 public String getUri() {
400 return uri;
401 }
402
403 /**
404 * Returns the locationRecorder. The returned object is guaranteed to also
405 * implement <code>LexicalHandler</code>.
406 *
407 * @return the locationRecorder
408 */
409 public ContentHandler getLocationRecorder() {
410 return locationRecorder;
411 }
412
413 public Location newLocatorLocation(int oneBasedLine, int oneBasedColumn) {
414 return new Location(this, oneBasedLine - 1, oneBasedColumn - 1);
415 }
416 }