001    /*
002     * Copyright (c) 2007 Mozilla Foundation
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package nu.validator.source;
024    
025    import java.util.ArrayList;
026    import java.util.Collections;
027    import java.util.Iterator;
028    import java.util.LinkedList;
029    import java.util.List;
030    import java.util.SortedSet;
031    import java.util.TreeSet;
032    
033    import nu.validator.htmlparser.impl.CharacterHandler;
034    import nu.validator.xml.TypedInputSource;
035    
036    import org.apache.log4j.Logger;
037    import org.xml.sax.ContentHandler;
038    import org.xml.sax.InputSource;
039    import org.xml.sax.SAXException;
040    
041    public final class SourceCode implements CharacterHandler {
042        private static final Logger log4j = Logger.getLogger(SourceCode.class);
043    
044        private static Location[] SOURCE_LOCATION_ARRAY_TYPE = new Location[0];
045    
046        private String uri;
047        
048        private String type;
049        
050        private String encoding;
051    
052        private int expectedLength;
053    
054        private final SortedSet<Location> reverseSortedLocations = new TreeSet<Location>(Collections.reverseOrder());
055    
056        private final SortedSet<Location> exactErrors = new TreeSet<Location>();
057    
058        private final SortedSet<Location> rangeLasts = new TreeSet<Location>();
059        
060        private final SortedSet<Integer> oneBasedLineErrors = new TreeSet<Integer>();
061    
062        private final List<Line> lines = new ArrayList<Line>();
063    
064        private Line currentLine = null;
065    
066        private boolean prevWasCr = false;
067    
068        private final LocationRecorder locationRecorder;
069    
070        public SourceCode() {
071            this.locationRecorder = new LocationRecorder(this);
072        }
073    
074        public void initialize(InputSource inputSource) {
075            this.uri = inputSource.getSystemId();
076            this.encoding = inputSource.getEncoding();
077            if (inputSource instanceof TypedInputSource) {
078                TypedInputSource typedInputSource = (TypedInputSource) inputSource;
079                int length = typedInputSource.getLength();
080                if (length == -1) {
081                    expectedLength = 2048;
082                } else {
083                    expectedLength = length;
084                }
085                this.type = typedInputSource.getType();
086            } else {
087                expectedLength = 2048;
088                this.type = null;
089            }
090        }
091    
092        public void characters(char[] ch, int start, int length)
093                throws SAXException {
094            int s = start;
095            int end = start + length;
096            for (int i = start; i < end; i++) {
097                char c = ch[i];
098                switch (c) {
099                    case '\r':
100                        if (s < i) {
101                            currentLine.characters(ch, s, i - s);
102                        }
103                        newLine();
104                        s = i + 1;
105                        prevWasCr = true;
106                        break;
107                    case '\n':
108                        if (!prevWasCr) {
109                            if (s < i) {
110                                currentLine.characters(ch, s, i - s);
111                            }
112                            newLine();
113                        }
114                        s = i + 1;
115                        prevWasCr = false;
116                        break;
117                    default:
118                        prevWasCr = false;
119                        break;
120                }
121            }
122            if (s < end) {
123                currentLine.characters(ch, s, end - s);
124            }
125        }
126    
127        private void newLine() {
128            int offset;
129            char[] buffer;
130            if (currentLine == null) {
131                offset = 0;
132                buffer = new char[expectedLength];
133            } else {
134                offset = currentLine.getOffset() + currentLine.getBufferLength();
135                buffer = currentLine.getBuffer();
136            }
137            currentLine = new Line(buffer, offset);
138            lines.add(currentLine);
139        }
140    
141        public void end() throws SAXException {
142            if (currentLine != null && currentLine.getBufferLength() == 0) {
143                // Theoretical impurity with line separators vs. terminators
144                lines.remove(lines.size() - 1);
145                currentLine = null;
146            }
147        }
148    
149        public void start() throws SAXException {
150            reverseSortedLocations.clear();
151            lines.clear();
152            currentLine = null;
153            newLine();
154            prevWasCr = false;
155        }
156    
157        void addLocatorLocation(int oneBasedLine, int oneBasedColumn) {
158            log4j.debug(oneBasedLine + ", " + oneBasedColumn);
159            reverseSortedLocations.add(new Location(this, oneBasedLine - 1,
160                    oneBasedColumn - 1));
161        }
162    
163        public void exactError(Location location, SourceHandler extractHandler)
164                throws SAXException {
165            exactErrors.add(location);
166            Location start = location.step(-15);
167            Location end = location.step(6);
168            extractHandler.startSource(type, encoding);
169            emitContent(start, location, extractHandler);
170            extractHandler.startCharHilite(location.getLine() + 1,
171                    location.getColumn() + 1);
172            emitCharacter(location, extractHandler);
173            extractHandler.endCharHilite();
174            location = location.next();
175            emitContent(location, end, extractHandler);
176            extractHandler.endSource();
177        }
178        
179        public void rememberExactError(Location location) {
180            if (location.getColumn() < 0 || location.getLine() < 0) {
181                return;
182            }
183            exactErrors.add(location);        
184        }
185    
186        public void rangeEndError(Location rangeStart, Location rangeLast,
187                SourceHandler extractHandler) throws SAXException {
188            reverseSortedLocations.add(rangeLast);
189            rangeLasts.add(rangeLast);
190            Location endRange = rangeLast.next();
191            Location start = rangeStart.step(-10);
192            Location end = endRange.step(6);
193            extractHandler.startSource(type, encoding);
194            emitContent(start, rangeStart, extractHandler);
195            extractHandler.startRange(rangeLast.getLine() + 1,
196                    rangeLast.getColumn() + 1);
197            emitContent(rangeStart, endRange, extractHandler);
198            extractHandler.endRange();
199            emitContent(endRange, end, extractHandler);
200            extractHandler.endSource();
201        }
202    
203        /**
204         * @param rangeLast
205         * @return
206         */
207        public Location rangeStartForRangeLast(Location rangeLast) {
208            for (Location loc : reverseSortedLocations) {
209                if (loc.compareTo(rangeLast) < 0) {
210                    return loc.next();
211                }
212            }
213            return new Location(this, 0, 0);
214        }
215    
216        @SuppressWarnings("boxing")
217        public void lineError(int oneBasedLine, SourceHandler extractHandler)
218                throws SAXException {
219            oneBasedLineErrors.add(oneBasedLine);
220            Line line = lines.get(oneBasedLine - 1);
221            extractHandler.startSource(type, encoding);
222            extractHandler.characters(line.getBuffer(), line.getOffset(),
223                    line.getBufferLength());
224            extractHandler.endSource();
225        }
226    
227        public boolean isWithinKnownSource(Location location) {
228            if (location.getLine() >= lines.size()) {
229                return false;
230            }
231            Line line = lines.get(location.getLine());
232            if (line.getBufferLength() >= location.getColumn()) {
233                return true;
234            } else {
235                return false;
236            }
237        }
238    
239        public boolean isWithinKnownSource(int oneBasedLine) {
240            return !(oneBasedLine > lines.size());
241        }
242    
243        Line getLine(int line) {
244            return lines.get(line);
245        }
246    
247        int getNumberOfLines() {
248            return lines.size();
249        }
250    
251        void emitCharacter(Location location, SourceHandler handler)
252                throws SAXException {
253            Line line = getLine(location.getLine());
254            int col = location.getColumn();
255            if (col == line.getBufferLength()) {
256                handler.newLine();
257            } else {
258                handler.characters(line.getBuffer(), line.getOffset() + col, 1);
259            }
260        }
261    
262        /**
263         * Emits content between from a location (inclusive) until a location
264         * (exclusive).
265         * 
266         * @param from
267         * @param until
268         * @param handler
269         * @throws SAXException
270         */
271        void emitContent(Location from, Location until, SourceHandler handler)
272                throws SAXException {
273            if (from.compareTo(until) >= 0) {
274                return;
275            }
276            int fromLine = from.getLine();
277            int untilLine = until.getLine();
278            Line line = getLine(fromLine);
279            if (fromLine == untilLine) {
280                handler.characters(line.getBuffer(), line.getOffset()
281                        + from.getColumn(), until.getColumn() - from.getColumn());
282            } else {
283                // first line
284                int length = line.getBufferLength() - from.getColumn();
285                if (length > 0) {
286                    handler.characters(line.getBuffer(), line.getOffset()
287                            + from.getColumn(), length);
288                }
289                if (fromLine + 1 != lines.size()) {
290                    handler.newLine();
291                }
292                // lines in between
293                int wholeLine = fromLine + 1;
294                while (wholeLine < untilLine) {
295                    line = getLine(wholeLine);
296                    handler.characters(line.getBuffer(), line.getOffset(),
297                            line.getBufferLength());
298                    wholeLine++;
299                    if (wholeLine != lines.size()) {
300                        handler.newLine();
301                    }
302                }
303                // last line
304                int untilCol = until.getColumn();
305                if (untilCol > 0) {
306                    line = getLine(untilLine);
307                    handler.characters(line.getBuffer(), line.getOffset(), untilCol);
308                }
309            }
310        }
311    
312        public void emitSource(SourceHandler handler) throws SAXException {
313            List<Range> ranges = new LinkedList<Range>();
314            Location[] locations = reverseSortedLocations.toArray(SOURCE_LOCATION_ARRAY_TYPE);
315            int i = locations.length - 1;
316            for (Location loc : rangeLasts) {
317                while (i >= 0 && locations[i].compareTo(loc) < 0) {
318                    i--;
319                }
320                Location start;
321                if (i == locations.length - 1) {
322                    start = loc.next();
323                } else {
324                    start = locations[i + 1].next();                
325                }
326                Location end = loc.next();
327                ranges.add(new Range(start, end, loc));
328            }
329            try {
330                handler.startSource(type, encoding);
331                handler.setLineErrors(oneBasedLineErrors);
332                Iterator<Range> rangeIter = ranges.iterator();
333                Iterator<Location> exactIter = exactErrors.iterator();
334                Location previousLocation = new Location(this, 0, 0);
335                Location exact = null;
336                Location rangeStart = null;
337                Location rangeEnd = null;
338                Location rangeLoc = null;
339                if (exactIter.hasNext()) {
340                    exact = exactIter.next();
341                }
342                if (rangeIter.hasNext()) {
343                    Range r = rangeIter.next();
344                    rangeStart = r.getStart();
345                    rangeEnd = r.getEnd();
346                    rangeLoc = r.getLoc();
347                }
348                while (exact != null || rangeEnd != null) {
349                    if (exact != null
350                            && (rangeStart == null || exact.compareTo(rangeStart) < 0)
351                            && (rangeEnd == null || exact.compareTo(rangeEnd) < 0)) { // exact
352                                                                                        // first?
353                        emitContent(previousLocation, exact, handler);
354                        handler.startCharHilite(exact.getLine() + 1,
355                                exact.getColumn() + 1);
356                        emitCharacter(exact, handler);
357                        handler.endCharHilite();
358                        previousLocation = exact.next();
359    
360                        if (exactIter.hasNext()) {
361                            exact = exactIter.next();
362                        } else {
363                            exact = null;
364                        }
365                    } else if (rangeStart != null) { // range start first?
366                        emitContent(previousLocation, rangeStart, handler);
367                        handler.startRange(rangeLoc.getLine() + 1,
368                                rangeLoc.getColumn() + 1);
369                        previousLocation = rangeStart;
370                        rangeStart = null;
371                    } else { // range end first?
372                        emitContent(previousLocation, rangeEnd, handler);
373                        handler.endRange();
374                        previousLocation = rangeEnd;
375                        rangeEnd = null;
376    
377                        if (rangeIter.hasNext()) {
378                            Range r = rangeIter.next();
379                            rangeStart = r.getStart();
380                            rangeEnd = r.getEnd();
381                            rangeLoc = r.getLoc();
382                        } else {
383                            rangeEnd = null;
384                        }
385                    }
386                }
387                emitContent(previousLocation, new Location(this, lines.size(), 0),
388                        handler);
389            } finally {
390                handler.endSource();
391            }
392        }
393    
394        /**
395         * Returns the uri.
396         * 
397         * @return the uri
398         */
399        public String getUri() {
400            return uri;
401        }
402    
403        /**
404         * Returns the locationRecorder. The returned object is guaranteed to also
405         * implement <code>LexicalHandler</code>.
406         * 
407         * @return the locationRecorder
408         */
409        public ContentHandler getLocationRecorder() {
410            return locationRecorder;
411        }
412    
413        public Location newLocatorLocation(int oneBasedLine, int oneBasedColumn) {
414            return new Location(this, oneBasedLine - 1, oneBasedColumn - 1);
415        }
416    }