001    /*
002     * Copyright (c) 2007 Henri Sivonen
003     * Copyright (c) 2008-2010 Mozilla Foundation
004     *
005     * Permission is hereby granted, free of charge, to any person obtaining a 
006     * copy of this software and associated documentation files (the "Software"), 
007     * to deal in the Software without restriction, including without limitation 
008     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
009     * and/or sell copies of the Software, and to permit persons to whom the 
010     * Software is furnished to do so, subject to the following conditions:
011     *
012     * The above copyright notice and this permission notice shall be included in 
013     * all copies or substantial portions of the Software.
014     *
015     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
016     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
017     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
018     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
019     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
020     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
021     * DEALINGS IN THE SOFTWARE.
022     */
023    
024    package nu.validator.htmlparser.impl;
025    
026    import java.io.IOException;
027    
028    import nu.validator.htmlparser.annotation.Auto;
029    import nu.validator.htmlparser.annotation.Inline;
030    import nu.validator.htmlparser.common.ByteReadable;
031    
032    import org.xml.sax.SAXException;
033    
034    public abstract class MetaScanner {
035    
036        /**
037         * Tail of the attribute name "charset": the leading 'c' is matched by the state loop before charsetIndex starts indexing this array.
038         */
039        private static final char[] CHARSET = "harset".toCharArray();
040        
041        /**
042         * Tail of the attribute name "content": the leading 'c' is matched by the state loop before contentIndex starts indexing this array.
043         */
044        private static final char[] CONTENT = "ontent".toCharArray();
045    
046        /**
047         * Tail of the attribute name "http-equiv": the leading 'h' is matched by the state loop before httpEquivIndex starts indexing this array.
048         */
049        private static final char[] HTTP_EQUIV = "ttp-equiv".toCharArray();
050    
051        /**
052         * Constant for the attribute value "content-type" (stored in full, unlike the attribute-name constants above).
053         */
054        private static final char[] CONTENT_TYPE = "content-type".toCharArray();
055    
056        private static final int NO = 0; // metaState: the tag name read so far is not a prefix of "meta"
057    
058        private static final int M = 1; // metaState: "m" seen (case-insensitive)
059        
060        private static final int E = 2; // metaState: "me" seen
061        
062        private static final int T = 3; // metaState: "met" seen
063    
064        private static final int A = 4; // metaState: "meta" seen; attribute names are only matched while in this state
065        
066        private static final int DATA = 0; // tokenizer state: outside markup, scanning for '<'
067    
068        private static final int TAG_OPEN = 1; // just after '<'
069    
070        private static final int SCAN_UNTIL_GT = 2; // entered for "<?", "</" and "<!" constructs that do not open a comment
071    
072        private static final int TAG_NAME = 3; // reading a tag name while metaState tracks "meta"
073    
074        private static final int BEFORE_ATTRIBUTE_NAME = 4; // skipping whitespace before an attribute name
075    
076        private static final int ATTRIBUTE_NAME = 5; // reading an attribute name and advancing the keyword indices
077    
078        private static final int AFTER_ATTRIBUTE_NAME = 6; // whitespace seen after an attribute name
079    
080        private static final int BEFORE_ATTRIBUTE_VALUE = 7; // just after '='
081    
082        private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8; // inside a "..." attribute value
083    
084        private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9; // inside a '...' attribute value
085    
086        private static final int ATTRIBUTE_VALUE_UNQUOTED = 10; // inside an unquoted attribute value
087    
088        private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11; // just after the closing quote of an attribute value
089    
090        private static final int MARKUP_DECLARATION_OPEN = 13; // just after "<!" (note: value 12 is not assigned to any state in this list)
091        
092        private static final int MARKUP_DECLARATION_HYPHEN = 14; // just after "<!-"
093    
094        private static final int COMMENT_START = 15; // just after "<!--"
095    
096        private static final int COMMENT_START_DASH = 16; // '-' seen immediately after "<!--"
097    
098        private static final int COMMENT = 17; // inside a comment
099    
100        private static final int COMMENT_END_DASH = 18; // '-' seen inside a comment
101    
102        private static final int COMMENT_END = 19; // "--" seen inside a comment; '>' returns to DATA
103        
104        private static final int SELF_CLOSING_START_TAG = 20; // '/' seen inside a tag
105        
106        private static final int HTTP_EQUIV_NOT_SEEN = 0; // httpEquivState value assigned by the constructor
107        
108        private static final int HTTP_EQUIV_CONTENT_TYPE = 1; // NOTE(review): presumably means an http-equiv value of "content-type" was seen -- confirm where httpEquivState is assigned
109    
110        private static final int HTTP_EQUIV_OTHER = 2; // NOTE(review): presumably means http-equiv had some other value -- confirm where httpEquivState is assigned
111    
112        /**
113         * The data source that read() pulls bytes from; the constructor leaves it null.
114         */
115        protected ByteReadable readable;
116        
117        /**
118         * The state of the state machine that recognizes the tag name "meta" (one of NO, M, E, T, A).
119         */
120        private int metaState = NO;
121    
122        /**
123         * The current position in recognizing the attribute name "content"; Integer.MAX_VALUE means no match is in progress.
124         */
125        private int contentIndex = Integer.MAX_VALUE;
126        
127        /**
128         * The current position in recognizing the attribute name "charset"; Integer.MAX_VALUE means no match is in progress.
129         */
130        private int charsetIndex = Integer.MAX_VALUE;
131    
132        /**
133         * The current position in recognizing the attribute name "http-equiv"; Integer.MAX_VALUE means no match is in progress.
134         */
135        private int httpEquivIndex = Integer.MAX_VALUE;
136    
137        /**
138         * The current position in recognizing the attribute value "content-type"; the state loop resets it to 0 when '=' is seen.
139         */
140        private int contentTypeIndex = Integer.MAX_VALUE;
141    
142        /**
143         * The tokenizer state.
144         */
145        protected int stateSave = DATA;
146    
147        /**
148         * The currently filled length of strBuf; the state loop resets it to 0 when '=' is seen.
149         */
150        private int strBufLen;
151    
152        /**
153         * Accumulation buffer for attribute values (allocated with 36 chars by the constructor).
154         */
155        private @Auto char[] strBuf;
156        
157        private String content; // NOTE(review): presumably the collected value of a content attribute -- confirm where it is assigned; released in destructor()
158        
159        private String charset; // NOTE(review): presumably the collected value of a charset attribute -- confirm where it is assigned; released in destructor()
160        
161        private int httpEquivState; // holds an HTTP_EQUIV_* value; the constructor starts it at HTTP_EQUIV_NOT_SEEN
162        
163        public MetaScanner() { // fresh scanner: no source attached, no keyword match in progress, nothing collected
164            this.readable = null;
165            this.stateSave = DATA;
166            this.metaState = NO;
167            this.httpEquivState = HTTP_EQUIV_NOT_SEEN;
168            this.charsetIndex = Integer.MAX_VALUE; // MAX_VALUE marks a keyword cursor as "not matching"
169            this.contentIndex = Integer.MAX_VALUE;
170            this.contentTypeIndex = Integer.MAX_VALUE;
171            this.httpEquivIndex = Integer.MAX_VALUE;
172            this.strBuf = new char[36]; // attribute-value accumulator, initially empty
173            this.strBufLen = 0;
174            this.charset = null;
175            this.content = null;
176        }
177        
178        @SuppressWarnings("unused") private void destructor() { // hands both retained strings to Portability.releaseString
179            Portability.releaseString(this.content);
180            Portability.releaseString(this.charset);
181        }
182    
183        // [NOCPP[
184        
185        /**
186         * Reads a byte from the data source.
187         * 
188         * -1 means end.
189         * @return
190         * @throws IOException
191         */
192        protected int read() throws IOException {
193            return readable.readByte();
194        }
195    
196        // ]NOCPP]
197    
198        // WARNING: When editing this, make sure the bytecode length shown by javap
199        // stays under 8000 bytes!
200        /**
201         * Runs the meta scanning algorithm.
202         */
203        protected final void stateLoop(int state)
204                throws SAXException, IOException {
205            int c = -1;
206            boolean reconsume = false;
207            stateloop: for (;;) {
208                switch (state) {
209                    case DATA:
210                        dataloop: for (;;) {
211                            if (reconsume) {
212                                reconsume = false;
213                            } else {
214                                c = read();
215                            }
216                            switch (c) {
217                                case -1:
218                                    break stateloop;
219                                case '<':
220                                    state = MetaScanner.TAG_OPEN;
221                                    break dataloop; // FALL THROUGH continue
222                                // stateloop;
223                                default:
224                                    continue;
225                            }
226                        }
227                        // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
228                    case TAG_OPEN:
229                        tagopenloop: for (;;) {
230                            c = read();
231                            switch (c) {
232                                case -1:
233                                    break stateloop;
234                                case 'm':
235                                case 'M':
236                                    metaState = M;
237                                    state = MetaScanner.TAG_NAME;
238                                    break tagopenloop;
239                                    // continue stateloop;                                
240                                case '!':
241                                    state = MetaScanner.MARKUP_DECLARATION_OPEN;
242                                    continue stateloop;
243                                case '?':
244                                case '/':
245                                    state = MetaScanner.SCAN_UNTIL_GT;
246                                    continue stateloop;
247                                case '>':
248                                    state = MetaScanner.DATA;
249                                    continue stateloop;
250                                default:
251                                    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
252                                        metaState = NO;
253                                        state = MetaScanner.TAG_NAME;
254                                        break tagopenloop;
255                                        // continue stateloop;
256                                    }
257                                    state = MetaScanner.DATA;
258                                    reconsume = true;
259                                    continue stateloop;
260                            }
261                        }
262                        // FALL THROUGH DON'T REORDER
263                    case TAG_NAME:
264                        tagnameloop: for (;;) {
265                            c = read();
266                            switch (c) {
267                                case -1:
268                                    break stateloop;
269                                case ' ':
270                                case '\t':
271                                case '\n':
272                                case '\u000C':
273                                    state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
274                                    break tagnameloop;
275                                // continue stateloop;
276                                case '/':
277                                    state = MetaScanner.SELF_CLOSING_START_TAG;
278                                    continue stateloop;
279                                case '>':
280                                    state = MetaScanner.DATA;
281                                    continue stateloop;
282                                case 'e':
283                                case 'E':
284                                    if (metaState == M) {
285                                        metaState = E;
286                                    } else {
287                                        metaState = NO;
288                                    }
289                                    continue;
290                                case 't':
291                                case 'T':
292                                    if (metaState == E) {
293                                        metaState = T;
294                                    } else {
295                                        metaState = NO;
296                                    }
297                                    continue;
298                                case 'a':
299                                case 'A':
300                                    if (metaState == T) {
301                                        metaState = A;
302                                    } else {
303                                        metaState = NO;
304                                    }
305                                    continue;
306                                default:
307                                    metaState = NO;
308                                    continue;
309                            }
310                        }
311                        // FALLTHRU DON'T REORDER
312                    case BEFORE_ATTRIBUTE_NAME:
313                        beforeattributenameloop: for (;;) {
314                            if (reconsume) {
315                                reconsume = false;
316                            } else {
317                                c = read();
318                            }
319                            /*
320                             * Consume the next input character:
321                             */
322                            switch (c) {
323                                case -1:
324                                    break stateloop;
325                                case ' ':
326                                case '\t':
327                                case '\n':
328                                case '\u000C':
329                                    continue;
330                                case '/':
331                                    state = MetaScanner.SELF_CLOSING_START_TAG;
332                                    continue stateloop;
333                                case '>':
334                                    if (handleTag()) {
335                                        break stateloop;
336                                    }
337                                    state = DATA;
338                                    continue stateloop;
339                                case 'c':
340                                case 'C':
341                                    contentIndex = 0;
342                                    charsetIndex = 0;
343                                    httpEquivIndex = Integer.MAX_VALUE;
344                                    contentTypeIndex = Integer.MAX_VALUE;
345                                    state = MetaScanner.ATTRIBUTE_NAME;
346                                    break beforeattributenameloop;                                
347                                case 'h':
348                                case 'H':
349                                    contentIndex = Integer.MAX_VALUE;
350                                    charsetIndex = Integer.MAX_VALUE;
351                                    httpEquivIndex = 0;
352                                    contentTypeIndex = Integer.MAX_VALUE;
353                                    state = MetaScanner.ATTRIBUTE_NAME;
354                                    break beforeattributenameloop;                                
355                                default:
356                                    contentIndex = Integer.MAX_VALUE;
357                                    charsetIndex = Integer.MAX_VALUE;
358                                    httpEquivIndex = Integer.MAX_VALUE;
359                                    contentTypeIndex = Integer.MAX_VALUE;
360                                    state = MetaScanner.ATTRIBUTE_NAME;
361                                    break beforeattributenameloop;
362                                // continue stateloop;
363                            }
364                        }
365                        // FALLTHRU DON'T REORDER
366                    case ATTRIBUTE_NAME:
367                        attributenameloop: for (;;) {
368                            c = read();
369                            switch (c) {
370                                case -1:
371                                    break stateloop;
372                                case ' ':
373                                case '\t':
374                                case '\n':
375                                case '\u000C':
376                                    state = MetaScanner.AFTER_ATTRIBUTE_NAME;
377                                    continue stateloop;
378                                case '/':
379                                    state = MetaScanner.SELF_CLOSING_START_TAG;
380                                    continue stateloop;
381                                case '=':
382                                    strBufLen = 0;
383                                    contentTypeIndex = 0;
384                                    state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
385                                    break attributenameloop;
386                                // continue stateloop;
387                                case '>':
388                                    if (handleTag()) {
389                                        break stateloop;
390                                    }
391                                    state = MetaScanner.DATA;
392                                    continue stateloop;
393                                default:
394                                    if (metaState == A) {
395                                        if (c >= 'A' && c <= 'Z') {
396                                            c += 0x20;
397                                        }
398                                        if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) {
399                                            ++contentIndex;
400                                        } else {
401                                            contentIndex = Integer.MAX_VALUE;
402                                        }
403                                        if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) {
404                                            ++charsetIndex;
405                                        } else {
406                                            charsetIndex = Integer.MAX_VALUE;
407                                        }
408                                        if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) {
409                                            ++httpEquivIndex;
410                                        } else {
411                                            httpEquivIndex = Integer.MAX_VALUE;
412                                        }                                    
413                                    }
414                                    continue;
415                            }
416                        }
417                        // FALLTHRU DON'T REORDER
418                    case BEFORE_ATTRIBUTE_VALUE:
419                        beforeattributevalueloop: for (;;) {
420                            c = read();
421                            switch (c) {
422                                case -1:
423                                    break stateloop;
424                                case ' ':
425                                case '\t':
426                                case '\n':
427                                case '\u000C':
428                                    continue;
429                                case '"':
430                                    state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
431                                    break beforeattributevalueloop;
432                                // continue stateloop;
433                                case '\'':
434                                    state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED;
435                                    continue stateloop;
436                                case '>':
437                                    if (handleTag()) {
438                                        break stateloop;
439                                    }
440                                    state = MetaScanner.DATA;
441                                    continue stateloop;
442                                default:
443                                    handleCharInAttributeValue(c);
444                                    state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED;
445                                    continue stateloop;
446                            }
447                        }
448                        // FALLTHRU DON'T REORDER
449                    case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
450                        attributevaluedoublequotedloop: for (;;) {
451                            if (reconsume) {
452                                reconsume = false;
453                            } else {
454                                c = read();
455                            }
456                            switch (c) {
457                                case -1:
458                                    break stateloop;
459                                case '"':
460                                    handleAttributeValue();
461                                    state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
462                                    break attributevaluedoublequotedloop;
463                                // continue stateloop;
464                                default:
465                                    handleCharInAttributeValue(c);
466                                    continue;
467                            }
468                        }
469                        // FALLTHRU DON'T REORDER
470                    case AFTER_ATTRIBUTE_VALUE_QUOTED:
471                        afterattributevaluequotedloop: for (;;) {
472                            c = read();
473                            switch (c) {
474                                case -1:
475                                    break stateloop;
476                                case ' ':
477                                case '\t':
478                                case '\n':
479                                case '\u000C':
480                                    state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
481                                    continue stateloop;
482                                case '/':
483                                    state = MetaScanner.SELF_CLOSING_START_TAG;
484                                    break afterattributevaluequotedloop;
485                                // continue stateloop;
486                                case '>':
487                                    if (handleTag()) {
488                                        break stateloop;
489                                    }
490                                    state = MetaScanner.DATA;
491                                    continue stateloop;
492                                default:
493                                    state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
494                                    reconsume = true;
495                                    continue stateloop;
496                            }
497                        }
498                        // FALLTHRU DON'T REORDER
499                    case SELF_CLOSING_START_TAG:
500                        c = read();
501                        switch (c) {
502                            case -1:
503                                break stateloop;
504                            case '>':
505                                if (handleTag()) {
506                                    break stateloop;
507                                }
508                                state = MetaScanner.DATA;
509                                continue stateloop;
510                            default:
511                                state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
512                                reconsume = true;
513                                continue stateloop;
514                        }
515                        // XXX reorder point
516                    case ATTRIBUTE_VALUE_UNQUOTED:
517                        for (;;) {
518                            if (reconsume) {
519                                reconsume = false;
520                            } else {
521                                c = read();
522                            }
523                            switch (c) {
524                                case -1:
525                                    break stateloop;
526                                case ' ':
527                                case '\t':
528                                case '\n':
529    
530                                case '\u000C':
531                                    handleAttributeValue();
532                                    state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
533                                    continue stateloop;
534                                case '>':
535                                    handleAttributeValue();
536                                    if (handleTag()) {
537                                        break stateloop;
538                                    }
539                                    state = MetaScanner.DATA;
540                                    continue stateloop;
541                                default:
542                                    handleCharInAttributeValue(c);
543                                    continue;
544                            }
545                        }
546                        // XXX reorder point
547                    case AFTER_ATTRIBUTE_NAME:
548                        for (;;) {
549                            c = read();
550                            switch (c) {
551                                case -1:
552                                    break stateloop;
553                                case ' ':
554                                case '\t':
555                                case '\n':
556                                case '\u000C':
557                                    continue;
558                                case '/':
559                                    handleAttributeValue();
560                                    state = MetaScanner.SELF_CLOSING_START_TAG;
561                                    continue stateloop;
562                                case '=':
563                                    strBufLen = 0;
564                                    contentTypeIndex = 0;
565                                    state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
566                                    continue stateloop;
567                                case '>':
568                                    handleAttributeValue();
569                                    if (handleTag()) {
570                                        break stateloop;
571                                    }
572                                    state = MetaScanner.DATA;
573                                    continue stateloop;
574                                case 'c':
575                                case 'C':
576                                    contentIndex = 0;
577                                    charsetIndex = 0;
578                                    state = MetaScanner.ATTRIBUTE_NAME;
579                                    continue stateloop;
580                                default:
581                                    contentIndex = -1;
582                                    charsetIndex = -1;
583                                    state = MetaScanner.ATTRIBUTE_NAME;
584                                    continue stateloop;
585                            }
586                        }
587                        // XXX reorder point
588                    case MARKUP_DECLARATION_OPEN:
589                        markupdeclarationopenloop: for (;;) {
590                            c = read();
591                            switch (c) {
592                                case -1:
593                                    break stateloop;
594                                case '-':
595                                    state = MetaScanner.MARKUP_DECLARATION_HYPHEN;
596                                    break markupdeclarationopenloop;
597                                // continue stateloop;
598                                default:
599                                    state = MetaScanner.SCAN_UNTIL_GT;
600                                    reconsume = true;
601                                    continue stateloop;
602                            }
603                        }
604                        // FALLTHRU DON'T REORDER
605                    case MARKUP_DECLARATION_HYPHEN:
606                        markupdeclarationhyphenloop: for (;;) {
607                            c = read();
608                            switch (c) {
609                                case -1:
610                                    break stateloop;
611                                case '-':
612                                    state = MetaScanner.COMMENT_START;
613                                    break markupdeclarationhyphenloop;
614                                // continue stateloop;
615                                default:
616                                    state = MetaScanner.SCAN_UNTIL_GT;
617                                    reconsume = true;
618                                    continue stateloop;
619                            }
620                        }
621                        // FALLTHRU DON'T REORDER
622                    case COMMENT_START:
623                        commentstartloop: for (;;) {
624                            c = read();
625                            switch (c) {
626                                case -1:
627                                    break stateloop;
628                                case '-':
629                                    state = MetaScanner.COMMENT_START_DASH;
630                                    continue stateloop;
631                                case '>':
632                                    state = MetaScanner.DATA;
633                                    continue stateloop;
634                                default:
635                                    state = MetaScanner.COMMENT;
636                                    break commentstartloop;
637                                // continue stateloop;
638                            }
639                        }
640                        // FALLTHRU DON'T REORDER
641                    case COMMENT:
642                        commentloop: for (;;) {
643                            c = read();
644                            switch (c) {
645                                case -1:
646                                    break stateloop;
647                                case '-':
648                                    state = MetaScanner.COMMENT_END_DASH;
649                                    break commentloop;
650                                // continue stateloop;
651                                default:
652                                    continue;
653                            }
654                        }
655                        // FALLTHRU DON'T REORDER
656                    case COMMENT_END_DASH:
657                        commentenddashloop: for (;;) {
658                            c = read();
659                            switch (c) {
660                                case -1:
661                                    break stateloop;
662                                case '-':
663                                    state = MetaScanner.COMMENT_END;
664                                    break commentenddashloop;
665                                // continue stateloop;
666                                default:
667                                    state = MetaScanner.COMMENT;
668                                    continue stateloop;
669                            }
670                        }
671                        // FALLTHRU DON'T REORDER
672                    case COMMENT_END:
673                        for (;;) {
674                            c = read();
675                            switch (c) {
676                                case -1:
677                                    break stateloop;
678                                case '>':
679                                    state = MetaScanner.DATA;
680                                    continue stateloop;
681                                case '-':
682                                    continue;
683                                default:
684                                    state = MetaScanner.COMMENT;
685                                    continue stateloop;
686                            }
687                        }
688                        // XXX reorder point
689                    case COMMENT_START_DASH:
690                        c = read();
691                        switch (c) {
692                            case -1:
693                                break stateloop;
694                            case '-':
695                                state = MetaScanner.COMMENT_END;
696                                continue stateloop;
697                            case '>':
698                                state = MetaScanner.DATA;
699                                continue stateloop;
700                            default:
701                                state = MetaScanner.COMMENT;
702                                continue stateloop;
703                        }
704                        // XXX reorder point
705                    case ATTRIBUTE_VALUE_SINGLE_QUOTED:
706                        for (;;) {
707                            if (reconsume) {
708                                reconsume = false;
709                            } else {
710                                c = read();
711                            }
712                            switch (c) {
713                                case -1:
714                                    break stateloop;
715                                case '\'':
716                                    handleAttributeValue();
717                                    state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
718                                    continue stateloop;
719                                default:
720                                    handleCharInAttributeValue(c);
721                                    continue;
722                            }
723                        }
724                        // XXX reorder point
725                    case SCAN_UNTIL_GT:
726                        for (;;) {
727                            if (reconsume) {
728                                reconsume = false;
729                            } else {
730                                c = read();
731                            }
732                            switch (c) {
733                                case -1:
734                                    break stateloop;
735                                case '>':
736                                    state = MetaScanner.DATA;
737                                    continue stateloop;
738                                default:
739                                    continue;
740                            }
741                        }
742                }
743            }
744            stateSave  = state;
745        }
746    
747        private void handleCharInAttributeValue(int c) {
748            if (metaState == A) {
749                if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) {
750                    addToBuffer(c);
751                } else if (httpEquivIndex == HTTP_EQUIV.length) {
752                    if (contentTypeIndex < CONTENT_TYPE.length && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex]) {
753                        ++contentTypeIndex;
754                    } else {
755                        contentTypeIndex = Integer.MAX_VALUE;
756                    }
757                }
758            }
759        }
760    
761        @Inline private int toAsciiLowerCase(int c) {
762            if (c >= 'A' && c <= 'Z') {
763                return c + 0x20;
764            }
765            return c;
766        }
767    
768        /**
769         * Adds a character to the accumulation buffer.
770         * @param c the character to add
771         */
772        private void addToBuffer(int c) {
773            if (strBufLen == strBuf.length) {
774                char[] newBuf = new char[strBuf.length + (strBuf.length << 1)];
775                System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length);
776                strBuf = newBuf;
777            }
778            strBuf[strBufLen++] = (char)c;
779        }
780    
781        /**
782         * Attempts to extract a charset name from the accumulation buffer.
783         * @return <code>true</code> if successful
784         * @throws SAXException
785         */
786        private void handleAttributeValue() throws SAXException {
787            if (metaState != A) {
788                return;
789            }
790            if (contentIndex == CONTENT.length && content == null) {
791                content = Portability.newStringFromBuffer(strBuf, 0, strBufLen);
792                return;
793            }
794            if (charsetIndex == CHARSET.length && charset == null) {
795                charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen);            
796                return;
797            }
798            if (httpEquivIndex == HTTP_EQUIV.length
799                    && httpEquivState == HTTP_EQUIV_NOT_SEEN) {
800                httpEquivState = (contentTypeIndex == CONTENT_TYPE.length) ? HTTP_EQUIV_CONTENT_TYPE
801                        : HTTP_EQUIV_OTHER;
802                return;
803            }
804        }
805    
806        private boolean handleTag() throws SAXException {
807            boolean stop = handleTagInner();
808            Portability.releaseString(content);
809            content = null;
810            Portability.releaseString(charset);
811            charset = null;
812            httpEquivState = HTTP_EQUIV_NOT_SEEN;
813            return stop;
814        }
815        
816        private boolean handleTagInner() throws SAXException {
817            if (charset != null && tryCharset(charset)) {
818                    return true;
819            }
820            if (content != null && httpEquivState == HTTP_EQUIV_CONTENT_TYPE) {
821                String extract = TreeBuilder.extractCharsetFromContent(content);
822                if (extract == null) {
823                    return false;
824                }
825                boolean success = tryCharset(extract);
826                Portability.releaseString(extract);
827                return success;
828            }
829            return false;
830        }
831    
832        /**
833         * Tries to switch to an encoding.
834         * 
835         * @param encoding
836         * @return <code>true</code> if successful
837         * @throws SAXException
838         */
839        protected abstract boolean tryCharset(String encoding) throws SAXException;
840        
841    }