001    /*
002     * Copyright (c) 2005 Henri Sivonen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package nu.validator.htmlparser.impl;
024    
/**
 * Static lookup tables for character references: the named entities
 * ({@code NAMES} with their expansions in {@code VALUES}) and the
 * windows-1252 interpretation of the code points 0x80-0x9F
 * ({@code WINDOWS_1252}).
 *
 * <p>This class is a pure data holder and cannot be instantiated. The arrays
 * are shared, mutable Java arrays; callers must treat them as read-only,
 * because any write would be visible to every other user of the tables.
 *
 * @version $Id: Entities.java 150 2007-08-16 19:21:25Z hsivonen $
 * @author hsivonen
 */
public final class Entities {

    /**
     * Not instantiable: the class only exposes static tables.
     */
    private Entities() {
        throw new AssertionError("Entities is not instantiable");
    }

    /**
     * Entity names without the leading ampersand.
     *
     * <p>The entries are in strictly ascending {@link String#compareTo}
     * order, so the array can be searched with a binary search. Entities that
     * are recognized with and without the terminating semicolon are listed
     * twice (for example {@code "amp"} and {@code "amp;"}); all others appear
     * only in the semicolon-terminated form.
     *
     * <p>Entry {@code i} of this array corresponds to entry {@code i} of
     * {@code VALUES}; the two arrays must be edited together and the sort
     * order must be preserved.
     */
    static final String[] NAMES = {
        "AElig",
        "AElig;",
        "AMP",
        "AMP;",
        "Aacute",
        "Aacute;",
        "Acirc",
        "Acirc;",
        "Agrave",
        "Agrave;",
        "Alpha;",
        "Aring",
        "Aring;",
        "Atilde",
        "Atilde;",
        "Auml",
        "Auml;",
        "Beta;",
        "COPY",
        "COPY;",
        "Ccedil",
        "Ccedil;",
        "Chi;",
        "Dagger;",
        "Delta;",
        "ETH",
        "ETH;",
        "Eacute",
        "Eacute;",
        "Ecirc",
        "Ecirc;",
        "Egrave",
        "Egrave;",
        "Epsilon;",
        "Eta;",
        "Euml",
        "Euml;",
        "GT",
        "GT;",
        "Gamma;",
        "Iacute",
        "Iacute;",
        "Icirc",
        "Icirc;",
        "Igrave",
        "Igrave;",
        "Iota;",
        "Iuml",
        "Iuml;",
        "Kappa;",
        "LT",
        "LT;",
        "Lambda;",
        "Mu;",
        "Ntilde",
        "Ntilde;",
        "Nu;",
        "OElig;",
        "Oacute",
        "Oacute;",
        "Ocirc",
        "Ocirc;",
        "Ograve",
        "Ograve;",
        "Omega;",
        "Omicron;",
        "Oslash",
        "Oslash;",
        "Otilde",
        "Otilde;",
        "Ouml",
        "Ouml;",
        "Phi;",
        "Pi;",
        "Prime;",
        "Psi;",
        "QUOT",
        "QUOT;",
        "REG",
        "REG;",
        "Rho;",
        "Scaron;",
        "Sigma;",
        "THORN",
        "THORN;",
        "TRADE;",
        "Tau;",
        "Theta;",
        "Uacute",
        "Uacute;",
        "Ucirc",
        "Ucirc;",
        "Ugrave",
        "Ugrave;",
        "Upsilon;",
        "Uuml",
        "Uuml;",
        "Xi;",
        "Yacute",
        "Yacute;",
        "Yuml;",
        "Zeta;",
        "aacute",
        "aacute;",
        "acirc",
        "acirc;",
        "acute",
        "acute;",
        "aelig",
        "aelig;",
        "agrave",
        "agrave;",
        "alefsym;",
        "alpha;",
        "amp",
        "amp;",
        "and;",
        "ang;",
        "apos;",
        "aring",
        "aring;",
        "asymp;",
        "atilde",
        "atilde;",
        "auml",
        "auml;",
        "bdquo;",
        "beta;",
        "brvbar",
        "brvbar;",
        "bull;",
        "cap;",
        "ccedil",
        "ccedil;",
        "cedil",
        "cedil;",
        "cent",
        "cent;",
        "chi;",
        "circ;",
        "clubs;",
        "cong;",
        "copy",
        "copy;",
        "crarr;",
        "cup;",
        "curren",
        "curren;",
        "dArr;",
        "dagger;",
        "darr;",
        "deg",
        "deg;",
        "delta;",
        "diams;",
        "divide",
        "divide;",
        "eacute",
        "eacute;",
        "ecirc",
        "ecirc;",
        "egrave",
        "egrave;",
        "empty;",
        "emsp;",
        "ensp;",
        "epsilon;",
        "equiv;",
        "eta;",
        "eth",
        "eth;",
        "euml",
        "euml;",
        "euro;",
        "exist;",
        "fnof;",
        "forall;",
        "frac12",
        "frac12;",
        "frac14",
        "frac14;",
        "frac34",
        "frac34;",
        "frasl;",
        "gamma;",
        "ge;",
        "gt",
        "gt;",
        "hArr;",
        "harr;",
        "hearts;",
        "hellip;",
        "iacute",
        "iacute;",
        "icirc",
        "icirc;",
        "iexcl",
        "iexcl;",
        "igrave",
        "igrave;",
        "image;",
        "infin;",
        "int;",
        "iota;",
        "iquest",
        "iquest;",
        "isin;",
        "iuml",
        "iuml;",
        "kappa;",
        "lArr;",
        "lambda;",
        "lang;",
        "laquo",
        "laquo;",
        "larr;",
        "lceil;",
        "ldquo;",
        "le;",
        "lfloor;",
        "lowast;",
        "loz;",
        "lrm;",
        "lsaquo;",
        "lsquo;",
        "lt",
        "lt;",
        "macr",
        "macr;",
        "mdash;",
        "micro",
        "micro;",
        "middot",
        "middot;",
        "minus;",
        "mu;",
        "nabla;",
        "nbsp",
        "nbsp;",
        "ndash;",
        "ne;",
        "ni;",
        "not",
        "not;",
        "notin;",
        "nsub;",
        "ntilde",
        "ntilde;",
        "nu;",
        "oacute",
        "oacute;",
        "ocirc",
        "ocirc;",
        "oelig;",
        "ograve",
        "ograve;",
        "oline;",
        "omega;",
        "omicron;",
        "oplus;",
        "or;",
        "ordf",
        "ordf;",
        "ordm",
        "ordm;",
        "oslash",
        "oslash;",
        "otilde",
        "otilde;",
        "otimes;",
        "ouml",
        "ouml;",
        "para",
        "para;",
        "part;",
        "permil;",
        "perp;",
        "phi;",
        "pi;",
        "piv;",
        "plusmn",
        "plusmn;",
        "pound",
        "pound;",
        "prime;",
        "prod;",
        "prop;",
        "psi;",
        "quot",
        "quot;",
        "rArr;",
        "radic;",
        "rang;",
        "raquo",
        "raquo;",
        "rarr;",
        "rceil;",
        "rdquo;",
        "real;",
        "reg",
        "reg;",
        "rfloor;",
        "rho;",
        "rlm;",
        "rsaquo;",
        "rsquo;",
        "sbquo;",
        "scaron;",
        "sdot;",
        "sect",
        "sect;",
        "shy",
        "shy;",
        "sigma;",
        "sigmaf;",
        "sim;",
        "spades;",
        "sub;",
        "sube;",
        "sum;",
        "sup1",
        "sup1;",
        "sup2",
        "sup2;",
        "sup3",
        "sup3;",
        "sup;",
        "supe;",
        "szlig",
        "szlig;",
        "tau;",
        "there4;",
        "theta;",
        "thetasym;",
        "thinsp;",
        "thorn",
        "thorn;",
        "tilde;",
        "times",
        "times;",
        "trade;",
        "uArr;",
        "uacute",
        "uacute;",
        "uarr;",
        "ucirc",
        "ucirc;",
        "ugrave",
        "ugrave;",
        "uml",
        "uml;",
        "upsih;",
        "upsilon;",
        "uuml",
        "uuml;",
        "weierp;",
        "xi;",
        "yacute",
        "yacute;",
        "yen",
        "yen;",
        "yuml",
        "yuml;",
        "zeta;",
        "zwj;",
        "zwnj;"
    };

    /**
     * Expansions of the entities, index-aligned with {@code NAMES}:
     * {@code VALUES[i]} is the replacement text for {@code NAMES[i]}. Every
     * entry in this table is a one-element {@code char} array, and the
     * with-semicolon and without-semicolon spellings of an entity carry the
     * same value.
     *
     * <p>NOTE(review): {@code lang;} and {@code rang;} expand to U+3008 and
     * U+3009 here. HTML 4.01 defined them as U+2329/U+232A and the current
     * HTML Standard uses U+27E8/U+27E9; confirm which specification revision
     * this table is meant to track before changing them.
     */
    static final char[][] VALUES = {
        {'\u00C6'},
        {'\u00C6'},
        {'\u0026'},
        {'\u0026'},
        {'\u00C1'},
        {'\u00C1'},
        {'\u00C2'},
        {'\u00C2'},
        {'\u00C0'},
        {'\u00C0'},
        {'\u0391'},
        {'\u00C5'},
        {'\u00C5'},
        {'\u00C3'},
        {'\u00C3'},
        {'\u00C4'},
        {'\u00C4'},
        {'\u0392'},
        {'\u00A9'},
        {'\u00A9'},
        {'\u00C7'},
        {'\u00C7'},
        {'\u03A7'},
        {'\u2021'},
        {'\u0394'},
        {'\u00D0'},
        {'\u00D0'},
        {'\u00C9'},
        {'\u00C9'},
        {'\u00CA'},
        {'\u00CA'},
        {'\u00C8'},
        {'\u00C8'},
        {'\u0395'},
        {'\u0397'},
        {'\u00CB'},
        {'\u00CB'},
        {'\u003E'},
        {'\u003E'},
        {'\u0393'},
        {'\u00CD'},
        {'\u00CD'},
        {'\u00CE'},
        {'\u00CE'},
        {'\u00CC'},
        {'\u00CC'},
        {'\u0399'},
        {'\u00CF'},
        {'\u00CF'},
        {'\u039A'},
        {'\u003C'},
        {'\u003C'},
        {'\u039B'},
        {'\u039C'},
        {'\u00D1'},
        {'\u00D1'},
        {'\u039D'},
        {'\u0152'},
        {'\u00D3'},
        {'\u00D3'},
        {'\u00D4'},
        {'\u00D4'},
        {'\u00D2'},
        {'\u00D2'},
        {'\u03A9'},
        {'\u039F'},
        {'\u00D8'},
        {'\u00D8'},
        {'\u00D5'},
        {'\u00D5'},
        {'\u00D6'},
        {'\u00D6'},
        {'\u03A6'},
        {'\u03A0'},
        {'\u2033'},
        {'\u03A8'},
        {'\u0022'},
        {'\u0022'},
        {'\u00AE'},
        {'\u00AE'},
        {'\u03A1'},
        {'\u0160'},
        {'\u03A3'},
        {'\u00DE'},
        {'\u00DE'},
        {'\u2122'},
        {'\u03A4'},
        {'\u0398'},
        {'\u00DA'},
        {'\u00DA'},
        {'\u00DB'},
        {'\u00DB'},
        {'\u00D9'},
        {'\u00D9'},
        {'\u03A5'},
        {'\u00DC'},
        {'\u00DC'},
        {'\u039E'},
        {'\u00DD'},
        {'\u00DD'},
        {'\u0178'},
        {'\u0396'},
        {'\u00E1'},
        {'\u00E1'},
        {'\u00E2'},
        {'\u00E2'},
        {'\u00B4'},
        {'\u00B4'},
        {'\u00E6'},
        {'\u00E6'},
        {'\u00E0'},
        {'\u00E0'},
        {'\u2135'},
        {'\u03B1'},
        {'\u0026'},
        {'\u0026'},
        {'\u2227'},
        {'\u2220'},
        {'\''},
        {'\u00E5'},
        {'\u00E5'},
        {'\u2248'},
        {'\u00E3'},
        {'\u00E3'},
        {'\u00E4'},
        {'\u00E4'},
        {'\u201E'},
        {'\u03B2'},
        {'\u00A6'},
        {'\u00A6'},
        {'\u2022'},
        {'\u2229'},
        {'\u00E7'},
        {'\u00E7'},
        {'\u00B8'},
        {'\u00B8'},
        {'\u00A2'},
        {'\u00A2'},
        {'\u03C7'},
        {'\u02C6'},
        {'\u2663'},
        {'\u2245'},
        {'\u00A9'},
        {'\u00A9'},
        {'\u21B5'},
        {'\u222A'},
        {'\u00A4'},
        {'\u00A4'},
        {'\u21D3'},
        {'\u2020'},
        {'\u2193'},
        {'\u00B0'},
        {'\u00B0'},
        {'\u03B4'},
        {'\u2666'},
        {'\u00F7'},
        {'\u00F7'},
        {'\u00E9'},
        {'\u00E9'},
        {'\u00EA'},
        {'\u00EA'},
        {'\u00E8'},
        {'\u00E8'},
        {'\u2205'},
        {'\u2003'},
        {'\u2002'},
        {'\u03B5'},
        {'\u2261'},
        {'\u03B7'},
        {'\u00F0'},
        {'\u00F0'},
        {'\u00EB'},
        {'\u00EB'},
        {'\u20AC'},
        {'\u2203'},
        {'\u0192'},
        {'\u2200'},
        {'\u00BD'},
        {'\u00BD'},
        {'\u00BC'},
        {'\u00BC'},
        {'\u00BE'},
        {'\u00BE'},
        {'\u2044'},
        {'\u03B3'},
        {'\u2265'},
        {'\u003E'},
        {'\u003E'},
        {'\u21D4'},
        {'\u2194'},
        {'\u2665'},
        {'\u2026'},
        {'\u00ED'},
        {'\u00ED'},
        {'\u00EE'},
        {'\u00EE'},
        {'\u00A1'},
        {'\u00A1'},
        {'\u00EC'},
        {'\u00EC'},
        {'\u2111'},
        {'\u221E'},
        {'\u222B'},
        {'\u03B9'},
        {'\u00BF'},
        {'\u00BF'},
        {'\u2208'},
        {'\u00EF'},
        {'\u00EF'},
        {'\u03BA'},
        {'\u21D0'},
        {'\u03BB'},
        {'\u3008'},
        {'\u00AB'},
        {'\u00AB'},
        {'\u2190'},
        {'\u2308'},
        {'\u201C'},
        {'\u2264'},
        {'\u230A'},
        {'\u2217'},
        {'\u25CA'},
        {'\u200E'},
        {'\u2039'},
        {'\u2018'},
        {'\u003C'},
        {'\u003C'},
        {'\u00AF'},
        {'\u00AF'},
        {'\u2014'},
        {'\u00B5'},
        {'\u00B5'},
        {'\u00B7'},
        {'\u00B7'},
        {'\u2212'},
        {'\u03BC'},
        {'\u2207'},
        {'\u00A0'},
        {'\u00A0'},
        {'\u2013'},
        {'\u2260'},
        {'\u220B'},
        {'\u00AC'},
        {'\u00AC'},
        {'\u2209'},
        {'\u2284'},
        {'\u00F1'},
        {'\u00F1'},
        {'\u03BD'},
        {'\u00F3'},
        {'\u00F3'},
        {'\u00F4'},
        {'\u00F4'},
        {'\u0153'},
        {'\u00F2'},
        {'\u00F2'},
        {'\u203E'},
        {'\u03C9'},
        {'\u03BF'},
        {'\u2295'},
        {'\u2228'},
        {'\u00AA'},
        {'\u00AA'},
        {'\u00BA'},
        {'\u00BA'},
        {'\u00F8'},
        {'\u00F8'},
        {'\u00F5'},
        {'\u00F5'},
        {'\u2297'},
        {'\u00F6'},
        {'\u00F6'},
        {'\u00B6'},
        {'\u00B6'},
        {'\u2202'},
        {'\u2030'},
        {'\u22A5'},
        {'\u03C6'},
        {'\u03C0'},
        {'\u03D6'},
        {'\u00B1'},
        {'\u00B1'},
        {'\u00A3'},
        {'\u00A3'},
        {'\u2032'},
        {'\u220F'},
        {'\u221D'},
        {'\u03C8'},
        {'\u0022'},
        {'\u0022'},
        {'\u21D2'},
        {'\u221A'},
        {'\u3009'},
        {'\u00BB'},
        {'\u00BB'},
        {'\u2192'},
        {'\u2309'},
        {'\u201D'},
        {'\u211C'},
        {'\u00AE'},
        {'\u00AE'},
        {'\u230B'},
        {'\u03C1'},
        {'\u200F'},
        {'\u203A'},
        {'\u2019'},
        {'\u201A'},
        {'\u0161'},
        {'\u22C5'},
        {'\u00A7'},
        {'\u00A7'},
        {'\u00AD'},
        {'\u00AD'},
        {'\u03C3'},
        {'\u03C2'},
        {'\u223C'},
        {'\u2660'},
        {'\u2282'},
        {'\u2286'},
        {'\u2211'},
        {'\u00B9'},
        {'\u00B9'},
        {'\u00B2'},
        {'\u00B2'},
        {'\u00B3'},
        {'\u00B3'},
        {'\u2283'},
        {'\u2287'},
        {'\u00DF'},
        {'\u00DF'},
        {'\u03C4'},
        {'\u2234'},
        {'\u03B8'},
        {'\u03D1'},
        {'\u2009'},
        {'\u00FE'},
        {'\u00FE'},
        {'\u02DC'},
        {'\u00D7'},
        {'\u00D7'},
        {'\u2122'},
        {'\u21D1'},
        {'\u00FA'},
        {'\u00FA'},
        {'\u2191'},
        {'\u00FB'},
        {'\u00FB'},
        {'\u00F9'},
        {'\u00F9'},
        {'\u00A8'},
        {'\u00A8'},
        {'\u03D2'},
        {'\u03C5'},
        {'\u00FC'},
        {'\u00FC'},
        {'\u2118'},
        {'\u03BE'},
        {'\u00FD'},
        {'\u00FD'},
        {'\u00A5'},
        {'\u00A5'},
        {'\u00FF'},
        {'\u00FF'},
        {'\u03B6'},
        {'\u200D'},
        {'\u200C'}
    };

    /**
     * windows-1252 interpretation of the 32 code points 0x80-0x9F: entry
     * {@code i} holds the character that windows-1252 assigns to byte
     * {@code 0x80 + i}. Positions that windows-1252 leaves unassigned (0x81,
     * 0x8D, 0x8F, 0x90 and 0x9D) hold U+FFFD REPLACEMENT CHARACTER.
     *
     * <p>Presumably consulted when a numeric character reference falls into
     * the 0x80-0x9F range; the lookup itself is not part of this class.
     */
    static final char[][] WINDOWS_1252 = {
        {'\u20AC'}, // 0x80
        {'\uFFFD'}, // 0x81 (unassigned)
        {'\u201A'}, // 0x82
        {'\u0192'}, // 0x83
        {'\u201E'}, // 0x84
        {'\u2026'}, // 0x85
        {'\u2020'}, // 0x86
        {'\u2021'}, // 0x87
        {'\u02C6'}, // 0x88
        {'\u2030'}, // 0x89
        {'\u0160'}, // 0x8A
        {'\u2039'}, // 0x8B
        {'\u0152'}, // 0x8C
        {'\uFFFD'}, // 0x8D (unassigned)
        {'\u017D'}, // 0x8E
        {'\uFFFD'}, // 0x8F (unassigned)
        {'\uFFFD'}, // 0x90 (unassigned)
        {'\u2018'}, // 0x91
        {'\u2019'}, // 0x92
        {'\u201C'}, // 0x93
        {'\u201D'}, // 0x94
        {'\u2022'}, // 0x95
        {'\u2013'}, // 0x96
        {'\u2014'}, // 0x97
        {'\u02DC'}, // 0x98
        {'\u2122'}, // 0x99
        {'\u0161'}, // 0x9A
        {'\u203A'}, // 0x9B
        {'\u0153'}, // 0x9C
        {'\uFFFD'}, // 0x9D (unassigned)
        {'\u017E'}, // 0x9E
        {'\u0178'}  // 0x9F
    };
}