001 /* 002 * Copyright (c) 2005 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package nu.validator.htmlparser.impl; 024 025 /** 026 * @version $Id: Entities.java 150 2007-08-16 19:21:25Z hsivonen $ 027 * @author hsivonen 028 */ 029 public final class Entities { 030 static final String[] NAMES = { "AElig", 031 "AElig;", 032 "AMP", 033 "AMP;", 034 "Aacute", 035 "Aacute;", 036 "Acirc", 037 "Acirc;", 038 "Agrave", 039 "Agrave;", 040 "Alpha;", 041 "Aring", 042 "Aring;", 043 "Atilde", 044 "Atilde;", 045 "Auml", 046 "Auml;", 047 "Beta;", 048 "COPY", 049 "COPY;", 050 "Ccedil", 051 "Ccedil;", 052 "Chi;", 053 "Dagger;", 054 "Delta;", 055 "ETH", 056 "ETH;", 057 "Eacute", 058 "Eacute;", 059 "Ecirc", 060 "Ecirc;", 061 "Egrave", 062 "Egrave;", 063 "Epsilon;", 064 "Eta;", 065 "Euml", 066 "Euml;", 067 "GT", 068 "GT;", 069 "Gamma;", 070 "Iacute", 071 "Iacute;", 072 "Icirc", 073 "Icirc;", 074 "Igrave", 075 "Igrave;", 076 "Iota;", 077 "Iuml", 078 "Iuml;", 079 "Kappa;", 080 "LT", 081 "LT;", 082 "Lambda;", 083 "Mu;", 084 "Ntilde", 085 "Ntilde;", 086 "Nu;", 087 "OElig;", 088 "Oacute", 089 "Oacute;", 090 "Ocirc", 091 "Ocirc;", 092 "Ograve", 093 "Ograve;", 094 "Omega;", 095 "Omicron;", 096 "Oslash", 097 "Oslash;", 098 "Otilde", 099 "Otilde;", 100 "Ouml", 101 "Ouml;", 102 "Phi;", 103 "Pi;", 104 "Prime;", 105 "Psi;", 106 "QUOT", 107 "QUOT;", 108 "REG", 109 "REG;", 110 "Rho;", 111 "Scaron;", 112 "Sigma;", 113 "THORN", 114 "THORN;", 115 "TRADE;", 116 "Tau;", 117 "Theta;", 118 "Uacute", 119 "Uacute;", 120 "Ucirc", 121 "Ucirc;", 122 "Ugrave", 123 "Ugrave;", 124 "Upsilon;", 125 "Uuml", 126 "Uuml;", 127 "Xi;", 128 "Yacute", 129 "Yacute;", 130 "Yuml;", 131 "Zeta;", 132 "aacute", 133 "aacute;", 134 "acirc", 135 "acirc;", 136 "acute", 137 "acute;", 138 "aelig", 139 "aelig;", 140 "agrave", 141 "agrave;", 142 "alefsym;", 143 "alpha;", 144 "amp", 145 "amp;", 146 "and;", 147 "ang;", 148 "apos;", 149 "aring", 150 "aring;", 151 "asymp;", 152 "atilde", 153 "atilde;", 154 "auml", 155 "auml;", 156 "bdquo;", 157 "beta;", 158 "brvbar", 159 "brvbar;", 160 "bull;", 161 "cap;", 162 "ccedil", 163 "ccedil;", 164 "cedil", 165 "cedil;", 166 "cent", 167 "cent;", 168 "chi;", 169 "circ;", 170 "clubs;", 171 "cong;", 172 "copy", 173 "copy;", 174 "crarr;", 175 "cup;", 176 "curren", 177 "curren;", 178 "dArr;", 179 "dagger;", 180 "darr;", 181 "deg", 182 "deg;", 183 "delta;", 184 "diams;", 185 "divide", 186 "divide;", 187 "eacute", 188 "eacute;", 189 "ecirc", 190 "ecirc;", 191 "egrave", 192 "egrave;", 193 "empty;", 194 "emsp;", 195 "ensp;", 196 "epsilon;", 197 "equiv;", 198 "eta;", 199 "eth", 200 "eth;", 201 "euml", 202 "euml;", 203 "euro;", 204 "exist;", 205 "fnof;", 206 "forall;", 207 "frac12", 208 "frac12;", 209 "frac14", 210 "frac14;", 211 "frac34", 212 "frac34;", 213 "frasl;", 214 "gamma;", 215 "ge;", 216 "gt", 217 "gt;", 218 "hArr;", 219 "harr;", 220 "hearts;", 221 "hellip;", 222 "iacute", 223 "iacute;", 224 "icirc", 225 "icirc;", 226 "iexcl", 227 "iexcl;", 228 "igrave", 229 "igrave;", 230 "image;", 231 "infin;", 232 "int;", 233 "iota;", 234 "iquest", 235 "iquest;", 236 "isin;", 237 "iuml", 238 "iuml;", 239 "kappa;", 240 "lArr;", 241 "lambda;", 242 "lang;", 243 "laquo", 244 "laquo;", 245 "larr;", 246 "lceil;", 247 "ldquo;", 248 "le;", 249 "lfloor;", 250 "lowast;", 251 "loz;", 252 "lrm;", 253 "lsaquo;", 254 "lsquo;", 255 "lt", 256 "lt;", 257 "macr", 258 "macr;", 259 "mdash;", 260 "micro", 261 "micro;", 262 "middot", 263 "middot;", 264 "minus;", 265 "mu;", 266 "nabla;", 267 "nbsp", 268 "nbsp;", 269 "ndash;", 270 "ne;", 271 "ni;", 272 "not", 273 "not;", 274 "notin;", 275 "nsub;", 276 "ntilde", 277 "ntilde;", 278 "nu;", 279 "oacute", 280 "oacute;", 281 "ocirc", 282 "ocirc;", 283 "oelig;", 284 "ograve", 285 "ograve;", 286 "oline;", 287 "omega;", 288 "omicron;", 289 "oplus;", 290 "or;", 291 "ordf", 292 "ordf;", 293 "ordm", 294 "ordm;", 295 "oslash", 296 "oslash;", 297 "otilde", 298 "otilde;", 299 "otimes;", 300 "ouml", 301 "ouml;", 302 "para", 303 "para;", 304 "part;", 305 "permil;", 306 "perp;", 307 "phi;", 308 "pi;", 309 "piv;", 310 "plusmn", 311 "plusmn;", 312 "pound", 313 "pound;", 314 "prime;", 315 "prod;", 316 "prop;", 317 "psi;", 318 "quot", 319 "quot;", 320 "rArr;", 321 "radic;", 322 "rang;", 323 "raquo", 324 "raquo;", 325 "rarr;", 326 "rceil;", 327 "rdquo;", 328 "real;", 329 "reg", 330 "reg;", 331 "rfloor;", 332 "rho;", 333 "rlm;", 334 "rsaquo;", 335 "rsquo;", 336 "sbquo;", 337 "scaron;", 338 "sdot;", 339 "sect", 340 "sect;", 341 "shy", 342 "shy;", 343 "sigma;", 344 "sigmaf;", 345 "sim;", 346 "spades;", 347 "sub;", 348 "sube;", 349 "sum;", 350 "sup1", 351 "sup1;", 352 "sup2", 353 "sup2;", 354 "sup3", 355 "sup3;", 356 "sup;", 357 "supe;", 358 "szlig", 359 "szlig;", 360 "tau;", 361 "there4;", 362 "theta;", 363 "thetasym;", 364 "thinsp;", 365 "thorn", 366 "thorn;", 367 "tilde;", 368 "times", 369 "times;", 370 "trade;", 371 "uArr;", 372 "uacute", 373 "uacute;", 374 "uarr;", 375 "ucirc", 376 "ucirc;", 377 "ugrave", 378 "ugrave;", 379 "uml", 380 "uml;", 381 "upsih;", 382 "upsilon;", 383 "uuml", 384 "uuml;", 385 "weierp;", 386 "xi;", 387 "yacute", 388 "yacute;", 389 "yen", 390 "yen;", 391 "yuml", 392 "yuml;", 393 "zeta;", 394 "zwj;", 395 "zwnj;" }; 396 397 final static char[][] VALUES = { {'\u00C6'}, 398 {'\u00C6'}, 399 {'\u0026'}, 400 {'\u0026'}, 401 {'\u00C1'}, 402 {'\u00C1'}, 403 {'\u00C2'}, 404 {'\u00C2'}, 405 {'\u00C0'}, 406 {'\u00C0'}, 407 {'\u0391'}, 408 {'\u00C5'}, 409 {'\u00C5'}, 410 {'\u00C3'}, 411 {'\u00C3'}, 412 {'\u00C4'}, 413 {'\u00C4'}, 414 {'\u0392'}, 415 {'\u00A9'}, 416 {'\u00A9'}, 417 {'\u00C7'}, 418 {'\u00C7'}, 419 {'\u03A7'}, 420 {'\u2021'}, 421 {'\u0394'}, 422 {'\u00D0'}, 423 {'\u00D0'}, 424 {'\u00C9'}, 425 {'\u00C9'}, 426 {'\u00CA'}, 427 {'\u00CA'}, 428 {'\u00C8'}, 429 {'\u00C8'}, 430 {'\u0395'}, 431 {'\u0397'}, 432 {'\u00CB'}, 433 {'\u00CB'}, 434 {'\u003E'}, 435 {'\u003E'}, 436 {'\u0393'}, 437 {'\u00CD'}, 438 {'\u00CD'}, 439 {'\u00CE'}, 440 {'\u00CE'}, 441 {'\u00CC'}, 442 {'\u00CC'}, 443 {'\u0399'}, 444 {'\u00CF'}, 445 {'\u00CF'}, 446 {'\u039A'}, 447 {'\u003C'}, 448 {'\u003C'}, 449 {'\u039B'}, 450 {'\u039C'}, 451 {'\u00D1'}, 452 {'\u00D1'}, 453 {'\u039D'}, 454 {'\u0152'}, 455 {'\u00D3'}, 456 {'\u00D3'}, 457 {'\u00D4'}, 458 {'\u00D4'}, 459 {'\u00D2'}, 460 {'\u00D2'}, 461 {'\u03A9'}, 462 {'\u039F'}, 463 {'\u00D8'}, 464 {'\u00D8'}, 465 {'\u00D5'}, 466 {'\u00D5'}, 467 {'\u00D6'}, 468 {'\u00D6'}, 469 {'\u03A6'}, 470 {'\u03A0'}, 471 {'\u2033'}, 472 {'\u03A8'}, 473 {'\u0022'}, 474 {'\u0022'}, 475 {'\u00AE'}, 476 {'\u00AE'}, 477 {'\u03A1'}, 478 {'\u0160'}, 479 {'\u03A3'}, 480 {'\u00DE'}, 481 {'\u00DE'}, 482 {'\u2122'}, 483 {'\u03A4'}, 484 {'\u0398'}, 485 {'\u00DA'}, 486 {'\u00DA'}, 487 {'\u00DB'}, 488 {'\u00DB'}, 489 {'\u00D9'}, 490 {'\u00D9'}, 491 {'\u03A5'}, 492 {'\u00DC'}, 493 {'\u00DC'}, 494 {'\u039E'}, 495 {'\u00DD'}, 496 {'\u00DD'}, 497 {'\u0178'}, 498 {'\u0396'}, 499 {'\u00E1'}, 500 {'\u00E1'}, 501 {'\u00E2'}, 502 {'\u00E2'}, 503 {'\u00B4'}, 504 {'\u00B4'}, 505 {'\u00E6'}, 506 {'\u00E6'}, 507 {'\u00E0'}, 508 {'\u00E0'}, 509 {'\u2135'}, 510 {'\u03B1'}, 511 {'\u0026'}, 512 {'\u0026'}, 513 {'\u2227'}, 514 {'\u2220'}, 515 {'\''}, 516 {'\u00E5'}, 517 {'\u00E5'}, 518 {'\u2248'}, 519 {'\u00E3'}, 520 {'\u00E3'}, 521 {'\u00E4'}, 522 {'\u00E4'}, 523 {'\u201E'}, 524 {'\u03B2'}, 525 {'\u00A6'}, 526 {'\u00A6'}, 527 {'\u2022'}, 528 {'\u2229'}, 529 {'\u00E7'}, 530 {'\u00E7'}, 531 {'\u00B8'}, 532 {'\u00B8'}, 533 {'\u00A2'}, 534 {'\u00A2'}, 535 {'\u03C7'}, 536 {'\u02C6'}, 537 {'\u2663'}, 538 {'\u2245'}, 539 {'\u00A9'}, 540 {'\u00A9'}, 541 {'\u21B5'}, 542 {'\u222A'}, 543 {'\u00A4'}, 544 {'\u00A4'}, 545 {'\u21D3'}, 546 {'\u2020'}, 547 {'\u2193'}, 548 {'\u00B0'}, 549 {'\u00B0'}, 550 {'\u03B4'}, 551 {'\u2666'}, 552 {'\u00F7'}, 553 {'\u00F7'}, 554 {'\u00E9'}, 555 {'\u00E9'}, 556 {'\u00EA'}, 557 {'\u00EA'}, 558 {'\u00E8'}, 559 {'\u00E8'}, 560 {'\u2205'}, 561 {'\u2003'}, 562 {'\u2002'}, 563 {'\u03B5'}, 564 {'\u2261'}, 565 {'\u03B7'}, 566 {'\u00F0'}, 567 {'\u00F0'}, 568 {'\u00EB'}, 569 {'\u00EB'}, 570 {'\u20AC'}, 571 {'\u2203'}, 572 {'\u0192'}, 573 {'\u2200'}, 574 {'\u00BD'}, 575 {'\u00BD'}, 576 {'\u00BC'}, 577 {'\u00BC'}, 578 {'\u00BE'}, 579 {'\u00BE'}, 580 {'\u2044'}, 581 {'\u03B3'}, 582 {'\u2265'}, 583 {'\u003E'}, 584 {'\u003E'}, 585 {'\u21D4'}, 586 {'\u2194'}, 587 {'\u2665'}, 588 {'\u2026'}, 589 {'\u00ED'}, 590 {'\u00ED'}, 591 {'\u00EE'}, 592 {'\u00EE'}, 593 {'\u00A1'}, 594 {'\u00A1'}, 595 {'\u00EC'}, 596 {'\u00EC'}, 597 {'\u2111'}, 598 {'\u221E'}, 599 {'\u222B'}, 600 {'\u03B9'}, 601 {'\u00BF'}, 602 {'\u00BF'}, 603 {'\u2208'}, 604 {'\u00EF'}, 605 {'\u00EF'}, 606 {'\u03BA'}, 607 {'\u21D0'}, 608 {'\u03BB'}, 609 {'\u3008'}, 610 {'\u00AB'}, 611 {'\u00AB'}, 612 {'\u2190'}, 613 {'\u2308'}, 614 {'\u201C'}, 615 {'\u2264'}, 616 {'\u230A'}, 617 {'\u2217'}, 618 {'\u25CA'}, 619 {'\u200E'}, 620 {'\u2039'}, 621 {'\u2018'}, 622 {'\u003C'}, 623 {'\u003C'}, 624 {'\u00AF'}, 625 {'\u00AF'}, 626 {'\u2014'}, 627 {'\u00B5'}, 628 {'\u00B5'}, 629 {'\u00B7'}, 630 {'\u00B7'}, 631 {'\u2212'}, 632 {'\u03BC'}, 633 {'\u2207'}, 634 {'\u00A0'}, 635 {'\u00A0'}, 636 {'\u2013'}, 637 {'\u2260'}, 638 {'\u220B'}, 639 {'\u00AC'}, 640 {'\u00AC'}, 641 {'\u2209'}, 642 {'\u2284'}, 643 {'\u00F1'}, 644 {'\u00F1'}, 645 {'\u03BD'}, 646 {'\u00F3'}, 647 {'\u00F3'}, 648 {'\u00F4'}, 649 {'\u00F4'}, 650 {'\u0153'}, 651 {'\u00F2'}, 652 {'\u00F2'}, 653 {'\u203E'}, 654 {'\u03C9'}, 655 {'\u03BF'}, 656 {'\u2295'}, 657 {'\u2228'}, 658 {'\u00AA'}, 659 {'\u00AA'}, 660 {'\u00BA'}, 661 {'\u00BA'}, 662 {'\u00F8'}, 663 {'\u00F8'}, 664 {'\u00F5'}, 665 {'\u00F5'}, 666 {'\u2297'}, 667 {'\u00F6'}, 668 {'\u00F6'}, 669 {'\u00B6'}, 670 {'\u00B6'}, 671 {'\u2202'}, 672 {'\u2030'}, 673 {'\u22A5'}, 674 {'\u03C6'}, 675 {'\u03C0'}, 676 {'\u03D6'}, 677 {'\u00B1'}, 678 {'\u00B1'}, 679 {'\u00A3'}, 680 {'\u00A3'}, 681 {'\u2032'}, 682 {'\u220F'}, 683 {'\u221D'}, 684 {'\u03C8'}, 685 {'\u0022'}, 686 {'\u0022'}, 687 {'\u21D2'}, 688 {'\u221A'}, 689 {'\u3009'}, 690 {'\u00BB'}, 691 {'\u00BB'}, 692 {'\u2192'}, 693 {'\u2309'}, 694 {'\u201D'}, 695 {'\u211C'}, 696 {'\u00AE'}, 697 {'\u00AE'}, 698 {'\u230B'}, 699 {'\u03C1'}, 700 {'\u200F'}, 701 {'\u203A'}, 702 {'\u2019'}, 703 {'\u201A'}, 704 {'\u0161'}, 705 {'\u22C5'}, 706 {'\u00A7'}, 707 {'\u00A7'}, 708 {'\u00AD'}, 709 {'\u00AD'}, 710 {'\u03C3'}, 711 {'\u03C2'}, 712 {'\u223C'}, 713 {'\u2660'}, 714 {'\u2282'}, 715 {'\u2286'}, 716 {'\u2211'}, 717 {'\u00B9'}, 718 {'\u00B9'}, 719 {'\u00B2'}, 720 {'\u00B2'}, 721 {'\u00B3'}, 722 {'\u00B3'}, 723 {'\u2283'}, 724 {'\u2287'}, 725 {'\u00DF'}, 726 {'\u00DF'}, 727 {'\u03C4'}, 728 {'\u2234'}, 729 {'\u03B8'}, 730 {'\u03D1'}, 731 {'\u2009'}, 732 {'\u00FE'}, 733 {'\u00FE'}, 734 {'\u02DC'}, 735 {'\u00D7'}, 736 {'\u00D7'}, 737 {'\u2122'}, 738 {'\u21D1'}, 739 {'\u00FA'}, 740 {'\u00FA'}, 741 {'\u2191'}, 742 {'\u00FB'}, 743 {'\u00FB'}, 744 {'\u00F9'}, 745 {'\u00F9'}, 746 {'\u00A8'}, 747 {'\u00A8'}, 748 {'\u03D2'}, 749 {'\u03C5'}, 750 {'\u00FC'}, 751 {'\u00FC'}, 752 {'\u2118'}, 753 {'\u03BE'}, 754 {'\u00FD'}, 755 {'\u00FD'}, 756 {'\u00A5'}, 757 {'\u00A5'}, 758 {'\u00FF'}, 759 {'\u00FF'}, 760 {'\u03B6'}, 761 {'\u200D'}, 762 {'\u200C'} }; 763 764 final static char[][] WINDOWS_1252 = {{'\u20AC'}, 765 {'\uFFFD'}, 766 {'\u201A'}, 767 {'\u0192'}, 768 {'\u201E'}, 769 {'\u2026'}, 770 {'\u2020'}, 771 {'\u2021'}, 772 {'\u02C6'}, 773 {'\u2030'}, 774 {'\u0160'}, 775 {'\u2039'}, 776 {'\u0152'}, 777 {'\uFFFD'}, 778 {'\u017D'}, 779 {'\uFFFD'}, 780 {'\uFFFD'}, 781 {'\u2018'}, 782 {'\u2019'}, 783 {'\u201C'}, 784 {'\u201D'}, 785 {'\u2022'}, 786 {'\u2013'}, 787 {'\u2014'}, 788 {'\u02DC'}, 789 {'\u2122'}, 790 {'\u0161'}, 791 {'\u203A'}, 792 {'\u0153'}, 793 {'\uFFFD'}, 794 {'\u017E'}, 795 {'\u0178'}}; 796 }