mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-16 21:35:25 +00:00
6959267: Support Unicode 6.0.0
Reviewed-by: okutsu
This commit is contained in:
parent
12b076396c
commit
4a4fb15178
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -111,7 +111,7 @@ class CharacterData00 extends CharacterData {
|
||||
if ((val & $$maskCaseOffset) == $$maskCaseOffset) {
|
||||
switch(ch) {
|
||||
// map the offset overflow chars
|
||||
case 0x0130 : mapChar = 0x0069; break;
|
||||
case 0x0130 : mapChar = 0x0069; break;
|
||||
case 0x2126 : mapChar = 0x03C9; break;
|
||||
case 0x212A : mapChar = 0x006B; break;
|
||||
case 0x212B : mapChar = 0x00E5; break;
|
||||
@ -192,7 +192,11 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x2C6D : mapChar = 0x0251; break;
|
||||
case 0x2C6E : mapChar = 0x0271; break;
|
||||
case 0x2C6F : mapChar = 0x0250; break;
|
||||
case 0x2C70 : mapChar = 0x0252; break;
|
||||
case 0x2C7E : mapChar = 0x023F; break;
|
||||
case 0x2C7F : mapChar = 0x0240; break;
|
||||
case 0xA77D : mapChar = 0x1D79; break;
|
||||
case 0xA78D : mapChar = 0x0265; break;
|
||||
// default mapChar is already set, so no
|
||||
// need to redo it here.
|
||||
// default : mapChar = ch;
|
||||
@ -246,8 +250,12 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x1FC3 : mapChar = 0x1FCC; break;
|
||||
case 0x1FF3 : mapChar = 0x1FFC; break;
|
||||
|
||||
case 0x023F : mapChar = 0x2C7E; break;
|
||||
case 0x0240 : mapChar = 0x2C7F; break;
|
||||
case 0x0250 : mapChar = 0x2C6F; break;
|
||||
case 0x0251 : mapChar = 0x2C6D; break;
|
||||
case 0x0252 : mapChar = 0x2C70; break;
|
||||
case 0x0265 : mapChar = 0xA78D; break;
|
||||
case 0x026B : mapChar = 0x2C62; break;
|
||||
case 0x0271 : mapChar = 0x2C6E; break;
|
||||
case 0x027D : mapChar = 0x2C64; break;
|
||||
@ -487,8 +495,12 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x017F : mapChar = 0x0053; break;
|
||||
case 0x1FBE : mapChar = 0x0399; break;
|
||||
|
||||
case 0x023F : mapChar = 0x2C7E; break;
|
||||
case 0x0240 : mapChar = 0x2C7F; break;
|
||||
case 0x0250 : mapChar = 0x2C6F; break;
|
||||
case 0x0251 : mapChar = 0x2C6D; break;
|
||||
case 0x0252 : mapChar = 0x2C70; break;
|
||||
case 0x0265 : mapChar = 0xA78D; break;
|
||||
case 0x026B : mapChar = 0x2C62; break;
|
||||
case 0x0271 : mapChar = 0x2C6E; break;
|
||||
case 0x027D : mapChar = 0x2C64; break;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -251,9 +251,40 @@ class CharacterData01 extends CharacterData {
|
||||
case 0x010341: retval = 90; break; // GOTHIC LETTER NINETY
|
||||
case 0x01034A: retval = 900; break; // GOTHIC LETTER NINE HUNDRED
|
||||
case 0x0103D5: retval = 100; break; // OLD PERSIAN NUMBER HUNDRED
|
||||
case 0x01085D: retval = 100; break; // IMPERIAL ARAMAIC NUMBER ONE HUNDRED
|
||||
case 0x01085E: retval = 1000; break; // IMPERIAL ARAMAIC NUMBER ONE THOUSAND
|
||||
case 0x01085F: retval = 10000; break; // IMPERIAL ARAMAIC NUMBER TEN THOUSAND
|
||||
case 0x010919: retval = 100; break; // PHOENICIAN NUMBER ONE HUNDRED
|
||||
case 0x010A46: retval = 100; break; // KHAROSHTHI NUMBER ONE HUNDRED
|
||||
case 0x010A47: retval = 1000; break; // KHAROSHTHI NUMBER ONE THOUSAND
|
||||
case 0x010A7E: retval = 50; break; // OLD SOUTH ARABIAN NUMBER FIFTY
|
||||
case 0x010B5E: retval = 100; break; // INSCRIPTIONAL PARTHIAN NUMBER ONE HUNDRED
|
||||
case 0x010B5F: retval = 1000; break; // INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
|
||||
case 0x010B7E: retval = 100; break; // INSCRIPTIONAL PAHLAVI NUMBER ONE HUNDRED
|
||||
case 0x010B7F: retval = 1000; break; // INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
|
||||
case 0x010E6C: retval = 40; break; // RUMI NUMBER FORTY
|
||||
case 0x010E6D: retval = 50; break; // RUMI NUMBER FIFTY
|
||||
case 0x010E6E: retval = 60; break; // RUMI NUMBER SIXTY
|
||||
case 0x010E6F: retval = 70; break; // RUMI NUMBER SEVENTY
|
||||
case 0x010E70: retval = 80; break; // RUMI NUMBER EIGHTY
|
||||
case 0x010E71: retval = 90; break; // RUMI NUMBER NINETY
|
||||
case 0x010E72: retval = 100; break; // RUMI NUMBER ONE HUNDRED
|
||||
case 0x010E73: retval = 200; break; // RUMI NUMBER TWO HUNDRED
|
||||
case 0x010E74: retval = 300; break; // RUMI NUMBER THREE HUNDRED
|
||||
case 0x010E75: retval = 400; break; // RUMI NUMBER FOUR HUNDRED
|
||||
case 0x010E76: retval = 500; break; // RUMI NUMBER FIVE HUNDRED
|
||||
case 0x010E77: retval = 600; break; // RUMI NUMBER SIX HUNDRED
|
||||
case 0x010E78: retval = 700; break; // RUMI NUMBER SEVEN HUNDRED
|
||||
case 0x010E79: retval = 800; break; // RUMI NUMBER EIGHT HUNDRED
|
||||
case 0x010E7A: retval = 900; break; // RUMI NUMBER NINE HUNDRED
|
||||
case 0x01105E: retval = 40; break; // BRAHMI NUMBER FORTY
|
||||
case 0x01105F: retval = 50; break; // BRAHMI NUMBER FIFTY
|
||||
case 0x011060: retval = 60; break; // BRAHMI NUMBER SIXTY
|
||||
case 0x011061: retval = 70; break; // BRAHMI NUMBER SEVENTY
|
||||
case 0x011062: retval = 80; break; // BRAHMI NUMBER EIGHTY
|
||||
case 0x011063: retval = 90; break; // BRAHMI NUMBER NINETY
|
||||
case 0x011064: retval = 100; break; // BRAHMI NUMBER ONE HUNDRED
|
||||
case 0x011065: retval = 1000; break; // BRAHMI NUMBER ONE THOUSAND
|
||||
case 0x01D36C: retval = 40; break; // COUNTING ROD TENS DIGIT FOUR
|
||||
case 0x01D36D: retval = 50; break; // COUNTING ROD TENS DIGIT FIVE
|
||||
case 0x01D36E: retval = 60; break; // COUNTING ROD TENS DIGIT SIX
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
# Scripts-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:43 GMT [MD]
|
||||
# Scripts-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:47 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
@ -73,7 +73,7 @@
|
||||
02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
|
||||
02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
|
||||
02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
|
||||
02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
|
||||
02E5..02E9 ; Common # Sk [5] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER EXTRA-LOW TONE BAR
|
||||
02EC ; Common # Lm MODIFIER LETTER VOICING
|
||||
02ED ; Common # Sk MODIFIER LETTER UNASPIRATED
|
||||
02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE
|
||||
@ -83,7 +83,6 @@
|
||||
0385 ; Common # Sk GREEK DIALYTIKA TONOS
|
||||
0387 ; Common # Po GREEK ANO TELEIA
|
||||
0589 ; Common # Po ARMENIAN FULL STOP
|
||||
0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
|
||||
060C ; Common # Po ARABIC COMMA
|
||||
061B ; Common # Po ARABIC SEMICOLON
|
||||
061F ; Common # Po ARABIC QUESTION MARK
|
||||
@ -92,7 +91,6 @@
|
||||
06DD ; Common # Cf ARABIC END OF AYAH
|
||||
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN
|
||||
0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
|
||||
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
|
||||
10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR
|
||||
@ -148,7 +146,7 @@
|
||||
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
|
||||
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
|
||||
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
|
||||
20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN
|
||||
20A0..20B9 ; Common # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN
|
||||
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
|
||||
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
|
||||
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
|
||||
@ -157,7 +155,8 @@
|
||||
210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
|
||||
2114 ; Common # So L B BAR SYMBOL
|
||||
2115 ; Common # L& DOUBLE-STRUCK CAPITAL N
|
||||
2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT CAPITAL P
|
||||
2116..2117 ; Common # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT
|
||||
2118 ; Common # Sm SCRIPT CAPITAL P
|
||||
2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
|
||||
211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE
|
||||
2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z
|
||||
@ -213,7 +212,7 @@
|
||||
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
|
||||
23E2..23F3 ; Common # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND
|
||||
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
|
||||
@ -227,18 +226,8 @@
|
||||
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
|
||||
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
|
||||
266F ; Common # Sm MUSIC SHARP SIGN
|
||||
2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR
|
||||
26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2
|
||||
26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
|
||||
26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
|
||||
2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
|
||||
270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
|
||||
2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
|
||||
274D ; Common # So SHADOWED WHITE CIRCLE
|
||||
274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
|
||||
2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
|
||||
2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
|
||||
2670..26FF ; Common # So [144] WEST SYRIAC CROSS..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2701..2767 ; Common # So [103] UPPER BLADE SCISSORS..ROTATED FLORAL HEART BULLET
|
||||
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
|
||||
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
|
||||
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
|
||||
@ -254,15 +243,13 @@
|
||||
2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
|
||||
2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
|
||||
2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
|
||||
2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW
|
||||
2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
|
||||
27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
|
||||
2794..27BF ; Common # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
|
||||
27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
|
||||
27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER
|
||||
27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER
|
||||
27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
|
||||
27CC ; Common # Sm LONG DIVISION
|
||||
27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK
|
||||
27CE..27E5 ; Common # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK
|
||||
27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
|
||||
27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
|
||||
27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET
|
||||
@ -555,27 +542,51 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
|
||||
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
|
||||
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
|
||||
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
|
||||
1F0B1..1F0BE ; Common # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
|
||||
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
|
||||
1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
|
||||
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
|
||||
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
|
||||
1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B
|
||||
1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N
|
||||
1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P
|
||||
1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S
|
||||
1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W
|
||||
1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV
|
||||
1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
|
||||
1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
|
||||
1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
|
||||
1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
|
||||
1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
|
||||
1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
|
||||
1F190 ; Common # So SQUARE DJ
|
||||
1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
|
||||
1F130..1F169 ; Common # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||||
1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
|
||||
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
|
||||
1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||||
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||||
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F300..1F320 ; Common # So [33] CYCLONE..SHOOTING STAR
|
||||
1F330..1F335 ; Common # So [6] CHESTNUT..CACTUS
|
||||
1F337..1F37C ; Common # So [70] TULIP..BABY BOTTLE
|
||||
1F380..1F393 ; Common # So [20] RIBBON..GRADUATION CAP
|
||||
1F3A0..1F3C4 ; Common # So [37] CAROUSEL HORSE..SURFER
|
||||
1F3C6..1F3CA ; Common # So [5] TROPHY..SWIMMER
|
||||
1F3E0..1F3F0 ; Common # So [17] HOUSE BUILDING..EUROPEAN CASTLE
|
||||
1F400..1F43E ; Common # So [63] RAT..PAW PRINTS
|
||||
1F440 ; Common # So EYES
|
||||
1F442..1F4F7 ; Common # So [182] EAR..CAMERA
|
||||
1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE
|
||||
1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE
|
||||
1F550..1F567 ; Common # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
|
||||
1F5FB..1F5FF ; Common # So [5] MOUNT FUJI..MOYAI
|
||||
1F601..1F610 ; Common # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE
|
||||
1F612..1F614 ; Common # So [3] UNAMUSED FACE..PENSIVE FACE
|
||||
1F616 ; Common # So CONFOUNDED FACE
|
||||
1F618 ; Common # So FACE THROWING A KISS
|
||||
1F61A ; Common # So KISSING FACE WITH CLOSED EYES
|
||||
1F61C..1F61E ; Common # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE
|
||||
1F620..1F625 ; Common # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE
|
||||
1F628..1F62B ; Common # So [4] FEARFUL FACE..TIRED FACE
|
||||
1F62D ; Common # So LOUDLY CRYING FACE
|
||||
1F630..1F633 ; Common # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE
|
||||
1F635..1F640 ; Common # So [12] DIZZY FACE..WEARY CAT FACE
|
||||
1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
|
||||
1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE
|
||||
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 5395
|
||||
# Total code points: 6379
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -603,7 +614,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
|
||||
2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I
|
||||
207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N
|
||||
2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
|
||||
2090..209C ; Latin # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
|
||||
212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN
|
||||
2132 ; Latin # L& TURNED CAPITAL F
|
||||
214E ; Latin # L& TURNED SMALL F
|
||||
@ -616,13 +627,16 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
|
||||
A770 ; Latin # Lm MODIFIER LETTER US
|
||||
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
|
||||
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A790..A791 ; Latin # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER
|
||||
A7A0..A7A9 ; Latin # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE
|
||||
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
|
||||
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
|
||||
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
|
||||
# Total code points: 1244
|
||||
# Total code points: 1267
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -687,12 +701,11 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
|
||||
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
|
||||
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
|
||||
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
|
||||
048A..0527 ; Cyrillic # L& [158] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
|
||||
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
|
||||
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
|
||||
A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
|
||||
A640..A66D ; Cyrillic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
|
||||
A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O
|
||||
A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET
|
||||
A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
|
||||
@ -702,7 +715,7 @@ A67E ; Cyrillic # Po CYRILLIC KAVYKA
|
||||
A67F ; Cyrillic # Lm CYRILLIC PAYEROK
|
||||
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
|
||||
|
||||
# Total code points: 404
|
||||
# Total code points: 408
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -744,6 +757,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
|
||||
# ================================================
|
||||
|
||||
0600..0603 ; Arabic # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
|
||||
0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
|
||||
0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
|
||||
060B ; Arabic # Sc AFGHANI SIGN
|
||||
@ -751,7 +765,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
|
||||
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
|
||||
0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
|
||||
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
|
||||
@ -760,7 +774,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
06D4 ; Arabic # Po ARABIC FULL STOP
|
||||
06D5 ; Arabic # Lo ARABIC LETTER AE
|
||||
06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
06DE ; Arabic # Me ARABIC START OF RUB EL HIZB
|
||||
06DE ; Arabic # So ARABIC START OF RUB EL HIZB
|
||||
06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
|
||||
06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
@ -773,6 +787,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
|
||||
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||||
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||||
@ -782,7 +797,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN
|
||||
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
|
||||
|
||||
# Total code points: 1030
|
||||
# Total code points: 1051
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -809,27 +824,29 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
||||
0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA
|
||||
0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
|
||||
093A ; Devanagari # Mn DEVANAGARI VOWEL SIGN OE
|
||||
093B ; Devanagari # Mc DEVANAGARI VOWEL SIGN OOE
|
||||
093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA
|
||||
093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA
|
||||
093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA
|
||||
094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
|
||||
094E..094F ; Devanagari # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
|
||||
0950 ; Devanagari # Lo DEVANAGARI OM
|
||||
0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E
|
||||
0953..0957 ; Devanagari # Mn [5] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN UUE
|
||||
0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
|
||||
0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
|
||||
0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A
|
||||
0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE
|
||||
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
|
||||
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
|
||||
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
|
||||
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
|
||||
# Total code points: 140
|
||||
# Total code points: 150
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -941,8 +958,9 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
|
||||
0B70 ; Oriya # So ORIYA ISSHAR
|
||||
0B71 ; Oriya # Lo ORIYA LETTER WA
|
||||
0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
|
||||
|
||||
# Total code points: 84
|
||||
# Total code points: 90
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1018,22 +1036,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
|
||||
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
|
||||
# Total code points: 84
|
||||
# Total code points: 86
|
||||
|
||||
# ================================================
|
||||
|
||||
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
|
||||
0D2A..0D39 ; Malayalam # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
|
||||
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
|
||||
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
|
||||
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
|
||||
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
|
||||
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
|
||||
0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH
|
||||
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
|
||||
0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
|
||||
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
|
||||
@ -1042,7 +1061,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0D79 ; Malayalam # So MALAYALAM DATE MARK
|
||||
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
|
||||
|
||||
# Total code points: 95
|
||||
# Total code points: 98
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1132,16 +1151,17 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
|
||||
0F85 ; Tibetan # Po TIBETAN MARK PALUTA
|
||||
0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
|
||||
0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
|
||||
0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
|
||||
0F88..0F8C ; Tibetan # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
|
||||
0F8D..0F97 ; Tibetan # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
|
||||
0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||||
0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
|
||||
0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN
|
||||
0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
|
||||
0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
|
||||
0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
|
||||
0FD9..0FDA ; Tibetan # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
|
||||
|
||||
# Total code points: 201
|
||||
# Total code points: 207
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1201,6 +1221,7 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
|
||||
# ================================================
|
||||
|
||||
1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
|
||||
302E..302F ; Hangul # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
|
||||
3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
|
||||
3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
|
||||
@ -1214,7 +1235,7 @@ FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL
|
||||
FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
|
||||
FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
||||
|
||||
# Total code points: 11737
|
||||
# Total code points: 11739
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1234,7 +1255,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
|
||||
1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
|
||||
1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
|
||||
135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
135D..135F ; Ethiopic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
|
||||
1360 ; Ethiopic # So ETHIOPIC SECTION MARK
|
||||
1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
|
||||
1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
|
||||
@ -1249,8 +1270,13 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
|
||||
2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
|
||||
2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
|
||||
AB01..AB06 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
|
||||
AB09..AB0E ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
|
||||
AB11..AB16 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
|
||||
AB20..AB26 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
|
||||
AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
|
||||
|
||||
# Total code points: 461
|
||||
# Total code points: 495
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1329,9 +1355,10 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
|
||||
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
|
||||
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
|
||||
1B001 ; Hiragana # Lo HIRAGANA LETTER ARCHAIC YE
|
||||
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
|
||||
|
||||
# Total code points: 90
|
||||
# Total code points: 91
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1343,15 +1370,17 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
|
||||
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
|
||||
1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E
|
||||
|
||||
# Total code points: 299
|
||||
# Total code points: 300
|
||||
|
||||
# ================================================
|
||||
|
||||
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
|
||||
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
|
||||
31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
|
||||
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
|
||||
|
||||
# Total code points: 65
|
||||
# Total code points: 70
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1370,9 +1399,10 @@ FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILI
|
||||
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 75738
|
||||
# Total code points: 75960
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1410,6 +1440,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
|
||||
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
|
||||
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
|
||||
065F ; Inherited # Mn ARABIC WAVY HAMZA BELOW
|
||||
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
|
||||
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
@ -1417,14 +1448,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
|
||||
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE
|
||||
20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
|
||||
20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
|
||||
302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
302A..302D ; Inherited # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
|
||||
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
|
||||
@ -1568,8 +1599,9 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
||||
19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
|
||||
19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
|
||||
19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
|
||||
19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
|
||||
19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
|
||||
19D0..19D9 ; New_Tai_Lue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
|
||||
19DA ; New_Tai_Lue # No NEW TAI LUE THAM DIGIT ONE
|
||||
19DE..19DF ; New_Tai_Lue # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
|
||||
|
||||
# Total code points: 83
|
||||
|
||||
@ -1584,8 +1616,10 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
||||
|
||||
2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
|
||||
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||||
2D70 ; Tifinagh # Po TIFINAGH SEPARATOR MARK
|
||||
2D7F ; Tifinagh # Mn TIFINAGH CONSONANT JOINER
|
||||
|
||||
# Total code points: 55
|
||||
# Total code points: 57
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1882,8 +1916,9 @@ A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
|
||||
A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
|
||||
A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
|
||||
A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
|
||||
16800..16A38 ; Bamum # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
|
||||
|
||||
# Total code points: 88
|
||||
# Total code points: 657
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1969,4 +2004,40 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
|
||||
# Total code points: 66
|
||||
|
||||
# ================================================
|
||||
|
||||
1BC0..1BE5 ; Batak # Lo [38] BATAK LETTER A..BATAK LETTER U
|
||||
1BE6 ; Batak # Mn BATAK SIGN TOMPI
|
||||
1BE7 ; Batak # Mc BATAK VOWEL SIGN E
|
||||
1BE8..1BE9 ; Batak # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
|
||||
1BEA..1BEC ; Batak # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
|
||||
1BED ; Batak # Mn BATAK VOWEL SIGN KARO O
|
||||
1BEE ; Batak # Mc BATAK VOWEL SIGN U
|
||||
1BEF..1BF1 ; Batak # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
|
||||
1BF2..1BF3 ; Batak # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
|
||||
1BFC..1BFF ; Batak # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
|
||||
|
||||
# Total code points: 56
|
||||
|
||||
# ================================================
|
||||
|
||||
11000 ; Brahmi # Mc BRAHMI SIGN CANDRABINDU
|
||||
11001 ; Brahmi # Mn BRAHMI SIGN ANUSVARA
|
||||
11002 ; Brahmi # Mc BRAHMI SIGN VISARGA
|
||||
11003..11037 ; Brahmi # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
|
||||
11038..11046 ; Brahmi # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
|
||||
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
|
||||
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
|
||||
|
||||
# Total code points: 108
|
||||
|
||||
# ================================================
|
||||
|
||||
0840..0858 ; Mandaic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
|
||||
0859..085B ; Mandaic # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
085E ; Mandaic # Po MANDAIC PUNCTUATION
|
||||
|
||||
# Total code points: 29
|
||||
|
||||
# EOF
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
# SpecialCasing-5.1.0.txt
|
||||
# Date: 2008-03-03, 21:58:10 GMT [MD]
|
||||
# SpecialCasing-6.0.0.txt
|
||||
# Date: 2010-05-18, 00:49:39 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Special Casing Properties
|
||||
#
|
||||
@ -106,11 +106,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
|
||||
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
|
||||
# IMPORTANT-when capitalizing iota-subscript (0345)
|
||||
# It MUST be in normalized form--moved to the end of any sequence of combining marks.
|
||||
# This is because logically it represents a following base character!
|
||||
# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
|
||||
# It should never be the first character in a word, so in titlecasing it can be left as is.
|
||||
# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
|
||||
# the result will be incorrect unless the iota-subscript is moved to the end
|
||||
# of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
|
||||
# This process can be achieved by first transforming the text to NFC before casing.
|
||||
# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
|
||||
|
||||
# The following cases are already in the UnicodeData file, so are only commented here.
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
||||
5.1.0
|
||||
6.0.0
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -262,7 +262,23 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
/**
|
||||
* The Cham range with the Cham digits.
|
||||
*/
|
||||
CHAM ('\uaa50', '\uaa00', '\uaa60');
|
||||
CHAM ('\uaa50', '\uaa00', '\uaa60'),
|
||||
/**
|
||||
* The Tai Tham Hora range with the Tai Tham Hora digits.
|
||||
*/
|
||||
TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'),
|
||||
/**
|
||||
* The Tai Tham Tham range with the Tai Tham Tham digits.
|
||||
*/
|
||||
TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'),
|
||||
/**
|
||||
* The Javanese range with the Javanese digits.
|
||||
*/
|
||||
JAVANESE ('\ua9d0', '\ua980', '\ua9e0'),
|
||||
/**
|
||||
* The Meetei Mayek range with the Meetei Mayek digits.
|
||||
*/
|
||||
MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00');
|
||||
|
||||
private static int toRangeIndex(Range script) {
|
||||
int index = script.ordinal();
|
||||
@ -592,10 +608,16 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x07a6, 0x07b1,
|
||||
0x07eb, 0x07f4,
|
||||
0x07f6, 0x07fa,
|
||||
0x0901, 0x0903,
|
||||
0x0816, 0x081a,
|
||||
0x081b, 0x0824,
|
||||
0x0825, 0x0828,
|
||||
0x0829, 0x0830,
|
||||
0x0859, 0x085e,
|
||||
0x0900, 0x0903,
|
||||
0x093a, 0x093b,
|
||||
0x093c, 0x093d,
|
||||
0x0941, 0x0949,
|
||||
0x094d, 0x0950,
|
||||
0x094d, 0x094e,
|
||||
0x0951, 0x0958,
|
||||
0x0962, 0x0964,
|
||||
0x0981, 0x0982,
|
||||
@ -604,7 +626,7 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x09cd, 0x09ce,
|
||||
0x09e2, 0x09e6,
|
||||
0x09f2, 0x09f4,
|
||||
0x0a01, 0x0a03,
|
||||
0x09fb, 0x0a03,
|
||||
0x0a3c, 0x0a3e,
|
||||
0x0a41, 0x0a59,
|
||||
0x0a70, 0x0a72,
|
||||
@ -630,9 +652,8 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x0cbc, 0x0cbd,
|
||||
0x0ccc, 0x0cd5,
|
||||
0x0ce2, 0x0ce6,
|
||||
0x0cf1, 0x0d02,
|
||||
0x0d41, 0x0d46,
|
||||
0x0d4d, 0x0d57,
|
||||
0x0d4d, 0x0d4e,
|
||||
0x0d62, 0x0d66,
|
||||
0x0dca, 0x0dcf,
|
||||
0x0dd2, 0x0dd8,
|
||||
@ -649,7 +670,7 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x0f71, 0x0f7f,
|
||||
0x0f80, 0x0f85,
|
||||
0x0f86, 0x0f88,
|
||||
0x0f90, 0x0fbe,
|
||||
0x0f8d, 0x0fbe,
|
||||
0x0fc6, 0x0fc7,
|
||||
0x102d, 0x1031,
|
||||
0x1032, 0x1038,
|
||||
@ -661,8 +682,10 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x1082, 0x1083,
|
||||
0x1085, 0x1087,
|
||||
0x108d, 0x108e,
|
||||
0x135f, 0x1360,
|
||||
0x109d, 0x109e,
|
||||
0x135d, 0x1360,
|
||||
0x1390, 0x13a0,
|
||||
0x1400, 0x1401,
|
||||
0x1680, 0x1681,
|
||||
0x169b, 0x16a0,
|
||||
0x1712, 0x1720,
|
||||
@ -682,6 +705,11 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x1939, 0x1946,
|
||||
0x19de, 0x1a00,
|
||||
0x1a17, 0x1a19,
|
||||
0x1a56, 0x1a57,
|
||||
0x1a58, 0x1a61,
|
||||
0x1a62, 0x1a63,
|
||||
0x1a65, 0x1a6d,
|
||||
0x1a73, 0x1a80,
|
||||
0x1b00, 0x1b04,
|
||||
0x1b34, 0x1b35,
|
||||
0x1b36, 0x1b3b,
|
||||
@ -691,8 +719,16 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x1b80, 0x1b82,
|
||||
0x1ba2, 0x1ba6,
|
||||
0x1ba8, 0x1baa,
|
||||
0x1be6, 0x1be7,
|
||||
0x1be8, 0x1bea,
|
||||
0x1bed, 0x1bee,
|
||||
0x1bef, 0x1bf2,
|
||||
0x1c2c, 0x1c34,
|
||||
0x1c36, 0x1c3b,
|
||||
0x1cd0, 0x1cd3,
|
||||
0x1cd4, 0x1ce1,
|
||||
0x1ce2, 0x1ce9,
|
||||
0x1ced, 0x1cee,
|
||||
0x1dc0, 0x1e00,
|
||||
0x1fbd, 0x1fbe,
|
||||
0x1fbf, 0x1fc2,
|
||||
@ -716,14 +752,16 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x213a, 0x213c,
|
||||
0x2140, 0x2145,
|
||||
0x214a, 0x214e,
|
||||
0x2153, 0x2160,
|
||||
0x2190, 0x2336,
|
||||
0x2150, 0x2160,
|
||||
0x2189, 0x2336,
|
||||
0x237b, 0x2395,
|
||||
0x2396, 0x249c,
|
||||
0x24ea, 0x26ac,
|
||||
0x26ad, 0x2800,
|
||||
0x2900, 0x2c00,
|
||||
0x2ce5, 0x2d00,
|
||||
0x2ce5, 0x2ceb,
|
||||
0x2cef, 0x2d00,
|
||||
0x2d7f, 0x2d80,
|
||||
0x2de0, 0x3005,
|
||||
0x3008, 0x3021,
|
||||
0x302a, 0x3031,
|
||||
@ -742,25 +780,40 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x33de, 0x33e0,
|
||||
0x33ff, 0x3400,
|
||||
0x4dc0, 0x4e00,
|
||||
0xa490, 0xa500,
|
||||
0xa490, 0xa4d0,
|
||||
0xa60d, 0xa610,
|
||||
0xa66f, 0xa680,
|
||||
0xa6f0, 0xa6f2,
|
||||
0xa700, 0xa722,
|
||||
0xa788, 0xa789,
|
||||
0xa802, 0xa803,
|
||||
0xa806, 0xa807,
|
||||
0xa80b, 0xa80c,
|
||||
0xa825, 0xa827,
|
||||
0xa828, 0xa840,
|
||||
0xa828, 0xa830,
|
||||
0xa838, 0xa840,
|
||||
0xa874, 0xa880,
|
||||
0xa8c4, 0xa8ce,
|
||||
0xa8e0, 0xa8f2,
|
||||
0xa926, 0xa92e,
|
||||
0xa947, 0xa952,
|
||||
0xa980, 0xa983,
|
||||
0xa9b3, 0xa9b4,
|
||||
0xa9b6, 0xa9ba,
|
||||
0xa9bc, 0xa9bd,
|
||||
0xaa29, 0xaa2f,
|
||||
0xaa31, 0xaa33,
|
||||
0xaa35, 0xaa40,
|
||||
0xaa43, 0xaa44,
|
||||
0xaa4c, 0xaa4d,
|
||||
0xaab0, 0xaab1,
|
||||
0xaab2, 0xaab5,
|
||||
0xaab7, 0xaab9,
|
||||
0xaabe, 0xaac0,
|
||||
0xaac1, 0xaac2,
|
||||
0xabe5, 0xabe6,
|
||||
0xabe8, 0xabe9,
|
||||
0xabed, 0xabf0,
|
||||
0xfb1e, 0xfb1f,
|
||||
0xfb29, 0xfb2a,
|
||||
0xfd3e, 0xfd50,
|
||||
@ -775,12 +828,28 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x1091f, 0x10920,
|
||||
0x10a01, 0x10a10,
|
||||
0x10a38, 0x10a40,
|
||||
0x10b39, 0x10b40,
|
||||
0x10e60, 0x11000,
|
||||
0x11001, 0x11002,
|
||||
0x11038, 0x11047,
|
||||
0x11052, 0x11066,
|
||||
0x11080, 0x11082,
|
||||
0x110b3, 0x110b7,
|
||||
0x110b9, 0x110bb,
|
||||
0x1d167, 0x1d16a,
|
||||
0x1d173, 0x1d183,
|
||||
0x1d185, 0x1d18c,
|
||||
0x1d1aa, 0x1d1ae,
|
||||
0x1d200, 0x1d360,
|
||||
0x1d7ce, 0x20000,
|
||||
0x1d6db, 0x1d6dc,
|
||||
0x1d715, 0x1d716,
|
||||
0x1d74f, 0x1d750,
|
||||
0x1d789, 0x1d78a,
|
||||
0x1d7c3, 0x1d7c4,
|
||||
0x1d7ce, 0x1f110,
|
||||
0x1f300, 0x1f48c,
|
||||
0x1f48d, 0x1f524,
|
||||
0x1f525, 0x20000,
|
||||
0xe0001, 0xf0000,
|
||||
0x10fffe, 0x10ffff // sentinel
|
||||
};
|
||||
@ -947,6 +1016,14 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
&& rangeSet.contains(Range.ARABIC)) {
|
||||
rangeSet.remove(Range.ARABIC);
|
||||
}
|
||||
|
||||
        // As well as the above case, give precedence to TAI_THAM_THAM if both
|
||||
// TAI_THAM_HORA and TAI_THAM_THAM are specified.
|
||||
if (rangeSet.contains(Range.TAI_THAM_THAM)
|
||||
&& rangeSet.contains(Range.TAI_THAM_HORA)) {
|
||||
rangeSet.remove(Range.TAI_THAM_HORA);
|
||||
}
|
||||
|
||||
rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
|
||||
if (rangeArray.length > BSEARCH_THRESHOLD) {
|
||||
// sort rangeArray for binary search
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1472,19 +1472,9 @@ public final class NormalizerImpl {
|
||||
}
|
||||
--remove;
|
||||
}
|
||||
} else if(value2!=0) {
|
||||
/* the composition is longer than the starter,
|
||||
* move the intermediate characters back one */
|
||||
} else if(value2!=0) { // for U+1109A, U+1109C, and U+110AB
|
||||
starterIsSupplementary=true;
|
||||
/* temporarily increment for the loop boundary */
|
||||
++starter;
|
||||
q=remove;
|
||||
r=++remove;
|
||||
while(starter<q) {
|
||||
args.source[--r]=args.source[--q];
|
||||
}
|
||||
args.source[starter]=(char)value2;
|
||||
--starter; /* undo the temporary increment */
|
||||
args.source[starter+1]=(char)value2;
|
||||
/* } else { both are on the BMP, nothing more to do */
|
||||
}
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -23,8 +23,8 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 6842557 6943963
|
||||
* @summary confirm that shaping works as expected. (Mainly for new characters which were added in Unicode 5)
|
||||
* @bug 6842557 6943963 6959267
|
||||
* @summary confirm that shaping works as expected. (Mainly for new characters which were added in Unicode 5 and 6)
|
||||
* used where appropriate.
|
||||
*/
|
||||
|
||||
@ -39,6 +39,7 @@ public class ShapingTest {
|
||||
public static void main(String[] args) {
|
||||
test6842557();
|
||||
test6943963();
|
||||
test6903266();
|
||||
|
||||
if (err) {
|
||||
throw new RuntimeException("shape() returned unexpected value.");
|
||||
@ -109,6 +110,34 @@ public class ShapingTest {
|
||||
checkResult("Range.ARABIC, Range.EASTERN_ARABIC", ns, given, expected_EASTERN_ARABIC);
|
||||
}
|
||||
|
||||
private static void test6903266() {
|
||||
NumericShaper ns = getContextualShaper(EnumSet.of(Range.TAI_THAM_HORA));
|
||||
String given = "\u1a20 012";
|
||||
String expected = "\u1a20 \u1a80\u1a81\u1a82";
|
||||
checkResult("Range.TAI_THAM_HORA", ns, given, expected);
|
||||
|
||||
ns = getContextualShaper(EnumSet.of(Range.TAI_THAM_HORA,
|
||||
Range.TAI_THAM_THAM));
|
||||
given = "\u1a20 012";
|
||||
expected = "\u1a20 \u1a90\u1a91\u1a92"; // Tham digits are prioritized.
|
||||
checkResult("Range.TAI_THAM_HORA, Range.TAI_THAM_THAM", ns, given, expected);
|
||||
|
||||
ns = getContextualShaper(EnumSet.of(Range.JAVANESE));
|
||||
given = "\ua984 012";
|
||||
expected = "\ua984 \ua9d0\ua9d1\ua9d2";
|
||||
checkResult("Range.JAVANESE", ns, given, expected);
|
||||
|
||||
ns = getContextualShaper(EnumSet.of(Range.TAI_THAM_THAM));
|
||||
given = "\u1a20 012";
|
||||
expected = "\u1a20 \u1a90\u1a91\u1a92";
|
||||
checkResult("Range.TAI_THAM_THAM", ns, given, expected);
|
||||
|
||||
ns = getContextualShaper(EnumSet.of(Range.MEETEI_MAYEK));
|
||||
given = "\uabc0 012";
|
||||
expected = "\uabc0 \uabf0\uabf1\uabf2";
|
||||
checkResult("Range.MEETEI_MAYEK", ns, given, expected);
|
||||
}
|
||||
|
||||
private static void checkResult(String ranges, NumericShaper ns,
|
||||
String given, String expected) {
|
||||
char[] text = given.toCharArray();
|
||||
|
||||
@ -1,8 +1,7 @@
|
||||
/**
|
||||
* @test
|
||||
* @bug 6945564
|
||||
* @bug 6945564 6959267
|
||||
* @summary Check that the j.l.Character.UnicodeScript
|
||||
* @ignore don't run until #6903266 is integrated
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
@ -15,11 +14,15 @@ public class CheckScript {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
if (args.length != 1) {
|
||||
System.out.println("java CharacterScript script.txt");
|
||||
System.exit(1);
|
||||
BufferedReader sbfr = null;
|
||||
if (args.length == 0) {
|
||||
sbfr = new BufferedReader(new FileReader(new File(System.getProperty("test.src", "."), "Scripts.txt")));
|
||||
} else if (args.length == 1) {
|
||||
sbfr = new BufferedReader(new FileReader(args[0]));
|
||||
} else {
|
||||
System.out.println("java CharacterScript Scripts.txt");
|
||||
throw new RuntimeException("Datafile name should be specified.");
|
||||
}
|
||||
BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
|
||||
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
|
||||
String line = null;
|
||||
HashMap<String,ArrayList<Integer>> scripts = new HashMap<>();
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
# Scripts-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:43 GMT [MD]
|
||||
# Scripts-6.0.0.txt
|
||||
# Date: 2010-08-19, 00:48:47 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
@ -73,7 +73,7 @@
|
||||
02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
|
||||
02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
|
||||
02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
|
||||
02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
|
||||
02E5..02E9 ; Common # Sk [5] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER EXTRA-LOW TONE BAR
|
||||
02EC ; Common # Lm MODIFIER LETTER VOICING
|
||||
02ED ; Common # Sk MODIFIER LETTER UNASPIRATED
|
||||
02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE
|
||||
@ -83,7 +83,6 @@
|
||||
0385 ; Common # Sk GREEK DIALYTIKA TONOS
|
||||
0387 ; Common # Po GREEK ANO TELEIA
|
||||
0589 ; Common # Po ARMENIAN FULL STOP
|
||||
0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
|
||||
060C ; Common # Po ARABIC COMMA
|
||||
061B ; Common # Po ARABIC SEMICOLON
|
||||
061F ; Common # Po ARABIC QUESTION MARK
|
||||
@ -92,7 +91,6 @@
|
||||
06DD ; Common # Cf ARABIC END OF AYAH
|
||||
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN
|
||||
0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
|
||||
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
|
||||
10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR
|
||||
@ -148,7 +146,7 @@
|
||||
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
|
||||
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
|
||||
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
|
||||
20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN
|
||||
20A0..20B9 ; Common # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN
|
||||
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
|
||||
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
|
||||
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
|
||||
@ -157,7 +155,8 @@
|
||||
210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
|
||||
2114 ; Common # So L B BAR SYMBOL
|
||||
2115 ; Common # L& DOUBLE-STRUCK CAPITAL N
|
||||
2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT CAPITAL P
|
||||
2116..2117 ; Common # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT
|
||||
2118 ; Common # Sm SCRIPT CAPITAL P
|
||||
2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
|
||||
211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE
|
||||
2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z
|
||||
@ -213,7 +212,7 @@
|
||||
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
|
||||
23E2..23F3 ; Common # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND
|
||||
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
|
||||
@ -227,18 +226,8 @@
|
||||
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
|
||||
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
|
||||
266F ; Common # Sm MUSIC SHARP SIGN
|
||||
2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR
|
||||
26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2
|
||||
26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
|
||||
26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
|
||||
2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
|
||||
270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
|
||||
2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
|
||||
274D ; Common # So SHADOWED WHITE CIRCLE
|
||||
274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
|
||||
2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
|
||||
2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
|
||||
2670..26FF ; Common # So [144] WEST SYRIAC CROSS..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2701..2767 ; Common # So [103] UPPER BLADE SCISSORS..ROTATED FLORAL HEART BULLET
|
||||
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
|
||||
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
|
||||
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
|
||||
@ -254,15 +243,13 @@
|
||||
2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
|
||||
2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
|
||||
2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
|
||||
2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW
|
||||
2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
|
||||
27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
|
||||
2794..27BF ; Common # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
|
||||
27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
|
||||
27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER
|
||||
27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER
|
||||
27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
|
||||
27CC ; Common # Sm LONG DIVISION
|
||||
27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK
|
||||
27CE..27E5 ; Common # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK
|
||||
27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
|
||||
27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
|
||||
27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET
|
||||
@ -555,27 +542,51 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
|
||||
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
|
||||
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
|
||||
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
|
||||
1F0B1..1F0BE ; Common # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
|
||||
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
|
||||
1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
|
||||
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
|
||||
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
|
||||
1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B
|
||||
1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N
|
||||
1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P
|
||||
1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S
|
||||
1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W
|
||||
1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV
|
||||
1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
|
||||
1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
|
||||
1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
|
||||
1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
|
||||
1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
|
||||
1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
|
||||
1F190 ; Common # So SQUARE DJ
|
||||
1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
|
||||
1F130..1F169 ; Common # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||||
1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
|
||||
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
|
||||
1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||||
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||||
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F300..1F320 ; Common # So [33] CYCLONE..SHOOTING STAR
|
||||
1F330..1F335 ; Common # So [6] CHESTNUT..CACTUS
|
||||
1F337..1F37C ; Common # So [70] TULIP..BABY BOTTLE
|
||||
1F380..1F393 ; Common # So [20] RIBBON..GRADUATION CAP
|
||||
1F3A0..1F3C4 ; Common # So [37] CAROUSEL HORSE..SURFER
|
||||
1F3C6..1F3CA ; Common # So [5] TROPHY..SWIMMER
|
||||
1F3E0..1F3F0 ; Common # So [17] HOUSE BUILDING..EUROPEAN CASTLE
|
||||
1F400..1F43E ; Common # So [63] RAT..PAW PRINTS
|
||||
1F440 ; Common # So EYES
|
||||
1F442..1F4F7 ; Common # So [182] EAR..CAMERA
|
||||
1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE
|
||||
1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE
|
||||
1F550..1F567 ; Common # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
|
||||
1F5FB..1F5FF ; Common # So [5] MOUNT FUJI..MOYAI
|
||||
1F601..1F610 ; Common # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE
|
||||
1F612..1F614 ; Common # So [3] UNAMUSED FACE..PENSIVE FACE
|
||||
1F616 ; Common # So CONFOUNDED FACE
|
||||
1F618 ; Common # So FACE THROWING A KISS
|
||||
1F61A ; Common # So KISSING FACE WITH CLOSED EYES
|
||||
1F61C..1F61E ; Common # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE
|
||||
1F620..1F625 ; Common # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE
|
||||
1F628..1F62B ; Common # So [4] FEARFUL FACE..TIRED FACE
|
||||
1F62D ; Common # So LOUDLY CRYING FACE
|
||||
1F630..1F633 ; Common # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE
|
||||
1F635..1F640 ; Common # So [12] DIZZY FACE..WEARY CAT FACE
|
||||
1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
|
||||
1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE
|
||||
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 5395
|
||||
# Total code points: 6379
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -603,7 +614,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
|
||||
2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I
|
||||
207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N
|
||||
2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
|
||||
2090..209C ; Latin # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
|
||||
212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN
|
||||
2132 ; Latin # L& TURNED CAPITAL F
|
||||
214E ; Latin # L& TURNED SMALL F
|
||||
@ -616,13 +627,16 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
|
||||
A770 ; Latin # Lm MODIFIER LETTER US
|
||||
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
|
||||
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A790..A791 ; Latin # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER
|
||||
A7A0..A7A9 ; Latin # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE
|
||||
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
|
||||
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
|
||||
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
|
||||
# Total code points: 1244
|
||||
# Total code points: 1267
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -687,12 +701,11 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
|
||||
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
|
||||
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
|
||||
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
|
||||
048A..0527 ; Cyrillic # L& [158] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
|
||||
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
|
||||
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
|
||||
A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
|
||||
A640..A66D ; Cyrillic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
|
||||
A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O
|
||||
A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET
|
||||
A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
|
||||
@ -702,7 +715,7 @@ A67E ; Cyrillic # Po CYRILLIC KAVYKA
|
||||
A67F ; Cyrillic # Lm CYRILLIC PAYEROK
|
||||
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
|
||||
|
||||
# Total code points: 404
|
||||
# Total code points: 408
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -744,6 +757,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
|
||||
# ================================================
|
||||
|
||||
0600..0603 ; Arabic # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
|
||||
0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
|
||||
0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
|
||||
060B ; Arabic # Sc AFGHANI SIGN
|
||||
@ -751,7 +765,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
|
||||
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
|
||||
0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
|
||||
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
|
||||
@ -760,7 +774,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
06D4 ; Arabic # Po ARABIC FULL STOP
|
||||
06D5 ; Arabic # Lo ARABIC LETTER AE
|
||||
06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
06DE ; Arabic # Me ARABIC START OF RUB EL HIZB
|
||||
06DE ; Arabic # So ARABIC START OF RUB EL HIZB
|
||||
06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
|
||||
06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
@ -773,6 +787,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
|
||||
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||||
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||||
@ -782,7 +797,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN
|
||||
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
|
||||
|
||||
# Total code points: 1030
|
||||
# Total code points: 1051
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -809,27 +824,29 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
||||
0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA
|
||||
0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
|
||||
093A ; Devanagari # Mn DEVANAGARI VOWEL SIGN OE
|
||||
093B ; Devanagari # Mc DEVANAGARI VOWEL SIGN OOE
|
||||
093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA
|
||||
093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA
|
||||
093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA
|
||||
094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
|
||||
094E..094F ; Devanagari # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
|
||||
0950 ; Devanagari # Lo DEVANAGARI OM
|
||||
0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E
|
||||
0953..0957 ; Devanagari # Mn [5] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN UUE
|
||||
0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
|
||||
0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
|
||||
0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A
|
||||
0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE
|
||||
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
|
||||
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
|
||||
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
|
||||
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
|
||||
# Total code points: 140
|
||||
# Total code points: 150
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -941,8 +958,9 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
|
||||
0B70 ; Oriya # So ORIYA ISSHAR
|
||||
0B71 ; Oriya # Lo ORIYA LETTER WA
|
||||
0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
|
||||
|
||||
# Total code points: 84
|
||||
# Total code points: 90
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1018,22 +1036,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
|
||||
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
|
||||
# Total code points: 84
|
||||
# Total code points: 86
|
||||
|
||||
# ================================================
|
||||
|
||||
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
|
||||
0D2A..0D39 ; Malayalam # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
|
||||
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
|
||||
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
|
||||
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
|
||||
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
|
||||
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
|
||||
0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH
|
||||
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
|
||||
0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
|
||||
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
|
||||
@ -1042,7 +1061,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0D79 ; Malayalam # So MALAYALAM DATE MARK
|
||||
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
|
||||
|
||||
# Total code points: 95
|
||||
# Total code points: 98
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1132,16 +1151,17 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
|
||||
0F85 ; Tibetan # Po TIBETAN MARK PALUTA
|
||||
0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
|
||||
0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
|
||||
0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
|
||||
0F88..0F8C ; Tibetan # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
|
||||
0F8D..0F97 ; Tibetan # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
|
||||
0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||||
0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
|
||||
0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN
|
||||
0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
|
||||
0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
|
||||
0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
|
||||
0FD9..0FDA ; Tibetan # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
|
||||
|
||||
# Total code points: 201
|
||||
# Total code points: 207
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1201,6 +1221,7 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
|
||||
# ================================================
|
||||
|
||||
1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
|
||||
302E..302F ; Hangul # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
|
||||
3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
|
||||
3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
|
||||
@ -1214,7 +1235,7 @@ FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL
|
||||
FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
|
||||
FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
||||
|
||||
# Total code points: 11737
|
||||
# Total code points: 11739
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1234,7 +1255,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
|
||||
1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
|
||||
1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
|
||||
135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
135D..135F ; Ethiopic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
|
||||
1360 ; Ethiopic # So ETHIOPIC SECTION MARK
|
||||
1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
|
||||
1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
|
||||
@ -1249,8 +1270,13 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
|
||||
2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
|
||||
2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
|
||||
AB01..AB06 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
|
||||
AB09..AB0E ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
|
||||
AB11..AB16 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
|
||||
AB20..AB26 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
|
||||
AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
|
||||
|
||||
# Total code points: 461
|
||||
# Total code points: 495
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1329,9 +1355,10 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
|
||||
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
|
||||
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
|
||||
1B001 ; Hiragana # Lo HIRAGANA LETTER ARCHAIC YE
|
||||
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
|
||||
|
||||
# Total code points: 90
|
||||
# Total code points: 91
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1343,15 +1370,17 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
|
||||
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
|
||||
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
|
||||
1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E
|
||||
|
||||
# Total code points: 299
|
||||
# Total code points: 300
|
||||
|
||||
# ================================================
|
||||
|
||||
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
|
||||
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
|
||||
31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
|
||||
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
|
||||
|
||||
# Total code points: 65
|
||||
# Total code points: 70
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1370,9 +1399,10 @@ FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILI
|
||||
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 75738
|
||||
# Total code points: 75960
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1410,6 +1440,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
|
||||
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
|
||||
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
|
||||
065F ; Inherited # Mn ARABIC WAVY HAMZA BELOW
|
||||
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
|
||||
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
@ -1417,14 +1448,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
|
||||
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE
|
||||
20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
|
||||
20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
|
||||
302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
302A..302D ; Inherited # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
|
||||
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
|
||||
@ -1568,8 +1599,9 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
||||
19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
|
||||
19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
|
||||
19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
|
||||
19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
|
||||
19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
|
||||
19D0..19D9 ; New_Tai_Lue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
|
||||
19DA ; New_Tai_Lue # No NEW TAI LUE THAM DIGIT ONE
|
||||
19DE..19DF ; New_Tai_Lue # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
|
||||
|
||||
# Total code points: 83
|
||||
|
||||
@ -1584,8 +1616,10 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
||||
|
||||
2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
|
||||
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||||
2D70 ; Tifinagh # Po TIFINAGH SEPARATOR MARK
|
||||
2D7F ; Tifinagh # Mn TIFINAGH CONSONANT JOINER
|
||||
|
||||
# Total code points: 55
|
||||
# Total code points: 57
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1882,8 +1916,9 @@ A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
|
||||
A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
|
||||
A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
|
||||
A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
|
||||
16800..16A38 ; Bamum # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
|
||||
|
||||
# Total code points: 88
|
||||
# Total code points: 657
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1969,4 +2004,40 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
|
||||
# Total code points: 66
|
||||
|
||||
# ================================================
|
||||
|
||||
1BC0..1BE5 ; Batak # Lo [38] BATAK LETTER A..BATAK LETTER U
|
||||
1BE6 ; Batak # Mn BATAK SIGN TOMPI
|
||||
1BE7 ; Batak # Mc BATAK VOWEL SIGN E
|
||||
1BE8..1BE9 ; Batak # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
|
||||
1BEA..1BEC ; Batak # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
|
||||
1BED ; Batak # Mn BATAK VOWEL SIGN KARO O
|
||||
1BEE ; Batak # Mc BATAK VOWEL SIGN U
|
||||
1BEF..1BF1 ; Batak # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
|
||||
1BF2..1BF3 ; Batak # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
|
||||
1BFC..1BFF ; Batak # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
|
||||
|
||||
# Total code points: 56
|
||||
|
||||
# ================================================
|
||||
|
||||
11000 ; Brahmi # Mc BRAHMI SIGN CANDRABINDU
|
||||
11001 ; Brahmi # Mn BRAHMI SIGN ANUSVARA
|
||||
11002 ; Brahmi # Mc BRAHMI SIGN VISARGA
|
||||
11003..11037 ; Brahmi # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
|
||||
11038..11046 ; Brahmi # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
|
||||
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
|
||||
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
|
||||
|
||||
# Total code points: 108
|
||||
|
||||
# ================================================
|
||||
|
||||
0840..0858 ; Mandaic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
|
||||
0859..085B ; Mandaic # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
085E ; Mandaic # Po MANDAIC PUNCTUATION
|
||||
|
||||
# Total code points: 29
|
||||
|
||||
# EOF
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user