From a04d1334a968649b1da36eb640d5d9d35eb3f29d Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sun, 21 Apr 2024 19:46:29 +0200 Subject: Add uprop_get_wb() --- data/WordBreakProperty | 1468 +++++++++++++++++++++++++++++++++++++++ gen/data-files | 1 + gen/prop/wb | 86 +++ include/unicode/prop.h | 27 + lib/unicode/prop/uprop_get_wb.c | 1117 +++++++++++++++++++++++++++++ 5 files changed, 2699 insertions(+) create mode 100644 data/WordBreakProperty create mode 100755 gen/prop/wb create mode 100644 lib/unicode/prop/uprop_get_wb.c diff --git a/data/WordBreakProperty b/data/WordBreakProperty new file mode 100644 index 0000000..302a276 --- /dev/null +++ b/data/WordBreakProperty @@ -0,0 +1,1468 @@ +# WordBreakProperty-15.1.0.txt +# Date: 2023-03-31, 03:19:05 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +# Property: Word_Break + +# All code points not explicitly listed for Word_Break +# have the value Other (XX). 
+ +# @missing: 0000..10FFFF; Other + +# ================================================ + +0022 ; Double_Quote # Po QUOTATION MARK + +# Total code points: 1 + +# ================================================ + +0027 ; Single_Quote # Po APOSTROPHE + +# Total code points: 1 + +# ================================================ + +05D0..05EA ; Hebrew_Letter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; Hebrew_Letter # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +FB1D ; Hebrew_Letter # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; Hebrew_Letter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; Hebrew_Letter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Hebrew_Letter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Hebrew_Letter # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Hebrew_Letter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Hebrew_Letter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED + +# Total code points: 75 + +# ================================================ + +000D ; CR # Cc + +# Total code points: 1 + +# ================================================ + +000A ; LF # Cc + +# Total code points: 1 + +# ================================================ + +000B..000C ; Newline # Cc [2] .. 
+0085 ; Newline # Cc +2028 ; Newline # Zl LINE SEPARATOR +2029 ; Newline # Zp PARAGRAPH SEPARATOR + +# Total code points: 5 + +# ================================================ + +0300..036F ; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Extend # Mn HEBREW POINT RAFE +05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Extend # Mn NKO DANTAYALAN +0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Extend # Mn [3] MANDAIC 
AFFRICATION MARK..MANDAIC GEMINATION MARK +0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +0903 ; Extend # Mc DEVANAGARI SIGN VISARGA +093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE +093B ; Extend # Mc DEVANAGARI VOWEL SIGN OOE +093C ; Extend # Mn DEVANAGARI SIGN NUKTA +093E..0940 ; Extend # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Extend # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; Extend # Mn DEVANAGARI SIGN VIRAMA +094E..094F ; Extend # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0951..0957 ; Extend # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Extend # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Extend # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BC ; Extend # Mn BENGALI SIGN NUKTA +09BE..09C0 ; Extend # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Extend # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Extend # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; Extend # Mn BENGALI SIGN VIRAMA +09D7 ; Extend # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Extend # Mn BENGALI SANDHI MARK +0A01..0A02 ; Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Extend # Mc GURMUKHI SIGN VISARGA +0A3C ; Extend # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; Extend # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Extend # Mn [2] GURMUKHI 
VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Extend # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Extend # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Extend # Mc GUJARATI SIGN VISARGA +0ABC ; Extend # Mn GUJARATI SIGN NUKTA +0ABE..0AC0 ; Extend # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Extend # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; Extend # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3C ; Extend # Mn ORIYA SIGN NUKTA +0B3E ; Extend # Mc ORIYA VOWEL SIGN AA +0B3F ; Extend # Mn ORIYA VOWEL SIGN I +0B40 ; Extend # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; Extend # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; Extend # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Extend # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Extend # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Extend # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Extend # Mc [2] TAMIL VOWEL SIGN 
U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Extend # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Extend # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; Extend # Mn TAMIL SIGN VIRAMA +0BD7 ; Extend # Mc TAMIL AU LENGTH MARK +0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Extend # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Extend # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Extend # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Extend # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Extend # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBC ; Extend # Mn KANNADA SIGN NUKTA +0CBE ; Extend # Mc KANNADA VOWEL SIGN AA +0CBF ; Extend # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Extend # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Extend # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CF3 ; Extend # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN 
CIRCULAR VIRAMA +0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Extend # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA +0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; Extend # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; Extend # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Extend # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Extend # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Extend # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECE ; Extend # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Extend # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Extend # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F71..0F7E ; Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Extend # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; Extend # Mn [5] 
TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Extend # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Extend # Mn TIBETAN SYMBOL PADMA GDAN +102B..102C ; Extend # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Extend # Mc MYANMAR VOWEL SIGN E +1032..1037 ; Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; Extend # Mc MYANMAR SIGN VISARGA +1039..103A ; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; Extend # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1056..1057 ; Extend # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1062..1064 ; Extend # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1067..106D ; Extend # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1071..1074 ; Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Extend # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Extend # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Extend # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109C ; Extend # Mc 
[3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; Extend # Mc TAGALOG SIGN PAMUDPOD +1732..1733 ; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; Extend # Mc HANUNOO SIGN PAMUDPOD +1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; Extend # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Extend # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Extend # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Extend # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Extend # Mn KHMER SIGN ATTHACAN +180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Extend # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Extend # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Extend # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Extend # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; Extend # Mn 
[2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Extend # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Extend # Mn BUGINESE VOWEL SIGN AE +1A55 ; Extend # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Extend # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Extend # Mn TAI THAM SIGN SAKOT +1A61 ; Extend # Mc TAI THAM VOWEL SIGN A +1A62 ; Extend # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Extend # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Extend # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Extend # Mc BALINESE SIGN BISAH +1B34 ; Extend # Mn BALINESE SIGN REREKAN +1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Extend # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B6B..1B73 ; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE 
SIGN PANGLAYAR +1B82 ; Extend # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; Extend # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Extend # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Extend # Mn BATAK SIGN TOMPI +1BE7 ; Extend # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Extend # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Extend # Mn BATAK VOWEL SIGN KARO O +1BEE ; Extend # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; Extend # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C24..1C2B ; Extend # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Extend # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Extend # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C ; Extend # Cf ZERO WIDTH 
NON-JOINER +20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; Extend # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA +A823..A824 ; Extend # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO +A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN 
AU +A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; Extend # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Extend # Mc JAVANESE SIGN WIGNYAN +A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; Extend # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Extend # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; Extend # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Extend # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Extend # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Extend # Mc CHAM CONSONANT SIGN FINAL H +AA7B ; Extend # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Extend # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Extend # Mc MYANMAR SIGN TAI LAING TONE-5 +AAB0 ; Extend # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Extend # Mn TAI 
VIET TONE MAI THO +AAEB ; Extend # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Extend # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA +ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; Extend # Mc MEETEI MAYEK LUM IYEK +ABED ; Extend # Mn MEETEI MAYEK APUN IYEK +FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Extend # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Extend # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 
+10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU +11001 ; Extend # Mn BRAHMI SIGN ANUSVARA +11002 ; Extend # Mc BRAHMI SIGN VISARGA +11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +11082 ; Extend # Mc KAITHI SIGN VISARGA +110B0..110B2 ; Extend # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; Extend # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Extend # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11145..11146 ; Extend # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11173 ; Extend # Mn MAHAJANI SIGN NUKTA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Extend # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Extend # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Extend # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CE ; Extend # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; Extend # Mn SHARADA SIGN 
INVERTED CANDRABINDU +1122C..1122E ; Extend # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Extend # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Extend # Mn KHOJKI SIGN ANUSVARA +11235 ; Extend # Mc KHOJKI SIGN VIRAMA +11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Extend # Mn KHOJKI SIGN SUKUN +11241 ; Extend # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Extend # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133B..1133C ; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133E..1133F ; Extend # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Extend # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Extend # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Extend # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; Extend # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11357 ; Extend # Mc GRANTHA AU LENGTH MARK +11362..11363 ; Extend # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11435..11437 ; Extend # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Extend # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; Extend # Mc NEWA SIGN VISARGA +11446 ; Extend # Mn NEWA SIGN NUKTA +1145E ; Extend # Mn NEWA SANDHI MARK 
+114B0..114B2 ; Extend # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Extend # Mc TIRHUTA VOWEL SIGN E +114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Extend # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Extend # Mc TIRHUTA SIGN VISARGA +114C2..114C3 ; Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115AF..115B1 ; Extend # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Extend # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Extend # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Extend # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11630..11632 ; Extend # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Extend # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Extend # Mn MODI SIGN ANUSVARA +1163E ; Extend # Mc MODI SIGN VISARGA +1163F..11640 ; Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AC ; Extend # Mc TAKRI SIGN VISARGA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Extend # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Extend # Mc TAKRI SIGN VIRAMA +116B7 ; Extend # Mn TAKRI SIGN NUKTA +1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Extend # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 
+11726 ; Extend # Mc AHOM VOWEL SIGN E +11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182C..1182E ; Extend # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; Extend # Mc DOGRA SIGN VISARGA +11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +11930..11935 ; Extend # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; Extend # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; Extend # Mc DIVES AKURU SIGN HALANTA +1193E ; Extend # Mn DIVES AKURU VIRAMA +11940 ; Extend # Mc DIVES AKURU MEDIAL YA +11942 ; Extend # Mc DIVES AKURU MEDIAL RA +11943 ; Extend # Mn DIVES AKURU SIGN NUKTA +119D1..119D3 ; Extend # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; Extend # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; Extend # Mn NANDINAGARI SIGN VIRAMA +119E4 ; Extend # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A01..11A0A ; Extend # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Extend # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Extend # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN 
G..SOYOMBO SIGN ANUSVARA +11A97 ; Extend # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Extend # Mc BHAIKSUKI SIGN VISARGA +11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Extend # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Extend # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Extend # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA +11D8A..11D8E ; Extend # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; Extend # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; Extend # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; Extend # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; Extend # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; Extend # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; Extend # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; Extend # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F03 ; Extend # Mc KAWI SIGN VISARGA +11F34..11F35 ; Extend # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN 
ALTERNATE AA +11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Extend # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; Extend # Mn KAWI VOWEL SIGN EU +11F41 ; Extend # Mc KAWI SIGN KILLER +11F42 ; Extend # Mn KAWI CONJOINER +13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F51..16F87 ; Extend # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL 
TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Extend # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2554 + +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 
+ +# Total code points: 26 + +# ================================================ + +00AD ; Format # Cf SOFT HYPHEN +061C ; Format # Cf ARABIC LETTER MARK +180E ; Format # Cf MONGOLIAN VOWEL SEPARATOR +200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS +2066..206F ; Format # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE +FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +13430..1343F ; Format # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0001 ; Format # Cf LANGUAGE TAG + +# Total code points: 58 + +# ================================================ + +3031..3035 ; Katakana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309B..309C ; Katakana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; Katakana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; Katakana # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO +31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO +3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO +FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; Katakana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +1AFF0..1AFF3 
; Katakana # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Katakana # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Katakana # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E +1B120..1B122 ; Katakana # Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU +1B155 ; Katakana # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N + +# Total code points: 331 + +# ================================================ + +0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; ALetter # Lo FEMININE ORDINAL INDICATOR +00B5 ; ALetter # L& MICRO SIGN +00BA ; ALetter # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; ALetter # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; ALetter # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX 
ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02D7 ; ALetter # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN +02DE..02DF ; ALetter # Sk [2] MODIFIER LETTER RHOTIC HOOK..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; ALetter # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; ALetter # Lm MODIFIER LETTER VOICING +02ED ; ALetter # Sk MODIFIER LETTER UNASPIRATED +02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; ALetter # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0370..0373 ; ALetter # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; ALetter # Lm GREEK NUMERAL SIGN +0376..0377 ; ALetter # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; ALetter # Lm GREEK YPOGEGRAMMENI +037B..037D ; ALetter # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; ALetter # L& GREEK CAPITAL LETTER YOT +0386 ; ALetter # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; ALetter # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; ALetter # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; ALetter # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; ALetter # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055C ; ALetter # Po [3] ARMENIAN APOSTROPHE..ARMENIAN EXCLAMATION MARK +055E ; 
ALetter # Po ARMENIAN QUESTION MARK +0560..0588 ; ALetter # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +058A ; ALetter # Pd ARMENIAN HYPHEN +05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH +0620..063F ; ALetter # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; ALetter # Lm ARABIC TATWEEL +0641..064A ; ALetter # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066E..066F ; ALetter # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; ALetter # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; ALetter # Lo ARABIC LETTER AE +06E5..06E6 ; ALetter # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; ALetter # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; ALetter # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; ALetter # Lo ARABIC LETTER HEH WITH INVERTED V +070F ; ALetter # Cf SYRIAC ABBREVIATION MARK +0710 ; ALetter # Lo SYRIAC LETTER ALAPH +0712..072F ; ALetter # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; ALetter # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; ALetter # Lo THAANA LETTER NAA +07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; ALetter # Lm NKO LAJANYALAN +0800..0815 ; ALetter # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; ALetter # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I +0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; ALetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; ALetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; ALetter # Lo [6] ARABIC LETTER 
NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; ALetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; ALetter # Lm ARABIC SMALL FARSI YEH +0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA +0950 ; ALetter # Lo DEVANAGARI OM +0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0971 ; ALetter # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; ALetter # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; ALetter # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; ALetter # Lo BENGALI LETTER LA +09B6..09B9 ; ALetter # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; ALetter # Lo BENGALI SIGN AVAGRAHA +09CE ; ALetter # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; ALetter # Lo BENGALI LETTER VEDIC ANUSVARA +0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; ALetter # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; ALetter # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; ALetter # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; ALetter # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A59..0A5C ; ALetter # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; ALetter # Lo GURMUKHI LETTER FA +0A72..0A74 ; ALetter # Lo [3] GURMUKHI IRI..GURMUKHI EK 
ONKAR +0A85..0A8D ; ALetter # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; ALetter # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; ALetter # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; ALetter # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; ALetter # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; ALetter # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; ALetter # Lo GUJARATI SIGN AVAGRAHA +0AD0 ; ALetter # Lo GUJARATI OM +0AE0..0AE1 ; ALetter # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF9 ; ALetter # Lo GUJARATI LETTER ZHA +0B05..0B0C ; ALetter # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; ALetter # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; ALetter # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; ALetter # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; ALetter # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; ALetter # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; ALetter # Lo ORIYA SIGN AVAGRAHA +0B5C..0B5D ; ALetter # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; ALetter # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B71 ; ALetter # Lo ORIYA LETTER WA +0B83 ; ALetter # Lo TAMIL SIGN VISARGA +0B85..0B8A ; ALetter # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; ALetter # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; ALetter # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; ALetter # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; ALetter # Lo TAMIL LETTER JA +0B9E..0B9F ; ALetter # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; ALetter # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; ALetter # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; ALetter # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BD0 ; ALetter # Lo TAMIL OM +0C05..0C0C ; ALetter # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; ALetter # Lo [3] TELUGU LETTER E..TELUGU 
LETTER AI +0C12..0C28 ; ALetter # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; ALetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA +0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; ALetter # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; ALetter # Lo KANNADA SIGN SPACING CANDRABINDU +0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA +0CDD..0CDE ; ALetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CF1..0CF2 ; ALetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D04..0D0C ; ALetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; ALetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; ALetter # Lo MALAYALAM SIGN AVAGRAHA +0D4E ; ALetter # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; ALetter # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D5F..0D61 ; ALetter # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D7A..0D7F ; ALetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; ALetter # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; ALetter # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; ALetter # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; ALetter # 
Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0F00 ; ALetter # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; ALetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; ALetter # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F88..0F8C ; ALetter # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ALetter # L& GEORGIAN CAPITAL LETTER YN +10CD ; ALetter # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; ALetter # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; ALetter # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; ALetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; ALetter # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; ALetter # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; ALetter # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; ALetter # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; ALetter # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; ALetter # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; ALetter # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; ALetter # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; ALetter # L& [86] 
CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; ALetter # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; ALetter # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; ALetter # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; ALetter # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; ALetter # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1740..1751 ; ALetter # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; ALetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; ALetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; ALetter # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; ALetter # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; ALetter # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; ALetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; ALetter # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B45..1B4C ; ALetter # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B83..1BA0 ; ALetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BAE..1BAF ; ALetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; 
ALetter # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1C00..1C23 ; ALetter # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C88 ; ALetter # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; ALetter # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; ALetter # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; ALetter # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; ALetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; ALetter # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; ALetter # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ALetter # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; ALetter # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; ALetter # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; ALetter # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; ALetter # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; 
ALetter # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; ALetter # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; ALetter # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; ALetter # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; ALetter # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; ALetter # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; ALetter # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; ALetter # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; ALetter # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; ALetter # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C +2107 ; ALetter # L& EULER CONSTANT +210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; ALetter # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; ALetter # 
L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; ALetter # L& DOUBLE-STRUCK CAPITAL Z +2126 ; ALetter # L& OHM SIGN +2128 ; ALetter # L& BLACK-LETTER CAPITAL Z +212A..212D ; ALetter # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; ALetter # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; ALetter # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; ALetter # L& INFORMATION SOURCE +213C..213F ; ALetter # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; ALetter # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; ALetter # L& TURNED SMALL F +2160..2182 ; ALetter # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; ALetter # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; ALetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C7B ; ALetter # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ALetter # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; ALetter # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; ALetter # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ALetter # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; ALetter # L& GEORGIAN SMALL LETTER YN +2D2D ; ALetter # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ALetter # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC 
SYLLABLE CCO +2DB0..2DB6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; ALetter # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2E2F ; ALetter # Lm VERTICAL TILDE +3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK +303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; ALetter # Lo MASU MARK +3105..312F ; ALetter # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; ALetter # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015 ; ALetter # Lm YI SYLLABLE WU +A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; ALetter # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; ALetter # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; ALetter # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; ALetter # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; ALetter # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A62A..A62B ; ALetter # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; ALetter # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; ALetter # Lo CYRILLIC LETTER MULTIOCULAR O +A67F ; ALetter # Lm CYRILLIC PAYEROK +A680..A69B ; ALetter # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; ALetter # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; ALetter # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; ALetter # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A708..A716 ; ALetter # Sk [15] MODIFIER LETTER 
EXTRA-HIGH DOTTED TONE BAR..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; ALetter # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; ALetter # Lm MODIFIER LETTER US +A771..A787 ; ALetter # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; ALetter # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; ALetter # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; ALetter # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; ALetter # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; ALetter # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; ALetter # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; ALetter # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; ALetter # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU 
+A882..A8B3 ; ALetter # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8F2..A8F7 ; ALetter # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; ALetter # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; ALetter # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A90A..A925 ; ALetter # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A930..A946 ; ALetter # Lo [23] REJANG LETTER KA..REJANG LETTER A +A960..A97C ; ALetter # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A984..A9B2 ; ALetter # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9CF ; ALetter # Lm JAVANESE PANGRANGKEP +AA00..AA28 ; ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AAE0..AAEA ; ALetter # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ALetter # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ALetter # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AB01..AB06 ; ALetter # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; ALetter # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; ALetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; ALetter # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; ALetter # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; ALetter # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; ALetter # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; ALetter # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; ALetter # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; ALetter # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; ALetter # Lo [35] MEETEI 
MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; ALetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; ALetter # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB50..FBB1 ; ALetter # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; ALetter # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE70..FE74 ; ALetter # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; ALetter # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; ALetter # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FFA0..FFBE ; ALetter # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; ALetter # Lo [12] LINEAR B SYLLABLE B008 
A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; ALetter # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; ALetter # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; ALetter # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; ALetter # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; ALetter # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; ALetter # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10280..1029C ; ALetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; ALetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; ALetter # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; ALetter # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; ALetter # Nl GOTHIC LETTER NINETY +10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; ALetter # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; ALetter # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; ALetter # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; ALetter # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; ALetter # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; ALetter # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; ALetter # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; ALetter # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER 
KIW +10570..1057A ; ALetter # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; ALetter # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; ALetter # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; ALetter # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; ALetter # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; ALetter # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; ALetter # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; ALetter # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; ALetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; ALetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; ALetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; ALetter # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; ALetter # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; ALetter # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; ALetter # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; ALetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; ALetter # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; ALetter # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; ALetter # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; ALetter # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER 
TAU +10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ALetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ALetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; ALetter # Lo KHAROSHTHI LETTER A +10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; ALetter # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; ALetter # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; ALetter # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; ALetter # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; ALetter # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; ALetter # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; ALetter # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; ALetter # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; ALetter # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; ALetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; ALetter # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; ALetter # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; ALetter # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; ALetter # Lo [22] 
SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; ALetter # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; ALetter # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; ALetter # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11003..11037 ; ALetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11071..11072 ; ALetter # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; ALetter # Lo BRAHMI LETTER OLD TAMIL LLA +11083..110AF ; ALetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; ALetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ALetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11144 ; ALetter # Lo CHAKMA LETTER LHAA +11147 ; ALetter # Lo CHAKMA LETTER VAA +11150..11172 ; ALetter # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; ALetter # Lo MAHAJANI LIGATURE SHRI +11183..111B2 ; ALetter # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ALetter # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111DA ; ALetter # Lo SHARADA EKAM +111DC ; ALetter # Lo SHARADA HEADSTROKE +11200..11211 ; ALetter # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; ALetter # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; ALetter # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11280..11286 ; ALetter # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; ALetter # Lo MULTANI LETTER GHA +1128A..1128D ; ALetter # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; ALetter # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; ALetter # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; ALetter # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +11305..1130C ; ALetter # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; ALetter # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; ALetter # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA 
+1132A..11330 ; ALetter # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; ALetter # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; ALetter # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; ALetter # Lo GRANTHA SIGN AVAGRAHA +11350 ; ALetter # Lo GRANTHA OM +1135D..11361 ; ALetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11400..11434 ; ALetter # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; ALetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; ALetter # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; ALetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114C4..114C5 ; ALetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; ALetter # Lo TIRHUTA OM +11580..115AE ; ALetter # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115D8..115DB ; ALetter # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; ALetter # Lo [48] MODI LETTER A..MODI LETTER LLA +11644 ; ALetter # Lo MODI SIGN HUVA +11680..116AA ; ALetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116B8 ; ALetter # Lo TAKRI LETTER ARCHAIC KHA +11800..1182B ; ALetter # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +118A0..118DF ; ALetter # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118FF..11906 ; ALetter # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; ALetter # Lo DIVES AKURU LETTER O +1190C..11913 ; ALetter # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; ALetter # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; ALetter # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +1193F ; ALetter # Lo DIVES AKURU PREFIXED NASAL SIGN +11941 ; ALetter # Lo DIVES AKURU INITIAL RA +119A0..119A7 ; ALetter # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; ALetter # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119E1 ; ALetter # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; ALetter # Lo NANDINAGARI 
HEADSTROKE +11A00 ; ALetter # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; ALetter # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; ALetter # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; ALetter # Lo SOYOMBO LETTER A +11A5C..11A89 ; ALetter # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A9D ; ALetter # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; ALetter # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; ALetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; ALetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; ALetter # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; ALetter # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; ALetter # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; ALetter # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; ALetter # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; ALetter # Lo MASARAM GONDI REPHA +11D60..11D65 ; ALetter # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; ALetter # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; ALetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D98 ; ALetter # Lo GUNJALA GONDI OM +11EE0..11EF2 ; ALetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; ALetter # Lo KAWI SIGN REPHA +11F04..11F10 ; ALetter # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; ALetter # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11FB0 ; ALetter # Lo LISU LETTER YHA +12000..12399 ; ALetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; ALetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; ALetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; ALetter # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; ALetter # Lo 
[1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; ALetter # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +14400..14646 ; ALetter # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; ALetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; ALetter # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; ALetter # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; ALetter # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; ALetter # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B40..16B43 ; ALetter # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B63..16B77 ; ALetter # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; ALetter # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; ALetter # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; ALetter # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; ALetter # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; ALetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; ALetter # Lm OLD CHINESE ITERATION MARK +1BC00..1BC6A ; ALetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; ALetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; ALetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; ALetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; ALetter # L& MATHEMATICAL 
SCRIPT CAPITAL G +1D4A5..1D4A6 ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; ALetter # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; ALetter # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; ALetter # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; ALetter # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; ALetter # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; ALetter # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; ALetter # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; ALetter # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; ALetter # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; ALetter # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; ALetter # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; ALetter # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; ALetter # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; ALetter # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; ALetter # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; ALetter # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; ALetter # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; ALetter # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; ALetter # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL 
BOLD ITALIC SMALL OMEGA +1D750..1D76E ; ALetter # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; ALetter # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; ALetter # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; ALetter # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; ALetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; ALetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; ALetter # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; ALetter # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; ALetter # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; ALetter # Lm NAG MUNDARI SIGN OJOD +1E7E0..1E7E6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; ALetter # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; ALetter # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; 
ALetter # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; ALetter # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; ALetter # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; ALetter # Lm ADLAM NASALIZATION MARK +1EE00..1EE03 ; ALetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ALetter # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ALetter # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ALetter # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ALetter # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ALetter # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ALetter # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ALetter # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ALetter # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ALetter # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ALetter # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ALetter # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ALetter # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ALetter # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ALetter # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ALetter # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ALetter # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ALetter # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ALetter # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ALetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ALetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ALetter # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ALetter # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL 
STRETCHED KAF +1EE6C..1EE72 ; ALetter # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ALetter # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ALetter # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ALetter # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ALetter # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ALetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ALetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1F130..1F149 ; ALetter # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 29490 + +# ================================================ + +003A ; MidLetter # Po COLON +00B7 ; MidLetter # Po MIDDLE DOT +0387 ; MidLetter # Po GREEK ANO TELEIA +055F ; MidLetter # Po ARMENIAN ABBREVIATION MARK +05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM +2027 ; MidLetter # Po HYPHENATION POINT +FE13 ; MidLetter # Po PRESENTATION FORM FOR VERTICAL COLON +FE55 ; MidLetter # Po SMALL COLON +FF1A ; MidLetter # Po FULLWIDTH COLON + +# Total code points: 9 + +# ================================================ + +002C ; MidNum # Po COMMA +003B ; MidNum # Po SEMICOLON +037E ; MidNum # Po GREEK QUESTION MARK +0589 ; MidNum # Po ARMENIAN FULL STOP +060C..060D ; MidNum # Po [2] ARABIC COMMA..ARABIC DATE 
SEPARATOR +066C ; MidNum # Po ARABIC THOUSANDS SEPARATOR +07F8 ; MidNum # Po NKO COMMA +2044 ; MidNum # Sm FRACTION SLASH +FE10 ; MidNum # Po PRESENTATION FORM FOR VERTICAL COMMA +FE14 ; MidNum # Po PRESENTATION FORM FOR VERTICAL SEMICOLON +FE50 ; MidNum # Po SMALL COMMA +FE54 ; MidNum # Po SMALL SEMICOLON +FF0C ; MidNum # Po FULLWIDTH COMMA +FF1B ; MidNum # Po FULLWIDTH SEMICOLON + +# Total code points: 15 + +# ================================================ + +002E ; MidNumLet # Po FULL STOP +2018 ; MidNumLet # Pi LEFT SINGLE QUOTATION MARK +2019 ; MidNumLet # Pf RIGHT SINGLE QUOTATION MARK +2024 ; MidNumLet # Po ONE DOT LEADER +FE52 ; MidNumLet # Po SMALL FULL STOP +FF07 ; MidNumLet # Po FULLWIDTH APOSTROPHE +FF0E ; MidNumLet # Po FULLWIDTH FULL STOP + +# Total code points: 7 + +# ================================================ + +0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE +0600..0605 ; Numeric # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066B ; Numeric # Po ARABIC DECIMAL SEPARATOR +06DD ; Numeric # Cf ARABIC END OF AYAH +06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +07C0..07C9 ; Numeric # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +0890..0891 ; Numeric # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; Numeric # Cf ARABIC DISPUTED END OF AYAH +0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0AE6..0AEF ; Numeric # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0B66..0B6F ; Numeric # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0BE6..0BEF ; Numeric # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0C66..0C6F ; Numeric # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0CE6..0CEF ; Numeric # Nd [10] KANNADA DIGIT ZERO..KANNADA 
DIGIT NINE +0D66..0D6F ; Numeric # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0DE6..0DEF ; Numeric # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0E50..0E59 ; Numeric # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0ED0..0ED9 ; Numeric # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0F20..0F29 ; Numeric # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +1040..1049 ; Numeric # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +1090..1099 ; Numeric # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +1A80..1A89 ; Numeric # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; Numeric # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1BB0..1BB9 ; Numeric # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1C40..1C49 ; Numeric # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C50..1C59 ; Numeric # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +A620..A629 ; Numeric # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A8D0..A8D9 ; Numeric # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A900..A909 ; Numeric # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A9D0..A9D9 ; Numeric # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9F0..A9F9 ; Numeric # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +10D30..10D39 ; Numeric # Nd [10] HANIFI ROHINGYA DIGIT 
ZERO..HANIFI ROHINGYA DIGIT NINE +11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110BD ; Numeric # Cf KAITHI NUMBER SIGN +110CD ; Numeric # Cf KAITHI NUMBER SIGN ABOVE +110F0..110F9 ; Numeric # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +112F0..112F9 ; Numeric # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11450..11459 ; Numeric # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +11950..11959 ; Numeric # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E2F0..1E2F9 ; Numeric # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4F0..1E4F9 ; Numeric # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED 
DIGIT ZERO..SEGMENTED DIGIT NINE + +# Total code points: 693 + +# ================================================ + +005F ; ExtendNumLet # Pc LOW LINE +202F ; ExtendNumLet # Zs NARROW NO-BREAK SPACE +203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE +2054 ; ExtendNumLet # Pc INVERTED UNDERTIE +FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE + +# Total code points: 11 + +# ================================================ + +200D ; ZWJ # Cf ZERO WIDTH JOINER + +# Total code points: 1 + +# ================================================ + +0020 ; WSegSpace # Zs SPACE +1680 ; WSegSpace # Zs OGHAM SPACE MARK +2000..2006 ; WSegSpace # Zs [7] EN QUAD..SIX-PER-EM SPACE +2008..200A ; WSegSpace # Zs [3] PUNCTUATION SPACE..HAIR SPACE +205F ; WSegSpace # Zs MEDIUM MATHEMATICAL SPACE +3000 ; WSegSpace # Zs IDEOGRAPHIC SPACE + +# Total code points: 14 + +# EOF diff --git a/gen/data-files b/gen/data-files index 00b5f1c..ba795cd 100755 --- a/gen/data-files +++ b/gen/data-files @@ -8,6 +8,7 @@ readonly BASE='https://www.unicode.org/Public/15.1.0/ucd' readonly PATHS=' auxiliary/GraphemeBreakProperty + auxiliary/WordBreakProperty BidiBrackets BidiMirroring Blocks diff --git a/gen/prop/wb b/gen/prop/wb new file mode 100755 index 0000000..a6b47f2 --- /dev/null +++ b/gen/prop/wb @@ -0,0 +1,86 @@ +#!/bin/sh + +set -e +cd "${0%/*}/../.." 
+exec >lib/unicode/prop/uprop_get_wb.c + +gawk ' +BEGIN { + FS = " *(; *|#.*)" + + map["ALetter"] = "LE" + map["CR"] = "CR" + map["Double_Quote"] = "DQ" + map["E_Base"] = "EB" + map["E_Base_GAZ"] = "EBG" + map["E_Modifier"] = "EM" + map["Extend"] = "EXTEND" + map["ExtendNumLet"] = "EX" + map["Format"] = "FO" + map["Glue_After_Zwj"] = "GAZ" + map["Hebrew_Letter"] = "HL" + map["Katakana"] = "KA" + map["LF"] = "LF" + map["MidLetter"] = "ML" + map["MidNumLet"] = "MB" + map["MidNum"] = "MN" + map["Newline"] = "NL" + map["Numeric"] = "NU" + map["Other"] = "XX" + map["Regional_Indicator"] = "RI" + map["Single_Quote"] = "SQ" + map["WSegSpace"] = "WSEGSPACE" + map["ZWJ"] = "ZWJ" + + print "/* This file is autogenerated by gen/prop/wb; DO NOT EDIT. */" + print "" + print "#include \"_bsearch.h\"" + print "#include \"macros.h\"" + print "#include \"rune.h\"" + print "#include \"unicode/prop.h\"" + print "" +} + +/^[A-F0-9]/ { + n = split($1, a, /\.\./) + lo = strtonum("0X" a[1]) + hi = strtonum("0X" a[n]) + + for (i = lo; i <= hi; i++) + props[i] = "WB_" map[$2] +} + +END { + print "static constexpr enum uprop_wb lookup_lat1[] = {" + for (i = 0; i < 0x100; i++) { + if (i % 4 == 0) + printf "\t" + printf "%-13s%s", (props[i] ? props[i] : "WB_XX") ",", \ + i % 4 == 3 ? "\n" : " " + } + print "};" + print "" + print "static const struct {" + print "\trune lo, hi;" + print "\tenum uprop_wb val;" + print "} lookup[] = {" + + for (i = 0x100; i <= 0x10FFFF; i++) { + if (!props[i]) + continue + for (lo = i; props[lo] == props[i + 1]; i++) + ; + printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i] + } + + print "};" + print "" + print "_MLIB_DEFINE_BSEARCH(enum uprop_wb, lookup, WB_XX)" + print "" + print "enum uprop_wb" + print "uprop_get_wb(rune ch)" + print "{" + print "\treturn ch < lengthof(lookup_lat1) ? 
lookup_lat1[ch] : mlib_lookup(ch);" + print "}" +} +' data/WordBreakProperty | sed 's/\s*$//' diff --git a/include/unicode/prop.h b/include/unicode/prop.h index 05e16a2..358e0fd 100644 --- a/include/unicode/prop.h +++ b/include/unicode/prop.h @@ -1045,6 +1045,32 @@ enum uprop_vo { VO_U, /* Upright */ }; +enum uprop_wb { + WB_XX = 0, /* Other */ + WB_CR, /* CR */ + WB_DQ, /* Double Quote */ + WB_EB, /* E Base */ + WB_EBG, /* E Base GAZ */ + WB_EM, /* E Modifier */ + WB_EX, /* ExtendNumLet */ + WB_EXTEND, /* Extend */ + WB_FO, /* Format */ + WB_GAZ, /* Glue After Zwj */ + WB_HL, /* Hebrew Letter */ + WB_KA, /* Katakana */ + WB_LE, /* ALetter */ + WB_LF, /* LF */ + WB_MB, /* MidNumLet */ + WB_ML, /* MidLetter */ + WB_MN, /* MidNum */ + WB_NL, /* Newline */ + WB_NU, /* Numeric */ + WB_RI, /* Regional Indicator */ + WB_SQ, /* Single Quote */ + WB_WSEGSPACE, /* WSegSpace */ + WB_ZWJ, /* ZWJ */ +}; + /* Not a Unicode property; but a nice-to-have */ [[_mlib_pure]] struct u8view uprop_blkname(enum uprop_blk); @@ -1072,6 +1098,7 @@ enum uprop_vo { [[_mlib_pure]] enum uprop_nt uprop_get_nt(rune); [[_mlib_pure]] enum uprop_sc uprop_get_sc(rune); [[_mlib_pure]] enum uprop_vo uprop_get_vo(rune); +[[_mlib_pure]] enum uprop_wb uprop_get_wb(rune); [[_mlib_pure]] rune uprop_get_bmg(rune); [[_mlib_pure]] rune uprop_get_bpb(rune); [[_mlib_pure]] rune uprop_get_equideo(rune); diff --git a/lib/unicode/prop/uprop_get_wb.c b/lib/unicode/prop/uprop_get_wb.c new file mode 100644 index 0000000..545552d --- /dev/null +++ b/lib/unicode/prop/uprop_get_wb.c @@ -0,0 +1,1117 @@ +/* This file is autogenerated by gen/prop/wb; DO NOT EDIT. 
*/ + +#include "_bsearch.h" +#include "macros.h" +#include "rune.h" +#include "unicode/prop.h" + +static constexpr enum uprop_wb lookup_lat1[] = { + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_LF, WB_NL, + WB_NL, WB_CR, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_WSEGSPACE, WB_XX, WB_DQ, WB_XX, + WB_XX, WB_XX, WB_XX, WB_SQ, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_MN, WB_XX, WB_MB, WB_XX, + WB_NU, WB_NU, WB_NU, WB_NU, + WB_NU, WB_NU, WB_NU, WB_NU, + WB_NU, WB_NU, WB_ML, WB_MN, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_XX, + WB_XX, WB_XX, WB_XX, WB_EX, + WB_XX, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_NL, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_XX, WB_LE, WB_XX, + WB_XX, WB_FO, WB_XX, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_XX, WB_LE, WB_XX, WB_ML, + WB_XX, WB_XX, WB_LE, WB_XX, + WB_XX, WB_XX, WB_XX, WB_XX, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_XX, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, 
WB_LE, WB_XX, + WB_LE, WB_LE, WB_LE, WB_LE, + WB_LE, WB_LE, WB_LE, WB_LE, +}; + +static const struct { + rune lo, hi; + enum uprop_wb val; +} lookup[] = { + {RUNE_C(0x000100), RUNE_C(0x0002D7), WB_LE}, + {RUNE_C(0x0002DE), RUNE_C(0x0002FF), WB_LE}, + {RUNE_C(0x000300), RUNE_C(0x00036F), WB_EXTEND}, + {RUNE_C(0x000370), RUNE_C(0x000374), WB_LE}, + {RUNE_C(0x000376), RUNE_C(0x000377), WB_LE}, + {RUNE_C(0x00037A), RUNE_C(0x00037D), WB_LE}, + {RUNE_C(0x00037E), RUNE_C(0x00037E), WB_MN}, + {RUNE_C(0x00037F), RUNE_C(0x00037F), WB_LE}, + {RUNE_C(0x000386), RUNE_C(0x000386), WB_LE}, + {RUNE_C(0x000387), RUNE_C(0x000387), WB_ML}, + {RUNE_C(0x000388), RUNE_C(0x00038A), WB_LE}, + {RUNE_C(0x00038C), RUNE_C(0x00038C), WB_LE}, + {RUNE_C(0x00038E), RUNE_C(0x0003A1), WB_LE}, + {RUNE_C(0x0003A3), RUNE_C(0x0003F5), WB_LE}, + {RUNE_C(0x0003F7), RUNE_C(0x000481), WB_LE}, + {RUNE_C(0x000483), RUNE_C(0x000489), WB_EXTEND}, + {RUNE_C(0x00048A), RUNE_C(0x00052F), WB_LE}, + {RUNE_C(0x000531), RUNE_C(0x000556), WB_LE}, + {RUNE_C(0x000559), RUNE_C(0x00055C), WB_LE}, + {RUNE_C(0x00055E), RUNE_C(0x00055E), WB_LE}, + {RUNE_C(0x00055F), RUNE_C(0x00055F), WB_ML}, + {RUNE_C(0x000560), RUNE_C(0x000588), WB_LE}, + {RUNE_C(0x000589), RUNE_C(0x000589), WB_MN}, + {RUNE_C(0x00058A), RUNE_C(0x00058A), WB_LE}, + {RUNE_C(0x000591), RUNE_C(0x0005BD), WB_EXTEND}, + {RUNE_C(0x0005BF), RUNE_C(0x0005BF), WB_EXTEND}, + {RUNE_C(0x0005C1), RUNE_C(0x0005C2), WB_EXTEND}, + {RUNE_C(0x0005C4), RUNE_C(0x0005C5), WB_EXTEND}, + {RUNE_C(0x0005C7), RUNE_C(0x0005C7), WB_EXTEND}, + {RUNE_C(0x0005D0), RUNE_C(0x0005EA), WB_HL}, + {RUNE_C(0x0005EF), RUNE_C(0x0005F2), WB_HL}, + {RUNE_C(0x0005F3), RUNE_C(0x0005F3), WB_LE}, + {RUNE_C(0x0005F4), RUNE_C(0x0005F4), WB_ML}, + {RUNE_C(0x000600), RUNE_C(0x000605), WB_NU}, + {RUNE_C(0x00060C), RUNE_C(0x00060D), WB_MN}, + {RUNE_C(0x000610), RUNE_C(0x00061A), WB_EXTEND}, + {RUNE_C(0x00061C), RUNE_C(0x00061C), WB_FO}, + {RUNE_C(0x000620), RUNE_C(0x00064A), WB_LE}, + {RUNE_C(0x00064B), 
RUNE_C(0x00065F), WB_EXTEND}, + {RUNE_C(0x000660), RUNE_C(0x000669), WB_NU}, + {RUNE_C(0x00066B), RUNE_C(0x00066B), WB_NU}, + {RUNE_C(0x00066C), RUNE_C(0x00066C), WB_MN}, + {RUNE_C(0x00066E), RUNE_C(0x00066F), WB_LE}, + {RUNE_C(0x000670), RUNE_C(0x000670), WB_EXTEND}, + {RUNE_C(0x000671), RUNE_C(0x0006D3), WB_LE}, + {RUNE_C(0x0006D5), RUNE_C(0x0006D5), WB_LE}, + {RUNE_C(0x0006D6), RUNE_C(0x0006DC), WB_EXTEND}, + {RUNE_C(0x0006DD), RUNE_C(0x0006DD), WB_NU}, + {RUNE_C(0x0006DF), RUNE_C(0x0006E4), WB_EXTEND}, + {RUNE_C(0x0006E5), RUNE_C(0x0006E6), WB_LE}, + {RUNE_C(0x0006E7), RUNE_C(0x0006E8), WB_EXTEND}, + {RUNE_C(0x0006EA), RUNE_C(0x0006ED), WB_EXTEND}, + {RUNE_C(0x0006EE), RUNE_C(0x0006EF), WB_LE}, + {RUNE_C(0x0006F0), RUNE_C(0x0006F9), WB_NU}, + {RUNE_C(0x0006FA), RUNE_C(0x0006FC), WB_LE}, + {RUNE_C(0x0006FF), RUNE_C(0x0006FF), WB_LE}, + {RUNE_C(0x00070F), RUNE_C(0x000710), WB_LE}, + {RUNE_C(0x000711), RUNE_C(0x000711), WB_EXTEND}, + {RUNE_C(0x000712), RUNE_C(0x00072F), WB_LE}, + {RUNE_C(0x000730), RUNE_C(0x00074A), WB_EXTEND}, + {RUNE_C(0x00074D), RUNE_C(0x0007A5), WB_LE}, + {RUNE_C(0x0007A6), RUNE_C(0x0007B0), WB_EXTEND}, + {RUNE_C(0x0007B1), RUNE_C(0x0007B1), WB_LE}, + {RUNE_C(0x0007C0), RUNE_C(0x0007C9), WB_NU}, + {RUNE_C(0x0007CA), RUNE_C(0x0007EA), WB_LE}, + {RUNE_C(0x0007EB), RUNE_C(0x0007F3), WB_EXTEND}, + {RUNE_C(0x0007F4), RUNE_C(0x0007F5), WB_LE}, + {RUNE_C(0x0007F8), RUNE_C(0x0007F8), WB_MN}, + {RUNE_C(0x0007FA), RUNE_C(0x0007FA), WB_LE}, + {RUNE_C(0x0007FD), RUNE_C(0x0007FD), WB_EXTEND}, + {RUNE_C(0x000800), RUNE_C(0x000815), WB_LE}, + {RUNE_C(0x000816), RUNE_C(0x000819), WB_EXTEND}, + {RUNE_C(0x00081A), RUNE_C(0x00081A), WB_LE}, + {RUNE_C(0x00081B), RUNE_C(0x000823), WB_EXTEND}, + {RUNE_C(0x000824), RUNE_C(0x000824), WB_LE}, + {RUNE_C(0x000825), RUNE_C(0x000827), WB_EXTEND}, + {RUNE_C(0x000828), RUNE_C(0x000828), WB_LE}, + {RUNE_C(0x000829), RUNE_C(0x00082D), WB_EXTEND}, + {RUNE_C(0x000840), RUNE_C(0x000858), WB_LE}, + {RUNE_C(0x000859), 
RUNE_C(0x00085B), WB_EXTEND}, + {RUNE_C(0x000860), RUNE_C(0x00086A), WB_LE}, + {RUNE_C(0x000870), RUNE_C(0x000887), WB_LE}, + {RUNE_C(0x000889), RUNE_C(0x00088E), WB_LE}, + {RUNE_C(0x000890), RUNE_C(0x000891), WB_NU}, + {RUNE_C(0x000898), RUNE_C(0x00089F), WB_EXTEND}, + {RUNE_C(0x0008A0), RUNE_C(0x0008C9), WB_LE}, + {RUNE_C(0x0008CA), RUNE_C(0x0008E1), WB_EXTEND}, + {RUNE_C(0x0008E2), RUNE_C(0x0008E2), WB_NU}, + {RUNE_C(0x0008E3), RUNE_C(0x000903), WB_EXTEND}, + {RUNE_C(0x000904), RUNE_C(0x000939), WB_LE}, + {RUNE_C(0x00093A), RUNE_C(0x00093C), WB_EXTEND}, + {RUNE_C(0x00093D), RUNE_C(0x00093D), WB_LE}, + {RUNE_C(0x00093E), RUNE_C(0x00094F), WB_EXTEND}, + {RUNE_C(0x000950), RUNE_C(0x000950), WB_LE}, + {RUNE_C(0x000951), RUNE_C(0x000957), WB_EXTEND}, + {RUNE_C(0x000958), RUNE_C(0x000961), WB_LE}, + {RUNE_C(0x000962), RUNE_C(0x000963), WB_EXTEND}, + {RUNE_C(0x000966), RUNE_C(0x00096F), WB_NU}, + {RUNE_C(0x000971), RUNE_C(0x000980), WB_LE}, + {RUNE_C(0x000981), RUNE_C(0x000983), WB_EXTEND}, + {RUNE_C(0x000985), RUNE_C(0x00098C), WB_LE}, + {RUNE_C(0x00098F), RUNE_C(0x000990), WB_LE}, + {RUNE_C(0x000993), RUNE_C(0x0009A8), WB_LE}, + {RUNE_C(0x0009AA), RUNE_C(0x0009B0), WB_LE}, + {RUNE_C(0x0009B2), RUNE_C(0x0009B2), WB_LE}, + {RUNE_C(0x0009B6), RUNE_C(0x0009B9), WB_LE}, + {RUNE_C(0x0009BC), RUNE_C(0x0009BC), WB_EXTEND}, + {RUNE_C(0x0009BD), RUNE_C(0x0009BD), WB_LE}, + {RUNE_C(0x0009BE), RUNE_C(0x0009C4), WB_EXTEND}, + {RUNE_C(0x0009C7), RUNE_C(0x0009C8), WB_EXTEND}, + {RUNE_C(0x0009CB), RUNE_C(0x0009CD), WB_EXTEND}, + {RUNE_C(0x0009CE), RUNE_C(0x0009CE), WB_LE}, + {RUNE_C(0x0009D7), RUNE_C(0x0009D7), WB_EXTEND}, + {RUNE_C(0x0009DC), RUNE_C(0x0009DD), WB_LE}, + {RUNE_C(0x0009DF), RUNE_C(0x0009E1), WB_LE}, + {RUNE_C(0x0009E2), RUNE_C(0x0009E3), WB_EXTEND}, + {RUNE_C(0x0009E6), RUNE_C(0x0009EF), WB_NU}, + {RUNE_C(0x0009F0), RUNE_C(0x0009F1), WB_LE}, + {RUNE_C(0x0009FC), RUNE_C(0x0009FC), WB_LE}, + {RUNE_C(0x0009FE), RUNE_C(0x0009FE), WB_EXTEND}, + {RUNE_C(0x000A01), 
RUNE_C(0x000A03), WB_EXTEND}, + {RUNE_C(0x000A05), RUNE_C(0x000A0A), WB_LE}, + {RUNE_C(0x000A0F), RUNE_C(0x000A10), WB_LE}, + {RUNE_C(0x000A13), RUNE_C(0x000A28), WB_LE}, + {RUNE_C(0x000A2A), RUNE_C(0x000A30), WB_LE}, + {RUNE_C(0x000A32), RUNE_C(0x000A33), WB_LE}, + {RUNE_C(0x000A35), RUNE_C(0x000A36), WB_LE}, + {RUNE_C(0x000A38), RUNE_C(0x000A39), WB_LE}, + {RUNE_C(0x000A3C), RUNE_C(0x000A3C), WB_EXTEND}, + {RUNE_C(0x000A3E), RUNE_C(0x000A42), WB_EXTEND}, + {RUNE_C(0x000A47), RUNE_C(0x000A48), WB_EXTEND}, + {RUNE_C(0x000A4B), RUNE_C(0x000A4D), WB_EXTEND}, + {RUNE_C(0x000A51), RUNE_C(0x000A51), WB_EXTEND}, + {RUNE_C(0x000A59), RUNE_C(0x000A5C), WB_LE}, + {RUNE_C(0x000A5E), RUNE_C(0x000A5E), WB_LE}, + {RUNE_C(0x000A66), RUNE_C(0x000A6F), WB_NU}, + {RUNE_C(0x000A70), RUNE_C(0x000A71), WB_EXTEND}, + {RUNE_C(0x000A72), RUNE_C(0x000A74), WB_LE}, + {RUNE_C(0x000A75), RUNE_C(0x000A75), WB_EXTEND}, + {RUNE_C(0x000A81), RUNE_C(0x000A83), WB_EXTEND}, + {RUNE_C(0x000A85), RUNE_C(0x000A8D), WB_LE}, + {RUNE_C(0x000A8F), RUNE_C(0x000A91), WB_LE}, + {RUNE_C(0x000A93), RUNE_C(0x000AA8), WB_LE}, + {RUNE_C(0x000AAA), RUNE_C(0x000AB0), WB_LE}, + {RUNE_C(0x000AB2), RUNE_C(0x000AB3), WB_LE}, + {RUNE_C(0x000AB5), RUNE_C(0x000AB9), WB_LE}, + {RUNE_C(0x000ABC), RUNE_C(0x000ABC), WB_EXTEND}, + {RUNE_C(0x000ABD), RUNE_C(0x000ABD), WB_LE}, + {RUNE_C(0x000ABE), RUNE_C(0x000AC5), WB_EXTEND}, + {RUNE_C(0x000AC7), RUNE_C(0x000AC9), WB_EXTEND}, + {RUNE_C(0x000ACB), RUNE_C(0x000ACD), WB_EXTEND}, + {RUNE_C(0x000AD0), RUNE_C(0x000AD0), WB_LE}, + {RUNE_C(0x000AE0), RUNE_C(0x000AE1), WB_LE}, + {RUNE_C(0x000AE2), RUNE_C(0x000AE3), WB_EXTEND}, + {RUNE_C(0x000AE6), RUNE_C(0x000AEF), WB_NU}, + {RUNE_C(0x000AF9), RUNE_C(0x000AF9), WB_LE}, + {RUNE_C(0x000AFA), RUNE_C(0x000AFF), WB_EXTEND}, + {RUNE_C(0x000B01), RUNE_C(0x000B03), WB_EXTEND}, + {RUNE_C(0x000B05), RUNE_C(0x000B0C), WB_LE}, + {RUNE_C(0x000B0F), RUNE_C(0x000B10), WB_LE}, + {RUNE_C(0x000B13), RUNE_C(0x000B28), WB_LE}, + {RUNE_C(0x000B2A), 
RUNE_C(0x000B30), WB_LE}, + {RUNE_C(0x000B32), RUNE_C(0x000B33), WB_LE}, + {RUNE_C(0x000B35), RUNE_C(0x000B39), WB_LE}, + {RUNE_C(0x000B3C), RUNE_C(0x000B3C), WB_EXTEND}, + {RUNE_C(0x000B3D), RUNE_C(0x000B3D), WB_LE}, + {RUNE_C(0x000B3E), RUNE_C(0x000B44), WB_EXTEND}, + {RUNE_C(0x000B47), RUNE_C(0x000B48), WB_EXTEND}, + {RUNE_C(0x000B4B), RUNE_C(0x000B4D), WB_EXTEND}, + {RUNE_C(0x000B55), RUNE_C(0x000B57), WB_EXTEND}, + {RUNE_C(0x000B5C), RUNE_C(0x000B5D), WB_LE}, + {RUNE_C(0x000B5F), RUNE_C(0x000B61), WB_LE}, + {RUNE_C(0x000B62), RUNE_C(0x000B63), WB_EXTEND}, + {RUNE_C(0x000B66), RUNE_C(0x000B6F), WB_NU}, + {RUNE_C(0x000B71), RUNE_C(0x000B71), WB_LE}, + {RUNE_C(0x000B82), RUNE_C(0x000B82), WB_EXTEND}, + {RUNE_C(0x000B83), RUNE_C(0x000B83), WB_LE}, + {RUNE_C(0x000B85), RUNE_C(0x000B8A), WB_LE}, + {RUNE_C(0x000B8E), RUNE_C(0x000B90), WB_LE}, + {RUNE_C(0x000B92), RUNE_C(0x000B95), WB_LE}, + {RUNE_C(0x000B99), RUNE_C(0x000B9A), WB_LE}, + {RUNE_C(0x000B9C), RUNE_C(0x000B9C), WB_LE}, + {RUNE_C(0x000B9E), RUNE_C(0x000B9F), WB_LE}, + {RUNE_C(0x000BA3), RUNE_C(0x000BA4), WB_LE}, + {RUNE_C(0x000BA8), RUNE_C(0x000BAA), WB_LE}, + {RUNE_C(0x000BAE), RUNE_C(0x000BB9), WB_LE}, + {RUNE_C(0x000BBE), RUNE_C(0x000BC2), WB_EXTEND}, + {RUNE_C(0x000BC6), RUNE_C(0x000BC8), WB_EXTEND}, + {RUNE_C(0x000BCA), RUNE_C(0x000BCD), WB_EXTEND}, + {RUNE_C(0x000BD0), RUNE_C(0x000BD0), WB_LE}, + {RUNE_C(0x000BD7), RUNE_C(0x000BD7), WB_EXTEND}, + {RUNE_C(0x000BE6), RUNE_C(0x000BEF), WB_NU}, + {RUNE_C(0x000C00), RUNE_C(0x000C04), WB_EXTEND}, + {RUNE_C(0x000C05), RUNE_C(0x000C0C), WB_LE}, + {RUNE_C(0x000C0E), RUNE_C(0x000C10), WB_LE}, + {RUNE_C(0x000C12), RUNE_C(0x000C28), WB_LE}, + {RUNE_C(0x000C2A), RUNE_C(0x000C39), WB_LE}, + {RUNE_C(0x000C3C), RUNE_C(0x000C3C), WB_EXTEND}, + {RUNE_C(0x000C3D), RUNE_C(0x000C3D), WB_LE}, + {RUNE_C(0x000C3E), RUNE_C(0x000C44), WB_EXTEND}, + {RUNE_C(0x000C46), RUNE_C(0x000C48), WB_EXTEND}, + {RUNE_C(0x000C4A), RUNE_C(0x000C4D), WB_EXTEND}, + {RUNE_C(0x000C55), 
RUNE_C(0x000C56), WB_EXTEND}, + {RUNE_C(0x000C58), RUNE_C(0x000C5A), WB_LE}, + {RUNE_C(0x000C5D), RUNE_C(0x000C5D), WB_LE}, + {RUNE_C(0x000C60), RUNE_C(0x000C61), WB_LE}, + {RUNE_C(0x000C62), RUNE_C(0x000C63), WB_EXTEND}, + {RUNE_C(0x000C66), RUNE_C(0x000C6F), WB_NU}, + {RUNE_C(0x000C80), RUNE_C(0x000C80), WB_LE}, + {RUNE_C(0x000C81), RUNE_C(0x000C83), WB_EXTEND}, + {RUNE_C(0x000C85), RUNE_C(0x000C8C), WB_LE}, + {RUNE_C(0x000C8E), RUNE_C(0x000C90), WB_LE}, + {RUNE_C(0x000C92), RUNE_C(0x000CA8), WB_LE}, + {RUNE_C(0x000CAA), RUNE_C(0x000CB3), WB_LE}, + {RUNE_C(0x000CB5), RUNE_C(0x000CB9), WB_LE}, + {RUNE_C(0x000CBC), RUNE_C(0x000CBC), WB_EXTEND}, + {RUNE_C(0x000CBD), RUNE_C(0x000CBD), WB_LE}, + {RUNE_C(0x000CBE), RUNE_C(0x000CC4), WB_EXTEND}, + {RUNE_C(0x000CC6), RUNE_C(0x000CC8), WB_EXTEND}, + {RUNE_C(0x000CCA), RUNE_C(0x000CCD), WB_EXTEND}, + {RUNE_C(0x000CD5), RUNE_C(0x000CD6), WB_EXTEND}, + {RUNE_C(0x000CDD), RUNE_C(0x000CDE), WB_LE}, + {RUNE_C(0x000CE0), RUNE_C(0x000CE1), WB_LE}, + {RUNE_C(0x000CE2), RUNE_C(0x000CE3), WB_EXTEND}, + {RUNE_C(0x000CE6), RUNE_C(0x000CEF), WB_NU}, + {RUNE_C(0x000CF1), RUNE_C(0x000CF2), WB_LE}, + {RUNE_C(0x000CF3), RUNE_C(0x000CF3), WB_EXTEND}, + {RUNE_C(0x000D00), RUNE_C(0x000D03), WB_EXTEND}, + {RUNE_C(0x000D04), RUNE_C(0x000D0C), WB_LE}, + {RUNE_C(0x000D0E), RUNE_C(0x000D10), WB_LE}, + {RUNE_C(0x000D12), RUNE_C(0x000D3A), WB_LE}, + {RUNE_C(0x000D3B), RUNE_C(0x000D3C), WB_EXTEND}, + {RUNE_C(0x000D3D), RUNE_C(0x000D3D), WB_LE}, + {RUNE_C(0x000D3E), RUNE_C(0x000D44), WB_EXTEND}, + {RUNE_C(0x000D46), RUNE_C(0x000D48), WB_EXTEND}, + {RUNE_C(0x000D4A), RUNE_C(0x000D4D), WB_EXTEND}, + {RUNE_C(0x000D4E), RUNE_C(0x000D4E), WB_LE}, + {RUNE_C(0x000D54), RUNE_C(0x000D56), WB_LE}, + {RUNE_C(0x000D57), RUNE_C(0x000D57), WB_EXTEND}, + {RUNE_C(0x000D5F), RUNE_C(0x000D61), WB_LE}, + {RUNE_C(0x000D62), RUNE_C(0x000D63), WB_EXTEND}, + {RUNE_C(0x000D66), RUNE_C(0x000D6F), WB_NU}, + {RUNE_C(0x000D7A), RUNE_C(0x000D7F), WB_LE}, + {RUNE_C(0x000D81), 
RUNE_C(0x000D83), WB_EXTEND}, + {RUNE_C(0x000D85), RUNE_C(0x000D96), WB_LE}, + {RUNE_C(0x000D9A), RUNE_C(0x000DB1), WB_LE}, + {RUNE_C(0x000DB3), RUNE_C(0x000DBB), WB_LE}, + {RUNE_C(0x000DBD), RUNE_C(0x000DBD), WB_LE}, + {RUNE_C(0x000DC0), RUNE_C(0x000DC6), WB_LE}, + {RUNE_C(0x000DCA), RUNE_C(0x000DCA), WB_EXTEND}, + {RUNE_C(0x000DCF), RUNE_C(0x000DD4), WB_EXTEND}, + {RUNE_C(0x000DD6), RUNE_C(0x000DD6), WB_EXTEND}, + {RUNE_C(0x000DD8), RUNE_C(0x000DDF), WB_EXTEND}, + {RUNE_C(0x000DE6), RUNE_C(0x000DEF), WB_NU}, + {RUNE_C(0x000DF2), RUNE_C(0x000DF3), WB_EXTEND}, + {RUNE_C(0x000E31), RUNE_C(0x000E31), WB_EXTEND}, + {RUNE_C(0x000E34), RUNE_C(0x000E3A), WB_EXTEND}, + {RUNE_C(0x000E47), RUNE_C(0x000E4E), WB_EXTEND}, + {RUNE_C(0x000E50), RUNE_C(0x000E59), WB_NU}, + {RUNE_C(0x000EB1), RUNE_C(0x000EB1), WB_EXTEND}, + {RUNE_C(0x000EB4), RUNE_C(0x000EBC), WB_EXTEND}, + {RUNE_C(0x000EC8), RUNE_C(0x000ECE), WB_EXTEND}, + {RUNE_C(0x000ED0), RUNE_C(0x000ED9), WB_NU}, + {RUNE_C(0x000F00), RUNE_C(0x000F00), WB_LE}, + {RUNE_C(0x000F18), RUNE_C(0x000F19), WB_EXTEND}, + {RUNE_C(0x000F20), RUNE_C(0x000F29), WB_NU}, + {RUNE_C(0x000F35), RUNE_C(0x000F35), WB_EXTEND}, + {RUNE_C(0x000F37), RUNE_C(0x000F37), WB_EXTEND}, + {RUNE_C(0x000F39), RUNE_C(0x000F39), WB_EXTEND}, + {RUNE_C(0x000F3E), RUNE_C(0x000F3F), WB_EXTEND}, + {RUNE_C(0x000F40), RUNE_C(0x000F47), WB_LE}, + {RUNE_C(0x000F49), RUNE_C(0x000F6C), WB_LE}, + {RUNE_C(0x000F71), RUNE_C(0x000F84), WB_EXTEND}, + {RUNE_C(0x000F86), RUNE_C(0x000F87), WB_EXTEND}, + {RUNE_C(0x000F88), RUNE_C(0x000F8C), WB_LE}, + {RUNE_C(0x000F8D), RUNE_C(0x000F97), WB_EXTEND}, + {RUNE_C(0x000F99), RUNE_C(0x000FBC), WB_EXTEND}, + {RUNE_C(0x000FC6), RUNE_C(0x000FC6), WB_EXTEND}, + {RUNE_C(0x00102B), RUNE_C(0x00103E), WB_EXTEND}, + {RUNE_C(0x001040), RUNE_C(0x001049), WB_NU}, + {RUNE_C(0x001056), RUNE_C(0x001059), WB_EXTEND}, + {RUNE_C(0x00105E), RUNE_C(0x001060), WB_EXTEND}, + {RUNE_C(0x001062), RUNE_C(0x001064), WB_EXTEND}, + {RUNE_C(0x001067), 
RUNE_C(0x00106D), WB_EXTEND}, + {RUNE_C(0x001071), RUNE_C(0x001074), WB_EXTEND}, + {RUNE_C(0x001082), RUNE_C(0x00108D), WB_EXTEND}, + {RUNE_C(0x00108F), RUNE_C(0x00108F), WB_EXTEND}, + {RUNE_C(0x001090), RUNE_C(0x001099), WB_NU}, + {RUNE_C(0x00109A), RUNE_C(0x00109D), WB_EXTEND}, + {RUNE_C(0x0010A0), RUNE_C(0x0010C5), WB_LE}, + {RUNE_C(0x0010C7), RUNE_C(0x0010C7), WB_LE}, + {RUNE_C(0x0010CD), RUNE_C(0x0010CD), WB_LE}, + {RUNE_C(0x0010D0), RUNE_C(0x0010FA), WB_LE}, + {RUNE_C(0x0010FC), RUNE_C(0x001248), WB_LE}, + {RUNE_C(0x00124A), RUNE_C(0x00124D), WB_LE}, + {RUNE_C(0x001250), RUNE_C(0x001256), WB_LE}, + {RUNE_C(0x001258), RUNE_C(0x001258), WB_LE}, + {RUNE_C(0x00125A), RUNE_C(0x00125D), WB_LE}, + {RUNE_C(0x001260), RUNE_C(0x001288), WB_LE}, + {RUNE_C(0x00128A), RUNE_C(0x00128D), WB_LE}, + {RUNE_C(0x001290), RUNE_C(0x0012B0), WB_LE}, + {RUNE_C(0x0012B2), RUNE_C(0x0012B5), WB_LE}, + {RUNE_C(0x0012B8), RUNE_C(0x0012BE), WB_LE}, + {RUNE_C(0x0012C0), RUNE_C(0x0012C0), WB_LE}, + {RUNE_C(0x0012C2), RUNE_C(0x0012C5), WB_LE}, + {RUNE_C(0x0012C8), RUNE_C(0x0012D6), WB_LE}, + {RUNE_C(0x0012D8), RUNE_C(0x001310), WB_LE}, + {RUNE_C(0x001312), RUNE_C(0x001315), WB_LE}, + {RUNE_C(0x001318), RUNE_C(0x00135A), WB_LE}, + {RUNE_C(0x00135D), RUNE_C(0x00135F), WB_EXTEND}, + {RUNE_C(0x001380), RUNE_C(0x00138F), WB_LE}, + {RUNE_C(0x0013A0), RUNE_C(0x0013F5), WB_LE}, + {RUNE_C(0x0013F8), RUNE_C(0x0013FD), WB_LE}, + {RUNE_C(0x001401), RUNE_C(0x00166C), WB_LE}, + {RUNE_C(0x00166F), RUNE_C(0x00167F), WB_LE}, + {RUNE_C(0x001680), RUNE_C(0x001680), WB_WSEGSPACE}, + {RUNE_C(0x001681), RUNE_C(0x00169A), WB_LE}, + {RUNE_C(0x0016A0), RUNE_C(0x0016EA), WB_LE}, + {RUNE_C(0x0016EE), RUNE_C(0x0016F8), WB_LE}, + {RUNE_C(0x001700), RUNE_C(0x001711), WB_LE}, + {RUNE_C(0x001712), RUNE_C(0x001715), WB_EXTEND}, + {RUNE_C(0x00171F), RUNE_C(0x001731), WB_LE}, + {RUNE_C(0x001732), RUNE_C(0x001734), WB_EXTEND}, + {RUNE_C(0x001740), RUNE_C(0x001751), WB_LE}, + {RUNE_C(0x001752), RUNE_C(0x001753), WB_EXTEND}, + 
{RUNE_C(0x001760), RUNE_C(0x00176C), WB_LE}, + {RUNE_C(0x00176E), RUNE_C(0x001770), WB_LE}, + {RUNE_C(0x001772), RUNE_C(0x001773), WB_EXTEND}, + {RUNE_C(0x0017B4), RUNE_C(0x0017D3), WB_EXTEND}, + {RUNE_C(0x0017DD), RUNE_C(0x0017DD), WB_EXTEND}, + {RUNE_C(0x0017E0), RUNE_C(0x0017E9), WB_NU}, + {RUNE_C(0x00180B), RUNE_C(0x00180D), WB_EXTEND}, + {RUNE_C(0x00180E), RUNE_C(0x00180E), WB_FO}, + {RUNE_C(0x00180F), RUNE_C(0x00180F), WB_EXTEND}, + {RUNE_C(0x001810), RUNE_C(0x001819), WB_NU}, + {RUNE_C(0x001820), RUNE_C(0x001878), WB_LE}, + {RUNE_C(0x001880), RUNE_C(0x001884), WB_LE}, + {RUNE_C(0x001885), RUNE_C(0x001886), WB_EXTEND}, + {RUNE_C(0x001887), RUNE_C(0x0018A8), WB_LE}, + {RUNE_C(0x0018A9), RUNE_C(0x0018A9), WB_EXTEND}, + {RUNE_C(0x0018AA), RUNE_C(0x0018AA), WB_LE}, + {RUNE_C(0x0018B0), RUNE_C(0x0018F5), WB_LE}, + {RUNE_C(0x001900), RUNE_C(0x00191E), WB_LE}, + {RUNE_C(0x001920), RUNE_C(0x00192B), WB_EXTEND}, + {RUNE_C(0x001930), RUNE_C(0x00193B), WB_EXTEND}, + {RUNE_C(0x001946), RUNE_C(0x00194F), WB_NU}, + {RUNE_C(0x0019D0), RUNE_C(0x0019D9), WB_NU}, + {RUNE_C(0x001A00), RUNE_C(0x001A16), WB_LE}, + {RUNE_C(0x001A17), RUNE_C(0x001A1B), WB_EXTEND}, + {RUNE_C(0x001A55), RUNE_C(0x001A5E), WB_EXTEND}, + {RUNE_C(0x001A60), RUNE_C(0x001A7C), WB_EXTEND}, + {RUNE_C(0x001A7F), RUNE_C(0x001A7F), WB_EXTEND}, + {RUNE_C(0x001A80), RUNE_C(0x001A89), WB_NU}, + {RUNE_C(0x001A90), RUNE_C(0x001A99), WB_NU}, + {RUNE_C(0x001AB0), RUNE_C(0x001ACE), WB_EXTEND}, + {RUNE_C(0x001B00), RUNE_C(0x001B04), WB_EXTEND}, + {RUNE_C(0x001B05), RUNE_C(0x001B33), WB_LE}, + {RUNE_C(0x001B34), RUNE_C(0x001B44), WB_EXTEND}, + {RUNE_C(0x001B45), RUNE_C(0x001B4C), WB_LE}, + {RUNE_C(0x001B50), RUNE_C(0x001B59), WB_NU}, + {RUNE_C(0x001B6B), RUNE_C(0x001B73), WB_EXTEND}, + {RUNE_C(0x001B80), RUNE_C(0x001B82), WB_EXTEND}, + {RUNE_C(0x001B83), RUNE_C(0x001BA0), WB_LE}, + {RUNE_C(0x001BA1), RUNE_C(0x001BAD), WB_EXTEND}, + {RUNE_C(0x001BAE), RUNE_C(0x001BAF), WB_LE}, + {RUNE_C(0x001BB0), RUNE_C(0x001BB9), 
WB_NU}, + {RUNE_C(0x001BBA), RUNE_C(0x001BE5), WB_LE}, + {RUNE_C(0x001BE6), RUNE_C(0x001BF3), WB_EXTEND}, + {RUNE_C(0x001C00), RUNE_C(0x001C23), WB_LE}, + {RUNE_C(0x001C24), RUNE_C(0x001C37), WB_EXTEND}, + {RUNE_C(0x001C40), RUNE_C(0x001C49), WB_NU}, + {RUNE_C(0x001C4D), RUNE_C(0x001C4F), WB_LE}, + {RUNE_C(0x001C50), RUNE_C(0x001C59), WB_NU}, + {RUNE_C(0x001C5A), RUNE_C(0x001C7D), WB_LE}, + {RUNE_C(0x001C80), RUNE_C(0x001C88), WB_LE}, + {RUNE_C(0x001C90), RUNE_C(0x001CBA), WB_LE}, + {RUNE_C(0x001CBD), RUNE_C(0x001CBF), WB_LE}, + {RUNE_C(0x001CD0), RUNE_C(0x001CD2), WB_EXTEND}, + {RUNE_C(0x001CD4), RUNE_C(0x001CE8), WB_EXTEND}, + {RUNE_C(0x001CE9), RUNE_C(0x001CEC), WB_LE}, + {RUNE_C(0x001CED), RUNE_C(0x001CED), WB_EXTEND}, + {RUNE_C(0x001CEE), RUNE_C(0x001CF3), WB_LE}, + {RUNE_C(0x001CF4), RUNE_C(0x001CF4), WB_EXTEND}, + {RUNE_C(0x001CF5), RUNE_C(0x001CF6), WB_LE}, + {RUNE_C(0x001CF7), RUNE_C(0x001CF9), WB_EXTEND}, + {RUNE_C(0x001CFA), RUNE_C(0x001CFA), WB_LE}, + {RUNE_C(0x001D00), RUNE_C(0x001DBF), WB_LE}, + {RUNE_C(0x001DC0), RUNE_C(0x001DFF), WB_EXTEND}, + {RUNE_C(0x001E00), RUNE_C(0x001F15), WB_LE}, + {RUNE_C(0x001F18), RUNE_C(0x001F1D), WB_LE}, + {RUNE_C(0x001F20), RUNE_C(0x001F45), WB_LE}, + {RUNE_C(0x001F48), RUNE_C(0x001F4D), WB_LE}, + {RUNE_C(0x001F50), RUNE_C(0x001F57), WB_LE}, + {RUNE_C(0x001F59), RUNE_C(0x001F59), WB_LE}, + {RUNE_C(0x001F5B), RUNE_C(0x001F5B), WB_LE}, + {RUNE_C(0x001F5D), RUNE_C(0x001F5D), WB_LE}, + {RUNE_C(0x001F5F), RUNE_C(0x001F7D), WB_LE}, + {RUNE_C(0x001F80), RUNE_C(0x001FB4), WB_LE}, + {RUNE_C(0x001FB6), RUNE_C(0x001FBC), WB_LE}, + {RUNE_C(0x001FBE), RUNE_C(0x001FBE), WB_LE}, + {RUNE_C(0x001FC2), RUNE_C(0x001FC4), WB_LE}, + {RUNE_C(0x001FC6), RUNE_C(0x001FCC), WB_LE}, + {RUNE_C(0x001FD0), RUNE_C(0x001FD3), WB_LE}, + {RUNE_C(0x001FD6), RUNE_C(0x001FDB), WB_LE}, + {RUNE_C(0x001FE0), RUNE_C(0x001FEC), WB_LE}, + {RUNE_C(0x001FF2), RUNE_C(0x001FF4), WB_LE}, + {RUNE_C(0x001FF6), RUNE_C(0x001FFC), WB_LE}, + {RUNE_C(0x002000), 
RUNE_C(0x002006), WB_WSEGSPACE}, + {RUNE_C(0x002008), RUNE_C(0x00200A), WB_WSEGSPACE}, + {RUNE_C(0x00200C), RUNE_C(0x00200C), WB_EXTEND}, + {RUNE_C(0x00200D), RUNE_C(0x00200D), WB_ZWJ}, + {RUNE_C(0x00200E), RUNE_C(0x00200F), WB_FO}, + {RUNE_C(0x002018), RUNE_C(0x002019), WB_MB}, + {RUNE_C(0x002024), RUNE_C(0x002024), WB_MB}, + {RUNE_C(0x002027), RUNE_C(0x002027), WB_ML}, + {RUNE_C(0x002028), RUNE_C(0x002029), WB_NL}, + {RUNE_C(0x00202A), RUNE_C(0x00202E), WB_FO}, + {RUNE_C(0x00202F), RUNE_C(0x00202F), WB_EX}, + {RUNE_C(0x00203F), RUNE_C(0x002040), WB_EX}, + {RUNE_C(0x002044), RUNE_C(0x002044), WB_MN}, + {RUNE_C(0x002054), RUNE_C(0x002054), WB_EX}, + {RUNE_C(0x00205F), RUNE_C(0x00205F), WB_WSEGSPACE}, + {RUNE_C(0x002060), RUNE_C(0x002064), WB_FO}, + {RUNE_C(0x002066), RUNE_C(0x00206F), WB_FO}, + {RUNE_C(0x002071), RUNE_C(0x002071), WB_LE}, + {RUNE_C(0x00207F), RUNE_C(0x00207F), WB_LE}, + {RUNE_C(0x002090), RUNE_C(0x00209C), WB_LE}, + {RUNE_C(0x0020D0), RUNE_C(0x0020F0), WB_EXTEND}, + {RUNE_C(0x002102), RUNE_C(0x002102), WB_LE}, + {RUNE_C(0x002107), RUNE_C(0x002107), WB_LE}, + {RUNE_C(0x00210A), RUNE_C(0x002113), WB_LE}, + {RUNE_C(0x002115), RUNE_C(0x002115), WB_LE}, + {RUNE_C(0x002119), RUNE_C(0x00211D), WB_LE}, + {RUNE_C(0x002124), RUNE_C(0x002124), WB_LE}, + {RUNE_C(0x002126), RUNE_C(0x002126), WB_LE}, + {RUNE_C(0x002128), RUNE_C(0x002128), WB_LE}, + {RUNE_C(0x00212A), RUNE_C(0x00212D), WB_LE}, + {RUNE_C(0x00212F), RUNE_C(0x002139), WB_LE}, + {RUNE_C(0x00213C), RUNE_C(0x00213F), WB_LE}, + {RUNE_C(0x002145), RUNE_C(0x002149), WB_LE}, + {RUNE_C(0x00214E), RUNE_C(0x00214E), WB_LE}, + {RUNE_C(0x002160), RUNE_C(0x002188), WB_LE}, + {RUNE_C(0x0024B6), RUNE_C(0x0024E9), WB_LE}, + {RUNE_C(0x002C00), RUNE_C(0x002CE4), WB_LE}, + {RUNE_C(0x002CEB), RUNE_C(0x002CEE), WB_LE}, + {RUNE_C(0x002CEF), RUNE_C(0x002CF1), WB_EXTEND}, + {RUNE_C(0x002CF2), RUNE_C(0x002CF3), WB_LE}, + {RUNE_C(0x002D00), RUNE_C(0x002D25), WB_LE}, + {RUNE_C(0x002D27), RUNE_C(0x002D27), WB_LE}, + 
{RUNE_C(0x002D2D), RUNE_C(0x002D2D), WB_LE}, + {RUNE_C(0x002D30), RUNE_C(0x002D67), WB_LE}, + {RUNE_C(0x002D6F), RUNE_C(0x002D6F), WB_LE}, + {RUNE_C(0x002D7F), RUNE_C(0x002D7F), WB_EXTEND}, + {RUNE_C(0x002D80), RUNE_C(0x002D96), WB_LE}, + {RUNE_C(0x002DA0), RUNE_C(0x002DA6), WB_LE}, + {RUNE_C(0x002DA8), RUNE_C(0x002DAE), WB_LE}, + {RUNE_C(0x002DB0), RUNE_C(0x002DB6), WB_LE}, + {RUNE_C(0x002DB8), RUNE_C(0x002DBE), WB_LE}, + {RUNE_C(0x002DC0), RUNE_C(0x002DC6), WB_LE}, + {RUNE_C(0x002DC8), RUNE_C(0x002DCE), WB_LE}, + {RUNE_C(0x002DD0), RUNE_C(0x002DD6), WB_LE}, + {RUNE_C(0x002DD8), RUNE_C(0x002DDE), WB_LE}, + {RUNE_C(0x002DE0), RUNE_C(0x002DFF), WB_EXTEND}, + {RUNE_C(0x002E2F), RUNE_C(0x002E2F), WB_LE}, + {RUNE_C(0x003000), RUNE_C(0x003000), WB_WSEGSPACE}, + {RUNE_C(0x003005), RUNE_C(0x003005), WB_LE}, + {RUNE_C(0x00302A), RUNE_C(0x00302F), WB_EXTEND}, + {RUNE_C(0x003031), RUNE_C(0x003035), WB_KA}, + {RUNE_C(0x00303B), RUNE_C(0x00303C), WB_LE}, + {RUNE_C(0x003099), RUNE_C(0x00309A), WB_EXTEND}, + {RUNE_C(0x00309B), RUNE_C(0x00309C), WB_KA}, + {RUNE_C(0x0030A0), RUNE_C(0x0030FA), WB_KA}, + {RUNE_C(0x0030FC), RUNE_C(0x0030FF), WB_KA}, + {RUNE_C(0x003105), RUNE_C(0x00312F), WB_LE}, + {RUNE_C(0x003131), RUNE_C(0x00318E), WB_LE}, + {RUNE_C(0x0031A0), RUNE_C(0x0031BF), WB_LE}, + {RUNE_C(0x0031F0), RUNE_C(0x0031FF), WB_KA}, + {RUNE_C(0x0032D0), RUNE_C(0x0032FE), WB_KA}, + {RUNE_C(0x003300), RUNE_C(0x003357), WB_KA}, + {RUNE_C(0x00A000), RUNE_C(0x00A48C), WB_LE}, + {RUNE_C(0x00A4D0), RUNE_C(0x00A4FD), WB_LE}, + {RUNE_C(0x00A500), RUNE_C(0x00A60C), WB_LE}, + {RUNE_C(0x00A610), RUNE_C(0x00A61F), WB_LE}, + {RUNE_C(0x00A620), RUNE_C(0x00A629), WB_NU}, + {RUNE_C(0x00A62A), RUNE_C(0x00A62B), WB_LE}, + {RUNE_C(0x00A640), RUNE_C(0x00A66E), WB_LE}, + {RUNE_C(0x00A66F), RUNE_C(0x00A672), WB_EXTEND}, + {RUNE_C(0x00A674), RUNE_C(0x00A67D), WB_EXTEND}, + {RUNE_C(0x00A67F), RUNE_C(0x00A69D), WB_LE}, + {RUNE_C(0x00A69E), RUNE_C(0x00A69F), WB_EXTEND}, + {RUNE_C(0x00A6A0), RUNE_C(0x00A6EF), 
WB_LE}, + {RUNE_C(0x00A6F0), RUNE_C(0x00A6F1), WB_EXTEND}, + {RUNE_C(0x00A708), RUNE_C(0x00A7CA), WB_LE}, + {RUNE_C(0x00A7D0), RUNE_C(0x00A7D1), WB_LE}, + {RUNE_C(0x00A7D3), RUNE_C(0x00A7D3), WB_LE}, + {RUNE_C(0x00A7D5), RUNE_C(0x00A7D9), WB_LE}, + {RUNE_C(0x00A7F2), RUNE_C(0x00A801), WB_LE}, + {RUNE_C(0x00A802), RUNE_C(0x00A802), WB_EXTEND}, + {RUNE_C(0x00A803), RUNE_C(0x00A805), WB_LE}, + {RUNE_C(0x00A806), RUNE_C(0x00A806), WB_EXTEND}, + {RUNE_C(0x00A807), RUNE_C(0x00A80A), WB_LE}, + {RUNE_C(0x00A80B), RUNE_C(0x00A80B), WB_EXTEND}, + {RUNE_C(0x00A80C), RUNE_C(0x00A822), WB_LE}, + {RUNE_C(0x00A823), RUNE_C(0x00A827), WB_EXTEND}, + {RUNE_C(0x00A82C), RUNE_C(0x00A82C), WB_EXTEND}, + {RUNE_C(0x00A840), RUNE_C(0x00A873), WB_LE}, + {RUNE_C(0x00A880), RUNE_C(0x00A881), WB_EXTEND}, + {RUNE_C(0x00A882), RUNE_C(0x00A8B3), WB_LE}, + {RUNE_C(0x00A8B4), RUNE_C(0x00A8C5), WB_EXTEND}, + {RUNE_C(0x00A8D0), RUNE_C(0x00A8D9), WB_NU}, + {RUNE_C(0x00A8E0), RUNE_C(0x00A8F1), WB_EXTEND}, + {RUNE_C(0x00A8F2), RUNE_C(0x00A8F7), WB_LE}, + {RUNE_C(0x00A8FB), RUNE_C(0x00A8FB), WB_LE}, + {RUNE_C(0x00A8FD), RUNE_C(0x00A8FE), WB_LE}, + {RUNE_C(0x00A8FF), RUNE_C(0x00A8FF), WB_EXTEND}, + {RUNE_C(0x00A900), RUNE_C(0x00A909), WB_NU}, + {RUNE_C(0x00A90A), RUNE_C(0x00A925), WB_LE}, + {RUNE_C(0x00A926), RUNE_C(0x00A92D), WB_EXTEND}, + {RUNE_C(0x00A930), RUNE_C(0x00A946), WB_LE}, + {RUNE_C(0x00A947), RUNE_C(0x00A953), WB_EXTEND}, + {RUNE_C(0x00A960), RUNE_C(0x00A97C), WB_LE}, + {RUNE_C(0x00A980), RUNE_C(0x00A983), WB_EXTEND}, + {RUNE_C(0x00A984), RUNE_C(0x00A9B2), WB_LE}, + {RUNE_C(0x00A9B3), RUNE_C(0x00A9C0), WB_EXTEND}, + {RUNE_C(0x00A9CF), RUNE_C(0x00A9CF), WB_LE}, + {RUNE_C(0x00A9D0), RUNE_C(0x00A9D9), WB_NU}, + {RUNE_C(0x00A9E5), RUNE_C(0x00A9E5), WB_EXTEND}, + {RUNE_C(0x00A9F0), RUNE_C(0x00A9F9), WB_NU}, + {RUNE_C(0x00AA00), RUNE_C(0x00AA28), WB_LE}, + {RUNE_C(0x00AA29), RUNE_C(0x00AA36), WB_EXTEND}, + {RUNE_C(0x00AA40), RUNE_C(0x00AA42), WB_LE}, + {RUNE_C(0x00AA43), RUNE_C(0x00AA43), 
WB_EXTEND}, + {RUNE_C(0x00AA44), RUNE_C(0x00AA4B), WB_LE}, + {RUNE_C(0x00AA4C), RUNE_C(0x00AA4D), WB_EXTEND}, + {RUNE_C(0x00AA50), RUNE_C(0x00AA59), WB_NU}, + {RUNE_C(0x00AA7B), RUNE_C(0x00AA7D), WB_EXTEND}, + {RUNE_C(0x00AAB0), RUNE_C(0x00AAB0), WB_EXTEND}, + {RUNE_C(0x00AAB2), RUNE_C(0x00AAB4), WB_EXTEND}, + {RUNE_C(0x00AAB7), RUNE_C(0x00AAB8), WB_EXTEND}, + {RUNE_C(0x00AABE), RUNE_C(0x00AABF), WB_EXTEND}, + {RUNE_C(0x00AAC1), RUNE_C(0x00AAC1), WB_EXTEND}, + {RUNE_C(0x00AAE0), RUNE_C(0x00AAEA), WB_LE}, + {RUNE_C(0x00AAEB), RUNE_C(0x00AAEF), WB_EXTEND}, + {RUNE_C(0x00AAF2), RUNE_C(0x00AAF4), WB_LE}, + {RUNE_C(0x00AAF5), RUNE_C(0x00AAF6), WB_EXTEND}, + {RUNE_C(0x00AB01), RUNE_C(0x00AB06), WB_LE}, + {RUNE_C(0x00AB09), RUNE_C(0x00AB0E), WB_LE}, + {RUNE_C(0x00AB11), RUNE_C(0x00AB16), WB_LE}, + {RUNE_C(0x00AB20), RUNE_C(0x00AB26), WB_LE}, + {RUNE_C(0x00AB28), RUNE_C(0x00AB2E), WB_LE}, + {RUNE_C(0x00AB30), RUNE_C(0x00AB69), WB_LE}, + {RUNE_C(0x00AB70), RUNE_C(0x00ABE2), WB_LE}, + {RUNE_C(0x00ABE3), RUNE_C(0x00ABEA), WB_EXTEND}, + {RUNE_C(0x00ABEC), RUNE_C(0x00ABED), WB_EXTEND}, + {RUNE_C(0x00ABF0), RUNE_C(0x00ABF9), WB_NU}, + {RUNE_C(0x00AC00), RUNE_C(0x00D7A3), WB_LE}, + {RUNE_C(0x00D7B0), RUNE_C(0x00D7C6), WB_LE}, + {RUNE_C(0x00D7CB), RUNE_C(0x00D7FB), WB_LE}, + {RUNE_C(0x00FB00), RUNE_C(0x00FB06), WB_LE}, + {RUNE_C(0x00FB13), RUNE_C(0x00FB17), WB_LE}, + {RUNE_C(0x00FB1D), RUNE_C(0x00FB1D), WB_HL}, + {RUNE_C(0x00FB1E), RUNE_C(0x00FB1E), WB_EXTEND}, + {RUNE_C(0x00FB1F), RUNE_C(0x00FB28), WB_HL}, + {RUNE_C(0x00FB2A), RUNE_C(0x00FB36), WB_HL}, + {RUNE_C(0x00FB38), RUNE_C(0x00FB3C), WB_HL}, + {RUNE_C(0x00FB3E), RUNE_C(0x00FB3E), WB_HL}, + {RUNE_C(0x00FB40), RUNE_C(0x00FB41), WB_HL}, + {RUNE_C(0x00FB43), RUNE_C(0x00FB44), WB_HL}, + {RUNE_C(0x00FB46), RUNE_C(0x00FB4F), WB_HL}, + {RUNE_C(0x00FB50), RUNE_C(0x00FBB1), WB_LE}, + {RUNE_C(0x00FBD3), RUNE_C(0x00FD3D), WB_LE}, + {RUNE_C(0x00FD50), RUNE_C(0x00FD8F), WB_LE}, + {RUNE_C(0x00FD92), RUNE_C(0x00FDC7), WB_LE}, + 
{RUNE_C(0x00FDF0), RUNE_C(0x00FDFB), WB_LE}, + {RUNE_C(0x00FE00), RUNE_C(0x00FE0F), WB_EXTEND}, + {RUNE_C(0x00FE10), RUNE_C(0x00FE10), WB_MN}, + {RUNE_C(0x00FE13), RUNE_C(0x00FE13), WB_ML}, + {RUNE_C(0x00FE14), RUNE_C(0x00FE14), WB_MN}, + {RUNE_C(0x00FE20), RUNE_C(0x00FE2F), WB_EXTEND}, + {RUNE_C(0x00FE33), RUNE_C(0x00FE34), WB_EX}, + {RUNE_C(0x00FE4D), RUNE_C(0x00FE4F), WB_EX}, + {RUNE_C(0x00FE50), RUNE_C(0x00FE50), WB_MN}, + {RUNE_C(0x00FE52), RUNE_C(0x00FE52), WB_MB}, + {RUNE_C(0x00FE54), RUNE_C(0x00FE54), WB_MN}, + {RUNE_C(0x00FE55), RUNE_C(0x00FE55), WB_ML}, + {RUNE_C(0x00FE70), RUNE_C(0x00FE74), WB_LE}, + {RUNE_C(0x00FE76), RUNE_C(0x00FEFC), WB_LE}, + {RUNE_C(0x00FEFF), RUNE_C(0x00FEFF), WB_FO}, + {RUNE_C(0x00FF07), RUNE_C(0x00FF07), WB_MB}, + {RUNE_C(0x00FF0C), RUNE_C(0x00FF0C), WB_MN}, + {RUNE_C(0x00FF0E), RUNE_C(0x00FF0E), WB_MB}, + {RUNE_C(0x00FF10), RUNE_C(0x00FF19), WB_NU}, + {RUNE_C(0x00FF1A), RUNE_C(0x00FF1A), WB_ML}, + {RUNE_C(0x00FF1B), RUNE_C(0x00FF1B), WB_MN}, + {RUNE_C(0x00FF21), RUNE_C(0x00FF3A), WB_LE}, + {RUNE_C(0x00FF3F), RUNE_C(0x00FF3F), WB_EX}, + {RUNE_C(0x00FF41), RUNE_C(0x00FF5A), WB_LE}, + {RUNE_C(0x00FF66), RUNE_C(0x00FF9D), WB_KA}, + {RUNE_C(0x00FF9E), RUNE_C(0x00FF9F), WB_EXTEND}, + {RUNE_C(0x00FFA0), RUNE_C(0x00FFBE), WB_LE}, + {RUNE_C(0x00FFC2), RUNE_C(0x00FFC7), WB_LE}, + {RUNE_C(0x00FFCA), RUNE_C(0x00FFCF), WB_LE}, + {RUNE_C(0x00FFD2), RUNE_C(0x00FFD7), WB_LE}, + {RUNE_C(0x00FFDA), RUNE_C(0x00FFDC), WB_LE}, + {RUNE_C(0x00FFF9), RUNE_C(0x00FFFB), WB_FO}, + {RUNE_C(0x010000), RUNE_C(0x01000B), WB_LE}, + {RUNE_C(0x01000D), RUNE_C(0x010026), WB_LE}, + {RUNE_C(0x010028), RUNE_C(0x01003A), WB_LE}, + {RUNE_C(0x01003C), RUNE_C(0x01003D), WB_LE}, + {RUNE_C(0x01003F), RUNE_C(0x01004D), WB_LE}, + {RUNE_C(0x010050), RUNE_C(0x01005D), WB_LE}, + {RUNE_C(0x010080), RUNE_C(0x0100FA), WB_LE}, + {RUNE_C(0x010140), RUNE_C(0x010174), WB_LE}, + {RUNE_C(0x0101FD), RUNE_C(0x0101FD), WB_EXTEND}, + {RUNE_C(0x010280), RUNE_C(0x01029C), WB_LE}, + 
{RUNE_C(0x0102A0), RUNE_C(0x0102D0), WB_LE}, + {RUNE_C(0x0102E0), RUNE_C(0x0102E0), WB_EXTEND}, + {RUNE_C(0x010300), RUNE_C(0x01031F), WB_LE}, + {RUNE_C(0x01032D), RUNE_C(0x01034A), WB_LE}, + {RUNE_C(0x010350), RUNE_C(0x010375), WB_LE}, + {RUNE_C(0x010376), RUNE_C(0x01037A), WB_EXTEND}, + {RUNE_C(0x010380), RUNE_C(0x01039D), WB_LE}, + {RUNE_C(0x0103A0), RUNE_C(0x0103C3), WB_LE}, + {RUNE_C(0x0103C8), RUNE_C(0x0103CF), WB_LE}, + {RUNE_C(0x0103D1), RUNE_C(0x0103D5), WB_LE}, + {RUNE_C(0x010400), RUNE_C(0x01049D), WB_LE}, + {RUNE_C(0x0104A0), RUNE_C(0x0104A9), WB_NU}, + {RUNE_C(0x0104B0), RUNE_C(0x0104D3), WB_LE}, + {RUNE_C(0x0104D8), RUNE_C(0x0104FB), WB_LE}, + {RUNE_C(0x010500), RUNE_C(0x010527), WB_LE}, + {RUNE_C(0x010530), RUNE_C(0x010563), WB_LE}, + {RUNE_C(0x010570), RUNE_C(0x01057A), WB_LE}, + {RUNE_C(0x01057C), RUNE_C(0x01058A), WB_LE}, + {RUNE_C(0x01058C), RUNE_C(0x010592), WB_LE}, + {RUNE_C(0x010594), RUNE_C(0x010595), WB_LE}, + {RUNE_C(0x010597), RUNE_C(0x0105A1), WB_LE}, + {RUNE_C(0x0105A3), RUNE_C(0x0105B1), WB_LE}, + {RUNE_C(0x0105B3), RUNE_C(0x0105B9), WB_LE}, + {RUNE_C(0x0105BB), RUNE_C(0x0105BC), WB_LE}, + {RUNE_C(0x010600), RUNE_C(0x010736), WB_LE}, + {RUNE_C(0x010740), RUNE_C(0x010755), WB_LE}, + {RUNE_C(0x010760), RUNE_C(0x010767), WB_LE}, + {RUNE_C(0x010780), RUNE_C(0x010785), WB_LE}, + {RUNE_C(0x010787), RUNE_C(0x0107B0), WB_LE}, + {RUNE_C(0x0107B2), RUNE_C(0x0107BA), WB_LE}, + {RUNE_C(0x010800), RUNE_C(0x010805), WB_LE}, + {RUNE_C(0x010808), RUNE_C(0x010808), WB_LE}, + {RUNE_C(0x01080A), RUNE_C(0x010835), WB_LE}, + {RUNE_C(0x010837), RUNE_C(0x010838), WB_LE}, + {RUNE_C(0x01083C), RUNE_C(0x01083C), WB_LE}, + {RUNE_C(0x01083F), RUNE_C(0x010855), WB_LE}, + {RUNE_C(0x010860), RUNE_C(0x010876), WB_LE}, + {RUNE_C(0x010880), RUNE_C(0x01089E), WB_LE}, + {RUNE_C(0x0108E0), RUNE_C(0x0108F2), WB_LE}, + {RUNE_C(0x0108F4), RUNE_C(0x0108F5), WB_LE}, + {RUNE_C(0x010900), RUNE_C(0x010915), WB_LE}, + {RUNE_C(0x010920), RUNE_C(0x010939), WB_LE}, + 
{RUNE_C(0x010980), RUNE_C(0x0109B7), WB_LE}, + {RUNE_C(0x0109BE), RUNE_C(0x0109BF), WB_LE}, + {RUNE_C(0x010A00), RUNE_C(0x010A00), WB_LE}, + {RUNE_C(0x010A01), RUNE_C(0x010A03), WB_EXTEND}, + {RUNE_C(0x010A05), RUNE_C(0x010A06), WB_EXTEND}, + {RUNE_C(0x010A0C), RUNE_C(0x010A0F), WB_EXTEND}, + {RUNE_C(0x010A10), RUNE_C(0x010A13), WB_LE}, + {RUNE_C(0x010A15), RUNE_C(0x010A17), WB_LE}, + {RUNE_C(0x010A19), RUNE_C(0x010A35), WB_LE}, + {RUNE_C(0x010A38), RUNE_C(0x010A3A), WB_EXTEND}, + {RUNE_C(0x010A3F), RUNE_C(0x010A3F), WB_EXTEND}, + {RUNE_C(0x010A60), RUNE_C(0x010A7C), WB_LE}, + {RUNE_C(0x010A80), RUNE_C(0x010A9C), WB_LE}, + {RUNE_C(0x010AC0), RUNE_C(0x010AC7), WB_LE}, + {RUNE_C(0x010AC9), RUNE_C(0x010AE4), WB_LE}, + {RUNE_C(0x010AE5), RUNE_C(0x010AE6), WB_EXTEND}, + {RUNE_C(0x010B00), RUNE_C(0x010B35), WB_LE}, + {RUNE_C(0x010B40), RUNE_C(0x010B55), WB_LE}, + {RUNE_C(0x010B60), RUNE_C(0x010B72), WB_LE}, + {RUNE_C(0x010B80), RUNE_C(0x010B91), WB_LE}, + {RUNE_C(0x010C00), RUNE_C(0x010C48), WB_LE}, + {RUNE_C(0x010C80), RUNE_C(0x010CB2), WB_LE}, + {RUNE_C(0x010CC0), RUNE_C(0x010CF2), WB_LE}, + {RUNE_C(0x010D00), RUNE_C(0x010D23), WB_LE}, + {RUNE_C(0x010D24), RUNE_C(0x010D27), WB_EXTEND}, + {RUNE_C(0x010D30), RUNE_C(0x010D39), WB_NU}, + {RUNE_C(0x010E80), RUNE_C(0x010EA9), WB_LE}, + {RUNE_C(0x010EAB), RUNE_C(0x010EAC), WB_EXTEND}, + {RUNE_C(0x010EB0), RUNE_C(0x010EB1), WB_LE}, + {RUNE_C(0x010EFD), RUNE_C(0x010EFF), WB_EXTEND}, + {RUNE_C(0x010F00), RUNE_C(0x010F1C), WB_LE}, + {RUNE_C(0x010F27), RUNE_C(0x010F27), WB_LE}, + {RUNE_C(0x010F30), RUNE_C(0x010F45), WB_LE}, + {RUNE_C(0x010F46), RUNE_C(0x010F50), WB_EXTEND}, + {RUNE_C(0x010F70), RUNE_C(0x010F81), WB_LE}, + {RUNE_C(0x010F82), RUNE_C(0x010F85), WB_EXTEND}, + {RUNE_C(0x010FB0), RUNE_C(0x010FC4), WB_LE}, + {RUNE_C(0x010FE0), RUNE_C(0x010FF6), WB_LE}, + {RUNE_C(0x011000), RUNE_C(0x011002), WB_EXTEND}, + {RUNE_C(0x011003), RUNE_C(0x011037), WB_LE}, + {RUNE_C(0x011038), RUNE_C(0x011046), WB_EXTEND}, + {RUNE_C(0x011066), 
RUNE_C(0x01106F), WB_NU}, + {RUNE_C(0x011070), RUNE_C(0x011070), WB_EXTEND}, + {RUNE_C(0x011071), RUNE_C(0x011072), WB_LE}, + {RUNE_C(0x011073), RUNE_C(0x011074), WB_EXTEND}, + {RUNE_C(0x011075), RUNE_C(0x011075), WB_LE}, + {RUNE_C(0x01107F), RUNE_C(0x011082), WB_EXTEND}, + {RUNE_C(0x011083), RUNE_C(0x0110AF), WB_LE}, + {RUNE_C(0x0110B0), RUNE_C(0x0110BA), WB_EXTEND}, + {RUNE_C(0x0110BD), RUNE_C(0x0110BD), WB_NU}, + {RUNE_C(0x0110C2), RUNE_C(0x0110C2), WB_EXTEND}, + {RUNE_C(0x0110CD), RUNE_C(0x0110CD), WB_NU}, + {RUNE_C(0x0110D0), RUNE_C(0x0110E8), WB_LE}, + {RUNE_C(0x0110F0), RUNE_C(0x0110F9), WB_NU}, + {RUNE_C(0x011100), RUNE_C(0x011102), WB_EXTEND}, + {RUNE_C(0x011103), RUNE_C(0x011126), WB_LE}, + {RUNE_C(0x011127), RUNE_C(0x011134), WB_EXTEND}, + {RUNE_C(0x011136), RUNE_C(0x01113F), WB_NU}, + {RUNE_C(0x011144), RUNE_C(0x011144), WB_LE}, + {RUNE_C(0x011145), RUNE_C(0x011146), WB_EXTEND}, + {RUNE_C(0x011147), RUNE_C(0x011147), WB_LE}, + {RUNE_C(0x011150), RUNE_C(0x011172), WB_LE}, + {RUNE_C(0x011173), RUNE_C(0x011173), WB_EXTEND}, + {RUNE_C(0x011176), RUNE_C(0x011176), WB_LE}, + {RUNE_C(0x011180), RUNE_C(0x011182), WB_EXTEND}, + {RUNE_C(0x011183), RUNE_C(0x0111B2), WB_LE}, + {RUNE_C(0x0111B3), RUNE_C(0x0111C0), WB_EXTEND}, + {RUNE_C(0x0111C1), RUNE_C(0x0111C4), WB_LE}, + {RUNE_C(0x0111C9), RUNE_C(0x0111CC), WB_EXTEND}, + {RUNE_C(0x0111CE), RUNE_C(0x0111CF), WB_EXTEND}, + {RUNE_C(0x0111D0), RUNE_C(0x0111D9), WB_NU}, + {RUNE_C(0x0111DA), RUNE_C(0x0111DA), WB_LE}, + {RUNE_C(0x0111DC), RUNE_C(0x0111DC), WB_LE}, + {RUNE_C(0x011200), RUNE_C(0x011211), WB_LE}, + {RUNE_C(0x011213), RUNE_C(0x01122B), WB_LE}, + {RUNE_C(0x01122C), RUNE_C(0x011237), WB_EXTEND}, + {RUNE_C(0x01123E), RUNE_C(0x01123E), WB_EXTEND}, + {RUNE_C(0x01123F), RUNE_C(0x011240), WB_LE}, + {RUNE_C(0x011241), RUNE_C(0x011241), WB_EXTEND}, + {RUNE_C(0x011280), RUNE_C(0x011286), WB_LE}, + {RUNE_C(0x011288), RUNE_C(0x011288), WB_LE}, + {RUNE_C(0x01128A), RUNE_C(0x01128D), WB_LE}, + {RUNE_C(0x01128F), 
RUNE_C(0x01129D), WB_LE}, + {RUNE_C(0x01129F), RUNE_C(0x0112A8), WB_LE}, + {RUNE_C(0x0112B0), RUNE_C(0x0112DE), WB_LE}, + {RUNE_C(0x0112DF), RUNE_C(0x0112EA), WB_EXTEND}, + {RUNE_C(0x0112F0), RUNE_C(0x0112F9), WB_NU}, + {RUNE_C(0x011300), RUNE_C(0x011303), WB_EXTEND}, + {RUNE_C(0x011305), RUNE_C(0x01130C), WB_LE}, + {RUNE_C(0x01130F), RUNE_C(0x011310), WB_LE}, + {RUNE_C(0x011313), RUNE_C(0x011328), WB_LE}, + {RUNE_C(0x01132A), RUNE_C(0x011330), WB_LE}, + {RUNE_C(0x011332), RUNE_C(0x011333), WB_LE}, + {RUNE_C(0x011335), RUNE_C(0x011339), WB_LE}, + {RUNE_C(0x01133B), RUNE_C(0x01133C), WB_EXTEND}, + {RUNE_C(0x01133D), RUNE_C(0x01133D), WB_LE}, + {RUNE_C(0x01133E), RUNE_C(0x011344), WB_EXTEND}, + {RUNE_C(0x011347), RUNE_C(0x011348), WB_EXTEND}, + {RUNE_C(0x01134B), RUNE_C(0x01134D), WB_EXTEND}, + {RUNE_C(0x011350), RUNE_C(0x011350), WB_LE}, + {RUNE_C(0x011357), RUNE_C(0x011357), WB_EXTEND}, + {RUNE_C(0x01135D), RUNE_C(0x011361), WB_LE}, + {RUNE_C(0x011362), RUNE_C(0x011363), WB_EXTEND}, + {RUNE_C(0x011366), RUNE_C(0x01136C), WB_EXTEND}, + {RUNE_C(0x011370), RUNE_C(0x011374), WB_EXTEND}, + {RUNE_C(0x011400), RUNE_C(0x011434), WB_LE}, + {RUNE_C(0x011435), RUNE_C(0x011446), WB_EXTEND}, + {RUNE_C(0x011447), RUNE_C(0x01144A), WB_LE}, + {RUNE_C(0x011450), RUNE_C(0x011459), WB_NU}, + {RUNE_C(0x01145E), RUNE_C(0x01145E), WB_EXTEND}, + {RUNE_C(0x01145F), RUNE_C(0x011461), WB_LE}, + {RUNE_C(0x011480), RUNE_C(0x0114AF), WB_LE}, + {RUNE_C(0x0114B0), RUNE_C(0x0114C3), WB_EXTEND}, + {RUNE_C(0x0114C4), RUNE_C(0x0114C5), WB_LE}, + {RUNE_C(0x0114C7), RUNE_C(0x0114C7), WB_LE}, + {RUNE_C(0x0114D0), RUNE_C(0x0114D9), WB_NU}, + {RUNE_C(0x011580), RUNE_C(0x0115AE), WB_LE}, + {RUNE_C(0x0115AF), RUNE_C(0x0115B5), WB_EXTEND}, + {RUNE_C(0x0115B8), RUNE_C(0x0115C0), WB_EXTEND}, + {RUNE_C(0x0115D8), RUNE_C(0x0115DB), WB_LE}, + {RUNE_C(0x0115DC), RUNE_C(0x0115DD), WB_EXTEND}, + {RUNE_C(0x011600), RUNE_C(0x01162F), WB_LE}, + {RUNE_C(0x011630), RUNE_C(0x011640), WB_EXTEND}, + {RUNE_C(0x011644), 
RUNE_C(0x011644), WB_LE}, + {RUNE_C(0x011650), RUNE_C(0x011659), WB_NU}, + {RUNE_C(0x011680), RUNE_C(0x0116AA), WB_LE}, + {RUNE_C(0x0116AB), RUNE_C(0x0116B7), WB_EXTEND}, + {RUNE_C(0x0116B8), RUNE_C(0x0116B8), WB_LE}, + {RUNE_C(0x0116C0), RUNE_C(0x0116C9), WB_NU}, + {RUNE_C(0x01171D), RUNE_C(0x01172B), WB_EXTEND}, + {RUNE_C(0x011730), RUNE_C(0x011739), WB_NU}, + {RUNE_C(0x011800), RUNE_C(0x01182B), WB_LE}, + {RUNE_C(0x01182C), RUNE_C(0x01183A), WB_EXTEND}, + {RUNE_C(0x0118A0), RUNE_C(0x0118DF), WB_LE}, + {RUNE_C(0x0118E0), RUNE_C(0x0118E9), WB_NU}, + {RUNE_C(0x0118FF), RUNE_C(0x011906), WB_LE}, + {RUNE_C(0x011909), RUNE_C(0x011909), WB_LE}, + {RUNE_C(0x01190C), RUNE_C(0x011913), WB_LE}, + {RUNE_C(0x011915), RUNE_C(0x011916), WB_LE}, + {RUNE_C(0x011918), RUNE_C(0x01192F), WB_LE}, + {RUNE_C(0x011930), RUNE_C(0x011935), WB_EXTEND}, + {RUNE_C(0x011937), RUNE_C(0x011938), WB_EXTEND}, + {RUNE_C(0x01193B), RUNE_C(0x01193E), WB_EXTEND}, + {RUNE_C(0x01193F), RUNE_C(0x01193F), WB_LE}, + {RUNE_C(0x011940), RUNE_C(0x011940), WB_EXTEND}, + {RUNE_C(0x011941), RUNE_C(0x011941), WB_LE}, + {RUNE_C(0x011942), RUNE_C(0x011943), WB_EXTEND}, + {RUNE_C(0x011950), RUNE_C(0x011959), WB_NU}, + {RUNE_C(0x0119A0), RUNE_C(0x0119A7), WB_LE}, + {RUNE_C(0x0119AA), RUNE_C(0x0119D0), WB_LE}, + {RUNE_C(0x0119D1), RUNE_C(0x0119D7), WB_EXTEND}, + {RUNE_C(0x0119DA), RUNE_C(0x0119E0), WB_EXTEND}, + {RUNE_C(0x0119E1), RUNE_C(0x0119E1), WB_LE}, + {RUNE_C(0x0119E3), RUNE_C(0x0119E3), WB_LE}, + {RUNE_C(0x0119E4), RUNE_C(0x0119E4), WB_EXTEND}, + {RUNE_C(0x011A00), RUNE_C(0x011A00), WB_LE}, + {RUNE_C(0x011A01), RUNE_C(0x011A0A), WB_EXTEND}, + {RUNE_C(0x011A0B), RUNE_C(0x011A32), WB_LE}, + {RUNE_C(0x011A33), RUNE_C(0x011A39), WB_EXTEND}, + {RUNE_C(0x011A3A), RUNE_C(0x011A3A), WB_LE}, + {RUNE_C(0x011A3B), RUNE_C(0x011A3E), WB_EXTEND}, + {RUNE_C(0x011A47), RUNE_C(0x011A47), WB_EXTEND}, + {RUNE_C(0x011A50), RUNE_C(0x011A50), WB_LE}, + {RUNE_C(0x011A51), RUNE_C(0x011A5B), WB_EXTEND}, + {RUNE_C(0x011A5C), 
RUNE_C(0x011A89), WB_LE}, + {RUNE_C(0x011A8A), RUNE_C(0x011A99), WB_EXTEND}, + {RUNE_C(0x011A9D), RUNE_C(0x011A9D), WB_LE}, + {RUNE_C(0x011AB0), RUNE_C(0x011AF8), WB_LE}, + {RUNE_C(0x011C00), RUNE_C(0x011C08), WB_LE}, + {RUNE_C(0x011C0A), RUNE_C(0x011C2E), WB_LE}, + {RUNE_C(0x011C2F), RUNE_C(0x011C36), WB_EXTEND}, + {RUNE_C(0x011C38), RUNE_C(0x011C3F), WB_EXTEND}, + {RUNE_C(0x011C40), RUNE_C(0x011C40), WB_LE}, + {RUNE_C(0x011C50), RUNE_C(0x011C59), WB_NU}, + {RUNE_C(0x011C72), RUNE_C(0x011C8F), WB_LE}, + {RUNE_C(0x011C92), RUNE_C(0x011CA7), WB_EXTEND}, + {RUNE_C(0x011CA9), RUNE_C(0x011CB6), WB_EXTEND}, + {RUNE_C(0x011D00), RUNE_C(0x011D06), WB_LE}, + {RUNE_C(0x011D08), RUNE_C(0x011D09), WB_LE}, + {RUNE_C(0x011D0B), RUNE_C(0x011D30), WB_LE}, + {RUNE_C(0x011D31), RUNE_C(0x011D36), WB_EXTEND}, + {RUNE_C(0x011D3A), RUNE_C(0x011D3A), WB_EXTEND}, + {RUNE_C(0x011D3C), RUNE_C(0x011D3D), WB_EXTEND}, + {RUNE_C(0x011D3F), RUNE_C(0x011D45), WB_EXTEND}, + {RUNE_C(0x011D46), RUNE_C(0x011D46), WB_LE}, + {RUNE_C(0x011D47), RUNE_C(0x011D47), WB_EXTEND}, + {RUNE_C(0x011D50), RUNE_C(0x011D59), WB_NU}, + {RUNE_C(0x011D60), RUNE_C(0x011D65), WB_LE}, + {RUNE_C(0x011D67), RUNE_C(0x011D68), WB_LE}, + {RUNE_C(0x011D6A), RUNE_C(0x011D89), WB_LE}, + {RUNE_C(0x011D8A), RUNE_C(0x011D8E), WB_EXTEND}, + {RUNE_C(0x011D90), RUNE_C(0x011D91), WB_EXTEND}, + {RUNE_C(0x011D93), RUNE_C(0x011D97), WB_EXTEND}, + {RUNE_C(0x011D98), RUNE_C(0x011D98), WB_LE}, + {RUNE_C(0x011DA0), RUNE_C(0x011DA9), WB_NU}, + {RUNE_C(0x011EE0), RUNE_C(0x011EF2), WB_LE}, + {RUNE_C(0x011EF3), RUNE_C(0x011EF6), WB_EXTEND}, + {RUNE_C(0x011F00), RUNE_C(0x011F01), WB_EXTEND}, + {RUNE_C(0x011F02), RUNE_C(0x011F02), WB_LE}, + {RUNE_C(0x011F03), RUNE_C(0x011F03), WB_EXTEND}, + {RUNE_C(0x011F04), RUNE_C(0x011F10), WB_LE}, + {RUNE_C(0x011F12), RUNE_C(0x011F33), WB_LE}, + {RUNE_C(0x011F34), RUNE_C(0x011F3A), WB_EXTEND}, + {RUNE_C(0x011F3E), RUNE_C(0x011F42), WB_EXTEND}, + {RUNE_C(0x011F50), RUNE_C(0x011F59), WB_NU}, + {RUNE_C(0x011FB0), 
RUNE_C(0x011FB0), WB_LE}, + {RUNE_C(0x012000), RUNE_C(0x012399), WB_LE}, + {RUNE_C(0x012400), RUNE_C(0x01246E), WB_LE}, + {RUNE_C(0x012480), RUNE_C(0x012543), WB_LE}, + {RUNE_C(0x012F90), RUNE_C(0x012FF0), WB_LE}, + {RUNE_C(0x013000), RUNE_C(0x01342F), WB_LE}, + {RUNE_C(0x013430), RUNE_C(0x01343F), WB_FO}, + {RUNE_C(0x013440), RUNE_C(0x013440), WB_EXTEND}, + {RUNE_C(0x013441), RUNE_C(0x013446), WB_LE}, + {RUNE_C(0x013447), RUNE_C(0x013455), WB_EXTEND}, + {RUNE_C(0x014400), RUNE_C(0x014646), WB_LE}, + {RUNE_C(0x016800), RUNE_C(0x016A38), WB_LE}, + {RUNE_C(0x016A40), RUNE_C(0x016A5E), WB_LE}, + {RUNE_C(0x016A60), RUNE_C(0x016A69), WB_NU}, + {RUNE_C(0x016A70), RUNE_C(0x016ABE), WB_LE}, + {RUNE_C(0x016AC0), RUNE_C(0x016AC9), WB_NU}, + {RUNE_C(0x016AD0), RUNE_C(0x016AED), WB_LE}, + {RUNE_C(0x016AF0), RUNE_C(0x016AF4), WB_EXTEND}, + {RUNE_C(0x016B00), RUNE_C(0x016B2F), WB_LE}, + {RUNE_C(0x016B30), RUNE_C(0x016B36), WB_EXTEND}, + {RUNE_C(0x016B40), RUNE_C(0x016B43), WB_LE}, + {RUNE_C(0x016B50), RUNE_C(0x016B59), WB_NU}, + {RUNE_C(0x016B63), RUNE_C(0x016B77), WB_LE}, + {RUNE_C(0x016B7D), RUNE_C(0x016B8F), WB_LE}, + {RUNE_C(0x016E40), RUNE_C(0x016E7F), WB_LE}, + {RUNE_C(0x016F00), RUNE_C(0x016F4A), WB_LE}, + {RUNE_C(0x016F4F), RUNE_C(0x016F4F), WB_EXTEND}, + {RUNE_C(0x016F50), RUNE_C(0x016F50), WB_LE}, + {RUNE_C(0x016F51), RUNE_C(0x016F87), WB_EXTEND}, + {RUNE_C(0x016F8F), RUNE_C(0x016F92), WB_EXTEND}, + {RUNE_C(0x016F93), RUNE_C(0x016F9F), WB_LE}, + {RUNE_C(0x016FE0), RUNE_C(0x016FE1), WB_LE}, + {RUNE_C(0x016FE3), RUNE_C(0x016FE3), WB_LE}, + {RUNE_C(0x016FE4), RUNE_C(0x016FE4), WB_EXTEND}, + {RUNE_C(0x016FF0), RUNE_C(0x016FF1), WB_EXTEND}, + {RUNE_C(0x01AFF0), RUNE_C(0x01AFF3), WB_KA}, + {RUNE_C(0x01AFF5), RUNE_C(0x01AFFB), WB_KA}, + {RUNE_C(0x01AFFD), RUNE_C(0x01AFFE), WB_KA}, + {RUNE_C(0x01B000), RUNE_C(0x01B000), WB_KA}, + {RUNE_C(0x01B120), RUNE_C(0x01B122), WB_KA}, + {RUNE_C(0x01B155), RUNE_C(0x01B155), WB_KA}, + {RUNE_C(0x01B164), RUNE_C(0x01B167), WB_KA}, + 
{RUNE_C(0x01BC00), RUNE_C(0x01BC6A), WB_LE}, + {RUNE_C(0x01BC70), RUNE_C(0x01BC7C), WB_LE}, + {RUNE_C(0x01BC80), RUNE_C(0x01BC88), WB_LE}, + {RUNE_C(0x01BC90), RUNE_C(0x01BC99), WB_LE}, + {RUNE_C(0x01BC9D), RUNE_C(0x01BC9E), WB_EXTEND}, + {RUNE_C(0x01BCA0), RUNE_C(0x01BCA3), WB_FO}, + {RUNE_C(0x01CF00), RUNE_C(0x01CF2D), WB_EXTEND}, + {RUNE_C(0x01CF30), RUNE_C(0x01CF46), WB_EXTEND}, + {RUNE_C(0x01D165), RUNE_C(0x01D169), WB_EXTEND}, + {RUNE_C(0x01D16D), RUNE_C(0x01D172), WB_EXTEND}, + {RUNE_C(0x01D173), RUNE_C(0x01D17A), WB_FO}, + {RUNE_C(0x01D17B), RUNE_C(0x01D182), WB_EXTEND}, + {RUNE_C(0x01D185), RUNE_C(0x01D18B), WB_EXTEND}, + {RUNE_C(0x01D1AA), RUNE_C(0x01D1AD), WB_EXTEND}, + {RUNE_C(0x01D242), RUNE_C(0x01D244), WB_EXTEND}, + {RUNE_C(0x01D400), RUNE_C(0x01D454), WB_LE}, + {RUNE_C(0x01D456), RUNE_C(0x01D49C), WB_LE}, + {RUNE_C(0x01D49E), RUNE_C(0x01D49F), WB_LE}, + {RUNE_C(0x01D4A2), RUNE_C(0x01D4A2), WB_LE}, + {RUNE_C(0x01D4A5), RUNE_C(0x01D4A6), WB_LE}, + {RUNE_C(0x01D4A9), RUNE_C(0x01D4AC), WB_LE}, + {RUNE_C(0x01D4AE), RUNE_C(0x01D4B9), WB_LE}, + {RUNE_C(0x01D4BB), RUNE_C(0x01D4BB), WB_LE}, + {RUNE_C(0x01D4BD), RUNE_C(0x01D4C3), WB_LE}, + {RUNE_C(0x01D4C5), RUNE_C(0x01D505), WB_LE}, + {RUNE_C(0x01D507), RUNE_C(0x01D50A), WB_LE}, + {RUNE_C(0x01D50D), RUNE_C(0x01D514), WB_LE}, + {RUNE_C(0x01D516), RUNE_C(0x01D51C), WB_LE}, + {RUNE_C(0x01D51E), RUNE_C(0x01D539), WB_LE}, + {RUNE_C(0x01D53B), RUNE_C(0x01D53E), WB_LE}, + {RUNE_C(0x01D540), RUNE_C(0x01D544), WB_LE}, + {RUNE_C(0x01D546), RUNE_C(0x01D546), WB_LE}, + {RUNE_C(0x01D54A), RUNE_C(0x01D550), WB_LE}, + {RUNE_C(0x01D552), RUNE_C(0x01D6A5), WB_LE}, + {RUNE_C(0x01D6A8), RUNE_C(0x01D6C0), WB_LE}, + {RUNE_C(0x01D6C2), RUNE_C(0x01D6DA), WB_LE}, + {RUNE_C(0x01D6DC), RUNE_C(0x01D6FA), WB_LE}, + {RUNE_C(0x01D6FC), RUNE_C(0x01D714), WB_LE}, + {RUNE_C(0x01D716), RUNE_C(0x01D734), WB_LE}, + {RUNE_C(0x01D736), RUNE_C(0x01D74E), WB_LE}, + {RUNE_C(0x01D750), RUNE_C(0x01D76E), WB_LE}, + {RUNE_C(0x01D770), RUNE_C(0x01D788), 
WB_LE}, + {RUNE_C(0x01D78A), RUNE_C(0x01D7A8), WB_LE}, + {RUNE_C(0x01D7AA), RUNE_C(0x01D7C2), WB_LE}, + {RUNE_C(0x01D7C4), RUNE_C(0x01D7CB), WB_LE}, + {RUNE_C(0x01D7CE), RUNE_C(0x01D7FF), WB_NU}, + {RUNE_C(0x01DA00), RUNE_C(0x01DA36), WB_EXTEND}, + {RUNE_C(0x01DA3B), RUNE_C(0x01DA6C), WB_EXTEND}, + {RUNE_C(0x01DA75), RUNE_C(0x01DA75), WB_EXTEND}, + {RUNE_C(0x01DA84), RUNE_C(0x01DA84), WB_EXTEND}, + {RUNE_C(0x01DA9B), RUNE_C(0x01DA9F), WB_EXTEND}, + {RUNE_C(0x01DAA1), RUNE_C(0x01DAAF), WB_EXTEND}, + {RUNE_C(0x01DF00), RUNE_C(0x01DF1E), WB_LE}, + {RUNE_C(0x01DF25), RUNE_C(0x01DF2A), WB_LE}, + {RUNE_C(0x01E000), RUNE_C(0x01E006), WB_EXTEND}, + {RUNE_C(0x01E008), RUNE_C(0x01E018), WB_EXTEND}, + {RUNE_C(0x01E01B), RUNE_C(0x01E021), WB_EXTEND}, + {RUNE_C(0x01E023), RUNE_C(0x01E024), WB_EXTEND}, + {RUNE_C(0x01E026), RUNE_C(0x01E02A), WB_EXTEND}, + {RUNE_C(0x01E030), RUNE_C(0x01E06D), WB_LE}, + {RUNE_C(0x01E08F), RUNE_C(0x01E08F), WB_EXTEND}, + {RUNE_C(0x01E100), RUNE_C(0x01E12C), WB_LE}, + {RUNE_C(0x01E130), RUNE_C(0x01E136), WB_EXTEND}, + {RUNE_C(0x01E137), RUNE_C(0x01E13D), WB_LE}, + {RUNE_C(0x01E140), RUNE_C(0x01E149), WB_NU}, + {RUNE_C(0x01E14E), RUNE_C(0x01E14E), WB_LE}, + {RUNE_C(0x01E290), RUNE_C(0x01E2AD), WB_LE}, + {RUNE_C(0x01E2AE), RUNE_C(0x01E2AE), WB_EXTEND}, + {RUNE_C(0x01E2C0), RUNE_C(0x01E2EB), WB_LE}, + {RUNE_C(0x01E2EC), RUNE_C(0x01E2EF), WB_EXTEND}, + {RUNE_C(0x01E2F0), RUNE_C(0x01E2F9), WB_NU}, + {RUNE_C(0x01E4D0), RUNE_C(0x01E4EB), WB_LE}, + {RUNE_C(0x01E4EC), RUNE_C(0x01E4EF), WB_EXTEND}, + {RUNE_C(0x01E4F0), RUNE_C(0x01E4F9), WB_NU}, + {RUNE_C(0x01E7E0), RUNE_C(0x01E7E6), WB_LE}, + {RUNE_C(0x01E7E8), RUNE_C(0x01E7EB), WB_LE}, + {RUNE_C(0x01E7ED), RUNE_C(0x01E7EE), WB_LE}, + {RUNE_C(0x01E7F0), RUNE_C(0x01E7FE), WB_LE}, + {RUNE_C(0x01E800), RUNE_C(0x01E8C4), WB_LE}, + {RUNE_C(0x01E8D0), RUNE_C(0x01E8D6), WB_EXTEND}, + {RUNE_C(0x01E900), RUNE_C(0x01E943), WB_LE}, + {RUNE_C(0x01E944), RUNE_C(0x01E94A), WB_EXTEND}, + {RUNE_C(0x01E94B), RUNE_C(0x01E94B), 
WB_LE}, + {RUNE_C(0x01E950), RUNE_C(0x01E959), WB_NU}, + {RUNE_C(0x01EE00), RUNE_C(0x01EE03), WB_LE}, + {RUNE_C(0x01EE05), RUNE_C(0x01EE1F), WB_LE}, + {RUNE_C(0x01EE21), RUNE_C(0x01EE22), WB_LE}, + {RUNE_C(0x01EE24), RUNE_C(0x01EE24), WB_LE}, + {RUNE_C(0x01EE27), RUNE_C(0x01EE27), WB_LE}, + {RUNE_C(0x01EE29), RUNE_C(0x01EE32), WB_LE}, + {RUNE_C(0x01EE34), RUNE_C(0x01EE37), WB_LE}, + {RUNE_C(0x01EE39), RUNE_C(0x01EE39), WB_LE}, + {RUNE_C(0x01EE3B), RUNE_C(0x01EE3B), WB_LE}, + {RUNE_C(0x01EE42), RUNE_C(0x01EE42), WB_LE}, + {RUNE_C(0x01EE47), RUNE_C(0x01EE47), WB_LE}, + {RUNE_C(0x01EE49), RUNE_C(0x01EE49), WB_LE}, + {RUNE_C(0x01EE4B), RUNE_C(0x01EE4B), WB_LE}, + {RUNE_C(0x01EE4D), RUNE_C(0x01EE4F), WB_LE}, + {RUNE_C(0x01EE51), RUNE_C(0x01EE52), WB_LE}, + {RUNE_C(0x01EE54), RUNE_C(0x01EE54), WB_LE}, + {RUNE_C(0x01EE57), RUNE_C(0x01EE57), WB_LE}, + {RUNE_C(0x01EE59), RUNE_C(0x01EE59), WB_LE}, + {RUNE_C(0x01EE5B), RUNE_C(0x01EE5B), WB_LE}, + {RUNE_C(0x01EE5D), RUNE_C(0x01EE5D), WB_LE}, + {RUNE_C(0x01EE5F), RUNE_C(0x01EE5F), WB_LE}, + {RUNE_C(0x01EE61), RUNE_C(0x01EE62), WB_LE}, + {RUNE_C(0x01EE64), RUNE_C(0x01EE64), WB_LE}, + {RUNE_C(0x01EE67), RUNE_C(0x01EE6A), WB_LE}, + {RUNE_C(0x01EE6C), RUNE_C(0x01EE72), WB_LE}, + {RUNE_C(0x01EE74), RUNE_C(0x01EE77), WB_LE}, + {RUNE_C(0x01EE79), RUNE_C(0x01EE7C), WB_LE}, + {RUNE_C(0x01EE7E), RUNE_C(0x01EE7E), WB_LE}, + {RUNE_C(0x01EE80), RUNE_C(0x01EE89), WB_LE}, + {RUNE_C(0x01EE8B), RUNE_C(0x01EE9B), WB_LE}, + {RUNE_C(0x01EEA1), RUNE_C(0x01EEA3), WB_LE}, + {RUNE_C(0x01EEA5), RUNE_C(0x01EEA9), WB_LE}, + {RUNE_C(0x01EEAB), RUNE_C(0x01EEBB), WB_LE}, + {RUNE_C(0x01F130), RUNE_C(0x01F149), WB_LE}, + {RUNE_C(0x01F150), RUNE_C(0x01F169), WB_LE}, + {RUNE_C(0x01F170), RUNE_C(0x01F189), WB_LE}, + {RUNE_C(0x01F1E6), RUNE_C(0x01F1FF), WB_RI}, + {RUNE_C(0x01F3FB), RUNE_C(0x01F3FF), WB_EXTEND}, + {RUNE_C(0x01FBF0), RUNE_C(0x01FBF9), WB_NU}, + {RUNE_C(0x0E0001), RUNE_C(0x0E0001), WB_FO}, + {RUNE_C(0x0E0020), RUNE_C(0x0E007F), WB_EXTEND}, + 
{RUNE_C(0x0E0100), RUNE_C(0x0E01EF), WB_EXTEND}, +}; + +_MLIB_DEFINE_BSEARCH(enum uprop_wb, lookup, WB_XX) + +/* uprop_get_wb: return the enum uprop_wb (WB_*) value recorded for the code point ch. NOTE(review): mlib_lookup() used below is not defined in this view; presumably the _MLIB_DEFINE_BSEARCH invocation above generates it as a search over the `lookup` range table with WB_XX as the result for unlisted code points. Confirm against the macro definition. */ enum uprop_wb +uprop_get_wb(rune ch) +{ + /* Code points below lengthof(lookup_lat1) are answered by indexing lookup_lat1 directly; every other code point is passed to mlib_lookup(). */ return ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch); +} -- cgit v1.2.3