From 5d2bbdfe3191049bbd4bb237a31f34fdf62f347c Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Thu, 18 Apr 2024 00:20:31 +0200 Subject: Add uprop_get_jt() --- data/DerivedJoiningType | 573 ++++++++++++++++++++++++++++++++++++++++ gen/data-files | 1 + gen/prop/jt | 62 +++++ include/unicode/prop.h | 10 + lib/unicode/prop/uprop_get_jt.c | 560 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 1206 insertions(+) create mode 100644 data/DerivedJoiningType create mode 100755 gen/prop/jt create mode 100644 lib/unicode/prop/uprop_get_jt.c diff --git a/data/DerivedJoiningType b/data/DerivedJoiningType new file mode 100644 index 0000000..a4e01e7 --- /dev/null +++ b/data/DerivedJoiningType @@ -0,0 +1,573 @@ +# DerivedJoiningType-15.1.0.txt +# Date: 2023-01-05, 20:34:38 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +# Type T is derived, as described in ArabicShaping.txt + +# All code points not explicitly listed for Joining_Type +# have the value Non_Joining (U). + +# @missing: 0000..10FFFF; Non_Joining + +# ================================================ + +# Joining_Type=Join_Causing + +0640 ; C # Lm ARABIC TATWEEL +07FA ; C # Lm NKO LAJANYALAN +0883..0885 ; C # Lo [3] ARABIC TATWEEL WITH OVERSTRUCK HAMZA..ARABIC TATWEEL WITH TWO DOTS BELOW +180A ; C # Po MONGOLIAN NIRUGU +200D ; C # Cf ZERO WIDTH JOINER + +# Total code points: 7 + +# ================================================ + +# Joining_Type=Dual_Joining + +0620 ; D # Lo ARABIC LETTER KASHMIRI YEH +0626 ; D # Lo ARABIC LETTER YEH WITH HAMZA ABOVE +0628 ; D # Lo ARABIC LETTER BEH +062A..062E ; D # Lo [5] ARABIC LETTER TEH..ARABIC LETTER KHAH +0633..063F ; D # Lo [13] ARABIC LETTER SEEN..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0641..0647 ; D # Lo [7] ARABIC LETTER FEH..ARABIC LETTER HEH +0649..064A ; D # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH +066E..066F ; D # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0678..0687 ; D # Lo [16] ARABIC LETTER HIGH HAMZA YEH..ARABIC LETTER TCHEHEH +069A..06BF ; D # Lo [38] ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE..ARABIC LETTER TCHEH WITH DOT ABOVE +06C1..06C2 ; D # Lo [2] ARABIC LETTER HEH GOAL..ARABIC LETTER HEH GOAL WITH HAMZA ABOVE +06CC ; D # Lo ARABIC LETTER FARSI YEH +06CE ; D # Lo ARABIC LETTER YEH WITH SMALL V +06D0..06D1 ; D # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW +06FA..06FC ; D # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; D # Lo ARABIC LETTER HEH WITH INVERTED V +0712..0714 ; D # Lo [3] SYRIAC LETTER BETH..SYRIAC LETTER GAMAL GARSHUNI +071A..071D ; D # Lo [4] SYRIAC LETTER HETH..SYRIAC LETTER YUDH +071F..0727 ; D # Lo [9] SYRIAC LETTER KAPH..SYRIAC LETTER REVERSED PE +0729 ; D # Lo SYRIAC LETTER QAPH +072B ; D # Lo SYRIAC LETTER SHIN +072D..072E ; D # Lo [2] SYRIAC LETTER PERSIAN BHETH..SYRIAC LETTER PERSIAN GHAMAL +074E..0758 ; D # Lo [11] SYRIAC LETTER SOGDIAN KHAPH..ARABIC LETTER HAH WITH THREE DOTS POINTING UPWARDS BELOW +075C..076A ; D # Lo [15] ARABIC LETTER SEEN WITH FOUR DOTS ABOVE..ARABIC LETTER LAM WITH BAR +076D..0770 ; D # Lo [4] ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER SEEN WITH SMALL ARABIC LETTER TAH AND TWO DOTS +0772 ; D # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE +0775..0777 ; D # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW +077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE +07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA +0841..0845 ; D # Lo [5] MANDAIC LETTER AB..MANDAIC LETTER USHENNA +0848 ; D # Lo MANDAIC LETTER ATT +084A..0853 ; D # Lo [10] MANDAIC LETTER AK..MANDAIC LETTER AR +0855 ; D # Lo MANDAIC LETTER AT +0860 ; D # Lo SYRIAC LETTER MALAYALAM NGA +0862..0865 ; D # Lo [4] SYRIAC LETTER MALAYALAM NYA..SYRIAC LETTER MALAYALAM NNNA +0868 ; D # Lo SYRIAC LETTER MALAYALAM LLA +0886 ; D # Lo ARABIC LETTER THIN YEH +0889..088D ; D # Lo [5] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW +08A0..08A9 ; D # Lo [10] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +08AF..08B0 ; D # Lo [2] ARABIC LETTER SAD WITH THREE DOTS BELOW..ARABIC LETTER GAF WITH INVERTED STROKE +08B3..08B8 ; D # Lo [6] ARABIC LETTER AIN WITH THREE DOTS BELOW..ARABIC LETTER TEH WITH SMALL TEH ABOVE +08BA..08C8 ; D # Lo [15] ARABIC LETTER YEH WITH TWO DOTS BELOW AND SMALL NOON ABOVE..ARABIC LETTER GRAF +1807 ; D # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER +1820..1842 ; D # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; D # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; D # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1887..18A8 ; D # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; D # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA +10AC0..10AC4 ; D # Lo [5] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER GHIMEL +10AD3..10AD6 ; D # Lo [4] MANICHAEAN LETTER LAMEDH..MANICHAEAN LETTER MEM +10AD8..10ADC ; D # Lo [5] MANICHAEAN LETTER SAMEKH..MANICHAEAN LETTER FE +10ADE..10AE0 ; D # Lo [3] MANICHAEAN LETTER QOPH..MANICHAEAN LETTER QHOPH +10AEB..10AEE ; D # No [4] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER TWENTY +10B80 ; D # Lo PSALTER PAHLAVI LETTER ALEPH +10B82 ; D # Lo PSALTER PAHLAVI LETTER GIMEL +10B86..10B88 ; D # Lo [3] PSALTER PAHLAVI LETTER ZAYIN..PSALTER PAHLAVI LETTER YODH +10B8A..10B8B ; D # Lo [2] PSALTER PAHLAVI LETTER LAMEDH..PSALTER PAHLAVI LETTER MEM-QOPH +10B8D ; D # Lo PSALTER PAHLAVI LETTER SAMEKH +10B90 ; D # Lo PSALTER PAHLAVI LETTER SHIN +10BAD..10BAE ; D # No [2] PSALTER PAHLAVI NUMBER TEN..PSALTER PAHLAVI NUMBER TWENTY +10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O +10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA +10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL +10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH +10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY +10F70..10F73 ; D # Lo [4] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER WAW +10F76..10F81 ; D # Lo [12] OLD UYGHUR LETTER YODH..OLD UYGHUR LETTER LESH +10FB0 ; D # Lo CHORASMIAN LETTER ALEPH +10FB2..10FB3 ; D # Lo [2] CHORASMIAN LETTER BETH..CHORASMIAN LETTER GIMEL +10FB8 ; D # Lo CHORASMIAN LETTER ZAYIN +10FBB..10FBC ; D # Lo [2] CHORASMIAN LETTER KAPH..CHORASMIAN LETTER LAMEDH +10FBE..10FBF ; D # Lo [2] CHORASMIAN LETTER NUN..CHORASMIAN LETTER SAMEKH +10FC1 ; D # Lo CHORASMIAN LETTER PE +10FC4 ; D # Lo CHORASMIAN LETTER TAW +10FCA ; D # No CHORASMIAN NUMBER TWENTY +1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 610 + +# ================================================ + +# Joining_Type=Right_Joining + +0622..0625 ; R # Lo [4] ARABIC LETTER ALEF WITH MADDA ABOVE..ARABIC LETTER ALEF WITH HAMZA BELOW +0627 ; R # Lo ARABIC LETTER ALEF +0629 ; R # Lo ARABIC LETTER TEH MARBUTA +062F..0632 ; R # Lo [4] ARABIC LETTER DAL..ARABIC LETTER ZAIN +0648 ; R # Lo ARABIC LETTER WAW +0671..0673 ; R # Lo [3] ARABIC LETTER ALEF WASLA..ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0675..0677 ; R # Lo [3] ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER U WITH HAMZA ABOVE +0688..0699 ; R # Lo [18] ARABIC LETTER DDAL..ARABIC LETTER REH WITH FOUR DOTS ABOVE +06C0 ; R # Lo ARABIC LETTER HEH WITH YEH ABOVE +06C3..06CB ; R # Lo [9] ARABIC LETTER TEH MARBUTA GOAL..ARABIC LETTER VE +06CD ; R # Lo ARABIC LETTER YEH WITH TAIL +06CF ; R # Lo ARABIC LETTER WAW WITH DOT ABOVE +06D2..06D3 ; R # Lo [2] ARABIC LETTER YEH BARREE..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; R # Lo ARABIC LETTER AE +06EE..06EF ; R # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +0710 ; R # Lo SYRIAC LETTER ALAPH +0715..0719 ; R # Lo [5] SYRIAC LETTER DALATH..SYRIAC LETTER ZAIN +071E ; R # Lo SYRIAC LETTER YUDH HE +0728 ; R # Lo SYRIAC LETTER SADHE +072A ; R # Lo SYRIAC LETTER RISH +072C ; R # Lo SYRIAC LETTER TAW +072F ; R # Lo SYRIAC LETTER PERSIAN DHALATH +074D ; R # Lo SYRIAC LETTER SOGDIAN ZHAIN +0759..075B ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER REH WITH STROKE +076B..076C ; R # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE +0771 ; R # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS +0773..0774 ; R # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +0778..0779 ; R # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +0840 ; R # Lo MANDAIC LETTER HALQA +0846..0847 ; R # Lo [2] MANDAIC LETTER AZ..MANDAIC LETTER IT +0849 ; R # Lo MANDAIC LETTER AKSA +0854 ; R # Lo MANDAIC LETTER ASH +0856..0858 ; R # Lo [3] MANDAIC LETTER DUSHENNA..MANDAIC LETTER AIN +0867 ; R # Lo SYRIAC LETTER MALAYALAM RA +0869..086A ; R # Lo [2] SYRIAC LETTER MALAYALAM LLLA..SYRIAC LETTER MALAYALAM SSA +0870..0882 ; R # Lo [19] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC LETTER ALEF WITH ATTACHED LEFT HAMZA +088E ; R # Lo ARABIC VERTICAL TAIL +08AA..08AC ; R # Lo [3] ARABIC LETTER REH WITH LOOP..ARABIC LETTER ROHINGYA YEH +08AE ; R # Lo ARABIC LETTER DAL WITH THREE DOTS BELOW +08B1..08B2 ; R # Lo [2] ARABIC LETTER STRAIGHT WAW..ARABIC LETTER ZAIN WITH INVERTED V ABOVE +08B9 ; R # Lo ARABIC LETTER REH WITH SMALL NOON ABOVE +10AC5 ; R # Lo MANICHAEAN LETTER DALETH +10AC7 ; R # Lo MANICHAEAN LETTER WAW +10AC9..10ACA ; R # Lo [2] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER ZHAYIN +10ACE..10AD2 ; R # Lo [5] MANICHAEAN LETTER TETH..MANICHAEAN LETTER KHAPH +10ADD ; R # Lo MANICHAEAN LETTER SADHE +10AE1 ; R # Lo MANICHAEAN LETTER RESH +10AE4 ; R # Lo MANICHAEAN LETTER TAW +10AEF ; R # No MANICHAEAN NUMBER ONE HUNDRED +10B81 ; R # Lo PSALTER PAHLAVI LETTER BETH +10B83..10B85 ; R # Lo [3] PSALTER PAHLAVI LETTER DALETH..PSALTER PAHLAVI LETTER WAW-AYIN-RESH +10B89 ; R # Lo PSALTER PAHLAVI LETTER KAPH +10B8C ; R # Lo PSALTER PAHLAVI LETTER NUN +10B8E..10B8F ; R # Lo [2] PSALTER PAHLAVI LETTER PE..PSALTER PAHLAVI LETTER SADHE +10B91 ; R # Lo PSALTER PAHLAVI LETTER TAW +10BA9..10BAC ; R # No [4] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER FOUR +10D22 ; R # Lo HANIFI ROHINGYA MARK SAKIN +10F33 ; R # Lo SOGDIAN LETTER HE +10F54 ; R # No SOGDIAN NUMBER ONE HUNDRED +10F74..10F75 ; R # Lo [2] OLD UYGHUR LETTER ZAYIN..OLD UYGHUR LETTER FINAL HETH +10FB4..10FB6 ; R # Lo [3] CHORASMIAN LETTER DALETH..CHORASMIAN LETTER WAW +10FB9..10FBA ; R # Lo [2] CHORASMIAN LETTER HETH..CHORASMIAN LETTER YODH +10FBD ; R # Lo CHORASMIAN LETTER MEM +10FC2..10FC3 ; R # Lo [2] CHORASMIAN LETTER RESH..CHORASMIAN LETTER SHIN +10FC9 ; R # No CHORASMIAN NUMBER TEN + +# Total code points: 152 + +# ================================================ + +# Joining_Type=Left_Joining + +A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA +10ACD ; L # Lo MANICHAEAN LETTER HETH +10AD7 ; L # Lo MANICHAEAN LETTER NUN +10D00 ; L # Lo HANIFI ROHINGYA LETTER A +10FCB ; L # No CHORASMIAN NUMBER ONE HUNDRED + +# Total code points: 5 + +# ================================================ + +# Joining_Type=Transparent + +00AD ; T # Cf SOFT HYPHEN +0300..036F ; T # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; T # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; T # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; T # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; T # Mn HEBREW POINT RAFE +05C1..05C2 ; T # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; T # Mn HEBREW POINT QAMATS QATAN +0610..061A ; T # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061C ; T # Cf ARABIC LETTER MARK +064B..065F ; T # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; T # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; T # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; T # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; T # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; T # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +070F ; T # Cf SYRIAC ABBREVIATION MARK +0711 ; T # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; T # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; T # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; T # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; T # Mn NKO DANTAYALAN +0816..0819 ; T # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; T # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; T # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0898..089F ; T # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; T # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; T # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; T # Mn DEVANAGARI VOWEL SIGN OE +093C ; T # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; T # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; T # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; T # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; T # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; T # Mn BENGALI SIGN CANDRABINDU +09BC ; T # Mn BENGALI SIGN NUKTA +09C1..09C4 ; T # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; T # Mn BENGALI SIGN VIRAMA +09E2..09E3 ; T # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; T # Mn BENGALI SANDHI MARK +0A01..0A02 ; T # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; T # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; T # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; T # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; T # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; T # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; T # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; T # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; T # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; T # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; T # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; T # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; T # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; T # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; T # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; T # Mn ORIYA SIGN CANDRABINDU +0B3C ; T # Mn ORIYA SIGN NUKTA +0B3F ; T # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; T # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; T # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; T # Mn TAMIL SIGN ANUSVARA +0BC0 ; T # Mn TAMIL VOWEL SIGN II +0BCD ; T # Mn TAMIL SIGN VIRAMA +0C00 ; T # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; T # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; T # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; T # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; T # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; T # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; T # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; T # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; T # Mn KANNADA SIGN CANDRABINDU +0CBC ; T # Mn KANNADA SIGN NUKTA +0CBF ; T # Mn KANNADA VOWEL SIGN I +0CC6 ; T # Mn KANNADA VOWEL SIGN E +0CCC..0CCD ; T # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CE2..0CE3 ; T # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; T # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; T # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D41..0D44 ; T # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; T # Mn MALAYALAM SIGN VIRAMA +0D62..0D63 ; T # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; T # Mn SINHALA SIGN CANDRABINDU +0DCA ; T # Mn SINHALA SIGN AL-LAKUNA +0DD2..0DD4 ; T # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; T # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0E31 ; T # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; T # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; T # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; T # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; T # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECE ; T # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0F18..0F19 ; T # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; T # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; T # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; T # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; T # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; T # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; T # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; T # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; T # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; T # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; T # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; T # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; T # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; T # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; T # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; T # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; T # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; T # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; T # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; T # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; T # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; T # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; T # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; T # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; T # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; T # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; T # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; T # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; T # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; T # Mn KHMER SIGN ATTHACAN +180B..180D ; T # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; T # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; T # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; T # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; T # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; T # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; T # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; T # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; T # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; T # Mn BUGINESE VOWEL SIGN AE +1A56 ; T # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; T # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; T # Mn TAI THAM SIGN SAKOT +1A62 ; T # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; T # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; T # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; T # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; T # Mn BALINESE SIGN REREKAN +1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; T # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; T # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; T # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; T # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; T # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; T # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; T # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; T # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; T # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; T # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; T # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; T # Mn VEDIC SIGN TIRYAK +1CF4 ; T # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; T # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; T # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200B ; T # Cf ZERO WIDTH SPACE +200E..200F ; T # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; T # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; T # Cf [5] WORD JOINER..INVISIBLE PLUS +206A..206F ; T # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +20D0..20DC ; T # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; T # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; T # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; T # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; T # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; T # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; T # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; T # Mn COMBINING CYRILLIC VZMET +A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; T # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; T # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; T # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; T # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; T # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; T # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; T # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; T # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; T # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; T # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; T # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; T # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; T # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; T # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; T # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9E5 ; T # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; T # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; T # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; T # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; T # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; T # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; T # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; T # Mn TAI VIET MAI KANG +AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; T # Mn TAI VIET TONE MAI THO +AAEC..AAED ; T # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; T # Mn MEETEI MAYEK VIRAMA +ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; T # Mn MEETEI MAYEK APUN IYEK +FB1E ; T # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; T # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; T # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FEFF ; T # Cf ZERO WIDTH NO-BREAK SPACE +FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +101FD ; T # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; T # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; T # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; T # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; T # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; T # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; T # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; T # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; T # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFD..10EFF ; T # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; T # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; T # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; T # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; T # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; T # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; T # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; T # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; T # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; T # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; T # Mn MAHAJANI SIGN NUKTA +11180..11181 ; T # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; T # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; T # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; T # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; T # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; T # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; T # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; T # Mn KHOJKI SIGN SUKUN +11241 ; T # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; T # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; T # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; T # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; T # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11340 ; T # Mn GRANTHA VOWEL SIGN II +11366..1136C ; T # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; T # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11438..1143F ; T # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; T # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; T # Mn NEWA SIGN NUKTA +1145E ; T # Mn NEWA SANDHI MARK +114B3..114B8 ; T # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; T # Mn TIRHUTA VOWEL SIGN SHORT E +114BF..114C0 ; T # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; T # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115B2..115B5 ; T # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; T # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; T # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; T # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; T # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; T # Mn MODI SIGN ANUSVARA +1163F..11640 ; T # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; T # Mn TAKRI SIGN ANUSVARA +116AD ; T # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; T # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; T # Mn TAKRI SIGN NUKTA +1171D..1171F ; T # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; T # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; T # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; T # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; T # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193B..1193C ; T # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; T # Mn DIVES AKURU VIRAMA +11943 ; T # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; T # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; T # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; T # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; T # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; T # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; T # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; T # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; T # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; T # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; T # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; T # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; T # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; T # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; T # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; T # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; T # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; T # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; T # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; T # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; T # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; T # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; T # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; T # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; T # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; T # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; T # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; T # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; T # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; T # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; T # Mn KAWI VOWEL SIGN EU +11F42 ; T # Mn KAWI CONJOINER +13430..1343F ; T # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13440 ; T # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; T # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +16AF0..16AF4 ; T # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; T # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; T # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; T # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; T # Mn KHITAN SMALL SCRIPT FILLER +1BC9D..1BC9E ; T # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BCA0..1BCA3 ; T # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CF00..1CF2D ; T # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; T # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; T # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; T # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; T # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; T # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; T # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; T # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; T # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; T # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; T # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; T # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; T # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; T # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; T # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; T # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; T # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; T # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; T # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; T # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; T # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E8D0..1E8D6 ; T # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; T # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; T # Lm ADLAM NASALIZATION MARK +E0001 ; T # Cf LANGUAGE TAG +E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2150 + +# EOF diff --git a/gen/data-files b/gen/data-files index 568cdb5..9222c10 100755 --- a/gen/data-files +++ b/gen/data-files @@ -21,6 +21,7 @@ readonly PATHS=' extracted/DerivedBinaryProperties extracted/DerivedDecompositionType extracted/DerivedEastAsianWidth + extracted/DerivedJoiningType extracted/DerivedLineBreak extracted/DerivedNumericType extracted/DerivedNumericValues diff --git a/gen/prop/jt b/gen/prop/jt new file mode 100755 index 0000000..d8ca9ff --- /dev/null +++ b/gen/prop/jt @@ -0,0 +1,62 @@ +#!/bin/sh + +set -e +cd "${0%/*}/../.." +exec >lib/unicode/prop/uprop_get_jt.c + +gawk ' +BEGIN { + FS = "[ ;]+" + + print "/* This file is autogenerated by gen/prop/jt; DO NOT EDIT. */" + print "" + print "#include \"_bsearch.h\"" + print "#include \"macros.h\"" + print "#include \"rune.h\"" + print "#include \"unicode/prop.h\"" + print "" +} + +/^[A-F0-9]/ { + n = split($1, a, /\.\./) + lo = strtonum("0X" a[1]) + hi = strtonum("0X" a[n]) + + for (i = lo; i <= hi; i++) + props[i] = "JT_" $2 +} + +END { + print "static constexpr enum uprop_jt lookup_lat1[] = {" + for (i = 0; i < 0x100; i++) { + if (i % 8 == 0) + printf "\t" + printf "%s%s", (props[i] ? props[i] : "JT_U") ",", \ + i % 8 == 7 ? "\n" : " " + } + print "};" + print "" + print "static const struct {" + print "\trune lo, hi;" + print "\tenum uprop_jt val;" + print "} lookup[] = {" + + for (i = 0x100; i <= 0x10FFFF; i++) { + if (!props[i]) + continue + for (lo = i; props[lo] == props[i + 1]; i++) + ; + printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i] + } + + print "};" + print "" + print "_MLIB_DEFINE_BSEARCH(enum uprop_jt, lookup, JT_U)" + print "" + print "enum uprop_jt" + print "uprop_get_jt(rune ch)" + print "{" + print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);" + print "}" +} +' data/DerivedJoiningType | sed 's/\s*$//' diff --git a/include/unicode/prop.h b/include/unicode/prop.h index 6d4ca32..61e4441 100644 --- a/include/unicode/prop.h +++ b/include/unicode/prop.h @@ -674,6 +674,15 @@ enum uprop_insc { INSC_VOWEL_INDEPENDENT, }; +enum uprop_jt { + JT_U = 0, /* Non Joining */ + JT_C, /* Join Causing */ + JT_D, /* Dual Joining */ + JT_L, /* Left Joining */ + JT_R, /* Right Joining */ + JT_T, /* Transparent */ +}; + enum uprop_lb { LB_XX, /* Unknown */ LB_AI, /* Ambiguous */ @@ -923,6 +932,7 @@ enum uprop_vo { [[_mlib_pure]] enum uprop_hst uprop_get_hst(rune); [[_mlib_pure]] enum uprop_inpc uprop_get_inpc(rune); [[_mlib_pure]] enum uprop_insc uprop_get_insc(rune); +[[_mlib_pure]] enum uprop_jt uprop_get_jt(rune); [[_mlib_pure]] enum uprop_lb uprop_get_lb(rune); [[_mlib_pure]] enum uprop_nt uprop_get_nt(rune); [[_mlib_pure]] enum uprop_sc uprop_get_sc(rune); diff --git a/lib/unicode/prop/uprop_get_jt.c b/lib/unicode/prop/uprop_get_jt.c new file mode 100644 index 0000000..4ccb44f --- /dev/null +++ b/lib/unicode/prop/uprop_get_jt.c @@ -0,0 +1,560 @@ +/* This file is autogenerated by gen/prop/jt; DO NOT EDIT. */ + +#include "_bsearch.h" +#include "macros.h" +#include "rune.h" +#include "unicode/prop.h" + +static constexpr enum uprop_jt lookup_lat1[] = { + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_T, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, + JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, JT_U, +}; + +static const struct { + rune lo, hi; + enum uprop_jt val; +} lookup[] = { + {RUNE_C(0x000300), RUNE_C(0x00036F), JT_T}, + {RUNE_C(0x000483), RUNE_C(0x000489), JT_T}, + {RUNE_C(0x000591), RUNE_C(0x0005BD), JT_T}, + {RUNE_C(0x0005BF), RUNE_C(0x0005BF), JT_T}, + {RUNE_C(0x0005C1), RUNE_C(0x0005C2), JT_T}, + {RUNE_C(0x0005C4), RUNE_C(0x0005C5), JT_T}, + {RUNE_C(0x0005C7), RUNE_C(0x0005C7), JT_T}, + {RUNE_C(0x000610), RUNE_C(0x00061A), JT_T}, + {RUNE_C(0x00061C), RUNE_C(0x00061C), JT_T}, + {RUNE_C(0x000620), RUNE_C(0x000620), JT_D}, + {RUNE_C(0x000622), RUNE_C(0x000625), JT_R}, + {RUNE_C(0x000626), RUNE_C(0x000626), JT_D}, + {RUNE_C(0x000627), RUNE_C(0x000627), JT_R}, + {RUNE_C(0x000628), RUNE_C(0x000628), JT_D}, + {RUNE_C(0x000629), RUNE_C(0x000629), JT_R}, + {RUNE_C(0x00062A), RUNE_C(0x00062E), JT_D}, + {RUNE_C(0x00062F), RUNE_C(0x000632), JT_R}, + {RUNE_C(0x000633), RUNE_C(0x00063F), JT_D}, + {RUNE_C(0x000640), RUNE_C(0x000640), JT_C}, + {RUNE_C(0x000641), RUNE_C(0x000647), JT_D}, + {RUNE_C(0x000648), RUNE_C(0x000648), JT_R}, + {RUNE_C(0x000649), RUNE_C(0x00064A), JT_D}, + {RUNE_C(0x00064B), RUNE_C(0x00065F), JT_T}, + {RUNE_C(0x00066E), RUNE_C(0x00066F), JT_D}, + {RUNE_C(0x000670), RUNE_C(0x000670), JT_T}, + {RUNE_C(0x000671), RUNE_C(0x000673), JT_R}, + {RUNE_C(0x000675), RUNE_C(0x000677), JT_R}, + {RUNE_C(0x000678), RUNE_C(0x000687), JT_D}, + {RUNE_C(0x000688), RUNE_C(0x000699), JT_R}, + {RUNE_C(0x00069A), RUNE_C(0x0006BF), JT_D}, + {RUNE_C(0x0006C0), RUNE_C(0x0006C0), JT_R}, + {RUNE_C(0x0006C1), RUNE_C(0x0006C2), JT_D}, + {RUNE_C(0x0006C3), RUNE_C(0x0006CB), JT_R}, + {RUNE_C(0x0006CC), RUNE_C(0x0006CC), JT_D}, + {RUNE_C(0x0006CD), RUNE_C(0x0006CD), JT_R}, + {RUNE_C(0x0006CE), RUNE_C(0x0006CE), JT_D}, + {RUNE_C(0x0006CF), RUNE_C(0x0006CF), JT_R}, + {RUNE_C(0x0006D0), RUNE_C(0x0006D1), JT_D}, + {RUNE_C(0x0006D2), RUNE_C(0x0006D3), JT_R}, + {RUNE_C(0x0006D5), RUNE_C(0x0006D5), JT_R}, + {RUNE_C(0x0006D6), RUNE_C(0x0006DC), JT_T}, + {RUNE_C(0x0006DF), RUNE_C(0x0006E4), JT_T}, + {RUNE_C(0x0006E7), RUNE_C(0x0006E8), JT_T}, + {RUNE_C(0x0006EA), RUNE_C(0x0006ED), JT_T}, + {RUNE_C(0x0006EE), RUNE_C(0x0006EF), JT_R}, + {RUNE_C(0x0006FA), RUNE_C(0x0006FC), JT_D}, + {RUNE_C(0x0006FF), RUNE_C(0x0006FF), JT_D}, + {RUNE_C(0x00070F), RUNE_C(0x00070F), JT_T}, + {RUNE_C(0x000710), RUNE_C(0x000710), JT_R}, + {RUNE_C(0x000711), RUNE_C(0x000711), JT_T}, + {RUNE_C(0x000712), RUNE_C(0x000714), JT_D}, + {RUNE_C(0x000715), RUNE_C(0x000719), JT_R}, + {RUNE_C(0x00071A), RUNE_C(0x00071D), JT_D}, + {RUNE_C(0x00071E), RUNE_C(0x00071E), JT_R}, + {RUNE_C(0x00071F), RUNE_C(0x000727), JT_D}, + {RUNE_C(0x000728), RUNE_C(0x000728), JT_R}, + {RUNE_C(0x000729), RUNE_C(0x000729), JT_D}, + {RUNE_C(0x00072A), RUNE_C(0x00072A), JT_R}, + {RUNE_C(0x00072B), RUNE_C(0x00072B), JT_D}, + {RUNE_C(0x00072C), RUNE_C(0x00072C), JT_R}, + {RUNE_C(0x00072D), RUNE_C(0x00072E), JT_D}, + {RUNE_C(0x00072F), RUNE_C(0x00072F), JT_R}, + {RUNE_C(0x000730), RUNE_C(0x00074A), JT_T}, + {RUNE_C(0x00074D), RUNE_C(0x00074D), JT_R}, + {RUNE_C(0x00074E), RUNE_C(0x000758), JT_D}, + {RUNE_C(0x000759), RUNE_C(0x00075B), JT_R}, + {RUNE_C(0x00075C), RUNE_C(0x00076A), JT_D}, + {RUNE_C(0x00076B), RUNE_C(0x00076C), JT_R}, + {RUNE_C(0x00076D), RUNE_C(0x000770), JT_D}, + {RUNE_C(0x000771), RUNE_C(0x000771), JT_R}, + {RUNE_C(0x000772), RUNE_C(0x000772), JT_D}, + {RUNE_C(0x000773), RUNE_C(0x000774), JT_R}, + {RUNE_C(0x000775), RUNE_C(0x000777), JT_D}, + {RUNE_C(0x000778), RUNE_C(0x000779), JT_R}, + {RUNE_C(0x00077A), RUNE_C(0x00077F), JT_D}, + {RUNE_C(0x0007A6), RUNE_C(0x0007B0), JT_T}, + {RUNE_C(0x0007CA), RUNE_C(0x0007EA), JT_D}, + {RUNE_C(0x0007EB), RUNE_C(0x0007F3), JT_T}, + {RUNE_C(0x0007FA), RUNE_C(0x0007FA), JT_C}, + {RUNE_C(0x0007FD), RUNE_C(0x0007FD), JT_T}, + {RUNE_C(0x000816), RUNE_C(0x000819), JT_T}, + {RUNE_C(0x00081B), RUNE_C(0x000823), JT_T}, + {RUNE_C(0x000825), RUNE_C(0x000827), JT_T}, + {RUNE_C(0x000829), RUNE_C(0x00082D), JT_T}, + {RUNE_C(0x000840), RUNE_C(0x000840), JT_R}, + {RUNE_C(0x000841), RUNE_C(0x000845), JT_D}, + {RUNE_C(0x000846), RUNE_C(0x000847), JT_R}, + {RUNE_C(0x000848), RUNE_C(0x000848), JT_D}, + {RUNE_C(0x000849), RUNE_C(0x000849), JT_R}, + {RUNE_C(0x00084A), RUNE_C(0x000853), JT_D}, + {RUNE_C(0x000854), RUNE_C(0x000854), JT_R}, + {RUNE_C(0x000855), RUNE_C(0x000855), JT_D}, + {RUNE_C(0x000856), RUNE_C(0x000858), JT_R}, + {RUNE_C(0x000859), RUNE_C(0x00085B), JT_T}, + {RUNE_C(0x000860), RUNE_C(0x000860), JT_D}, + {RUNE_C(0x000862), RUNE_C(0x000865), JT_D}, + {RUNE_C(0x000867), RUNE_C(0x000867), JT_R}, + {RUNE_C(0x000868), RUNE_C(0x000868), JT_D}, + {RUNE_C(0x000869), RUNE_C(0x00086A), JT_R}, + {RUNE_C(0x000870), RUNE_C(0x000882), JT_R}, + {RUNE_C(0x000883), RUNE_C(0x000885), JT_C}, + {RUNE_C(0x000886), RUNE_C(0x000886), JT_D}, + {RUNE_C(0x000889), RUNE_C(0x00088D), JT_D}, + {RUNE_C(0x00088E), RUNE_C(0x00088E), JT_R}, + {RUNE_C(0x000898), RUNE_C(0x00089F), JT_T}, + {RUNE_C(0x0008A0), RUNE_C(0x0008A9), JT_D}, + {RUNE_C(0x0008AA), RUNE_C(0x0008AC), JT_R}, + {RUNE_C(0x0008AE), RUNE_C(0x0008AE), JT_R}, + {RUNE_C(0x0008AF), RUNE_C(0x0008B0), JT_D}, + {RUNE_C(0x0008B1), RUNE_C(0x0008B2), JT_R}, + {RUNE_C(0x0008B3), RUNE_C(0x0008B8), JT_D}, + {RUNE_C(0x0008B9), RUNE_C(0x0008B9), JT_R}, + {RUNE_C(0x0008BA), RUNE_C(0x0008C8), JT_D}, + {RUNE_C(0x0008CA), RUNE_C(0x0008E1), JT_T}, + {RUNE_C(0x0008E3), RUNE_C(0x000902), JT_T}, + {RUNE_C(0x00093A), RUNE_C(0x00093A), JT_T}, + {RUNE_C(0x00093C), RUNE_C(0x00093C), JT_T}, + {RUNE_C(0x000941), RUNE_C(0x000948), JT_T}, + {RUNE_C(0x00094D), RUNE_C(0x00094D), JT_T}, + {RUNE_C(0x000951), RUNE_C(0x000957), JT_T}, + {RUNE_C(0x000962), RUNE_C(0x000963), JT_T}, + {RUNE_C(0x000981), RUNE_C(0x000981), JT_T}, + {RUNE_C(0x0009BC), RUNE_C(0x0009BC), JT_T}, + {RUNE_C(0x0009C1), RUNE_C(0x0009C4), JT_T}, + {RUNE_C(0x0009CD), RUNE_C(0x0009CD), JT_T}, + {RUNE_C(0x0009E2), RUNE_C(0x0009E3), JT_T}, + {RUNE_C(0x0009FE), RUNE_C(0x0009FE), JT_T}, + {RUNE_C(0x000A01), RUNE_C(0x000A02), JT_T}, + {RUNE_C(0x000A3C), RUNE_C(0x000A3C), JT_T}, + {RUNE_C(0x000A41), RUNE_C(0x000A42), JT_T}, + {RUNE_C(0x000A47), RUNE_C(0x000A48), JT_T}, + {RUNE_C(0x000A4B), RUNE_C(0x000A4D), JT_T}, + {RUNE_C(0x000A51), RUNE_C(0x000A51), JT_T}, + {RUNE_C(0x000A70), RUNE_C(0x000A71), JT_T}, + {RUNE_C(0x000A75), RUNE_C(0x000A75), JT_T}, + {RUNE_C(0x000A81), RUNE_C(0x000A82), JT_T}, + {RUNE_C(0x000ABC), RUNE_C(0x000ABC), JT_T}, + {RUNE_C(0x000AC1), RUNE_C(0x000AC5), JT_T}, + {RUNE_C(0x000AC7), RUNE_C(0x000AC8), JT_T}, + {RUNE_C(0x000ACD), RUNE_C(0x000ACD), JT_T}, + {RUNE_C(0x000AE2), RUNE_C(0x000AE3), JT_T}, + {RUNE_C(0x000AFA), RUNE_C(0x000AFF), JT_T}, + {RUNE_C(0x000B01), RUNE_C(0x000B01), JT_T}, + {RUNE_C(0x000B3C), RUNE_C(0x000B3C), JT_T}, + {RUNE_C(0x000B3F), RUNE_C(0x000B3F), JT_T}, + {RUNE_C(0x000B41), RUNE_C(0x000B44), JT_T}, + {RUNE_C(0x000B4D), RUNE_C(0x000B4D), JT_T}, + {RUNE_C(0x000B55), RUNE_C(0x000B56), JT_T}, + {RUNE_C(0x000B62), RUNE_C(0x000B63), JT_T}, + {RUNE_C(0x000B82), RUNE_C(0x000B82), JT_T}, + {RUNE_C(0x000BC0), RUNE_C(0x000BC0), JT_T}, + {RUNE_C(0x000BCD), RUNE_C(0x000BCD), JT_T}, + {RUNE_C(0x000C00), RUNE_C(0x000C00), JT_T}, + {RUNE_C(0x000C04), RUNE_C(0x000C04), JT_T}, + {RUNE_C(0x000C3C), RUNE_C(0x000C3C), JT_T}, + {RUNE_C(0x000C3E), RUNE_C(0x000C40), JT_T}, + {RUNE_C(0x000C46), RUNE_C(0x000C48), JT_T}, + {RUNE_C(0x000C4A), RUNE_C(0x000C4D), JT_T}, + {RUNE_C(0x000C55), RUNE_C(0x000C56), JT_T}, + {RUNE_C(0x000C62), RUNE_C(0x000C63), JT_T}, + {RUNE_C(0x000C81), RUNE_C(0x000C81), JT_T}, + {RUNE_C(0x000CBC), RUNE_C(0x000CBC), JT_T}, + {RUNE_C(0x000CBF), RUNE_C(0x000CBF), JT_T}, + {RUNE_C(0x000CC6), RUNE_C(0x000CC6), JT_T}, + {RUNE_C(0x000CCC), RUNE_C(0x000CCD), JT_T}, + {RUNE_C(0x000CE2), RUNE_C(0x000CE3), JT_T}, + {RUNE_C(0x000D00), RUNE_C(0x000D01), JT_T}, + {RUNE_C(0x000D3B), RUNE_C(0x000D3C), JT_T}, + {RUNE_C(0x000D41), RUNE_C(0x000D44), JT_T}, + {RUNE_C(0x000D4D), RUNE_C(0x000D4D), JT_T}, + {RUNE_C(0x000D62), RUNE_C(0x000D63), JT_T}, + {RUNE_C(0x000D81), RUNE_C(0x000D81), JT_T}, + {RUNE_C(0x000DCA), RUNE_C(0x000DCA), JT_T}, + {RUNE_C(0x000DD2), RUNE_C(0x000DD4), JT_T}, + {RUNE_C(0x000DD6), RUNE_C(0x000DD6), JT_T}, + {RUNE_C(0x000E31), RUNE_C(0x000E31), JT_T}, + {RUNE_C(0x000E34), RUNE_C(0x000E3A), JT_T}, + {RUNE_C(0x000E47), RUNE_C(0x000E4E), JT_T}, + {RUNE_C(0x000EB1), RUNE_C(0x000EB1), JT_T}, + {RUNE_C(0x000EB4), RUNE_C(0x000EBC), JT_T}, + {RUNE_C(0x000EC8), RUNE_C(0x000ECE), JT_T}, + {RUNE_C(0x000F18), RUNE_C(0x000F19), JT_T}, + {RUNE_C(0x000F35), RUNE_C(0x000F35), JT_T}, + {RUNE_C(0x000F37), RUNE_C(0x000F37), JT_T}, + {RUNE_C(0x000F39), RUNE_C(0x000F39), JT_T}, + {RUNE_C(0x000F71), RUNE_C(0x000F7E), JT_T}, + {RUNE_C(0x000F80), RUNE_C(0x000F84), JT_T}, + {RUNE_C(0x000F86), RUNE_C(0x000F87), JT_T}, + {RUNE_C(0x000F8D), RUNE_C(0x000F97), JT_T}, + {RUNE_C(0x000F99), RUNE_C(0x000FBC), JT_T}, + {RUNE_C(0x000FC6), RUNE_C(0x000FC6), JT_T}, + {RUNE_C(0x00102D), RUNE_C(0x001030), JT_T}, + {RUNE_C(0x001032), RUNE_C(0x001037), JT_T}, + {RUNE_C(0x001039), RUNE_C(0x00103A), JT_T}, + {RUNE_C(0x00103D), RUNE_C(0x00103E), JT_T}, + {RUNE_C(0x001058), RUNE_C(0x001059), JT_T}, + {RUNE_C(0x00105E), RUNE_C(0x001060), JT_T}, + {RUNE_C(0x001071), RUNE_C(0x001074), JT_T}, + {RUNE_C(0x001082), RUNE_C(0x001082), JT_T}, + {RUNE_C(0x001085), RUNE_C(0x001086), JT_T}, + {RUNE_C(0x00108D), RUNE_C(0x00108D), JT_T}, + {RUNE_C(0x00109D), RUNE_C(0x00109D), JT_T}, + {RUNE_C(0x00135D), RUNE_C(0x00135F), JT_T}, + {RUNE_C(0x001712), RUNE_C(0x001714), JT_T}, + {RUNE_C(0x001732), RUNE_C(0x001733), JT_T}, + {RUNE_C(0x001752), RUNE_C(0x001753), JT_T}, + {RUNE_C(0x001772), RUNE_C(0x001773), JT_T}, + {RUNE_C(0x0017B4), RUNE_C(0x0017B5), JT_T}, + {RUNE_C(0x0017B7), RUNE_C(0x0017BD), JT_T}, + {RUNE_C(0x0017C6), RUNE_C(0x0017C6), JT_T}, + {RUNE_C(0x0017C9), RUNE_C(0x0017D3), JT_T}, + {RUNE_C(0x0017DD), RUNE_C(0x0017DD), JT_T}, + {RUNE_C(0x001807), RUNE_C(0x001807), JT_D}, + {RUNE_C(0x00180A), RUNE_C(0x00180A), JT_C}, + {RUNE_C(0x00180B), RUNE_C(0x00180D), JT_T}, + {RUNE_C(0x00180F), RUNE_C(0x00180F), JT_T}, + {RUNE_C(0x001820), RUNE_C(0x001878), JT_D}, + {RUNE_C(0x001885), RUNE_C(0x001886), JT_T}, + {RUNE_C(0x001887), RUNE_C(0x0018A8), JT_D}, + {RUNE_C(0x0018A9), RUNE_C(0x0018A9), JT_T}, + {RUNE_C(0x0018AA), RUNE_C(0x0018AA), JT_D}, + {RUNE_C(0x001920), RUNE_C(0x001922), JT_T}, + {RUNE_C(0x001927), RUNE_C(0x001928), JT_T}, + {RUNE_C(0x001932), RUNE_C(0x001932), JT_T}, + {RUNE_C(0x001939), RUNE_C(0x00193B), JT_T}, + {RUNE_C(0x001A17), RUNE_C(0x001A18), JT_T}, + {RUNE_C(0x001A1B), RUNE_C(0x001A1B), JT_T}, + {RUNE_C(0x001A56), RUNE_C(0x001A56), JT_T}, + {RUNE_C(0x001A58), RUNE_C(0x001A5E), JT_T}, + {RUNE_C(0x001A60), RUNE_C(0x001A60), JT_T}, + {RUNE_C(0x001A62), RUNE_C(0x001A62), JT_T}, + {RUNE_C(0x001A65), RUNE_C(0x001A6C), JT_T}, + {RUNE_C(0x001A73), RUNE_C(0x001A7C), JT_T}, + {RUNE_C(0x001A7F), RUNE_C(0x001A7F), JT_T}, + {RUNE_C(0x001AB0), RUNE_C(0x001ACE), JT_T}, + {RUNE_C(0x001B00), RUNE_C(0x001B03), JT_T}, + {RUNE_C(0x001B34), RUNE_C(0x001B34), JT_T}, + {RUNE_C(0x001B36), RUNE_C(0x001B3A), JT_T}, + {RUNE_C(0x001B3C), RUNE_C(0x001B3C), JT_T}, + {RUNE_C(0x001B42), RUNE_C(0x001B42), JT_T}, + {RUNE_C(0x001B6B), RUNE_C(0x001B73), JT_T}, + {RUNE_C(0x001B80), RUNE_C(0x001B81), JT_T}, + {RUNE_C(0x001BA2), RUNE_C(0x001BA5), JT_T}, + {RUNE_C(0x001BA8), RUNE_C(0x001BA9), JT_T}, + {RUNE_C(0x001BAB), RUNE_C(0x001BAD), JT_T}, + {RUNE_C(0x001BE6), RUNE_C(0x001BE6), JT_T}, + {RUNE_C(0x001BE8), RUNE_C(0x001BE9), JT_T}, + {RUNE_C(0x001BED), RUNE_C(0x001BED), JT_T}, + {RUNE_C(0x001BEF), RUNE_C(0x001BF1), JT_T}, + {RUNE_C(0x001C2C), RUNE_C(0x001C33), JT_T}, + {RUNE_C(0x001C36), RUNE_C(0x001C37), JT_T}, + {RUNE_C(0x001CD0), RUNE_C(0x001CD2), JT_T}, + {RUNE_C(0x001CD4), RUNE_C(0x001CE0), JT_T}, + {RUNE_C(0x001CE2), RUNE_C(0x001CE8), JT_T}, + {RUNE_C(0x001CED), RUNE_C(0x001CED), JT_T}, + {RUNE_C(0x001CF4), RUNE_C(0x001CF4), JT_T}, + {RUNE_C(0x001CF8), RUNE_C(0x001CF9), JT_T}, + {RUNE_C(0x001DC0), RUNE_C(0x001DFF), JT_T}, + {RUNE_C(0x00200B), RUNE_C(0x00200B), JT_T}, + {RUNE_C(0x00200D), RUNE_C(0x00200D), JT_C}, + {RUNE_C(0x00200E), RUNE_C(0x00200F), JT_T}, + {RUNE_C(0x00202A), RUNE_C(0x00202E), JT_T}, + {RUNE_C(0x002060), RUNE_C(0x002064), JT_T}, + {RUNE_C(0x00206A), RUNE_C(0x00206F), JT_T}, + {RUNE_C(0x0020D0), RUNE_C(0x0020F0), JT_T}, + {RUNE_C(0x002CEF), RUNE_C(0x002CF1), JT_T}, + {RUNE_C(0x002D7F), RUNE_C(0x002D7F), JT_T}, + {RUNE_C(0x002DE0), RUNE_C(0x002DFF), JT_T}, + {RUNE_C(0x00302A), RUNE_C(0x00302D), JT_T}, + {RUNE_C(0x003099), RUNE_C(0x00309A), JT_T}, + {RUNE_C(0x00A66F), RUNE_C(0x00A672), JT_T}, + {RUNE_C(0x00A674), RUNE_C(0x00A67D), JT_T}, + {RUNE_C(0x00A69E), RUNE_C(0x00A69F), JT_T}, + {RUNE_C(0x00A6F0), RUNE_C(0x00A6F1), JT_T}, + {RUNE_C(0x00A802), RUNE_C(0x00A802), JT_T}, + {RUNE_C(0x00A806), RUNE_C(0x00A806), JT_T}, + {RUNE_C(0x00A80B), RUNE_C(0x00A80B), JT_T}, + {RUNE_C(0x00A825), RUNE_C(0x00A826), JT_T}, + {RUNE_C(0x00A82C), RUNE_C(0x00A82C), JT_T}, + {RUNE_C(0x00A840), RUNE_C(0x00A871), JT_D}, + {RUNE_C(0x00A872), RUNE_C(0x00A872), JT_L}, + {RUNE_C(0x00A8C4), RUNE_C(0x00A8C5), JT_T}, + {RUNE_C(0x00A8E0), RUNE_C(0x00A8F1), JT_T}, + {RUNE_C(0x00A8FF), RUNE_C(0x00A8FF), JT_T}, + {RUNE_C(0x00A926), RUNE_C(0x00A92D), JT_T}, + {RUNE_C(0x00A947), RUNE_C(0x00A951), JT_T}, + {RUNE_C(0x00A980), RUNE_C(0x00A982), JT_T}, + {RUNE_C(0x00A9B3), RUNE_C(0x00A9B3), JT_T}, + {RUNE_C(0x00A9B6), RUNE_C(0x00A9B9), JT_T}, + {RUNE_C(0x00A9BC), RUNE_C(0x00A9BD), JT_T}, + {RUNE_C(0x00A9E5), RUNE_C(0x00A9E5), JT_T}, + {RUNE_C(0x00AA29), RUNE_C(0x00AA2E), JT_T}, + {RUNE_C(0x00AA31), RUNE_C(0x00AA32), JT_T}, + {RUNE_C(0x00AA35), RUNE_C(0x00AA36), JT_T}, + {RUNE_C(0x00AA43), RUNE_C(0x00AA43), JT_T}, + {RUNE_C(0x00AA4C), RUNE_C(0x00AA4C), JT_T}, + {RUNE_C(0x00AA7C), RUNE_C(0x00AA7C), JT_T}, + {RUNE_C(0x00AAB0), RUNE_C(0x00AAB0), JT_T}, + {RUNE_C(0x00AAB2), RUNE_C(0x00AAB4), JT_T}, + {RUNE_C(0x00AAB7), RUNE_C(0x00AAB8), JT_T}, + {RUNE_C(0x00AABE), RUNE_C(0x00AABF), JT_T}, + {RUNE_C(0x00AAC1), RUNE_C(0x00AAC1), JT_T}, + {RUNE_C(0x00AAEC), RUNE_C(0x00AAED), JT_T}, + {RUNE_C(0x00AAF6), RUNE_C(0x00AAF6), JT_T}, + {RUNE_C(0x00ABE5), RUNE_C(0x00ABE5), JT_T}, + {RUNE_C(0x00ABE8), RUNE_C(0x00ABE8), JT_T}, + {RUNE_C(0x00ABED), RUNE_C(0x00ABED), JT_T}, + {RUNE_C(0x00FB1E), RUNE_C(0x00FB1E), JT_T}, + {RUNE_C(0x00FE00), RUNE_C(0x00FE0F), JT_T}, + {RUNE_C(0x00FE20), RUNE_C(0x00FE2F), JT_T}, + {RUNE_C(0x00FEFF), RUNE_C(0x00FEFF), JT_T}, + {RUNE_C(0x00FFF9), RUNE_C(0x00FFFB), JT_T}, + {RUNE_C(0x0101FD), RUNE_C(0x0101FD), JT_T}, + {RUNE_C(0x0102E0), RUNE_C(0x0102E0), JT_T}, + {RUNE_C(0x010376), RUNE_C(0x01037A), JT_T}, + {RUNE_C(0x010A01), RUNE_C(0x010A03), JT_T}, + {RUNE_C(0x010A05), RUNE_C(0x010A06), JT_T}, + {RUNE_C(0x010A0C), RUNE_C(0x010A0F), JT_T}, + {RUNE_C(0x010A38), RUNE_C(0x010A3A), JT_T}, + {RUNE_C(0x010A3F), RUNE_C(0x010A3F), JT_T}, + {RUNE_C(0x010AC0), RUNE_C(0x010AC4), JT_D}, + {RUNE_C(0x010AC5), RUNE_C(0x010AC5), JT_R}, + {RUNE_C(0x010AC7), RUNE_C(0x010AC7), JT_R}, + {RUNE_C(0x010AC9), RUNE_C(0x010ACA), JT_R}, + {RUNE_C(0x010ACD), RUNE_C(0x010ACD), JT_L}, + {RUNE_C(0x010ACE), RUNE_C(0x010AD2), JT_R}, + {RUNE_C(0x010AD3), RUNE_C(0x010AD6), JT_D}, + {RUNE_C(0x010AD7), RUNE_C(0x010AD7), JT_L}, + {RUNE_C(0x010AD8), RUNE_C(0x010ADC), JT_D}, + {RUNE_C(0x010ADD), RUNE_C(0x010ADD), JT_R}, + {RUNE_C(0x010ADE), RUNE_C(0x010AE0), JT_D}, + {RUNE_C(0x010AE1), RUNE_C(0x010AE1), JT_R}, + {RUNE_C(0x010AE4), RUNE_C(0x010AE4), JT_R}, + {RUNE_C(0x010AE5), RUNE_C(0x010AE6), JT_T}, + {RUNE_C(0x010AEB), RUNE_C(0x010AEE), JT_D}, + {RUNE_C(0x010AEF), RUNE_C(0x010AEF), JT_R}, + {RUNE_C(0x010B80), RUNE_C(0x010B80), JT_D}, + {RUNE_C(0x010B81), RUNE_C(0x010B81), JT_R}, + {RUNE_C(0x010B82), RUNE_C(0x010B82), JT_D}, + {RUNE_C(0x010B83), RUNE_C(0x010B85), JT_R}, + {RUNE_C(0x010B86), RUNE_C(0x010B88), JT_D}, + {RUNE_C(0x010B89), RUNE_C(0x010B89), JT_R}, + {RUNE_C(0x010B8A), RUNE_C(0x010B8B), JT_D}, + {RUNE_C(0x010B8C), RUNE_C(0x010B8C), JT_R}, + {RUNE_C(0x010B8D), RUNE_C(0x010B8D), JT_D}, + {RUNE_C(0x010B8E), RUNE_C(0x010B8F), JT_R}, + {RUNE_C(0x010B90), RUNE_C(0x010B90), JT_D}, + {RUNE_C(0x010B91), RUNE_C(0x010B91), JT_R}, + {RUNE_C(0x010BA9), RUNE_C(0x010BAC), JT_R}, + {RUNE_C(0x010BAD), RUNE_C(0x010BAE), JT_D}, + {RUNE_C(0x010D00), RUNE_C(0x010D00), JT_L}, + {RUNE_C(0x010D01), RUNE_C(0x010D21), JT_D}, + {RUNE_C(0x010D22), RUNE_C(0x010D22), JT_R}, + {RUNE_C(0x010D23), RUNE_C(0x010D23), JT_D}, + {RUNE_C(0x010D24), RUNE_C(0x010D27), JT_T}, + {RUNE_C(0x010EAB), RUNE_C(0x010EAC), JT_T}, + {RUNE_C(0x010EFD), RUNE_C(0x010EFF), JT_T}, + {RUNE_C(0x010F30), RUNE_C(0x010F32), JT_D}, + {RUNE_C(0x010F33), RUNE_C(0x010F33), JT_R}, + {RUNE_C(0x010F34), RUNE_C(0x010F44), JT_D}, + {RUNE_C(0x010F46), RUNE_C(0x010F50), JT_T}, + {RUNE_C(0x010F51), RUNE_C(0x010F53), JT_D}, + {RUNE_C(0x010F54), RUNE_C(0x010F54), JT_R}, + {RUNE_C(0x010F70), RUNE_C(0x010F73), JT_D}, + {RUNE_C(0x010F74), RUNE_C(0x010F75), JT_R}, + {RUNE_C(0x010F76), RUNE_C(0x010F81), JT_D}, + {RUNE_C(0x010F82), RUNE_C(0x010F85), JT_T}, + {RUNE_C(0x010FB0), RUNE_C(0x010FB0), JT_D}, + {RUNE_C(0x010FB2), RUNE_C(0x010FB3), JT_D}, + {RUNE_C(0x010FB4), RUNE_C(0x010FB6), JT_R}, + {RUNE_C(0x010FB8), RUNE_C(0x010FB8), JT_D}, + {RUNE_C(0x010FB9), RUNE_C(0x010FBA), JT_R}, + {RUNE_C(0x010FBB), RUNE_C(0x010FBC), JT_D}, + {RUNE_C(0x010FBD), RUNE_C(0x010FBD), JT_R}, + {RUNE_C(0x010FBE), RUNE_C(0x010FBF), JT_D}, + {RUNE_C(0x010FC1), RUNE_C(0x010FC1), JT_D}, + {RUNE_C(0x010FC2), RUNE_C(0x010FC3), JT_R}, + {RUNE_C(0x010FC4), RUNE_C(0x010FC4), JT_D}, + {RUNE_C(0x010FC9), RUNE_C(0x010FC9), JT_R}, + {RUNE_C(0x010FCA), RUNE_C(0x010FCA), JT_D}, + {RUNE_C(0x010FCB), RUNE_C(0x010FCB), JT_L}, + {RUNE_C(0x011001), RUNE_C(0x011001), JT_T}, + {RUNE_C(0x011038), RUNE_C(0x011046), JT_T}, + {RUNE_C(0x011070), RUNE_C(0x011070), JT_T}, + {RUNE_C(0x011073), RUNE_C(0x011074), JT_T}, + {RUNE_C(0x01107F), RUNE_C(0x011081), JT_T}, + {RUNE_C(0x0110B3), RUNE_C(0x0110B6), JT_T}, + {RUNE_C(0x0110B9), RUNE_C(0x0110BA), JT_T}, + {RUNE_C(0x0110C2), RUNE_C(0x0110C2), JT_T}, + {RUNE_C(0x011100), RUNE_C(0x011102), JT_T}, + {RUNE_C(0x011127), RUNE_C(0x01112B), JT_T}, + {RUNE_C(0x01112D), RUNE_C(0x011134), JT_T}, + {RUNE_C(0x011173), RUNE_C(0x011173), JT_T}, + {RUNE_C(0x011180), RUNE_C(0x011181), JT_T}, + {RUNE_C(0x0111B6), RUNE_C(0x0111BE), JT_T}, + {RUNE_C(0x0111C9), RUNE_C(0x0111CC), JT_T}, + {RUNE_C(0x0111CF), RUNE_C(0x0111CF), JT_T}, + {RUNE_C(0x01122F), RUNE_C(0x011231), JT_T}, + {RUNE_C(0x011234), RUNE_C(0x011234), JT_T}, + {RUNE_C(0x011236), RUNE_C(0x011237), JT_T}, + {RUNE_C(0x01123E), RUNE_C(0x01123E), JT_T}, + {RUNE_C(0x011241), RUNE_C(0x011241), JT_T}, + {RUNE_C(0x0112DF), RUNE_C(0x0112DF), JT_T}, + {RUNE_C(0x0112E3), RUNE_C(0x0112EA), JT_T}, + {RUNE_C(0x011300), RUNE_C(0x011301), JT_T}, + {RUNE_C(0x01133B), RUNE_C(0x01133C), JT_T}, + {RUNE_C(0x011340), RUNE_C(0x011340), JT_T}, + {RUNE_C(0x011366), RUNE_C(0x01136C), JT_T}, + {RUNE_C(0x011370), RUNE_C(0x011374), JT_T}, + {RUNE_C(0x011438), RUNE_C(0x01143F), JT_T}, + {RUNE_C(0x011442), RUNE_C(0x011444), JT_T}, + {RUNE_C(0x011446), RUNE_C(0x011446), JT_T}, + {RUNE_C(0x01145E), RUNE_C(0x01145E), JT_T}, + {RUNE_C(0x0114B3), RUNE_C(0x0114B8), JT_T}, + {RUNE_C(0x0114BA), RUNE_C(0x0114BA), JT_T}, + {RUNE_C(0x0114BF), RUNE_C(0x0114C0), JT_T}, + {RUNE_C(0x0114C2), RUNE_C(0x0114C3), JT_T}, + {RUNE_C(0x0115B2), RUNE_C(0x0115B5), JT_T}, + {RUNE_C(0x0115BC), RUNE_C(0x0115BD), JT_T}, + {RUNE_C(0x0115BF), RUNE_C(0x0115C0), JT_T}, + {RUNE_C(0x0115DC), RUNE_C(0x0115DD), JT_T}, + {RUNE_C(0x011633), RUNE_C(0x01163A), JT_T}, + {RUNE_C(0x01163D), RUNE_C(0x01163D), JT_T}, + {RUNE_C(0x01163F), RUNE_C(0x011640), JT_T}, + {RUNE_C(0x0116AB), RUNE_C(0x0116AB), JT_T}, + {RUNE_C(0x0116AD), RUNE_C(0x0116AD), JT_T}, + {RUNE_C(0x0116B0), RUNE_C(0x0116B5), JT_T}, + {RUNE_C(0x0116B7), RUNE_C(0x0116B7), JT_T}, + {RUNE_C(0x01171D), RUNE_C(0x01171F), JT_T}, + {RUNE_C(0x011722), RUNE_C(0x011725), JT_T}, + {RUNE_C(0x011727), RUNE_C(0x01172B), JT_T}, + {RUNE_C(0x01182F), RUNE_C(0x011837), JT_T}, + {RUNE_C(0x011839), RUNE_C(0x01183A), JT_T}, + {RUNE_C(0x01193B), RUNE_C(0x01193C), JT_T}, + {RUNE_C(0x01193E), RUNE_C(0x01193E), JT_T}, + {RUNE_C(0x011943), RUNE_C(0x011943), JT_T}, + {RUNE_C(0x0119D4), RUNE_C(0x0119D7), JT_T}, + {RUNE_C(0x0119DA), RUNE_C(0x0119DB), JT_T}, + {RUNE_C(0x0119E0), RUNE_C(0x0119E0), JT_T}, + {RUNE_C(0x011A01), RUNE_C(0x011A0A), JT_T}, + {RUNE_C(0x011A33), RUNE_C(0x011A38), JT_T}, + {RUNE_C(0x011A3B), RUNE_C(0x011A3E), JT_T}, + {RUNE_C(0x011A47), RUNE_C(0x011A47), JT_T}, + {RUNE_C(0x011A51), RUNE_C(0x011A56), JT_T}, + {RUNE_C(0x011A59), RUNE_C(0x011A5B), JT_T}, + {RUNE_C(0x011A8A), RUNE_C(0x011A96), JT_T}, + {RUNE_C(0x011A98), RUNE_C(0x011A99), JT_T}, + {RUNE_C(0x011C30), RUNE_C(0x011C36), JT_T}, + {RUNE_C(0x011C38), RUNE_C(0x011C3D), JT_T}, + {RUNE_C(0x011C3F), RUNE_C(0x011C3F), JT_T}, + {RUNE_C(0x011C92), RUNE_C(0x011CA7), JT_T}, + {RUNE_C(0x011CAA), RUNE_C(0x011CB0), JT_T}, + {RUNE_C(0x011CB2), RUNE_C(0x011CB3), JT_T}, + {RUNE_C(0x011CB5), RUNE_C(0x011CB6), JT_T}, + {RUNE_C(0x011D31), RUNE_C(0x011D36), JT_T}, + {RUNE_C(0x011D3A), RUNE_C(0x011D3A), JT_T}, + {RUNE_C(0x011D3C), RUNE_C(0x011D3D), JT_T}, + {RUNE_C(0x011D3F), RUNE_C(0x011D45), JT_T}, + {RUNE_C(0x011D47), RUNE_C(0x011D47), JT_T}, + {RUNE_C(0x011D90), RUNE_C(0x011D91), JT_T}, + {RUNE_C(0x011D95), RUNE_C(0x011D95), JT_T}, + {RUNE_C(0x011D97), RUNE_C(0x011D97), JT_T}, + {RUNE_C(0x011EF3), RUNE_C(0x011EF4), JT_T}, + {RUNE_C(0x011F00), RUNE_C(0x011F01), JT_T}, + {RUNE_C(0x011F36), RUNE_C(0x011F3A), JT_T}, + {RUNE_C(0x011F40), RUNE_C(0x011F40), JT_T}, + {RUNE_C(0x011F42), RUNE_C(0x011F42), JT_T}, + {RUNE_C(0x013430), RUNE_C(0x013440), JT_T}, + {RUNE_C(0x013447), RUNE_C(0x013455), JT_T}, + {RUNE_C(0x016AF0), RUNE_C(0x016AF4), JT_T}, + {RUNE_C(0x016B30), RUNE_C(0x016B36), JT_T}, + {RUNE_C(0x016F4F), RUNE_C(0x016F4F), JT_T}, + {RUNE_C(0x016F8F), RUNE_C(0x016F92), JT_T}, + {RUNE_C(0x016FE4), RUNE_C(0x016FE4), JT_T}, + {RUNE_C(0x01BC9D), RUNE_C(0x01BC9E), JT_T}, + {RUNE_C(0x01BCA0), RUNE_C(0x01BCA3), JT_T}, + {RUNE_C(0x01CF00), RUNE_C(0x01CF2D), JT_T}, + {RUNE_C(0x01CF30), RUNE_C(0x01CF46), JT_T}, + {RUNE_C(0x01D167), RUNE_C(0x01D169), JT_T}, + {RUNE_C(0x01D173), RUNE_C(0x01D182), JT_T}, + {RUNE_C(0x01D185), RUNE_C(0x01D18B), JT_T}, + {RUNE_C(0x01D1AA), RUNE_C(0x01D1AD), JT_T}, + {RUNE_C(0x01D242), RUNE_C(0x01D244), JT_T}, + {RUNE_C(0x01DA00), RUNE_C(0x01DA36), JT_T}, + {RUNE_C(0x01DA3B), RUNE_C(0x01DA6C), JT_T}, + {RUNE_C(0x01DA75), RUNE_C(0x01DA75), JT_T}, + {RUNE_C(0x01DA84), RUNE_C(0x01DA84), JT_T}, + {RUNE_C(0x01DA9B), RUNE_C(0x01DA9F), JT_T}, + {RUNE_C(0x01DAA1), RUNE_C(0x01DAAF), JT_T}, + {RUNE_C(0x01E000), RUNE_C(0x01E006), JT_T}, + {RUNE_C(0x01E008), RUNE_C(0x01E018), JT_T}, + {RUNE_C(0x01E01B), RUNE_C(0x01E021), JT_T}, + {RUNE_C(0x01E023), RUNE_C(0x01E024), JT_T}, + {RUNE_C(0x01E026), RUNE_C(0x01E02A), JT_T}, + {RUNE_C(0x01E08F), RUNE_C(0x01E08F), JT_T}, + {RUNE_C(0x01E130), RUNE_C(0x01E136), JT_T}, + {RUNE_C(0x01E2AE), RUNE_C(0x01E2AE), JT_T}, + {RUNE_C(0x01E2EC), RUNE_C(0x01E2EF), JT_T}, + {RUNE_C(0x01E4EC), RUNE_C(0x01E4EF), JT_T}, + {RUNE_C(0x01E8D0), RUNE_C(0x01E8D6), JT_T}, + {RUNE_C(0x01E900), RUNE_C(0x01E943), JT_D}, + {RUNE_C(0x01E944), RUNE_C(0x01E94B), JT_T}, + {RUNE_C(0x0E0001), RUNE_C(0x0E0001), JT_T}, + {RUNE_C(0x0E0020), RUNE_C(0x0E007F), JT_T}, + {RUNE_C(0x0E0100), RUNE_C(0x0E01EF), JT_T}, +}; + +_MLIB_DEFINE_BSEARCH(enum uprop_jt, lookup, JT_U) + +enum uprop_jt +uprop_get_jt(rune ch) +{ + return ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch); +} -- cgit v1.2.3