diff options
-rw-r--r-- | data/ScriptExtensions | 635 | ||||
-rwxr-xr-x | gen/data-files | 1 | ||||
-rwxr-xr-x | gen/prop/scx | 97 | ||||
-rw-r--r-- | include/unicode/prop.h | 1 | ||||
-rw-r--r-- | lib/unicode/prop/uprop_get_scx.c | 180 |
5 files changed, 914 insertions, 0 deletions
diff --git a/data/ScriptExtensions b/data/ScriptExtensions new file mode 100644 index 0000000..23141fb --- /dev/null +++ b/data/ScriptExtensions @@ -0,0 +1,635 @@ +# ScriptExtensions-15.1.0.txt +# Date: 2023-02-01, 23:02:24 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# The Script_Extensions property indicates which characters are commonly used +# with more than one script, but with a limited number of scripts. +# For each code point, there is one or more property values. Each such value is a Script property value. +# For more information, see: +# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/ +# Especially the sections: +# https://www.unicode.org/reports/tr24/#Assignment_Script_Values +# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values +# +# Each Script_Extensions value in this file consists of a set +# of one or more abbreviated Script property values. The ordering of the +# values in that set is not material, but for stability in presentation +# it is given here as alphabetical. +# +# The Script_Extensions values are presented in sorted order in the file. +# They are sorted first by the number of Script property values in their sets, +# and then alphabetically by first differing Script property value. +# +# Following each distinct Script_Extensions value is the list of code +# points associated with that value, listed in code point order. 
+# +# All code points not explicitly listed for Script_Extensions +# have as their value the corresponding Script property value +# +# @missing: 0000..10FFFF; <script> + +# ================================================ + +# Property: Script_Extensions + +# ================================================ + +# Script_Extensions=Beng + +1CF7 ; Beng # Mc VEDIC SIGN ATIKRAMA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva + +1CD1 ; Deva # Mn VEDIC TONE SHARA +1CD4 ; Deva # Mn VEDIC SIGN YAJURVEDIC MIDLINE SVARITA +1CDB ; Deva # Mn VEDIC TONE TRIPLE SVARITA +1CDE..1CDF ; Deva # Mn [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW +1CE2..1CE8 ; Deva # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CEB..1CEC ; Deva # Lo [2] VEDIC SIGN ANUSVARA VAMAGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF1 ; Deva # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA + +# Total code points: 18 + +# ================================================ + +# Script_Extensions=Dupl + +1BCA0..1BCA3 ; Dupl # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Grek + +0342 ; Grek # Mn COMBINING GREEK PERISPOMENI +0345 ; Grek # Mn COMBINING GREEK YPOGEGRAMMENI +1DC0..1DC1 ; Grek # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Hani + +3006 ; Hani # Lo IDEOGRAPHIC CLOSING MARK +303E..303F ; Hani # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +3190..3191 ; Hani # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; Hani # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; Hani # So [10] IDEOGRAPHIC ANNOTATION TOP 
MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31C0..31E3 ; Hani # So [36] CJK STROKE T..CJK STROKE Q +3220..3229 ; Hani # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; Hani # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3280..3289 ; Hani # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; Hani # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32C0..32CB ; Hani # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER +32FF ; Hani # So SQUARE ERA NAME REIWA +3358..3370 ; Hani # So [25] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR +337B..337F ; Hani # So [5] SQUARE ERA NAME HEISEI..SQUARE CORPORATION +33E0..33FE ; Hani # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE +1D360..1D371 ; Hani # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE +1F250..1F251 ; Hani # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT + +# Total code points: 238 + +# ================================================ + +# Script_Extensions=Latn + +0363..036F ; Latn # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X + +# Total code points: 13 + +# ================================================ + +# Script_Extensions=Nand + +1CFA ; Nand # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Syrc + +1DFA ; Syrc # Mn COMBINING DOT BELOW LEFT + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Arab Copt + +102E0 ; Arab Copt # Mn COPTIC EPACT THOUSANDS MARK +102E1..102FB ; Arab Copt # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED + +# Total code points: 28 + +# ================================================ + +# Script_Extensions=Arab Nkoo + +FD3E ; Arab Nkoo # Pe ORNATE LEFT 
PARENTHESIS +FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Arab Rohg + +06D4 ; Arab Rohg # Po ARABIC FULL STOP + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Arab Syrc + +064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW +0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF + +# Total code points: 12 + +# ================================================ + +# Script_Extensions=Arab Thaa + +FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM +FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Beng Deva + +1CD5..1CD6 ; Beng Deva # Mn [2] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC INDEPENDENT SVARITA +1CD8 ; Beng Deva # Mn VEDIC TONE CANDRA BELOW +1CE1 ; Beng Deva # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CEA ; Beng Deva # Lo VEDIC SIGN ANUSVARA BAHIRGOMUKHA +1CED ; Beng Deva # Mn VEDIC SIGN TIRYAK +1CF5..1CF6 ; Beng Deva # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +A8F1 ; Beng Deva # Mn COMBINING DEVANAGARI SIGN AVAGRAHA + +# Total code points: 9 + +# ================================================ + +# Script_Extensions=Bopo Hani + +302A..302D ; Bopo Hani # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Bugi Java + +A9CF ; Bugi Java # Lm JAVANESE PANGRANGKEP + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Cprt Linb + +10102 ; Cprt Linb # Po AEGEAN CHECK MARK +10137..1013F ; Cprt Linb # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT + +# Total code points: 10 + +# ================================================ + +# 
Script_Extensions=Cyrl Glag + +0484 ; Cyrl Glag # Mn COMBINING CYRILLIC PALATALIZATION +0487 ; Cyrl Glag # Mn COMBINING CYRILLIC POKRYTIE +2E43 ; Cyrl Glag # Po DASH WITH LEFT UPTURN +A66F ; Cyrl Glag # Mn COMBINING CYRILLIC VZMET + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Cyrl Latn + +0485..0486 ; Cyrl Latn # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Cyrl Perm + +0483 ; Cyrl Perm # Mn COMBINING CYRILLIC TITLO + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Cyrl Syrc + +1DF8 ; Cyrl Syrc # Mn COMBINING DOT ABOVE LEFT + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Gran + +1CD3 ; Deva Gran # Po VEDIC SIGN NIHSHVASA +1CF3 ; Deva Gran # Lo VEDIC SIGN ROTATED ARDHAVISARGA +1CF8..1CF9 ; Deva Gran # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Deva Nand + +1CE9 ; Deva Nand # Lo VEDIC SIGN ANUSVARA ANTARGOMUKHA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Shrd + +1CD7 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA +1CD9 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER +1CDC..1CDD ; Deva Shrd # Mn [2] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE DOT BELOW +1CE0 ; Deva Shrd # Mn VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA + +# Total code points: 5 + +# ================================================ + +# Script_Extensions=Deva Taml + +A8F3 ; Deva Taml # Lo DEVANAGARI SIGN CANDRABINDU VIRAMA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Geor Latn + +10FB ; Geor Latn # Po GEORGIAN PARAGRAPH 
SEPARATOR + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Gran Taml + +0BE6..0BEF ; Gran Taml # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; Gran Taml # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3 ; Gran Taml # So TAMIL DAY SIGN +11301 ; Gran Taml # Mn GRANTHA SIGN CANDRABINDU +11303 ; Gran Taml # Mc GRANTHA SIGN VISARGA +1133B..1133C ; Gran Taml # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11FD0..11FD1 ; Gran Taml # No [2] TAMIL FRACTION ONE QUARTER..TAMIL FRACTION ONE HALF-1 +11FD3 ; Gran Taml # No TAMIL FRACTION THREE QUARTERS + +# Total code points: 21 + +# ================================================ + +# Script_Extensions=Gujr Khoj + +0AE6..0AEF ; Gujr Khoj # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Guru Mult + +0A66..0A6F ; Guru Mult # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Hani Latn + +A700..A707 ; Hani Latn # Sk [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU + +# Total code points: 8 + +# ================================================ + +# Script_Extensions=Hira Kana + +3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3099..309A ; Hira Kana # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Hira Kana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; Hira Kana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FC ; Hira Kana # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF70 ; Hira Kana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + +# Total 
code points: 14 + +# ================================================ + +# Script_Extensions=Knda Nand + +0CE6..0CEF ; Knda Nand # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Latn Mong + +202F ; Latn Mong # Zs NARROW NO-BREAK SPACE + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Mani Ougr + +10AF2 ; Mani Ougr # Po MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Mong Phag + +1802..1803 ; Mong Phag # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP +1805 ; Mong Phag # Po MONGOLIAN FOUR DOTS + +# Total code points: 3 + +# ================================================ + +# Script_Extensions=Arab Syrc Thaa + +061C ; Arab Syrc Thaa # Cf ARABIC LETTER MARK + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Arab Thaa Yezi + +0660..0669 ; Arab Thaa Yezi # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Beng Cakm Sylo + +09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Cakm Mymr Tale + +1040..1049 ; Cakm Mymr Tale # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Cpmn Cprt Linb + +10100..10101 ; Cpmn Cprt Linb # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Cprt Lina Linb + +10107..10133 ; Cprt Lina Linb # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND + +# Total code points: 45 + +# 
================================================ + +# Script_Extensions=Deva Gran Knda + +1CF4 ; Deva Gran Knda # Mn VEDIC TONE CANDRA ABOVE + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Gran Latn + +20F0 ; Deva Gran Latn # Mn COMBINING ASTERISK ABOVE + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Hani Hira Kana + +303C ; Hani Hira Kana # Lo MASU MARK +303D ; Hani Hira Kana # Po PART ALTERNATION MARK + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Kali Latn Mymr + +A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Beng Deva Gran Knda + +1CD0 ; Beng Deva Gran Knda # Mn VEDIC TONE KARSHANA +1CD2 ; Beng Deva Gran Knda # Mn VEDIC TONE PRENKHA + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Buhd Hano Tagb Tglg + +1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Deva Dogr Kthi Mahj + +0966..096F ; Deva Dogr Kthi Mahj # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Bopo Hang Hani Hira Kana + +3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK +3013 ; Bopo Hang Hani Hira Kana # So GETA MARK +301C ; Bopo Hang Hani Hira Kana # Pd WAVE DASH +301D ; Bopo Hang Hani Hira Kana # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Bopo Hang Hani Hira Kana # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3030 ; Bopo Hang Hani Hira Kana # Pd WAVY DASH +3037 ; Bopo Hang Hani Hira Kana # So IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +FE45..FE46 ; Bopo Hang Hani Hira Kana # Po 
[2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi + +060C ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC COMMA +061B ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Bopo Hang Hani Hira Kana Yiii + +3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +3008 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT ANGLE BRACKET +3009 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT ANGLE BRACKET +300A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT CORNER BRACKET +300D ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT CORNER BRACKET +300E ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE CORNER BRACKET +300F ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE CORNER BRACKET +3010 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT BLACK LENTICULAR BRACKET +3014 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE SQUARE BRACKET +301B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE SQUARE BRACKET +30FB ; Bopo Hang Hani Hira Kana Yiii # Po KATAKANA MIDDLE DOT +FF61 ; Bopo Hang Hani Hira Kana Yiii # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; Bopo Hang Hani Hira Kana Yiii # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Bopo 
Hang Hani Hira Kana Yiii # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 26 + +# ================================================ + +# Script_Extensions=Deva Knda Mlym Orya Taml Telu + +1CDA ; Deva Knda Mlym Orya Taml Telu # Mn VEDIC TONE DOUBLE SVARITA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi + +061F ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc + +0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Beng Deva Gran Knda Mlym Nand Orya Sinh Telu Tirh + +1CF2 ; Beng Deva Gran Knda Mlym Nand Orya Sinh Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh + +A836..A837 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So NORTH INDIC QUANTITY MARK + +# Total code points: 3 + +# ================================================ + +# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh + +0952 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN ANUDATTA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh + +A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK + +# Total code points: 1 + 
+# ================================================ + +# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh + +0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN UDATTA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh + +A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS + +# Total code points: 3 + +# ================================================ + +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh + +A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS + +# Total code points: 3 + +# ================================================ + +# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh + +0964 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DANDA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh + +0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DOUBLE DANDA + +# Total code points: 1 + +# EOF diff --git a/gen/data-files b/gen/data-files index 9da5452..00b5f1c 100755 --- a/gen/data-files +++ b/gen/data-files @@ -30,6 +30,7 @@ readonly PATHS=' IndicPositionalCategory IndicSyllabicCategory PropList + ScriptExtensions Scripts SpecialCasing UnicodeData diff --git a/gen/prop/scx b/gen/prop/scx new file mode 100755 index 
0000000..ec5b03f
--- /dev/null
+++ b/gen/prop/scx
@@ -0,0 +1,108 @@
+#!/bin/sh
+
+# Generates lib/unicode/prop/uprop_get_scx.c, the Script_Extensions lookup
+# table, from data/ScriptExtensions.  Requires gawk (strtonum is used).
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_scx.c
+
+gawk '
+BEGIN {
+	# Data lines look like "RANGE ; SCRIPTS # comment"; with this separator
+	# $1 is the code point (range) and $2 the space-separated script list.
+	FS = " *(; *|#.*)"
+
+	print "/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */"
+	print ""
+	print "#include \"_bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+	print "#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}"
+	print "#define _(...) {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}"
+	print ""
+	print "struct uprop_sc_view {"
+	print "\tconst enum uprop_sc *p;"
+	print "\tsize_t n;"
+	print "};"
+	print ""
+	# The generated code indexes fallback[] with uprop_get_sc(ch), so this
+	# list is assumed to follow the order of enum uprop_sc (TODO confirm).
+	print "static constexpr enum uprop_sc fallback[] = {"
+	print "\tSC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,"
+	print "\tSC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,"
+	print "\tSC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,"
+	print "\tSC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,"
+	print "\tSC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,"
+	print "\tSC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,"
+	print "\tSC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,"
+	print "\tSC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,"
+	print "\tSC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,"
+	print "\tSC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,"
+	print "\tSC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,"
+	print "\tSC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,"
+	print "\tSC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,"
+	print "\tSC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,"
+	print "\tSC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,"
+	print "\tSC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,"
+	print "\tSC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,"
+	print "\tSC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,"
+	print "\tSC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,"
+	print "\tSC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,"
+	print "\tSC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,"
+	print "};"
+	print ""
+}
+
+# Record the script list of every code point in a "LO..HI" or single entry.
+/^[A-F0-9]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+
+END {
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tstruct uprop_sc_view val;"
+	print "} lookup[] = {"
+
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		# Extend the run while the next code point has the same value
+		for (lo = i; props[lo] == props[i + 1]; i++)
+			;
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), _(", lo, i
+		# Walk the list by index: for-in order is unspecified, and its
+		# string subscripts do not compare reliably against a number.
+		cnt = split(props[i], xs, / /)
+		for (j = 1; j <= cnt; j++) {
+			printf "SC_%s", toupper(xs[j])
+			if (j < cnt)
+				printf ", "
+		}
+		printf ")},\n"
+	}
+
+	print "};"
+	print ""
+	print "_MLIB_DEFINE_BSEARCH(struct uprop_sc_view, lookup, ((struct uprop_sc_view){"
+	print "\t.p = fallback + uprop_get_sc(ch),"
+	print "\t.n = 1,"
+	print "}))"
+	print ""
+	print "const enum uprop_sc *"
+	print "uprop_get_scx(rune ch, size_t *n)"
+	print "{"
+	print "\tstruct uprop_sc_view v = mlib_lookup(ch);"
+	print "\t*n = v.n;"
+	print "\treturn v.p;"
+	print "}"
+}
+' data/ScriptExtensions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index 1cb6b83..05e16a2 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -1048,6 +1048,7 @@ enum uprop_vo {
 /* Not a Unicode property; but a nice-to-have */
 [[_mlib_pure]] struct u8view uprop_blkname(enum uprop_blk);
+[[_mlib_pure]] const enum uprop_sc *uprop_get_scx(rune, size_t *);
 [[_mlib_pure]] double uprop_get_nv(rune);
 [[_mlib_pure]] enum uprop_age uprop_get_age(rune);
[[_mlib_pure]] enum uprop_bc uprop_get_bc(rune); diff --git a/lib/unicode/prop/uprop_get_scx.c b/lib/unicode/prop/uprop_get_scx.c new file mode 100644 index 0000000..f9aff23 --- /dev/null +++ b/lib/unicode/prop/uprop_get_scx.c @@ -0,0 +1,180 @@ +/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */ + +#include "_bsearch.h" +#include "macros.h" +#include "rune.h" +#include "unicode/prop.h" + +#define CAST(...) (const enum uprop_sc []){__VA_ARGS__} +#define _(...) {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))} + +struct uprop_sc_view { + const enum uprop_sc *p; + size_t n; +}; + +static constexpr enum uprop_sc fallback[] = { + SC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST, + SC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH, + SC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER, + SC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR, + SC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG, + SC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG, + SC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP, + SC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR, + SC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN, + SC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ, + SC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO, + SC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM, + SC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK, + SC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM, + SC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG, + SC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD, + SC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO, + SC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT, + SC_TELU, SC_TFNG, 
SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA, + SC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX, + SC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY, +}; + +static const struct { + rune lo, hi; + struct uprop_sc_view val; +} lookup[] = { + {RUNE_C(0x000342), RUNE_C(0x000342), _(SC_GREK)}, + {RUNE_C(0x000345), RUNE_C(0x000345), _(SC_GREK)}, + {RUNE_C(0x000363), RUNE_C(0x00036F), _(SC_LATN)}, + {RUNE_C(0x000483), RUNE_C(0x000483), _(SC_CYRL, SC_PERM)}, + {RUNE_C(0x000484), RUNE_C(0x000484), _(SC_CYRL, SC_GLAG)}, + {RUNE_C(0x000485), RUNE_C(0x000486), _(SC_CYRL, SC_LATN)}, + {RUNE_C(0x000487), RUNE_C(0x000487), _(SC_CYRL, SC_GLAG)}, + {RUNE_C(0x00060C), RUNE_C(0x00060C), _(SC_ARAB, SC_NKOO, SC_ROHG, SC_SYRC, SC_THAA, SC_YEZI)}, + {RUNE_C(0x00061B), RUNE_C(0x00061B), _(SC_ARAB, SC_NKOO, SC_ROHG, SC_SYRC, SC_THAA, SC_YEZI)}, + {RUNE_C(0x00061C), RUNE_C(0x00061C), _(SC_ARAB, SC_SYRC, SC_THAA)}, + {RUNE_C(0x00061F), RUNE_C(0x00061F), _(SC_ADLM, SC_ARAB, SC_NKOO, SC_ROHG, SC_SYRC, SC_THAA, SC_YEZI)}, + {RUNE_C(0x000640), RUNE_C(0x000640), _(SC_ADLM, SC_ARAB, SC_MAND, SC_MANI, SC_OUGR, SC_PHLP, SC_ROHG, SC_SOGD, SC_SYRC)}, + {RUNE_C(0x00064B), RUNE_C(0x000655), _(SC_ARAB, SC_SYRC)}, + {RUNE_C(0x000660), RUNE_C(0x000669), _(SC_ARAB, SC_THAA, SC_YEZI)}, + {RUNE_C(0x000670), RUNE_C(0x000670), _(SC_ARAB, SC_SYRC)}, + {RUNE_C(0x0006D4), RUNE_C(0x0006D4), _(SC_ARAB, SC_ROHG)}, + {RUNE_C(0x000951), RUNE_C(0x000951), _(SC_BENG, SC_DEVA, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_LATN, SC_MLYM, SC_ORYA, SC_SHRD, SC_TAML, SC_TELU, SC_TIRH)}, + {RUNE_C(0x000952), RUNE_C(0x000952), _(SC_BENG, SC_DEVA, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_LATN, SC_MLYM, SC_ORYA, SC_TAML, SC_TELU, SC_TIRH)}, + {RUNE_C(0x000964), RUNE_C(0x000964), _(SC_BENG, SC_DEVA, SC_DOGR, SC_GONG, SC_GONM, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_MAHJ, SC_MLYM, SC_NAND, SC_ORYA, SC_SIND, SC_SINH, SC_SYLO, SC_TAKR, SC_TAML, SC_TELU, SC_TIRH)}, + {RUNE_C(0x000965), RUNE_C(0x000965), _(SC_BENG, 
SC_DEVA, SC_DOGR, SC_GONG, SC_GONM, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_LIMB, SC_MAHJ, SC_MLYM, SC_NAND, SC_ORYA, SC_SIND, SC_SINH, SC_SYLO, SC_TAKR, SC_TAML, SC_TELU, SC_TIRH)}, + {RUNE_C(0x000966), RUNE_C(0x00096F), _(SC_DEVA, SC_DOGR, SC_KTHI, SC_MAHJ)}, + {RUNE_C(0x0009E6), RUNE_C(0x0009EF), _(SC_BENG, SC_CAKM, SC_SYLO)}, + {RUNE_C(0x000A66), RUNE_C(0x000A6F), _(SC_GURU, SC_MULT)}, + {RUNE_C(0x000AE6), RUNE_C(0x000AEF), _(SC_GUJR, SC_KHOJ)}, + {RUNE_C(0x000BE6), RUNE_C(0x000BF3), _(SC_GRAN, SC_TAML)}, + {RUNE_C(0x000CE6), RUNE_C(0x000CEF), _(SC_KNDA, SC_NAND)}, + {RUNE_C(0x001040), RUNE_C(0x001049), _(SC_CAKM, SC_MYMR, SC_TALE)}, + {RUNE_C(0x0010FB), RUNE_C(0x0010FB), _(SC_GEOR, SC_LATN)}, + {RUNE_C(0x001735), RUNE_C(0x001736), _(SC_BUHD, SC_HANO, SC_TAGB, SC_TGLG)}, + {RUNE_C(0x001802), RUNE_C(0x001803), _(SC_MONG, SC_PHAG)}, + {RUNE_C(0x001805), RUNE_C(0x001805), _(SC_MONG, SC_PHAG)}, + {RUNE_C(0x001CD0), RUNE_C(0x001CD0), _(SC_BENG, SC_DEVA, SC_GRAN, SC_KNDA)}, + {RUNE_C(0x001CD1), RUNE_C(0x001CD1), _(SC_DEVA)}, + {RUNE_C(0x001CD2), RUNE_C(0x001CD2), _(SC_BENG, SC_DEVA, SC_GRAN, SC_KNDA)}, + {RUNE_C(0x001CD3), RUNE_C(0x001CD3), _(SC_DEVA, SC_GRAN)}, + {RUNE_C(0x001CD4), RUNE_C(0x001CD4), _(SC_DEVA)}, + {RUNE_C(0x001CD5), RUNE_C(0x001CD6), _(SC_BENG, SC_DEVA)}, + {RUNE_C(0x001CD7), RUNE_C(0x001CD7), _(SC_DEVA, SC_SHRD)}, + {RUNE_C(0x001CD8), RUNE_C(0x001CD8), _(SC_BENG, SC_DEVA)}, + {RUNE_C(0x001CD9), RUNE_C(0x001CD9), _(SC_DEVA, SC_SHRD)}, + {RUNE_C(0x001CDA), RUNE_C(0x001CDA), _(SC_DEVA, SC_KNDA, SC_MLYM, SC_ORYA, SC_TAML, SC_TELU)}, + {RUNE_C(0x001CDB), RUNE_C(0x001CDB), _(SC_DEVA)}, + {RUNE_C(0x001CDC), RUNE_C(0x001CDD), _(SC_DEVA, SC_SHRD)}, + {RUNE_C(0x001CDE), RUNE_C(0x001CDF), _(SC_DEVA)}, + {RUNE_C(0x001CE0), RUNE_C(0x001CE0), _(SC_DEVA, SC_SHRD)}, + {RUNE_C(0x001CE1), RUNE_C(0x001CE1), _(SC_BENG, SC_DEVA)}, + {RUNE_C(0x001CE2), RUNE_C(0x001CE8), _(SC_DEVA)}, + {RUNE_C(0x001CE9), RUNE_C(0x001CE9), _(SC_DEVA, SC_NAND)}, + {RUNE_C(0x001CEA), 
RUNE_C(0x001CEA), _(SC_BENG, SC_DEVA)}, + {RUNE_C(0x001CEB), RUNE_C(0x001CEC), _(SC_DEVA)}, + {RUNE_C(0x001CED), RUNE_C(0x001CED), _(SC_BENG, SC_DEVA)}, + {RUNE_C(0x001CEE), RUNE_C(0x001CF1), _(SC_DEVA)}, + {RUNE_C(0x001CF2), RUNE_C(0x001CF2), _(SC_BENG, SC_DEVA, SC_GRAN, SC_KNDA, SC_MLYM, SC_NAND, SC_ORYA, SC_SINH, SC_TELU, SC_TIRH)}, + {RUNE_C(0x001CF3), RUNE_C(0x001CF3), _(SC_DEVA, SC_GRAN)}, + {RUNE_C(0x001CF4), RUNE_C(0x001CF4), _(SC_DEVA, SC_GRAN, SC_KNDA)}, + {RUNE_C(0x001CF5), RUNE_C(0x001CF6), _(SC_BENG, SC_DEVA)}, + {RUNE_C(0x001CF7), RUNE_C(0x001CF7), _(SC_BENG)}, + {RUNE_C(0x001CF8), RUNE_C(0x001CF9), _(SC_DEVA, SC_GRAN)}, + {RUNE_C(0x001CFA), RUNE_C(0x001CFA), _(SC_NAND)}, + {RUNE_C(0x001DC0), RUNE_C(0x001DC1), _(SC_GREK)}, + {RUNE_C(0x001DF8), RUNE_C(0x001DF8), _(SC_CYRL, SC_SYRC)}, + {RUNE_C(0x001DFA), RUNE_C(0x001DFA), _(SC_SYRC)}, + {RUNE_C(0x00202F), RUNE_C(0x00202F), _(SC_LATN, SC_MONG)}, + {RUNE_C(0x0020F0), RUNE_C(0x0020F0), _(SC_DEVA, SC_GRAN, SC_LATN)}, + {RUNE_C(0x002E43), RUNE_C(0x002E43), _(SC_CYRL, SC_GLAG)}, + {RUNE_C(0x003001), RUNE_C(0x003002), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)}, + {RUNE_C(0x003003), RUNE_C(0x003003), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)}, + {RUNE_C(0x003006), RUNE_C(0x003006), _(SC_HANI)}, + {RUNE_C(0x003008), RUNE_C(0x003011), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)}, + {RUNE_C(0x003013), RUNE_C(0x003013), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)}, + {RUNE_C(0x003014), RUNE_C(0x00301B), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)}, + {RUNE_C(0x00301C), RUNE_C(0x00301F), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)}, + {RUNE_C(0x00302A), RUNE_C(0x00302D), _(SC_BOPO, SC_HANI)}, + {RUNE_C(0x003030), RUNE_C(0x003030), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)}, + {RUNE_C(0x003031), RUNE_C(0x003035), _(SC_HIRA, SC_KANA)}, + {RUNE_C(0x003037), RUNE_C(0x003037), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)}, + {RUNE_C(0x00303C), RUNE_C(0x00303D), 
_(SC_HANI, SC_HIRA, SC_KANA)}, + {RUNE_C(0x00303E), RUNE_C(0x00303F), _(SC_HANI)}, + {RUNE_C(0x003099), RUNE_C(0x00309C), _(SC_HIRA, SC_KANA)}, + {RUNE_C(0x0030A0), RUNE_C(0x0030A0), _(SC_HIRA, SC_KANA)}, + {RUNE_C(0x0030FB), RUNE_C(0x0030FB), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)}, + {RUNE_C(0x0030FC), RUNE_C(0x0030FC), _(SC_HIRA, SC_KANA)}, + {RUNE_C(0x003190), RUNE_C(0x00319F), _(SC_HANI)}, + {RUNE_C(0x0031C0), RUNE_C(0x0031E3), _(SC_HANI)}, + {RUNE_C(0x003220), RUNE_C(0x003247), _(SC_HANI)}, + {RUNE_C(0x003280), RUNE_C(0x0032B0), _(SC_HANI)}, + {RUNE_C(0x0032C0), RUNE_C(0x0032CB), _(SC_HANI)}, + {RUNE_C(0x0032FF), RUNE_C(0x0032FF), _(SC_HANI)}, + {RUNE_C(0x003358), RUNE_C(0x003370), _(SC_HANI)}, + {RUNE_C(0x00337B), RUNE_C(0x00337F), _(SC_HANI)}, + {RUNE_C(0x0033E0), RUNE_C(0x0033FE), _(SC_HANI)}, + {RUNE_C(0x00A66F), RUNE_C(0x00A66F), _(SC_CYRL, SC_GLAG)}, + {RUNE_C(0x00A700), RUNE_C(0x00A707), _(SC_HANI, SC_LATN)}, + {RUNE_C(0x00A830), RUNE_C(0x00A832), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KNDA, SC_KTHI, SC_MAHJ, SC_MLYM, SC_MODI, SC_NAND, SC_SHRD, SC_SIND, SC_TAKR, SC_TIRH)}, + {RUNE_C(0x00A833), RUNE_C(0x00A835), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KNDA, SC_KTHI, SC_MAHJ, SC_MODI, SC_NAND, SC_SHRD, SC_SIND, SC_TAKR, SC_TIRH)}, + {RUNE_C(0x00A836), RUNE_C(0x00A837), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KTHI, SC_MAHJ, SC_MODI, SC_SIND, SC_TAKR, SC_TIRH)}, + {RUNE_C(0x00A838), RUNE_C(0x00A838), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KTHI, SC_MAHJ, SC_MODI, SC_SHRD, SC_SIND, SC_TAKR, SC_TIRH)}, + {RUNE_C(0x00A839), RUNE_C(0x00A839), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KTHI, SC_MAHJ, SC_MODI, SC_SIND, SC_TAKR, SC_TIRH)}, + {RUNE_C(0x00A8F1), RUNE_C(0x00A8F1), _(SC_BENG, SC_DEVA)}, + {RUNE_C(0x00A8F3), RUNE_C(0x00A8F3), _(SC_DEVA, SC_TAML)}, + {RUNE_C(0x00A92E), RUNE_C(0x00A92E), _(SC_KALI, SC_LATN, SC_MYMR)}, + {RUNE_C(0x00A9CF), RUNE_C(0x00A9CF), _(SC_BUGI, SC_JAVA)}, + 
{RUNE_C(0x00FD3E), RUNE_C(0x00FD3F), _(SC_ARAB, SC_NKOO)}, + {RUNE_C(0x00FDF2), RUNE_C(0x00FDF2), _(SC_ARAB, SC_THAA)}, + {RUNE_C(0x00FDFD), RUNE_C(0x00FDFD), _(SC_ARAB, SC_THAA)}, + {RUNE_C(0x00FE45), RUNE_C(0x00FE46), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)}, + {RUNE_C(0x00FF61), RUNE_C(0x00FF65), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)}, + {RUNE_C(0x00FF70), RUNE_C(0x00FF70), _(SC_HIRA, SC_KANA)}, + {RUNE_C(0x00FF9E), RUNE_C(0x00FF9F), _(SC_HIRA, SC_KANA)}, + {RUNE_C(0x010100), RUNE_C(0x010101), _(SC_CPMN, SC_CPRT, SC_LINB)}, + {RUNE_C(0x010102), RUNE_C(0x010102), _(SC_CPRT, SC_LINB)}, + {RUNE_C(0x010107), RUNE_C(0x010133), _(SC_CPRT, SC_LINA, SC_LINB)}, + {RUNE_C(0x010137), RUNE_C(0x01013F), _(SC_CPRT, SC_LINB)}, + {RUNE_C(0x0102E0), RUNE_C(0x0102FB), _(SC_ARAB, SC_COPT)}, + {RUNE_C(0x010AF2), RUNE_C(0x010AF2), _(SC_MANI, SC_OUGR)}, + {RUNE_C(0x011301), RUNE_C(0x011301), _(SC_GRAN, SC_TAML)}, + {RUNE_C(0x011303), RUNE_C(0x011303), _(SC_GRAN, SC_TAML)}, + {RUNE_C(0x01133B), RUNE_C(0x01133C), _(SC_GRAN, SC_TAML)}, + {RUNE_C(0x011FD0), RUNE_C(0x011FD1), _(SC_GRAN, SC_TAML)}, + {RUNE_C(0x011FD3), RUNE_C(0x011FD3), _(SC_GRAN, SC_TAML)}, + {RUNE_C(0x01BCA0), RUNE_C(0x01BCA3), _(SC_DUPL)}, + {RUNE_C(0x01D360), RUNE_C(0x01D371), _(SC_HANI)}, + {RUNE_C(0x01F250), RUNE_C(0x01F251), _(SC_HANI)}, +}; + +/* Instantiates _MLIB_DEFINE_BSEARCH for struct uprop_sc_view under the name `lookup'. The third argument builds a one-element view (.n = 1) whose .p is fallback + uprop_get_sc(ch). NOTE(review): presumably this defines mlib_lookup() and the third argument is the value produced when ch lies in no table range, i.e. the code point's plain Script value; confirm against the macro definition, which is not visible here. */ +_MLIB_DEFINE_BSEARCH(struct uprop_sc_view, lookup, ((struct uprop_sc_view){ + .p = fallback + uprop_get_sc(ch), + .n = 1, +})) + +/* Looks up the Script_Extensions view for ch via mlib_lookup(ch), stores the view's element count in *n and returns the view's pointer to the script values. n is dereferenced unconditionally, so callers must pass a non-NULL pointer. */ +const enum uprop_sc * +uprop_get_scx(rune ch, size_t *n) +{ + struct uprop_sc_view v = mlib_lookup(ch); + *n = v.n; + return v.p; +} |