aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--data/ScriptExtensions635
-rwxr-xr-xgen/data-files1
-rwxr-xr-xgen/prop/scx97
-rw-r--r--include/unicode/prop.h1
-rw-r--r--lib/unicode/prop/uprop_get_scx.c180
5 files changed, 914 insertions, 0 deletions
diff --git a/data/ScriptExtensions b/data/ScriptExtensions
new file mode 100644
index 0000000..23141fb
--- /dev/null
+++ b/data/ScriptExtensions
@@ -0,0 +1,635 @@
+# ScriptExtensions-15.1.0.txt
+# Date: 2023-02-01, 23:02:24 GMT
+# © 2023 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
+#
+# The Script_Extensions property indicates which characters are commonly used
+# with more than one script, but with a limited number of scripts.
+# For each code point, there is one or more property values. Each such value is a Script property value.
+# For more information, see:
+# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
+# Especially the sections:
+# https://www.unicode.org/reports/tr24/#Assignment_Script_Values
+# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
+#
+# Each Script_Extensions value in this file consists of a set
+# of one or more abbreviated Script property values. The ordering of the
+# values in that set is not material, but for stability in presentation
+# it is given here as alphabetical.
+#
+# The Script_Extensions values are presented in sorted order in the file.
+# They are sorted first by the number of Script property values in their sets,
+# and then alphabetically by first differing Script property value.
+#
+# Following each distinct Script_Extensions value is the list of code
+# points associated with that value, listed in code point order.
+#
+# All code points not explicitly listed for Script_Extensions
+# have as their value the corresponding Script property value
+#
+# @missing: 0000..10FFFF; <script>
+
+# ================================================
+
+# Property: Script_Extensions
+
+# ================================================
+
+# Script_Extensions=Beng
+
+1CF7 ; Beng # Mc VEDIC SIGN ATIKRAMA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva
+
+1CD1 ; Deva # Mn VEDIC TONE SHARA
+1CD4 ; Deva # Mn VEDIC SIGN YAJURVEDIC MIDLINE SVARITA
+1CDB ; Deva # Mn VEDIC TONE TRIPLE SVARITA
+1CDE..1CDF ; Deva # Mn [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW
+1CE2..1CE8 ; Deva # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CEB..1CEC ; Deva # Lo [2] VEDIC SIGN ANUSVARA VAMAGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
+1CEE..1CF1 ; Deva # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
+
+# Total code points: 18
+
+# ================================================
+
+# Script_Extensions=Dupl
+
+1BCA0..1BCA3 ; Dupl # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Grek
+
+0342 ; Grek # Mn COMBINING GREEK PERISPOMENI
+0345 ; Grek # Mn COMBINING GREEK YPOGEGRAMMENI
+1DC0..1DC1 ; Grek # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Hani
+
+3006 ; Hani # Lo IDEOGRAPHIC CLOSING MARK
+303E..303F ; Hani # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
+3190..3191 ; Hani # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
+3192..3195 ; Hani # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
+3196..319F ; Hani # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+31C0..31E3 ; Hani # So [36] CJK STROKE T..CJK STROKE Q
+3220..3229 ; Hani # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
+322A..3247 ; Hani # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
+3280..3289 ; Hani # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
+328A..32B0 ; Hani # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
+32C0..32CB ; Hani # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER
+32FF ; Hani # So SQUARE ERA NAME REIWA
+3358..3370 ; Hani # So [25] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR
+337B..337F ; Hani # So [5] SQUARE ERA NAME HEISEI..SQUARE CORPORATION
+33E0..33FE ; Hani # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
+1D360..1D371 ; Hani # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
+1F250..1F251 ; Hani # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
+
+# Total code points: 238
+
+# ================================================
+
+# Script_Extensions=Latn
+
+0363..036F ; Latn # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X
+
+# Total code points: 13
+
+# ================================================
+
+# Script_Extensions=Nand
+
+1CFA ; Nand # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Syrc
+
+1DFA ; Syrc # Mn COMBINING DOT BELOW LEFT
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Arab Copt
+
+102E0 ; Arab Copt # Mn COPTIC EPACT THOUSANDS MARK
+102E1..102FB ; Arab Copt # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
+
+# Total code points: 28
+
+# ================================================
+
+# Script_Extensions=Arab Nkoo
+
+FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS
+FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Arab Rohg
+
+06D4 ; Arab Rohg # Po ARABIC FULL STOP
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Arab Syrc
+
+064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
+0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF
+
+# Total code points: 12
+
+# ================================================
+
+# Script_Extensions=Arab Thaa
+
+FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM
+FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Beng Deva
+
+1CD5..1CD6 ; Beng Deva # Mn [2] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC INDEPENDENT SVARITA
+1CD8 ; Beng Deva # Mn VEDIC TONE CANDRA BELOW
+1CE1 ; Beng Deva # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+1CEA ; Beng Deva # Lo VEDIC SIGN ANUSVARA BAHIRGOMUKHA
+1CED ; Beng Deva # Mn VEDIC SIGN TIRYAK
+1CF5..1CF6 ; Beng Deva # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
+A8F1 ; Beng Deva # Mn COMBINING DEVANAGARI SIGN AVAGRAHA
+
+# Total code points: 9
+
+# ================================================
+
+# Script_Extensions=Bopo Hani
+
+302A..302D ; Bopo Hani # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Bugi Java
+
+A9CF ; Bugi Java # Lm JAVANESE PANGRANGKEP
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Cprt Linb
+
+10102 ; Cprt Linb # Po AEGEAN CHECK MARK
+10137..1013F ; Cprt Linb # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Cyrl Glag
+
+0484 ; Cyrl Glag # Mn COMBINING CYRILLIC PALATALIZATION
+0487 ; Cyrl Glag # Mn COMBINING CYRILLIC POKRYTIE
+2E43 ; Cyrl Glag # Po DASH WITH LEFT UPTURN
+A66F ; Cyrl Glag # Mn COMBINING CYRILLIC VZMET
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Cyrl Latn
+
+0485..0486 ; Cyrl Latn # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Cyrl Perm
+
+0483 ; Cyrl Perm # Mn COMBINING CYRILLIC TITLO
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Cyrl Syrc
+
+1DF8 ; Cyrl Syrc # Mn COMBINING DOT ABOVE LEFT
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Gran
+
+1CD3 ; Deva Gran # Po VEDIC SIGN NIHSHVASA
+1CF3 ; Deva Gran # Lo VEDIC SIGN ROTATED ARDHAVISARGA
+1CF8..1CF9 ; Deva Gran # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+
+# Total code points: 4
+
+# ================================================
+
+# Script_Extensions=Deva Nand
+
+1CE9 ; Deva Nand # Lo VEDIC SIGN ANUSVARA ANTARGOMUKHA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Shrd
+
+1CD7 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA
+1CD9 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER
+1CDC..1CDD ; Deva Shrd # Mn [2] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE DOT BELOW
+1CE0 ; Deva Shrd # Mn VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+
+# Total code points: 5
+
+# ================================================
+
+# Script_Extensions=Deva Taml
+
+A8F3 ; Deva Taml # Lo DEVANAGARI SIGN CANDRABINDU VIRAMA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Geor Latn
+
+10FB ; Geor Latn # Po GEORGIAN PARAGRAPH SEPARATOR
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Gran Taml
+
+0BE6..0BEF ; Gran Taml # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+0BF0..0BF2 ; Gran Taml # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
+0BF3 ; Gran Taml # So TAMIL DAY SIGN
+11301 ; Gran Taml # Mn GRANTHA SIGN CANDRABINDU
+11303 ; Gran Taml # Mc GRANTHA SIGN VISARGA
+1133B..1133C ; Gran Taml # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
+11FD0..11FD1 ; Gran Taml # No [2] TAMIL FRACTION ONE QUARTER..TAMIL FRACTION ONE HALF-1
+11FD3 ; Gran Taml # No TAMIL FRACTION THREE QUARTERS
+
+# Total code points: 21
+
+# ================================================
+
+# Script_Extensions=Gujr Khoj
+
+0AE6..0AEF ; Gujr Khoj # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Guru Mult
+
+0A66..0A6F ; Guru Mult # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Hani Latn
+
+A700..A707 ; Hani Latn # Sk [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU
+
+# Total code points: 8
+
+# ================================================
+
+# Script_Extensions=Hira Kana
+
+3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
+3099..309A ; Hira Kana # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+309B..309C ; Hira Kana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+30A0 ; Hira Kana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
+30FC ; Hira Kana # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF70 ; Hira Kana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+
+# Total code points: 14
+
+# ================================================
+
+# Script_Extensions=Knda Nand
+
+0CE6..0CEF ; Knda Nand # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Latn Mong
+
+202F ; Latn Mong # Zs NARROW NO-BREAK SPACE
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Mani Ougr
+
+10AF2 ; Mani Ougr # Po MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Mong Phag
+
+1802..1803 ; Mong Phag # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
+1805 ; Mong Phag # Po MONGOLIAN FOUR DOTS
+
+# Total code points: 3
+
+# ================================================
+
+# Script_Extensions=Arab Syrc Thaa
+
+061C ; Arab Syrc Thaa # Cf ARABIC LETTER MARK
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Arab Thaa Yezi
+
+0660..0669 ; Arab Thaa Yezi # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Beng Cakm Sylo
+
+09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Cakm Mymr Tale
+
+1040..1049 ; Cakm Mymr Tale # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Cpmn Cprt Linb
+
+10100..10101 ; Cpmn Cprt Linb # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Cprt Lina Linb
+
+10107..10133 ; Cprt Lina Linb # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
+
+# Total code points: 45
+
+# ================================================
+
+# Script_Extensions=Deva Gran Knda
+
+1CF4 ; Deva Gran Knda # Mn VEDIC TONE CANDRA ABOVE
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Gran Latn
+
+20F0 ; Deva Gran Latn # Mn COMBINING ASTERISK ABOVE
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Hani Hira Kana
+
+303C ; Hani Hira Kana # Lo MASU MARK
+303D ; Hani Hira Kana # Po PART ALTERNATION MARK
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Kali Latn Mymr
+
+A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Knda
+
+1CD0 ; Beng Deva Gran Knda # Mn VEDIC TONE KARSHANA
+1CD2 ; Beng Deva Gran Knda # Mn VEDIC TONE PRENKHA
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Buhd Hano Tagb Tglg
+
+1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Kthi Mahj
+
+0966..096F ; Deva Dogr Kthi Mahj # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Bopo Hang Hani Hira Kana
+
+3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK
+3013 ; Bopo Hang Hani Hira Kana # So GETA MARK
+301C ; Bopo Hang Hani Hira Kana # Pd WAVE DASH
+301D ; Bopo Hang Hani Hira Kana # Ps REVERSED DOUBLE PRIME QUOTATION MARK
+301E..301F ; Bopo Hang Hani Hira Kana # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+3030 ; Bopo Hang Hani Hira Kana # Pd WAVY DASH
+3037 ; Bopo Hang Hani Hira Kana # So IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
+FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT
+
+# Total code points: 10
+
+# ================================================
+
+# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi
+
+060C ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC COMMA
+061B ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON
+
+# Total code points: 2
+
+# ================================================
+
+# Script_Extensions=Bopo Hang Hani Hira Kana Yiii
+
+3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
+3008 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT ANGLE BRACKET
+3009 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT ANGLE BRACKET
+300A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT DOUBLE ANGLE BRACKET
+300B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT DOUBLE ANGLE BRACKET
+300C ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT CORNER BRACKET
+300D ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT CORNER BRACKET
+300E ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE CORNER BRACKET
+300F ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE CORNER BRACKET
+3010 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT BLACK LENTICULAR BRACKET
+3011 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT BLACK LENTICULAR BRACKET
+3014 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT TORTOISE SHELL BRACKET
+3015 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT TORTOISE SHELL BRACKET
+3016 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE LENTICULAR BRACKET
+3017 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE LENTICULAR BRACKET
+3018 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE TORTOISE SHELL BRACKET
+3019 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE TORTOISE SHELL BRACKET
+301A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE SQUARE BRACKET
+301B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE SQUARE BRACKET
+30FB ; Bopo Hang Hani Hira Kana Yiii # Po KATAKANA MIDDLE DOT
+FF61 ; Bopo Hang Hani Hira Kana Yiii # Po HALFWIDTH IDEOGRAPHIC FULL STOP
+FF62 ; Bopo Hang Hani Hira Kana Yiii # Ps HALFWIDTH LEFT CORNER BRACKET
+FF63 ; Bopo Hang Hani Hira Kana Yiii # Pe HALFWIDTH RIGHT CORNER BRACKET
+FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
+
+# Total code points: 26
+
+# ================================================
+
+# Script_Extensions=Deva Knda Mlym Orya Taml Telu
+
+1CDA ; Deva Knda Mlym Orya Taml Telu # Mn VEDIC TONE DOUBLE SVARITA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi
+
+061F ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc
+
+0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Knda Mlym Nand Orya Sinh Telu Tirh
+
+1CF2 ; Beng Deva Gran Knda Mlym Nand Orya Sinh Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh
+
+A836..A837 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
+A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So NORTH INDIC QUANTITY MARK
+
+# Total code points: 3
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh
+
+0952 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN ANUDATTA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh
+
+A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh
+
+0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN UDATTA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh
+
+A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS
+
+# Total code points: 3
+
+# ================================================
+
+# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh
+
+A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS
+
+# Total code points: 3
+
+# ================================================
+
+# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
+
+0964 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DANDA
+
+# Total code points: 1
+
+# ================================================
+
+# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
+
+0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DOUBLE DANDA
+
+# Total code points: 1
+
+# EOF
diff --git a/gen/data-files b/gen/data-files
index 9da5452..00b5f1c 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -30,6 +30,7 @@ readonly PATHS='
IndicPositionalCategory
IndicSyllabicCategory
PropList
+ ScriptExtensions
Scripts
SpecialCasing
UnicodeData
diff --git a/gen/prop/scx b/gen/prop/scx
new file mode 100755
index 0000000..ec5b03f
--- /dev/null
+++ b/gen/prop/scx
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_scx.c
+
+gawk '
+BEGIN {
+ FS = " *(; *|#.*)"  # fields end at ";" or at a trailing "#" comment: $1 = code point(s), $2 = script list
+ # Everything printed by this rule is the fixed preamble of the generated C file.
+ print "/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */"
+ print ""
+ print "#include \"_bsearch.h\""
+ print "#include \"macros.h\""
+ print "#include \"rune.h\""
+ print "#include \"unicode/prop.h\""
+ print ""
+ print "#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}"  # compound literal holding one script list
+ print "#define _(...) {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}"  # initializer for struct uprop_sc_view: {array, element count}
+ print ""
+ print "struct uprop_sc_view {"
+ print "\tconst enum uprop_sc *p;"
+ print "\tsize_t n;"
+ print "};"
+ print ""
+ print "static constexpr enum uprop_sc fallback[] = {"  # the emitted lookup default points at fallback + uprop_get_sc(ch); presumably entry k must be the constant whose value is k -- NOTE(review): confirm this order matches enum uprop_sc
+ print "\tSC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,"
+ print "\tSC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,"
+ print "\tSC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,"
+ print "\tSC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,"
+ print "\tSC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,"
+ print "\tSC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,"
+ print "\tSC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,"
+ print "\tSC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,"
+ print "\tSC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,"
+ print "\tSC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,"
+ print "\tSC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,"
+ print "\tSC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,"
+ print "\tSC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,"
+ print "\tSC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,"
+ print "\tSC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,"
+ print "\tSC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,"
+ print "\tSC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,"
+ print "\tSC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,"
+ print "\tSC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,"
+ print "\tSC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,"
+ print "\tSC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,"
+ print "};"
+ print ""
+}
+
+/^[A-F0-9]/ {
+	# $1 is one code point or an inclusive LO..HI range; $2 is its script list.
+	cnt = split($1, ends, /\.\./)
+	cp = strtonum("0X" ends[1])
+	last = strtonum("0X" ends[cnt])  # cnt == 1 for a lone code point, so last equals the start
+	while (cp <= last)
+		props[cp++] = $2
+}
+
+END {
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tstruct uprop_sc_view val;"
+	print "} lookup[] = {"
+	# Emit one table row per maximal run of consecutive code points that share a script list.
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		for (lo = i; props[lo] == props[i + 1]; i++)
+			;  # advance i to the last code point of the run that starts at lo
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), _(", lo, i
+		n = split(props[i], xs, / /)
+		for (j = 1; j <= n; j++) {  # counted loop, not for-in: for-in order is unspecified and its keys are strings
+			printf "SC_%s", toupper(xs[j])
+			if (j < n)
+				printf ", "
+		}
+		printf ")},\n"
+	}
+	# Close the table and emit the lookup; the third macro argument is presumably the result when no row covers ch.
+	print "};"
+	print ""
+	print "_MLIB_DEFINE_BSEARCH(struct uprop_sc_view, lookup, ((struct uprop_sc_view){"
+	print "\t.p = fallback + uprop_get_sc(ch),"
+	print "\t.n = 1,"
+	print "}))"
+	print ""
+	print "const enum uprop_sc *"
+	print "uprop_get_scx(rune ch, size_t *n)"
+	print "{"
+	print "\tstruct uprop_sc_view v = mlib_lookup(ch);"
+	print "\t*n = v.n;"
+	print "\treturn v.p;"
+	print "}"
+}
+' data/ScriptExtensions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index 1cb6b83..05e16a2 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -1048,6 +1048,7 @@ enum uprop_vo {
/* Not a Unicode property; but a nice-to-have */
[[_mlib_pure]] struct u8view uprop_blkname(enum uprop_blk);
+[[_mlib_pure]] const enum uprop_sc *uprop_get_scx(rune, size_t *); /* NOTE(review): stores the element count through its size_t * argument; confirm _mlib_pure permits that */
[[_mlib_pure]] double uprop_get_nv(rune);
[[_mlib_pure]] enum uprop_age uprop_get_age(rune);
[[_mlib_pure]] enum uprop_bc uprop_get_bc(rune);
diff --git a/lib/unicode/prop/uprop_get_scx.c b/lib/unicode/prop/uprop_get_scx.c
new file mode 100644
index 0000000..f9aff23
--- /dev/null
+++ b/lib/unicode/prop/uprop_get_scx.c
@@ -0,0 +1,180 @@
+/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */
+
+#include "_bsearch.h"
+#include "macros.h"
+#include "rune.h"
+#include "unicode/prop.h"
+
+/* Expands to a compound literal: an unnamed const array of enum uprop_sc
+   holding the given enumerators. */
+#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}
+/* Expands to a brace initializer made of the array built by CAST and its
+   element count; it is used below to initialize the val member of each
+   lookup[] entry.  lengthof is not defined in this file (presumably it comes
+   from macros.h). */
+#define _(...) {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}
+
+/* A pointer/length pair describing a list of scripts.  The member order
+   matters: the _() macro initializes this struct positionally. */
+struct uprop_sc_view {
+ const enum uprop_sc *p; /* first script of the list */
+ size_t n; /* number of scripts in the list */
+};
+
+/* Backing storage for the default result: when a code point is not handled by
+   lookup[], the view returned points at fallback[uprop_get_sc(ch)] and has a
+   length of 1.
+   NOTE(review): this presumably relies on every entry sitting at the index
+   equal to its own enum value, so the order must match enum uprop_sc in
+   unicode/prop.h -- confirm before reordering. */
+static constexpr enum uprop_sc fallback[] = {
+ SC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,
+ SC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,
+ SC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,
+ SC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,
+ SC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,
+ SC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,
+ SC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,
+ SC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,
+ SC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,
+ SC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,
+ SC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,
+ SC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,
+ SC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,
+ SC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,
+ SC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,
+ SC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,
+ SC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,
+ SC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,
+ SC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,
+ SC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,
+ SC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,
+};
+
+/* Ranges of code points, listed in ascending order, whose script list is
+   given explicitly.  lo and hi are the bounds of each range (a single code
+   point has lo == hi) and val is the script list built by the _() macro. */
+static const struct {
+ rune lo, hi;
+ struct uprop_sc_view val;
+} lookup[] = {
+ {RUNE_C(0x000342), RUNE_C(0x000342), _(SC_GREK)},
+ {RUNE_C(0x000345), RUNE_C(0x000345), _(SC_GREK)},
+ {RUNE_C(0x000363), RUNE_C(0x00036F), _(SC_LATN)},
+ {RUNE_C(0x000483), RUNE_C(0x000483), _(SC_CYRL, SC_PERM)},
+ {RUNE_C(0x000484), RUNE_C(0x000484), _(SC_CYRL, SC_GLAG)},
+ {RUNE_C(0x000485), RUNE_C(0x000486), _(SC_CYRL, SC_LATN)},
+ {RUNE_C(0x000487), RUNE_C(0x000487), _(SC_CYRL, SC_GLAG)},
+ {RUNE_C(0x00060C), RUNE_C(0x00060C), _(SC_ARAB, SC_NKOO, SC_ROHG, SC_SYRC, SC_THAA, SC_YEZI)},
+ {RUNE_C(0x00061B), RUNE_C(0x00061B), _(SC_ARAB, SC_NKOO, SC_ROHG, SC_SYRC, SC_THAA, SC_YEZI)},
+ {RUNE_C(0x00061C), RUNE_C(0x00061C), _(SC_ARAB, SC_SYRC, SC_THAA)},
+ {RUNE_C(0x00061F), RUNE_C(0x00061F), _(SC_ADLM, SC_ARAB, SC_NKOO, SC_ROHG, SC_SYRC, SC_THAA, SC_YEZI)},
+ {RUNE_C(0x000640), RUNE_C(0x000640), _(SC_ADLM, SC_ARAB, SC_MAND, SC_MANI, SC_OUGR, SC_PHLP, SC_ROHG, SC_SOGD, SC_SYRC)},
+ {RUNE_C(0x00064B), RUNE_C(0x000655), _(SC_ARAB, SC_SYRC)},
+ {RUNE_C(0x000660), RUNE_C(0x000669), _(SC_ARAB, SC_THAA, SC_YEZI)},
+ {RUNE_C(0x000670), RUNE_C(0x000670), _(SC_ARAB, SC_SYRC)},
+ {RUNE_C(0x0006D4), RUNE_C(0x0006D4), _(SC_ARAB, SC_ROHG)},
+ {RUNE_C(0x000951), RUNE_C(0x000951), _(SC_BENG, SC_DEVA, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_LATN, SC_MLYM, SC_ORYA, SC_SHRD, SC_TAML, SC_TELU, SC_TIRH)},
+ {RUNE_C(0x000952), RUNE_C(0x000952), _(SC_BENG, SC_DEVA, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_LATN, SC_MLYM, SC_ORYA, SC_TAML, SC_TELU, SC_TIRH)},
+ {RUNE_C(0x000964), RUNE_C(0x000964), _(SC_BENG, SC_DEVA, SC_DOGR, SC_GONG, SC_GONM, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_MAHJ, SC_MLYM, SC_NAND, SC_ORYA, SC_SIND, SC_SINH, SC_SYLO, SC_TAKR, SC_TAML, SC_TELU, SC_TIRH)},
+ {RUNE_C(0x000965), RUNE_C(0x000965), _(SC_BENG, SC_DEVA, SC_DOGR, SC_GONG, SC_GONM, SC_GRAN, SC_GUJR, SC_GURU, SC_KNDA, SC_LIMB, SC_MAHJ, SC_MLYM, SC_NAND, SC_ORYA, SC_SIND, SC_SINH, SC_SYLO, SC_TAKR, SC_TAML, SC_TELU, SC_TIRH)},
+ {RUNE_C(0x000966), RUNE_C(0x00096F), _(SC_DEVA, SC_DOGR, SC_KTHI, SC_MAHJ)},
+ {RUNE_C(0x0009E6), RUNE_C(0x0009EF), _(SC_BENG, SC_CAKM, SC_SYLO)},
+ {RUNE_C(0x000A66), RUNE_C(0x000A6F), _(SC_GURU, SC_MULT)},
+ {RUNE_C(0x000AE6), RUNE_C(0x000AEF), _(SC_GUJR, SC_KHOJ)},
+ {RUNE_C(0x000BE6), RUNE_C(0x000BF3), _(SC_GRAN, SC_TAML)},
+ {RUNE_C(0x000CE6), RUNE_C(0x000CEF), _(SC_KNDA, SC_NAND)},
+ {RUNE_C(0x001040), RUNE_C(0x001049), _(SC_CAKM, SC_MYMR, SC_TALE)},
+ {RUNE_C(0x0010FB), RUNE_C(0x0010FB), _(SC_GEOR, SC_LATN)},
+ {RUNE_C(0x001735), RUNE_C(0x001736), _(SC_BUHD, SC_HANO, SC_TAGB, SC_TGLG)},
+ {RUNE_C(0x001802), RUNE_C(0x001803), _(SC_MONG, SC_PHAG)},
+ {RUNE_C(0x001805), RUNE_C(0x001805), _(SC_MONG, SC_PHAG)},
+ {RUNE_C(0x001CD0), RUNE_C(0x001CD0), _(SC_BENG, SC_DEVA, SC_GRAN, SC_KNDA)},
+ {RUNE_C(0x001CD1), RUNE_C(0x001CD1), _(SC_DEVA)},
+ {RUNE_C(0x001CD2), RUNE_C(0x001CD2), _(SC_BENG, SC_DEVA, SC_GRAN, SC_KNDA)},
+ {RUNE_C(0x001CD3), RUNE_C(0x001CD3), _(SC_DEVA, SC_GRAN)},
+ {RUNE_C(0x001CD4), RUNE_C(0x001CD4), _(SC_DEVA)},
+ {RUNE_C(0x001CD5), RUNE_C(0x001CD6), _(SC_BENG, SC_DEVA)},
+ {RUNE_C(0x001CD7), RUNE_C(0x001CD7), _(SC_DEVA, SC_SHRD)},
+ {RUNE_C(0x001CD8), RUNE_C(0x001CD8), _(SC_BENG, SC_DEVA)},
+ {RUNE_C(0x001CD9), RUNE_C(0x001CD9), _(SC_DEVA, SC_SHRD)},
+ {RUNE_C(0x001CDA), RUNE_C(0x001CDA), _(SC_DEVA, SC_KNDA, SC_MLYM, SC_ORYA, SC_TAML, SC_TELU)},
+ {RUNE_C(0x001CDB), RUNE_C(0x001CDB), _(SC_DEVA)},
+ {RUNE_C(0x001CDC), RUNE_C(0x001CDD), _(SC_DEVA, SC_SHRD)},
+ {RUNE_C(0x001CDE), RUNE_C(0x001CDF), _(SC_DEVA)},
+ {RUNE_C(0x001CE0), RUNE_C(0x001CE0), _(SC_DEVA, SC_SHRD)},
+ {RUNE_C(0x001CE1), RUNE_C(0x001CE1), _(SC_BENG, SC_DEVA)},
+ {RUNE_C(0x001CE2), RUNE_C(0x001CE8), _(SC_DEVA)},
+ {RUNE_C(0x001CE9), RUNE_C(0x001CE9), _(SC_DEVA, SC_NAND)},
+ {RUNE_C(0x001CEA), RUNE_C(0x001CEA), _(SC_BENG, SC_DEVA)},
+ {RUNE_C(0x001CEB), RUNE_C(0x001CEC), _(SC_DEVA)},
+ {RUNE_C(0x001CED), RUNE_C(0x001CED), _(SC_BENG, SC_DEVA)},
+ {RUNE_C(0x001CEE), RUNE_C(0x001CF1), _(SC_DEVA)},
+ {RUNE_C(0x001CF2), RUNE_C(0x001CF2), _(SC_BENG, SC_DEVA, SC_GRAN, SC_KNDA, SC_MLYM, SC_NAND, SC_ORYA, SC_SINH, SC_TELU, SC_TIRH)},
+ {RUNE_C(0x001CF3), RUNE_C(0x001CF3), _(SC_DEVA, SC_GRAN)},
+ {RUNE_C(0x001CF4), RUNE_C(0x001CF4), _(SC_DEVA, SC_GRAN, SC_KNDA)},
+ {RUNE_C(0x001CF5), RUNE_C(0x001CF6), _(SC_BENG, SC_DEVA)},
+ {RUNE_C(0x001CF7), RUNE_C(0x001CF7), _(SC_BENG)},
+ {RUNE_C(0x001CF8), RUNE_C(0x001CF9), _(SC_DEVA, SC_GRAN)},
+ {RUNE_C(0x001CFA), RUNE_C(0x001CFA), _(SC_NAND)},
+ {RUNE_C(0x001DC0), RUNE_C(0x001DC1), _(SC_GREK)},
+ {RUNE_C(0x001DF8), RUNE_C(0x001DF8), _(SC_CYRL, SC_SYRC)},
+ {RUNE_C(0x001DFA), RUNE_C(0x001DFA), _(SC_SYRC)},
+ {RUNE_C(0x00202F), RUNE_C(0x00202F), _(SC_LATN, SC_MONG)},
+ {RUNE_C(0x0020F0), RUNE_C(0x0020F0), _(SC_DEVA, SC_GRAN, SC_LATN)},
+ {RUNE_C(0x002E43), RUNE_C(0x002E43), _(SC_CYRL, SC_GLAG)},
+ {RUNE_C(0x003001), RUNE_C(0x003002), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)},
+ {RUNE_C(0x003003), RUNE_C(0x003003), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)},
+ {RUNE_C(0x003006), RUNE_C(0x003006), _(SC_HANI)},
+ {RUNE_C(0x003008), RUNE_C(0x003011), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)},
+ {RUNE_C(0x003013), RUNE_C(0x003013), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)},
+ {RUNE_C(0x003014), RUNE_C(0x00301B), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)},
+ {RUNE_C(0x00301C), RUNE_C(0x00301F), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)},
+ {RUNE_C(0x00302A), RUNE_C(0x00302D), _(SC_BOPO, SC_HANI)},
+ {RUNE_C(0x003030), RUNE_C(0x003030), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)},
+ {RUNE_C(0x003031), RUNE_C(0x003035), _(SC_HIRA, SC_KANA)},
+ {RUNE_C(0x003037), RUNE_C(0x003037), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)},
+ {RUNE_C(0x00303C), RUNE_C(0x00303D), _(SC_HANI, SC_HIRA, SC_KANA)},
+ {RUNE_C(0x00303E), RUNE_C(0x00303F), _(SC_HANI)},
+ {RUNE_C(0x003099), RUNE_C(0x00309C), _(SC_HIRA, SC_KANA)},
+ {RUNE_C(0x0030A0), RUNE_C(0x0030A0), _(SC_HIRA, SC_KANA)},
+ {RUNE_C(0x0030FB), RUNE_C(0x0030FB), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)},
+ {RUNE_C(0x0030FC), RUNE_C(0x0030FC), _(SC_HIRA, SC_KANA)},
+ {RUNE_C(0x003190), RUNE_C(0x00319F), _(SC_HANI)},
+ {RUNE_C(0x0031C0), RUNE_C(0x0031E3), _(SC_HANI)},
+ {RUNE_C(0x003220), RUNE_C(0x003247), _(SC_HANI)},
+ {RUNE_C(0x003280), RUNE_C(0x0032B0), _(SC_HANI)},
+ {RUNE_C(0x0032C0), RUNE_C(0x0032CB), _(SC_HANI)},
+ {RUNE_C(0x0032FF), RUNE_C(0x0032FF), _(SC_HANI)},
+ {RUNE_C(0x003358), RUNE_C(0x003370), _(SC_HANI)},
+ {RUNE_C(0x00337B), RUNE_C(0x00337F), _(SC_HANI)},
+ {RUNE_C(0x0033E0), RUNE_C(0x0033FE), _(SC_HANI)},
+ {RUNE_C(0x00A66F), RUNE_C(0x00A66F), _(SC_CYRL, SC_GLAG)},
+ {RUNE_C(0x00A700), RUNE_C(0x00A707), _(SC_HANI, SC_LATN)},
+ {RUNE_C(0x00A830), RUNE_C(0x00A832), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KNDA, SC_KTHI, SC_MAHJ, SC_MLYM, SC_MODI, SC_NAND, SC_SHRD, SC_SIND, SC_TAKR, SC_TIRH)},
+ {RUNE_C(0x00A833), RUNE_C(0x00A835), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KNDA, SC_KTHI, SC_MAHJ, SC_MODI, SC_NAND, SC_SHRD, SC_SIND, SC_TAKR, SC_TIRH)},
+ {RUNE_C(0x00A836), RUNE_C(0x00A837), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KTHI, SC_MAHJ, SC_MODI, SC_SIND, SC_TAKR, SC_TIRH)},
+ {RUNE_C(0x00A838), RUNE_C(0x00A838), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KTHI, SC_MAHJ, SC_MODI, SC_SHRD, SC_SIND, SC_TAKR, SC_TIRH)},
+ {RUNE_C(0x00A839), RUNE_C(0x00A839), _(SC_DEVA, SC_DOGR, SC_GUJR, SC_GURU, SC_KHOJ, SC_KTHI, SC_MAHJ, SC_MODI, SC_SIND, SC_TAKR, SC_TIRH)},
+ {RUNE_C(0x00A8F1), RUNE_C(0x00A8F1), _(SC_BENG, SC_DEVA)},
+ {RUNE_C(0x00A8F3), RUNE_C(0x00A8F3), _(SC_DEVA, SC_TAML)},
+ {RUNE_C(0x00A92E), RUNE_C(0x00A92E), _(SC_KALI, SC_LATN, SC_MYMR)},
+ {RUNE_C(0x00A9CF), RUNE_C(0x00A9CF), _(SC_BUGI, SC_JAVA)},
+ {RUNE_C(0x00FD3E), RUNE_C(0x00FD3F), _(SC_ARAB, SC_NKOO)},
+ {RUNE_C(0x00FDF2), RUNE_C(0x00FDF2), _(SC_ARAB, SC_THAA)},
+ {RUNE_C(0x00FDFD), RUNE_C(0x00FDFD), _(SC_ARAB, SC_THAA)},
+ {RUNE_C(0x00FE45), RUNE_C(0x00FE46), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA)},
+ {RUNE_C(0x00FF61), RUNE_C(0x00FF65), _(SC_BOPO, SC_HANG, SC_HANI, SC_HIRA, SC_KANA, SC_YIII)},
+ {RUNE_C(0x00FF70), RUNE_C(0x00FF70), _(SC_HIRA, SC_KANA)},
+ {RUNE_C(0x00FF9E), RUNE_C(0x00FF9F), _(SC_HIRA, SC_KANA)},
+ {RUNE_C(0x010100), RUNE_C(0x010101), _(SC_CPMN, SC_CPRT, SC_LINB)},
+ {RUNE_C(0x010102), RUNE_C(0x010102), _(SC_CPRT, SC_LINB)},
+ {RUNE_C(0x010107), RUNE_C(0x010133), _(SC_CPRT, SC_LINA, SC_LINB)},
+ {RUNE_C(0x010137), RUNE_C(0x01013F), _(SC_CPRT, SC_LINB)},
+ {RUNE_C(0x0102E0), RUNE_C(0x0102FB), _(SC_ARAB, SC_COPT)},
+ {RUNE_C(0x010AF2), RUNE_C(0x010AF2), _(SC_MANI, SC_OUGR)},
+ {RUNE_C(0x011301), RUNE_C(0x011301), _(SC_GRAN, SC_TAML)},
+ {RUNE_C(0x011303), RUNE_C(0x011303), _(SC_GRAN, SC_TAML)},
+ {RUNE_C(0x01133B), RUNE_C(0x01133C), _(SC_GRAN, SC_TAML)},
+ {RUNE_C(0x011FD0), RUNE_C(0x011FD1), _(SC_GRAN, SC_TAML)},
+ {RUNE_C(0x011FD3), RUNE_C(0x011FD3), _(SC_GRAN, SC_TAML)},
+ {RUNE_C(0x01BCA0), RUNE_C(0x01BCA3), _(SC_DUPL)},
+ {RUNE_C(0x01D360), RUNE_C(0x01D371), _(SC_HANI)},
+ {RUNE_C(0x01F250), RUNE_C(0x01F251), _(SC_HANI)},
+};
+
+/* NOTE(review): _MLIB_DEFINE_BSEARCH is not defined in this file (presumably
+   it comes from _bsearch.h).  It presumably defines the mlib_lookup() called
+   below as a search over lookup[], with the third argument as the result when
+   no range contains ch -- confirm against the macro.  That third argument is
+   a one-element view pointing at fallback[uprop_get_sc(ch)]. */
+_MLIB_DEFINE_BSEARCH(struct uprop_sc_view, lookup, ((struct uprop_sc_view){
+ .p = fallback + uprop_get_sc(ch),
+ .n = 1,
+}))
+
+/* Return a pointer to the list of scripts looked up for CH and store the
+   number of scripts in that list in *N.  The list is obtained from
+   mlib_lookup(), which is not defined directly in this file; presumably the
+   _MLIB_DEFINE_BSEARCH invocation generates it. */
+const enum uprop_sc *
+uprop_get_scx(rune ch, size_t *n)
+{
+	const struct uprop_sc_view scx = mlib_lookup(ch);
+
+	*n = scx.n;
+	return scx.p;
+}