aboutsummaryrefslogtreecommitdiff
path: root/gen
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-21 15:20:27 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-21 15:20:27 +0200
commit 5b14562e05457d96a6524b5aa2e533e69cf30fb2 (patch)
tree dffebad583a6d5089b258cd337d64cc7788ba049 /gen
parent 274191d21c2b198860249016e1ed5af10f1a3865 (diff)
Add uprop_get_scx()
Diffstat (limited to 'gen')
-rwxr-xr-x gen/data-files 1
-rwxr-xr-x gen/prop/scx 97
2 files changed, 98 insertions, 0 deletions
diff --git a/gen/data-files b/gen/data-files
index 9da5452..00b5f1c 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -30,6 +30,7 @@ readonly PATHS='
IndicPositionalCategory
IndicSyllabicCategory
PropList
+ ScriptExtensions
Scripts
SpecialCasing
UnicodeData
diff --git a/gen/prop/scx b/gen/prop/scx
new file mode 100755
index 0000000..ec5b03f
--- /dev/null
+++ b/gen/prop/scx
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_scx.c
+
+gawk '
+# Set up field splitting and emit the fixed preamble of the generated C file.
+BEGIN {
+ # Fields are separated by a semicolon (with optional surrounding spaces);
+ # a trailing "# comment", together with any spaces before it, is treated
+ # as a separator as well.  The rule below reads the code point or range
+ # from $1 and the script list from $2.
+ FS = " *(; *|#.*)"
+
+ print "/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */"
+ print ""
+ print "#include \"_bsearch.h\""
+ print "#include \"macros.h\""
+ print "#include \"rune.h\""
+ print "#include \"unicode/prop.h\""
+ print ""
+ # CAST() wraps its arguments in a compound-literal array of enum uprop_sc;
+ # _() expands to an initializer made of that array and its lengthof()
+ # value (presumably the element count; lengthof is not defined here).
+ print "#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}"
+ print "#define _(...) {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}"
+ print ""
+ # Pointer-plus-count pair; used as the value type of the lookup table and
+ # of the default result emitted by the END rule.
+ print "struct uprop_sc_view {"
+ print "\tconst enum uprop_sc *p;"
+ print "\tsize_t n;"
+ print "};"
+ print ""
+ # Table of script constants.  The default result emitted by the END rule
+ # points at fallback + uprop_get_sc(ch) with a count of one.
+ # NOTE(review): that only yields the right script if fallback[k] == k for
+ # every value of enum uprop_sc, i.e. this list mirrors the enum order --
+ # confirm against unicode/prop.h whenever scripts are added.
+ print "static constexpr enum uprop_sc fallback[] = {"
+ print "\tSC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,"
+ print "\tSC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,"
+ print "\tSC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,"
+ print "\tSC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,"
+ print "\tSC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,"
+ print "\tSC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,"
+ print "\tSC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,"
+ print "\tSC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,"
+ print "\tSC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,"
+ print "\tSC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,"
+ print "\tSC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,"
+ print "\tSC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,"
+ print "\tSC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,"
+ print "\tSC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,"
+ print "\tSC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,"
+ print "\tSC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,"
+ print "\tSC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,"
+ print "\tSC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,"
+ print "\tSC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,"
+ print "\tSC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,"
+ print "\tSC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,"
+ print "};"
+ print ""
+}
+
+# Data lines start with a hexadecimal code point.  Record the script list
+# ($2) for every code point the line covers.
+/^[A-F0-9]/ {
+	# $1 is either a single code point or a "first..last" range; with a
+	# single code point both bounds come from the same array element.
+	nparts = split($1, bounds, /\.\./)
+	first = strtonum("0X" bounds[1])
+	last = strtonum("0X" bounds[nparts])
+
+	cp = first
+	while (cp <= last) {
+		props[cp] = $2
+		cp++
+	}
+}
+
+# Collapse the per-code-point table built above into ranges, then emit the
+# lookup table and the public uprop_get_scx() function.
+END {
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tstruct uprop_sc_view val;"
+	print "} lookup[] = {"
+
+	for (i = 0; i <= 0x10FFFF; i++) {
+		# Test membership with "in" first so that probing does not create
+		# an empty array element for every code point without an entry.
+		if (!(i in props) || !props[i])
+			continue
+
+		# Extend the run [lo, i] for as long as the next code point has
+		# exactly the same script list.
+		lo = i
+		while ((i + 1) in props && props[i + 1] == props[lo])
+			i++
+
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), _(", lo, i
+
+		# Walk the scripts by numeric index.  A "for (j in xs)" loop visits
+		# elements in an unspecified order and hands out subscripts that
+		# may compare as strings, so it can reorder the list and misplace
+		# the separating commas.
+		cnt = split(props[lo], xs, / /)
+		for (j = 1; j <= cnt; j++)
+			printf "%sSC_%s", (j > 1 ? ", " : ""), toupper(xs[j])
+
+		printf ")},\n"
+	}
+
+	print "};"
+	print ""
+
+	# NOTE(review): _MLIB_DEFINE_BSEARCH is not defined in this script
+	# (presumably it comes from _bsearch.h and defines mlib_lookup()); its
+	# last argument looks like the value used when ch falls in no range --
+	# confirm against that header.
+	print "_MLIB_DEFINE_BSEARCH(struct uprop_sc_view, lookup, ((struct uprop_sc_view){"
+	print "\t.p = fallback + uprop_get_sc(ch),"
+	print "\t.n = 1,"
+	print "}))"
+	print ""
+	print "const enum uprop_sc *"
+	print "uprop_get_scx(rune ch, size_t *n)"
+	print "{"
+	print "\tstruct uprop_sc_view v = mlib_lookup(ch);"
+	print "\t*n = v.n;"
+	print "\treturn v.p;"
+	print "}"
+}
+' data/ScriptExtensions