diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-21 15:20:27 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-21 15:20:27 +0200 |
commit | 5b14562e05457d96a6524b5aa2e533e69cf30fb2 (patch) | |
tree | dffebad583a6d5089b258cd337d64cc7788ba049 /gen | |
parent | 274191d21c2b198860249016e1ed5af10f1a3865 (diff) |
Add uprop_get_scx()
Diffstat (limited to 'gen')
-rwxr-xr-x | gen/data-files | 1 | ||||
-rwxr-xr-x | gen/prop/scx | 97 |
2 files changed, 98 insertions, 0 deletions
diff --git a/gen/data-files b/gen/data-files
index 9da5452..00b5f1c 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -30,6 +30,7 @@ readonly PATHS='
 	IndicPositionalCategory
 	IndicSyllabicCategory
 	PropList
+	ScriptExtensions
 	Scripts
 	SpecialCasing
 	UnicodeData
diff --git a/gen/prop/scx b/gen/prop/scx
new file mode 100755
index 0000000..ec5b03f
--- /dev/null
+++ b/gen/prop/scx
@@ -0,0 +1,97 @@
+#!/bin/sh
+# Generate lib/unicode/prop/uprop_get_scx.c from data/ScriptExtensions.
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_scx.c
+
+gawk '
+BEGIN {
+	FS = " *(; *|#.*)"	# split on ";"; a trailing "#..." comment ends the last field
+
+	print "/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */"
+	print ""
+	print "#include \"_bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+	print "#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}"
+	print "#define _(...) {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}"
+	print ""
+	print "struct uprop_sc_view {"
+	print "\tconst enum uprop_sc *p;"
+	print "\tsize_t n;"
+	print "};"
+	print ""
+	print "static constexpr enum uprop_sc fallback[] = {"
+	print "\tSC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,"
+	print "\tSC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,"
+	print "\tSC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,"
+	print "\tSC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,"
+	print "\tSC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,"
+	print "\tSC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,"
+	print "\tSC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,"
+	print "\tSC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,"
+	print "\tSC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,"
+	print "\tSC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,"
+	print "\tSC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,"
+	print "\tSC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,"
+	print "\tSC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,"
+	print "\tSC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,"
+	print "\tSC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,"
+	print "\tSC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,"
+	print "\tSC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,"
+	print "\tSC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,"
+	print "\tSC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,"
+	print "\tSC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,"
+	print "\tSC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,"
+	print "};"
+	print ""
+}
+# Data line: remember the script list ($2) of every code point in the range $1.
+/^[A-F0-9]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])	# a[n] equals a[1] when $1 is a single code point
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+# Emit the range table, the binary-search lookup and the public accessor.
+END {
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tstruct uprop_sc_view val;"
+	print "} lookup[] = {"
+	# One row per maximal run of code points that share the same script list.
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		for (lo = i; props[lo] == props[i + 1]; i++)
+			;
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), _(", lo, i
+		nsc = split(props[i], xs, / /)
+		for (j = 1; j <= nsc; j++) {	# not for-in: its order is unspecified
+			printf "SC_%s", toupper(xs[j])
+			if (j < nsc)	# numeric test; for-in subscripts are strings
+				printf ", "
+		}
+		printf ")},\n"
+	}
+
+	print "};"
+	print ""
+	print "_MLIB_DEFINE_BSEARCH(struct uprop_sc_view, lookup, ((struct uprop_sc_view){"
+	print "\t.p = fallback + uprop_get_sc(ch),"
+	print "\t.n = 1,"
+	print "}))"
+	print ""
+	print "const enum uprop_sc *"
+	print "uprop_get_scx(rune ch, size_t *n)"
+	print "{"
+	print "\tstruct uprop_sc_view v = mlib_lookup(ch);"
+	print "\t*n = v.n;"
+	print "\treturn v.p;"
+	print "}"
+}
+' data/ScriptExtensions