diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-16 22:03:49 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-16 22:03:49 +0200 |
commit | d1d0bc722d1d10936aaf3252b0ec5842715d0d31 (patch) | |
tree | 8949b4eb4f279bfb33a6fd6f0a12cb0576020970 /gen | |
parent | 2bdff13305d08e8b6bdb582fdf064b7ca7860202 (diff) |
Add uprop_get_insc()
Diffstat (limited to 'gen')
-rwxr-xr-x | gen/data-files | 1 | ||||
-rwxr-xr-x | gen/prop/insc | 63 |
2 files changed, 64 insertions, 0 deletions
```diff
diff --git a/gen/data-files b/gen/data-files
index 63e0e76..de118c5 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -25,6 +25,7 @@ readonly PATHS='
 	extracted/DerivedNumericValues
 	HangulSyllableType
 	IndicPositionalCategory
+	IndicSyllabicCategory
 	PropList
 	Scripts
 	SpecialCasing
diff --git a/gen/prop/insc b/gen/prop/insc
new file mode 100755
index 0000000..7ed2b96
--- /dev/null
+++ b/gen/prop/insc
@@ -0,0 +1,63 @@
+#!/bin/sh
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_insc.c
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+
+	print "/* This file is autogenerated by gen/prop/insc; DO NOT EDIT. */"
+	print ""
+	print "#include \"_bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++) {
+		gsub(/^; /, "", $2)
+		props[i] = "INSC_" toupper($2)
+	}
+}
+
+END {
+	print "static constexpr enum uprop_insc lookup_lat1[LATIN1_MAX] = {"
+	for (i = 0; i < 0x100; i++) {
+		if (props[i])
+			printf "\t[0x%02X] = %s,\n", i, props[i]
+	}
+	print "};"
+	print ""
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_insc val;"
+	print "} lookup[] = {"
+
+	for (i = 0x100; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+	}
+
+	print "};"
+	print ""
+	print "_MLIB_DEFINE_BSEARCH(enum uprop_insc, lookup, INSC_OTHER)"
+	print ""
+	print "enum uprop_insc"
+	print "uprop_get_insc(rune ch)"
+	print "{"
+	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
+	print "}"
+}
+' data/IndicSyllabicCategory | sed 's/\s*$//'
```