diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-09 23:20:36 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-09 23:20:36 +0200 |
commit | 7672a052e519e2a6458e578d830b9436e3699777 (patch) | |
tree | 0ae43ce5f73c70e920cc47a11e49545a2112391e /gen | |
parent | a83a4de7820cf0a7c470c3a8acbfebf1a93b215f (diff) |
Add uprop_get_dt()
Diffstat (limited to 'gen')
-rwxr-xr-x | gen/data-files | 1 | ||||
-rwxr-xr-x | gen/prop/dt | 84 |
2 files changed, 85 insertions, 0 deletions
diff --git a/gen/data-files b/gen/data-files
index 0946f7e..3c87b73 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -13,6 +13,7 @@ DerivedCoreProperties.txt
 DerivedNormalizationProps.txt
 emoji/emoji-data.txt
 extracted/DerivedBinaryProperties.txt
+extracted/DerivedDecompositionType.txt
 extracted/DerivedEastAsianWidth.txt
 extracted/DerivedNumericType.txt
 extracted/DerivedNumericValues.txt
diff --git a/gen/prop/dt b/gen/prop/dt
new file mode 100755
index 0000000..ba0582a
--- /dev/null
+++ b/gen/prop/dt
@@ -0,0 +1,110 @@
+#!/bin/sh
+
+# Generate lib/unicode/prop/uprop_get_dt.c from the Decomposition_Type data
+# in data/DerivedDecompositionType.txt.  The script first changes to the
+# directory two levels above its own location.
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_dt.c
+
+gawk '
+BEGIN {
+	# Fields are separated by " ; "; a trailing "# ..." comment also counts
+	# as a separator, so $1 is the code point (or range) and $2 the value.
+	FS = "( *#.*| +; +)"
+
+	# Decomposition_Type value as spelled in the data file -> DT_ suffix
+	map["Canonical"] = "CAN"
+	map["Compat"] = "COM"
+	map["Circle"] = "ENC"
+	map["Final"] = "FIN"
+	map["Font"] = "FONT"
+	map["Fraction"] = "FRA"
+	map["Initial"] = "INIT"
+	map["Isolated"] = "ISO"
+	map["Medial"] = "MED"
+	map["Narrow"] = "NAR"
+	map["Nobreak"] = "NB"
+	map["Small"] = "SML"
+	map["Square"] = "SQR"
+	map["Sub"] = "SUB"
+	map["Super"] = "SUP"
+	map["Vertical"] = "VERT"
+	map["Wide"] = "WIDE"
+
+	print "/* This file is autogenerated by gen/prop/dt; DO NOT EDIT. */"
+	print ""
+	print "#include \"__bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+}
+
+# Data lines: everything that is neither empty nor a comment
+/^[^#]/ {
+	# Only the first call can change $2, so do it once per line rather
+	# than once per code point.
+	gsub(/^; /, "", $2)
+
+	# Fail loudly instead of emitting a bare "DT_" into the generated C
+	if (!($2 in map)) {
+		printf "%s:%d: unknown decomposition type \"%s\"\n", \
+			FILENAME, FNR, $2 >"/dev/stderr"
+		failed = 1
+		exit 1
+	}
+
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = "DT_" map[$2]
+}
+
+END {
+	# exit in a main rule still runs END; do not emit tables after an error
+	if (failed)
+		exit 1
+
+	# Direct-indexed table for U+0000..U+00FF, eight entries per row
+	print "static constexpr enum uprop_dt lookup_lat1[] = {"
+	for (i = 0; i < 0x100; i++) {
+		if (i % 8 == 0)
+			printf "\t"
+		printf "%-8s%s", ((i in props) ? props[i] : "DT_NONE") ",", \
+			i % 8 == 7 ? "\n" : " "
+	}
+	print "};"
+	print ""
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_dt val;"
+	print "} lookup[] = {"
+
+	# Everything from U+0100 upwards is stored as maximal runs of code
+	# points sharing one value.  Membership is tested with "in" because
+	# merely referencing props[i] would create an element for every one
+	# of the roughly 1.1 million code points visited.
+	for (i = 0x100; i <= 0x10FFFF; i++) {
+		if (!(i in props))
+			continue
+		lo = i
+		while ((i + 1) in props && props[i + 1] == props[lo])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+	}
+
+	print "};"
+	print ""
+	print "__MLIB_DEFINE_BSEARCH(enum uprop_dt, lookup, DT_NONE)"
+	print ""
+	print "enum uprop_dt"
+	print "uprop_get_dt(rune ch)"
+	print "{"
+	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
+	print "}"
+}
+' data/DerivedDecompositionType.txt