aboutsummaryrefslogtreecommitdiff
path: root/gen
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-09 23:20:36 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-09 23:20:36 +0200
commit 7672a052e519e2a6458e578d830b9436e3699777 (patch)
tree 0ae43ce5f73c70e920cc47a11e49545a2112391e /gen
parent a83a4de7820cf0a7c470c3a8acbfebf1a93b215f (diff)
Add uprop_get_dt()
Diffstat (limited to 'gen')
-rwxr-xr-x gen/data-files 1
-rwxr-xr-x gen/prop/dt 84
2 files changed, 85 insertions, 0 deletions
diff --git a/gen/data-files b/gen/data-files
index 0946f7e..3c87b73 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -13,6 +13,7 @@ DerivedCoreProperties.txt
DerivedNormalizationProps.txt
emoji/emoji-data.txt
extracted/DerivedBinaryProperties.txt
+extracted/DerivedDecompositionType.txt
extracted/DerivedEastAsianWidth.txt
extracted/DerivedNumericType.txt
extracted/DerivedNumericValues.txt
diff --git a/gen/prop/dt b/gen/prop/dt
new file mode 100755
index 0000000..ba0582a
--- /dev/null
+++ b/gen/prop/dt
@@ -0,0 +1,84 @@
+#!/bin/sh
+
+# Generates lib/unicode/prop/uprop_get_dt.c from
+# data/DerivedDecompositionType.txt by running the gawk program below.
+
+# Abort as soon as any command fails.
+set -e
+# Strip the last path component (the script name) from $0 and move two
+# directories up, so the relative paths used below resolve from there
+# (presumably the repository root, given the script lives in gen/prop).
+# NOTE(review): this relies on $0 containing a slash.
+cd "${0%/*}/../.."
+# From here on, everything written to stdout lands in the generated C file.
+exec >lib/unicode/prop/uprop_get_dt.c
+
+# gawk rather than plain awk: the program uses strtonum() and hexadecimal
+# constants.
+gawk '
+BEGIN {
+	# Fixed head of the generated C file: the do-not-edit banner followed
+	# by the headers the generated code includes.
+	print "/* This file is autogenerated by gen/prop/dt; DO NOT EDIT. */"
+	print ""
+	print "#include \"__bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+
+	# A field separator is either optional spaces followed by "#" and the
+	# rest of the line, or a semicolon with at least one space on each
+	# side.  The record rule reads the code point (range) from $1 and the
+	# property value name from $2.
+	FS = "( *#.*| +; +)"
+
+	# Property value name as read from $2 -> suffix that the record rule
+	# appends to "DT_" to form the enum constant name.
+	map["Canonical"] = "CAN"
+	map["Circle"]    = "ENC"
+	map["Compat"]    = "COM"
+	map["Final"]     = "FIN"
+	map["Font"]      = "FONT"
+	map["Fraction"]  = "FRA"
+	map["Initial"]   = "INIT"
+	map["Isolated"]  = "ISO"
+	map["Medial"]    = "MED"
+	map["Narrow"]    = "NAR"
+	map["Nobreak"]   = "NB"
+	map["Small"]     = "SML"
+	map["Square"]    = "SQR"
+	map["Sub"]       = "SUB"
+	map["Super"]     = "SUP"
+	map["Vertical"]  = "VERT"
+	map["Wide"]      = "WIDE"
+}
+
+# Runs for every non-empty line that does not start with "#".
+# $1 is a single code point or a "LO..HI" range in hexadecimal, $2 is the
+# property value name.  Records the matching DT_* constant name in props[]
+# for every code point of the range.
+/^[^#]/ {
+	# With a single code point split() returns 1, so a[1] and a[n] are
+	# the same element and lo == hi.
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	# Drop a leading "; " the field separator may have left behind.  This
+	# does not depend on the code point, so do it once per record.
+	gsub(/^; /, "", $2)
+
+	# A name missing from map[] would otherwise silently turn into the
+	# bare identifier "DT_" in the generated C.  Report it on stderr
+	# (stdout is the generated file) and plant an #error directive in the
+	# output: exit still runs the END rule, so the file gets completed, and
+	# the directive ensures that this incomplete file cannot be compiled.
+	if (!($2 in map)) {
+		printf "%s:%d: unknown decomposition type \"%s\"\n", \
+			FILENAME, FNR, $2 >"/dev/stderr"
+		printf "#error \"gen/prop/dt: unknown decomposition type at %s:%d\"\n", \
+			FILENAME, FNR
+		exit 1
+	}
+
+	dt = "DT_" map[$2]
+	for (i = lo; i <= hi; i++)
+		props[i] = dt
+}
+
+# Emits the lookup tables and the uprop_get_dt() definition from props[].
+#
+# Membership is tested with the "in" operator throughout: merely
+# referencing props[i] would create an empty element for every code point
+# that has no entry, i.e. for most of the 0x110000 code points visited.
+# Every stored value is a non-empty "DT_..." string, so the membership
+# test selects exactly the same code points as a truthiness test would.
+END {
+	# Directly indexed table for code points 0x00..0xFF, eight entries
+	# per line; code points without an entry get DT_NONE.
+	print "static constexpr enum uprop_dt lookup_lat1[] = {"
+	for (i = 0; i < 0x100; i++) {
+		if (i % 8 == 0)
+			printf "\t"
+		printf "%-8s%s", ((i in props) ? props[i] : "DT_NONE") ",", \
+			i % 8 == 7 ? "\n" : " "
+	}
+	print "};"
+	print ""
+
+	# Range table for everything from 0x100 upwards, in ascending order.
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_dt val;"
+	print "} lookup[] = {"
+
+	for (i = 0x100; i <= 0x10FFFF; i++) {
+		if (!(i in props))
+			continue
+		# Extend the run while the next code point has the same value,
+		# then emit it as a single {lo, hi, value} entry.  i ends on the
+		# last code point of the run, so the loop resumes right after it.
+		lo = i
+		while (((i + 1) in props) && props[i + 1] == props[lo])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[lo]
+	}
+
+	print "};"
+	print ""
+	# NOTE(review): presumably this macro (from __bsearch.h) defines the
+	# mlib_lookup() used below, searching lookup[] and yielding DT_NONE on
+	# a miss; confirm against the header.
+	print "__MLIB_DEFINE_BSEARCH(enum uprop_dt, lookup, DT_NONE)"
+	print ""
+	# The generated function indexes lookup_lat1[] for code points below
+	# its length and defers to mlib_lookup() for everything else.
+	print "enum uprop_dt"
+	print "uprop_get_dt(rune ch)"
+	print "{"
+	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
+	print "}"
+}
+' data/DerivedDecompositionType.txt