aboutsummaryrefslogtreecommitdiff
path: root/gen
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-15 12:38:16 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-15 12:38:16 +0200
commit 29abb22298efa1f70968e309c75f13966e9343a8 (patch)
tree 448a66ec29d640a51009100f98b624e129cdeaea /gen
parent 5d3787cdca5e7b594e7b615fa664855c0df626f5 (diff)
Add uprop_get_age()
Diffstat (limited to 'gen')
-rwxr-xr-x gen/data-files 1
-rwxr-xr-x gen/prop/age 55
2 files changed, 56 insertions, 0 deletions
diff --git a/gen/data-files b/gen/data-files
index 1f154ac..801f591 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -9,6 +9,7 @@ readonly BASE=https://www.unicode.org/Public/UCD/latest/ucd
readonly PATHS='
auxiliary/GraphemeBreakProperty
BidiBrackets
+DerivedAge
DerivedCoreProperties
DerivedNormalizationProps
emoji/emoji-data
diff --git a/gen/prop/age b/gen/prop/age
new file mode 100755
index 0000000..d0f742f
--- /dev/null
+++ b/gen/prop/age
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_age.c
+
+gawk '
+BEGIN {
+ FS = " *(; *|#.*)"
+
+ print "/* This file is autogenerated by gen/prop/age; DO NOT EDIT. */"
+ print ""
+ print "#include \"__bsearch.h\""
+ print "#include \"rune.h\""
+ print "#include \"unicode/prop.h\""
+ print ""
+}
+
+/^[^#]/ {
+ n = split($1, a, /\.\./)
+ lo = strtonum("0X" a[1])
+ hi = strtonum("0X" a[n])
+
+ for (i = lo; i <= hi; i++) {
+ gsub(/^; /, "", $2)
+ props[i] = "AGE_V" int($2) "_" ($2 % 1 * 10)
+ }
+}
+
+END {
+ print "static const struct {"
+ print "\trune lo, hi;"
+ print "\tenum uprop_age val;"
+ print "} lookup[] = {"
+
+ for (i = 0x1F6; i <= 0x10FFFF; i++) {
+ if (!props[i])
+ continue
+ lo = i
+ while (props[lo] == props[i + 1])
+ i++
+ printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+ }
+
+ print "};"
+ print ""
+ print "__MLIB_DEFINE_BSEARCH(enum uprop_age, lookup, AGE_NA)"
+ print ""
+ print "enum uprop_age"
+ print "uprop_get_age(rune ch)"
+ print "{"
+ print "\treturn ch <= RUNE_C(0x01F5) ? AGE_V1_1 : mlib_lookup(ch);"
+ print "}"
+}
+' data/DerivedAge | sed 's/\s*$//'