From 73df1a0e994f6d777591d7dae7c14f291c876fb2 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Tue, 9 Apr 2024 19:01:28 +0200
Subject: Add uprop_get_nt()

---
 gen/data-files |  1 +
 gen/prop/nt    | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100755 gen/prop/nt

(limited to 'gen')

diff --git a/gen/data-files b/gen/data-files
index a3df9f3..4ac2f07 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -12,6 +12,7 @@ DerivedCoreProperties.txt
 DerivedNormalizationProps.txt
 emoji/emoji-data.txt
 extracted/DerivedBinaryProperties.txt
+extracted/DerivedNumericType.txt
 extracted/DerivedNumericValues.txt
 PropList.txt
 UnicodeData.txt
diff --git a/gen/prop/nt b/gen/prop/nt
new file mode 100755
index 0000000..569cc18
--- /dev/null
+++ b/gen/prop/nt
@@ -0,0 +1,93 @@
+#!/bin/sh
+
+# Generate lib/unicode/prop/uprop_get_nt.c from data/DerivedNumericType.txt.
+# The emitted file holds a directly indexed table for U+0000..U+00FF, a
+# range table for the code points above that, and uprop_get_nt() itself.
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_nt.c
+
+gawk '
+BEGIN {
+	# A separator is " ; " (spaces on both sides) or a trailing "# ..." comment.
+	FS = "( *#.*| +; +)"
+
+	# Numeric type name in the data file -> suffix of the NT_ constant.
+	map["Decimal"] = "DE"
+	map["Digit"] = "DI"
+	map["Numeric"] = "NU"
+
+	print "/* This file is autogenerated by gen/prop/nt; DO NOT EDIT. */"
+	print ""
+	print "#include \"__bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+}
+
+# Data line: a code point or a LO..HI range in $1 and the type name in $2.
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	# Loop-invariant cleanup of the type name; do it once per record.
+	gsub(/^; /, "", $2)
+
+	# An unmapped name would be emitted as the bare identifier NT_, so
+	# stop here instead of generating C that cannot compile.
+	if (!($2 in map)) {
+		printf "%s:%d: unknown numeric type: %s\n", FILENAME, FNR, $2 >"/dev/stderr"
+		failed = 1
+		exit 1
+	}
+
+	val = "NT_" map[$2]
+	for (i = lo; i <= hi; i++)
+		props[i] = val
+}
+
+END {
+	# exit in a pattern rule still runs END; emit nothing more after an error.
+	if (failed)
+		exit 1
+
+	# Membership is tested with "in" throughout: merely reading props[i]
+	# would create an empty element for every unassigned code point.
+	print "static constexpr enum uprop_nt lookup_lat1[] = {"
+	for (i = 0; i < 0x100; i++) {
+		if (i % 8 == 0)
+			printf "\t"
+		printf "%7s,%s", (i in props) ? props[i] : "NT_NONE", (i % 8 == 7) ? "\n" : " "
+	}
+	print "};"
+	print ""
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_nt val;"
+	print "} lookup[] = {"
+
+	for (i = 0x100; i <= 0x10FFFF; i++) {
+		if (!(i in props))
+			continue
+
+		# Merge the run of adjacent code points that share one value.
+		lo = i
+		while (((i + 1) in props) && props[i + 1] == props[lo])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[lo]
+	}
+
+	print "};"
+	print ""
+	print "__MLIB_DEFINE_BSEARCH(enum uprop_nt, lookup, NT_NONE)"
+	print ""
+	print "enum uprop_nt"
+	print "uprop_get_nt(rune ch)"
+	print "{"
+	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
+	print "}"
+}
+' data/DerivedNumericType.txt
-- 
cgit v1.2.3