diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-21 19:50:10 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-21 19:50:10 +0200 |
commit | 43be23fd5c22a23a2fe141df93d73ba14a0fa28d (patch) | |
tree | 1f37f2183be655547d84f6a13483a84912705e96 /gen | |
parent | a04d1334a968649b1da36eb640d5d9d35eb3f29d (diff) |
Add uprop_get_sb()
Diffstat (limited to 'gen')
-rwxr-xr-x | gen/data-files | 1 | ||||
-rwxr-xr-x | gen/prop/sb | 78 |
2 files changed, 79 insertions, 0 deletions
diff --git a/gen/data-files b/gen/data-files
index ba795cd..ac984b9 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -8,6 +8,7 @@ readonly BASE='https://www.unicode.org/Public/15.1.0/ucd'
 
 readonly PATHS='
 	auxiliary/GraphemeBreakProperty
+	auxiliary/SentenceBreakProperty
 	auxiliary/WordBreakProperty
 	BidiBrackets
 	BidiMirroring
diff --git a/gen/prop/sb b/gen/prop/sb
new file mode 100755
index 0000000..aff06fd
--- /dev/null
+++ b/gen/prop/sb
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+set -e
+cd "${0%/*}/../.."                    # two levels above the directory holding this script
+exec >lib/unicode/prop/uprop_get_sb.c # everything printed below lands in the generated file
+
+gawk '
+BEGIN {
+	FS = " *(; *|#.*)"  # fields are ";"-separated; a trailing "#..." comment also acts as a separator
+
+	map["ATerm"]     = "AT"  # property value name -> suffix appended to "SB_"
+	map["Close"]     = "CL"
+	map["CR"]        = "CR"
+	map["Extend"]    = "EX"
+	map["Format"]    = "FO"
+	map["LF"]        = "LF"
+	map["Lower"]     = "LO"
+	map["Numeric"]   = "NU"
+	map["OLetter"]   = "LE"
+	map["Other"]     = "XX"
+	map["SContinue"] = "SC"
+	map["Sep"]       = "SE"
+	map["Sp"]        = "SP"
+	map["STerm"]     = "ST"
+	map["Upper"]     = "UP"
+
+	print "/* This file is autogenerated by gen/prop/sb; DO NOT EDIT. */"
+	print ""
+	print "#include \"_bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+}
+
+/^[A-F0-9]/ {  # only lines that begin with an uppercase hex digit carry data
+	n = split($1, a, /\.\./)  # $1 is either "XXXX" (n == 1) or "XXXX..YYYY" (n == 2)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])  # a[n] is a[1] again when $1 holds a single code point
+
+	for (i = lo; i <= hi; i++)
+		props[i] = "SB_" map[$2]
+}
+
+END {
+	print "static constexpr enum uprop_sb lookup_lat1[] = {"
+	for (i = 0; i < 0x100; i++) {  # directly indexed table for 0x00-0xFF, eight entries per row
+		if (i % 8 == 0)
+			printf "\t"
+		printf "%s%s", (props[i] ? props[i] : "SB_XX") ",", \
+			i % 8 == 7 ? "\n" : " "
+	}
+	print "};"
+	print ""
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_sb val;"
+	print "} lookup[] = {"
+
+	for (i = 0x100; i <= 0x10FFFF; i++) {  # everything from 0x100 up becomes {lo, hi, val} ranges
+		if (!props[i])  # no value was recorded for this code point
+			continue
+		for (lo = i; props[lo] == props[i + 1]; i++)  # advance i to the end of the run of equal values
+			;
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+	}
+
+	print "};"
+	print ""
+	print "_MLIB_DEFINE_BSEARCH(enum uprop_sb, lookup, SB_XX)"
+	print ""
+	print "enum uprop_sb"
+	print "uprop_get_sb(rune ch)"
+	print "{"
+	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
+	print "}"
+}
+' data/SentenceBreakProperty | sed 's/[[:space:]]*$//' # strip trailing whitespace; POSIX class instead of the GNU-only \s