#!/bin/sh
#
# gen/prop/wb -- generate lib/unicode/prop/uprop_get_wb.c
#
# Provenance: introduced by commit a04d1334a968649b1da36eb640d5d9d35eb3f29d
# ("Add uprop_get_wb()", Thomas Voss, Sun, 21 Apr 2024 19:46:29 +0200) as a
# new file with mode 100755.
#
# Reads data/WordBreakProperty (relative to the repository root, which is
# taken to be two directories above this script) and writes a C source file
# containing:
#
#   - lookup_lat1[]: one enum uprop_wb value per code point in 0x00..0xFF;
#   - lookup[]:      {lo, hi, value} ranges for code points >= 0x100;
#   - uprop_get_wb(): indexes lookup_lat1[] for small code points and calls
#                     mlib_lookup() otherwise.
#
# NOTE(review): mlib_lookup() is presumably what _MLIB_DEFINE_BSEARCH
# defines (see _bsearch.h); that header is not visible from here.
#
# The output is written to a temporary file and renamed into place only if
# gawk succeeds, so a failed run never leaves a truncated C file behind.
# Requires gawk (strtonum() and hexadecimal constants are gawk extensions).

set -e
cd "${0%/*}/../.."

out=lib/unicode/prop/uprop_get_wb.c
tmp=$out.$$.tmp

# Remove the temporary file on every exit path; after a successful mv the
# rm is a harmless no-op.
trap 'rm -f "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

gawk '
BEGIN {
	# Split on ";" with optional surrounding spaces.  A trailing
	# "# comment" is swallowed by the separator as well, so $1 is the
	# code point (or range) and $2 is the bare property value.
	FS = " *(; *|#.*)"

	# Property value as spelled in the data file -> suffix of the
	# WB_* enumerator emitted into the C source.
	map["ALetter"]            = "LE"
	map["CR"]                 = "CR"
	map["Double_Quote"]       = "DQ"
	map["E_Base"]             = "EB"
	map["E_Base_GAZ"]         = "EBG"
	map["E_Modifier"]         = "EM"
	map["Extend"]             = "EXTEND"
	map["ExtendNumLet"]       = "EX"
	map["Format"]             = "FO"
	map["Glue_After_Zwj"]     = "GAZ"
	map["Hebrew_Letter"]      = "HL"
	map["Katakana"]           = "KA"
	map["LF"]                 = "LF"
	map["MidLetter"]          = "ML"
	map["MidNumLet"]          = "MB"
	map["MidNum"]             = "MN"
	map["Newline"]            = "NL"
	map["Numeric"]            = "NU"
	map["Other"]              = "XX"
	map["Regional_Indicator"] = "RI"
	map["Single_Quote"]       = "SQ"
	map["WSegSpace"]          = "WSEGSPACE"
	map["ZWJ"]                = "ZWJ"

	print "/* This file is autogenerated by gen/prop/wb; DO NOT EDIT. */"
	print ""
	print "#include \"_bsearch.h\""
	print "#include \"macros.h\""
	print "#include \"rune.h\""
	print "#include \"unicode/prop.h\""
	print ""
}

# Data lines start with a hexadecimal code point: either "XXXX" or a range
# "XXXX..YYYY".  Everything else (comments, blank lines) is skipped.
/^[A-F0-9]/ {
	# An unmapped value would otherwise be emitted as the bare
	# identifier "WB_" and only fail much later, at C compile time.
	if (!($2 in map)) {
		printf("%s:%d: unknown Word_Break value \"%s\"\n",
		       FILENAME, FNR, $2) > "/dev/stderr"
		failed = 1
		exit 1
	}

	# For a single code point split() returns 1, so a[1] and a[n] are
	# the same element and lo == hi.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	for (i = lo; i <= hi; i++)
		props[i] = "WB_" map[$2]
}

END {
	# "exit" inside a rule still runs END; do not emit tables after an
	# error.
	if (failed)
		exit 1

	# Direct table for 0x00..0xFF, four entries per row.  Entries are
	# left-aligned in 13 columns; the last entry of a row is not padded
	# so that no trailing whitespace is produced.
	#
	# Membership is tested with "in" throughout: merely referencing
	# props[i] would create an empty element for every code point
	# probed.
	print "static constexpr enum uprop_wb lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		val = ((i in props) ? props[i] : "WB_XX") ","
		if (i % 4 == 0)
			printf "\t"
		if (i % 4 == 3)
			printf "%s\n", val
		else
			printf "%-13s ", val
	}
	print "};"
	print ""
	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_wb val;"
	print "} lookup[] = {"

	# Range table for everything above 0xFF: coalesce each maximal run
	# of consecutive code points sharing one value into a single
	# {lo, hi, value} entry.  Code points without a value get no entry.
	for (i = 0x100; i <= 0x10FFFF; i++) {
		if (!(i in props))
			continue
		lo = i
		while (((i + 1) in props) && props[i + 1] == props[lo])
			i++
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[lo]
	}

	print "};"
	print ""
	print "_MLIB_DEFINE_BSEARCH(enum uprop_wb, lookup, WB_XX)"
	print ""
	print "enum uprop_wb"
	print "uprop_get_wb(rune ch)"
	print "{"
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/WordBreakProperty >"$tmp"

mv "$tmp" "$out"