#!/bin/sh

# Generate lib/unicode/prop/uprop_get_wb.c from data/WordBreakProperty.
#
# The emitted C file contains a direct-indexed table for code points below
# 0x100, a table of {lo, hi, value} ranges for everything from 0x100 up, and
# the uprop_get_wb() function that consults them.
#
# Requires gawk: strtonum() is a gawk extension.

set -e

# Paths below are relative to the directory two levels above this script.
cd "${0%/*}/../.."

# Everything written to stdout from here on goes to the generated file.
exec >lib/unicode/prop/uprop_get_wb.c

gawk '
BEGIN {
	# A separator is optional spaces followed by either ";" (plus optional
	# spaces) or a "#" comment running to the end of the line.  This makes
	# $1 the code point or range and $2 the property value.
	FS = " *(; *|#.*)"

	# Property value as spelled in the data file -> suffix of the WB_*
	# enumerator emitted into the C source.
	map["ALetter"]            = "LE"
	map["CR"]                 = "CR"
	map["Double_Quote"]       = "DQ"
	map["E_Base"]             = "EB"
	map["E_Base_GAZ"]         = "EBG"
	map["E_Modifier"]         = "EM"
	map["Extend"]             = "EXTEND"
	map["ExtendNumLet"]       = "EX"
	map["Format"]             = "FO"
	map["Glue_After_Zwj"]     = "GAZ"
	map["Hebrew_Letter"]      = "HL"
	map["Katakana"]           = "KA"
	map["LF"]                 = "LF"
	map["MidLetter"]          = "ML"
	map["MidNumLet"]          = "MB"
	map["MidNum"]             = "MN"
	map["Newline"]            = "NL"
	map["Numeric"]            = "NU"
	map["Other"]              = "XX"
	map["Regional_Indicator"] = "RI"
	map["Single_Quote"]       = "SQ"
	map["WSegSpace"]          = "WSEGSPACE"
	map["ZWJ"]                = "ZWJ"

	print "/* This file is autogenerated by gen/prop/wb; DO NOT EDIT. */"
	print ""
	print "#include \"_bsearch.h\""
	print "#include \"macros.h\""
	print "#include \"rune.h\""
	print "#include \"unicode/prop.h\""
	print ""
}

# Data lines start with a hexadecimal code point, either alone ("XXXX") or as
# the start of a range ("XXXX..YYYY").  Record the property of every code
# point the line covers.
/^[A-F0-9]/ {
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])	# n == 1 for a lone code point, so hi == lo
	for (i = lo; i <= hi; i++)
		props[i] = "WB_" map[$2]
}

END {
	# Direct-indexed table for code points below 0x100, four entries per
	# line.  Code points with no recorded property default to WB_XX.
	#
	# Membership is tested with "in" throughout this rule: merely
	# referencing props[i] would create an empty element for every
	# unassigned code point.
	print "static constexpr enum uprop_wb lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		if (i % 4 == 0)
			printf "\t"
		val = (i in props) ? props[i] : "WB_XX"
		printf "%-13s%s", val ",", (i % 4 == 3 ? "\n" : " ")
	}
	print "};"
	print ""

	# Range table for the remaining code points.  Consecutive code points
	# that share a property are merged into one {lo, hi, value} entry;
	# code points with no recorded property get no entry.
	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_wb val;"
	print "} lookup[] = {"
	for (i = 0x100; i <= 0x10FFFF; i++) {
		if (!(i in props))
			continue
		# Advance i to the last code point of the run starting at lo.
		for (lo = i; ((i + 1) in props) && props[i + 1] == props[lo]; i++)
			;
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
	}
	print "};"
	print ""

	print "_MLIB_DEFINE_BSEARCH(enum uprop_wb, lookup, WB_XX)"
	print ""
	print "enum uprop_wb"
	print "uprop_get_wb(rune ch)"
	print "{"
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/WordBreakProperty |
	# Strip the trailing padding that %-13s leaves at the end of table rows.
	# [[:space:]] is used instead of \s, which POSIX sed does not define.
	sed 's/[[:space:]]*$//'