From 903080d6b1bddde8d9097359eed21a9b9ee74fd0 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Tue, 16 Apr 2024 23:05:29 +0200
Subject: Add uprop_get_ccc()

---
 gen/prop/ccc | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100755 gen/prop/ccc

(limited to 'gen')

diff --git a/gen/prop/ccc b/gen/prop/ccc
new file mode 100755
index 0000000..4f370e7
--- /dev/null
+++ b/gen/prop/ccc
@@ -0,0 +1,161 @@
+#!/bin/sh
+
+# Generate lib/unicode/prop/uprop_get_ccc.c from data/UnicodeData: a table
+# of code point ranges with a non-default canonical combining class, and
+# uprop_get_ccc(), which looks a rune up in that table.
+
+set -e
+
+# dirname(1) also copes with a slash-less $0 (e.g. "sh ccc"), for which
+# "${0%/*}" would name the script itself rather than its directory.
+cd "$(dirname -- "$0")/../.."
+
+# Write to a scratch file and rename it into place only on success, so a
+# failed run never leaves a truncated or partial table behind.
+out=lib/unicode/prop/uprop_get_ccc.c
+tmp=$out.tmp
+trap 'rm -f "$tmp"' EXIT
+
+gawk '
+# Report a fatal error.  awk still runs END after an exit from a main rule,
+# so END checks "failed" before emitting anything further.
+function die(msg)
+{
+	print "gen/prop/ccc: " msg >"/dev/stderr"
+	failed = 1
+	exit 1
+}
+
+BEGIN {
+	FS = ";"
+
+	# Numeric class (field 4 of UnicodeData) to the suffix of its CCC_*
+	# enumerator.  Class 0 is CCC_NR, the default, and needs no entry.
+	map[1] = "OV"
+	map[6] = "HANR"
+	map[7] = "NK"
+	map[8] = "KV"
+	map[9] = "VR"
+	map[10] = "CCC10"
+	map[11] = "CCC11"
+	map[12] = "CCC12"
+	map[13] = "CCC13"
+	map[14] = "CCC14"
+	map[15] = "CCC15"
+	map[16] = "CCC16"
+	map[17] = "CCC17"
+	map[18] = "CCC18"
+	map[19] = "CCC19"
+	map[20] = "CCC20"
+	map[21] = "CCC21"
+	map[22] = "CCC22"
+	map[23] = "CCC23"
+	map[24] = "CCC24"
+	map[25] = "CCC25"
+	map[26] = "CCC26"
+	map[27] = "CCC27"
+	map[28] = "CCC28"
+	map[29] = "CCC29"
+	map[30] = "CCC30"
+	map[31] = "CCC31"
+	map[32] = "CCC32"
+	map[33] = "CCC33"
+	map[34] = "CCC34"
+	map[35] = "CCC35"
+	map[36] = "CCC36"
+	map[84] = "CCC84"
+	map[91] = "CCC91"
+	map[103] = "CCC103"
+	map[107] = "CCC107"
+	map[118] = "CCC118"
+	map[122] = "CCC122"
+	map[129] = "CCC129"
+	map[130] = "CCC130"
+	map[132] = "CCC132"
+	map[133] = "CCC133"
+	map[200] = "ATBL"
+	map[202] = "ATB"
+	map[214] = "ATA"
+	map[216] = "ATAR"
+	map[218] = "BL"
+	map[220] = "B"
+	map[222] = "BR"
+	map[224] = "L"
+	map[226] = "R"
+	map[228] = "AL"
+	map[230] = "A"
+	map[232] = "AR"
+	map[233] = "DB"
+	map[234] = "DA"
+	map[240] = "IS"
+
+	print "/* This file is autogenerated by gen/prop/ccc; DO NOT EDIT. */"
+	print ""
+	print "#include \"_bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+}
+
+{
+	first = $1
+	ccc = $4
+	lo = hi = strtonum("0X" first)
+
+	# A range is a "<..., First>" record directly followed by its
+	# "<..., Last>" record; the latter supplies the upper bound.
+	if ($2 ~ /First/) {
+		if ((getline) <= 0)
+			die("U+" first ": range start without a matching end")
+		hi = strtonum("0X" $1)
+	}
+
+	# Runes missing from the table are reported as CCC_NR, so class 0
+	# needs no rows.
+	if (ccc + 0 == 0)
+		next
+
+	# Fail loudly instead of silently filing a class this script does
+	# not know about under CCC_NR.
+	if (!(ccc in map))
+		die("U+" first ": unknown canonical combining class " ccc)
+
+	for (i = lo; i <= hi; i++)
+		props[i] = "CCC_" map[ccc]
+}
+
+END {
+	if (failed)
+		exit 1
+
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_ccc val;"
+	print "} lookup[] = {"
+
+	# Collapse each run of adjacent code points sharing a class into
+	# one row.  The "in" tests keep awk from creating an element for
+	# every code point that is merely probed.
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!(i in props))
+			continue
+		lo = i
+		while (((i + 1) in props) && props[i + 1] == props[lo])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[lo]
+	}
+
+	print "};"
+	print ""
+	print "_MLIB_DEFINE_BSEARCH(enum uprop_ccc, lookup, CCC_NR)"
+	print ""
+	print "enum uprop_ccc"
+	print "uprop_get_ccc(rune ch)"
+	print "{"
+	print "\treturn ch < lookup[0].lo ? CCC_NR : mlib_lookup(ch);"
+	print "}"
+}
+' data/UnicodeData >"$tmp"
+
+mv "$tmp" "$out"
-- 
cgit v1.2.3