#!/bin/sh

# gen/string/scale
# (introduced by commit db816ef1e678, "Add the U8*_SCALE constants")
#
# Usage: scale -v utf=X -v mapping=X [-v az=X] [-v lt=X]
# Example: scale -v utf=8 -v mapping=title -v lt=1
#
# Prints the largest ratio, over the entries of data/SpecialCasing, between
# the encoded size of a case mapping and the encoded size of the code point
# it is mapped from.
#
#   utf      32 or 16 select those encodings; anything else is sized as UTF-8
#   mapping  lower, title or upper (required)
#   az       when true, keep entries whose 5th field starts with az or tr
#   lt       when true, keep entries whose 5th field starts with lt

set -e
cd "${0%/*}/../.."

gawk "$@" '
# Number of bytes needed to encode the code point X (a hexadecimal string
# without prefix) in the encoding selected by the utf variable.
function bcnt(x)
{
	x = strtonum("0X" x)
	if (utf == 32)
		return 4
	if (utf == 16)
		return x < 0x10000 ? 2 : 4
	# UTF-8; everything from 0x10000 upwards takes four bytes.  Note that
	# awk has no C-style comments: a /* ... */ here would be parsed as a
	# regexp constant and silently become part of the expression.
	return x < 0x00080 ? 1 \
	     : x < 0x00800 ? 2 \
	     : x < 0x10000 ? 3 \
	     :               4
}

# Larger of X and Y.
function max(x, y)
{
	return x > y ? x : y
}

BEGIN {
	FS = " *; *"

	# Pick the column holding the requested mapping.
	if (mapping == "lower")
		field = 2
	else if (mapping == "title")
		field = 3
	else if (mapping == "upper")
		field = 4
	else {
		# Without a valid mapping, field would stay 0 and $field would be
		# the whole record, yielding a meaningless result.
		print "scale: mapping must be lower, title or upper" > "/dev/stderr"
		bad = 1
		exit 1
	}
}

# Skip language-specific entries unless they were asked for.
$5 ~ /^(az|tr)/ && !az { next }
$5 ~ /^lt/      && !lt { next }

# Data lines start with the hexadecimal source code point.
/^[A-F0-9]/ {
	to = 0
	from = bcnt($1)
	split($field, xs, / /)
	for (i in xs)
		to += bcnt(xs[i])
	# Track the maximum as we go so that every entry, including the very
	# first one, is taken into account.
	n = max(n, to / from)
}

END {
	# exit in BEGIN still runs END; do not print after a usage error.
	if (bad)
		exit 1
	print n
}
' data/SpecialCasing