diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-18 12:04:03 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-18 12:04:03 +0200 |
commit | db816ef1e678e20d91d1b5308b2d7a182a3f114c (patch) | |
tree | ab30b8deca32f2f1d9a4b238235b45fa99c092fc /gen/string | |
parent | 1616feb4901579da80452c95c6e0f732b945c7d5 (diff) |
Add the U8*_SCALE constants
Diffstat (limited to 'gen/string')
-rwxr-xr-x | gen/string/scale | 55 |
1 files changed, 55 insertions, 0 deletions
#!/bin/sh

# gen/string/scale
#
# Usage: scale -v utf=X -v mapping=X [-v az=X] [-v lt=X]
# Example: scale -v utf=8 -v mapping=title -v lt=1
#
# Reads data/SpecialCasing and prints the largest growth factor of any case
# mapping in it: the encoded size of the mapped sequence divided by the
# encoded size of the source code point.
#
#   utf      Code unit width used for sizing: 32, 16, or anything else
#            (including unset), which is sized as UTF-8.
#   mapping  Which mapping column to measure: lower, title, or upper.
#   az, lt   When non-zero, records whose condition field starts with
#            az/tr (for az) or lt (for lt) are included; otherwise they
#            are skipped.
#
# Prints an empty line if no record is selected.  Exits non-zero if the
# mapping variable is missing or unrecognised.

set -e
cd "${0%/*}/../.."

gawk "$@" '
# Return the number of bytes needed to encode the code point given as the
# hexadecimal string X in the encoding selected by the utf variable.
function bcnt(x)
{
	x = strtonum("0X" x)
	if (utf == 32)
		return 4
	if (utf == 16)
		return x < 0x10000 ? 2 : 4
	# Any other value of utf is sized as UTF-8.  Awk has no C-style block
	# comments, so the final branch is spelled out as a bare constant.
	return x < 0x00080 ? 1 \
	     : x < 0x00800 ? 2 \
	     : x < 0x10000 ? 3 \
	     :               4
}

# Return the larger of X and Y.
function max(x, y)
{
	return x > y ? x : y
}

BEGIN {
	# Fields are separated by semicolons with optional surrounding spaces:
	# $1 is the source code point, $2/$3/$4 are the lower/title/upper
	# mappings, and $5 is the condition list (or the trailing comment).
	FS = " *; *"
	if (mapping == "lower")
		field = 2
	else if (mapping == "title")
		field = 3
	else if (mapping == "upper")
		field = 4
	else {
		# Without a valid column $field would silently become $0.
		print "scale: mapping must be lower, title, or upper" > "/dev/stderr"
		# exit in BEGIN still runs END, so remember that we failed.
		failed = 1
		exit 1
	}
}

# Skip language-specific records unless they were explicitly requested.
$5 ~ /^(az|tr)/ && !az { next }
$5 ~ /^lt/ && !lt { next }

# Data records start with a hexadecimal code point; everything else
# (comments, blank lines) is ignored.
/^[A-F0-9]/ {
	to = 0
	from = bcnt($1)
	split($field, xs, / /)
	for (i in xs)
		to += bcnt(xs[i])
	# Keep a running maximum so that every record, including the first
	# one, takes part in the result.
	n = max(n, to / from)
}

END {
	if (failed)
		exit 1
	print n
}
' data/SpecialCasing