aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-x gen/string/scale 55
-rw-r--r-- include/unicode/string.h 5
2 files changed, 60 insertions, 0 deletions
diff --git a/gen/string/scale b/gen/string/scale
new file mode 100755
index 0000000..0455447
--- /dev/null
+++ b/gen/string/scale
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+# Usage: scale -v utf=8|16|32 -v mapping=lower|title|upper [-v az=1] [-v lt=1]
+# Example: scale -v utf=8 -v mapping=title -v lt=1
+
+set -e	# abort right away if the cd below fails
+cd "$(dirname -- "$0")/../.."	# tree root, two levels above this script; dirname also copes with a slash-less $0
+
+gawk "$@" '
+function bcnt(x)	# bytes to encode hex code point x; utf picks 32, 16, else UTF-8 sizing
+{
+	x = strtonum("0X" x)	# input is bare hex digits; the prefix makes strtonum read base 16
+	if (utf == 32)
+		return 4	# UTF-32 is fixed width
+	if (utf == 16)
+		return x < 0x10000 ? 2 : 4	# one 16-bit unit below 0x10000, otherwise two
+	return x < 0x00080 ? 1 \
+	     : x < 0x00800 ? 2 \
+	     : x < 0x10000 ? 3 \
+	     : 4	# keep this a bare number: awk would parse a C-style comment here as a regex
+}
+
+function max(x, y) {	# the greater argument; y when x is not strictly greater
+	if (y < x) return x
+	return y
+}
+
+BEGIN {
+	FS = " *; *"	# fields are separated by semicolons; surrounding spaces are swallowed
+	# Field number of the requested mapping within each input record.
+	column["lower"] = 2
+	column["title"] = 3
+	column["upper"] = 4
+	if (mapping in column)	# any other mapping value leaves field unset
+		field = column[mapping]
+}
+
+!az && $5 ~ /^(az|tr)/ { next }	# skip records whose 5th field starts with az or tr unless az is set
+!lt && $5 ~ /^lt/ { next }	# skip records whose 5th field starts with lt unless lt is set
+
+/^[A-F0-9]/ {	# only records that begin with an uppercase hex digit
+	cnt = split($field, cps, / /)	# the selected mapping is a space-separated code point list
+	dst = 0
+	for (k = 1; k <= cnt; k++)
+		dst += bcnt(cps[k])	# encoded size of the mapped sequence
+	src = bcnt($1)	# encoded size of the source code point
+	results[g_i++] = dst / src	# size ratio for this record, stored from index 0 upward
+}
+
+END {	# print the largest ratio recorded by the per-record rule
+	for (i = 0; i < g_i; i++)	# results[] is 0-based: entries were stored at g_i++
+		n = max(n, results[i])
+	print n
+}
+' data/SpecialCasing
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 6363d9d..0cee934 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -29,4 +29,9 @@ size_t u8gnext(struct u8view *, const char8_t **, size_t *);
size_t, enum caseflags);
#undef mlib_warn_trunc
+constexpr double U8LOWER_SCALE = 1.5; /* NOTE(review): presumably the worst-case output/input byte ratio for UTF-8 lowercasing, as printed by gen/string/scale -- confirm */
+constexpr double U8LOWER_SCALE_LT = 3; /* NOTE(review): presumably the lowercasing ratio with the lt-conditioned mappings enabled -- confirm */
+constexpr double U8TITLE_SCALE = 3; /* NOTE(review): presumably the worst-case byte ratio for UTF-8 titlecasing -- confirm */
+constexpr double U8UPPER_SCALE = 3; /* NOTE(review): presumably the worst-case byte ratio for UTF-8 uppercasing -- confirm */
+
#endif /* !MLIB_UNICODE_STRING_H */