aboutsummaryrefslogtreecommitdiff
path: root/gen
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-05-14 20:53:13 +0200
committerThomas Voss <mail@thomasvoss.com> 2024-05-14 20:53:22 +0200
commita39a4797730a09ec4fbb41c11b7dc1f7d245bc15 (patch)
tree13f2a025d52535d66cc8e1c4be0aa502ffaeab7b /gen
parent4e88af1babd6555c389b1e14316c29b78146f8f0 (diff)
Implement NFD string normalization
Diffstat (limited to 'gen')
-rwxr-xr-xgen/string/scale-norm.c77
1 files changed, 77 insertions, 0 deletions
diff --git a/gen/string/scale-norm.c b/gen/string/scale-norm.c
new file mode 100755
index 0000000..2b926fa
--- /dev/null
+++ b/gen/string/scale-norm.c
@@ -0,0 +1,77 @@
+#if 0
+# Polyglot preamble: a shell reads "#if 0" as a comment and runs the lines
+# below, while the C preprocessor skips everything up to "#endif".
+# Move to the directory two levels above the one that holds this script.
+cd "${0%/*}/../.."
+# Delete the temporary binary whenever the shell exits.
+trap 'rm -f /tmp/scale-norm' EXIT
+# Compile this very file against libmlib.a with ASan and UBSan enabled.
+cc -Iinclude -std=c23 -Wno-attributes -fsanitize=address,undefined \
+ -o /tmp/scale-norm gen/string/scale-norm.c libmlib.a
+# Run the freshly built program.
+/tmp/scale-norm
+# Stop here so the shell never reaches the C source that follows.
+exit 0
+#endif
+
+#include <stdio.h>
+
+#include <macros.h>
+#include <rune.h>
+#include <unicode/prop.h>
+
+/* Forward declarations of the helpers defined after main().  The definitions
+   further down omit `static`, but they still get internal linkage because
+   these prototypes are visible first. */
+static double scale(rune ch);
+static int bcnt(rune ch);
+static int hscnt(rune ch);
+static int scale2(rune ch);
+
+/* Walk every rune from 0 through RUNE_MAX, remember the largest value that
+   scale() yields (never less than 1), and print it after the label "NFD",
+   separated by a tab.  Presumably the printed number is the worst-case
+   growth factor of a string under NFD normalization. */
+int
+main(void)
+{
+	double worst = 1;
+	for (rune ch = 0; ch <= RUNE_MAX; ch++) {
+		double ratio = scale(ch);
+		if (ratio > worst)
+			worst = ratio;
+	}
+	printf("NFD\t%g\n", worst);
+}
+
+/* Return scale2(ch) divided by bcnt(ch) as a double, i.e. the size of ch
+   after expansion relative to its size before.  bcnt() never returns 0,
+   so the division is always defined. */
+double
+scale(rune ch)
+{
+	const int before = bcnt(ch);
+	const int after = scale2(ch);
+	return (double)after / (double)before;
+}
+
+/* Return the size, as measured by bcnt(), of what ch expands to:
+
+   - a rune whose uprop_get_hst() value is not HST_NA is sized by hscnt();
+   - a rune whose uprop_get_dt() value is not DT_CAN keeps its own size;
+   - otherwise every rune in the view returned by uprop_get_dm() is sized
+     recursively and the results are added up.
+
+   NOTE(review): uprop_get_hst/dt/dm are assumed to look up the Unicode
+   Hangul_Syllable_Type, Decomposition_Type and Decomposition_Mapping
+   properties -- confirm against <unicode/prop.h>. */
+int
+scale2(rune ch)
+{
+	if (uprop_get_hst(ch) != HST_NA)
+		return hscnt(ch);
+	if (uprop_get_dt(ch) != DT_CAN)
+		return bcnt(ch);
+
+	struct rview mapping = uprop_get_dm(ch);
+	int total = 0;
+	size_t idx = 0;
+	while (idx < mapping.len) {
+		total += scale2(mapping.p[idx]);
+		idx++;
+	}
+	return total;
+}
+
+/* Return how many bytes ch occupies when encoded as UTF-8: 1 below 0x80,
+   2 below 0x800, 3 below 0x10000 and 4 for everything else.  No validity
+   check is made, so any value of 0x10000 or above counts as 4. */
+int
+bcnt(rune ch)
+{
+	if (ch < 0x80)
+		return 1;
+	if (ch < 0x800)
+		return 2;
+	if (ch < 0x10000)
+		return 3;
+	return 4;
+}
+
+/* Return the size, as measured by bcnt(), of the canonical decomposition of
+   the Hangul rune s.
+
+   Precomposed syllables occupy sbase .. sbase + scnt - 1 (U+AC00..U+D7A3)
+   and decompose arithmetically into a leading consonant, a vowel and an
+   optional trailing consonant, as described by the Hangul syllable
+   decomposition algorithm of the Unicode Standard.  Any rune outside that
+   range (for example a lone jamo) does not decompose, so its own size is
+   returned unchanged. */
+int
+hscnt(rune s)
+{
+	const rune sbase = 0xAC00, lbase = 0x1100, vbase = 0x1161, tbase = 0x11A7;
+	const int lcnt = 19, vcnt = 21, tcnt = 28;
+	const int ncnt = vcnt * tcnt; /* 588 syllables per leading consonant */
+	const int scnt = lcnt * ncnt; /* 11172 precomposed syllables in total */
+
+	/* Range-check on s itself, before subtracting, so the test does not
+	   depend on how an out-of-range difference converts to int.  The upper
+	   bound is exclusive: sbase + scnt is one past the last syllable. */
+	if (s < sbase || s >= sbase + scnt)
+		return bcnt(s);
+
+	int sidx = (int)(s - sbase);
+	rune l = lbase + sidx / ncnt;
+	rune v = vbase + (sidx % ncnt) / tcnt;
+	rune t = tbase + sidx % tcnt;
+	int acc = bcnt(l) + bcnt(v);
+	/* t == tbase means the syllable has no trailing consonant. */
+	if (t != tbase)
+		acc += bcnt(t);
+	return acc;
+}