about summary refs log tree commit diff
path: root/gen/prop/bool-props.py
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-28 06:15:55 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-28 06:15:55 +0200
commit 4ea2dd117e656f950c41f9954bd593c313e34ee2 (patch)
tree d239c3426240bb886f129bccf901b0fc970c6073 /gen/prop/bool-props.py
parent 50787ecf06854f99eaf4b966abd11d23554bd221 (diff)
Implement boolean props using 2-stage lookup
Diffstat (limited to 'gen/prop/bool-props.py')
-rwxr-xr-x gen/prop/bool-props.py 148
1 files changed, 148 insertions, 0 deletions
diff --git a/gen/prop/bool-props.py b/gen/prop/bool-props.py
new file mode 100755
index 0000000..a913904
--- /dev/null
+++ b/gen/prop/bool-props.py
@@ -0,0 +1,148 @@
+#!/usr/bin/python3
+
+import functools
+import math
+import sys
+from typing import Generator
+
+
+def chunks[T](xs: list[T], n: int) -> Generator[list[T], None, None]:
+ for i in range(0, len(xs), n):
+ yield xs[i:i + n]
+
def powers_of_2() -> Generator[int, None, None]:
    """Yield 1, 2, 4, 8, ... without end; the caller must stop iterating."""
    value = 1
    while True:
        yield value
        value <<= 1
+
def bytes_per_col(n: int) -> int:
    """Return the largest divisor of *n* whose row fits in 80 columns.

    A row of ``x`` bytes is costed at ``5 + 5 * x + (x - 1)`` columns:
    5 per byte, ``x - 1`` separators, plus a fixed allowance of 5.
    NOTE(review): the 5-per-byte figure matches a ``0xNN,`` token and the
    fixed 5 presumably covers the leading indent/brace -- confirm.

    Only divisors of *n* are considered, so that *n* bytes split into rows
    of equal length.

    Raises:
        ValueError: if *n* is less than 1 (there is no divisor to pick).
    """
    if n < 1:
        raise ValueError(f'n must be a positive integer, got {n}')

    # Collect divisors in pairs (i, n // i); isqrt is exact for any int,
    # unlike a floating-point square root.
    divisors: set[int] = set()
    for i in range(1, math.isqrt(n) + 1):
        if n % i == 0:
            divisors.update((i, n // i))

    for x in sorted(divisors, reverse=True):
        width = 5 + x * 5 + (x - 1)
        if width <= 80:
            return x

    # Unreachable for n >= 1 (the divisor 1 always fits); kept as a guard.
    raise ValueError
+
def isize(x: int) -> int:
    """Return the byte width of the smallest of the 8-, 16-, 32- and 64-bit
    unsigned integer types able to hold *x*.

    The result is 1, 2, 4 or 8; the boundary values 0xFF, 0xFFFF,
    0xFFFFFFFF and 0xFFFFFFFFFFFFFFFF each still fit the narrower type.

    Raises:
        ValueError: if *x* does not fit in 64 bits.
    """
    for nbytes in (1, 2, 4, 8):
        if x < 1 << (8 * nbytes):
            return nbytes
    raise ValueError(f'{x} does not fit in a 64-bit unsigned integer')
+
def typename(x: int) -> str:
    """Return the name of the smallest ``uintN_t`` type able to hold *x*.

    The result is one of ``uint8_t``, ``uint16_t``, ``uint32_t`` or
    ``uint64_t``; the maximum value of each type still maps to that type.

    Raises:
        ValueError: if *x* does not fit in 64 bits.
    """
    for bits in (8, 16, 32, 64):
        if x < 1 << bits:
            return f'uint{bits}_t'
    raise ValueError(f'{x} does not fit in a 64-bit unsigned integer')
+
def parse(file: str) -> list[bool]:
    """Read *file* and return one flag per code point for the wanted property.

    The property name is taken from ``sys.argv[1]``.  Each data line is
    expected to look like ``START[..END] ; NAME [; ...] # comment`` with
    hexadecimal code points; anything after ``#`` is ignored.  A line
    contributes only when its second ``;``-separated field equals the
    property name exactly, so a name that merely occurs inside another
    property's name or inside a comment does not match.

    ``Indic_Conjunct_Break`` is looked up under the field name ``InCB``;
    every line with that field is accepted regardless of later fields.

    Returns:
        A list of 0x110000 booleans, ``True`` at every index covered by a
        matching line.
    """
    # A trailing ';' on the name is tolerated for callers that relied on
    # the earlier substring-based matching.
    prop = sys.argv[1].strip(' ;')
    if prop == 'Indic_Conjunct_Break':
        prop = 'InCB'

    xs = [False] * 0x110000
    # NOTE(review): input is assumed to be UTF-8 (as UCD data files are);
    # an explicit encoding avoids depending on the locale -- confirm.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            data = line.split('#', 1)[0]
            fields = [field.strip() for field in data.split(';')]
            # Blank and comment-only lines yield a single empty field.
            if len(fields) < 2 or fields[1] != prop:
                continue
            bounds = [int(x, 16) for x in fields[0].split('..')]
            # A single code point is a range whose ends coincide.
            for i in range(bounds[0], bounds[-1] + 1):
                xs[i] = True
    return xs
+
def genfile(cs: list[tuple[bool, ...]], blksize: int) -> None:
    """Print a C source file implementing a two-stage boolean lookup.

    Args:
        cs: The per-code-point flags split into consecutive blocks of
            *blksize* entries each, in code point order.
        blksize: Number of flags per block; must be a positive multiple
            of 8 because each block is emitted as whole bytes.

    The output, written to stdout, consists of:
      * ``stage1``: for every block in *cs*, the index of its contents
        among the distinct blocks (in first-seen order);
      * ``stage2``: each distinct block packed as a little-endian bitset,
        i.e. flag ``k`` of a block is bit ``k % 8`` of byte ``k // 8``;
      * ``uprop_is_<shortname>()``, with the short name read from
        ``sys.argv[2]``, which indexes the two tables.

    Raises:
        ValueError: if *blksize* is not a positive multiple of 8.
    """
    if blksize < 8 or blksize % 8 != 0:
        raise ValueError(
            f'blksize must be a positive multiple of 8, got {blksize}'
        )

    blocks = cs
    # Distinct blocks in first-seen order, plus a reverse map so each
    # stage1 entry is a dictionary lookup instead of a linear search.
    uniq = list(dict.fromkeys(blocks))
    index_of = {block: i for i, block in enumerate(uniq)}

    print(
        '/* This file is autogenerated by gen/prop/bool-props; DO NOT EDIT. */\n'
        '\n'
        '#include "bitset.h"\n'
        '#include "unicode/prop.h"\n'
    )

    # Right-align every index to the width of the largest one.
    width = len(str(len(uniq) - 1))
    print(f'static constexpr {typename(len(uniq) - 1)} stage1[] = {{')
    for i, block in enumerate(blocks):
        lead = '\t' if i % 16 == 0 else ' '
        print(f'{lead}{index_of[block]:{width}d},', end='')
        if i % 16 == 15:
            print()
    if len(blocks) % 16 != 0:
        # Terminate a final row holding fewer than 16 entries.
        print()
    print('};')

    print()

    bcnt = blksize // 8
    bpc = bytes_per_col(bcnt)
    print(f'static constexpr unsigned char stage2[][{bcnt}] = {{')
    for block in uniq:
        bits = sum(1 << pos for pos, flag in enumerate(block) if flag)
        # Low byte first; flags beyond blksize (if any) are dropped.
        octets = [(bits >> (8 * k)) & 0xFF for k in range(bcnt)]
        rows = [
            ', '.join(f'0x{octet:02X}' for octet in octets[k:k + bpc])
            for k in range(0, bcnt, bpc)
        ]
        print('\t{' + ',\n\t '.join(rows) + '},')
    print('};')

    print()

    # NOTE(review): the indentation of the emitted return statement is
    # assumed to be a tab, matching the table rows above -- confirm.
    print(
        'bool\n'
        f'uprop_is_{sys.argv[2]}(rune ch)\n'
        '{\n'
        f'\treturn TESTBIT(stage2[stage1[ch / {blksize}]], ch % {blksize});\n'
        '}'
    )
+
def main() -> None:
    """Generate the lookup tables for one boolean property.

    Expects exactly three command-line arguments: the property name, the
    short name used in the generated function name, and the data file.
    Every power-of-two block size from 8 up to the number of flags is
    tried, and the one giving the smallest combined table size in bytes
    is passed to ``genfile``.  On a tie the smaller block size wins.

    Exits with status 1 after printing a usage message when the argument
    count is wrong.
    """
    if len(sys.argv) != 4:
        print('Usage: bool-props.py name shortname file', file=sys.stderr)
        sys.exit(1)

    xs = parse(sys.argv[3])

    blksize = -1
    smallest = math.inf
    best_blocks: list[tuple[bool, ...]] | None = None

    for bs in powers_of_2():
        if bs > len(xs):
            break
        if bs < 8:
            # Blocks are emitted as whole bytes, so fewer than 8 flags per
            # block cannot be represented.
            continue
        blocks = [tuple(x) for x in chunks(xs, bs)]
        nuniq = len(set(blocks))

        # Both terms are in bytes: one index per block in stage1, and
        # bs / 8 bytes per distinct block in stage2.
        sz_s1 = len(blocks) * isize(nuniq - 1)
        sz_s2 = nuniq * (bs // 8)
        sz = sz_s1 + sz_s2

        if sz < smallest:
            smallest = sz
            blksize = bs
            best_blocks = blocks

    if best_blocks is None:
        raise ValueError('input is too short for any supported block size')

    genfile(best_blocks, blksize)
+
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()