diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-05-14 17:48:03 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-14 17:48:03 +0200 |
commit | 334ce7347e9217463ac13edaf8e1480cf08c9fcd (patch) | |
tree | c02c1460b40ca6e0303f6e6cef2d8ab697e80e60 /gen | |
parent | 702ca783218c22e223a64d20baf81de79a489df8 (diff) |
Add uprop_get_dm()
Diffstat (limited to 'gen')
-rwxr-xr-x | gen/prop/dm | 115 |
1 files changed, 115 insertions, 0 deletions
#!/usr/bin/python3
"""Generate lib/unicode/prop/uprop_get_dm.c from data/UnicodeData.

The generated C file holds a two-stage lookup table that maps a code
point to its decomposition mapping, plus the accessor uprop_get_dm().
"""

import contextlib
import math

from lib import *


# Length of the longest initializer string produced by parse().  It is
# written by parse() and read by genfile() to align the stage-2 columns.
longest = 0


def parse(file: str) -> list[str]:
    """Read a UnicodeData-format file into one C initializer per code point.

    Returns a list of 0x110000 strings indexed by code point.  Code points
    whose decomposition field (field 5) is empty keep the default '{}';
    all others get '_(0xAAAA, 0xBBBB, ...)'.  As a side effect the
    module-level `longest` is raised to the longest string generated.
    """
    global longest

    xs = ['{}'] * 0x110000
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # Skip blank lines and comments.
            if not line.strip() or line[0] == '#':
                continue

            parts = line.split(';')
            mapping = parts[5]
            if mapping == '':
                continue

            # Tokens containing '<' are formatting tags such as <compat>;
            # they are dropped, so tagged and untagged mappings end up in
            # the same table.
            # NOTE(review): Hangul syllables are given as a First/Last range
            # with an empty field 5, so they keep the default entry here;
            # confirm that the caller handles them separately.
            n = int(parts[0], 16)
            runes = ', '.join(f'0x{x}' for x in mapping.split() if '<' not in x)
            xs[n] = '_(' + runes + ')'
            longest = max(longest, len(xs[n]))
    return xs


def genfile(cs: list[tuple[str, ...]], blksize: int) -> None:
    """Print the generated C source to standard output.

    `cs` is the full table split into blocks of `blksize` initializer
    strings.  Identical blocks are stored once in stage2, and stage1 maps
    each block number to its index in stage2.
    """
    blocks = cs
    # dict.fromkeys() de-duplicates while keeping first-seen order, so the
    # generated output is deterministic.
    unique = list(dict.fromkeys(blocks))
    # Precomputed positions; avoids a linear list.index() scan per block.
    index_of = {block: i for i, block in enumerate(unique)}

    print('''\
/* This file is autogenerated by gen/prop/dm; DO NOT EDIT. */

#include <stdint.h>

#include "macros.h"
#include "unicode/prop.h"

#define _(...) \\
\t{(const rune []){__VA_ARGS__}, lengthof(((const rune []){__VA_ARGS__}))}
''')

    # typename() comes from lib; presumably it picks the narrowest C integer
    # type able to hold the largest stage-2 index -- verify against lib.
    print(f'static constexpr {typename(len(unique) - 1)} stage1[] = {{')
    width = len(str(len(unique) - 1))
    for i, block in enumerate(blocks):
        # Sixteen indices per line, each line introduced by a tab.
        lead = '\t' if i % 16 == 0 else ' '
        print(f'{lead}{index_of[block]:{width}d},', end='')
        if i % 16 == 15:
            print()
    print('};')

    print()

    # columns() comes from lib; the loops below use its result as the number
    # of entries printed per output line.
    ppc = columns(blksize, longest + 1)
    rows = blksize // ppc
    print(f'static const struct rview stage2[][{blksize}] = {{')
    for block in unique:
        for i in range(rows):
            print('\t{' if i == 0 else '\t ', end='')
            for j in range(ppc):
                entry = block[i * ppc + j]
                print(entry, end='')
                # No comma after the very last entry of a block.
                if i < rows - 1 or j < ppc - 1:
                    print(',', end='')
                # Pad every column but the last to a common width.
                if j < ppc - 1:
                    print(' ' * (longest + 1 - len(entry)), end='')
            if i < rows - 1:
                print()
        print('},')
    print('};')

    print()

    print(f'''\
struct rview
uprop_get_dm(rune ch)
{{
\tstatic thread_local rune hack;
\tstruct rview rv = stage2[stage1[ch / {blksize}]][ch % {blksize}];
\tif (rv.p != nullptr)
\t\treturn rv;
\thack = ch;
\treturn (struct rview){{&hack, 1}};
}}''')


def main() -> None:
    """Choose the block size with the smallest tables and write the C file."""
    cwd_init()
    xs = parse('data/UnicodeData')

    blksize = -1
    smallest = math.inf

    # Try each candidate block size and keep the one whose two stages
    # together need the least space.
    for bs in powers_of_2():
        if bs > len(xs):
            break
        blocks = [tuple(x) for x in chunks(xs, bs)]
        unique = set(blocks)

        sz_s1 = len(blocks) * isize(len(unique) - 1)
        sz_s2 = len(unique) * bs * 16  # (rune *) + size_t
        sz = sz_s1 + sz_s2

        if sz < smallest:
            smallest = sz
            blksize = bs

    blocks = [tuple(x) for x in chunks(xs, blksize)]
    with open('lib/unicode/prop/uprop_get_dm.c', 'w', encoding='utf-8') as f:
        # redirect_stdout() restores sys.stdout on exit, so nothing after
        # this block writes to the closed file.
        with contextlib.redirect_stdout(f):
            genfile(blocks, blksize)

    report_size(len(xs), smallest)


if __name__ == '__main__':
    main()