#!/usr/bin/python3 import functools import math import sys from typing import Generator def chunks[T](xs: list[T], n: int) -> Generator[list[T], None, None]: for i in range(0, len(xs), n): yield xs[i:i + n] def powers_of_2() -> Generator[int, None, None]: i = 0 while True: yield 2 ** i i += 1 def bytes_per_col(n: int) -> int: xs = list(set(functools.reduce(list.__add__, ( [i, n // i] for i in range(1, int(n ** 0.5) + 1) if n % i == 0) ))) for x in sorted(xs, reverse=True): y = 5 y += x * 5 y += x - 1 if y <= 80: return x raise ValueError def isize(x: int) -> int: if x < 256: return 1 if x < 65535: return 2 if x < 4294967295: return 3 if x < 18446744073709551615: return 4 raise ValueError def typename(x: int) -> str: if x < 256: return "uint8_t" if x < 65535: return "uint16_t" if x < 4294967295: return "uint32_t" if x < 18446744073709551615: return "uint64_t" raise ValueError def parse(file: str) -> list[bool]: xs = [False] * 0x110000 if sys.argv[1] == 'Indic_Conjunct_Break': sys.argv[1] = 'InCB;' with open(file, 'r') as f: for line in f.readlines(): if ( len(line) == 0 or line[0] == '#' or sys.argv[1] not in line ): continue parts = [int(x, 16) for x in line.split(';')[0].strip().split('..')] for i in range(parts[0], parts[len(parts) - 1] + 1): xs[i] = True return xs def genfile(cs: list[tuple[bool, ...]], blksize: int) -> None: Cs = cs cs = list(dict.fromkeys(Cs)) print('''\ /* This file is autogenerated by gen/prop/bool-props; DO NOT EDIT. */ #include "bitset.h" #include "unicode/prop.h" ''') print(f'static constexpr {typename(len(cs) - 1)} stage1[] = {{') for i, c in enumerate(Cs): print(f'%c%{len(str(len(cs) - 1))}d,' % ('\t' if i % 16 == 0 else ' ', cs.index(c)), end='') if i % 16 == 15: print() print('};') print() bcnt = blksize // 8 bpc = bytes_per_col(bcnt) print(f'static constexpr unsigned char stage2[][{bcnt}] = {{') for c in cs: x = sum(map(lambda x: x[1] << x[0], enumerate(c))) for i in range(bcnt // bpc): print('\t{' if i == 0 else '\t ', end='') for j in range(bpc): print('0x%02X' % (x & 0xFF), end='') x >>= 8 if i < bcnt // bpc - 1 or j < bpc - 1: print(',', end='') if j < bpc - 1: print(' ', end='') if i < bcnt // bpc - 1: print() print('},') print('};') print() print(f'''\ bool uprop_is_{sys.argv[2]}(rune ch) {{ return TESTBIT(stage2[stage1[ch / {blksize}]], ch % {blksize}); }}''') def main() -> None: if len(sys.argv) != 4: print('Usage: bool-props.py name shortname file', file=sys.stderr) exit(1) xs = parse(sys.argv[3]) blksize = -1 smallest = math.inf for bs in powers_of_2(): if bs > len(xs): break Cs = [tuple(x) for x in chunks(xs, bs)] cs = list(dict.fromkeys(Cs)) sz_s1 = len(Cs) * isize(len(cs) - 1) sz_s2 = len(cs) * bs sz = sz_s1 + sz_s2 if sz < smallest: smallest = sz blksize = bs Cs = [tuple(x) for x in chunks(xs, blksize)] genfile(Cs, blksize) if __name__ == '__main__': main()