#!/usr/bin/python3
"""Generate C lookup tables for the NF{C,D,KC,KD}_QC properties.

For every entry of TYPES the script reads data/DerivedNormalizationProps,
builds a per-code-point table of property names, splits it into blocks,
de-duplicates the blocks and writes a two-stage lookup table (stage1 maps
a block number to a unique block, stage2 holds the unique blocks) to
lib/unicode/prop/uprop_get_nf<type>_qc.c.
"""

import math
from contextlib import redirect_stdout

from lib import *


# Length of the longest property name handled for the current type; set by
# parse() and read by genfile() to align the columns of the stage-2 table.
longest = 0
TYPES = ['c', 'd', 'kc', 'kd']


def parse(file: str, _type: str) -> list[str]:
    """Return the NF<type>_QC property name of every code point.

    The result has 0x110000 entries indexed by code point.  Every entry
    starts out as 'NF<TYPE>_QC_Y' and is overwritten by the records of
    *file* that mention 'NF<TYPE>_QC'.  A record is a ';'-separated line
    whose first field is a hexadecimal code point or an 'a..b' range and
    whose third field (up to an optional '#' comment) is the value.

    As a side effect the global ``longest`` is raised to the length of the
    longest property name stored in the table.
    """
    global longest

    tag = f'NF{_type.upper()}_QC'
    default = f'{tag}_Y'
    xs = [default] * 0x110000
    # The default fills most of the table, so it takes part in the column
    # width even when no record of the file spells it out.
    longest = max(longest, len(default))

    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip() or line[0] == '#' or tag not in line:
                continue

            fields = line.split(';')
            bounds = [int(x, 16) for x in fields[0].strip().split('..')]
            prop = f'{tag}_' + fields[2].split('#')[0].strip()
            longest = max(longest, len(prop))
            # A single code point yields one bound, so first == last.
            for cp in range(bounds[0], bounds[-1] + 1):
                xs[cp] = prop
    return xs


def genfile(cs: list[tuple[str, ...]], blksize: int, _type: str) -> None:
    """Print the C source of the two-stage lookup table to stdout.

    *cs* holds one tuple of property names per block of *blksize* code
    points, in code point order and with duplicates.  *_type* is the
    lower-case normalization form used in the generated identifiers.
    """
    blocks = cs
    # dict.fromkeys() drops duplicates while keeping first-seen order, so
    # the stage-1 indices are stable from run to run.
    unique = list(dict.fromkeys(blocks))
    index_of = {block: i for i, block in enumerate(unique)}

    print('''\
/* This file is autogenerated by gen/prop/nfXX_qc; DO NOT EDIT. */

#include "unicode/prop.h"
''')

    width = len(str(len(unique) - 1))
    print(f'static constexpr {typename(len(unique) - 1)} stage1[] = {{')
    for i, block in enumerate(blocks):
        # Sixteen indices per row; each row starts with a tab.
        lead = '\t' if i % 16 == 0 else ' '
        print(f'{lead}{index_of[block]:{width}d},', end='')
        if i % 16 == 15:
            print()
    print('};')

    print()

    # Names per output row.
    # NOTE(review): presumably columns() returns a divisor of blksize;
    # otherwise the trailing names of each block would not be printed.
    ppc = columns(blksize, longest + 1)
    rows = blksize // ppc
    print(f'static constexpr enum uprop_nf{_type}_qc stage2[][{blksize}] = {{')
    for block in unique:
        for i in range(rows):
            print('\t{' if i == 0 else '\t ', end='')
            for j in range(ppc):
                name = block[i * ppc + j]
                print(name, end='')
                # Every name except the last one of the block gets a comma.
                if i < rows - 1 or j < ppc - 1:
                    print(',', end='')
                # Pad to the column width, except in the last column.
                if j < ppc - 1:
                    print(' ' * (longest + 1 - len(name)), end='')
            if i < rows - 1:
                print()
        print('},')
    print('};')

    print()
    print(f'''\
enum uprop_nf{_type}_qc
uprop_get_nf{_type}_qc(rune ch)
{{
\treturn stage2[stage1[ch / {blksize}]][ch % {blksize}];
}}''')


def main(_type: str) -> None:
    """Generate lib/unicode/prop/uprop_get_nf<_type>_qc.c.

    Tries every power-of-two block size up to the table length, keeps the
    one with the smallest estimated footprint and writes the tables for it.
    """
    cwd_init()
    xs = parse('data/DerivedNormalizationProps', _type)

    blksize = -1
    smallest = math.inf

    for bs in powers_of_2():
        if bs > len(xs):
            break
        blocks = [tuple(x) for x in chunks(xs, bs)]
        unique = set(blocks)

        # Estimated footprint: one index per block in stage 1 plus one
        # entry per code point of every distinct block in stage 2.
        # NOTE(review): isize() presumably yields the byte width of the
        # index type; confirm against lib.
        sz = len(blocks) * isize(len(unique) - 1) + len(unique) * bs
        if sz < smallest:
            smallest = sz
            blksize = bs

    blocks = [tuple(x) for x in chunks(xs, blksize)]
    # redirect_stdout() restores the real stdout on exit, so nothing that
    # runs afterwards prints to the already closed output file.
    with open(f'lib/unicode/prop/uprop_get_nf{_type}_qc.c', 'w') as f, \
            redirect_stdout(f):
        genfile(blocks, blksize, _type)

    report_size(len(xs), smallest)


if __name__ == '__main__':
    for _type in TYPES:
        # parse() only ever raises the global, so reset it for each type.
        longest = 0
        main(_type)