diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-04-28 06:15:55 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-28 06:15:55 +0200 | 
| commit | 4ea2dd117e656f950c41f9954bd593c313e34ee2 (patch) | |
| tree | d239c3426240bb886f129bccf901b0fc970c6073 /gen/prop/bool-props.py | |
| parent | 50787ecf06854f99eaf4b966abd11d23554bd221 (diff) | |
Implement boolean props using 2-stage lookup
Diffstat (limited to 'gen/prop/bool-props.py')
| -rwxr-xr-x | gen/prop/bool-props.py | 148 | 
1 files changed, 148 insertions, 0 deletions
| diff --git a/gen/prop/bool-props.py b/gen/prop/bool-props.py new file mode 100755 index 0000000..a913904 --- /dev/null +++ b/gen/prop/bool-props.py @@ -0,0 +1,148 @@ +#!/usr/bin/python3 + +import functools +import math +import sys +from typing import Generator + + +def chunks[T](xs: list[T], n: int) -> Generator[list[T], None, None]: +	for i in range(0, len(xs), n): +		yield xs[i:i + n] + +def powers_of_2() -> Generator[int, None, None]: +	i = 0 +	while True: +		yield 2 ** i +		i += 1 + +def bytes_per_col(n: int) -> int: +	xs = list(set(functools.reduce(list.__add__, ( +		[i, n // i] for i in range(1, int(n ** 0.5) + 1) if n % i == 0) +	))) +	for x in sorted(xs, reverse=True): +		y = 5 +		y += x * 5 +		y += x - 1 +		if y <= 80: +			return x + +	raise ValueError + +def isize(x: int) -> int: +	if x < 256: +		return 1 +	if x < 65535: +		return 2 +	if x < 4294967295: +		return 3 +	if x < 18446744073709551615: +		return 4 +	raise ValueError + +def typename(x: int) -> str: +	if x < 256: +		return "uint8_t" +	if x < 65535: +		return "uint16_t" +	if x < 4294967295: +		return "uint32_t" +	if x < 18446744073709551615: +		return "uint64_t" +	raise ValueError + +def parse(file: str) -> list[bool]: +	xs = [False] * 0x110000 +	if sys.argv[1] == 'Indic_Conjunct_Break': +		sys.argv[1] = 'InCB;' +	with open(file, 'r') as f: +		for line in f.readlines(): +			if ( +				len(line) == 0 +				or line[0] == '#' +				or sys.argv[1] not in line +			): +				continue +			parts = [int(x, 16) for x in line.split(';')[0].strip().split('..')] +			for i in range(parts[0], parts[len(parts) - 1] + 1): +				xs[i] = True +	return xs + +def genfile(cs: list[tuple[bool, ...]], blksize: int) -> None: +	Cs = cs +	cs = list(dict.fromkeys(Cs)) + +	print('''\ +/* This file is autogenerated by gen/prop/bool-props; DO NOT EDIT. */ + +#include "bitset.h" +#include "unicode/prop.h" +''') + +	print(f'static constexpr {typename(len(cs) - 1)} stage1[] = {{') +	for i, c in enumerate(Cs): +		print(f'%c%{len(str(len(cs) - 1))}d,' % ('\t' if i % 16 == 0 else ' ', cs.index(c)), end='') +		if i % 16 == 15: +			print() +	print('};') + +	print() + +	bcnt = blksize // 8 +	bpc = bytes_per_col(bcnt) +	print(f'static constexpr unsigned char stage2[][{bcnt}] = {{') +	for c in cs: +		x = sum(map(lambda x: x[1] << x[0], enumerate(c))) + +		for i in range(bcnt // bpc): +			print('\t{' if i == 0 else '\t ', end='') +			for j in range(bpc): +				print('0x%02X' % (x & 0xFF), end='') +				x >>= 8 +				if i < bcnt // bpc - 1 or j < bpc - 1: +					print(',', end='') +				if j < bpc - 1: +					print(' ', end='') +			if i < bcnt // bpc - 1: +				print() +		print('},') +	print('};') + +	print() + +	print(f'''\ +bool +uprop_is_{sys.argv[2]}(rune ch) +{{ +	return TESTBIT(stage2[stage1[ch / {blksize}]], ch % {blksize}); +}}''') + +def main() -> None: +	if len(sys.argv) != 4: +		print('Usage: bool-props.py name shortname file', file=sys.stderr) +		exit(1) + +	xs = parse(sys.argv[3]) + +	blksize = -1 +	smallest = math.inf + +	for bs in powers_of_2(): +		if bs > len(xs): +			break +		Cs = [tuple(x) for x in chunks(xs, bs)] +		cs = list(dict.fromkeys(Cs)) + +		sz_s1 = len(Cs) * isize(len(cs) - 1) +		sz_s2 = len(cs) * bs +		sz = sz_s1 + sz_s2 + +		if sz < smallest: +			smallest = sz +			blksize = bs + +	Cs = [tuple(x) for x in chunks(xs, blksize)] +	genfile(Cs, blksize) + +if __name__ == '__main__': +	main() |