diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-05-14 17:48:03 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-14 17:48:03 +0200 | 
| commit | 334ce7347e9217463ac13edaf8e1480cf08c9fcd (patch) | |
| tree | c02c1460b40ca6e0303f6e6cef2d8ab697e80e60 /gen | |
| parent | 702ca783218c22e223a64d20baf81de79a489df8 (diff) | |
Add uprop_get_dm()
Diffstat (limited to 'gen')
| -rwxr-xr-x | gen/prop/dm | 115 | 
1 files changed, 115 insertions, 0 deletions
#!/usr/bin/python3
"""Generator script gen/prop/dm.

Reads the decomposition-mapping column of data/UnicodeData and writes
lib/unicode/prop/uprop_get_dm.c, a two-stage lookup table plus the
uprop_get_dm() accessor that consults it.
"""

import contextlib
import math

from lib import *


# Length of the longest initializer string produced by parse().  genfile()
# reads it to pad the columns of the stage2 table.
longest = 0


def parse(file: str) -> list[str]:
	"""Return one C initializer string per codepoint in 0x0..0x10FFFF.

	Each non-blank, non-comment line of *file* is split on ';'.  Field 0 is
	read as the hexadecimal codepoint and field 5 as its mapping.  Codepoints
	whose field 5 is empty (or that do not appear at all) keep the default
	'{}'; the others get '_(0xAAAA, 0xBBBB, ...)'.

	Side effect: updates the module-level ``longest``.
	"""
	global longest

	xs = ['{}'] * 0x110000
	with open(file, 'r') as f:
		for line in f:
			if not line.strip() or line.startswith('#'):
				continue

			parts = line.split(';')
			mapping = parts[5]
			if not mapping:
				continue

			n = int(parts[0], 16)
			# Tokens containing '<' (e.g. a leading '<compat>' tag) are not
			# codepoints, so they are left out of the initializer.
			runes = ', '.join(f'0x{x}' for x in mapping.split() if '<' not in x)
			xs[n] = f'_({runes})'
			longest = max(longest, len(xs[n]))
	return xs


def _print_preamble() -> None:
	"""Print the fixed header of the generated C file."""
	print('''\
/* This file is autogenerated by gen/prop/dm; DO NOT EDIT. */

#include <stdint.h>

#include "macros.h"
#include "unicode/prop.h"

#define _(...) \\
	{(const rune []){__VA_ARGS__}, lengthof(((const rune []){__VA_ARGS__}))}
''')


def _print_stage1(blocks: list[tuple[str, ...]], unique: list[tuple[str, ...]]) -> None:
	"""Print stage1: for every block, its index into the list of unique blocks."""
	# Map each unique block to its position once instead of calling
	# list.index() for every block.  `unique` has no duplicates, so the
	# indices are the same ones list.index() would return.
	index = {blk: i for i, blk in enumerate(unique)}
	width = len(str(len(unique) - 1))

	# typename() comes from lib; presumably it names the smallest C integer
	# type able to hold the given maximum value -- TODO confirm.
	print(f'static constexpr {typename(len(unique) - 1)} stage1[] = {{')
	for i, blk in enumerate(blocks):
		lead = '\t' if i % 16 == 0 else ' '
		print(f'{lead}{index[blk]:{width}d},', end='')
		if i % 16 == 15:
			print()
	if len(blocks) % 16 != 0:
		# Terminate a partially filled last row so '};' gets its own line.
		print()
	print('};')


def _print_stage2(unique: list[tuple[str, ...]], blksize: int) -> None:
	"""Print stage2: the unique blocks, laid out in aligned columns."""
	# columns() comes from lib; presumably it picks how many entries of the
	# given width fit on one output line -- TODO confirm.
	ppc = columns(blksize, longest + 1)
	rows = blksize // ppc

	print(f'static const struct rview stage2[][{blksize}] = {{')
	for blk in unique:
		for i in range(rows):
			print('\t{' if i == 0 else '\t ', end='')
			for j in range(ppc):
				cell = blk[i*ppc + j]
				last_row = i == rows - 1
				last_col = j == ppc - 1

				print(cell, end='')
				# Every entry except the very last of the block is followed
				# by a comma.
				if not (last_row and last_col):
					print(',', end='')
				# Pad up to the column width, except at the end of a line.
				if not last_col:
					print(' ' * (longest + 1 - len(cell)), end='')
			if not last_row:
				print()
		print('},')
	print('};')


def _print_accessor(blksize: int) -> None:
	"""Print the definition of uprop_get_dm()."""
	print(f'''\
struct rview
uprop_get_dm(rune ch)
{{
	static thread_local rune hack;
	struct rview rv =  stage2[stage1[ch / {blksize}]][ch % {blksize}];
	if (rv.p != nullptr)
		return rv;
	hack = ch;
	return (struct rview){{&hack, 1}};
}}''')


def genfile(cs: list[tuple[str, ...]], blksize: int) -> None:
	"""Print the generated C source to standard output.

	*cs* is the full table split into blocks of *blksize* initializer
	strings, in codepoint order.  Duplicate blocks are emitted only once (in
	order of first appearance) in stage2; stage1 maps every block to its
	stage2 index.
	"""
	blocks = cs
	unique = list(dict.fromkeys(blocks))

	_print_preamble()
	_print_stage1(blocks, unique)
	print()
	_print_stage2(unique, blksize)
	print()
	_print_accessor(blksize)


def main() -> None:
	"""Pick the block size with the smallest estimated table and emit the C file."""
	cwd_init()
	xs = parse('data/UnicodeData')

	blksize = -1
	smallest = math.inf

	for bs in powers_of_2():
		if bs > len(xs):
			break
		Cs = [tuple(x) for x in chunks(xs, bs)]
		cs = set(Cs)

		# Estimated size: one stage1 index per block plus one stage2 entry
		# per element of every unique block.
		sz_s1 = len(Cs) * isize(len(cs) - 1)
		sz_s2 = len(cs) * bs * 16  # (rune *) + size_t
		sz = sz_s1 + sz_s2

		if sz < smallest:
			smallest = sz
			blksize = bs

	Cs = [tuple(x) for x in chunks(xs, blksize)]
	# redirect_stdout() restores sys.stdout when the block exits, so stdout
	# is not left pointing at the closed output file afterwards.
	with (
		open('lib/unicode/prop/uprop_get_dm.c', 'w') as f,
		contextlib.redirect_stdout(f),
	):
		genfile(Cs, blksize)

	report_size(len(xs), smallest)


if __name__ == '__main__':
	main()