aboutsummaryrefslogtreecommitdiff
path: root/gen/prop/scx
blob: 0d6664ec285ce41c7dd9d946f7df50bfd933c988 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/python3

import math

from lib import *


longest = 0

def parse(file: str) -> list[bool]:
	global longest

	xs = ['{}'] * 0x110000
	with open(file, 'r') as f:
		for line in f.readlines():
			if len(line.strip()) == 0 or line[0] == '#':
				continue

			parts = line.split(';')
			ranges = [int(x, 16) for x in parts[0].strip().split('..')]
			scs = [
				f'SC_{x}' for x in (
					parts[1]
						.split('#')[0]
						.strip()
						.upper()
						.split()
				)
			]
			prop = f'_({', '.join(scs)})'
			longest = max(longest, len(prop))

			for i in range(ranges[0], ranges[len(ranges) - 1] + 1):
				xs[i] = prop
	return xs

def genfile(cs: list[tuple[bool, ...]], blksize: int) -> None:
	Cs = cs
	cs = list(dict.fromkeys(Cs))

	print('''\
/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */

#include <stdint.h>

#include "macros.h"
#include "unicode/prop.h"

#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}
#define _(...)    {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}

static constexpr enum uprop_sc fallback[] = {
	SC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,
	SC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,
	SC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,
	SC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,
	SC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,
	SC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,
	SC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,
	SC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,
	SC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,
	SC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,
	SC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,
	SC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,
	SC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,
	SC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,
	SC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,
	SC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,
	SC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,
	SC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,
	SC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,
	SC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,
	SC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,
};
''')

	print(f'static constexpr {typename(len(cs) - 1)} stage1[] = {{')
	for i, c in enumerate(Cs):
		print(f'%c%{len(str(len(cs) - 1))}d,' % ('\t' if i % 16 == 0 else ' ', cs.index(c)), end='')
		if i % 16 == 15:
			print()
	print('};')

	print()

	ppc = columns(blksize, longest + 1)
	print(f'static const struct uprop_sc_view stage2[][{blksize}] = {{')
	for c in cs:
		for i in range(blksize // ppc):
			print('\t{' if i == 0 else '\t ', end='')
			for j in range(ppc):
				print(c[i*ppc + j], end='')
				if i < blksize // ppc - 1 or j < ppc - 1:
					print(',', end='')
				if j < ppc - 1:
					print(' ' * (longest + 1 - len(c[i*ppc + j])), end='')
			if i < blksize // ppc - 1:
				print()
		print('},')
	print('};')

	print()

	print(f'''\
struct uprop_sc_view
uprop_get_scx(rune ch)
{{
	struct uprop_sc_view scv = stage2[stage1[ch / {blksize}]][ch % {blksize}];
	return scv.p == nullptr
		? (struct uprop_sc_view){{fallback + uprop_get_sc(ch), 1}}
		: scv;
}}''')

def main() -> None:
	cwd_init()
	xs = parse('data/ScriptExtensions')

	blksize = -1
	smallest = math.inf

	for bs in powers_of_2():
		if bs > len(xs):
			break
		Cs = [tuple(x) for x in chunks(xs, bs)]
		cs = set(Cs)

		sz_s1 = len(Cs) * isize(len(cs) - 1)
		sz_s2 = len(cs) * bs
		sz = sz_s1 + sz_s2

		if sz < smallest:
			smallest = sz
			blksize = bs

	Cs = [tuple(x) for x in chunks(xs, blksize)]
	with open('lib/unicode/prop/uprop_get_scx.c', 'w') as f:
		sys.stdout = f
		genfile(Cs, blksize)

	report_size(len(xs), smallest)

if __name__ == '__main__':
	main()