1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
#!/usr/bin/python3
import math
from lib import *
# Length of the longest initializer string produced by parse(); genfile()
# reads it to pad the columns of the generated table.
longest = 0


def parse(file: str) -> list[str]:
    """Build a per-code-point table of script-extension initializers.

    Blank lines and lines starting with ``#`` are ignored.  Every other
    line is split on ``;``: the first field is a hexadecimal code point or
    an inclusive ``XXXX..YYYY`` range, the second field (up to an optional
    trailing ``#`` comment) is a whitespace-separated list of script names.
    Each name is upper-cased and prefixed with ``SC_``, and every code point
    in the range is mapped to the string ``_(SC_A, SC_B, ...)``.

    Code points that no line mentions keep the placeholder ``{}``.

    As a side effect the module-level ``longest`` is raised to the length of
    the longest initializer string seen.

    Args:
        file: Path of the data file to read.

    Returns:
        A list of 0x110000 strings indexed by code point.

    Raises:
        ValueError: If a code point field is not valid hexadecimal.
        IndexError: If a data line has no ``;`` separator.
    """
    global longest

    xs = ['{}'] * 0x110000
    # Explicit encoding so the result does not depend on the locale.
    with open(file, 'r', encoding='utf-8') as f:
        # Iterate the file lazily instead of materialising every line.
        for raw in f:
            line = raw.strip()
            # Stripping first also skips comment lines that are indented.
            if not line or line.startswith('#'):
                continue

            fields = line.split(';')
            bounds = [int(x, 16) for x in fields[0].strip().split('..')]
            names = fields[1].split('#')[0].upper().split()

            # Plain concatenation rather than an f-string that reuses its own
            # quote character, which only parses on Python 3.12 and later.
            prop = '_(' + ', '.join(f'SC_{name}' for name in names) + ')'
            longest = max(longest, len(prop))

            # A single code point yields one bound, so first == last.
            for i in range(bounds[0], bounds[-1] + 1):
                xs[i] = prop
    return xs
def genfile(cs: list[tuple[str, ...]], blksize: int) -> None:
    """Print the C source of the two-stage script-extensions lookup table.

    Everything is written with ``print``, i.e. to whatever ``sys.stdout``
    currently is.  The output consists of a fixed prelude, a ``stage1``
    array holding, for every block in *cs*, the index of that block among
    the distinct blocks, a ``stage2`` array holding the distinct blocks
    themselves, and the ``uprop_get_scx`` accessor.

    The module-level ``longest`` is read to pad the ``stage2`` columns.

    Args:
        cs: The table split into blocks, one tuple of initializer strings
            per block.  Each tuple is indexed up to
            ``(blksize // ppc) * ppc`` entries.
        blksize: Number of entries per block; emitted as the inner array
            dimension and as the divisor in the accessor.
    """
    blocks = cs
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    unique = list(dict.fromkeys(blocks))
    # Map each distinct block to its position once, instead of doing a linear
    # ``list.index`` scan for every block in the stage-1 loop.
    index_of = {blk: n for n, blk in enumerate(unique)}
    max_index = len(unique) - 1

    print('''\
/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */
#include <stdint.h>
#include "macros.h"
#include "unicode/prop.h"
#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}
#define _(...) {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}
static constexpr enum uprop_sc fallback[] = {
SC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,
SC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,
SC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,
SC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,
SC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,
SC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,
SC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,
SC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,
SC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,
SC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,
SC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,
SC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,
SC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,
SC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,
SC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,
SC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,
SC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,
SC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,
SC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,
SC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,
SC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,
};
''')

    # Stage 1: sixteen right-aligned indices per output line.
    # NOTE(review): typename() comes from lib; presumably it names the
    # smallest C integer type that can hold max_index — confirm.
    width = len(str(max_index))
    print(f'static constexpr {typename(max_index)} stage1[] = {{')
    for i, blk in enumerate(blocks):
        lead = '\t' if i % 16 == 0 else ' '
        # A newline is emitted only after every sixteenth entry.
        print(f'{lead}{index_of[blk]:{width}d},',
              end='\n' if i % 16 == 15 else '')
    print('};')
    print()

    # Stage 2: one brace-enclosed initializer per distinct block.
    # NOTE(review): columns() comes from lib; presumably it returns how many
    # cells of the given width fit on one output line — confirm.
    ppc = columns(blksize, longest + 1)
    rows = blksize // ppc
    print(f'static const struct uprop_sc_view stage2[][{blksize}] = {{')
    for blk in unique:
        for r in range(rows):
            last_row = r == rows - 1
            print('\t{' if r == 0 else '\t ', end='')
            for j in range(ppc):
                cell = blk[r * ppc + j]
                last_col = j == ppc - 1
                print(cell, end='')
                # Every cell but the very last one of the block gets a comma.
                if not (last_row and last_col):
                    print(',', end='')
                # Pad to the column width, except at the end of a row.
                if not last_col:
                    print(' ' * (longest + 1 - len(cell)), end='')
            # The final row stays open so the closing brace follows it.
            if not last_row:
                print()
        print('},')
    print('};')
    print()

    print(f'''\
struct uprop_sc_view
uprop_get_scx(rune ch)
{{
struct uprop_sc_view scv = stage2[stage1[ch / {blksize}]][ch % {blksize}];
return scv.p == nullptr
? (struct uprop_sc_view){{fallback + uprop_get_sc(ch), 1}}
: scv;
}}''')
def main() -> None:
    """Generate ``lib/unicode/prop/uprop_get_scx.c`` from the data file.

    Parses ``data/ScriptExtensions``, tries every block size yielded by
    ``powers_of_2()`` that does not exceed the table length, keeps the one
    with the smallest estimated size, writes the generated C source for
    that block size and finally calls ``report_size``.

    The estimate is ``len(blocks) * isize(len(unique) - 1)`` for stage 1
    plus ``len(unique) * block_size`` for stage 2; note that the stage-2
    term counts entries and is not scaled by an element size.
    """
    # Local import: only needed for the scoped stdout redirection below.
    import contextlib

    # NOTE(review): cwd_init() comes from lib; presumably it changes to the
    # directory the relative paths below are resolved against — confirm.
    cwd_init()
    xs = parse('data/ScriptExtensions')

    blksize = -1
    smallest = math.inf
    for bs in powers_of_2():
        if bs > len(xs):
            break
        blocks = [tuple(x) for x in chunks(xs, bs)]
        unique = set(blocks)
        sz = len(blocks) * isize(len(unique) - 1) + len(unique) * bs
        if sz < smallest:
            smallest = sz
            blksize = bs

    blocks = [tuple(x) for x in chunks(xs, blksize)]
    # genfile() prints to stdout.  Redirect it only for the duration of the
    # call so that stdout is restored afterwards, instead of leaving
    # sys.stdout bound to a file that has already been closed.
    with open('lib/unicode/prop/uprop_get_scx.c', 'w') as f, \
            contextlib.redirect_stdout(f):
        genfile(blocks, blksize)

    # Runs with the original stdout back in place.
    report_size(len(xs), smallest)
if __name__ == '__main__':
main()
|