#!/usr/bin/python3

import math

from lib import *


longest = 0

def parse(file: str) -> list[bool]:
	global longest

	xs = ['{}'] * 0x110000
	with open(file, 'r') as f:
		for line in f.readlines():
			if len(line.strip()) == 0 or line[0] == '#':
				continue

			parts = line.split(';')
			ranges = [int(x, 16) for x in parts[0].strip().split('..')]
			scs = [
				f'SC_{x}' for x in (
					parts[1]
						.split('#')[0]
						.strip()
						.upper()
						.split()
				)
			]
			prop = f'_({', '.join(scs)})'
			longest = max(longest, len(prop))

			for i in range(ranges[0], ranges[len(ranges) - 1] + 1):
				xs[i] = prop
	return xs

def genfile(cs: list[tuple[bool, ...]], blksize: int) -> None:
	Cs = cs
	cs = list(dict.fromkeys(Cs))

	print('''\
/* This file is autogenerated by gen/prop/scx; DO NOT EDIT. */

#include <stdint.h>

#include "macros.h"
#include "unicode/prop.h"

#define CAST(...) (const enum uprop_sc []){__VA_ARGS__}
#define _(...)    {CAST(__VA_ARGS__), lengthof(CAST(__VA_ARGS__))}

static constexpr enum uprop_sc fallback[] = {
	SC_ZZZZ, SC_ADLM, SC_AGHB, SC_AHOM, SC_ARAB, SC_ARMI, SC_ARMN, SC_AVST,
	SC_BALI, SC_BAMU, SC_BASS, SC_BATK, SC_BENG, SC_BHKS, SC_BOPO, SC_BRAH,
	SC_BRAI, SC_BUGI, SC_BUHD, SC_CAKM, SC_CANS, SC_CARI, SC_CHAM, SC_CHER,
	SC_CHRS, SC_COPT, SC_CPMN, SC_CPRT, SC_CYRL, SC_DEVA, SC_DIAK, SC_DOGR,
	SC_DSRT, SC_DUPL, SC_EGYP, SC_ELBA, SC_ELYM, SC_ETHI, SC_GEOR, SC_GLAG,
	SC_GONG, SC_GONM, SC_GOTH, SC_GRAN, SC_GREK, SC_GUJR, SC_GURU, SC_HANG,
	SC_HANI, SC_HANO, SC_HATR, SC_HEBR, SC_HIRA, SC_HLUW, SC_HMNG, SC_HMNP,
	SC_HRKT, SC_HUNG, SC_ITAL, SC_JAVA, SC_KALI, SC_KANA, SC_KAWI, SC_KHAR,
	SC_KHMR, SC_KHOJ, SC_KITS, SC_KNDA, SC_KTHI, SC_LANA, SC_LAOO, SC_LATN,
	SC_LEPC, SC_LIMB, SC_LINA, SC_LINB, SC_LISU, SC_LYCI, SC_LYDI, SC_MAHJ,
	SC_MAKA, SC_MAND, SC_MANI, SC_MARC, SC_MEDF, SC_MEND, SC_MERC, SC_MERO,
	SC_MLYM, SC_MODI, SC_MONG, SC_MROO, SC_MTEI, SC_MULT, SC_MYMR, SC_NAGM,
	SC_NAND, SC_NARB, SC_NBAT, SC_NEWA, SC_NKOO, SC_NSHU, SC_OGAM, SC_OLCK,
	SC_ORKH, SC_ORYA, SC_OSGE, SC_OSMA, SC_OUGR, SC_PALM, SC_PAUC, SC_PERM,
	SC_PHAG, SC_PHLI, SC_PHLP, SC_PHNX, SC_PLRD, SC_PRTI, SC_RJNG, SC_ROHG,
	SC_RUNR, SC_SAMR, SC_SARB, SC_SAUR, SC_SGNW, SC_SHAW, SC_SHRD, SC_SIDD,
	SC_SIND, SC_SINH, SC_SOGD, SC_SOGO, SC_SORA, SC_SOYO, SC_SUND, SC_SYLO,
	SC_SYRC, SC_TAGB, SC_TAKR, SC_TALE, SC_TALU, SC_TAML, SC_TANG, SC_TAVT,
	SC_TELU, SC_TFNG, SC_TGLG, SC_THAA, SC_THAI, SC_TIBT, SC_TIRH, SC_TNSA,
	SC_TOTO, SC_UGAR, SC_VAII, SC_VITH, SC_WARA, SC_WCHO, SC_XPEO, SC_XSUX,
	SC_YEZI, SC_YIII, SC_ZANB, SC_ZINH, SC_ZYYY,
};
''')

	print(f'static constexpr {typename(len(cs) - 1)} stage1[] = {{')
	for i, c in enumerate(Cs):
		print(f'%c%{len(str(len(cs) - 1))}d,' % ('\t' if i % 16 == 0 else ' ', cs.index(c)), end='')
		if i % 16 == 15:
			print()
	print('};')

	print()

	ppc = columns(blksize, longest + 1)
	print(f'static const struct uprop_sc_view stage2[][{blksize}] = {{')
	for c in cs:
		for i in range(blksize // ppc):
			print('\t{' if i == 0 else '\t ', end='')
			for j in range(ppc):
				print(c[i*ppc + j], end='')
				if i < blksize // ppc - 1 or j < ppc - 1:
					print(',', end='')
				if j < ppc - 1:
					print(' ' * (longest + 1 - len(c[i*ppc + j])), end='')
			if i < blksize // ppc - 1:
				print()
		print('},')
	print('};')

	print()

	print(f'''\
struct uprop_sc_view
uprop_get_scx(rune ch)
{{
	struct uprop_sc_view scv = stage2[stage1[ch / {blksize}]][ch % {blksize}];
	return scv.p == nullptr
		? (struct uprop_sc_view){{fallback + uprop_get_sc(ch), 1}}
		: scv;
}}''')

def main() -> None:
	cwd_init()
	xs = parse('data/ScriptExtensions')

	blksize = -1
	smallest = math.inf

	for bs in powers_of_2():
		if bs > len(xs):
			break
		Cs = [tuple(x) for x in chunks(xs, bs)]
		cs = set(Cs)

		sz_s1 = len(Cs) * isize(len(cs) - 1)
		sz_s2 = len(cs) * bs
		sz = sz_s1 + sz_s2

		if sz < smallest:
			smallest = sz
			blksize = bs

	Cs = [tuple(x) for x in chunks(xs, blksize)]
	with open('lib/unicode/prop/uprop_get_scx.c', 'w') as f:
		sys.stdout = f
		genfile(Cs, blksize)

	report_size(len(xs), smallest)

if __name__ == '__main__':
	main()