#!/usr/bin/python3

import math

from lib import *


# Translates the value found in the fourth ';'-separated field of a data
# line (see parse()) into the suffix that follows 'CCC_' in the generated
# enumerator name.  Some values get a short symbolic suffix; every other
# supported value is spelled as its own decimal number.
MAP = {
	# Low values with symbolic suffixes.
	'0': 'NR',
	'1': 'OV',
	'6': 'HANR',
	'7': 'NK',
	'8': 'KV',
	'9': 'VR',
	# Values whose suffix is simply the number itself.
	**{
		str(n): str(n)
		for n in (*range(10, 37), 84, 91, 103, 107, 118, 122, 129, 130, 132, 133)
	},
	# High values with symbolic suffixes.
	'200': 'ATBL',
	'202': 'ATB',
	'214': 'ATA',
	'216': 'ATAR',
	'218': 'BL',
	'220': 'B',
	'222': 'BR',
	'224': 'L',
	'226': 'R',
	'228': 'AL',
	'230': 'A',
	'232': 'AR',
	'233': 'DB',
	'234': 'DA',
	'240': 'IS',
}

# Length of the longest 'CCC_*' name that parse() has stored so far;
# genfile() reads it to size and pad the columns of the stage2 table.
longest = 0

def parse(file: str) -> list[str]:
	"""Build a per-code-point table of 'CCC_*' enumerator names from FILE.

	Every line of FILE is split on ';'.  Field 0 is a hexadecimal code
	point and field 3 is looked up in MAP to obtain the name suffix.  A
	line whose field 1 contains 'First' opens a range and the following
	line whose field 1 contains 'Last' closes it; every code point in the
	closed range receives the value given on the 'Last' line.  All other
	lines describe a single code point.  Blank lines are skipped.

	Code points that are never mentioned keep the default 'CCC_NR'.

	As a side effect the module-level `longest` is raised to the length
	of the longest name stored.

	Returns a list of 0x110000 names indexed by code point.

	Raises KeyError if field 3 is not a key of MAP, and ValueError if a
	'Last' line is not preceded by a 'First' line.
	"""
	global longest

	xs = ['CCC_NR'] * 0x110000
	lo = None
	with open(file, 'r') as f:
		# Iterate the file object directly; there is no need to hold
		# every line in memory at once.
		for line in f:
			if not line.strip():
				continue
			parts = line.split(';')
			cp = int(parts[0], 16)

			if 'First' in parts[1]:
				lo = cp
				continue

			if 'Last' in parts[1]:
				if lo is None:
					raise ValueError(
						f"range end {parts[0]} has no preceding 'First' line"
					)
				targets = range(lo, cp + 1)
				# Forget the start so that it cannot leak into a later,
				# unrelated range.
				lo = None
			else:
				targets = range(cp, cp + 1)

			# An inverted range is empty and is ignored entirely.
			if targets:
				# The name is the same for the whole range, so build it
				# once instead of once per code point.
				name = f'CCC_{MAP[parts[3]]}'
				for i in targets:
					xs[i] = name
				longest = max(longest, len(name))
	return xs

def genfile(cs: list[tuple[str, ...]], blksize: int) -> None:
	"""Print the C source of the two-stage lookup table to stdout.

	cs is the complete table cut into blocks of blksize 'CCC_*' names,
	in code-point order.  Each distinct block is emitted once as a row of
	`stage2`; `stage1` holds, for every block of cs, the index of its
	row in `stage2`.  The generated `uprop_get_ccc()` combines the two.

	The module-level `longest` is read to decide how many names fit on
	one output line and how far to pad each name.
	"""
	blocks = cs
	# dict.fromkeys() removes duplicates while keeping first-seen order.
	uniq = list(dict.fromkeys(blocks))
	# Row number of every distinct block, so that emitting stage1 does
	# not need a linear list.index() search per entry.
	row_of = {block: n for n, block in enumerate(uniq)}

	print('''\
/* This file is autogenerated by gen/prop/ccc; DO NOT EDIT. */

#include "unicode/prop.h"
''')

	last_row = len(uniq) - 1
	# Pad every index to the width of the largest one.
	width = len(str(last_row))
	print(f'static constexpr {typename(last_row)} stage1[] = {{')
	for i, block in enumerate(blocks):
		lead = '\t' if i % 16 == 0 else ' '
		print(f'{lead}{row_of[block]:{width}d},', end='')
		if i % 16 == 15:
			print()
	# Terminate a final, partially filled row so that the closing brace
	# always starts on a line of its own.
	if len(blocks) % 16 != 0:
		print()
	print('};')

	print()

	# Names per output line; each name occupies longest + 1 columns.
	ppc = columns(blksize, longest + 1)
	nlines = blksize // ppc
	print(f'static constexpr enum uprop_ccc stage2[][{blksize}] = {{')
	for block in uniq:
		for i in range(nlines):
			final_line = i == nlines - 1
			out = ['\t{' if i == 0 else '\t ']
			for j in range(ppc):
				name = block[i*ppc + j]
				final_col = j == ppc - 1
				out.append(name)
				# Every name but the very last of the row gets a comma.
				if not (final_line and final_col):
					out.append(',')
				# Pad to the column width, except at the end of a line.
				if not final_col:
					out.append(' ' * (longest + 1 - len(name)))
			# The closing brace follows the last line directly.
			print(''.join(out), end='' if final_line else '\n')
		print('},')
	print('};')

	print()

	print(f'''\
enum uprop_ccc
uprop_get_ccc(rune ch)
{{
	return stage2[stage1[ch / {blksize}]][ch % {blksize}];
}}''')

def main() -> None:
	"""Generate lib/unicode/prop/uprop_get_ccc.c from data/UnicodeData.

	Parses the data file, tries every power-of-two block size that does
	not exceed the table length, keeps the one with the smallest
	estimated total size of both stages, and writes the resulting C file.
	"""
	from contextlib import redirect_stdout

	cwd_init()
	# All output is produced with print(), so point stdout at the target
	# file for the duration of the run.  The context managers guarantee
	# that the file is flushed and closed and that stdout is restored,
	# instead of leaving that to interpreter shutdown.
	with open('lib/unicode/prop/uprop_get_ccc.c', 'w') as out, redirect_stdout(out):
		xs = parse('data/UnicodeData')

		blksize = -1
		smallest = math.inf

		for bs in powers_of_2():
			if bs > len(xs):
				break
			Cs = [tuple(x) for x in chunks(xs, bs)]
			cs = set(Cs)

			# stage1: one index per block, sized to hold the largest index.
			sz_s1 = len(Cs) * isize(len(cs) - 1)
			# stage2: one entry per name of every distinct block.
			# NOTE(review): the factor 2 is presumably the size in bytes
			# of one stage2 entry — confirm.
			sz_s2 = len(cs) * bs * 2
			sz = sz_s1 + sz_s2

			if sz < smallest:
				smallest = sz
				blksize = bs

		Cs = [tuple(x) for x in chunks(xs, blksize)]
		genfile(Cs, blksize)

		report_size(len(xs), smallest)

# Run the generator only when this file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
	main()