aboutsummaryrefslogtreecommitdiff
path: root/vendor/librune/gen/gbrk
blob: 72ee2f726b132072400bf00c7ebd9f33d210a0c9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/sh

# cache URL
#
# Ensure the file named by URL is available as
# /tmp/librune/gbrk/<basename of URL>, downloading it with wget when no
# non-empty copy exists yet.
#
# The download goes to a hidden temporary file that is renamed into place
# only on success.  wget -O creates its output file even when the transfer
# fails, so writing straight to the final name would leave an empty or
# truncated file behind that every later run would treat as a valid cache
# entry.  The temporary name starts with a dot so that a `*` glob over the
# cache directory never picks it up.
#
# Returns 0 when the file is cached, otherwise the status of the failing
# command.
cache()
{
	name="/tmp/librune/gbrk/$(basename "$1")"

	# -s rather than -f: an empty file is a leftover from a failed
	# download, not usable data.
	if test -s "$name"
	then
		return 0
	fi

	mkdir -p /tmp/librune/gbrk || return
	tmp="/tmp/librune/gbrk/.$(basename "$1").$$"

	if wget -q "$1" -O "$tmp"
	then
		mv -f "$tmp" "$name"
	else
		status=$?
		rm -f "$tmp"
		return "$status"
	fi
}

# Abort on the first failing command.
set -e

# Run from the parent of the directory that contains this script, so the
# relative output path used below resolves.  dirname is used instead of
# ${0%/*} because the latter yields $0 itself when the script is invoked
# without a slash in its name (e.g. `sh gbrk`), which makes the cd fail.
cd "$(dirname "$0")/.."

readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt'
readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'

# Fetch the three data files in parallel.  A bare `wait` always exits 0, so
# each PID is recorded and waited for individually; a failed download then
# surfaces as a non-zero status and `set -e` stops the script.
cache "$URL1" &
pid1=$!
cache "$URL2" &
pid2=$!
cache "$URL3" &
pid3=$!
wait "$pid1"
wait "$pid2"
wait "$pid3"

# From here on everything written to stdout goes into the generated header.
# The redirection is set up only after the downloads succeeded so that a
# failed fetch does not truncate an existing header.
exec >include/internal/gbrk_lookup.h

# Emit the fixed opening part of the header: include guard, the gbrk_prop
# bit-flag enum and the start of the gbrk_prop_tbl initializer.  The
# delimiter is quoted so the text is copied verbatim, with no shell
# expansion applied to it.
cat <<'EOF'
/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */

/* TODO: Change tables to constexpr from const when Clangd gets better */

#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H
#define RUNE_INTERNAL_GBRK_LOOKUP_H

/* IWYU pragma: private */
/* clang-format off */

#include "types.h"

typedef enum {
	GBP_OTHER = 0,

	GBP_CTRL = 1 << 0, /* Control */
	GBP_EXT  = 1 << 1, /* Extend */
	GBP_PIC  = 1 << 2, /* Extended_Pictographic */
	GBP_PREP = 1 << 3, /* Prepend */
	GBP_RI   = 1 << 4, /* Regional_Indicator */
	GBP_SM   = 1 << 5, /* SpacingMark */
	GBP_ZWJ  = 1 << 6, /* ZWJ */

	GBP_HNGL_L   = 1 <<  7, /* Hangul L */
	GBP_HNGL_LV  = 1 <<  8, /* Hangul LV */
	GBP_HNGL_LVT = 1 <<  9, /* Hangul LVT */
	GBP_HNGL_T   = 1 << 10, /* Hangul T */
	GBP_HNGL_V   = 1 << 11, /* Hangul V */

	GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
	GBP_INDC_EXT   = 1 << 13, /* Indic Extend */
	GBP_INDC_LNK   = 1 << 14, /* Indic Linker */
} gbrk_prop;

static const struct {
	rune lo, hi;
	gbrk_prop prop;
} gbrk_prop_tbl[] = {
EOF

# Turn the cached data files into the body of gbrk_prop_tbl: one
# "{lo, hi, flags}" initializer per maximal run of consecutive code points
# that carry the same set of properties.
#
# The output is not piped through sort: the END block already walks the
# code points in ascending order and prints fixed-width hex, so the lines
# come out sorted, and without a pipeline a gawk failure is the status
# that `set -e` sees.
gawk '
BEGIN {
	# A field ends at a trailing "#" comment or at a semicolon that has
	# spaces on both sides.  Keys such as "InCB; Consonant" have no space
	# before the semicolon and therefore stay a single field.
	FS = "( *#.*| +; +)"

	# Property name as found in the data files -> GBP_ enum suffix.
	map["Control"]               = "CTRL"
	map["Extend"]                = "EXT"
	map["Extended_Pictographic"] = "PIC"
	map["Prepend"]               = "PREP"
	map["Regional_Indicator"]    = "RI"
	map["SpacingMark"]           = "SM"
	map["ZWJ"]                   = "ZWJ"

	map["L"]   = "HNGL_L"
	map["LV"]  = "HNGL_LV"
	map["LVT"] = "HNGL_LVT"
	map["T"]   = "HNGL_T"
	map["V"]   = "HNGL_V"

	map["InCB; Consonant"] = "INDC_CNSNT"
	map["InCB; Extend"]    = "INDC_EXT"
	map["InCB; Linker"]    = "INDC_LNK"
}

# Only records whose property is tracked.  The membership test (rather
# than evaluating map[$2]) avoids adding an empty entry to map for every
# unrelated property name.
$2 in map {
	# $1 is either a single code point "XXXX" or a range "XXXX..YYYY";
	# for a single code point n is 1, so lo and hi coincide.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	flag = "GBP_" map[$2]
	for (cp = lo; cp <= hi; cp++)
		props[cp] = (cp in props) ? props[cp] " | " flag : flag
}

END {
	# Coalesce neighbouring code points with identical flag strings into
	# one table row.  Using "in" keeps props sparse instead of creating
	# an empty element for each of the 0x110000 code points probed.
	for (cp = 0; cp <= 0x10FFFF; cp++) {
		if (!(cp in props))
			continue
		first = cp
		while (((cp + 1) in props) && props[cp + 1] == props[first])
			cp++
		printf "\t{0x%06X, 0x%06X, %s},\n", first, cp, props[first]
	}
}
' /tmp/librune/gbrk/*

# Close the table initializer and the include guard.
printf '%s\n' \
	'};' \
	'' \
	'#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */'