1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
#!/bin/sh
#
# Generate include/unicode/_gbrk.h, a C header holding a table of code point
# ranges and the grapheme-break related property flags that apply to each.
#
# Inputs (relative to the directory two levels above this script):
#   data/GraphemeBreakProperty
#   data/DerivedCoreProperties
#   data/emoji-data
#
# The header is first written to a temporary file next to the target and only
# renamed into place when every step succeeded, so a failed run never leaves a
# truncated header behind.

set -e

# Move to the directory two levels above the one holding this script.  When
# $0 carries no directory component, the script is assumed to have been
# started from its own directory.
case $0 in
*/*) cd "${0%/*}/../.." ;;
*)   cd ../.. ;;
esac

out=include/unicode/_gbrk.h
tmp=$out.$$.tmp
trap 'rm -f "$tmp"' EXIT

# Everything printed from here on goes to the temporary file.
exec >"$tmp"

# Fixed prologue of the header.  The delimiter is quoted so that the shell
# copies the C text literally.
cat <<'C'
/* This file is autogenerated by gen/string/gbrk; DO NOT EDIT. */
#ifndef MLIB_UNICODE__GBRK_H
#define MLIB_UNICODE__GBRK_H
/* clang-format off */
#include "_rune.h"
typedef enum {
GBP_OTHER = 0,
GBP_CTRL = 1 << 0, /* Control */
GBP_EXT = 1 << 1, /* Extend */
GBP_PIC = 1 << 2, /* Extended_Pictographic */
GBP_PREP = 1 << 3, /* Prepend */
GBP_RI = 1 << 4, /* Regional_Indicator */
GBP_SM = 1 << 5, /* SpacingMark */
GBP_ZWJ = 1 << 6, /* ZWJ */
GBP_HNGL_L = 1 << 7, /* Hangul L */
GBP_HNGL_LV = 1 << 8, /* Hangul LV */
GBP_HNGL_LVT = 1 << 9, /* Hangul LVT */
GBP_HNGL_T = 1 << 10, /* Hangul T */
GBP_HNGL_V = 1 << 11, /* Hangul V */
GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
GBP_INDC_EXT = 1 << 13, /* Indic Extend */
GBP_INDC_LNK = 1 << 14, /* Indic Linker */
} gbrk_prop;
static const struct {
rune lo, hi;
gbrk_prop val;
} gbrk_prop_tbl[] = {
C

# Table rows.  The rows leave gawk already ordered by ascending code point
# with fixed-width hexadecimal bounds, so no sort stage is needed; running
# gawk outside of a pipeline also lets "set -e" see its exit status.
gawk '
BEGIN {
	# A field ends at " ; " (at least one space on each side) or at a
	# trailing "#" comment.  "InCB; Consonant" has no space before its
	# semicolon and therefore stays together as one field.
	FS = "( *#.*| +; +)"

	# Property name as it appears in the data files -> GBP_ suffix.
	map["Control"] = "CTRL"
	map["Extend"] = "EXT"
	map["Extended_Pictographic"] = "PIC"
	map["Prepend"] = "PREP"
	map["Regional_Indicator"] = "RI"
	map["SpacingMark"] = "SM"
	map["ZWJ"] = "ZWJ"
	map["L"] = "HNGL_L"
	map["LV"] = "HNGL_LV"
	map["LVT"] = "HNGL_LVT"
	map["T"] = "HNGL_T"
	map["V"] = "HNGL_V"
	map["InCB; Consonant"] = "INDC_CNSNT"
	map["InCB; Extend"] = "INDC_EXT"
	map["InCB; Linker"] = "INDC_LNK"
}

# Only records whose second field is a known property are processed.  The
# "in" test does not insert unknown names into map as a side effect.
$2 in map {
	# The first field is either "XXXX" or "XXXX..YYYY"; for a single code
	# point both bounds come from the same element.
	n = split($1, range, /\.\./)
	lo = strtonum("0X" range[1])
	hi = strtonum("0X" range[n])
	flag = "GBP_" map[$2]

	# A code point listed under several properties collects them as an
	# OR-expression, in the order the records are read.
	for (cp = lo; cp <= hi; cp++)
		props[cp] = (cp in props) ? props[cp] " | " flag : flag
}

END {
	# Merge neighbouring code points that carry the same flag expression
	# into one {lo, hi, flags} row.  Membership tests keep the scan from
	# adding an empty element for every unassigned code point.
	for (cp = 0; cp <= 0x10FFFF; cp++) {
		if (!(cp in props))
			continue
		lo = cp
		while ((cp + 1) in props && props[cp + 1] == props[lo])
			cp++
		printf "\t{0x%06X, 0x%06X, %s},\n", lo, cp, props[lo]
	}
}
' data/GraphemeBreakProperty \
	data/DerivedCoreProperties \
	data/emoji-data

# Fixed epilogue of the header.
cat <<'C'
};
#endif /* !MLIB_UNICODE__GBRK_H */
C

# All steps succeeded: put the finished header in place.
mv -f "$tmp" "$out"
|