aboutsummaryrefslogtreecommitdiff
path: root/gen/prop/wb
blob: a6b47f2b0cf760ef8defd4412f7505e04338fcfe (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/sh

# Stop at the first failing command, so a failed cd can never be followed
# by the redirection below.
set -e
# Work from the directory two levels above this script; the relative lib/
# and data/ paths used below are resolved from there.  dirname is used
# rather than ${0%/*} because that expansion leaves $0 unchanged when it
# contains no slash (for example when run as "sh wb"), which would make
# the cd target a path through a regular file.
cd "$(dirname -- "$0")/../.."
# From here on, everything written to stdout goes into the generated file.
exec >lib/unicode/prop/uprop_get_wb.c

gawk '
BEGIN {
	# A field separator is a semicolon with optional spaces around it, or a
	# "#" comment (with any spaces before it) running to the end of the
	# line, so trailing comments never end up inside a field.
	FS = " *(; *|#.*)"

	# Property value name (as found in field 2 of the data file) followed
	# by the suffix of the WB_* identifier to emit for it, written as a
	# flat list of alternating words.
	pairs = "ALetter LE " \
		"CR CR " \
		"Double_Quote DQ " \
		"E_Base EB " \
		"E_Base_GAZ EBG " \
		"E_Modifier EM " \
		"Extend EXTEND " \
		"ExtendNumLet EX " \
		"Format FO " \
		"Glue_After_Zwj GAZ " \
		"Hebrew_Letter HL " \
		"Katakana KA " \
		"LF LF " \
		"MidLetter ML " \
		"MidNumLet MB " \
		"MidNum MN " \
		"Newline NL " \
		"Numeric NU " \
		"Other XX " \
		"Regional_Indicator RI " \
		"Single_Quote SQ " \
		"WSegSpace WSEGSPACE " \
		"ZWJ ZWJ"
	nwords = split(pairs, word, " ")
	for (k = 1; k < nwords; k += 2)
		map[word[k]] = word[k + 1]

	# Preamble of the generated C file: the do-not-edit banner followed by
	# the include lines, each section terminated by a blank line.
	print "/* This file is autogenerated by gen/prop/wb; DO NOT EDIT. */"
	print ""
	nhdrs = split("_bsearch.h macros.h rune.h unicode/prop.h", hdr, " ")
	for (k = 1; k <= nhdrs; k++)
		printf "#include \"%s\"\n", hdr[k]
	print ""
}

# Data records start with an uppercase hexadecimal digit; any other line
# (blank lines, comment lines) does not match and is skipped.
#
# NOTE(review): a value in field 2 that is missing from map produces the
# bare identifier WB_ with no diagnostic -- confirm this is acceptable.
/^[A-F0-9]/ {
	# Field 1 is either one code point or a FIRST..LAST range.  For a
	# single code point split() yields one piece, so both bounds are
	# taken from that same piece.
	npieces = split($1, bound, /\.\./)
	first = strtonum("0X" bound[1])
	last = strtonum("0X" bound[npieces])

	# Record the identifier for every code point in the inclusive range.
	ident = "WB_" map[$2]
	cp = first
	while (cp <= last)
		props[cp++] = ident
}

# Once all input has been read, emit the body of the generated C file: a
# direct table for code points below 0x100, a range table for everything
# from 0x100 upwards, and the uprop_get_wb() accessor.
#
# Membership in props is always tested with the in operator.  Referring to
# props[x] directly would create an empty element for every code point that
# has no value, i.e. for most of the 0x100..0x10FFFF scan.  Every stored
# value is a non-empty string starting with WB_, so testing membership is
# equivalent to testing the value for truth.
END {
	# Direct-indexed table, four entries per row.  Code points without a
	# recorded value get WB_XX.  The padding from %-13s leaves trailing
	# spaces at the end of each row.
	print "static constexpr enum uprop_wb lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		if (i % 4 == 0)
			printf "\t"
		printf "%-13s%s", ((i in props) ? props[i] : "WB_XX") ",", \
			i % 4 == 3 ? "\n" : " "
	}
	print "};"
	print ""
	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_wb val;"
	print "} lookup[] = {"

	# Collapse each maximal run of consecutive code points that share one
	# value into a single {lo, hi, value} row.  Rows are produced in
	# ascending code point order.
	for (i = 0x100; i <= 0x10FFFF; i++) {
		if (!(i in props))
			continue
		lo = i
		# Advance i to the last code point of the run starting at lo.
		while ((i + 1) in props && props[i + 1] == props[lo])
			i++
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[lo]
	}

	print "};"
	print ""
	# NOTE(review): presumably this macro defines the mlib_lookup() used
	# below, searching lookup and yielding WB_XX on a miss -- confirm
	# against _bsearch.h.
	print "_MLIB_DEFINE_BSEARCH(enum uprop_wb, lookup, WB_XX)"
	print ""
	print "enum uprop_wb"
	print "uprop_get_wb(rune ch)"
	print "{"
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/WordBreakProperty | sed 's/[[:space:]]*$//' # strip trailing padding; POSIX class instead of the GNU-only \s