aboutsummaryrefslogtreecommitdiff
path: root/gen/prop/sc
blob: 40fc39b11ca38b55bbdc3006210c24408133b578 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/bin/sh

# Generates lib/unicode/prop/uprop_get_sc.c from data/Scripts.

set -e
# Work from the directory two levels above the one holding this script, so
# the relative paths used below resolve no matter where the script is called
# from.  dirname is used instead of ${0%/*} because the latter leaves $0
# unchanged when it contains no slash (e.g. "sh sc"), which makes cd fail.
cd "$(dirname -- "$0")/../.."
# Everything written to stdout from here on lands in the generated C file.
exec >lib/unicode/prop/uprop_get_sc.c

# The generator itself.  It needs gawk (not plain awk): the program relies on
# strtonum() and on hexadecimal constants in the program text.
gawk '
BEGIN {
	# Split records on a ";" (with optional surrounding spaces) or on a
	# trailing "#" comment.  For a data line this leaves the code point or
	# code point range in $1 and the script name in $2.
	FS = " *(; *|#.*)"

	# Script names as spelled in the data file, mapped to the four-letter
	# code that is appended to "SC_" to form the enum constant written into
	# the generated tables.
	map["Adlam"]                  = "ADLM"
	map["Caucasian_Albanian"]     = "AGHB"
	map["Ahom"]                   = "AHOM"
	map["Arabic"]                 = "ARAB"
	map["Imperial_Aramaic"]       = "ARMI"
	map["Armenian"]               = "ARMN"
	map["Avestan"]                = "AVST"
	map["Balinese"]               = "BALI"
	map["Bamum"]                  = "BAMU"
	map["Bassa_Vah"]              = "BASS"
	map["Batak"]                  = "BATK"
	map["Bengali"]                = "BENG"
	map["Bhaiksuki"]              = "BHKS"
	map["Bopomofo"]               = "BOPO"
	map["Brahmi"]                 = "BRAH"
	map["Braille"]                = "BRAI"
	map["Buginese"]               = "BUGI"
	map["Buhid"]                  = "BUHD"
	map["Chakma"]                 = "CAKM"
	map["Canadian_Aboriginal"]    = "CANS"
	map["Carian"]                 = "CARI"
	map["Cham"]                   = "CHAM"
	map["Cherokee"]               = "CHER"
	map["Chorasmian"]             = "CHRS"
	map["Coptic"]                 = "COPT"
	map["Cypro_Minoan"]           = "CPMN"
	map["Cypriot"]                = "CPRT"
	map["Cyrillic"]               = "CYRL"
	map["Devanagari"]             = "DEVA"
	map["Dives_Akuru"]            = "DIAK"
	map["Dogra"]                  = "DOGR"
	map["Deseret"]                = "DSRT"
	map["Duployan"]               = "DUPL"
	map["Egyptian_Hieroglyphs"]   = "EGYP"
	map["Elbasan"]                = "ELBA"
	map["Elymaic"]                = "ELYM"
	map["Ethiopic"]               = "ETHI"
	map["Georgian"]               = "GEOR"
	map["Glagolitic"]             = "GLAG"
	map["Gunjala_Gondi"]          = "GONG"
	map["Masaram_Gondi"]          = "GONM"
	map["Gothic"]                 = "GOTH"
	map["Grantha"]                = "GRAN"
	map["Greek"]                  = "GREK"
	map["Gujarati"]               = "GUJR"
	map["Gurmukhi"]               = "GURU"
	map["Hangul"]                 = "HANG"
	map["Han"]                    = "HANI"
	map["Hanunoo"]                = "HANO"
	map["Hatran"]                 = "HATR"
	map["Hebrew"]                 = "HEBR"
	map["Hiragana"]               = "HIRA"
	map["Anatolian_Hieroglyphs"]  = "HLUW"
	map["Pahawh_Hmong"]           = "HMNG"
	map["Nyiakeng_Puachue_Hmong"] = "HMNP"
	map["Katakana_Or_Hiragana"]   = "HRKT"
	map["Old_Hungarian"]          = "HUNG"
	map["Old_Italic"]             = "ITAL"
	map["Javanese"]               = "JAVA"
	map["Kayah_Li"]               = "KALI"
	map["Katakana"]               = "KANA"
	map["Kawi"]                   = "KAWI"
	map["Kharoshthi"]             = "KHAR"
	map["Khmer"]                  = "KHMR"
	map["Khojki"]                 = "KHOJ"
	map["Khitan_Small_Script"]    = "KITS"
	map["Kannada"]                = "KNDA"
	map["Kaithi"]                 = "KTHI"
	map["Tai_Tham"]               = "LANA"
	map["Lao"]                    = "LAOO"
	map["Latin"]                  = "LATN"
	map["Lepcha"]                 = "LEPC"
	map["Limbu"]                  = "LIMB"
	map["Linear_A"]               = "LINA"
	map["Linear_B"]               = "LINB"
	map["Lisu"]                   = "LISU"
	map["Lycian"]                 = "LYCI"
	map["Lydian"]                 = "LYDI"
	map["Mahajani"]               = "MAHJ"
	map["Makasar"]                = "MAKA"
	map["Mandaic"]                = "MAND"
	map["Manichaean"]             = "MANI"
	map["Marchen"]                = "MARC"
	map["Medefaidrin"]            = "MEDF"
	map["Mende_Kikakui"]          = "MEND"
	map["Meroitic_Cursive"]       = "MERC"
	map["Meroitic_Hieroglyphs"]   = "MERO"
	map["Malayalam"]              = "MLYM"
	map["Modi"]                   = "MODI"
	map["Mongolian"]              = "MONG"
	map["Mro"]                    = "MROO"
	map["Meetei_Mayek"]           = "MTEI"
	map["Multani"]                = "MULT"
	map["Myanmar"]                = "MYMR"
	map["Nag_Mundari"]            = "NAGM"
	map["Nandinagari"]            = "NAND"
	map["Old_North_Arabian"]      = "NARB"
	map["Nabataean"]              = "NBAT"
	map["Newa"]                   = "NEWA"
	map["Nko"]                    = "NKOO"
	map["Nushu"]                  = "NSHU"
	map["Ogham"]                  = "OGAM"
	map["Ol_Chiki"]               = "OLCK"
	map["Old_Turkic"]             = "ORKH"
	map["Oriya"]                  = "ORYA"
	map["Osage"]                  = "OSGE"
	map["Osmanya"]                = "OSMA"
	map["Old_Uyghur"]             = "OUGR"
	map["Palmyrene"]              = "PALM"
	map["Pau_Cin_Hau"]            = "PAUC"
	map["Old_Permic"]             = "PERM"
	map["Phags_Pa"]               = "PHAG"
	map["Inscriptional_Pahlavi"]  = "PHLI"
	map["Psalter_Pahlavi"]        = "PHLP"
	map["Phoenician"]             = "PHNX"
	map["Miao"]                   = "PLRD"
	map["Inscriptional_Parthian"] = "PRTI"
	map["Rejang"]                 = "RJNG"
	map["Hanifi_Rohingya"]        = "ROHG"
	map["Runic"]                  = "RUNR"
	map["Samaritan"]              = "SAMR"
	map["Old_South_Arabian"]      = "SARB"
	map["Saurashtra"]             = "SAUR"
	map["SignWriting"]            = "SGNW"
	map["Shavian"]                = "SHAW"
	map["Sharada"]                = "SHRD"
	map["Siddham"]                = "SIDD"
	map["Khudawadi"]              = "SIND"
	map["Sinhala"]                = "SINH"
	map["Sogdian"]                = "SOGD"
	map["Old_Sogdian"]            = "SOGO"
	map["Sora_Sompeng"]           = "SORA"
	map["Soyombo"]                = "SOYO"
	map["Sundanese"]              = "SUND"
	map["Syloti_Nagri"]           = "SYLO"
	map["Syriac"]                 = "SYRC"
	map["Tagbanwa"]               = "TAGB"
	map["Takri"]                  = "TAKR"
	map["Tai_Le"]                 = "TALE"
	map["New_Tai_Lue"]            = "TALU"
	map["Tamil"]                  = "TAML"
	map["Tangut"]                 = "TANG"
	map["Tai_Viet"]               = "TAVT"
	map["Telugu"]                 = "TELU"
	map["Tifinagh"]               = "TFNG"
	map["Tagalog"]                = "TGLG"
	map["Thaana"]                 = "THAA"
	map["Thai"]                   = "THAI"
	map["Tibetan"]                = "TIBT"
	map["Tirhuta"]                = "TIRH"
	map["Tangsa"]                 = "TNSA"
	map["Toto"]                   = "TOTO"
	map["Ugaritic"]               = "UGAR"
	map["Vai"]                    = "VAII"
	map["Vithkuqi"]               = "VITH"
	map["Warang_Citi"]            = "WARA"
	map["Wancho"]                 = "WCHO"
	map["Old_Persian"]            = "XPEO"
	map["Cuneiform"]              = "XSUX"
	map["Yezidi"]                 = "YEZI"
	map["Yi"]                     = "YIII"
	map["Zanabazar_Square"]       = "ZANB"
	map["Inherited"]              = "ZINH"
	map["Common"]                 = "ZYYY"

	# Preamble of the generated C file: the do-not-edit banner followed by
	# the headers the emitted code includes.
	print "/* This file is autogenerated by gen/prop/sc; DO NOT EDIT. */"
	print ""
	print "#include \"__bsearch.h\""
	print "#include \"macros.h\""
	print "#include \"rune.h\""
	print "#include \"unicode/prop.h\""
	print ""
}

# Data lines (anything that is neither empty nor a "#" comment line).  $1 is
# either a single hexadecimal code point or a "LO..HI" range and $2 is the
# script name; every code point in the range is recorded in props[] with its
# SC_* constant.
/^[^#]/ {
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	# For a single code point n is 1, so a[n] is a[1] and hi equals lo.
	hi = strtonum("0X" a[n])

	# The script name is the same for the whole range, so normalise it and
	# build the constant once rather than once per code point.
	gsub(/^; /, "", $2)
	if (!($2 in map)) {
		# A name missing from map[] would silently produce the invalid
		# identifier "SC_" in the generated C; say so on stderr.
		printf "gen/prop/sc: unknown script name \"%s\"\n", $2 > "/dev/stderr"
	}
	sc = "SC_" map[$2]

	for (i = lo; i <= hi; i++)
		props[i] = sc
}

END {
	# Direct-indexed table for U+0000 through U+00FF, eight entries per row.
	# A code point with no script in the data falls back to SC_ZZZZ, the
	# same default that is handed to __MLIB_DEFINE_BSEARCH below.
	print "static constexpr enum uprop_sc lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		if (i % 8 == 0)
			printf "\t"
		printf "%-7s,%s", props[i] ? props[i] : "SC_ZZZZ", i % 8 == 7 ? "\n" : " "
	}
	print "};"
	print ""

	# Range table for everything from U+0100 upwards.
	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_sc val;"
	print "} lookup[] = {"

	for (i = 0x100; i <= 0x10FFFF; i++) {
		# Code points without a script get no entry at all.
		if (!props[i])
			continue
		# Extend the run for as long as the next code point has the same
		# script, then emit the whole run as one {lo, hi, val} entry.
		lo = i
		while (props[lo] == props[i + 1])
			i++
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
	}

	print "};"
	print ""
	print "__MLIB_DEFINE_BSEARCH(enum uprop_sc, lookup, SC_ZZZZ)"
	print ""
	print "enum uprop_sc"
	print "uprop_get_sc(rune ch)"
	print "{"
	# lookup_lat1 has 0x100 entries (indices 0 through 0xFF), so the bound
	# check must be strict; "<=" would read one element past the end of the
	# table for U+0100.
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/Scripts | sed 's/[[:space:]]*$//'