1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
|
#!/bin/sh
# Regenerates lib/unicode/prop/uprop_get_blk.c from data/Blocks.

# Exit as soon as a command fails.
set -e
# Move two directories above the one holding this script so the relative
# paths below resolve no matter where the script is invoked from
# (presumably the repository root -- TODO confirm).
cd "${0%/*}/../.."
# From here on, everything written to stdout lands in the generated file.
exec >lib/unicode/prop/uprop_get_blk.c
# The awk program below prints the complete C source on stdout.
gawk '
# Runs once before any input is read: configures field splitting, fills the
# block-name translation table, and prints the fixed preamble of the
# generated C file.
BEGIN {
# A field separator is optional spaces followed by either a semicolon (plus
# optional spaces) or a "#" comment running to the end of the line.
FS = " *(; *|#.*)"
# map[] is keyed by the normalized block name (lowercase, spaces and hyphens
# turned into underscores); the value is the suffix appended to "BLK_".
map["adlam"] = "ADLAM"
map["aegean_numbers"] = "AEGEAN_NUMBERS"
map["ahom"] = "AHOM"
map["alchemical_symbols"] = "ALCHEMICAL"
map["alphabetic_presentation_forms"] = "ALPHABETIC_PF"
map["anatolian_hieroglyphs"] = "ANATOLIAN_HIEROGLYPHS"
map["ancient_greek_musical_notation"] = "ANCIENT_GREEK_MUSIC"
map["ancient_greek_numbers"] = "ANCIENT_GREEK_NUMBERS"
map["ancient_symbols"] = "ANCIENT_SYMBOLS"
map["arabic"] = "ARABIC"
map["arabic_extended_a"] = "ARABIC_EXT_A"
map["arabic_extended_b"] = "ARABIC_EXT_B"
map["arabic_extended_c"] = "ARABIC_EXT_C"
map["arabic_mathematical_alphabetic_symbols"] = "ARABIC_MATH"
map["arabic_presentation_forms_a"] = "ARABIC_PF_A"
map["arabic_presentation_forms_b"] = "ARABIC_PF_B"
map["arabic_supplement"] = "ARABIC_SUP"
map["armenian"] = "ARMENIAN"
map["arrows"] = "ARROWS"
map["avestan"] = "AVESTAN"
map["balinese"] = "BALINESE"
map["bamum"] = "BAMUM"
map["bamum_supplement"] = "BAMUM_SUP"
map["basic_latin"] = "ASCII"
map["bassa_vah"] = "BASSA_VAH"
map["batak"] = "BATAK"
map["bengali"] = "BENGALI"
map["bhaiksuki"] = "BHAIKSUKI"
map["block_elements"] = "BLOCK_ELEMENTS"
map["bopomofo"] = "BOPOMOFO"
map["bopomofo_extended"] = "BOPOMOFO_EXT"
map["box_drawing"] = "BOX_DRAWING"
map["brahmi"] = "BRAHMI"
map["braille_patterns"] = "BRAILLE"
map["buginese"] = "BUGINESE"
map["buhid"] = "BUHID"
map["byzantine_musical_symbols"] = "BYZANTINE_MUSIC"
map["carian"] = "CARIAN"
map["caucasian_albanian"] = "CAUCASIAN_ALBANIAN"
map["chakma"] = "CHAKMA"
map["cham"] = "CHAM"
map["cherokee"] = "CHEROKEE"
map["cherokee_supplement"] = "CHEROKEE_SUP"
map["chess_symbols"] = "CHESS_SYMBOLS"
map["chorasmian"] = "CHORASMIAN"
map["cjk_compatibility"] = "CJK_COMPAT"
map["cjk_compatibility_forms"] = "CJK_COMPAT_FORMS"
map["cjk_compatibility_ideographs"] = "CJK_COMPAT_IDEOGRAPHS"
map["cjk_compatibility_ideographs_supplement"] = "CJK_COMPAT_IDEOGRAPHS_SUP"
map["cjk_radicals_supplement"] = "CJK_RADICALS_SUP"
map["cjk_strokes"] = "CJK_STROKES"
map["cjk_symbols_and_punctuation"] = "CJK_SYMBOLS"
map["cjk_unified_ideographs"] = "CJK"
map["cjk_unified_ideographs_extension_a"] = "CJK_EXT_A"
map["cjk_unified_ideographs_extension_b"] = "CJK_EXT_B"
map["cjk_unified_ideographs_extension_c"] = "CJK_EXT_C"
map["cjk_unified_ideographs_extension_d"] = "CJK_EXT_D"
map["cjk_unified_ideographs_extension_e"] = "CJK_EXT_E"
map["cjk_unified_ideographs_extension_f"] = "CJK_EXT_F"
map["cjk_unified_ideographs_extension_g"] = "CJK_EXT_G"
map["cjk_unified_ideographs_extension_h"] = "CJK_EXT_H"
map["cjk_unified_ideographs_extension_i"] = "CJK_EXT_I"
map["combining_diacritical_marks"] = "DIACRITICALS"
map["combining_diacritical_marks_extended"] = "DIACRITICALS_EXT"
map["combining_diacritical_marks_for_symbols"] = "DIACRITICALS_FOR_SYMBOLS"
map["combining_diacritical_marks_supplement"] = "DIACRITICALS_SUP"
map["combining_half_marks"] = "HALF_MARKS"
map["common_indic_number_forms"] = "INDIC_NUMBER_FORMS"
map["control_pictures"] = "CONTROL_PICTURES"
map["coptic"] = "COPTIC"
map["coptic_epact_numbers"] = "COPTIC_EPACT_NUMBERS"
map["counting_rod_numerals"] = "COUNTING_ROD"
map["cuneiform"] = "CUNEIFORM"
map["cuneiform_numbers_and_punctuation"] = "CUNEIFORM_NUMBERS"
map["currency_symbols"] = "CURRENCY_SYMBOLS"
map["cypriot_syllabary"] = "CYPRIOT_SYLLABARY"
map["cypro_minoan"] = "CYPRO_MINOAN"
map["cyrillic"] = "CYRILLIC"
map["cyrillic_extended_a"] = "CYRILLIC_EXT_A"
map["cyrillic_extended_b"] = "CYRILLIC_EXT_B"
map["cyrillic_extended_c"] = "CYRILLIC_EXT_C"
map["cyrillic_extended_d"] = "CYRILLIC_EXT_D"
map["cyrillic_supplement"] = "CYRILLIC_SUP"
map["deseret"] = "DESERET"
map["devanagari"] = "DEVANAGARI"
map["devanagari_extended_a"] = "DEVANAGARI_EXT_A"
map["devanagari_extended"] = "DEVANAGARI_EXT"
map["dingbats"] = "DINGBATS"
map["dives_akuru"] = "DIVES_AKURU"
map["dogra"] = "DOGRA"
map["domino_tiles"] = "DOMINO"
map["duployan"] = "DUPLOYAN"
map["early_dynastic_cuneiform"] = "EARLY_DYNASTIC_CUNEIFORM"
map["egyptian_hieroglyph_format_controls"] = "EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS"
map["egyptian_hieroglyphs"] = "EGYPTIAN_HIEROGLYPHS"
map["elbasan"] = "ELBASAN"
map["elymaic"] = "ELYMAIC"
map["emoticons"] = "EMOTICONS"
map["enclosed_alphanumerics"] = "ENCLOSED_ALPHANUM"
map["enclosed_alphanumeric_supplement"] = "ENCLOSED_ALPHANUM_SUP"
map["enclosed_cjk_letters_and_months"] = "ENCLOSED_CJK"
map["enclosed_ideographic_supplement"] = "ENCLOSED_IDEOGRAPHIC_SUP"
map["ethiopic"] = "ETHIOPIC"
map["ethiopic_extended_a"] = "ETHIOPIC_EXT_A"
map["ethiopic_extended_b"] = "ETHIOPIC_EXT_B"
map["ethiopic_extended"] = "ETHIOPIC_EXT"
map["ethiopic_supplement"] = "ETHIOPIC_SUP"
map["general_punctuation"] = "PUNCTUATION"
map["geometric_shapes_extended"] = "GEOMETRIC_SHAPES_EXT"
map["geometric_shapes"] = "GEOMETRIC_SHAPES"
map["georgian_extended"] = "GEORGIAN_EXT"
map["georgian"] = "GEORGIAN"
map["georgian_supplement"] = "GEORGIAN_SUP"
map["glagolitic"] = "GLAGOLITIC"
map["glagolitic_supplement"] = "GLAGOLITIC_SUP"
map["gothic"] = "GOTHIC"
map["grantha"] = "GRANTHA"
map["greek_and_coptic"] = "GREEK"
map["greek_extended"] = "GREEK_EXT"
map["gujarati"] = "GUJARATI"
map["gunjala_gondi"] = "GUNJALA_GONDI"
map["gurmukhi"] = "GURMUKHI"
map["halfwidth_and_fullwidth_forms"] = "HALF_AND_FULL_FORMS"
map["hangul_compatibility_jamo"] = "COMPAT_JAMO"
map["hangul_jamo_extended_a"] = "JAMO_EXT_A"
map["hangul_jamo_extended_b"] = "JAMO_EXT_B"
map["hangul_jamo"] = "JAMO"
map["hangul_syllables"] = "HANGUL"
map["hanifi_rohingya"] = "HANIFI_ROHINGYA"
map["hanunoo"] = "HANUNOO"
map["hatran"] = "HATRAN"
map["hebrew"] = "HEBREW"
map["high_private_use_surrogates"] = "HIGH_PU_SURROGATES"
map["high_surrogates"] = "HIGH_SURROGATES"
map["hiragana"] = "HIRAGANA"
map["ideographic_description_characters"] = "IDC"
map["ideographic_symbols_and_punctuation"] = "IDEOGRAPHIC_SYMBOLS"
map["imperial_aramaic"] = "IMPERIAL_ARAMAIC"
map["indic_siyaq_numbers"] = "INDIC_SIYAQ_NUMBERS"
map["inscriptional_pahlavi"] = "INSCRIPTIONAL_PAHLAVI"
map["inscriptional_parthian"] = "INSCRIPTIONAL_PARTHIAN"
map["ipa_extensions"] = "IPA_EXT"
map["javanese"] = "JAVANESE"
map["kaithi"] = "KAITHI"
map["kaktovik_numerals"] = "KAKTOVIK_NUMERALS"
map["kana_extended_a"] = "KANA_EXT_A"
map["kana_extended_b"] = "KANA_EXT_B"
map["kana_supplement"] = "KANA_SUP"
map["kanbun"] = "KANBUN"
map["kangxi_radicals"] = "KANGXI"
map["kannada"] = "KANNADA"
map["katakana"] = "KATAKANA"
map["katakana_phonetic_extensions"] = "KATAKANA_EXT"
map["kawi"] = "KAWI"
map["kayah_li"] = "KAYAH_LI"
map["kharoshthi"] = "KHAROSHTHI"
map["khitan_small_script"] = "KHITAN_SMALL_SCRIPT"
map["khmer"] = "KHMER"
map["khmer_symbols"] = "KHMER_SYMBOLS"
map["khojki"] = "KHOJKI"
map["khudawadi"] = "KHUDAWADI"
map["lao"] = "LAO"
map["latin_1_supplement"] = "LATIN_1_SUP"
map["latin_extended_additional"] = "LATIN_EXT_ADDITIONAL"
map["latin_extended_a"] = "LATIN_EXT_A"
map["latin_extended_b"] = "LATIN_EXT_B"
map["latin_extended_c"] = "LATIN_EXT_C"
map["latin_extended_d"] = "LATIN_EXT_D"
map["latin_extended_e"] = "LATIN_EXT_E"
map["latin_extended_f"] = "LATIN_EXT_F"
map["latin_extended_g"] = "LATIN_EXT_G"
map["lepcha"] = "LEPCHA"
map["letterlike_symbols"] = "LETTERLIKE_SYMBOLS"
map["limbu"] = "LIMBU"
map["linear_a"] = "LINEAR_A"
map["linear_b_ideograms"] = "LINEAR_B_IDEOGRAMS"
map["linear_b_syllabary"] = "LINEAR_B_SYLLABARY"
map["lisu"] = "LISU"
map["lisu_supplement"] = "LISU_SUP"
map["low_surrogates"] = "LOW_SURROGATES"
map["lycian"] = "LYCIAN"
map["lydian"] = "LYDIAN"
map["mahajani"] = "MAHAJANI"
map["mahjong_tiles"] = "MAHJONG"
map["makasar"] = "MAKASAR"
map["malayalam"] = "MALAYALAM"
map["mandaic"] = "MANDAIC"
map["manichaean"] = "MANICHAEAN"
map["marchen"] = "MARCHEN"
map["masaram_gondi"] = "MASARAM_GONDI"
map["mathematical_alphanumeric_symbols"] = "MATH_ALPHANUM"
map["mathematical_operators"] = "MATH_OPERATORS"
map["mayan_numerals"] = "MAYAN_NUMERALS"
map["medefaidrin"] = "MEDEFAIDRIN"
map["meetei_mayek_extensions"] = "MEETEI_MAYEK_EXT"
map["meetei_mayek"] = "MEETEI_MAYEK"
map["mende_kikakui"] = "MENDE_KIKAKUI"
map["meroitic_cursive"] = "MEROITIC_CURSIVE"
map["meroitic_hieroglyphs"] = "MEROITIC_HIEROGLYPHS"
map["miao"] = "MIAO"
map["miscellaneous_mathematical_symbols_a"] = "MISC_MATH_SYMBOLS_A"
map["miscellaneous_mathematical_symbols_b"] = "MISC_MATH_SYMBOLS_B"
map["miscellaneous_symbols_and_arrows"] = "MISC_ARROWS"
map["miscellaneous_symbols_and_pictographs"] = "MISC_PICTOGRAPHS"
map["miscellaneous_symbols"] = "MISC_SYMBOLS"
map["miscellaneous_technical"] = "MISC_TECHNICAL"
map["modifier_tone_letters"] = "MODIFIER_TONE_LETTERS"
map["modi"] = "MODI"
map["mongolian"] = "MONGOLIAN"
map["mongolian_supplement"] = "MONGOLIAN_SUP"
map["mro"] = "MRO"
map["multani"] = "MULTANI"
map["musical_symbols"] = "MUSIC"
map["myanmar_extended_a"] = "MYANMAR_EXT_A"
map["myanmar_extended_b"] = "MYANMAR_EXT_B"
map["myanmar"] = "MYANMAR"
map["nabataean"] = "NABATAEAN"
map["nag_mundari"] = "NAG_MUNDARI"
map["nandinagari"] = "NANDINAGARI"
map["newa"] = "NEWA"
map["new_tai_lue"] = "NEW_TAI_LUE"
map["nko"] = "NKO"
map["number_forms"] = "NUMBER_FORMS"
map["nushu"] = "NUSHU"
map["nyiakeng_puachue_hmong"] = "NYIAKENG_PUACHUE_HMONG"
map["ogham"] = "OGHAM"
map["ol_chiki"] = "OL_CHIKI"
map["old_hungarian"] = "OLD_HUNGARIAN"
map["old_italic"] = "OLD_ITALIC"
map["old_north_arabian"] = "OLD_NORTH_ARABIAN"
map["old_permic"] = "OLD_PERMIC"
map["old_persian"] = "OLD_PERSIAN"
map["old_sogdian"] = "OLD_SOGDIAN"
map["old_south_arabian"] = "OLD_SOUTH_ARABIAN"
map["old_turkic"] = "OLD_TURKIC"
map["old_uyghur"] = "OLD_UYGHUR"
map["optical_character_recognition"] = "OCR"
map["oriya"] = "ORIYA"
map["ornamental_dingbats"] = "ORNAMENTAL_DINGBATS"
map["osage"] = "OSAGE"
map["osmanya"] = "OSMANYA"
map["ottoman_siyaq_numbers"] = "OTTOMAN_SIYAQ_NUMBERS"
map["pahawh_hmong"] = "PAHAWH_HMONG"
map["palmyrene"] = "PALMYRENE"
map["pau_cin_hau"] = "PAU_CIN_HAU"
map["phags_pa"] = "PHAGS_PA"
map["phaistos_disc"] = "PHAISTOS"
map["phoenician"] = "PHOENICIAN"
map["phonetic_extensions"] = "PHONETIC_EXT"
map["phonetic_extensions_supplement"] = "PHONETIC_EXT_SUP"
map["playing_cards"] = "PLAYING_CARDS"
map["private_use_area"] = "PUA"
map["psalter_pahlavi"] = "PSALTER_PAHLAVI"
map["rejang"] = "REJANG"
map["rumi_numeral_symbols"] = "RUMI"
map["runic"] = "RUNIC"
map["samaritan"] = "SAMARITAN"
map["saurashtra"] = "SAURASHTRA"
map["sharada"] = "SHARADA"
map["shavian"] = "SHAVIAN"
map["shorthand_format_controls"] = "SHORTHAND_FORMAT_CONTROLS"
map["siddham"] = "SIDDHAM"
map["sinhala_archaic_numbers"] = "SINHALA_ARCHAIC_NUMBERS"
map["sinhala"] = "SINHALA"
map["small_form_variants"] = "SMALL_FORMS"
map["small_kana_extension"] = "SMALL_KANA_EXT"
map["sogdian"] = "SOGDIAN"
map["sora_sompeng"] = "SORA_SOMPENG"
map["soyombo"] = "SOYOMBO"
map["spacing_modifier_letters"] = "MODIFIER_LETTERS"
map["specials"] = "SPECIALS"
map["sundanese"] = "SUNDANESE"
map["sundanese_supplement"] = "SUNDANESE_SUP"
map["superscripts_and_subscripts"] = "SUPER_AND_SUB"
map["supplemental_arrows_a"] = "SUP_ARROWS_A"
map["supplemental_arrows_b"] = "SUP_ARROWS_B"
map["supplemental_arrows_c"] = "SUP_ARROWS_C"
map["supplemental_mathematical_operators"] = "SUP_MATH_OPERATORS"
map["supplemental_punctuation"] = "SUP_PUNCTUATION"
map["supplemental_symbols_and_pictographs"] = "SUP_SYMBOLS_AND_PICTOGRAPHS"
map["supplementary_private_use_area_a"] = "SUP_PUA_A"
map["supplementary_private_use_area_b"] = "SUP_PUA_B"
map["sutton_signwriting"] = "SUTTON_SIGNWRITING"
map["syloti_nagri"] = "SYLOTI_NAGRI"
map["symbols_and_pictographs_extended_a"] = "SYMBOLS_AND_PICTOGRAPHS_EXT_A"
map["symbols_for_legacy_computing"] = "SYMBOLS_FOR_LEGACY_COMPUTING"
map["syriac_supplement"] = "SYRIAC_SUP"
map["syriac"] = "SYRIAC"
map["tagalog"] = "TAGALOG"
map["tagbanwa"] = "TAGBANWA"
map["tags"] = "TAGS"
map["tai_le"] = "TAI_LE"
map["tai_tham"] = "TAI_THAM"
map["tai_viet"] = "TAI_VIET"
map["tai_xuan_jing_symbols"] = "TAI_XUAN_JING"
map["takri"] = "TAKRI"
map["tamil_supplement"] = "TAMIL_SUP"
map["tamil"] = "TAMIL"
map["tangsa"] = "TANGSA"
map["tangut_components"] = "TANGUT_COMPONENTS"
map["tangut_supplement"] = "TANGUT_SUP"
map["tangut"] = "TANGUT"
map["telugu"] = "TELUGU"
map["thaana"] = "THAANA"
map["thai"] = "THAI"
map["tibetan"] = "TIBETAN"
map["tifinagh"] = "TIFINAGH"
map["tirhuta"] = "TIRHUTA"
map["toto"] = "TOTO"
map["transport_and_map_symbols"] = "TRANSPORT_AND_MAP"
map["ugaritic"] = "UGARITIC"
map["unified_canadian_aboriginal_syllabics_extended_a"] = "UCAS_EXT_A"
map["unified_canadian_aboriginal_syllabics_extended"] = "UCAS_EXT"
map["unified_canadian_aboriginal_syllabics"] = "UCAS"
map["vai"] = "VAI"
map["variation_selectors_supplement"] = "VS_SUP"
map["variation_selectors"] = "VS"
map["vedic_extensions"] = "VEDIC_EXT"
map["vertical_forms"] = "VERTICAL_FORMS"
map["vithkuqi"] = "VITHKUQI"
map["wancho"] = "WANCHO"
map["warang_citi"] = "WARANG_CITI"
map["yezidi"] = "YEZIDI"
map["yijing_hexagram_symbols"] = "YIJING"
map["yi_radicals"] = "YI_RADICALS"
map["yi_syllables"] = "YI_SYLLABLES"
map["zanabazar_square"] = "ZANABAZAR_SQUARE"
map["znamenny_musical_notation"] = "ZNAMENNY_MUSIC"
# Fixed preamble of the generated file: a do-not-edit banner followed by the
# include directives, each group terminated by a blank line.
print "/* This file is autogenerated by gen/prop/blk; DO NOT EDIT. */"
print ""
print "#include \"__bsearch.h\""
print "#include \"macros.h\""
print "#include \"rune.h\""
print "#include \"unicode/prop.h\""
print ""
}
# Runs for every input line that is non-empty and does not start with "#".
# Records, for each code point in the range named by the first field, the
# enum constant of the block named by the second field.
#   $1  "LO..HI" in hexadecimal; a lone value gives LO == HI
#   $2  block name as spelled in the data file
/^[^#]/ {
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	# Normalize the name once per line instead of once per code point.
	# FS already swallows the "; " delimiter, so $2 is the bare name and
	# only spaces and hyphens need rewriting.
	key = tolower($2)
	gsub(/[- ]/, "_", key)

	# A name missing from map[] would quietly produce a bare "BLK_" in the
	# generated C; say so on stderr (stdout is the generated file).
	if (!(key in map))
		printf "gen/prop/blk: no enum mapping for block \"%s\"\n", $2 > "/dev/stderr"

	blk = "BLK_" map[key]
	for (i = lo; i <= hi; i++)
		props[i] = blk
}
# Runs after all input has been read: prints the two lookup tables built from
# props[] and the definition of uprop_get_blk().
END {
	# Directly indexed table covering code points 0x00 through 0xFF,
	# printed eight entries per row; a missing entry is printed as 0.
	print "static constexpr enum uprop_blk lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		if (i % 8 == 0)
			printf "\t"
		printf "%-15s,%s", props[i] ? props[i] : 0, i % 8 == 7 ? "\n" : " "
	}
	print "};"
	print ""

	# Range table for code points 0x100 and up. Code points without a
	# block are skipped; runs of consecutive code points sharing the same
	# value collapse into a single {lo, hi, val} row.
	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_blk val;"
	print "} lookup[] = {"
	for (i = 0x100; i <= 0x10FFFF; i++) {
		if (!props[i])
			continue
		lo = i
		while (props[lo] == props[i + 1])
			i++
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
	}
	print "};"
	print ""

	# NOTE(review): BLK_NB is presumably the value returned when no range
	# matches -- confirm against __bsearch.h.
	print "__MLIB_DEFINE_BSEARCH(enum uprop_blk, lookup, BLK_NB)"
	print ""
	print "enum uprop_blk"
	print "uprop_get_blk(rune ch)"
	print "{"
	# lookup_lat1 holds 0x100 entries, so the valid indices are strictly
	# below its length (assuming lengthof() yields the element count).
	# Comparing with "<=" would index one element past the end for 0x100.
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/Blocks | sed 's/[[:space:]]*$//'  # strip trailing whitespace; POSIX class instead of the GNU-only \s
|