aboutsummaryrefslogtreecommitdiff
path: root/gen/prop/blk
diff options
context:
space:
mode:
Diffstat (limited to 'gen/prop/blk')
-rwxr-xr-xgen/prop/blk395
1 files changed, 395 insertions, 0 deletions
diff --git a/gen/prop/blk b/gen/prop/blk
new file mode 100755
index 0000000..574f5af
--- /dev/null
+++ b/gen/prop/blk
@@ -0,0 +1,395 @@
+#!/bin/sh
+
+set -e # abort as soon as any command fails
+cd "${0%/*}/../.." # go two levels up from the directory part of $0, so the relative paths below resolve
+exec >lib/unicode/prop/uprop_get_blk.c # from here on, standard output is the generated C file
+
+gawk '
+BEGIN { # runs once before any input: set the field separator, fill the name table, print the C prologue
+ FS = " *(; *|#.*)" # split on ";" plus adjacent spaces; a "#" and everything after it also counts as a separator
+
+ map["adlam"] = "ADLAM" # map: normalised block name (lower case, "_" for spaces and hyphens) -> suffix appended to "BLK_"
+ map["aegean_numbers"] = "AEGEAN_NUMBERS"
+ map["ahom"] = "AHOM"
+ map["alchemical_symbols"] = "ALCHEMICAL"
+ map["alphabetic_presentation_forms"] = "ALPHABETIC_PF"
+ map["anatolian_hieroglyphs"] = "ANATOLIAN_HIEROGLYPHS"
+ map["ancient_greek_musical_notation"] = "ANCIENT_GREEK_MUSIC"
+ map["ancient_greek_numbers"] = "ANCIENT_GREEK_NUMBERS"
+ map["ancient_symbols"] = "ANCIENT_SYMBOLS"
+ map["arabic"] = "ARABIC"
+ map["arabic_extended_a"] = "ARABIC_EXT_A"
+ map["arabic_extended_b"] = "ARABIC_EXT_B"
+ map["arabic_extended_c"] = "ARABIC_EXT_C"
+ map["arabic_mathematical_alphabetic_symbols"] = "ARABIC_MATH"
+ map["arabic_presentation_forms_a"] = "ARABIC_PF_A"
+ map["arabic_presentation_forms_b"] = "ARABIC_PF_B"
+ map["arabic_supplement"] = "ARABIC_SUP"
+ map["armenian"] = "ARMENIAN"
+ map["arrows"] = "ARROWS"
+ map["avestan"] = "AVESTAN"
+ map["balinese"] = "BALINESE"
+ map["bamum"] = "BAMUM"
+ map["bamum_supplement"] = "BAMUM_SUP"
+ map["basic_latin"] = "ASCII"
+ map["bassa_vah"] = "BASSA_VAH"
+ map["batak"] = "BATAK"
+ map["bengali"] = "BENGALI"
+ map["bhaiksuki"] = "BHAIKSUKI"
+ map["block_elements"] = "BLOCK_ELEMENTS"
+ map["bopomofo"] = "BOPOMOFO"
+ map["bopomofo_extended"] = "BOPOMOFO_EXT"
+ map["box_drawing"] = "BOX_DRAWING"
+ map["brahmi"] = "BRAHMI"
+ map["braille_patterns"] = "BRAILLE"
+ map["buginese"] = "BUGINESE"
+ map["buhid"] = "BUHID"
+ map["byzantine_musical_symbols"] = "BYZANTINE_MUSIC"
+ map["carian"] = "CARIAN"
+ map["caucasian_albanian"] = "CAUCASIAN_ALBANIAN"
+ map["chakma"] = "CHAKMA"
+ map["cham"] = "CHAM"
+ map["cherokee"] = "CHEROKEE"
+ map["cherokee_supplement"] = "CHEROKEE_SUP"
+ map["chess_symbols"] = "CHESS_SYMBOLS"
+ map["chorasmian"] = "CHORASMIAN"
+ map["cjk_compatibility"] = "CJK_COMPAT"
+ map["cjk_compatibility_forms"] = "CJK_COMPAT_FORMS"
+ map["cjk_compatibility_ideographs"] = "CJK_COMPAT_IDEOGRAPHS"
+ map["cjk_compatibility_ideographs_supplement"] = "CJK_COMPAT_IDEOGRAPHS_SUP"
+ map["cjk_radicals_supplement"] = "CJK_RADICALS_SUP"
+ map["cjk_strokes"] = "CJK_STROKES"
+ map["cjk_symbols_and_punctuation"] = "CJK_SYMBOLS"
+ map["cjk_unified_ideographs"] = "CJK"
+ map["cjk_unified_ideographs_extension_a"] = "CJK_EXT_A"
+ map["cjk_unified_ideographs_extension_b"] = "CJK_EXT_B"
+ map["cjk_unified_ideographs_extension_c"] = "CJK_EXT_C"
+ map["cjk_unified_ideographs_extension_d"] = "CJK_EXT_D"
+ map["cjk_unified_ideographs_extension_e"] = "CJK_EXT_E"
+ map["cjk_unified_ideographs_extension_f"] = "CJK_EXT_F"
+ map["cjk_unified_ideographs_extension_g"] = "CJK_EXT_G"
+ map["cjk_unified_ideographs_extension_h"] = "CJK_EXT_H"
+ map["cjk_unified_ideographs_extension_i"] = "CJK_EXT_I"
+ map["combining_diacritical_marks"] = "DIACRITICALS"
+ map["combining_diacritical_marks_extended"] = "DIACRITICALS_EXT"
+ map["combining_diacritical_marks_for_symbols"] = "DIACRITICALS_FOR_SYMBOLS"
+ map["combining_diacritical_marks_supplement"] = "DIACRITICALS_SUP"
+ map["combining_half_marks"] = "HALF_MARKS"
+ map["common_indic_number_forms"] = "INDIC_NUMBER_FORMS"
+ map["control_pictures"] = "CONTROL_PICTURES"
+ map["coptic"] = "COPTIC"
+ map["coptic_epact_numbers"] = "COPTIC_EPACT_NUMBERS"
+ map["counting_rod_numerals"] = "COUNTING_ROD"
+ map["cuneiform"] = "CUNEIFORM"
+ map["cuneiform_numbers_and_punctuation"] = "CUNEIFORM_NUMBERS"
+ map["currency_symbols"] = "CURRENCY_SYMBOLS"
+ map["cypriot_syllabary"] = "CYPRIOT_SYLLABARY"
+ map["cypro_minoan"] = "CYPRO_MINOAN"
+ map["cyrillic"] = "CYRILLIC"
+ map["cyrillic_extended_a"] = "CYRILLIC_EXT_A"
+ map["cyrillic_extended_b"] = "CYRILLIC_EXT_B"
+ map["cyrillic_extended_c"] = "CYRILLIC_EXT_C"
+ map["cyrillic_extended_d"] = "CYRILLIC_EXT_D"
+ map["cyrillic_supplement"] = "CYRILLIC_SUP"
+ map["deseret"] = "DESERET"
+ map["devanagari"] = "DEVANAGARI"
+ map["devanagari_extended_a"] = "DEVANAGARI_EXT_A"
+ map["devanagari_extended"] = "DEVANAGARI_EXT"
+ map["dingbats"] = "DINGBATS"
+ map["dives_akuru"] = "DIVES_AKURU"
+ map["dogra"] = "DOGRA"
+ map["domino_tiles"] = "DOMINO"
+ map["duployan"] = "DUPLOYAN"
+ map["early_dynastic_cuneiform"] = "EARLY_DYNASTIC_CUNEIFORM"
+ map["egyptian_hieroglyph_format_controls"] = "EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS"
+ map["egyptian_hieroglyphs"] = "EGYPTIAN_HIEROGLYPHS"
+ map["elbasan"] = "ELBASAN"
+ map["elymaic"] = "ELYMAIC"
+ map["emoticons"] = "EMOTICONS"
+ map["enclosed_alphanumerics"] = "ENCLOSED_ALPHANUM"
+ map["enclosed_alphanumeric_supplement"] = "ENCLOSED_ALPHANUM_SUP"
+ map["enclosed_cjk_letters_and_months"] = "ENCLOSED_CJK"
+ map["enclosed_ideographic_supplement"] = "ENCLOSED_IDEOGRAPHIC_SUP"
+ map["ethiopic"] = "ETHIOPIC"
+ map["ethiopic_extended_a"] = "ETHIOPIC_EXT_A"
+ map["ethiopic_extended_b"] = "ETHIOPIC_EXT_B"
+ map["ethiopic_extended"] = "ETHIOPIC_EXT"
+ map["ethiopic_supplement"] = "ETHIOPIC_SUP"
+ map["general_punctuation"] = "PUNCTUATION"
+ map["geometric_shapes_extended"] = "GEOMETRIC_SHAPES_EXT"
+ map["geometric_shapes"] = "GEOMETRIC_SHAPES"
+ map["georgian_extended"] = "GEORGIAN_EXT"
+ map["georgian"] = "GEORGIAN"
+ map["georgian_supplement"] = "GEORGIAN_SUP"
+ map["glagolitic"] = "GLAGOLITIC"
+ map["glagolitic_supplement"] = "GLAGOLITIC_SUP"
+ map["gothic"] = "GOTHIC"
+ map["grantha"] = "GRANTHA"
+ map["greek_and_coptic"] = "GREEK"
+ map["greek_extended"] = "GREEK_EXT"
+ map["gujarati"] = "GUJARATI"
+ map["gunjala_gondi"] = "GUNJALA_GONDI"
+ map["gurmukhi"] = "GURMUKHI"
+ map["halfwidth_and_fullwidth_forms"] = "HALF_AND_FULL_FORMS"
+ map["hangul_compatibility_jamo"] = "COMPAT_JAMO"
+ map["hangul_jamo_extended_a"] = "JAMO_EXT_A"
+ map["hangul_jamo_extended_b"] = "JAMO_EXT_B"
+ map["hangul_jamo"] = "JAMO"
+ map["hangul_syllables"] = "HANGUL"
+ map["hanifi_rohingya"] = "HANIFI_ROHINGYA"
+ map["hanunoo"] = "HANUNOO"
+ map["hatran"] = "HATRAN"
+ map["hebrew"] = "HEBREW"
+ map["high_private_use_surrogates"] = "HIGH_PU_SURROGATES"
+ map["high_surrogates"] = "HIGH_SURROGATES"
+ map["hiragana"] = "HIRAGANA"
+ map["ideographic_description_characters"] = "IDC"
+ map["ideographic_symbols_and_punctuation"] = "IDEOGRAPHIC_SYMBOLS"
+ map["imperial_aramaic"] = "IMPERIAL_ARAMAIC"
+ map["indic_siyaq_numbers"] = "INDIC_SIYAQ_NUMBERS"
+ map["inscriptional_pahlavi"] = "INSCRIPTIONAL_PAHLAVI"
+ map["inscriptional_parthian"] = "INSCRIPTIONAL_PARTHIAN"
+ map["ipa_extensions"] = "IPA_EXT"
+ map["javanese"] = "JAVANESE"
+ map["kaithi"] = "KAITHI"
+ map["kaktovik_numerals"] = "KAKTOVIK_NUMERALS"
+ map["kana_extended_a"] = "KANA_EXT_A"
+ map["kana_extended_b"] = "KANA_EXT_B"
+ map["kana_supplement"] = "KANA_SUP"
+ map["kanbun"] = "KANBUN"
+ map["kangxi_radicals"] = "KANGXI"
+ map["kannada"] = "KANNADA"
+ map["katakana"] = "KATAKANA"
+ map["katakana_phonetic_extensions"] = "KATAKANA_EXT"
+ map["kawi"] = "KAWI"
+ map["kayah_li"] = "KAYAH_LI"
+ map["kharoshthi"] = "KHAROSHTHI"
+ map["khitan_small_script"] = "KHITAN_SMALL_SCRIPT"
+ map["khmer"] = "KHMER"
+ map["khmer_symbols"] = "KHMER_SYMBOLS"
+ map["khojki"] = "KHOJKI"
+ map["khudawadi"] = "KHUDAWADI"
+ map["lao"] = "LAO"
+ map["latin_1_supplement"] = "LATIN_1_SUP"
+ map["latin_extended_additional"] = "LATIN_EXT_ADDITIONAL"
+ map["latin_extended_a"] = "LATIN_EXT_A"
+ map["latin_extended_b"] = "LATIN_EXT_B"
+ map["latin_extended_c"] = "LATIN_EXT_C"
+ map["latin_extended_d"] = "LATIN_EXT_D"
+ map["latin_extended_e"] = "LATIN_EXT_E"
+ map["latin_extended_f"] = "LATIN_EXT_F"
+ map["latin_extended_g"] = "LATIN_EXT_G"
+ map["lepcha"] = "LEPCHA"
+ map["letterlike_symbols"] = "LETTERLIKE_SYMBOLS"
+ map["limbu"] = "LIMBU"
+ map["linear_a"] = "LINEAR_A"
+ map["linear_b_ideograms"] = "LINEAR_B_IDEOGRAMS"
+ map["linear_b_syllabary"] = "LINEAR_B_SYLLABARY"
+ map["lisu"] = "LISU"
+ map["lisu_supplement"] = "LISU_SUP"
+ map["low_surrogates"] = "LOW_SURROGATES"
+ map["lycian"] = "LYCIAN"
+ map["lydian"] = "LYDIAN"
+ map["mahajani"] = "MAHAJANI"
+ map["mahjong_tiles"] = "MAHJONG"
+ map["makasar"] = "MAKASAR"
+ map["malayalam"] = "MALAYALAM"
+ map["mandaic"] = "MANDAIC"
+ map["manichaean"] = "MANICHAEAN"
+ map["marchen"] = "MARCHEN"
+ map["masaram_gondi"] = "MASARAM_GONDI"
+ map["mathematical_alphanumeric_symbols"] = "MATH_ALPHANUM"
+ map["mathematical_operators"] = "MATH_OPERATORS"
+ map["mayan_numerals"] = "MAYAN_NUMERALS"
+ map["medefaidrin"] = "MEDEFAIDRIN"
+ map["meetei_mayek_extensions"] = "MEETEI_MAYEK_EXT"
+ map["meetei_mayek"] = "MEETEI_MAYEK"
+ map["mende_kikakui"] = "MENDE_KIKAKUI"
+ map["meroitic_cursive"] = "MEROITIC_CURSIVE"
+ map["meroitic_hieroglyphs"] = "MEROITIC_HIEROGLYPHS"
+ map["miao"] = "MIAO"
+ map["miscellaneous_mathematical_symbols_a"] = "MISC_MATH_SYMBOLS_A"
+ map["miscellaneous_mathematical_symbols_b"] = "MISC_MATH_SYMBOLS_B"
+ map["miscellaneous_symbols_and_arrows"] = "MISC_ARROWS"
+ map["miscellaneous_symbols_and_pictographs"] = "MISC_PICTOGRAPHS"
+ map["miscellaneous_symbols"] = "MISC_SYMBOLS"
+ map["miscellaneous_technical"] = "MISC_TECHNICAL"
+ map["modifier_tone_letters"] = "MODIFIER_TONE_LETTERS"
+ map["modi"] = "MODI"
+ map["mongolian"] = "MONGOLIAN"
+ map["mongolian_supplement"] = "MONGOLIAN_SUP"
+ map["mro"] = "MRO"
+ map["multani"] = "MULTANI"
+ map["musical_symbols"] = "MUSIC"
+ map["myanmar_extended_a"] = "MYANMAR_EXT_A"
+ map["myanmar_extended_b"] = "MYANMAR_EXT_B"
+ map["myanmar"] = "MYANMAR"
+ map["nabataean"] = "NABATAEAN"
+ map["nag_mundari"] = "NAG_MUNDARI"
+ map["nandinagari"] = "NANDINAGARI"
+ map["newa"] = "NEWA"
+ map["new_tai_lue"] = "NEW_TAI_LUE"
+ map["nko"] = "NKO"
+ map["number_forms"] = "NUMBER_FORMS"
+ map["nushu"] = "NUSHU"
+ map["nyiakeng_puachue_hmong"] = "NYIAKENG_PUACHUE_HMONG"
+ map["ogham"] = "OGHAM"
+ map["ol_chiki"] = "OL_CHIKI"
+ map["old_hungarian"] = "OLD_HUNGARIAN"
+ map["old_italic"] = "OLD_ITALIC"
+ map["old_north_arabian"] = "OLD_NORTH_ARABIAN"
+ map["old_permic"] = "OLD_PERMIC"
+ map["old_persian"] = "OLD_PERSIAN"
+ map["old_sogdian"] = "OLD_SOGDIAN"
+ map["old_south_arabian"] = "OLD_SOUTH_ARABIAN"
+ map["old_turkic"] = "OLD_TURKIC"
+ map["old_uyghur"] = "OLD_UYGHUR"
+ map["optical_character_recognition"] = "OCR"
+ map["oriya"] = "ORIYA"
+ map["ornamental_dingbats"] = "ORNAMENTAL_DINGBATS"
+ map["osage"] = "OSAGE"
+ map["osmanya"] = "OSMANYA"
+ map["ottoman_siyaq_numbers"] = "OTTOMAN_SIYAQ_NUMBERS"
+ map["pahawh_hmong"] = "PAHAWH_HMONG"
+ map["palmyrene"] = "PALMYRENE"
+ map["pau_cin_hau"] = "PAU_CIN_HAU"
+ map["phags_pa"] = "PHAGS_PA"
+ map["phaistos_disc"] = "PHAISTOS"
+ map["phoenician"] = "PHOENICIAN"
+ map["phonetic_extensions"] = "PHONETIC_EXT"
+ map["phonetic_extensions_supplement"] = "PHONETIC_EXT_SUP"
+ map["playing_cards"] = "PLAYING_CARDS"
+ map["private_use_area"] = "PUA"
+ map["psalter_pahlavi"] = "PSALTER_PAHLAVI"
+ map["rejang"] = "REJANG"
+ map["rumi_numeral_symbols"] = "RUMI"
+ map["runic"] = "RUNIC"
+ map["samaritan"] = "SAMARITAN"
+ map["saurashtra"] = "SAURASHTRA"
+ map["sharada"] = "SHARADA"
+ map["shavian"] = "SHAVIAN"
+ map["shorthand_format_controls"] = "SHORTHAND_FORMAT_CONTROLS"
+ map["siddham"] = "SIDDHAM"
+ map["sinhala_archaic_numbers"] = "SINHALA_ARCHAIC_NUMBERS"
+ map["sinhala"] = "SINHALA"
+ map["small_form_variants"] = "SMALL_FORMS"
+ map["small_kana_extension"] = "SMALL_KANA_EXT"
+ map["sogdian"] = "SOGDIAN"
+ map["sora_sompeng"] = "SORA_SOMPENG"
+ map["soyombo"] = "SOYOMBO"
+ map["spacing_modifier_letters"] = "MODIFIER_LETTERS"
+ map["specials"] = "SPECIALS"
+ map["sundanese"] = "SUNDANESE"
+ map["sundanese_supplement"] = "SUNDANESE_SUP"
+ map["superscripts_and_subscripts"] = "SUPER_AND_SUB"
+ map["supplemental_arrows_a"] = "SUP_ARROWS_A"
+ map["supplemental_arrows_b"] = "SUP_ARROWS_B"
+ map["supplemental_arrows_c"] = "SUP_ARROWS_C"
+ map["supplemental_mathematical_operators"] = "SUP_MATH_OPERATORS"
+ map["supplemental_punctuation"] = "SUP_PUNCTUATION"
+ map["supplemental_symbols_and_pictographs"] = "SUP_SYMBOLS_AND_PICTOGRAPHS"
+ map["supplementary_private_use_area_a"] = "SUP_PUA_A"
+ map["supplementary_private_use_area_b"] = "SUP_PUA_B"
+ map["sutton_signwriting"] = "SUTTON_SIGNWRITING"
+ map["syloti_nagri"] = "SYLOTI_NAGRI"
+ map["symbols_and_pictographs_extended_a"] = "SYMBOLS_AND_PICTOGRAPHS_EXT_A"
+ map["symbols_for_legacy_computing"] = "SYMBOLS_FOR_LEGACY_COMPUTING"
+ map["syriac_supplement"] = "SYRIAC_SUP"
+ map["syriac"] = "SYRIAC"
+ map["tagalog"] = "TAGALOG"
+ map["tagbanwa"] = "TAGBANWA"
+ map["tags"] = "TAGS"
+ map["tai_le"] = "TAI_LE"
+ map["tai_tham"] = "TAI_THAM"
+ map["tai_viet"] = "TAI_VIET"
+ map["tai_xuan_jing_symbols"] = "TAI_XUAN_JING"
+ map["takri"] = "TAKRI"
+ map["tamil_supplement"] = "TAMIL_SUP"
+ map["tamil"] = "TAMIL"
+ map["tangsa"] = "TANGSA"
+ map["tangut_components"] = "TANGUT_COMPONENTS"
+ map["tangut_supplement"] = "TANGUT_SUP"
+ map["tangut"] = "TANGUT"
+ map["telugu"] = "TELUGU"
+ map["thaana"] = "THAANA"
+ map["thai"] = "THAI"
+ map["tibetan"] = "TIBETAN"
+ map["tifinagh"] = "TIFINAGH"
+ map["tirhuta"] = "TIRHUTA"
+ map["toto"] = "TOTO"
+ map["transport_and_map_symbols"] = "TRANSPORT_AND_MAP"
+ map["ugaritic"] = "UGARITIC"
+ map["unified_canadian_aboriginal_syllabics_extended_a"] = "UCAS_EXT_A"
+ map["unified_canadian_aboriginal_syllabics_extended"] = "UCAS_EXT"
+ map["unified_canadian_aboriginal_syllabics"] = "UCAS"
+ map["vai"] = "VAI"
+ map["variation_selectors_supplement"] = "VS_SUP"
+ map["variation_selectors"] = "VS"
+ map["vedic_extensions"] = "VEDIC_EXT"
+ map["vertical_forms"] = "VERTICAL_FORMS"
+ map["vithkuqi"] = "VITHKUQI"
+ map["wancho"] = "WANCHO"
+ map["warang_citi"] = "WARANG_CITI"
+ map["yezidi"] = "YEZIDI"
+ map["yijing_hexagram_symbols"] = "YIJING"
+ map["yi_radicals"] = "YI_RADICALS"
+ map["yi_syllables"] = "YI_SYLLABLES"
+ map["zanabazar_square"] = "ZANABAZAR_SQUARE"
+ map["znamenny_musical_notation"] = "ZNAMENNY_MUSIC"
+
+ print "/* This file is autogenerated by gen/prop/blk; DO NOT EDIT. */" # first line of the generated C file
+ print ""
+ print "#include \"__bsearch.h\""
+ print "#include \"macros.h\""
+ print "#include \"rune.h\""
+ print "#include \"unicode/prop.h\""
+ print ""
+}
+
+/^[^#]/ { # data lines only: a line must be non-empty and must not start with "#"
+ n = split($1, a, /\.\./) # $1 is the range "LO..HI"; a single value without ".." gives n == 1
+ lo = strtonum("0X" a[1]) # the range bounds are hexadecimal without a prefix
+ hi = strtonum("0X" a[n]) # a[n] is a[1] again when there was no ".."
+
+ gsub(/^; /, "", $2) # normalise the block name once per line rather than once per code point
+ gsub(/[- ]/, "_", $2) # spaces and hyphens become "_" so the name matches the keys of map
+ blk = "BLK_" map[tolower($2)] # enum constant name shared by the whole range
+ for (i = lo; i <= hi; i++)
+ props[i] = blk
+}
+
+END { # all input has been read; props[cp] holds the enum constant name for every code point inside a block
+ print "static constexpr enum uprop_blk lookup_lat1[] = {" # directly indexed table for code points 0x00..0xFF
+ for (i = 0; i < 0x100; i++) {
+ if (i % 8 == 0)
+ printf "\t"
+ printf "%-15s,%s", props[i] ? props[i] : 0, i % 8 == 7 ? "\n" : " " # eight entries per row; 0 when no block covers i
+ }
+ print "};"
+ print ""
+
+ print "static const struct {" # table of inclusive ranges for code points from 0x100 upwards
+ print "\trune lo, hi;"
+ print "\tenum uprop_blk val;"
+ print "} lookup[] = {"
+
+ for (i = 0x100; i <= 0x10FFFF; i++) {
+ if (!props[i]) # code point outside every block: no table entry
+ continue
+ lo = i
+ while (props[lo] == props[i + 1]) # extend the run while the next code point has the same value
+ i++
+ printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+ }
+
+ print "};"
+ print ""
+ print "__MLIB_DEFINE_BSEARCH(enum uprop_blk, lookup, BLK_NB)"
+ print ""
+ print "enum uprop_blk"
+ print "uprop_get_blk(rune ch)"
+ print "{"
+ print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);" # "<" not "<=": lookup_lat1 has 0x100 entries, so index 0x100 is out of bounds
+ print "}"
+}
+' data/Blocks | sed 's/\s*$//' # gawk reads data/Blocks; sed then strips trailing whitespace from each output line