#!/bin/sh

# Generates lib/unicode/prop/uprop_get_blk.c from data/Blocks.
#
# The input is expected to consist of records of the form
#
#     <lo>..<hi>; <Block Name>
#
# with hexadecimal code points (presumably a copy of the Unicode Character
# Database file Blocks.txt -- confirm against the data directory).  Lines
# beginning with "#" and empty lines are ignored.
#
# The generated C file contains:
#   - lookup_lat1[]: a direct-indexed table for code points 0x00..0xFF;
#   - lookup[]:      sorted {lo, hi, val} ranges for everything above that;
#   - uprop_get_blk(): picks between the two.
#
# Requires gawk (strtonum and hexadecimal constants are gawk extensions).

set -e
cd "${0%/*}/../.."
exec >lib/unicode/prop/uprop_get_blk.c

# NOTE: the awk program is wrapped in single quotes, so it must not contain
# any apostrophes -- not even inside comments.
gawk '
BEGIN {
	# Split on ";" with optional surrounding spaces; a trailing "#" comment
	# is consumed as part of the separator too.
	FS = " *(; *|#.*)"

	# Normalised block name (lowercased, spaces and hyphens replaced by
	# underscores) -> suffix of the BLK_* enumerator emitted for it.
	map["adlam"] = "ADLAM"
	map["aegean_numbers"] = "AEGEAN_NUMBERS"
	map["ahom"] = "AHOM"
	map["alchemical_symbols"] = "ALCHEMICAL"
	map["alphabetic_presentation_forms"] = "ALPHABETIC_PF"
	map["anatolian_hieroglyphs"] = "ANATOLIAN_HIEROGLYPHS"
	map["ancient_greek_musical_notation"] = "ANCIENT_GREEK_MUSIC"
	map["ancient_greek_numbers"] = "ANCIENT_GREEK_NUMBERS"
	map["ancient_symbols"] = "ANCIENT_SYMBOLS"
	map["arabic"] = "ARABIC"
	map["arabic_extended_a"] = "ARABIC_EXT_A"
	map["arabic_extended_b"] = "ARABIC_EXT_B"
	map["arabic_extended_c"] = "ARABIC_EXT_C"
	map["arabic_mathematical_alphabetic_symbols"] = "ARABIC_MATH"
	map["arabic_presentation_forms_a"] = "ARABIC_PF_A"
	map["arabic_presentation_forms_b"] = "ARABIC_PF_B"
	map["arabic_supplement"] = "ARABIC_SUP"
	map["armenian"] = "ARMENIAN"
	map["arrows"] = "ARROWS"
	map["avestan"] = "AVESTAN"
	map["balinese"] = "BALINESE"
	map["bamum"] = "BAMUM"
	map["bamum_supplement"] = "BAMUM_SUP"
	map["basic_latin"] = "ASCII"
	map["bassa_vah"] = "BASSA_VAH"
	map["batak"] = "BATAK"
	map["bengali"] = "BENGALI"
	map["bhaiksuki"] = "BHAIKSUKI"
	map["block_elements"] = "BLOCK_ELEMENTS"
	map["bopomofo"] = "BOPOMOFO"
	map["bopomofo_extended"] = "BOPOMOFO_EXT"
	map["box_drawing"] = "BOX_DRAWING"
	map["brahmi"] = "BRAHMI"
	map["braille_patterns"] = "BRAILLE"
	map["buginese"] = "BUGINESE"
	map["buhid"] = "BUHID"
	map["byzantine_musical_symbols"] = "BYZANTINE_MUSIC"
	map["carian"] = "CARIAN"
	map["caucasian_albanian"] = "CAUCASIAN_ALBANIAN"
	map["chakma"] = "CHAKMA"
	map["cham"] = "CHAM"
	map["cherokee"] = "CHEROKEE"
	map["cherokee_supplement"] = "CHEROKEE_SUP"
	map["chess_symbols"] = "CHESS_SYMBOLS"
	map["chorasmian"] = "CHORASMIAN"
	map["cjk_compatibility"] = "CJK_COMPAT"
	map["cjk_compatibility_forms"] = "CJK_COMPAT_FORMS"
	map["cjk_compatibility_ideographs"] = "CJK_COMPAT_IDEOGRAPHS"
	map["cjk_compatibility_ideographs_supplement"] = "CJK_COMPAT_IDEOGRAPHS_SUP"
	map["cjk_radicals_supplement"] = "CJK_RADICALS_SUP"
	map["cjk_strokes"] = "CJK_STROKES"
	map["cjk_symbols_and_punctuation"] = "CJK_SYMBOLS"
	map["cjk_unified_ideographs"] = "CJK"
	map["cjk_unified_ideographs_extension_a"] = "CJK_EXT_A"
	map["cjk_unified_ideographs_extension_b"] = "CJK_EXT_B"
	map["cjk_unified_ideographs_extension_c"] = "CJK_EXT_C"
	map["cjk_unified_ideographs_extension_d"] = "CJK_EXT_D"
	map["cjk_unified_ideographs_extension_e"] = "CJK_EXT_E"
	map["cjk_unified_ideographs_extension_f"] = "CJK_EXT_F"
	map["cjk_unified_ideographs_extension_g"] = "CJK_EXT_G"
	map["cjk_unified_ideographs_extension_h"] = "CJK_EXT_H"
	map["cjk_unified_ideographs_extension_i"] = "CJK_EXT_I"
	map["combining_diacritical_marks"] = "DIACRITICALS"
	map["combining_diacritical_marks_extended"] = "DIACRITICALS_EXT"
	map["combining_diacritical_marks_for_symbols"] = "DIACRITICALS_FOR_SYMBOLS"
	map["combining_diacritical_marks_supplement"] = "DIACRITICALS_SUP"
	map["combining_half_marks"] = "HALF_MARKS"
	map["common_indic_number_forms"] = "INDIC_NUMBER_FORMS"
	map["control_pictures"] = "CONTROL_PICTURES"
	map["coptic"] = "COPTIC"
	map["coptic_epact_numbers"] = "COPTIC_EPACT_NUMBERS"
	map["counting_rod_numerals"] = "COUNTING_ROD"
	map["cuneiform"] = "CUNEIFORM"
	map["cuneiform_numbers_and_punctuation"] = "CUNEIFORM_NUMBERS"
	map["currency_symbols"] = "CURRENCY_SYMBOLS"
	map["cypriot_syllabary"] = "CYPRIOT_SYLLABARY"
	map["cypro_minoan"] = "CYPRO_MINOAN"
	map["cyrillic"] = "CYRILLIC"
	map["cyrillic_extended_a"] = "CYRILLIC_EXT_A"
	map["cyrillic_extended_b"] = "CYRILLIC_EXT_B"
	map["cyrillic_extended_c"] = "CYRILLIC_EXT_C"
	map["cyrillic_extended_d"] = "CYRILLIC_EXT_D"
	map["cyrillic_supplement"] = "CYRILLIC_SUP"
	map["deseret"] = "DESERET"
	map["devanagari"] = "DEVANAGARI"
	map["devanagari_extended_a"] = "DEVANAGARI_EXT_A"
	map["devanagari_extended"] = "DEVANAGARI_EXT"
	map["dingbats"] = "DINGBATS"
	map["dives_akuru"] = "DIVES_AKURU"
	map["dogra"] = "DOGRA"
	map["domino_tiles"] = "DOMINO"
	map["duployan"] = "DUPLOYAN"
	map["early_dynastic_cuneiform"] = "EARLY_DYNASTIC_CUNEIFORM"
	map["egyptian_hieroglyph_format_controls"] = "EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS"
	map["egyptian_hieroglyphs"] = "EGYPTIAN_HIEROGLYPHS"
	map["elbasan"] = "ELBASAN"
	map["elymaic"] = "ELYMAIC"
	map["emoticons"] = "EMOTICONS"
	map["enclosed_alphanumerics"] = "ENCLOSED_ALPHANUM"
	map["enclosed_alphanumeric_supplement"] = "ENCLOSED_ALPHANUM_SUP"
	map["enclosed_cjk_letters_and_months"] = "ENCLOSED_CJK"
	map["enclosed_ideographic_supplement"] = "ENCLOSED_IDEOGRAPHIC_SUP"
	map["ethiopic"] = "ETHIOPIC"
	map["ethiopic_extended_a"] = "ETHIOPIC_EXT_A"
	map["ethiopic_extended_b"] = "ETHIOPIC_EXT_B"
	map["ethiopic_extended"] = "ETHIOPIC_EXT"
	map["ethiopic_supplement"] = "ETHIOPIC_SUP"
	map["general_punctuation"] = "PUNCTUATION"
	map["geometric_shapes_extended"] = "GEOMETRIC_SHAPES_EXT"
	map["geometric_shapes"] = "GEOMETRIC_SHAPES"
	map["georgian_extended"] = "GEORGIAN_EXT"
	map["georgian"] = "GEORGIAN"
	map["georgian_supplement"] = "GEORGIAN_SUP"
	map["glagolitic"] = "GLAGOLITIC"
	map["glagolitic_supplement"] = "GLAGOLITIC_SUP"
	map["gothic"] = "GOTHIC"
	map["grantha"] = "GRANTHA"
	map["greek_and_coptic"] = "GREEK"
	map["greek_extended"] = "GREEK_EXT"
	map["gujarati"] = "GUJARATI"
	map["gunjala_gondi"] = "GUNJALA_GONDI"
	map["gurmukhi"] = "GURMUKHI"
	map["halfwidth_and_fullwidth_forms"] = "HALF_AND_FULL_FORMS"
	map["hangul_compatibility_jamo"] = "COMPAT_JAMO"
	map["hangul_jamo_extended_a"] = "JAMO_EXT_A"
	map["hangul_jamo_extended_b"] = "JAMO_EXT_B"
	map["hangul_jamo"] = "JAMO"
	map["hangul_syllables"] = "HANGUL"
	map["hanifi_rohingya"] = "HANIFI_ROHINGYA"
	map["hanunoo"] = "HANUNOO"
	map["hatran"] = "HATRAN"
	map["hebrew"] = "HEBREW"
	map["high_private_use_surrogates"] = "HIGH_PU_SURROGATES"
	map["high_surrogates"] = "HIGH_SURROGATES"
	map["hiragana"] = "HIRAGANA"
	map["ideographic_description_characters"] = "IDC"
	map["ideographic_symbols_and_punctuation"] = "IDEOGRAPHIC_SYMBOLS"
	map["imperial_aramaic"] = "IMPERIAL_ARAMAIC"
	map["indic_siyaq_numbers"] = "INDIC_SIYAQ_NUMBERS"
	map["inscriptional_pahlavi"] = "INSCRIPTIONAL_PAHLAVI"
	map["inscriptional_parthian"] = "INSCRIPTIONAL_PARTHIAN"
	map["ipa_extensions"] = "IPA_EXT"
	map["javanese"] = "JAVANESE"
	map["kaithi"] = "KAITHI"
	map["kaktovik_numerals"] = "KAKTOVIK_NUMERALS"
	map["kana_extended_a"] = "KANA_EXT_A"
	map["kana_extended_b"] = "KANA_EXT_B"
	map["kana_supplement"] = "KANA_SUP"
	map["kanbun"] = "KANBUN"
	map["kangxi_radicals"] = "KANGXI"
	map["kannada"] = "KANNADA"
	map["katakana"] = "KATAKANA"
	map["katakana_phonetic_extensions"] = "KATAKANA_EXT"
	map["kawi"] = "KAWI"
	map["kayah_li"] = "KAYAH_LI"
	map["kharoshthi"] = "KHAROSHTHI"
	map["khitan_small_script"] = "KHITAN_SMALL_SCRIPT"
	map["khmer"] = "KHMER"
	map["khmer_symbols"] = "KHMER_SYMBOLS"
	map["khojki"] = "KHOJKI"
	map["khudawadi"] = "KHUDAWADI"
	map["lao"] = "LAO"
	map["latin_1_supplement"] = "LATIN_1_SUP"
	map["latin_extended_additional"] = "LATIN_EXT_ADDITIONAL"
	map["latin_extended_a"] = "LATIN_EXT_A"
	map["latin_extended_b"] = "LATIN_EXT_B"
	map["latin_extended_c"] = "LATIN_EXT_C"
	map["latin_extended_d"] = "LATIN_EXT_D"
	map["latin_extended_e"] = "LATIN_EXT_E"
	map["latin_extended_f"] = "LATIN_EXT_F"
	map["latin_extended_g"] = "LATIN_EXT_G"
	map["lepcha"] = "LEPCHA"
	map["letterlike_symbols"] = "LETTERLIKE_SYMBOLS"
	map["limbu"] = "LIMBU"
	map["linear_a"] = "LINEAR_A"
	map["linear_b_ideograms"] = "LINEAR_B_IDEOGRAMS"
	map["linear_b_syllabary"] = "LINEAR_B_SYLLABARY"
	map["lisu"] = "LISU"
	map["lisu_supplement"] = "LISU_SUP"
	map["low_surrogates"] = "LOW_SURROGATES"
	map["lycian"] = "LYCIAN"
	map["lydian"] = "LYDIAN"
	map["mahajani"] = "MAHAJANI"
	map["mahjong_tiles"] = "MAHJONG"
	map["makasar"] = "MAKASAR"
	map["malayalam"] = "MALAYALAM"
	map["mandaic"] = "MANDAIC"
	map["manichaean"] = "MANICHAEAN"
	map["marchen"] = "MARCHEN"
	map["masaram_gondi"] = "MASARAM_GONDI"
	map["mathematical_alphanumeric_symbols"] = "MATH_ALPHANUM"
	map["mathematical_operators"] = "MATH_OPERATORS"
	map["mayan_numerals"] = "MAYAN_NUMERALS"
	map["medefaidrin"] = "MEDEFAIDRIN"
	map["meetei_mayek_extensions"] = "MEETEI_MAYEK_EXT"
	map["meetei_mayek"] = "MEETEI_MAYEK"
	map["mende_kikakui"] = "MENDE_KIKAKUI"
	map["meroitic_cursive"] = "MEROITIC_CURSIVE"
	map["meroitic_hieroglyphs"] = "MEROITIC_HIEROGLYPHS"
	map["miao"] = "MIAO"
	map["miscellaneous_mathematical_symbols_a"] = "MISC_MATH_SYMBOLS_A"
	map["miscellaneous_mathematical_symbols_b"] = "MISC_MATH_SYMBOLS_B"
	map["miscellaneous_symbols_and_arrows"] = "MISC_ARROWS"
	map["miscellaneous_symbols_and_pictographs"] = "MISC_PICTOGRAPHS"
	map["miscellaneous_symbols"] = "MISC_SYMBOLS"
	map["miscellaneous_technical"] = "MISC_TECHNICAL"
	map["modifier_tone_letters"] = "MODIFIER_TONE_LETTERS"
	map["modi"] = "MODI"
	map["mongolian"] = "MONGOLIAN"
	map["mongolian_supplement"] = "MONGOLIAN_SUP"
	map["mro"] = "MRO"
	map["multani"] = "MULTANI"
	map["musical_symbols"] = "MUSIC"
	map["myanmar_extended_a"] = "MYANMAR_EXT_A"
	map["myanmar_extended_b"] = "MYANMAR_EXT_B"
	map["myanmar"] = "MYANMAR"
	map["nabataean"] = "NABATAEAN"
	map["nag_mundari"] = "NAG_MUNDARI"
	map["nandinagari"] = "NANDINAGARI"
	map["newa"] = "NEWA"
	map["new_tai_lue"] = "NEW_TAI_LUE"
	map["nko"] = "NKO"
	map["number_forms"] = "NUMBER_FORMS"
	map["nushu"] = "NUSHU"
	map["nyiakeng_puachue_hmong"] = "NYIAKENG_PUACHUE_HMONG"
	map["ogham"] = "OGHAM"
	map["ol_chiki"] = "OL_CHIKI"
	map["old_hungarian"] = "OLD_HUNGARIAN"
	map["old_italic"] = "OLD_ITALIC"
	map["old_north_arabian"] = "OLD_NORTH_ARABIAN"
	map["old_permic"] = "OLD_PERMIC"
	map["old_persian"] = "OLD_PERSIAN"
	map["old_sogdian"] = "OLD_SOGDIAN"
	map["old_south_arabian"] = "OLD_SOUTH_ARABIAN"
	map["old_turkic"] = "OLD_TURKIC"
	map["old_uyghur"] = "OLD_UYGHUR"
	map["optical_character_recognition"] = "OCR"
	map["oriya"] = "ORIYA"
	map["ornamental_dingbats"] = "ORNAMENTAL_DINGBATS"
	map["osage"] = "OSAGE"
	map["osmanya"] = "OSMANYA"
	map["ottoman_siyaq_numbers"] = "OTTOMAN_SIYAQ_NUMBERS"
	map["pahawh_hmong"] = "PAHAWH_HMONG"
	map["palmyrene"] = "PALMYRENE"
	map["pau_cin_hau"] = "PAU_CIN_HAU"
	map["phags_pa"] = "PHAGS_PA"
	map["phaistos_disc"] = "PHAISTOS"
	map["phoenician"] = "PHOENICIAN"
	map["phonetic_extensions"] = "PHONETIC_EXT"
	map["phonetic_extensions_supplement"] = "PHONETIC_EXT_SUP"
	map["playing_cards"] = "PLAYING_CARDS"
	map["private_use_area"] = "PUA"
	map["psalter_pahlavi"] = "PSALTER_PAHLAVI"
	map["rejang"] = "REJANG"
	map["rumi_numeral_symbols"] = "RUMI"
	map["runic"] = "RUNIC"
	map["samaritan"] = "SAMARITAN"
	map["saurashtra"] = "SAURASHTRA"
	map["sharada"] = "SHARADA"
	map["shavian"] = "SHAVIAN"
	map["shorthand_format_controls"] = "SHORTHAND_FORMAT_CONTROLS"
	map["siddham"] = "SIDDHAM"
	map["sinhala_archaic_numbers"] = "SINHALA_ARCHAIC_NUMBERS"
	map["sinhala"] = "SINHALA"
	map["small_form_variants"] = "SMALL_FORMS"
	map["small_kana_extension"] = "SMALL_KANA_EXT"
	map["sogdian"] = "SOGDIAN"
	map["sora_sompeng"] = "SORA_SOMPENG"
	map["soyombo"] = "SOYOMBO"
	map["spacing_modifier_letters"] = "MODIFIER_LETTERS"
	map["specials"] = "SPECIALS"
	map["sundanese"] = "SUNDANESE"
	map["sundanese_supplement"] = "SUNDANESE_SUP"
	map["superscripts_and_subscripts"] = "SUPER_AND_SUB"
	map["supplemental_arrows_a"] = "SUP_ARROWS_A"
	map["supplemental_arrows_b"] = "SUP_ARROWS_B"
	map["supplemental_arrows_c"] = "SUP_ARROWS_C"
	map["supplemental_mathematical_operators"] = "SUP_MATH_OPERATORS"
	map["supplemental_punctuation"] = "SUP_PUNCTUATION"
	map["supplemental_symbols_and_pictographs"] = "SUP_SYMBOLS_AND_PICTOGRAPHS"
	map["supplementary_private_use_area_a"] = "SUP_PUA_A"
	map["supplementary_private_use_area_b"] = "SUP_PUA_B"
	map["sutton_signwriting"] = "SUTTON_SIGNWRITING"
	map["syloti_nagri"] = "SYLOTI_NAGRI"
	map["symbols_and_pictographs_extended_a"] = "SYMBOLS_AND_PICTOGRAPHS_EXT_A"
	map["symbols_for_legacy_computing"] = "SYMBOLS_FOR_LEGACY_COMPUTING"
	map["syriac_supplement"] = "SYRIAC_SUP"
	map["syriac"] = "SYRIAC"
	map["tagalog"] = "TAGALOG"
	map["tagbanwa"] = "TAGBANWA"
	map["tags"] = "TAGS"
	map["tai_le"] = "TAI_LE"
	map["tai_tham"] = "TAI_THAM"
	map["tai_viet"] = "TAI_VIET"
	map["tai_xuan_jing_symbols"] = "TAI_XUAN_JING"
	map["takri"] = "TAKRI"
	map["tamil_supplement"] = "TAMIL_SUP"
	map["tamil"] = "TAMIL"
	map["tangsa"] = "TANGSA"
	map["tangut_components"] = "TANGUT_COMPONENTS"
	map["tangut_supplement"] = "TANGUT_SUP"
	map["tangut"] = "TANGUT"
	map["telugu"] = "TELUGU"
	map["thaana"] = "THAANA"
	map["thai"] = "THAI"
	map["tibetan"] = "TIBETAN"
	map["tifinagh"] = "TIFINAGH"
	map["tirhuta"] = "TIRHUTA"
	map["toto"] = "TOTO"
	map["transport_and_map_symbols"] = "TRANSPORT_AND_MAP"
	map["ugaritic"] = "UGARITIC"
	map["unified_canadian_aboriginal_syllabics_extended_a"] = "UCAS_EXT_A"
	map["unified_canadian_aboriginal_syllabics_extended"] = "UCAS_EXT"
	map["unified_canadian_aboriginal_syllabics"] = "UCAS"
	map["vai"] = "VAI"
	map["variation_selectors_supplement"] = "VS_SUP"
	map["variation_selectors"] = "VS"
	map["vedic_extensions"] = "VEDIC_EXT"
	map["vertical_forms"] = "VERTICAL_FORMS"
	map["vithkuqi"] = "VITHKUQI"
	map["wancho"] = "WANCHO"
	map["warang_citi"] = "WARANG_CITI"
	map["yezidi"] = "YEZIDI"
	map["yijing_hexagram_symbols"] = "YIJING"
	map["yi_radicals"] = "YI_RADICALS"
	map["yi_syllables"] = "YI_SYLLABLES"
	map["zanabazar_square"] = "ZANABAZAR_SQUARE"
	map["znamenny_musical_notation"] = "ZNAMENNY_MUSIC"

	# Preamble of the generated C file.
	print "/* This file is autogenerated by gen/prop/blk; DO NOT EDIT. */"
	print ""
	print "#include \"_bsearch.h\""
	print "#include \"macros.h\""
	print "#include \"rune.h\""
	print "#include \"unicode/prop.h\""
	print ""
}

# Data records: every non-empty line that does not start with "#".
/^[^#]/ {
	# $1 is "<lo>..<hi>"; a[n] is used for the upper bound so that a field
	# without ".." is treated as a single code point.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	# Normalise the block name once per record rather than once per code
	# point; the result is the same for every code point in the range.
	gsub(/^; /, "", $2)
	gsub(/[- ]/, "_", $2)
	key = tolower($2)

	# An unknown name still yields the bare identifier BLK_ in the output
	# (as before), but it is now reported instead of passing silently.
	if (!(key in map))
		printf "blk: no BLK_* mapping for block \"%s\"\n", $2 >"/dev/stderr"
	val = "BLK_" map[key]

	for (i = lo; i <= hi; i++)
		props[i] = val
}

END {
	# Direct-indexed table for 0x00..0xFF, eight entries per row.  Code
	# points without a block are emitted as a literal 0.
	print "static constexpr enum uprop_blk lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		if (i % 8 == 0)
			printf "\t"
		printf "%-15s,%s", (props[i] ? props[i] : 0), (i % 8 == 7 ? "\n" : " ")
	}
	print "};"
	print ""

	# Range table for everything above 0xFF: runs of consecutive code
	# points sharing the same block collapse into one {lo, hi, val} entry.
	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_blk val;"
	print "} lookup[] = {"
	for (i = 0x100; i <= 0x10FFFF; i++) {
		if (!props[i])
			continue
		lo = i
		# Advance i to the last code point of the current run.
		while (props[lo] == props[i + 1])
			i++
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
	}
	print "};"
	print ""

	print "_MLIB_DEFINE_BSEARCH(enum uprop_blk, lookup, BLK_NB)"
	print ""
	print "enum uprop_blk"
	print "uprop_get_blk(rune ch)"
	print "{"
	# lookup_lat1 has 0x100 entries, so valid indices are strictly less
	# than lengthof(lookup_lat1); "<=" would read one past the end for
	# ch == 0x100.
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/Blocks | sed 's/[[:space:]]*$//'