diff options
Diffstat (limited to 'gen')
-rwxr-xr-x | gen/data-files | 1 | ||||
-rwxr-xr-x | gen/prop/blk | 395 |
2 files changed, 396 insertions, 0 deletions
#!/bin/sh

# gen/prop/blk -- generate lib/unicode/prop/uprop_get_blk.c from data/Blocks.
#
# Input:  data/Blocks.  The accompanying change to gen/data-files adds
#         `Blocks` to PATHS, which is resolved against
#         https://www.unicode.org/Public/UCD/latest/ucd.
# Output: a C source file containing
#           - lookup_lat1[]: a direct-index table for U+0000..U+00FF,
#           - lookup[]:      sorted {lo, hi, val} ranges for U+0100 and up,
#           - uprop_get_blk(rune ch), which picks between the two.
#
# Because the input tracks the latest UCD, a new Unicode release may add
# blocks that have no entry in the map below.  Those are reported on stderr
# and the generated file is poisoned with an #error directive instead of
# silently emitting the invalid enumerator `BLK_`.

set -e
cd "${0%/*}/../.."
exec >lib/unicode/prop/uprop_get_blk.c

gawk '
BEGIN {
	# Fields are separated by ";" (with optional surrounding blanks); a
	# trailing "# comment" is swallowed by the separator as well.
	FS = " *(; *|#.*)"

	# Normalized block name (lowercase, "-" and " " replaced by "_")
	# -> suffix of the BLK_* enumerator.
	map["adlam"] = "ADLAM"
	map["aegean_numbers"] = "AEGEAN_NUMBERS"
	map["ahom"] = "AHOM"
	map["alchemical_symbols"] = "ALCHEMICAL"
	map["alphabetic_presentation_forms"] = "ALPHABETIC_PF"
	map["anatolian_hieroglyphs"] = "ANATOLIAN_HIEROGLYPHS"
	map["ancient_greek_musical_notation"] = "ANCIENT_GREEK_MUSIC"
	map["ancient_greek_numbers"] = "ANCIENT_GREEK_NUMBERS"
	map["ancient_symbols"] = "ANCIENT_SYMBOLS"
	map["arabic"] = "ARABIC"
	map["arabic_extended_a"] = "ARABIC_EXT_A"
	map["arabic_extended_b"] = "ARABIC_EXT_B"
	map["arabic_extended_c"] = "ARABIC_EXT_C"
	map["arabic_mathematical_alphabetic_symbols"] = "ARABIC_MATH"
	map["arabic_presentation_forms_a"] = "ARABIC_PF_A"
	map["arabic_presentation_forms_b"] = "ARABIC_PF_B"
	map["arabic_supplement"] = "ARABIC_SUP"
	map["armenian"] = "ARMENIAN"
	map["arrows"] = "ARROWS"
	map["avestan"] = "AVESTAN"
	map["balinese"] = "BALINESE"
	map["bamum"] = "BAMUM"
	map["bamum_supplement"] = "BAMUM_SUP"
	map["basic_latin"] = "ASCII"
	map["bassa_vah"] = "BASSA_VAH"
	map["batak"] = "BATAK"
	map["bengali"] = "BENGALI"
	map["bhaiksuki"] = "BHAIKSUKI"
	map["block_elements"] = "BLOCK_ELEMENTS"
	map["bopomofo"] = "BOPOMOFO"
	map["bopomofo_extended"] = "BOPOMOFO_EXT"
	map["box_drawing"] = "BOX_DRAWING"
	map["brahmi"] = "BRAHMI"
	map["braille_patterns"] = "BRAILLE"
	map["buginese"] = "BUGINESE"
	map["buhid"] = "BUHID"
	map["byzantine_musical_symbols"] = "BYZANTINE_MUSIC"
	map["carian"] = "CARIAN"
	map["caucasian_albanian"] = "CAUCASIAN_ALBANIAN"
	map["chakma"] = "CHAKMA"
	map["cham"] = "CHAM"
	map["cherokee"] = "CHEROKEE"
	map["cherokee_supplement"] = "CHEROKEE_SUP"
	map["chess_symbols"] = "CHESS_SYMBOLS"
	map["chorasmian"] = "CHORASMIAN"
	map["cjk_compatibility"] = "CJK_COMPAT"
	map["cjk_compatibility_forms"] = "CJK_COMPAT_FORMS"
	map["cjk_compatibility_ideographs"] = "CJK_COMPAT_IDEOGRAPHS"
	map["cjk_compatibility_ideographs_supplement"] = "CJK_COMPAT_IDEOGRAPHS_SUP"
	map["cjk_radicals_supplement"] = "CJK_RADICALS_SUP"
	map["cjk_strokes"] = "CJK_STROKES"
	map["cjk_symbols_and_punctuation"] = "CJK_SYMBOLS"
	map["cjk_unified_ideographs"] = "CJK"
	map["cjk_unified_ideographs_extension_a"] = "CJK_EXT_A"
	map["cjk_unified_ideographs_extension_b"] = "CJK_EXT_B"
	map["cjk_unified_ideographs_extension_c"] = "CJK_EXT_C"
	map["cjk_unified_ideographs_extension_d"] = "CJK_EXT_D"
	map["cjk_unified_ideographs_extension_e"] = "CJK_EXT_E"
	map["cjk_unified_ideographs_extension_f"] = "CJK_EXT_F"
	map["cjk_unified_ideographs_extension_g"] = "CJK_EXT_G"
	map["cjk_unified_ideographs_extension_h"] = "CJK_EXT_H"
	map["cjk_unified_ideographs_extension_i"] = "CJK_EXT_I"
	map["combining_diacritical_marks"] = "DIACRITICALS"
	map["combining_diacritical_marks_extended"] = "DIACRITICALS_EXT"
	map["combining_diacritical_marks_for_symbols"] = "DIACRITICALS_FOR_SYMBOLS"
	map["combining_diacritical_marks_supplement"] = "DIACRITICALS_SUP"
	map["combining_half_marks"] = "HALF_MARKS"
	map["common_indic_number_forms"] = "INDIC_NUMBER_FORMS"
	map["control_pictures"] = "CONTROL_PICTURES"
	map["coptic"] = "COPTIC"
	map["coptic_epact_numbers"] = "COPTIC_EPACT_NUMBERS"
	map["counting_rod_numerals"] = "COUNTING_ROD"
	map["cuneiform"] = "CUNEIFORM"
	map["cuneiform_numbers_and_punctuation"] = "CUNEIFORM_NUMBERS"
	map["currency_symbols"] = "CURRENCY_SYMBOLS"
	map["cypriot_syllabary"] = "CYPRIOT_SYLLABARY"
	map["cypro_minoan"] = "CYPRO_MINOAN"
	map["cyrillic"] = "CYRILLIC"
	map["cyrillic_extended_a"] = "CYRILLIC_EXT_A"
	map["cyrillic_extended_b"] = "CYRILLIC_EXT_B"
	map["cyrillic_extended_c"] = "CYRILLIC_EXT_C"
	map["cyrillic_extended_d"] = "CYRILLIC_EXT_D"
	map["cyrillic_supplement"] = "CYRILLIC_SUP"
	map["deseret"] = "DESERET"
	map["devanagari"] = "DEVANAGARI"
	map["devanagari_extended_a"] = "DEVANAGARI_EXT_A"
	map["devanagari_extended"] = "DEVANAGARI_EXT"
	map["dingbats"] = "DINGBATS"
	map["dives_akuru"] = "DIVES_AKURU"
	map["dogra"] = "DOGRA"
	map["domino_tiles"] = "DOMINO"
	map["duployan"] = "DUPLOYAN"
	map["early_dynastic_cuneiform"] = "EARLY_DYNASTIC_CUNEIFORM"
	map["egyptian_hieroglyph_format_controls"] = "EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS"
	map["egyptian_hieroglyphs"] = "EGYPTIAN_HIEROGLYPHS"
	map["elbasan"] = "ELBASAN"
	map["elymaic"] = "ELYMAIC"
	map["emoticons"] = "EMOTICONS"
	map["enclosed_alphanumerics"] = "ENCLOSED_ALPHANUM"
	map["enclosed_alphanumeric_supplement"] = "ENCLOSED_ALPHANUM_SUP"
	map["enclosed_cjk_letters_and_months"] = "ENCLOSED_CJK"
	map["enclosed_ideographic_supplement"] = "ENCLOSED_IDEOGRAPHIC_SUP"
	map["ethiopic"] = "ETHIOPIC"
	map["ethiopic_extended_a"] = "ETHIOPIC_EXT_A"
	map["ethiopic_extended_b"] = "ETHIOPIC_EXT_B"
	map["ethiopic_extended"] = "ETHIOPIC_EXT"
	map["ethiopic_supplement"] = "ETHIOPIC_SUP"
	map["general_punctuation"] = "PUNCTUATION"
	map["geometric_shapes_extended"] = "GEOMETRIC_SHAPES_EXT"
	map["geometric_shapes"] = "GEOMETRIC_SHAPES"
	map["georgian_extended"] = "GEORGIAN_EXT"
	map["georgian"] = "GEORGIAN"
	map["georgian_supplement"] = "GEORGIAN_SUP"
	map["glagolitic"] = "GLAGOLITIC"
	map["glagolitic_supplement"] = "GLAGOLITIC_SUP"
	map["gothic"] = "GOTHIC"
	map["grantha"] = "GRANTHA"
	map["greek_and_coptic"] = "GREEK"
	map["greek_extended"] = "GREEK_EXT"
	map["gujarati"] = "GUJARATI"
	map["gunjala_gondi"] = "GUNJALA_GONDI"
	map["gurmukhi"] = "GURMUKHI"
	map["halfwidth_and_fullwidth_forms"] = "HALF_AND_FULL_FORMS"
	map["hangul_compatibility_jamo"] = "COMPAT_JAMO"
	map["hangul_jamo_extended_a"] = "JAMO_EXT_A"
	map["hangul_jamo_extended_b"] = "JAMO_EXT_B"
	map["hangul_jamo"] = "JAMO"
	map["hangul_syllables"] = "HANGUL"
	map["hanifi_rohingya"] = "HANIFI_ROHINGYA"
	map["hanunoo"] = "HANUNOO"
	map["hatran"] = "HATRAN"
	map["hebrew"] = "HEBREW"
	map["high_private_use_surrogates"] = "HIGH_PU_SURROGATES"
	map["high_surrogates"] = "HIGH_SURROGATES"
	map["hiragana"] = "HIRAGANA"
	map["ideographic_description_characters"] = "IDC"
	map["ideographic_symbols_and_punctuation"] = "IDEOGRAPHIC_SYMBOLS"
	map["imperial_aramaic"] = "IMPERIAL_ARAMAIC"
	map["indic_siyaq_numbers"] = "INDIC_SIYAQ_NUMBERS"
	map["inscriptional_pahlavi"] = "INSCRIPTIONAL_PAHLAVI"
	map["inscriptional_parthian"] = "INSCRIPTIONAL_PARTHIAN"
	map["ipa_extensions"] = "IPA_EXT"
	map["javanese"] = "JAVANESE"
	map["kaithi"] = "KAITHI"
	map["kaktovik_numerals"] = "KAKTOVIK_NUMERALS"
	map["kana_extended_a"] = "KANA_EXT_A"
	map["kana_extended_b"] = "KANA_EXT_B"
	map["kana_supplement"] = "KANA_SUP"
	map["kanbun"] = "KANBUN"
	map["kangxi_radicals"] = "KANGXI"
	map["kannada"] = "KANNADA"
	map["katakana"] = "KATAKANA"
	map["katakana_phonetic_extensions"] = "KATAKANA_EXT"
	map["kawi"] = "KAWI"
	map["kayah_li"] = "KAYAH_LI"
	map["kharoshthi"] = "KHAROSHTHI"
	map["khitan_small_script"] = "KHITAN_SMALL_SCRIPT"
	map["khmer"] = "KHMER"
	map["khmer_symbols"] = "KHMER_SYMBOLS"
	map["khojki"] = "KHOJKI"
	map["khudawadi"] = "KHUDAWADI"
	map["lao"] = "LAO"
	map["latin_1_supplement"] = "LATIN_1_SUP"
	map["latin_extended_additional"] = "LATIN_EXT_ADDITIONAL"
	map["latin_extended_a"] = "LATIN_EXT_A"
	map["latin_extended_b"] = "LATIN_EXT_B"
	map["latin_extended_c"] = "LATIN_EXT_C"
	map["latin_extended_d"] = "LATIN_EXT_D"
	map["latin_extended_e"] = "LATIN_EXT_E"
	map["latin_extended_f"] = "LATIN_EXT_F"
	map["latin_extended_g"] = "LATIN_EXT_G"
	map["lepcha"] = "LEPCHA"
	map["letterlike_symbols"] = "LETTERLIKE_SYMBOLS"
	map["limbu"] = "LIMBU"
	map["linear_a"] = "LINEAR_A"
	map["linear_b_ideograms"] = "LINEAR_B_IDEOGRAMS"
	map["linear_b_syllabary"] = "LINEAR_B_SYLLABARY"
	map["lisu"] = "LISU"
	map["lisu_supplement"] = "LISU_SUP"
	map["low_surrogates"] = "LOW_SURROGATES"
	map["lycian"] = "LYCIAN"
	map["lydian"] = "LYDIAN"
	map["mahajani"] = "MAHAJANI"
	map["mahjong_tiles"] = "MAHJONG"
	map["makasar"] = "MAKASAR"
	map["malayalam"] = "MALAYALAM"
	map["mandaic"] = "MANDAIC"
	map["manichaean"] = "MANICHAEAN"
	map["marchen"] = "MARCHEN"
	map["masaram_gondi"] = "MASARAM_GONDI"
	map["mathematical_alphanumeric_symbols"] = "MATH_ALPHANUM"
	map["mathematical_operators"] = "MATH_OPERATORS"
	map["mayan_numerals"] = "MAYAN_NUMERALS"
	map["medefaidrin"] = "MEDEFAIDRIN"
	map["meetei_mayek_extensions"] = "MEETEI_MAYEK_EXT"
	map["meetei_mayek"] = "MEETEI_MAYEK"
	map["mende_kikakui"] = "MENDE_KIKAKUI"
	map["meroitic_cursive"] = "MEROITIC_CURSIVE"
	map["meroitic_hieroglyphs"] = "MEROITIC_HIEROGLYPHS"
	map["miao"] = "MIAO"
	map["miscellaneous_mathematical_symbols_a"] = "MISC_MATH_SYMBOLS_A"
	map["miscellaneous_mathematical_symbols_b"] = "MISC_MATH_SYMBOLS_B"
	map["miscellaneous_symbols_and_arrows"] = "MISC_ARROWS"
	map["miscellaneous_symbols_and_pictographs"] = "MISC_PICTOGRAPHS"
	map["miscellaneous_symbols"] = "MISC_SYMBOLS"
	map["miscellaneous_technical"] = "MISC_TECHNICAL"
	map["modifier_tone_letters"] = "MODIFIER_TONE_LETTERS"
	map["modi"] = "MODI"
	map["mongolian"] = "MONGOLIAN"
	map["mongolian_supplement"] = "MONGOLIAN_SUP"
	map["mro"] = "MRO"
	map["multani"] = "MULTANI"
	map["musical_symbols"] = "MUSIC"
	map["myanmar_extended_a"] = "MYANMAR_EXT_A"
	map["myanmar_extended_b"] = "MYANMAR_EXT_B"
	map["myanmar"] = "MYANMAR"
	map["nabataean"] = "NABATAEAN"
	map["nag_mundari"] = "NAG_MUNDARI"
	map["nandinagari"] = "NANDINAGARI"
	map["newa"] = "NEWA"
	map["new_tai_lue"] = "NEW_TAI_LUE"
	map["nko"] = "NKO"
	map["number_forms"] = "NUMBER_FORMS"
	map["nushu"] = "NUSHU"
	map["nyiakeng_puachue_hmong"] = "NYIAKENG_PUACHUE_HMONG"
	map["ogham"] = "OGHAM"
	map["ol_chiki"] = "OL_CHIKI"
	map["old_hungarian"] = "OLD_HUNGARIAN"
	map["old_italic"] = "OLD_ITALIC"
	map["old_north_arabian"] = "OLD_NORTH_ARABIAN"
	map["old_permic"] = "OLD_PERMIC"
	map["old_persian"] = "OLD_PERSIAN"
	map["old_sogdian"] = "OLD_SOGDIAN"
	map["old_south_arabian"] = "OLD_SOUTH_ARABIAN"
	map["old_turkic"] = "OLD_TURKIC"
	map["old_uyghur"] = "OLD_UYGHUR"
	map["optical_character_recognition"] = "OCR"
	map["oriya"] = "ORIYA"
	map["ornamental_dingbats"] = "ORNAMENTAL_DINGBATS"
	map["osage"] = "OSAGE"
	map["osmanya"] = "OSMANYA"
	map["ottoman_siyaq_numbers"] = "OTTOMAN_SIYAQ_NUMBERS"
	map["pahawh_hmong"] = "PAHAWH_HMONG"
	map["palmyrene"] = "PALMYRENE"
	map["pau_cin_hau"] = "PAU_CIN_HAU"
	map["phags_pa"] = "PHAGS_PA"
	map["phaistos_disc"] = "PHAISTOS"
	map["phoenician"] = "PHOENICIAN"
	map["phonetic_extensions"] = "PHONETIC_EXT"
	map["phonetic_extensions_supplement"] = "PHONETIC_EXT_SUP"
	map["playing_cards"] = "PLAYING_CARDS"
	map["private_use_area"] = "PUA"
	map["psalter_pahlavi"] = "PSALTER_PAHLAVI"
	map["rejang"] = "REJANG"
	map["rumi_numeral_symbols"] = "RUMI"
	map["runic"] = "RUNIC"
	map["samaritan"] = "SAMARITAN"
	map["saurashtra"] = "SAURASHTRA"
	map["sharada"] = "SHARADA"
	map["shavian"] = "SHAVIAN"
	map["shorthand_format_controls"] = "SHORTHAND_FORMAT_CONTROLS"
	map["siddham"] = "SIDDHAM"
	map["sinhala_archaic_numbers"] = "SINHALA_ARCHAIC_NUMBERS"
	map["sinhala"] = "SINHALA"
	map["small_form_variants"] = "SMALL_FORMS"
	map["small_kana_extension"] = "SMALL_KANA_EXT"
	map["sogdian"] = "SOGDIAN"
	map["sora_sompeng"] = "SORA_SOMPENG"
	map["soyombo"] = "SOYOMBO"
	map["spacing_modifier_letters"] = "MODIFIER_LETTERS"
	map["specials"] = "SPECIALS"
	map["sundanese"] = "SUNDANESE"
	map["sundanese_supplement"] = "SUNDANESE_SUP"
	map["superscripts_and_subscripts"] = "SUPER_AND_SUB"
	map["supplemental_arrows_a"] = "SUP_ARROWS_A"
	map["supplemental_arrows_b"] = "SUP_ARROWS_B"
	map["supplemental_arrows_c"] = "SUP_ARROWS_C"
	map["supplemental_mathematical_operators"] = "SUP_MATH_OPERATORS"
	map["supplemental_punctuation"] = "SUP_PUNCTUATION"
	map["supplemental_symbols_and_pictographs"] = "SUP_SYMBOLS_AND_PICTOGRAPHS"
	map["supplementary_private_use_area_a"] = "SUP_PUA_A"
	map["supplementary_private_use_area_b"] = "SUP_PUA_B"
	map["sutton_signwriting"] = "SUTTON_SIGNWRITING"
	map["syloti_nagri"] = "SYLOTI_NAGRI"
	map["symbols_and_pictographs_extended_a"] = "SYMBOLS_AND_PICTOGRAPHS_EXT_A"
	map["symbols_for_legacy_computing"] = "SYMBOLS_FOR_LEGACY_COMPUTING"
	map["syriac_supplement"] = "SYRIAC_SUP"
	map["syriac"] = "SYRIAC"
	map["tagalog"] = "TAGALOG"
	map["tagbanwa"] = "TAGBANWA"
	map["tags"] = "TAGS"
	map["tai_le"] = "TAI_LE"
	map["tai_tham"] = "TAI_THAM"
	map["tai_viet"] = "TAI_VIET"
	map["tai_xuan_jing_symbols"] = "TAI_XUAN_JING"
	map["takri"] = "TAKRI"
	map["tamil_supplement"] = "TAMIL_SUP"
	map["tamil"] = "TAMIL"
	map["tangsa"] = "TANGSA"
	map["tangut_components"] = "TANGUT_COMPONENTS"
	map["tangut_supplement"] = "TANGUT_SUP"
	map["tangut"] = "TANGUT"
	map["telugu"] = "TELUGU"
	map["thaana"] = "THAANA"
	map["thai"] = "THAI"
	map["tibetan"] = "TIBETAN"
	map["tifinagh"] = "TIFINAGH"
	map["tirhuta"] = "TIRHUTA"
	map["toto"] = "TOTO"
	map["transport_and_map_symbols"] = "TRANSPORT_AND_MAP"
	map["ugaritic"] = "UGARITIC"
	map["unified_canadian_aboriginal_syllabics_extended_a"] = "UCAS_EXT_A"
	map["unified_canadian_aboriginal_syllabics_extended"] = "UCAS_EXT"
	map["unified_canadian_aboriginal_syllabics"] = "UCAS"
	map["vai"] = "VAI"
	map["variation_selectors_supplement"] = "VS_SUP"
	map["variation_selectors"] = "VS"
	map["vedic_extensions"] = "VEDIC_EXT"
	map["vertical_forms"] = "VERTICAL_FORMS"
	map["vithkuqi"] = "VITHKUQI"
	map["wancho"] = "WANCHO"
	map["warang_citi"] = "WARANG_CITI"
	map["yezidi"] = "YEZIDI"
	map["yijing_hexagram_symbols"] = "YIJING"
	map["yi_radicals"] = "YI_RADICALS"
	map["yi_syllables"] = "YI_SYLLABLES"
	map["zanabazar_square"] = "ZANABAZAR_SQUARE"
	map["znamenny_musical_notation"] = "ZNAMENNY_MUSIC"

	print "/* This file is autogenerated by gen/prop/blk; DO NOT EDIT. */"
	print ""
	print "#include \"__bsearch.h\""
	print "#include \"macros.h\""
	print "#include \"rune.h\""
	print "#include \"unicode/prop.h\""
	print ""
}

# Data records look like "0000..007F; Basic Latin": $1 is the inclusive
# code point range, $2 is the block name.
/^[^#]/ {
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	# Normalize the name once per record rather than once per code point;
	# a single block can span tens of thousands of code points.
	key = $2
	gsub(/^; /, "", key)
	gsub(/[- ]/, "_", key)
	key = tolower(key)

	# A block without a map entry would otherwise be emitted as the bare
	# identifier "BLK_".  Report every such block and keep scanning so
	# that a single run lists all of the missing names.
	if (!(key in map)) {
		printf "gen/prop/blk: no enum alias for block \"%s\"\n", $2 > "/dev/stderr"
		failed = 1
		next
	}

	val = "BLK_" map[key]
	for (i = lo; i <= hi; i++)
		props[i] = val
}

END {
	if (failed) {
		# The output is piped through sed, so the invoking shell does
		# not observe this exit status.  Poison the generated file as
		# well so that the C build stops with a clear message.
		print "#error \"gen/prop/blk: data/Blocks names a block with no enum alias\""
		exit 1
	}

	# Direct-index table for U+0000..U+00FF, eight entries per line.
	print "static constexpr enum uprop_blk lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		if (i % 8 == 0)
			printf "\t"
		printf "%-15s,%s", (i in props) ? props[i] : 0, (i % 8 == 7) ? "\n" : " "
	}
	print "};"
	print ""

	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_blk val;"
	print "} lookup[] = {"

	# Collapse consecutive code points that share a value into a single
	# {lo, hi, val} row.  Membership tests ("in") are used instead of
	# reading props[i] directly because a plain read would create an
	# array element for every unassigned code point up to U+10FFFF.
	for (i = 0x100; i <= 0x10FFFF; i++) {
		if (!(i in props))
			continue
		lo = i
		while (((i + 1) in props) && props[i + 1] == props[lo])
			i++
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
	}

	print "};"
	print ""
	print "__MLIB_DEFINE_BSEARCH(enum uprop_blk, lookup, BLK_NB)"
	print ""
	print "enum uprop_blk"
	print "uprop_get_blk(rune ch)"
	print "{"
	# Strictly less-than: lookup_lat1 has 0x100 entries, so U+0100 has to
	# go through the range table rather than index one past the end.
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/Blocks | sed 's/\s*$//'