From a7ca6ec2e70dd32ae83f725a93003b0cb147992e Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Mon, 15 Apr 2024 13:28:29 +0200 Subject: Add uprop_get_blk() --- data/Blocks | 364 ++++++++++++++++++++++++++++++++++++ gen/data-files | 1 + gen/prop/blk | 395 +++++++++++++++++++++++++++++++++++++++ include/unicode/prop.h | 335 +++++++++++++++++++++++++++++++++ lib/unicode/prop/uprop_get_blk.c | 381 +++++++++++++++++++++++++++++++++++++ 5 files changed, 1476 insertions(+) create mode 100644 data/Blocks create mode 100755 gen/prop/blk create mode 100644 lib/unicode/prop/uprop_get_blk.c diff --git a/data/Blocks b/data/Blocks new file mode 100644 index 0000000..8fa3eaa --- /dev/null +++ b/data/Blocks @@ -0,0 +1,364 @@ +# Blocks-15.1.0.txt +# Date: 2023-07-28, 15:47:20 GMT +# © 2023 Unicode®, Inc. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# Format: +# Start Code..End Code; Block Name + +# ================================================ + +# Note: When comparing block names, casing, whitespace, hyphens, +# and underbars are ignored. +# For example, "Latin Extended-A" and "latin extended a" are equivalent. +# For more information on the comparison of property values, +# see UAX #44: https://www.unicode.org/reports/tr44/ +# +# All block ranges start with a value where (cp MOD 16) = 0, +# and end with a value where (cp MOD 16) = 15. In other words, +# the last hexadecimal digit of the start of range is ...0 +# and the last hexadecimal digit of the end of range is ...F. +# This constraint on block ranges guarantees that allocations +# are done in terms of whole columns, and that code chart display +# never involves splitting columns in the charts. +# +# All code points not explicitly listed for Block +# have the value No_Block. 
+ +# Property: Block +# +# @missing: 0000..10FFFF; No_Block + +0000..007F; Basic Latin +0080..00FF; Latin-1 Supplement +0100..017F; Latin Extended-A +0180..024F; Latin Extended-B +0250..02AF; IPA Extensions +02B0..02FF; Spacing Modifier Letters +0300..036F; Combining Diacritical Marks +0370..03FF; Greek and Coptic +0400..04FF; Cyrillic +0500..052F; Cyrillic Supplement +0530..058F; Armenian +0590..05FF; Hebrew +0600..06FF; Arabic +0700..074F; Syriac +0750..077F; Arabic Supplement +0780..07BF; Thaana +07C0..07FF; NKo +0800..083F; Samaritan +0840..085F; Mandaic +0860..086F; Syriac Supplement +0870..089F; Arabic Extended-B +08A0..08FF; Arabic Extended-A +0900..097F; Devanagari +0980..09FF; Bengali +0A00..0A7F; Gurmukhi +0A80..0AFF; Gujarati +0B00..0B7F; Oriya +0B80..0BFF; Tamil +0C00..0C7F; Telugu +0C80..0CFF; Kannada +0D00..0D7F; Malayalam +0D80..0DFF; Sinhala +0E00..0E7F; Thai +0E80..0EFF; Lao +0F00..0FFF; Tibetan +1000..109F; Myanmar +10A0..10FF; Georgian +1100..11FF; Hangul Jamo +1200..137F; Ethiopic +1380..139F; Ethiopic Supplement +13A0..13FF; Cherokee +1400..167F; Unified Canadian Aboriginal Syllabics +1680..169F; Ogham +16A0..16FF; Runic +1700..171F; Tagalog +1720..173F; Hanunoo +1740..175F; Buhid +1760..177F; Tagbanwa +1780..17FF; Khmer +1800..18AF; Mongolian +18B0..18FF; Unified Canadian Aboriginal Syllabics Extended +1900..194F; Limbu +1950..197F; Tai Le +1980..19DF; New Tai Lue +19E0..19FF; Khmer Symbols +1A00..1A1F; Buginese +1A20..1AAF; Tai Tham +1AB0..1AFF; Combining Diacritical Marks Extended +1B00..1B7F; Balinese +1B80..1BBF; Sundanese +1BC0..1BFF; Batak +1C00..1C4F; Lepcha +1C50..1C7F; Ol Chiki +1C80..1C8F; Cyrillic Extended-C +1C90..1CBF; Georgian Extended +1CC0..1CCF; Sundanese Supplement +1CD0..1CFF; Vedic Extensions +1D00..1D7F; Phonetic Extensions +1D80..1DBF; Phonetic Extensions Supplement +1DC0..1DFF; Combining Diacritical Marks Supplement +1E00..1EFF; Latin Extended Additional +1F00..1FFF; Greek Extended +2000..206F; General Punctuation 
+2070..209F; Superscripts and Subscripts +20A0..20CF; Currency Symbols +20D0..20FF; Combining Diacritical Marks for Symbols +2100..214F; Letterlike Symbols +2150..218F; Number Forms +2190..21FF; Arrows +2200..22FF; Mathematical Operators +2300..23FF; Miscellaneous Technical +2400..243F; Control Pictures +2440..245F; Optical Character Recognition +2460..24FF; Enclosed Alphanumerics +2500..257F; Box Drawing +2580..259F; Block Elements +25A0..25FF; Geometric Shapes +2600..26FF; Miscellaneous Symbols +2700..27BF; Dingbats +27C0..27EF; Miscellaneous Mathematical Symbols-A +27F0..27FF; Supplemental Arrows-A +2800..28FF; Braille Patterns +2900..297F; Supplemental Arrows-B +2980..29FF; Miscellaneous Mathematical Symbols-B +2A00..2AFF; Supplemental Mathematical Operators +2B00..2BFF; Miscellaneous Symbols and Arrows +2C00..2C5F; Glagolitic +2C60..2C7F; Latin Extended-C +2C80..2CFF; Coptic +2D00..2D2F; Georgian Supplement +2D30..2D7F; Tifinagh +2D80..2DDF; Ethiopic Extended +2DE0..2DFF; Cyrillic Extended-A +2E00..2E7F; Supplemental Punctuation +2E80..2EFF; CJK Radicals Supplement +2F00..2FDF; Kangxi Radicals +2FF0..2FFF; Ideographic Description Characters +3000..303F; CJK Symbols and Punctuation +3040..309F; Hiragana +30A0..30FF; Katakana +3100..312F; Bopomofo +3130..318F; Hangul Compatibility Jamo +3190..319F; Kanbun +31A0..31BF; Bopomofo Extended +31C0..31EF; CJK Strokes +31F0..31FF; Katakana Phonetic Extensions +3200..32FF; Enclosed CJK Letters and Months +3300..33FF; CJK Compatibility +3400..4DBF; CJK Unified Ideographs Extension A +4DC0..4DFF; Yijing Hexagram Symbols +4E00..9FFF; CJK Unified Ideographs +A000..A48F; Yi Syllables +A490..A4CF; Yi Radicals +A4D0..A4FF; Lisu +A500..A63F; Vai +A640..A69F; Cyrillic Extended-B +A6A0..A6FF; Bamum +A700..A71F; Modifier Tone Letters +A720..A7FF; Latin Extended-D +A800..A82F; Syloti Nagri +A830..A83F; Common Indic Number Forms +A840..A87F; Phags-pa +A880..A8DF; Saurashtra +A8E0..A8FF; Devanagari Extended +A900..A92F; Kayah Li 
+A930..A95F; Rejang +A960..A97F; Hangul Jamo Extended-A +A980..A9DF; Javanese +A9E0..A9FF; Myanmar Extended-B +AA00..AA5F; Cham +AA60..AA7F; Myanmar Extended-A +AA80..AADF; Tai Viet +AAE0..AAFF; Meetei Mayek Extensions +AB00..AB2F; Ethiopic Extended-A +AB30..AB6F; Latin Extended-E +AB70..ABBF; Cherokee Supplement +ABC0..ABFF; Meetei Mayek +AC00..D7AF; Hangul Syllables +D7B0..D7FF; Hangul Jamo Extended-B +D800..DB7F; High Surrogates +DB80..DBFF; High Private Use Surrogates +DC00..DFFF; Low Surrogates +E000..F8FF; Private Use Area +F900..FAFF; CJK Compatibility Ideographs +FB00..FB4F; Alphabetic Presentation Forms +FB50..FDFF; Arabic Presentation Forms-A +FE00..FE0F; Variation Selectors +FE10..FE1F; Vertical Forms +FE20..FE2F; Combining Half Marks +FE30..FE4F; CJK Compatibility Forms +FE50..FE6F; Small Form Variants +FE70..FEFF; Arabic Presentation Forms-B +FF00..FFEF; Halfwidth and Fullwidth Forms +FFF0..FFFF; Specials +10000..1007F; Linear B Syllabary +10080..100FF; Linear B Ideograms +10100..1013F; Aegean Numbers +10140..1018F; Ancient Greek Numbers +10190..101CF; Ancient Symbols +101D0..101FF; Phaistos Disc +10280..1029F; Lycian +102A0..102DF; Carian +102E0..102FF; Coptic Epact Numbers +10300..1032F; Old Italic +10330..1034F; Gothic +10350..1037F; Old Permic +10380..1039F; Ugaritic +103A0..103DF; Old Persian +10400..1044F; Deseret +10450..1047F; Shavian +10480..104AF; Osmanya +104B0..104FF; Osage +10500..1052F; Elbasan +10530..1056F; Caucasian Albanian +10570..105BF; Vithkuqi +10600..1077F; Linear A +10780..107BF; Latin Extended-F +10800..1083F; Cypriot Syllabary +10840..1085F; Imperial Aramaic +10860..1087F; Palmyrene +10880..108AF; Nabataean +108E0..108FF; Hatran +10900..1091F; Phoenician +10920..1093F; Lydian +10980..1099F; Meroitic Hieroglyphs +109A0..109FF; Meroitic Cursive +10A00..10A5F; Kharoshthi +10A60..10A7F; Old South Arabian +10A80..10A9F; Old North Arabian +10AC0..10AFF; Manichaean +10B00..10B3F; Avestan +10B40..10B5F; Inscriptional Parthian 
+10B60..10B7F; Inscriptional Pahlavi +10B80..10BAF; Psalter Pahlavi +10C00..10C4F; Old Turkic +10C80..10CFF; Old Hungarian +10D00..10D3F; Hanifi Rohingya +10E60..10E7F; Rumi Numeral Symbols +10E80..10EBF; Yezidi +10EC0..10EFF; Arabic Extended-C +10F00..10F2F; Old Sogdian +10F30..10F6F; Sogdian +10F70..10FAF; Old Uyghur +10FB0..10FDF; Chorasmian +10FE0..10FFF; Elymaic +11000..1107F; Brahmi +11080..110CF; Kaithi +110D0..110FF; Sora Sompeng +11100..1114F; Chakma +11150..1117F; Mahajani +11180..111DF; Sharada +111E0..111FF; Sinhala Archaic Numbers +11200..1124F; Khojki +11280..112AF; Multani +112B0..112FF; Khudawadi +11300..1137F; Grantha +11400..1147F; Newa +11480..114DF; Tirhuta +11580..115FF; Siddham +11600..1165F; Modi +11660..1167F; Mongolian Supplement +11680..116CF; Takri +11700..1174F; Ahom +11800..1184F; Dogra +118A0..118FF; Warang Citi +11900..1195F; Dives Akuru +119A0..119FF; Nandinagari +11A00..11A4F; Zanabazar Square +11A50..11AAF; Soyombo +11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A +11AC0..11AFF; Pau Cin Hau +11B00..11B5F; Devanagari Extended-A +11C00..11C6F; Bhaiksuki +11C70..11CBF; Marchen +11D00..11D5F; Masaram Gondi +11D60..11DAF; Gunjala Gondi +11EE0..11EFF; Makasar +11F00..11F5F; Kawi +11FB0..11FBF; Lisu Supplement +11FC0..11FFF; Tamil Supplement +12000..123FF; Cuneiform +12400..1247F; Cuneiform Numbers and Punctuation +12480..1254F; Early Dynastic Cuneiform +12F90..12FFF; Cypro-Minoan +13000..1342F; Egyptian Hieroglyphs +13430..1345F; Egyptian Hieroglyph Format Controls +14400..1467F; Anatolian Hieroglyphs +16800..16A3F; Bamum Supplement +16A40..16A6F; Mro +16A70..16ACF; Tangsa +16AD0..16AFF; Bassa Vah +16B00..16B8F; Pahawh Hmong +16E40..16E9F; Medefaidrin +16F00..16F9F; Miao +16FE0..16FFF; Ideographic Symbols and Punctuation +17000..187FF; Tangut +18800..18AFF; Tangut Components +18B00..18CFF; Khitan Small Script +18D00..18D7F; Tangut Supplement +1AFF0..1AFFF; Kana Extended-B +1B000..1B0FF; Kana Supplement +1B100..1B12F; Kana 
Extended-A +1B130..1B16F; Small Kana Extension +1B170..1B2FF; Nushu +1BC00..1BC9F; Duployan +1BCA0..1BCAF; Shorthand Format Controls +1CF00..1CFCF; Znamenny Musical Notation +1D000..1D0FF; Byzantine Musical Symbols +1D100..1D1FF; Musical Symbols +1D200..1D24F; Ancient Greek Musical Notation +1D2C0..1D2DF; Kaktovik Numerals +1D2E0..1D2FF; Mayan Numerals +1D300..1D35F; Tai Xuan Jing Symbols +1D360..1D37F; Counting Rod Numerals +1D400..1D7FF; Mathematical Alphanumeric Symbols +1D800..1DAAF; Sutton SignWriting +1DF00..1DFFF; Latin Extended-G +1E000..1E02F; Glagolitic Supplement +1E030..1E08F; Cyrillic Extended-D +1E100..1E14F; Nyiakeng Puachue Hmong +1E290..1E2BF; Toto +1E2C0..1E2FF; Wancho +1E4D0..1E4FF; Nag Mundari +1E7E0..1E7FF; Ethiopic Extended-B +1E800..1E8DF; Mende Kikakui +1E900..1E95F; Adlam +1EC70..1ECBF; Indic Siyaq Numbers +1ED00..1ED4F; Ottoman Siyaq Numbers +1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols +1F000..1F02F; Mahjong Tiles +1F030..1F09F; Domino Tiles +1F0A0..1F0FF; Playing Cards +1F100..1F1FF; Enclosed Alphanumeric Supplement +1F200..1F2FF; Enclosed Ideographic Supplement +1F300..1F5FF; Miscellaneous Symbols and Pictographs +1F600..1F64F; Emoticons +1F650..1F67F; Ornamental Dingbats +1F680..1F6FF; Transport and Map Symbols +1F700..1F77F; Alchemical Symbols +1F780..1F7FF; Geometric Shapes Extended +1F800..1F8FF; Supplemental Arrows-C +1F900..1F9FF; Supplemental Symbols and Pictographs +1FA00..1FA6F; Chess Symbols +1FA70..1FAFF; Symbols and Pictographs Extended-A +1FB00..1FBFF; Symbols for Legacy Computing +20000..2A6DF; CJK Unified Ideographs Extension B +2A700..2B73F; CJK Unified Ideographs Extension C +2B740..2B81F; CJK Unified Ideographs Extension D +2B820..2CEAF; CJK Unified Ideographs Extension E +2CEB0..2EBEF; CJK Unified Ideographs Extension F +2EBF0..2EE5F; CJK Unified Ideographs Extension I +2F800..2FA1F; CJK Compatibility Ideographs Supplement +30000..3134F; CJK Unified Ideographs Extension G +31350..323AF; CJK Unified Ideographs 
Extension H +E0000..E007F; Tags +E0100..E01EF; Variation Selectors Supplement +F0000..FFFFF; Supplementary Private Use Area-A +100000..10FFFF; Supplementary Private Use Area-B + +# EOF diff --git a/gen/data-files b/gen/data-files index 801f591..b2c4197 100755 --- a/gen/data-files +++ b/gen/data-files @@ -9,6 +9,7 @@ readonly BASE=https://www.unicode.org/Public/UCD/latest/ucd readonly PATHS=' auxiliary/GraphemeBreakProperty BidiBrackets +Blocks DerivedAge DerivedCoreProperties DerivedNormalizationProps diff --git a/gen/prop/blk b/gen/prop/blk new file mode 100755 index 0000000..574f5af --- /dev/null +++ b/gen/prop/blk @@ -0,0 +1,395 @@ +#!/bin/sh + +set -e +cd "${0%/*}/../.." +exec >lib/unicode/prop/uprop_get_blk.c + +gawk ' +BEGIN { + FS = " *(; *|#.*)" + + map["adlam"] = "ADLAM" + map["aegean_numbers"] = "AEGEAN_NUMBERS" + map["ahom"] = "AHOM" + map["alchemical_symbols"] = "ALCHEMICAL" + map["alphabetic_presentation_forms"] = "ALPHABETIC_PF" + map["anatolian_hieroglyphs"] = "ANATOLIAN_HIEROGLYPHS" + map["ancient_greek_musical_notation"] = "ANCIENT_GREEK_MUSIC" + map["ancient_greek_numbers"] = "ANCIENT_GREEK_NUMBERS" + map["ancient_symbols"] = "ANCIENT_SYMBOLS" + map["arabic"] = "ARABIC" + map["arabic_extended_a"] = "ARABIC_EXT_A" + map["arabic_extended_b"] = "ARABIC_EXT_B" + map["arabic_extended_c"] = "ARABIC_EXT_C" + map["arabic_mathematical_alphabetic_symbols"] = "ARABIC_MATH" + map["arabic_presentation_forms_a"] = "ARABIC_PF_A" + map["arabic_presentation_forms_b"] = "ARABIC_PF_B" + map["arabic_supplement"] = "ARABIC_SUP" + map["armenian"] = "ARMENIAN" + map["arrows"] = "ARROWS" + map["avestan"] = "AVESTAN" + map["balinese"] = "BALINESE" + map["bamum"] = "BAMUM" + map["bamum_supplement"] = "BAMUM_SUP" + map["basic_latin"] = "ASCII" + map["bassa_vah"] = "BASSA_VAH" + map["batak"] = "BATAK" + map["bengali"] = "BENGALI" + map["bhaiksuki"] = "BHAIKSUKI" + map["block_elements"] = "BLOCK_ELEMENTS" + map["bopomofo"] = "BOPOMOFO" + map["bopomofo_extended"] = 
"BOPOMOFO_EXT" + map["box_drawing"] = "BOX_DRAWING" + map["brahmi"] = "BRAHMI" + map["braille_patterns"] = "BRAILLE" + map["buginese"] = "BUGINESE" + map["buhid"] = "BUHID" + map["byzantine_musical_symbols"] = "BYZANTINE_MUSIC" + map["carian"] = "CARIAN" + map["caucasian_albanian"] = "CAUCASIAN_ALBANIAN" + map["chakma"] = "CHAKMA" + map["cham"] = "CHAM" + map["cherokee"] = "CHEROKEE" + map["cherokee_supplement"] = "CHEROKEE_SUP" + map["chess_symbols"] = "CHESS_SYMBOLS" + map["chorasmian"] = "CHORASMIAN" + map["cjk_compatibility"] = "CJK_COMPAT" + map["cjk_compatibility_forms"] = "CJK_COMPAT_FORMS" + map["cjk_compatibility_ideographs"] = "CJK_COMPAT_IDEOGRAPHS" + map["cjk_compatibility_ideographs_supplement"] = "CJK_COMPAT_IDEOGRAPHS_SUP" + map["cjk_radicals_supplement"] = "CJK_RADICALS_SUP" + map["cjk_strokes"] = "CJK_STROKES" + map["cjk_symbols_and_punctuation"] = "CJK_SYMBOLS" + map["cjk_unified_ideographs"] = "CJK" + map["cjk_unified_ideographs_extension_a"] = "CJK_EXT_A" + map["cjk_unified_ideographs_extension_b"] = "CJK_EXT_B" + map["cjk_unified_ideographs_extension_c"] = "CJK_EXT_C" + map["cjk_unified_ideographs_extension_d"] = "CJK_EXT_D" + map["cjk_unified_ideographs_extension_e"] = "CJK_EXT_E" + map["cjk_unified_ideographs_extension_f"] = "CJK_EXT_F" + map["cjk_unified_ideographs_extension_g"] = "CJK_EXT_G" + map["cjk_unified_ideographs_extension_h"] = "CJK_EXT_H" + map["cjk_unified_ideographs_extension_i"] = "CJK_EXT_I" + map["combining_diacritical_marks"] = "DIACRITICALS" + map["combining_diacritical_marks_extended"] = "DIACRITICALS_EXT" + map["combining_diacritical_marks_for_symbols"] = "DIACRITICALS_FOR_SYMBOLS" + map["combining_diacritical_marks_supplement"] = "DIACRITICALS_SUP" + map["combining_half_marks"] = "HALF_MARKS" + map["common_indic_number_forms"] = "INDIC_NUMBER_FORMS" + map["control_pictures"] = "CONTROL_PICTURES" + map["coptic"] = "COPTIC" + map["coptic_epact_numbers"] = "COPTIC_EPACT_NUMBERS" + map["counting_rod_numerals"] = 
"COUNTING_ROD" + map["cuneiform"] = "CUNEIFORM" + map["cuneiform_numbers_and_punctuation"] = "CUNEIFORM_NUMBERS" + map["currency_symbols"] = "CURRENCY_SYMBOLS" + map["cypriot_syllabary"] = "CYPRIOT_SYLLABARY" + map["cypro_minoan"] = "CYPRO_MINOAN" + map["cyrillic"] = "CYRILLIC" + map["cyrillic_extended_a"] = "CYRILLIC_EXT_A" + map["cyrillic_extended_b"] = "CYRILLIC_EXT_B" + map["cyrillic_extended_c"] = "CYRILLIC_EXT_C" + map["cyrillic_extended_d"] = "CYRILLIC_EXT_D" + map["cyrillic_supplement"] = "CYRILLIC_SUP" + map["deseret"] = "DESERET" + map["devanagari"] = "DEVANAGARI" + map["devanagari_extended_a"] = "DEVANAGARI_EXT_A" + map["devanagari_extended"] = "DEVANAGARI_EXT" + map["dingbats"] = "DINGBATS" + map["dives_akuru"] = "DIVES_AKURU" + map["dogra"] = "DOGRA" + map["domino_tiles"] = "DOMINO" + map["duployan"] = "DUPLOYAN" + map["early_dynastic_cuneiform"] = "EARLY_DYNASTIC_CUNEIFORM" + map["egyptian_hieroglyph_format_controls"] = "EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS" + map["egyptian_hieroglyphs"] = "EGYPTIAN_HIEROGLYPHS" + map["elbasan"] = "ELBASAN" + map["elymaic"] = "ELYMAIC" + map["emoticons"] = "EMOTICONS" + map["enclosed_alphanumerics"] = "ENCLOSED_ALPHANUM" + map["enclosed_alphanumeric_supplement"] = "ENCLOSED_ALPHANUM_SUP" + map["enclosed_cjk_letters_and_months"] = "ENCLOSED_CJK" + map["enclosed_ideographic_supplement"] = "ENCLOSED_IDEOGRAPHIC_SUP" + map["ethiopic"] = "ETHIOPIC" + map["ethiopic_extended_a"] = "ETHIOPIC_EXT_A" + map["ethiopic_extended_b"] = "ETHIOPIC_EXT_B" + map["ethiopic_extended"] = "ETHIOPIC_EXT" + map["ethiopic_supplement"] = "ETHIOPIC_SUP" + map["general_punctuation"] = "PUNCTUATION" + map["geometric_shapes_extended"] = "GEOMETRIC_SHAPES_EXT" + map["geometric_shapes"] = "GEOMETRIC_SHAPES" + map["georgian_extended"] = "GEORGIAN_EXT" + map["georgian"] = "GEORGIAN" + map["georgian_supplement"] = "GEORGIAN_SUP" + map["glagolitic"] = "GLAGOLITIC" + map["glagolitic_supplement"] = "GLAGOLITIC_SUP" + map["gothic"] = "GOTHIC" + 
map["grantha"] = "GRANTHA" + map["greek_and_coptic"] = "GREEK" + map["greek_extended"] = "GREEK_EXT" + map["gujarati"] = "GUJARATI" + map["gunjala_gondi"] = "GUNJALA_GONDI" + map["gurmukhi"] = "GURMUKHI" + map["halfwidth_and_fullwidth_forms"] = "HALF_AND_FULL_FORMS" + map["hangul_compatibility_jamo"] = "COMPAT_JAMO" + map["hangul_jamo_extended_a"] = "JAMO_EXT_A" + map["hangul_jamo_extended_b"] = "JAMO_EXT_B" + map["hangul_jamo"] = "JAMO" + map["hangul_syllables"] = "HANGUL" + map["hanifi_rohingya"] = "HANIFI_ROHINGYA" + map["hanunoo"] = "HANUNOO" + map["hatran"] = "HATRAN" + map["hebrew"] = "HEBREW" + map["high_private_use_surrogates"] = "HIGH_PU_SURROGATES" + map["high_surrogates"] = "HIGH_SURROGATES" + map["hiragana"] = "HIRAGANA" + map["ideographic_description_characters"] = "IDC" + map["ideographic_symbols_and_punctuation"] = "IDEOGRAPHIC_SYMBOLS" + map["imperial_aramaic"] = "IMPERIAL_ARAMAIC" + map["indic_siyaq_numbers"] = "INDIC_SIYAQ_NUMBERS" + map["inscriptional_pahlavi"] = "INSCRIPTIONAL_PAHLAVI" + map["inscriptional_parthian"] = "INSCRIPTIONAL_PARTHIAN" + map["ipa_extensions"] = "IPA_EXT" + map["javanese"] = "JAVANESE" + map["kaithi"] = "KAITHI" + map["kaktovik_numerals"] = "KAKTOVIK_NUMERALS" + map["kana_extended_a"] = "KANA_EXT_A" + map["kana_extended_b"] = "KANA_EXT_B" + map["kana_supplement"] = "KANA_SUP" + map["kanbun"] = "KANBUN" + map["kangxi_radicals"] = "KANGXI" + map["kannada"] = "KANNADA" + map["katakana"] = "KATAKANA" + map["katakana_phonetic_extensions"] = "KATAKANA_EXT" + map["kawi"] = "KAWI" + map["kayah_li"] = "KAYAH_LI" + map["kharoshthi"] = "KHAROSHTHI" + map["khitan_small_script"] = "KHITAN_SMALL_SCRIPT" + map["khmer"] = "KHMER" + map["khmer_symbols"] = "KHMER_SYMBOLS" + map["khojki"] = "KHOJKI" + map["khudawadi"] = "KHUDAWADI" + map["lao"] = "LAO" + map["latin_1_supplement"] = "LATIN_1_SUP" + map["latin_extended_additional"] = "LATIN_EXT_ADDITIONAL" + map["latin_extended_a"] = "LATIN_EXT_A" + map["latin_extended_b"] = "LATIN_EXT_B" + 
map["latin_extended_c"] = "LATIN_EXT_C" + map["latin_extended_d"] = "LATIN_EXT_D" + map["latin_extended_e"] = "LATIN_EXT_E" + map["latin_extended_f"] = "LATIN_EXT_F" + map["latin_extended_g"] = "LATIN_EXT_G" + map["lepcha"] = "LEPCHA" + map["letterlike_symbols"] = "LETTERLIKE_SYMBOLS" + map["limbu"] = "LIMBU" + map["linear_a"] = "LINEAR_A" + map["linear_b_ideograms"] = "LINEAR_B_IDEOGRAMS" + map["linear_b_syllabary"] = "LINEAR_B_SYLLABARY" + map["lisu"] = "LISU" + map["lisu_supplement"] = "LISU_SUP" + map["low_surrogates"] = "LOW_SURROGATES" + map["lycian"] = "LYCIAN" + map["lydian"] = "LYDIAN" + map["mahajani"] = "MAHAJANI" + map["mahjong_tiles"] = "MAHJONG" + map["makasar"] = "MAKASAR" + map["malayalam"] = "MALAYALAM" + map["mandaic"] = "MANDAIC" + map["manichaean"] = "MANICHAEAN" + map["marchen"] = "MARCHEN" + map["masaram_gondi"] = "MASARAM_GONDI" + map["mathematical_alphanumeric_symbols"] = "MATH_ALPHANUM" + map["mathematical_operators"] = "MATH_OPERATORS" + map["mayan_numerals"] = "MAYAN_NUMERALS" + map["medefaidrin"] = "MEDEFAIDRIN" + map["meetei_mayek_extensions"] = "MEETEI_MAYEK_EXT" + map["meetei_mayek"] = "MEETEI_MAYEK" + map["mende_kikakui"] = "MENDE_KIKAKUI" + map["meroitic_cursive"] = "MEROITIC_CURSIVE" + map["meroitic_hieroglyphs"] = "MEROITIC_HIEROGLYPHS" + map["miao"] = "MIAO" + map["miscellaneous_mathematical_symbols_a"] = "MISC_MATH_SYMBOLS_A" + map["miscellaneous_mathematical_symbols_b"] = "MISC_MATH_SYMBOLS_B" + map["miscellaneous_symbols_and_arrows"] = "MISC_ARROWS" + map["miscellaneous_symbols_and_pictographs"] = "MISC_PICTOGRAPHS" + map["miscellaneous_symbols"] = "MISC_SYMBOLS" + map["miscellaneous_technical"] = "MISC_TECHNICAL" + map["modifier_tone_letters"] = "MODIFIER_TONE_LETTERS" + map["modi"] = "MODI" + map["mongolian"] = "MONGOLIAN" + map["mongolian_supplement"] = "MONGOLIAN_SUP" + map["mro"] = "MRO" + map["multani"] = "MULTANI" + map["musical_symbols"] = "MUSIC" + map["myanmar_extended_a"] = "MYANMAR_EXT_A" + 
map["myanmar_extended_b"] = "MYANMAR_EXT_B" + map["myanmar"] = "MYANMAR" + map["nabataean"] = "NABATAEAN" + map["nag_mundari"] = "NAG_MUNDARI" + map["nandinagari"] = "NANDINAGARI" + map["newa"] = "NEWA" + map["new_tai_lue"] = "NEW_TAI_LUE" + map["nko"] = "NKO" + map["number_forms"] = "NUMBER_FORMS" + map["nushu"] = "NUSHU" + map["nyiakeng_puachue_hmong"] = "NYIAKENG_PUACHUE_HMONG" + map["ogham"] = "OGHAM" + map["ol_chiki"] = "OL_CHIKI" + map["old_hungarian"] = "OLD_HUNGARIAN" + map["old_italic"] = "OLD_ITALIC" + map["old_north_arabian"] = "OLD_NORTH_ARABIAN" + map["old_permic"] = "OLD_PERMIC" + map["old_persian"] = "OLD_PERSIAN" + map["old_sogdian"] = "OLD_SOGDIAN" + map["old_south_arabian"] = "OLD_SOUTH_ARABIAN" + map["old_turkic"] = "OLD_TURKIC" + map["old_uyghur"] = "OLD_UYGHUR" + map["optical_character_recognition"] = "OCR" + map["oriya"] = "ORIYA" + map["ornamental_dingbats"] = "ORNAMENTAL_DINGBATS" + map["osage"] = "OSAGE" + map["osmanya"] = "OSMANYA" + map["ottoman_siyaq_numbers"] = "OTTOMAN_SIYAQ_NUMBERS" + map["pahawh_hmong"] = "PAHAWH_HMONG" + map["palmyrene"] = "PALMYRENE" + map["pau_cin_hau"] = "PAU_CIN_HAU" + map["phags_pa"] = "PHAGS_PA" + map["phaistos_disc"] = "PHAISTOS" + map["phoenician"] = "PHOENICIAN" + map["phonetic_extensions"] = "PHONETIC_EXT" + map["phonetic_extensions_supplement"] = "PHONETIC_EXT_SUP" + map["playing_cards"] = "PLAYING_CARDS" + map["private_use_area"] = "PUA" + map["psalter_pahlavi"] = "PSALTER_PAHLAVI" + map["rejang"] = "REJANG" + map["rumi_numeral_symbols"] = "RUMI" + map["runic"] = "RUNIC" + map["samaritan"] = "SAMARITAN" + map["saurashtra"] = "SAURASHTRA" + map["sharada"] = "SHARADA" + map["shavian"] = "SHAVIAN" + map["shorthand_format_controls"] = "SHORTHAND_FORMAT_CONTROLS" + map["siddham"] = "SIDDHAM" + map["sinhala_archaic_numbers"] = "SINHALA_ARCHAIC_NUMBERS" + map["sinhala"] = "SINHALA" + map["small_form_variants"] = "SMALL_FORMS" + map["small_kana_extension"] = "SMALL_KANA_EXT" + map["sogdian"] = "SOGDIAN" + 
map["sora_sompeng"] = "SORA_SOMPENG" + map["soyombo"] = "SOYOMBO" + map["spacing_modifier_letters"] = "MODIFIER_LETTERS" + map["specials"] = "SPECIALS" + map["sundanese"] = "SUNDANESE" + map["sundanese_supplement"] = "SUNDANESE_SUP" + map["superscripts_and_subscripts"] = "SUPER_AND_SUB" + map["supplemental_arrows_a"] = "SUP_ARROWS_A" + map["supplemental_arrows_b"] = "SUP_ARROWS_B" + map["supplemental_arrows_c"] = "SUP_ARROWS_C" + map["supplemental_mathematical_operators"] = "SUP_MATH_OPERATORS" + map["supplemental_punctuation"] = "SUP_PUNCTUATION" + map["supplemental_symbols_and_pictographs"] = "SUP_SYMBOLS_AND_PICTOGRAPHS" + map["supplementary_private_use_area_a"] = "SUP_PUA_A" + map["supplementary_private_use_area_b"] = "SUP_PUA_B" + map["sutton_signwriting"] = "SUTTON_SIGNWRITING" + map["syloti_nagri"] = "SYLOTI_NAGRI" + map["symbols_and_pictographs_extended_a"] = "SYMBOLS_AND_PICTOGRAPHS_EXT_A" + map["symbols_for_legacy_computing"] = "SYMBOLS_FOR_LEGACY_COMPUTING" + map["syriac_supplement"] = "SYRIAC_SUP" + map["syriac"] = "SYRIAC" + map["tagalog"] = "TAGALOG" + map["tagbanwa"] = "TAGBANWA" + map["tags"] = "TAGS" + map["tai_le"] = "TAI_LE" + map["tai_tham"] = "TAI_THAM" + map["tai_viet"] = "TAI_VIET" + map["tai_xuan_jing_symbols"] = "TAI_XUAN_JING" + map["takri"] = "TAKRI" + map["tamil_supplement"] = "TAMIL_SUP" + map["tamil"] = "TAMIL" + map["tangsa"] = "TANGSA" + map["tangut_components"] = "TANGUT_COMPONENTS" + map["tangut_supplement"] = "TANGUT_SUP" + map["tangut"] = "TANGUT" + map["telugu"] = "TELUGU" + map["thaana"] = "THAANA" + map["thai"] = "THAI" + map["tibetan"] = "TIBETAN" + map["tifinagh"] = "TIFINAGH" + map["tirhuta"] = "TIRHUTA" + map["toto"] = "TOTO" + map["transport_and_map_symbols"] = "TRANSPORT_AND_MAP" + map["ugaritic"] = "UGARITIC" + map["unified_canadian_aboriginal_syllabics_extended_a"] = "UCAS_EXT_A" + map["unified_canadian_aboriginal_syllabics_extended"] = "UCAS_EXT" + map["unified_canadian_aboriginal_syllabics"] = "UCAS" + map["vai"] = 
"VAI" + map["variation_selectors_supplement"] = "VS_SUP" + map["variation_selectors"] = "VS" + map["vedic_extensions"] = "VEDIC_EXT" + map["vertical_forms"] = "VERTICAL_FORMS" + map["vithkuqi"] = "VITHKUQI" + map["wancho"] = "WANCHO" + map["warang_citi"] = "WARANG_CITI" + map["yezidi"] = "YEZIDI" + map["yijing_hexagram_symbols"] = "YIJING" + map["yi_radicals"] = "YI_RADICALS" + map["yi_syllables"] = "YI_SYLLABLES" + map["zanabazar_square"] = "ZANABAZAR_SQUARE" + map["znamenny_musical_notation"] = "ZNAMENNY_MUSIC" + + print "/* This file is autogenerated by gen/prop/blk; DO NOT EDIT. */" + print "" + print "#include \"__bsearch.h\"" + print "#include \"macros.h\"" + print "#include \"rune.h\"" + print "#include \"unicode/prop.h\"" + print "" +} + +/^[^#]/ { + n = split($1, a, /\.\./) + lo = strtonum("0X" a[1]) + hi = strtonum("0X" a[n]) + + gsub(/^; /, "", $2) + gsub(/[- ]/, "_", $2) + blk = "BLK_" map[tolower($2)] # normalize the block name once per record, not once per code point + for (i = lo; i <= hi; i++) + props[i] = blk +} + +END { + print "static constexpr enum uprop_blk lookup_lat1[] = {" + for (i = 0; i < 0x100; i++) { + if (i % 8 == 0) + printf "\t" + printf "%-15s,%s", props[i] ? props[i] : 0, i % 8 == 7 ? "\n" : " " + } + print "};" + print "" + + print "static const struct {" + print "\trune lo, hi;" + print "\tenum uprop_blk val;" + print "} lookup[] = {" + + for (i = 0x100; i <= 0x10FFFF; i++) { + if (!props[i]) + continue + lo = i + while (props[lo] == props[i + 1]) + i++ + printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i] + } + + print "};" + print "" + print "__MLIB_DEFINE_BSEARCH(enum uprop_blk, lookup, BLK_NB)" + print "" + print "enum uprop_blk" + print "uprop_get_blk(rune ch)" + print "{" + print "\treturn ch < lengthof(lookup_lat1) ? 
lookup_lat1[ch] : mlib_lookup(ch);" # strict <: lookup_lat1 has only 0x100 entries, so ch == 0x100 must go to the range table + print "}" +} +' data/Blocks | sed 's/[[:space:]]*$//' diff --git a/include/unicode/prop.h b/include/unicode/prop.h index 98941f0..ee030ae 100644 --- a/include/unicode/prop.h +++ b/include/unicode/prop.h @@ -83,6 +83,340 @@ uprop_age_minor(enum uprop_age a) return a & 0xFF; } +enum uprop_blk { + BLK_NB = 0, /* No Block */ + BLK_ADLAM, /* Adlam */ + BLK_AEGEAN_NUMBERS, /* Aegean Numbers */ + BLK_AHOM, /* Ahom */ + BLK_ALCHEMICAL, /* Alchemical Symbols */ + BLK_ALPHABETIC_PF, /* Alphabetic Presentation Forms */ + BLK_ANATOLIAN_HIEROGLYPHS, /* Anatolian Hieroglyphs */ + BLK_ANCIENT_GREEK_MUSIC, /* Ancient Greek Musical Notation */ + BLK_ANCIENT_GREEK_NUMBERS, /* Ancient Greek Numbers */ + BLK_ANCIENT_SYMBOLS, /* Ancient Symbols */ + BLK_ARABIC, /* Arabic */ + BLK_ARABIC_EXT_A, /* Arabic Extended A */ + BLK_ARABIC_EXT_B, /* Arabic Extended B */ + BLK_ARABIC_EXT_C, /* Arabic Extended C */ + BLK_ARABIC_MATH, /* Arabic Mathematical Alphabetic Symbols */ + BLK_ARABIC_PF_A, /* Arabic Presentation Forms_A */ + BLK_ARABIC_PF_B, /* Arabic Presentation Forms_B */ + BLK_ARABIC_SUP, /* Arabic Supplement */ + BLK_ARMENIAN, /* Armenian */ + BLK_ARROWS, /* Arrows */ + BLK_ASCII, /* Basic Latin */ + BLK_AVESTAN, /* Avestan */ + BLK_BALINESE, /* Balinese */ + BLK_BAMUM, /* Bamum */ + BLK_BAMUM_SUP, /* Bamum Supplement */ + BLK_BASSA_VAH, /* Bassa Vah */ + BLK_BATAK, /* Batak */ + BLK_BENGALI, /* Bengali */ + BLK_BHAIKSUKI, /* Bhaiksuki */ + BLK_BLOCK_ELEMENTS, /* Block Elements */ + BLK_BOPOMOFO, /* Bopomofo */ + BLK_BOPOMOFO_EXT, /* Bopomofo Extended */ + BLK_BOX_DRAWING, /* Box Drawing */ + BLK_BRAHMI, /* Brahmi */ + BLK_BRAILLE, /* Braille Patterns */ + BLK_BUGINESE, /* Buginese */ + BLK_BUHID, /* Buhid */ + BLK_BYZANTINE_MUSIC, /* Byzantine Musical Symbols */ + BLK_CARIAN, /* Carian */ + BLK_CAUCASIAN_ALBANIAN, /* Caucasian Albanian */ + BLK_CHAKMA, /* Chakma */ + BLK_CHAM, /* Cham */ + BLK_CHEROKEE, /* Cherokee */ + BLK_CHEROKEE_SUP, /* Cherokee 
Supplement */ + BLK_CHESS_SYMBOLS, /* Chess Symbols */ + BLK_CHORASMIAN, /* Chorasmian */ + BLK_CJK, /* CJK Unified Ideographs */ + BLK_CJK_COMPAT, /* CJK Compatibility */ + BLK_CJK_COMPAT_FORMS, /* CJK Compatibility Forms */ + BLK_CJK_COMPAT_IDEOGRAPHS, /* CJK Compatibility Ideographs */ + BLK_CJK_COMPAT_IDEOGRAPHS_SUP, /* CJK Compatibility Ideographs Supplement */ + BLK_CJK_EXT_A, /* CJK Unified Ideographs Extension A */ + BLK_CJK_EXT_B, /* CJK Unified Ideographs Extension B */ + BLK_CJK_EXT_C, /* CJK Unified Ideographs Extension C */ + BLK_CJK_EXT_D, /* CJK Unified Ideographs Extension D */ + BLK_CJK_EXT_E, /* CJK Unified Ideographs Extension E */ + BLK_CJK_EXT_F, /* CJK Unified Ideographs Extension F */ + BLK_CJK_EXT_G, /* CJK Unified Ideographs Extension G */ + BLK_CJK_EXT_H, /* CJK Unified Ideographs Extension H */ + BLK_CJK_EXT_I, /* CJK Unified Ideographs Extension I */ + BLK_CJK_RADICALS_SUP, /* CJK Radicals Supplement */ + BLK_CJK_STROKES, /* CJK Strokes */ + BLK_CJK_SYMBOLS, /* CJK Symbols And Punctuation */ + BLK_COMPAT_JAMO, /* Hangul Compatibility Jamo */ + BLK_CONTROL_PICTURES, /* Control Pictures */ + BLK_COPTIC, /* Coptic */ + BLK_COPTIC_EPACT_NUMBERS, /* Coptic Epact Numbers */ + BLK_COUNTING_ROD, /* Counting Rod Numerals */ + BLK_CUNEIFORM, /* Cuneiform */ + BLK_CUNEIFORM_NUMBERS, /* Cuneiform Numbers And Punctuation */ + BLK_CURRENCY_SYMBOLS, /* Currency Symbols */ + BLK_CYPRIOT_SYLLABARY, /* Cypriot Syllabary */ + BLK_CYPRO_MINOAN, /* Cypro Minoan */ + BLK_CYRILLIC, /* Cyrillic */ + BLK_CYRILLIC_EXT_A, /* Cyrillic Extended A */ + BLK_CYRILLIC_EXT_B, /* Cyrillic Extended B */ + BLK_CYRILLIC_EXT_C, /* Cyrillic Extended C */ + BLK_CYRILLIC_EXT_D, /* Cyrillic Extended D */ + BLK_CYRILLIC_SUP, /* Cyrillic Supplement */ + BLK_DESERET, /* Deseret */ + BLK_DEVANAGARI, /* Devanagari */ + BLK_DEVANAGARI_EXT, /* Devanagari Extended */ + BLK_DEVANAGARI_EXT_A, /* Devanagari Extended A */ + BLK_DIACRITICALS, /* Combining Diacritical Marks */ + 
BLK_DIACRITICALS_EXT, /* Combining Diacritical Marks Extended */ + BLK_DIACRITICALS_FOR_SYMBOLS, /* Combining Diacritical Marks For Symbols */ + BLK_DIACRITICALS_SUP, /* Combining Diacritical Marks Supplement */ + BLK_DINGBATS, /* Dingbats */ + BLK_DIVES_AKURU, /* Dives Akuru */ + BLK_DOGRA, /* Dogra */ + BLK_DOMINO, /* Domino Tiles */ + BLK_DUPLOYAN, /* Duployan */ + BLK_EARLY_DYNASTIC_CUNEIFORM, /* Early Dynastic Cuneiform */ + BLK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, /* Egyptian Hieroglyph Format + * Controls + */ + BLK_EGYPTIAN_HIEROGLYPHS, /* Egyptian Hieroglyphs */ + BLK_ELBASAN, /* Elbasan */ + BLK_ELYMAIC, /* Elymaic */ + BLK_EMOTICONS, /* Emoticons */ + BLK_ENCLOSED_ALPHANUM, /* Enclosed Alphanumerics */ + BLK_ENCLOSED_ALPHANUM_SUP, /* Enclosed Alphanumeric Supplement */ + BLK_ENCLOSED_CJK, /* Enclosed CJK_Letters And Months */ + BLK_ENCLOSED_IDEOGRAPHIC_SUP, /* Enclosed Ideographic Supplement */ + BLK_ETHIOPIC, /* Ethiopic */ + BLK_ETHIOPIC_EXT, /* Ethiopic Extended */ + BLK_ETHIOPIC_EXT_A, /* Ethiopic Extended A */ + BLK_ETHIOPIC_EXT_B, /* Ethiopic Extended B */ + BLK_ETHIOPIC_SUP, /* Ethiopic Supplement */ + BLK_GEOMETRIC_SHAPES, /* Geometric Shapes */ + BLK_GEOMETRIC_SHAPES_EXT, /* Geometric Shapes Extended */ + BLK_GEORGIAN, /* Georgian */ + BLK_GEORGIAN_EXT, /* Georgian Extended */ + BLK_GEORGIAN_SUP, /* Georgian Supplement */ + BLK_GLAGOLITIC, /* Glagolitic */ + BLK_GLAGOLITIC_SUP, /* Glagolitic Supplement */ + BLK_GOTHIC, /* Gothic */ + BLK_GRANTHA, /* Grantha */ + BLK_GREEK, /* Greek And Coptic */ + BLK_GREEK_EXT, /* Greek Extended */ + BLK_GUJARATI, /* Gujarati */ + BLK_GUNJALA_GONDI, /* Gunjala Gondi */ + BLK_GURMUKHI, /* Gurmukhi */ + BLK_HALF_AND_FULL_FORMS, /* Halfwidth And Fullwidth Forms */ + BLK_HALF_MARKS, /* Combining Half Marks */ + BLK_HANGUL, /* Hangul Syllables */ + BLK_HANIFI_ROHINGYA, /* Hanifi Rohingya */ + BLK_HANUNOO, /* Hanunoo */ + BLK_HATRAN, /* Hatran */ + BLK_HEBREW, /* Hebrew */ + BLK_HIGH_PU_SURROGATES, /* High Private 
Use Surrogates */ + BLK_HIGH_SURROGATES, /* High Surrogates */ + BLK_HIRAGANA, /* Hiragana */ + BLK_IDC, /* Ideographic Description Characters */ + BLK_IDEOGRAPHIC_SYMBOLS, /* Ideographic Symbols And Punctuation */ + BLK_IMPERIAL_ARAMAIC, /* Imperial Aramaic */ + BLK_INDIC_NUMBER_FORMS, /* Common Indic Number Forms */ + BLK_INDIC_SIYAQ_NUMBERS, /* Indic Siyaq Numbers */ + BLK_INSCRIPTIONAL_PAHLAVI, /* Inscriptional Pahlavi */ + BLK_INSCRIPTIONAL_PARTHIAN, /* Inscriptional Parthian */ + BLK_IPA_EXT, /* IPA Extensions */ + BLK_JAMO, /* Hangul Jamo */ + BLK_JAMO_EXT_A, /* Hangul Jamo Extended A */ + BLK_JAMO_EXT_B, /* Hangul Jamo Extended B */ + BLK_JAVANESE, /* Javanese */ + BLK_KAITHI, /* Kaithi */ + BLK_KAKTOVIK_NUMERALS, /* Kaktovik Numerals */ + BLK_KANA_EXT_A, /* Kana Extended A */ + BLK_KANA_EXT_B, /* Kana Extended B */ + BLK_KANA_SUP, /* Kana Supplement */ + BLK_KANBUN, /* Kanbun */ + BLK_KANGXI, /* Kangxi Radicals */ + BLK_KANNADA, /* Kannada */ + BLK_KATAKANA, /* Katakana */ + BLK_KATAKANA_EXT, /* Katakana Phonetic Extensions */ + BLK_KAWI, /* Kawi */ + BLK_KAYAH_LI, /* Kayah Li */ + BLK_KHAROSHTHI, /* Kharoshthi */ + BLK_KHITAN_SMALL_SCRIPT, /* Khitan Small Script */ + BLK_KHMER, /* Khmer */ + BLK_KHMER_SYMBOLS, /* Khmer Symbols */ + BLK_KHOJKI, /* Khojki */ + BLK_KHUDAWADI, /* Khudawadi */ + BLK_LAO, /* Lao */ + BLK_LATIN_1_SUP, /* Latin 1 Supplement */ + BLK_LATIN_EXT_A, /* Latin Extended A */ + BLK_LATIN_EXT_ADDITIONAL, /* Latin Extended Additional */ + BLK_LATIN_EXT_B, /* Latin Extended B */ + BLK_LATIN_EXT_C, /* Latin Extended C */ + BLK_LATIN_EXT_D, /* Latin Extended D */ + BLK_LATIN_EXT_E, /* Latin Extended E */ + BLK_LATIN_EXT_F, /* Latin Extended F */ + BLK_LATIN_EXT_G, /* Latin Extended G */ + BLK_LEPCHA, /* Lepcha */ + BLK_LETTERLIKE_SYMBOLS, /* Letterlike Symbols */ + BLK_LIMBU, /* Limbu */ + BLK_LINEAR_A, /* Linear A */ + BLK_LINEAR_B_IDEOGRAMS, /* Linear B Ideograms */ + BLK_LINEAR_B_SYLLABARY, /* Linear B Syllabary */ + BLK_LISU, /* Lisu */ + 
BLK_LISU_SUP, /* Lisu Supplement */ + BLK_LOW_SURROGATES, /* Low Surrogates */ + BLK_LYCIAN, /* Lycian */ + BLK_LYDIAN, /* Lydian */ + BLK_MAHAJANI, /* Mahajani */ + BLK_MAHJONG, /* Mahjong Tiles */ + BLK_MAKASAR, /* Makasar */ + BLK_MALAYALAM, /* Malayalam */ + BLK_MANDAIC, /* Mandaic */ + BLK_MANICHAEAN, /* Manichaean */ + BLK_MARCHEN, /* Marchen */ + BLK_MASARAM_GONDI, /* Masaram Gondi */ + BLK_MATH_ALPHANUM, /* Mathematical Alphanumeric Symbols */ + BLK_MATH_OPERATORS, /* Mathematical Operators */ + BLK_MAYAN_NUMERALS, /* Mayan Numerals */ + BLK_MEDEFAIDRIN, /* Medefaidrin */ + BLK_MEETEI_MAYEK, /* Meetei Mayek */ + BLK_MEETEI_MAYEK_EXT, /* Meetei Mayek Extensions */ + BLK_MENDE_KIKAKUI, /* Mende Kikakui */ + BLK_MEROITIC_CURSIVE, /* Meroitic Cursive */ + BLK_MEROITIC_HIEROGLYPHS, /* Meroitic Hieroglyphs */ + BLK_MIAO, /* Miao */ + BLK_MISC_ARROWS, /* Miscellaneous Symbols And Arrows */ + BLK_MISC_MATH_SYMBOLS_A, /* Miscellaneous Mathematical Symbols A */ + BLK_MISC_MATH_SYMBOLS_B, /* Miscellaneous Mathematical Symbols B */ + BLK_MISC_PICTOGRAPHS, /* Miscellaneous Symbols And Pictographs */ + BLK_MISC_SYMBOLS, /* Miscellaneous Symbols */ + BLK_MISC_TECHNICAL, /* Miscellaneous Technical */ + BLK_MODI, /* Modi */ + BLK_MODIFIER_LETTERS, /* Spacing Modifier Letters */ + BLK_MODIFIER_TONE_LETTERS, /* Modifier Tone Letters */ + BLK_MONGOLIAN, /* Mongolian */ + BLK_MONGOLIAN_SUP, /* Mongolian Supplement */ + BLK_MRO, /* Mro */ + BLK_MULTANI, /* Multani */ + BLK_MUSIC, /* Musical Symbols */ + BLK_MYANMAR, /* Myanmar */ + BLK_MYANMAR_EXT_A, /* Myanmar Extended A */ + BLK_MYANMAR_EXT_B, /* Myanmar Extended B */ + BLK_NABATAEAN, /* Nabataean */ + BLK_NAG_MUNDARI, /* Nag Mundari */ + BLK_NANDINAGARI, /* Nandinagari */ + BLK_NEW_TAI_LUE, /* New Tai Lue */ + BLK_NEWA, /* Newa */ + BLK_NKO, /* NKo */ + BLK_NUMBER_FORMS, /* Number Forms */ + BLK_NUSHU, /* Nushu */ + BLK_NYIAKENG_PUACHUE_HMONG, /* Nyiakeng Puachue Hmong */ + BLK_OCR, /* Optical Character Recognition */ + 
BLK_OGHAM, /* Ogham */ + BLK_OL_CHIKI, /* Ol Chiki */ + BLK_OLD_HUNGARIAN, /* Old Hungarian */ + BLK_OLD_ITALIC, /* Old Italic */ + BLK_OLD_NORTH_ARABIAN, /* Old North Arabian */ + BLK_OLD_PERMIC, /* Old Permic */ + BLK_OLD_PERSIAN, /* Old Persian */ + BLK_OLD_SOGDIAN, /* Old Sogdian */ + BLK_OLD_SOUTH_ARABIAN, /* Old South Arabian */ + BLK_OLD_TURKIC, /* Old Turkic */ + BLK_OLD_UYGHUR, /* Old Uyghur */ + BLK_ORIYA, /* Oriya */ + BLK_ORNAMENTAL_DINGBATS, /* Ornamental Dingbats */ + BLK_OSAGE, /* Osage */ + BLK_OSMANYA, /* Osmanya */ + BLK_OTTOMAN_SIYAQ_NUMBERS, /* Ottoman Siyaq Numbers */ + BLK_PAHAWH_HMONG, /* Pahawh Hmong */ + BLK_PALMYRENE, /* Palmyrene */ + BLK_PAU_CIN_HAU, /* Pau Cin Hau */ + BLK_PHAGS_PA, /* Phags Pa */ + BLK_PHAISTOS, /* Phaistos Disc */ + BLK_PHOENICIAN, /* Phoenician */ + BLK_PHONETIC_EXT, /* Phonetic Extensions */ + BLK_PHONETIC_EXT_SUP, /* Phonetic Extensions Supplement */ + BLK_PLAYING_CARDS, /* Playing Cards */ + BLK_PSALTER_PAHLAVI, /* Psalter Pahlavi */ + BLK_PUA, /* Private Use Area */ + BLK_PUNCTUATION, /* General Punctuation */ + BLK_REJANG, /* Rejang */ + BLK_RUMI, /* Rumi Numeral Symbols */ + BLK_RUNIC, /* Runic */ + BLK_SAMARITAN, /* Samaritan */ + BLK_SAURASHTRA, /* Saurashtra */ + BLK_SHARADA, /* Sharada */ + BLK_SHAVIAN, /* Shavian */ + BLK_SHORTHAND_FORMAT_CONTROLS, /* Shorthand Format Controls */ + BLK_SIDDHAM, /* Siddham */ + BLK_SINHALA, /* Sinhala */ + BLK_SINHALA_ARCHAIC_NUMBERS, /* Sinhala Archaic Numbers */ + BLK_SMALL_FORMS, /* Small Form Variants */ + BLK_SMALL_KANA_EXT, /* Small Kana Extension */ + BLK_SOGDIAN, /* Sogdian */ + BLK_SORA_SOMPENG, /* Sora_Sompeng */ + BLK_SOYOMBO, /* Soyombo */ + BLK_SPECIALS, /* Specials */ + BLK_SUNDANESE, /* Sundanese */ + BLK_SUNDANESE_SUP, /* Sundanese_Supplement */ + BLK_SUP_ARROWS_A, /* Supplemental Arrows A */ + BLK_SUP_ARROWS_B, /* Supplemental Arrows B */ + BLK_SUP_ARROWS_C, /* Supplemental Arrows C */ + BLK_SUP_MATH_OPERATORS, /* Supplemental Mathematical Operators */ + 
BLK_SUP_PUA_A, /* Supplementary Private Use Area A */ + BLK_SUP_PUA_B, /* Supplementary Private Use Area B */ + BLK_SUP_PUNCTUATION, /* Supplemental Punctuation */ + BLK_SUP_SYMBOLS_AND_PICTOGRAPHS, /* Supplemental Symbols And Pictographs */ + BLK_SUPER_AND_SUB, /* Superscripts And Subscripts */ + BLK_SUTTON_SIGNWRITING, /* Sutton SignWriting */ + BLK_SYLOTI_NAGRI, /* Syloti Nagri */ + BLK_SYMBOLS_AND_PICTOGRAPHS_EXT_A, /* Symbols And Pictographs Extended A */ + BLK_SYMBOLS_FOR_LEGACY_COMPUTING, /* Symbols For Legacy Computing */ + BLK_SYRIAC, /* Syriac */ + BLK_SYRIAC_SUP, /* Syriac Supplement */ + BLK_TAGALOG, /* Tagalog */ + BLK_TAGBANWA, /* Tagbanwa */ + BLK_TAGS, /* Tags */ + BLK_TAI_LE, /* Tai Le */ + BLK_TAI_THAM, /* Tai Tham */ + BLK_TAI_VIET, /* Tai Viet */ + BLK_TAI_XUAN_JING, /* Tai Xuan Jing Symbols */ + BLK_TAKRI, /* Takri */ + BLK_TAMIL, /* Tamil */ + BLK_TAMIL_SUP, /* Tamil Supplement */ + BLK_TANGSA, /* Tangsa */ + BLK_TANGUT, /* Tangut */ + BLK_TANGUT_COMPONENTS, /* Tangut Components */ + BLK_TANGUT_SUP, /* Tangut Supplement */ + BLK_TELUGU, /* Telugu */ + BLK_THAANA, /* Thaana */ + BLK_THAI, /* Thai */ + BLK_TIBETAN, /* Tibetan */ + BLK_TIFINAGH, /* Tifinagh */ + BLK_TIRHUTA, /* Tirhuta */ + BLK_TOTO, /* Toto */ + BLK_TRANSPORT_AND_MAP, /* Transport And Map Symbols */ + BLK_UCAS, /* Unified Canadian Aboriginal Syllabics */ + BLK_UCAS_EXT, /* Unified Canadian Aboriginal Syllabics Extended */ + BLK_UCAS_EXT_A, /* Unified Canadian Aboriginal Syllabics Extended A */ + BLK_UGARITIC, /* Ugaritic */ + BLK_VAI, /* Vai */ + BLK_VEDIC_EXT, /* Vedic Extensions */ + BLK_VERTICAL_FORMS, /* Vertical_Forms */ + BLK_VITHKUQI, /* Vithkuqi */ + BLK_VS, /* Variation Selectors */ + BLK_VS_SUP, /* Variation Selectors Supplement */ + BLK_WANCHO, /* Wancho */ + BLK_WARANG_CITI, /* Warang Citi */ + BLK_YEZIDI, /* Yezidi */ + BLK_YI_RADICALS, /* Yi Radicals */ + BLK_YI_SYLLABLES, /* Yi Syllables */ + BLK_YIJING, /* Yijing Hexagram Symbols */ + BLK_ZANABAZAR_SQUARE, /* 
Zanabazar Square */ + BLK_ZNAMENNY_MUSIC, /* Znamenny Musical Notation */ +}; + enum uprop_bpt { BPT_N, /* None */ BPT_C, /* Close */ @@ -222,6 +556,7 @@ enum uprop_nt { [[__mlib_uprop_attrs]] double uprop_get_nv(rune); [[__mlib_uprop_attrs]] enum uprop_age uprop_get_age(rune); +[[__mlib_uprop_attrs]] enum uprop_blk uprop_get_blk(rune); [[__mlib_uprop_attrs]] enum uprop_bpt uprop_get_bpt(rune); [[__mlib_uprop_attrs]] enum uprop_dt uprop_get_dt(rune); [[__mlib_uprop_attrs]] enum uprop_ea uprop_get_ea(rune); diff --git a/lib/unicode/prop/uprop_get_blk.c b/lib/unicode/prop/uprop_get_blk.c new file mode 100644 index 0000000..a1a6c39 --- /dev/null +++ b/lib/unicode/prop/uprop_get_blk.c @@ -0,0 +1,381 @@ +/* This file is autogenerated by gen/prop/blk; DO NOT EDIT. */ + +#include "__bsearch.h" +#include "macros.h" +#include "rune.h" +#include "unicode/prop.h" + +static constexpr enum uprop_blk lookup_lat1[] = {}; + +static const struct { + rune lo, hi; + enum uprop_blk val; +} lookup[] = { + {RUNE_C(0x000100), RUNE_C(0x00017F), BLK_LATIN_EXT_A}, + {RUNE_C(0x000180), RUNE_C(0x00024F), BLK_LATIN_EXT_B}, + {RUNE_C(0x000250), RUNE_C(0x0002AF), BLK_IPA_EXT}, + {RUNE_C(0x0002B0), RUNE_C(0x0002FF), BLK_MODIFIER_LETTERS}, + {RUNE_C(0x000300), RUNE_C(0x00036F), BLK_DIACRITICALS}, + {RUNE_C(0x000370), RUNE_C(0x0003FF), BLK_GREEK}, + {RUNE_C(0x000400), RUNE_C(0x0004FF), BLK_CYRILLIC}, + {RUNE_C(0x000500), RUNE_C(0x00052F), BLK_CYRILLIC_SUP}, + {RUNE_C(0x000530), RUNE_C(0x00058F), BLK_ARMENIAN}, + {RUNE_C(0x000590), RUNE_C(0x0005FF), BLK_HEBREW}, + {RUNE_C(0x000600), RUNE_C(0x0006FF), BLK_ARABIC}, + {RUNE_C(0x000700), RUNE_C(0x00074F), BLK_SYRIAC}, + {RUNE_C(0x000750), RUNE_C(0x00077F), BLK_ARABIC_SUP}, + {RUNE_C(0x000780), RUNE_C(0x0007BF), BLK_THAANA}, + {RUNE_C(0x0007C0), RUNE_C(0x0007FF), BLK_NKO}, + {RUNE_C(0x000800), RUNE_C(0x00083F), BLK_SAMARITAN}, + {RUNE_C(0x000840), RUNE_C(0x00085F), BLK_MANDAIC}, + {RUNE_C(0x000860), RUNE_C(0x00086F), BLK_SYRIAC_SUP}, + {RUNE_C(0x000870), 
RUNE_C(0x00089F), BLK_ARABIC_EXT_B}, + {RUNE_C(0x0008A0), RUNE_C(0x0008FF), BLK_ARABIC_EXT_A}, + {RUNE_C(0x000900), RUNE_C(0x00097F), BLK_DEVANAGARI}, + {RUNE_C(0x000980), RUNE_C(0x0009FF), BLK_BENGALI}, + {RUNE_C(0x000A00), RUNE_C(0x000A7F), BLK_GURMUKHI}, + {RUNE_C(0x000A80), RUNE_C(0x000AFF), BLK_GUJARATI}, + {RUNE_C(0x000B00), RUNE_C(0x000B7F), BLK_ORIYA}, + {RUNE_C(0x000B80), RUNE_C(0x000BFF), BLK_TAMIL}, + {RUNE_C(0x000C00), RUNE_C(0x000C7F), BLK_TELUGU}, + {RUNE_C(0x000C80), RUNE_C(0x000CFF), BLK_KANNADA}, + {RUNE_C(0x000D00), RUNE_C(0x000D7F), BLK_MALAYALAM}, + {RUNE_C(0x000D80), RUNE_C(0x000DFF), BLK_SINHALA}, + {RUNE_C(0x000E00), RUNE_C(0x000E7F), BLK_THAI}, + {RUNE_C(0x000E80), RUNE_C(0x000EFF), BLK_LAO}, + {RUNE_C(0x000F00), RUNE_C(0x000FFF), BLK_TIBETAN}, + {RUNE_C(0x001000), RUNE_C(0x00109F), BLK_MYANMAR}, + {RUNE_C(0x0010A0), RUNE_C(0x0010FF), BLK_GEORGIAN}, + {RUNE_C(0x001100), RUNE_C(0x0011FF), BLK_JAMO}, + {RUNE_C(0x001200), RUNE_C(0x00137F), BLK_ETHIOPIC}, + {RUNE_C(0x001380), RUNE_C(0x00139F), BLK_ETHIOPIC_SUP}, + {RUNE_C(0x0013A0), RUNE_C(0x0013FF), BLK_CHEROKEE}, + {RUNE_C(0x001400), RUNE_C(0x00167F), BLK_UCAS}, + {RUNE_C(0x001680), RUNE_C(0x00169F), BLK_OGHAM}, + {RUNE_C(0x0016A0), RUNE_C(0x0016FF), BLK_RUNIC}, + {RUNE_C(0x001700), RUNE_C(0x00171F), BLK_TAGALOG}, + {RUNE_C(0x001720), RUNE_C(0x00173F), BLK_HANUNOO}, + {RUNE_C(0x001740), RUNE_C(0x00175F), BLK_BUHID}, + {RUNE_C(0x001760), RUNE_C(0x00177F), BLK_TAGBANWA}, + {RUNE_C(0x001780), RUNE_C(0x0017FF), BLK_KHMER}, + {RUNE_C(0x001800), RUNE_C(0x0018AF), BLK_MONGOLIAN}, + {RUNE_C(0x0018B0), RUNE_C(0x0018FF), BLK_UCAS_EXT}, + {RUNE_C(0x001900), RUNE_C(0x00194F), BLK_LIMBU}, + {RUNE_C(0x001950), RUNE_C(0x00197F), BLK_TAI_LE}, + {RUNE_C(0x001980), RUNE_C(0x0019DF), BLK_NEW_TAI_LUE}, + {RUNE_C(0x0019E0), RUNE_C(0x0019FF), BLK_KHMER_SYMBOLS}, + {RUNE_C(0x001A00), RUNE_C(0x001A1F), BLK_BUGINESE}, + {RUNE_C(0x001A20), RUNE_C(0x001AAF), BLK_TAI_THAM}, + {RUNE_C(0x001AB0), RUNE_C(0x001AFF), 
BLK_DIACRITICALS_EXT}, + {RUNE_C(0x001B00), RUNE_C(0x001B7F), BLK_BALINESE}, + {RUNE_C(0x001B80), RUNE_C(0x001BBF), BLK_SUNDANESE}, + {RUNE_C(0x001BC0), RUNE_C(0x001BFF), BLK_BATAK}, + {RUNE_C(0x001C00), RUNE_C(0x001C4F), BLK_LEPCHA}, + {RUNE_C(0x001C50), RUNE_C(0x001C7F), BLK_OL_CHIKI}, + {RUNE_C(0x001C80), RUNE_C(0x001C8F), BLK_CYRILLIC_EXT_C}, + {RUNE_C(0x001C90), RUNE_C(0x001CBF), BLK_GEORGIAN_EXT}, + {RUNE_C(0x001CC0), RUNE_C(0x001CCF), BLK_SUNDANESE_SUP}, + {RUNE_C(0x001CD0), RUNE_C(0x001CFF), BLK_VEDIC_EXT}, + {RUNE_C(0x001D00), RUNE_C(0x001D7F), BLK_PHONETIC_EXT}, + {RUNE_C(0x001D80), RUNE_C(0x001DBF), BLK_PHONETIC_EXT_SUP}, + {RUNE_C(0x001DC0), RUNE_C(0x001DFF), BLK_DIACRITICALS_SUP}, + {RUNE_C(0x001E00), RUNE_C(0x001EFF), BLK_LATIN_EXT_ADDITIONAL}, + {RUNE_C(0x001F00), RUNE_C(0x001FFF), BLK_GREEK_EXT}, + {RUNE_C(0x002000), RUNE_C(0x00206F), BLK_PUNCTUATION}, + {RUNE_C(0x002070), RUNE_C(0x00209F), BLK_SUPER_AND_SUB}, + {RUNE_C(0x0020A0), RUNE_C(0x0020CF), BLK_CURRENCY_SYMBOLS}, + {RUNE_C(0x0020D0), RUNE_C(0x0020FF), BLK_DIACRITICALS_FOR_SYMBOLS}, + {RUNE_C(0x002100), RUNE_C(0x00214F), BLK_LETTERLIKE_SYMBOLS}, + {RUNE_C(0x002150), RUNE_C(0x00218F), BLK_NUMBER_FORMS}, + {RUNE_C(0x002190), RUNE_C(0x0021FF), BLK_ARROWS}, + {RUNE_C(0x002200), RUNE_C(0x0022FF), BLK_MATH_OPERATORS}, + {RUNE_C(0x002300), RUNE_C(0x0023FF), BLK_MISC_TECHNICAL}, + {RUNE_C(0x002400), RUNE_C(0x00243F), BLK_CONTROL_PICTURES}, + {RUNE_C(0x002440), RUNE_C(0x00245F), BLK_OCR}, + {RUNE_C(0x002460), RUNE_C(0x0024FF), BLK_ENCLOSED_ALPHANUM}, + {RUNE_C(0x002500), RUNE_C(0x00257F), BLK_BOX_DRAWING}, + {RUNE_C(0x002580), RUNE_C(0x00259F), BLK_BLOCK_ELEMENTS}, + {RUNE_C(0x0025A0), RUNE_C(0x0025FF), BLK_GEOMETRIC_SHAPES}, + {RUNE_C(0x002600), RUNE_C(0x0026FF), BLK_MISC_SYMBOLS}, + {RUNE_C(0x002700), RUNE_C(0x0027BF), BLK_DINGBATS}, + {RUNE_C(0x0027C0), RUNE_C(0x0027EF), BLK_MISC_MATH_SYMBOLS_A}, + {RUNE_C(0x0027F0), RUNE_C(0x0027FF), BLK_SUP_ARROWS_A}, + {RUNE_C(0x002800), RUNE_C(0x0028FF), 
BLK_BRAILLE}, + {RUNE_C(0x002900), RUNE_C(0x00297F), BLK_SUP_ARROWS_B}, + {RUNE_C(0x002980), RUNE_C(0x0029FF), BLK_MISC_MATH_SYMBOLS_B}, + {RUNE_C(0x002A00), RUNE_C(0x002AFF), BLK_SUP_MATH_OPERATORS}, + {RUNE_C(0x002B00), RUNE_C(0x002BFF), BLK_MISC_ARROWS}, + {RUNE_C(0x002C00), RUNE_C(0x002C5F), BLK_GLAGOLITIC}, + {RUNE_C(0x002C60), RUNE_C(0x002C7F), BLK_LATIN_EXT_C}, + {RUNE_C(0x002C80), RUNE_C(0x002CFF), BLK_COPTIC}, + {RUNE_C(0x002D00), RUNE_C(0x002D2F), BLK_GEORGIAN_SUP}, + {RUNE_C(0x002D30), RUNE_C(0x002D7F), BLK_TIFINAGH}, + {RUNE_C(0x002D80), RUNE_C(0x002DDF), BLK_ETHIOPIC_EXT}, + {RUNE_C(0x002DE0), RUNE_C(0x002DFF), BLK_CYRILLIC_EXT_A}, + {RUNE_C(0x002E00), RUNE_C(0x002E7F), BLK_SUP_PUNCTUATION}, + {RUNE_C(0x002E80), RUNE_C(0x002EFF), BLK_CJK_RADICALS_SUP}, + {RUNE_C(0x002F00), RUNE_C(0x002FDF), BLK_KANGXI}, + {RUNE_C(0x002FF0), RUNE_C(0x002FFF), BLK_IDC}, + {RUNE_C(0x003000), RUNE_C(0x00303F), BLK_CJK_SYMBOLS}, + {RUNE_C(0x003040), RUNE_C(0x00309F), BLK_HIRAGANA}, + {RUNE_C(0x0030A0), RUNE_C(0x0030FF), BLK_KATAKANA}, + {RUNE_C(0x003100), RUNE_C(0x00312F), BLK_BOPOMOFO}, + {RUNE_C(0x003130), RUNE_C(0x00318F), BLK_COMPAT_JAMO}, + {RUNE_C(0x003190), RUNE_C(0x00319F), BLK_KANBUN}, + {RUNE_C(0x0031A0), RUNE_C(0x0031BF), BLK_BOPOMOFO_EXT}, + {RUNE_C(0x0031C0), RUNE_C(0x0031EF), BLK_CJK_STROKES}, + {RUNE_C(0x0031F0), RUNE_C(0x0031FF), BLK_KATAKANA_EXT}, + {RUNE_C(0x003200), RUNE_C(0x0032FF), BLK_ENCLOSED_CJK}, + {RUNE_C(0x003300), RUNE_C(0x0033FF), BLK_CJK_COMPAT}, + {RUNE_C(0x003400), RUNE_C(0x004DBF), BLK_CJK_EXT_A}, + {RUNE_C(0x004DC0), RUNE_C(0x004DFF), BLK_YIJING}, + {RUNE_C(0x004E00), RUNE_C(0x009FFF), BLK_CJK}, + {RUNE_C(0x00A000), RUNE_C(0x00A48F), BLK_YI_SYLLABLES}, + {RUNE_C(0x00A490), RUNE_C(0x00A4CF), BLK_YI_RADICALS}, + {RUNE_C(0x00A4D0), RUNE_C(0x00A4FF), BLK_LISU}, + {RUNE_C(0x00A500), RUNE_C(0x00A63F), BLK_VAI}, + {RUNE_C(0x00A640), RUNE_C(0x00A69F), BLK_CYRILLIC_EXT_B}, + {RUNE_C(0x00A6A0), RUNE_C(0x00A6FF), BLK_BAMUM}, + {RUNE_C(0x00A700), 
RUNE_C(0x00A71F), BLK_MODIFIER_TONE_LETTERS}, + {RUNE_C(0x00A720), RUNE_C(0x00A7FF), BLK_LATIN_EXT_D}, + {RUNE_C(0x00A800), RUNE_C(0x00A82F), BLK_SYLOTI_NAGRI}, + {RUNE_C(0x00A830), RUNE_C(0x00A83F), BLK_INDIC_NUMBER_FORMS}, + {RUNE_C(0x00A840), RUNE_C(0x00A87F), BLK_PHAGS_PA}, + {RUNE_C(0x00A880), RUNE_C(0x00A8DF), BLK_SAURASHTRA}, + {RUNE_C(0x00A8E0), RUNE_C(0x00A8FF), BLK_DEVANAGARI_EXT}, + {RUNE_C(0x00A900), RUNE_C(0x00A92F), BLK_KAYAH_LI}, + {RUNE_C(0x00A930), RUNE_C(0x00A95F), BLK_REJANG}, + {RUNE_C(0x00A960), RUNE_C(0x00A97F), BLK_JAMO_EXT_A}, + {RUNE_C(0x00A980), RUNE_C(0x00A9DF), BLK_JAVANESE}, + {RUNE_C(0x00A9E0), RUNE_C(0x00A9FF), BLK_MYANMAR_EXT_B}, + {RUNE_C(0x00AA00), RUNE_C(0x00AA5F), BLK_CHAM}, + {RUNE_C(0x00AA60), RUNE_C(0x00AA7F), BLK_MYANMAR_EXT_A}, + {RUNE_C(0x00AA80), RUNE_C(0x00AADF), BLK_TAI_VIET}, + {RUNE_C(0x00AAE0), RUNE_C(0x00AAFF), BLK_MEETEI_MAYEK_EXT}, + {RUNE_C(0x00AB00), RUNE_C(0x00AB2F), BLK_ETHIOPIC_EXT_A}, + {RUNE_C(0x00AB30), RUNE_C(0x00AB6F), BLK_LATIN_EXT_E}, + {RUNE_C(0x00AB70), RUNE_C(0x00ABBF), BLK_CHEROKEE_SUP}, + {RUNE_C(0x00ABC0), RUNE_C(0x00ABFF), BLK_MEETEI_MAYEK}, + {RUNE_C(0x00AC00), RUNE_C(0x00D7AF), BLK_HANGUL}, + {RUNE_C(0x00D7B0), RUNE_C(0x00D7FF), BLK_JAMO_EXT_B}, + {RUNE_C(0x00D800), RUNE_C(0x00DB7F), BLK_HIGH_SURROGATES}, + {RUNE_C(0x00DB80), RUNE_C(0x00DBFF), BLK_HIGH_PU_SURROGATES}, + {RUNE_C(0x00DC00), RUNE_C(0x00DFFF), BLK_LOW_SURROGATES}, + {RUNE_C(0x00E000), RUNE_C(0x00F8FF), BLK_PUA}, + {RUNE_C(0x00F900), RUNE_C(0x00FAFF), BLK_CJK_COMPAT_IDEOGRAPHS}, + {RUNE_C(0x00FB00), RUNE_C(0x00FB4F), BLK_ALPHABETIC_PF}, + {RUNE_C(0x00FB50), RUNE_C(0x00FDFF), BLK_ARABIC_PF_A}, + {RUNE_C(0x00FE00), RUNE_C(0x00FE0F), BLK_VS}, + {RUNE_C(0x00FE10), RUNE_C(0x00FE1F), BLK_VERTICAL_FORMS}, + {RUNE_C(0x00FE20), RUNE_C(0x00FE2F), BLK_HALF_MARKS}, + {RUNE_C(0x00FE30), RUNE_C(0x00FE4F), BLK_CJK_COMPAT_FORMS}, + {RUNE_C(0x00FE50), RUNE_C(0x00FE6F), BLK_SMALL_FORMS}, + {RUNE_C(0x00FE70), RUNE_C(0x00FEFF), BLK_ARABIC_PF_B}, + 
{RUNE_C(0x00FF00), RUNE_C(0x00FFEF), BLK_HALF_AND_FULL_FORMS}, + {RUNE_C(0x00FFF0), RUNE_C(0x00FFFF), BLK_SPECIALS}, + {RUNE_C(0x010000), RUNE_C(0x01007F), BLK_LINEAR_B_SYLLABARY}, + {RUNE_C(0x010080), RUNE_C(0x0100FF), BLK_LINEAR_B_IDEOGRAMS}, + {RUNE_C(0x010100), RUNE_C(0x01013F), BLK_AEGEAN_NUMBERS}, + {RUNE_C(0x010140), RUNE_C(0x01018F), BLK_ANCIENT_GREEK_NUMBERS}, + {RUNE_C(0x010190), RUNE_C(0x0101CF), BLK_ANCIENT_SYMBOLS}, + {RUNE_C(0x0101D0), RUNE_C(0x0101FF), BLK_PHAISTOS}, + {RUNE_C(0x010280), RUNE_C(0x01029F), BLK_LYCIAN}, + {RUNE_C(0x0102A0), RUNE_C(0x0102DF), BLK_CARIAN}, + {RUNE_C(0x0102E0), RUNE_C(0x0102FF), BLK_COPTIC_EPACT_NUMBERS}, + {RUNE_C(0x010300), RUNE_C(0x01032F), BLK_OLD_ITALIC}, + {RUNE_C(0x010330), RUNE_C(0x01034F), BLK_GOTHIC}, + {RUNE_C(0x010350), RUNE_C(0x01037F), BLK_OLD_PERMIC}, + {RUNE_C(0x010380), RUNE_C(0x01039F), BLK_UGARITIC}, + {RUNE_C(0x0103A0), RUNE_C(0x0103DF), BLK_OLD_PERSIAN}, + {RUNE_C(0x010400), RUNE_C(0x01044F), BLK_DESERET}, + {RUNE_C(0x010450), RUNE_C(0x01047F), BLK_SHAVIAN}, + {RUNE_C(0x010480), RUNE_C(0x0104AF), BLK_OSMANYA}, + {RUNE_C(0x0104B0), RUNE_C(0x0104FF), BLK_OSAGE}, + {RUNE_C(0x010500), RUNE_C(0x01052F), BLK_ELBASAN}, + {RUNE_C(0x010530), RUNE_C(0x01056F), BLK_CAUCASIAN_ALBANIAN}, + {RUNE_C(0x010570), RUNE_C(0x0105BF), BLK_VITHKUQI}, + {RUNE_C(0x010600), RUNE_C(0x01077F), BLK_LINEAR_A}, + {RUNE_C(0x010780), RUNE_C(0x0107BF), BLK_LATIN_EXT_F}, + {RUNE_C(0x010800), RUNE_C(0x01083F), BLK_CYPRIOT_SYLLABARY}, + {RUNE_C(0x010840), RUNE_C(0x01085F), BLK_IMPERIAL_ARAMAIC}, + {RUNE_C(0x010860), RUNE_C(0x01087F), BLK_PALMYRENE}, + {RUNE_C(0x010880), RUNE_C(0x0108AF), BLK_NABATAEAN}, + {RUNE_C(0x0108E0), RUNE_C(0x0108FF), BLK_HATRAN}, + {RUNE_C(0x010900), RUNE_C(0x01091F), BLK_PHOENICIAN}, + {RUNE_C(0x010920), RUNE_C(0x01093F), BLK_LYDIAN}, + {RUNE_C(0x010980), RUNE_C(0x01099F), BLK_MEROITIC_HIEROGLYPHS}, + {RUNE_C(0x0109A0), RUNE_C(0x0109FF), BLK_MEROITIC_CURSIVE}, + {RUNE_C(0x010A00), RUNE_C(0x010A5F), 
BLK_KHAROSHTHI}, + {RUNE_C(0x010A60), RUNE_C(0x010A7F), BLK_OLD_SOUTH_ARABIAN}, + {RUNE_C(0x010A80), RUNE_C(0x010A9F), BLK_OLD_NORTH_ARABIAN}, + {RUNE_C(0x010AC0), RUNE_C(0x010AFF), BLK_MANICHAEAN}, + {RUNE_C(0x010B00), RUNE_C(0x010B3F), BLK_AVESTAN}, + {RUNE_C(0x010B40), RUNE_C(0x010B5F), BLK_INSCRIPTIONAL_PARTHIAN}, + {RUNE_C(0x010B60), RUNE_C(0x010B7F), BLK_INSCRIPTIONAL_PAHLAVI}, + {RUNE_C(0x010B80), RUNE_C(0x010BAF), BLK_PSALTER_PAHLAVI}, + {RUNE_C(0x010C00), RUNE_C(0x010C4F), BLK_OLD_TURKIC}, + {RUNE_C(0x010C80), RUNE_C(0x010CFF), BLK_OLD_HUNGARIAN}, + {RUNE_C(0x010D00), RUNE_C(0x010D3F), BLK_HANIFI_ROHINGYA}, + {RUNE_C(0x010E60), RUNE_C(0x010E7F), BLK_RUMI}, + {RUNE_C(0x010E80), RUNE_C(0x010EBF), BLK_YEZIDI}, + {RUNE_C(0x010EC0), RUNE_C(0x010EFF), BLK_ARABIC_EXT_C}, + {RUNE_C(0x010F00), RUNE_C(0x010F2F), BLK_OLD_SOGDIAN}, + {RUNE_C(0x010F30), RUNE_C(0x010F6F), BLK_SOGDIAN}, + {RUNE_C(0x010F70), RUNE_C(0x010FAF), BLK_OLD_UYGHUR}, + {RUNE_C(0x010FB0), RUNE_C(0x010FDF), BLK_CHORASMIAN}, + {RUNE_C(0x010FE0), RUNE_C(0x010FFF), BLK_ELYMAIC}, + {RUNE_C(0x011000), RUNE_C(0x01107F), BLK_BRAHMI}, + {RUNE_C(0x011080), RUNE_C(0x0110CF), BLK_KAITHI}, + {RUNE_C(0x0110D0), RUNE_C(0x0110FF), BLK_SORA_SOMPENG}, + {RUNE_C(0x011100), RUNE_C(0x01114F), BLK_CHAKMA}, + {RUNE_C(0x011150), RUNE_C(0x01117F), BLK_MAHAJANI}, + {RUNE_C(0x011180), RUNE_C(0x0111DF), BLK_SHARADA}, + {RUNE_C(0x0111E0), RUNE_C(0x0111FF), BLK_SINHALA_ARCHAIC_NUMBERS}, + {RUNE_C(0x011200), RUNE_C(0x01124F), BLK_KHOJKI}, + {RUNE_C(0x011280), RUNE_C(0x0112AF), BLK_MULTANI}, + {RUNE_C(0x0112B0), RUNE_C(0x0112FF), BLK_KHUDAWADI}, + {RUNE_C(0x011300), RUNE_C(0x01137F), BLK_GRANTHA}, + {RUNE_C(0x011400), RUNE_C(0x01147F), BLK_NEWA}, + {RUNE_C(0x011480), RUNE_C(0x0114DF), BLK_TIRHUTA}, + {RUNE_C(0x011580), RUNE_C(0x0115FF), BLK_SIDDHAM}, + {RUNE_C(0x011600), RUNE_C(0x01165F), BLK_MODI}, + {RUNE_C(0x011660), RUNE_C(0x01167F), BLK_MONGOLIAN_SUP}, + {RUNE_C(0x011680), RUNE_C(0x0116CF), BLK_TAKRI}, + {RUNE_C(0x011700), 
RUNE_C(0x01174F), BLK_AHOM}, + {RUNE_C(0x011800), RUNE_C(0x01184F), BLK_DOGRA}, + {RUNE_C(0x0118A0), RUNE_C(0x0118FF), BLK_WARANG_CITI}, + {RUNE_C(0x011900), RUNE_C(0x01195F), BLK_DIVES_AKURU}, + {RUNE_C(0x0119A0), RUNE_C(0x0119FF), BLK_NANDINAGARI}, + {RUNE_C(0x011A00), RUNE_C(0x011A4F), BLK_ZANABAZAR_SQUARE}, + {RUNE_C(0x011A50), RUNE_C(0x011AAF), BLK_SOYOMBO}, + {RUNE_C(0x011AB0), RUNE_C(0x011ABF), BLK_UCAS_EXT_A}, + {RUNE_C(0x011AC0), RUNE_C(0x011AFF), BLK_PAU_CIN_HAU}, + {RUNE_C(0x011B00), RUNE_C(0x011B5F), BLK_DEVANAGARI_EXT_A}, + {RUNE_C(0x011C00), RUNE_C(0x011C6F), BLK_BHAIKSUKI}, + {RUNE_C(0x011C70), RUNE_C(0x011CBF), BLK_MARCHEN}, + {RUNE_C(0x011D00), RUNE_C(0x011D5F), BLK_MASARAM_GONDI}, + {RUNE_C(0x011D60), RUNE_C(0x011DAF), BLK_GUNJALA_GONDI}, + {RUNE_C(0x011EE0), RUNE_C(0x011EFF), BLK_MAKASAR}, + {RUNE_C(0x011F00), RUNE_C(0x011F5F), BLK_KAWI}, + {RUNE_C(0x011FB0), RUNE_C(0x011FBF), BLK_LISU_SUP}, + {RUNE_C(0x011FC0), RUNE_C(0x011FFF), BLK_TAMIL_SUP}, + {RUNE_C(0x012000), RUNE_C(0x0123FF), BLK_CUNEIFORM}, + {RUNE_C(0x012400), RUNE_C(0x01247F), BLK_CUNEIFORM_NUMBERS}, + {RUNE_C(0x012480), RUNE_C(0x01254F), BLK_EARLY_DYNASTIC_CUNEIFORM}, + {RUNE_C(0x012F90), RUNE_C(0x012FFF), BLK_CYPRO_MINOAN}, + {RUNE_C(0x013000), RUNE_C(0x01342F), BLK_EGYPTIAN_HIEROGLYPHS}, + {RUNE_C(0x013430), RUNE_C(0x01345F), BLK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS}, + {RUNE_C(0x014400), RUNE_C(0x01467F), BLK_ANATOLIAN_HIEROGLYPHS}, + {RUNE_C(0x016800), RUNE_C(0x016A3F), BLK_BAMUM_SUP}, + {RUNE_C(0x016A40), RUNE_C(0x016A6F), BLK_MRO}, + {RUNE_C(0x016A70), RUNE_C(0x016ACF), BLK_TANGSA}, + {RUNE_C(0x016AD0), RUNE_C(0x016AFF), BLK_BASSA_VAH}, + {RUNE_C(0x016B00), RUNE_C(0x016B8F), BLK_PAHAWH_HMONG}, + {RUNE_C(0x016E40), RUNE_C(0x016E9F), BLK_MEDEFAIDRIN}, + {RUNE_C(0x016F00), RUNE_C(0x016F9F), BLK_MIAO}, + {RUNE_C(0x016FE0), RUNE_C(0x016FFF), BLK_IDEOGRAPHIC_SYMBOLS}, + {RUNE_C(0x017000), RUNE_C(0x0187FF), BLK_TANGUT}, + {RUNE_C(0x018800), RUNE_C(0x018AFF), BLK_TANGUT_COMPONENTS}, + 
{RUNE_C(0x018B00), RUNE_C(0x018CFF), BLK_KHITAN_SMALL_SCRIPT}, + {RUNE_C(0x018D00), RUNE_C(0x018D7F), BLK_TANGUT_SUP}, + {RUNE_C(0x01AFF0), RUNE_C(0x01AFFF), BLK_KANA_EXT_B}, + {RUNE_C(0x01B000), RUNE_C(0x01B0FF), BLK_KANA_SUP}, + {RUNE_C(0x01B100), RUNE_C(0x01B12F), BLK_KANA_EXT_A}, + {RUNE_C(0x01B130), RUNE_C(0x01B16F), BLK_SMALL_KANA_EXT}, + {RUNE_C(0x01B170), RUNE_C(0x01B2FF), BLK_NUSHU}, + {RUNE_C(0x01BC00), RUNE_C(0x01BC9F), BLK_DUPLOYAN}, + {RUNE_C(0x01BCA0), RUNE_C(0x01BCAF), BLK_SHORTHAND_FORMAT_CONTROLS}, + {RUNE_C(0x01CF00), RUNE_C(0x01CFCF), BLK_ZNAMENNY_MUSIC}, + {RUNE_C(0x01D000), RUNE_C(0x01D0FF), BLK_BYZANTINE_MUSIC}, + {RUNE_C(0x01D100), RUNE_C(0x01D1FF), BLK_MUSIC}, + {RUNE_C(0x01D200), RUNE_C(0x01D24F), BLK_ANCIENT_GREEK_MUSIC}, + {RUNE_C(0x01D2C0), RUNE_C(0x01D2DF), BLK_KAKTOVIK_NUMERALS}, + {RUNE_C(0x01D2E0), RUNE_C(0x01D2FF), BLK_MAYAN_NUMERALS}, + {RUNE_C(0x01D300), RUNE_C(0x01D35F), BLK_TAI_XUAN_JING}, + {RUNE_C(0x01D360), RUNE_C(0x01D37F), BLK_COUNTING_ROD}, + {RUNE_C(0x01D400), RUNE_C(0x01D7FF), BLK_MATH_ALPHANUM}, + {RUNE_C(0x01D800), RUNE_C(0x01DAAF), BLK_SUTTON_SIGNWRITING}, + {RUNE_C(0x01DF00), RUNE_C(0x01DFFF), BLK_LATIN_EXT_G}, + {RUNE_C(0x01E000), RUNE_C(0x01E02F), BLK_GLAGOLITIC_SUP}, + {RUNE_C(0x01E030), RUNE_C(0x01E08F), BLK_CYRILLIC_EXT_D}, + {RUNE_C(0x01E100), RUNE_C(0x01E14F), BLK_NYIAKENG_PUACHUE_HMONG}, + {RUNE_C(0x01E290), RUNE_C(0x01E2BF), BLK_TOTO}, + {RUNE_C(0x01E2C0), RUNE_C(0x01E2FF), BLK_WANCHO}, + {RUNE_C(0x01E4D0), RUNE_C(0x01E4FF), BLK_NAG_MUNDARI}, + {RUNE_C(0x01E7E0), RUNE_C(0x01E7FF), BLK_ETHIOPIC_EXT_B}, + {RUNE_C(0x01E800), RUNE_C(0x01E8DF), BLK_MENDE_KIKAKUI}, + {RUNE_C(0x01E900), RUNE_C(0x01E95F), BLK_ADLAM}, + {RUNE_C(0x01EC70), RUNE_C(0x01ECBF), BLK_INDIC_SIYAQ_NUMBERS}, + {RUNE_C(0x01ED00), RUNE_C(0x01ED4F), BLK_OTTOMAN_SIYAQ_NUMBERS}, + {RUNE_C(0x01EE00), RUNE_C(0x01EEFF), BLK_ARABIC_MATH}, + {RUNE_C(0x01F000), RUNE_C(0x01F02F), BLK_MAHJONG}, + {RUNE_C(0x01F030), RUNE_C(0x01F09F), BLK_DOMINO}, + 
{RUNE_C(0x01F0A0), RUNE_C(0x01F0FF), BLK_PLAYING_CARDS}, + {RUNE_C(0x01F100), RUNE_C(0x01F1FF), BLK_ENCLOSED_ALPHANUM_SUP}, + {RUNE_C(0x01F200), RUNE_C(0x01F2FF), BLK_ENCLOSED_IDEOGRAPHIC_SUP}, + {RUNE_C(0x01F300), RUNE_C(0x01F5FF), BLK_MISC_PICTOGRAPHS}, + {RUNE_C(0x01F600), RUNE_C(0x01F64F), BLK_EMOTICONS}, + {RUNE_C(0x01F650), RUNE_C(0x01F67F), BLK_ORNAMENTAL_DINGBATS}, + {RUNE_C(0x01F680), RUNE_C(0x01F6FF), BLK_TRANSPORT_AND_MAP}, + {RUNE_C(0x01F700), RUNE_C(0x01F77F), BLK_ALCHEMICAL}, + {RUNE_C(0x01F780), RUNE_C(0x01F7FF), BLK_GEOMETRIC_SHAPES_EXT}, + {RUNE_C(0x01F800), RUNE_C(0x01F8FF), BLK_SUP_ARROWS_C}, + {RUNE_C(0x01F900), RUNE_C(0x01F9FF), BLK_SUP_SYMBOLS_AND_PICTOGRAPHS}, + {RUNE_C(0x01FA00), RUNE_C(0x01FA6F), BLK_CHESS_SYMBOLS}, + {RUNE_C(0x01FA70), RUNE_C(0x01FAFF), BLK_SYMBOLS_AND_PICTOGRAPHS_EXT_A}, + {RUNE_C(0x01FB00), RUNE_C(0x01FBFF), BLK_SYMBOLS_FOR_LEGACY_COMPUTING}, + {RUNE_C(0x020000), RUNE_C(0x02A6DF), BLK_CJK_EXT_B}, + {RUNE_C(0x02A700), RUNE_C(0x02B73F), BLK_CJK_EXT_C}, + {RUNE_C(0x02B740), RUNE_C(0x02B81F), BLK_CJK_EXT_D}, + {RUNE_C(0x02B820), RUNE_C(0x02CEAF), BLK_CJK_EXT_E}, + {RUNE_C(0x02CEB0), RUNE_C(0x02EBEF), BLK_CJK_EXT_F}, + {RUNE_C(0x02EBF0), RUNE_C(0x02EE5F), BLK_CJK_EXT_I}, + {RUNE_C(0x02F800), RUNE_C(0x02FA1F), BLK_CJK_COMPAT_IDEOGRAPHS_SUP}, + {RUNE_C(0x030000), RUNE_C(0x03134F), BLK_CJK_EXT_G}, + {RUNE_C(0x031350), RUNE_C(0x0323AF), BLK_CJK_EXT_H}, + {RUNE_C(0x0E0000), RUNE_C(0x0E007F), BLK_TAGS}, + {RUNE_C(0x0E0100), RUNE_C(0x0E01EF), BLK_VS_SUP}, + {RUNE_C(0x0F0000), RUNE_C(0x0FFFFF), BLK_SUP_PUA_A}, + {RUNE_C(0x100000), RUNE_C(0x10FFFF), BLK_SUP_PUA_B}, +}; + +__MLIB_DEFINE_BSEARCH(enum uprop_blk, lookup, BLK_NB) + +enum uprop_blk +uprop_get_blk(rune ch) +{ + return ch <= lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch); +} -- cgit v1.2.3