#!/bin/sh
#
# Generate lib/unicode/prop/uprop_get_sc.c from data/Scripts.
#
# The input is read line by line; every line that does not start with "#"
# is expected to look like
#
#     XXXX[..YYYY] ; Script_Name # trailing comment
#
# where XXXX/YYYY are hexadecimal code points.  Each long script name is
# translated to a four-letter SC_* enumerator through the table in the
# BEGIN block.  The generated C file contains:
#
#   - lookup_lat1[]: a direct-index table for code points 0x00-0xFF
#   - lookup[]:      {lo, hi, value} ranges for code points >= 0x100
#   - uprop_get_sc(): dispatches between the two tables
#
# Requires gawk (strtonum() is a gawk extension) and mktemp.
#
# The output is first written to a temporary file so that a failure in
# gawk (missing input, unknown script name) aborts the script instead of
# leaving a truncated or malformed C file behind with exit status 0.

set -e
cd "${0%/*}/../.."

out=lib/unicode/prop/uprop_get_sc.c

tmp=$(mktemp)
trap 'rm -f -- "$tmp"' EXIT

gawk '
BEGIN {
	FS = " *(; *|#.*)"

	# Long script name (as spelled in the data file) -> SC_* suffix.
	map["Adlam"]                  = "ADLM"
	map["Caucasian_Albanian"]     = "AGHB"
	map["Ahom"]                   = "AHOM"
	map["Arabic"]                 = "ARAB"
	map["Imperial_Aramaic"]       = "ARMI"
	map["Armenian"]               = "ARMN"
	map["Avestan"]                = "AVST"
	map["Balinese"]               = "BALI"
	map["Bamum"]                  = "BAMU"
	map["Bassa_Vah"]              = "BASS"
	map["Batak"]                  = "BATK"
	map["Bengali"]                = "BENG"
	map["Bhaiksuki"]              = "BHKS"
	map["Bopomofo"]               = "BOPO"
	map["Brahmi"]                 = "BRAH"
	map["Braille"]                = "BRAI"
	map["Buginese"]               = "BUGI"
	map["Buhid"]                  = "BUHD"
	map["Chakma"]                 = "CAKM"
	map["Canadian_Aboriginal"]    = "CANS"
	map["Carian"]                 = "CARI"
	map["Cham"]                   = "CHAM"
	map["Cherokee"]               = "CHER"
	map["Chorasmian"]             = "CHRS"
	map["Coptic"]                 = "COPT"
	map["Cypro_Minoan"]           = "CPMN"
	map["Cypriot"]                = "CPRT"
	map["Cyrillic"]               = "CYRL"
	map["Devanagari"]             = "DEVA"
	map["Dives_Akuru"]            = "DIAK"
	map["Dogra"]                  = "DOGR"
	map["Deseret"]                = "DSRT"
	map["Duployan"]               = "DUPL"
	map["Egyptian_Hieroglyphs"]   = "EGYP"
	map["Elbasan"]                = "ELBA"
	map["Elymaic"]                = "ELYM"
	map["Ethiopic"]               = "ETHI"
	map["Georgian"]               = "GEOR"
	map["Glagolitic"]             = "GLAG"
	map["Gunjala_Gondi"]          = "GONG"
	map["Masaram_Gondi"]          = "GONM"
	map["Gothic"]                 = "GOTH"
	map["Grantha"]                = "GRAN"
	map["Greek"]                  = "GREK"
	map["Gujarati"]               = "GUJR"
	map["Gurmukhi"]               = "GURU"
	map["Hangul"]                 = "HANG"
	map["Han"]                    = "HANI"
	map["Hanunoo"]                = "HANO"
	map["Hatran"]                 = "HATR"
	map["Hebrew"]                 = "HEBR"
	map["Hiragana"]               = "HIRA"
	map["Anatolian_Hieroglyphs"]  = "HLUW"
	map["Pahawh_Hmong"]           = "HMNG"
	map["Nyiakeng_Puachue_Hmong"] = "HMNP"
	map["Katakana_Or_Hiragana"]   = "HRKT"
	map["Old_Hungarian"]          = "HUNG"
	map["Old_Italic"]             = "ITAL"
	map["Javanese"]               = "JAVA"
	map["Kayah_Li"]               = "KALI"
	map["Katakana"]               = "KANA"
	map["Kawi"]                   = "KAWI"
	map["Kharoshthi"]             = "KHAR"
	map["Khmer"]                  = "KHMR"
	map["Khojki"]                 = "KHOJ"
	map["Khitan_Small_Script"]    = "KITS"
	map["Kannada"]                = "KNDA"
	map["Kaithi"]                 = "KTHI"
	map["Tai_Tham"]               = "LANA"
	map["Lao"]                    = "LAOO"
	map["Latin"]                  = "LATN"
	map["Lepcha"]                 = "LEPC"
	map["Limbu"]                  = "LIMB"
	map["Linear_A"]               = "LINA"
	map["Linear_B"]               = "LINB"
	map["Lisu"]                   = "LISU"
	map["Lycian"]                 = "LYCI"
	map["Lydian"]                 = "LYDI"
	map["Mahajani"]               = "MAHJ"
	map["Makasar"]                = "MAKA"
	map["Mandaic"]                = "MAND"
	map["Manichaean"]             = "MANI"
	map["Marchen"]                = "MARC"
	map["Medefaidrin"]            = "MEDF"
	map["Mende_Kikakui"]          = "MEND"
	map["Meroitic_Cursive"]       = "MERC"
	map["Meroitic_Hieroglyphs"]   = "MERO"
	map["Malayalam"]              = "MLYM"
	map["Modi"]                   = "MODI"
	map["Mongolian"]              = "MONG"
	map["Mro"]                    = "MROO"
	map["Meetei_Mayek"]           = "MTEI"
	map["Multani"]                = "MULT"
	map["Myanmar"]                = "MYMR"
	map["Nag_Mundari"]            = "NAGM"
	map["Nandinagari"]            = "NAND"
	map["Old_North_Arabian"]      = "NARB"
	map["Nabataean"]              = "NBAT"
	map["Newa"]                   = "NEWA"
	map["Nko"]                    = "NKOO"
	map["Nushu"]                  = "NSHU"
	map["Ogham"]                  = "OGAM"
	map["Ol_Chiki"]               = "OLCK"
	map["Old_Turkic"]             = "ORKH"
	map["Oriya"]                  = "ORYA"
	map["Osage"]                  = "OSGE"
	map["Osmanya"]                = "OSMA"
	map["Old_Uyghur"]             = "OUGR"
	map["Palmyrene"]              = "PALM"
	map["Pau_Cin_Hau"]            = "PAUC"
	map["Old_Permic"]             = "PERM"
	map["Phags_Pa"]               = "PHAG"
	map["Inscriptional_Pahlavi"]  = "PHLI"
	map["Psalter_Pahlavi"]        = "PHLP"
	map["Phoenician"]             = "PHNX"
	map["Miao"]                   = "PLRD"
	map["Inscriptional_Parthian"] = "PRTI"
	map["Rejang"]                 = "RJNG"
	map["Hanifi_Rohingya"]        = "ROHG"
	map["Runic"]                  = "RUNR"
	map["Samaritan"]              = "SAMR"
	map["Old_South_Arabian"]      = "SARB"
	map["Saurashtra"]             = "SAUR"
	map["SignWriting"]            = "SGNW"
	map["Shavian"]                = "SHAW"
	map["Sharada"]                = "SHRD"
	map["Siddham"]                = "SIDD"
	map["Khudawadi"]              = "SIND"
	map["Sinhala"]                = "SINH"
	map["Sogdian"]                = "SOGD"
	map["Old_Sogdian"]            = "SOGO"
	map["Sora_Sompeng"]           = "SORA"
	map["Soyombo"]                = "SOYO"
	map["Sundanese"]              = "SUND"
	map["Syloti_Nagri"]           = "SYLO"
	map["Syriac"]                 = "SYRC"
	map["Tagbanwa"]               = "TAGB"
	map["Takri"]                  = "TAKR"
	map["Tai_Le"]                 = "TALE"
	map["New_Tai_Lue"]            = "TALU"
	map["Tamil"]                  = "TAML"
	map["Tangut"]                 = "TANG"
	map["Tai_Viet"]               = "TAVT"
	map["Telugu"]                 = "TELU"
	map["Tifinagh"]               = "TFNG"
	map["Tagalog"]                = "TGLG"
	map["Thaana"]                 = "THAA"
	map["Thai"]                   = "THAI"
	map["Tibetan"]                = "TIBT"
	map["Tirhuta"]                = "TIRH"
	map["Tangsa"]                 = "TNSA"
	map["Toto"]                   = "TOTO"
	map["Ugaritic"]               = "UGAR"
	map["Vai"]                    = "VAII"
	map["Vithkuqi"]               = "VITH"
	map["Warang_Citi"]            = "WARA"
	map["Wancho"]                 = "WCHO"
	map["Old_Persian"]            = "XPEO"
	map["Cuneiform"]              = "XSUX"
	map["Yezidi"]                 = "YEZI"
	map["Yi"]                     = "YIII"
	map["Zanabazar_Square"]       = "ZANB"
	map["Inherited"]              = "ZINH"
	map["Common"]                 = "ZYYY"

	print "/* This file is autogenerated by gen/prop/sc; DO NOT EDIT. */"
	print ""
	print "#include \"_bsearch.h\""
	print "#include \"macros.h\""
	print "#include \"rune.h\""
	print "#include \"unicode/prop.h\""
	print ""
}

# Data line: record the script of every code point in the given range.
/^[^#]/ {
	# Strip a leading "; " once per record, on a copy so that $0 is not
	# rebuilt.  The original did this on $2 for every code point.
	sc = $2
	sub(/^; /, "", sc)

	# A name missing from the table would otherwise produce a bare "SC_"
	# token in the generated C; fail loudly instead.
	if (!(sc in map)) {
		printf "%s:%d: unknown script name \"%s\"\n", FILENAME, FNR, sc > "/dev/stderr"
		failed = 1
		exit 1
	}

	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])	# n == 1 for a single code point, so hi == lo
	val = "SC_" map[sc]
	for (i = lo; i <= hi; i++)
		props[i] = val
}

END {
	# exit in a main rule still runs END; do not emit half a file.
	if (failed)
		exit 1

	# Direct-index table for code points 0x00-0xFF, eight entries per row.
	# A code point with no recorded script is emitted as a literal 0.
	print "static constexpr enum uprop_sc lookup_lat1[] = {"
	for (i = 0; i < 0x100; i++) {
		if (i % 8 == 0)
			printf "\t"
		printf "%-7s,%s", props[i] ? props[i] : 0, i % 8 == 7 ? "\n" : " "
	}
	print "};"
	print ""
	print "static const struct {"
	print "\trune lo, hi;"
	print "\tenum uprop_sc val;"
	print "} lookup[] = {"

	# Coalesce runs of adjacent code points that share a script into a
	# single {lo, hi, value} row.  Membership is tested with "in" so that
	# scanning the code space does not create an empty array element for
	# every unassigned code point.
	for (i = 0x100; i <= 0x10FFFF; i++) {
		if (!(i in props))
			continue
		start = i
		while (((i + 1) in props) && props[i + 1] == props[start])
			i++
		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", start, i, props[start]
	}

	print "};"
	print ""
	print "_MLIB_DEFINE_BSEARCH(enum uprop_sc, lookup, SC_ZZZZ)"
	print ""
	print "enum uprop_sc"
	print "uprop_get_sc(rune ch)"
	print "{"
	# lookup_lat1 has 0x100 entries (indices 0x00-0xFF), so the bound must
	# be strict: "<=" would read one element past the end for ch == 0x100.
	# The range table above starts at 0x100 and covers that code point.
	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
	print "}"
}
' data/Scripts >"$tmp"

# Strip trailing whitespace.  [[:space:]] is the POSIX spelling; \s is a
# GNU sed extension.
sed 's/[[:space:]]*$//' "$tmp" >"$out"