From 6713c56fee21a549ff8a3494bbb52da9234a00aa Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Mon, 15 Apr 2024 14:13:25 +0200
Subject: Add uprop_get_sc()

---
 gen/prop/sc | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100755 gen/prop/sc

(limited to 'gen/prop')

diff --git a/gen/prop/sc b/gen/prop/sc
new file mode 100755
index 0000000..40fc39b
--- /dev/null
+++ b/gen/prop/sc
@@ -0,0 +1,232 @@
+#!/bin/sh
+
+# Generates lib/unicode/prop/uprop_get_sc.c from data/Scripts; requires gawk.
+
+set -e
+cd "${0%/*}/../.."
+exec >lib/unicode/prop/uprop_get_sc.c
+
+gawk '
+BEGIN {
+	FS = " *(; *|#.*)"  # split on ";" and swallow a trailing "# ..." comment
+	# Script names as spelled in data/Scripts -> suffix of the SC_* enumerator
+	map["Adlam"] = "ADLM"
+	map["Caucasian_Albanian"] = "AGHB"
+	map["Ahom"] = "AHOM"
+	map["Arabic"] = "ARAB"
+	map["Imperial_Aramaic"] = "ARMI"
+	map["Armenian"] = "ARMN"
+	map["Avestan"] = "AVST"
+	map["Balinese"] = "BALI"
+	map["Bamum"] = "BAMU"
+	map["Bassa_Vah"] = "BASS"
+	map["Batak"] = "BATK"
+	map["Bengali"] = "BENG"
+	map["Bhaiksuki"] = "BHKS"
+	map["Bopomofo"] = "BOPO"
+	map["Brahmi"] = "BRAH"
+	map["Braille"] = "BRAI"
+	map["Buginese"] = "BUGI"
+	map["Buhid"] = "BUHD"
+	map["Chakma"] = "CAKM"
+	map["Canadian_Aboriginal"] = "CANS"
+	map["Carian"] = "CARI"
+	map["Cham"] = "CHAM"
+	map["Cherokee"] = "CHER"
+	map["Chorasmian"] = "CHRS"
+	map["Coptic"] = "COPT"
+	map["Cypro_Minoan"] = "CPMN"
+	map["Cypriot"] = "CPRT"
+	map["Cyrillic"] = "CYRL"
+	map["Devanagari"] = "DEVA"
+	map["Dives_Akuru"] = "DIAK"
+	map["Dogra"] = "DOGR"
+	map["Deseret"] = "DSRT"
+	map["Duployan"] = "DUPL"
+	map["Egyptian_Hieroglyphs"] = "EGYP"
+	map["Elbasan"] = "ELBA"
+	map["Elymaic"] = "ELYM"
+	map["Ethiopic"] = "ETHI"
+	map["Georgian"] = "GEOR"
+	map["Glagolitic"] = "GLAG"
+	map["Gunjala_Gondi"] = "GONG"
+	map["Masaram_Gondi"] = "GONM"
+	map["Gothic"] = "GOTH"
+	map["Grantha"] = "GRAN"
+	map["Greek"] = "GREK"
+	map["Gujarati"] = "GUJR"
+	map["Gurmukhi"] = "GURU"
+	map["Hangul"] = "HANG"
+	map["Han"] = "HANI"
+	map["Hanunoo"] = "HANO"
+	map["Hatran"] = "HATR"
+	map["Hebrew"] = "HEBR"
+	map["Hiragana"] = "HIRA"
+	map["Anatolian_Hieroglyphs"] = "HLUW"
+	map["Pahawh_Hmong"] = "HMNG"
+	map["Nyiakeng_Puachue_Hmong"] = "HMNP"
+	map["Katakana_Or_Hiragana"] = "HRKT"
+	map["Old_Hungarian"] = "HUNG"
+	map["Old_Italic"] = "ITAL"
+	map["Javanese"] = "JAVA"
+	map["Kayah_Li"] = "KALI"
+	map["Katakana"] = "KANA"
+	map["Kawi"] = "KAWI"
+	map["Kharoshthi"] = "KHAR"
+	map["Khmer"] = "KHMR"
+	map["Khojki"] = "KHOJ"
+	map["Khitan_Small_Script"] = "KITS"
+	map["Kannada"] = "KNDA"
+	map["Kaithi"] = "KTHI"
+	map["Tai_Tham"] = "LANA"
+	map["Lao"] = "LAOO"
+	map["Latin"] = "LATN"
+	map["Lepcha"] = "LEPC"
+	map["Limbu"] = "LIMB"
+	map["Linear_A"] = "LINA"
+	map["Linear_B"] = "LINB"
+	map["Lisu"] = "LISU"
+	map["Lycian"] = "LYCI"
+	map["Lydian"] = "LYDI"
+	map["Mahajani"] = "MAHJ"
+	map["Makasar"] = "MAKA"
+	map["Mandaic"] = "MAND"
+	map["Manichaean"] = "MANI"
+	map["Marchen"] = "MARC"
+	map["Medefaidrin"] = "MEDF"
+	map["Mende_Kikakui"] = "MEND"
+	map["Meroitic_Cursive"] = "MERC"
+	map["Meroitic_Hieroglyphs"] = "MERO"
+	map["Malayalam"] = "MLYM"
+	map["Modi"] = "MODI"
+	map["Mongolian"] = "MONG"
+	map["Mro"] = "MROO"
+	map["Meetei_Mayek"] = "MTEI"
+	map["Multani"] = "MULT"
+	map["Myanmar"] = "MYMR"
+	map["Nag_Mundari"] = "NAGM"
+	map["Nandinagari"] = "NAND"
+	map["Old_North_Arabian"] = "NARB"
+	map["Nabataean"] = "NBAT"
+	map["Newa"] = "NEWA"
+	map["Nko"] = "NKOO"
+	map["Nushu"] = "NSHU"
+	map["Ogham"] = "OGAM"
+	map["Ol_Chiki"] = "OLCK"
+	map["Old_Turkic"] = "ORKH"
+	map["Oriya"] = "ORYA"
+	map["Osage"] = "OSGE"
+	map["Osmanya"] = "OSMA"
+	map["Old_Uyghur"] = "OUGR"
+	map["Palmyrene"] = "PALM"
+	map["Pau_Cin_Hau"] = "PAUC"
+	map["Old_Permic"] = "PERM"
+	map["Phags_Pa"] = "PHAG"
+	map["Inscriptional_Pahlavi"] = "PHLI"
+	map["Psalter_Pahlavi"] = "PHLP"
+	map["Phoenician"] = "PHNX"
+	map["Miao"] = "PLRD"
+	map["Inscriptional_Parthian"] = "PRTI"
+	map["Rejang"] = "RJNG"
+	map["Hanifi_Rohingya"] = "ROHG"
+	map["Runic"] = "RUNR"
+	map["Samaritan"] = "SAMR"
+	map["Old_South_Arabian"] = "SARB"
+	map["Saurashtra"] = "SAUR"
+	map["SignWriting"] = "SGNW"
+	map["Shavian"] = "SHAW"
+	map["Sharada"] = "SHRD"
+	map["Siddham"] = "SIDD"
+	map["Khudawadi"] = "SIND"
+	map["Sinhala"] = "SINH"
+	map["Sogdian"] = "SOGD"
+	map["Old_Sogdian"] = "SOGO"
+	map["Sora_Sompeng"] = "SORA"
+	map["Soyombo"] = "SOYO"
+	map["Sundanese"] = "SUND"
+	map["Syloti_Nagri"] = "SYLO"
+	map["Syriac"] = "SYRC"
+	map["Tagbanwa"] = "TAGB"
+	map["Takri"] = "TAKR"
+	map["Tai_Le"] = "TALE"
+	map["New_Tai_Lue"] = "TALU"
+	map["Tamil"] = "TAML"
+	map["Tangut"] = "TANG"
+	map["Tai_Viet"] = "TAVT"
+	map["Telugu"] = "TELU"
+	map["Tifinagh"] = "TFNG"
+	map["Tagalog"] = "TGLG"
+	map["Thaana"] = "THAA"
+	map["Thai"] = "THAI"
+	map["Tibetan"] = "TIBT"
+	map["Tirhuta"] = "TIRH"
+	map["Tangsa"] = "TNSA"
+	map["Toto"] = "TOTO"
+	map["Ugaritic"] = "UGAR"
+	map["Vai"] = "VAII"
+	map["Vithkuqi"] = "VITH"
+	map["Warang_Citi"] = "WARA"
+	map["Wancho"] = "WCHO"
+	map["Old_Persian"] = "XPEO"
+	map["Cuneiform"] = "XSUX"
+	map["Yezidi"] = "YEZI"
+	map["Yi"] = "YIII"
+	map["Zanabazar_Square"] = "ZANB"
+	map["Inherited"] = "ZINH"
+	map["Common"] = "ZYYY"
+
+	print "/* This file is autogenerated by gen/prop/sc; DO NOT EDIT. */"
+	print ""
+	print "#include \"__bsearch.h\""
+	print "#include \"macros.h\""
+	print "#include \"rune.h\""
+	print "#include \"unicode/prop.h\""
+	print ""
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])  # a record without ".." has n == 1, so hi == lo
+
+	# Clean $2 once per record; the loop below only needs the finished name
+	gsub(/^; /, "", $2)
+	for (i = lo; i <= hi; i++)
+		props[i] = "SC_" map[$2]
+}
+
+END {
+	print "static constexpr enum uprop_sc lookup_lat1[] = {"
+	for (i = 0; i < 0x100; i++) {  # direct table for 0x00..0xFF, 8 per row
+		if (i % 8 == 0)
+			printf "\t"
+		printf "%-7s,%s", props[i] ? props[i] : 0, i % 8 == 7 ? "\n" : " "
+	}
+	print "};"
+	print ""
+
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_sc val;"
+	print "} lookup[] = {"
+
+	for (i = 0x100; i <= 0x10FFFF; i++) {  # the rest: one row per run
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])  # grow the run while the script repeats
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+	}
+
+	print "};"
+	print ""
+	print "__MLIB_DEFINE_BSEARCH(enum uprop_sc, lookup, SC_ZZZZ)"
+	print ""
+	print "enum uprop_sc"
+	print "uprop_get_sc(rune ch)"
+	print "{"
+	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"  # < not <=: valid indices are 0..0xFF
+	print "}"
+}
+' data/Scripts | sed 's/\s*$//'
-- 
cgit v1.2.3