aboutsummaryrefslogtreecommitdiff
path: root/gen
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-15 14:13:25 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-15 14:13:25 +0200
commit 6713c56fee21a549ff8a3494bbb52da9234a00aa (patch)
tree cb73e39d6b344348dbcbd71cecf0318d644d020f /gen
parent b12a2227b37b79f4bb8cd69143d13c99ad04df96 (diff)
Add uprop_get_sc()
Diffstat (limited to 'gen')
-rwxr-xr-x gen/data-files 1
-rwxr-xr-x gen/prop/sc 232
2 files changed, 233 insertions, 0 deletions
diff --git a/gen/data-files b/gen/data-files
index b2c4197..5b7921c 100755
--- a/gen/data-files
+++ b/gen/data-files
@@ -21,6 +21,7 @@ extracted/DerivedLineBreak
extracted/DerivedNumericType
extracted/DerivedNumericValues
PropList
+Scripts
SpecialCasing
UnicodeData
'
diff --git a/gen/prop/sc b/gen/prop/sc
new file mode 100755
index 0000000..40fc39b
--- /dev/null
+++ b/gen/prop/sc
@@ -0,0 +1,232 @@
+#!/bin/sh
+
+# Generate lib/unicode/prop/uprop_get_sc.c from data/Scripts.
+
+set -e
+# Move two directories above the one containing this script so that the
+# relative paths used below resolve.
+cd "${0%/*}/../.."
+# From here on, everything written to stdout lands in the generated file.
+exec >lib/unicode/prop/uprop_get_sc.c
+
+gawk '
+BEGIN {
+ # Fields are separated by ";" with optional spaces on either side; a "#"
+ # and everything after it (plus any spaces before it) also counts as a
+ # separator, which keeps trailing comments out of the fields.
+ FS = " *(; *|#.*)"
+
+ # Script name as it appears in field 2 of the input -> four-letter code
+ # that the data rule prefixes with "SC_" to form the emitted constant.
+ map["Adlam"] = "ADLM"
+ map["Caucasian_Albanian"] = "AGHB"
+ map["Ahom"] = "AHOM"
+ map["Arabic"] = "ARAB"
+ map["Imperial_Aramaic"] = "ARMI"
+ map["Armenian"] = "ARMN"
+ map["Avestan"] = "AVST"
+ map["Balinese"] = "BALI"
+ map["Bamum"] = "BAMU"
+ map["Bassa_Vah"] = "BASS"
+ map["Batak"] = "BATK"
+ map["Bengali"] = "BENG"
+ map["Bhaiksuki"] = "BHKS"
+ map["Bopomofo"] = "BOPO"
+ map["Brahmi"] = "BRAH"
+ map["Braille"] = "BRAI"
+ map["Buginese"] = "BUGI"
+ map["Buhid"] = "BUHD"
+ map["Chakma"] = "CAKM"
+ map["Canadian_Aboriginal"] = "CANS"
+ map["Carian"] = "CARI"
+ map["Cham"] = "CHAM"
+ map["Cherokee"] = "CHER"
+ map["Chorasmian"] = "CHRS"
+ map["Coptic"] = "COPT"
+ map["Cypro_Minoan"] = "CPMN"
+ map["Cypriot"] = "CPRT"
+ map["Cyrillic"] = "CYRL"
+ map["Devanagari"] = "DEVA"
+ map["Dives_Akuru"] = "DIAK"
+ map["Dogra"] = "DOGR"
+ map["Deseret"] = "DSRT"
+ map["Duployan"] = "DUPL"
+ map["Egyptian_Hieroglyphs"] = "EGYP"
+ map["Elbasan"] = "ELBA"
+ map["Elymaic"] = "ELYM"
+ map["Ethiopic"] = "ETHI"
+ map["Georgian"] = "GEOR"
+ map["Glagolitic"] = "GLAG"
+ map["Gunjala_Gondi"] = "GONG"
+ map["Masaram_Gondi"] = "GONM"
+ map["Gothic"] = "GOTH"
+ map["Grantha"] = "GRAN"
+ map["Greek"] = "GREK"
+ map["Gujarati"] = "GUJR"
+ map["Gurmukhi"] = "GURU"
+ map["Hangul"] = "HANG"
+ map["Han"] = "HANI"
+ map["Hanunoo"] = "HANO"
+ map["Hatran"] = "HATR"
+ map["Hebrew"] = "HEBR"
+ map["Hiragana"] = "HIRA"
+ map["Anatolian_Hieroglyphs"] = "HLUW"
+ map["Pahawh_Hmong"] = "HMNG"
+ map["Nyiakeng_Puachue_Hmong"] = "HMNP"
+ map["Katakana_Or_Hiragana"] = "HRKT"
+ map["Old_Hungarian"] = "HUNG"
+ map["Old_Italic"] = "ITAL"
+ map["Javanese"] = "JAVA"
+ map["Kayah_Li"] = "KALI"
+ map["Katakana"] = "KANA"
+ map["Kawi"] = "KAWI"
+ map["Kharoshthi"] = "KHAR"
+ map["Khmer"] = "KHMR"
+ map["Khojki"] = "KHOJ"
+ map["Khitan_Small_Script"] = "KITS"
+ map["Kannada"] = "KNDA"
+ map["Kaithi"] = "KTHI"
+ map["Tai_Tham"] = "LANA"
+ map["Lao"] = "LAOO"
+ map["Latin"] = "LATN"
+ map["Lepcha"] = "LEPC"
+ map["Limbu"] = "LIMB"
+ map["Linear_A"] = "LINA"
+ map["Linear_B"] = "LINB"
+ map["Lisu"] = "LISU"
+ map["Lycian"] = "LYCI"
+ map["Lydian"] = "LYDI"
+ map["Mahajani"] = "MAHJ"
+ map["Makasar"] = "MAKA"
+ map["Mandaic"] = "MAND"
+ map["Manichaean"] = "MANI"
+ map["Marchen"] = "MARC"
+ map["Medefaidrin"] = "MEDF"
+ map["Mende_Kikakui"] = "MEND"
+ map["Meroitic_Cursive"] = "MERC"
+ map["Meroitic_Hieroglyphs"] = "MERO"
+ map["Malayalam"] = "MLYM"
+ map["Modi"] = "MODI"
+ map["Mongolian"] = "MONG"
+ map["Mro"] = "MROO"
+ map["Meetei_Mayek"] = "MTEI"
+ map["Multani"] = "MULT"
+ map["Myanmar"] = "MYMR"
+ map["Nag_Mundari"] = "NAGM"
+ map["Nandinagari"] = "NAND"
+ map["Old_North_Arabian"] = "NARB"
+ map["Nabataean"] = "NBAT"
+ map["Newa"] = "NEWA"
+ map["Nko"] = "NKOO"
+ map["Nushu"] = "NSHU"
+ map["Ogham"] = "OGAM"
+ map["Ol_Chiki"] = "OLCK"
+ map["Old_Turkic"] = "ORKH"
+ map["Oriya"] = "ORYA"
+ map["Osage"] = "OSGE"
+ map["Osmanya"] = "OSMA"
+ map["Old_Uyghur"] = "OUGR"
+ map["Palmyrene"] = "PALM"
+ map["Pau_Cin_Hau"] = "PAUC"
+ map["Old_Permic"] = "PERM"
+ map["Phags_Pa"] = "PHAG"
+ map["Inscriptional_Pahlavi"] = "PHLI"
+ map["Psalter_Pahlavi"] = "PHLP"
+ map["Phoenician"] = "PHNX"
+ map["Miao"] = "PLRD"
+ map["Inscriptional_Parthian"] = "PRTI"
+ map["Rejang"] = "RJNG"
+ map["Hanifi_Rohingya"] = "ROHG"
+ map["Runic"] = "RUNR"
+ map["Samaritan"] = "SAMR"
+ map["Old_South_Arabian"] = "SARB"
+ map["Saurashtra"] = "SAUR"
+ map["SignWriting"] = "SGNW"
+ map["Shavian"] = "SHAW"
+ map["Sharada"] = "SHRD"
+ map["Siddham"] = "SIDD"
+ map["Khudawadi"] = "SIND"
+ map["Sinhala"] = "SINH"
+ map["Sogdian"] = "SOGD"
+ map["Old_Sogdian"] = "SOGO"
+ map["Sora_Sompeng"] = "SORA"
+ map["Soyombo"] = "SOYO"
+ map["Sundanese"] = "SUND"
+ map["Syloti_Nagri"] = "SYLO"
+ map["Syriac"] = "SYRC"
+ map["Tagbanwa"] = "TAGB"
+ map["Takri"] = "TAKR"
+ map["Tai_Le"] = "TALE"
+ map["New_Tai_Lue"] = "TALU"
+ map["Tamil"] = "TAML"
+ map["Tangut"] = "TANG"
+ map["Tai_Viet"] = "TAVT"
+ map["Telugu"] = "TELU"
+ map["Tifinagh"] = "TFNG"
+ map["Tagalog"] = "TGLG"
+ map["Thaana"] = "THAA"
+ map["Thai"] = "THAI"
+ map["Tibetan"] = "TIBT"
+ map["Tirhuta"] = "TIRH"
+ map["Tangsa"] = "TNSA"
+ map["Toto"] = "TOTO"
+ map["Ugaritic"] = "UGAR"
+ map["Vai"] = "VAII"
+ map["Vithkuqi"] = "VITH"
+ map["Warang_Citi"] = "WARA"
+ map["Wancho"] = "WCHO"
+ map["Old_Persian"] = "XPEO"
+ map["Cuneiform"] = "XSUX"
+ map["Yezidi"] = "YEZI"
+ map["Yi"] = "YIII"
+ map["Zanabazar_Square"] = "ZANB"
+ map["Inherited"] = "ZINH"
+ map["Common"] = "ZYYY"
+
+ # Preamble of the generated C file: do-not-edit banner and includes.
+ print "/* This file is autogenerated by gen/prop/sc; DO NOT EDIT. */"
+ print ""
+ print "#include \"__bsearch.h\""
+ print "#include \"macros.h\""
+ print "#include \"rune.h\""
+ print "#include \"unicode/prop.h\""
+ print ""
+}
+
+# Data lines (any non-empty line that does not start with "#") carry a
+# code point or a lo..hi range in field 1 and a script name in field 2.
+# Record the enum constant for every code point covered by the line.
+/^[^#]/ {
+	# A single code point yields n == 1, so a[1] and a[n] coincide.
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	# Both steps are the same for every code point on the line, so do them
+	# once here instead of once per loop iteration.
+	gsub(/^; /, "", $2)
+	sc = "SC_" map[$2]
+
+	for (i = lo; i <= hi; i++)
+		props[i] = sc
+}
+
+# Emit the lookup tables and the uprop_get_sc() definition from the
+# per-code-point values collected in props[].
+END {
+	# Directly indexed table for code points 0x00-0xFF, eight entries per row.
+	print "static constexpr enum uprop_sc lookup_lat1[] = {"
+	for (i = 0; i < 0x100; i++) {
+		if (i % 8 == 0)
+			printf "\t"
+		printf "%-7s,%s", props[i] ? props[i] : 0, i % 8 == 7 ? "\n" : " "
+	}
+	print "};"
+	print ""
+
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "\tenum uprop_sc val;"
+	print "} lookup[] = {"
+
+	# Collapse each run of consecutive code points that share a value into
+	# one {lo, hi, value} row; code points with no recorded value are skipped.
+	for (i = 0x100; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+	}
+
+	print "};"
+	print ""
+	print "__MLIB_DEFINE_BSEARCH(enum uprop_sc, lookup, SC_ZZZZ)"
+	print ""
+	print "enum uprop_sc"
+	print "uprop_get_sc(rune ch)"
+	print "{"
+	# The bound must be strict: lookup_lat1 has 0x100 entries, so index
+	# 0x100 is already one past the end of the table. This relies on
+	# lengthof() yielding the element count -- confirm against macros.h.
+	print "\treturn ch < lengthof(lookup_lat1) ? lookup_lat1[ch] : mlib_lookup(ch);"
+	print "}"
+}
+' data/Scripts | sed 's/\s*$//'