From 5e226cc757791a47267fb778fb5e5bc680fe1aa1 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Mon, 15 Apr 2024 18:21:40 +0200
Subject: Add uprop_get_cf()

---
 NOTES                           |   2 +-
 include/unicode/prop.h          |   1 +
 lib/unicode/prop/uprop_get_cf.c | 145 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 lib/unicode/prop/uprop_get_cf.c

diff --git a/NOTES b/NOTES
index 4a77403..e03e85f 100644
--- a/NOTES
+++ b/NOTES
@@ -1,6 +1,6 @@
 Developer Implementation Details
 
- • uprop_get_lc(), uprop_get_tc(), uprop_get_uc()
+ • uprop_get_cf(), uprop_get_lc(), uprop_get_tc(), uprop_get_uc()
 
 These properties are not auto-generated via a script, and no build script
 informs the user of this. Extra care should be taken to ensure these are
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index f1ca36a..7a357cc 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -744,6 +744,7 @@ enum uprop_sc {
 [[__mlib_uprop_attrs]] rune uprop_get_slc(rune);
 [[__mlib_uprop_attrs]] rune uprop_get_stc(rune);
 [[__mlib_uprop_attrs]] rune uprop_get_suc(rune);
+[[__mlib_uprop_attrs]] struct rview uprop_get_cf(rune, bool);
 [[__mlib_uprop_attrs]] struct rview uprop_get_lc(rune, struct lcctx);
 [[__mlib_uprop_attrs]] struct rview uprop_get_tc(rune, struct tcctx);
 [[__mlib_uprop_attrs]] struct rview uprop_get_uc(rune, struct ucctx);
diff --git a/lib/unicode/prop/uprop_get_cf.c b/lib/unicode/prop/uprop_get_cf.c
new file mode 100644
index 0000000..4b0e90b
--- /dev/null
+++ b/lib/unicode/prop/uprop_get_cf.c
@@ -0,0 +1,145 @@
+#include "__bsearch.h"
+#include "unicode/prop.h"
+
+/* _(...) expands to a brace initializer holding a pointer to a
+   compound-literal array of the given runes followed by that array's
+   element count.  M(...) applies a (struct rview) compound literal to
+   that initializer so the result can be used as an expression. */
+#define M(...) ((struct rview)_(__VA_ARGS__))
+#define _(...) \
+	{(const rune []){__VA_ARGS__}, lengthof(((const rune []){__VA_ARGS__}))}
+
+/* Runes whose case folding is hard-coded here, in ascending key order,
+   each paired with the rune sequence it folds to. */
+static const struct {
+	rune k;
+	struct rview v;
+} lookup[] = {
+	{0x00DF, /* ß */ _('s', 's')},
+	{0x0149, /* ŉ */ _(U'ʼ', 'n')},
+	{0x01F0, /* ǰ */ _('j', 0x30C)},
+	{0x0390, /* ΐ */ _(U'ι', 0x308, 0x301)},
+	{0x03B0, /* ΰ */ _(U'υ', 0x308, 0x301)},
+	{0x0587, /* և */ _(U'ե', U'ւ')},
+	{0x1E96, /* ẖ */ _('h', 0x331)},
+	{0x1E97, /* ẗ */ _('t', 0x308)},
+	{0x1E98, /* ẘ */ _('w', 0x30A)},
+	{0x1E99, /* ẙ */ _('y', 0x30A)},
+	{0x1E9A, /* ẚ */ _('a', U'ʾ')},
+	{0x1E9E, /* ẞ */ _('s', 's')},
+	{0x1F50, /* ὐ */ _(U'υ', 0x313)},
+	{0x1F52, /* ὒ */ _(U'υ', 0x313, 0x300)},
+	{0x1F54, /* ὔ */ _(U'υ', 0x313, 0x301)},
+	{0x1F56, /* ὖ */ _(U'υ', 0x313, 0x342)},
+	{0x1F80, /* ᾀ */ _(U'ἀ', U'ι')},
+	{0x1F81, /* ᾁ */ _(U'ἁ', U'ι')},
+	{0x1F82, /* ᾂ */ _(U'ἂ', U'ι')},
+	{0x1F83, /* ᾃ */ _(U'ἃ', U'ι')},
+	{0x1F84, /* ᾄ */ _(U'ἄ', U'ι')},
+	{0x1F85, /* ᾅ */ _(U'ἅ', U'ι')},
+	{0x1F86, /* ᾆ */ _(U'ἆ', U'ι')},
+	{0x1F87, /* ᾇ */ _(U'ἇ', U'ι')},
+	{0x1F88, /* ᾈ */ _(U'ἀ', U'ι')},
+	{0x1F89, /* ᾉ */ _(U'ἁ', U'ι')},
+	{0x1F8A, /* ᾊ */ _(U'ἂ', U'ι')},
+	{0x1F8B, /* ᾋ */ _(U'ἃ', U'ι')},
+	{0x1F8C, /* ᾌ */ _(U'ἄ', U'ι')},
+	{0x1F8D, /* ᾍ */ _(U'ἅ', U'ι')},
+	{0x1F8E, /* ᾎ */ _(U'ἆ', U'ι')},
+	{0x1F8F, /* ᾏ */ _(U'ἇ', U'ι')},
+	{0x1F90, /* ᾐ */ _(U'ἠ', U'ι')},
+	{0x1F91, /* ᾑ */ _(U'ἡ', U'ι')},
+	{0x1F92, /* ᾒ */ _(U'ἢ', U'ι')},
+	{0x1F93, /* ᾓ */ _(U'ἣ', U'ι')},
+	{0x1F94, /* ᾔ */ _(U'ἤ', U'ι')},
+	{0x1F95, /* ᾕ */ _(U'ἥ', U'ι')},
+	{0x1F96, /* ᾖ */ _(U'ἦ', U'ι')},
+	{0x1F97, /* ᾗ */ _(U'ἧ', U'ι')},
+	{0x1F98, /* ᾘ */ _(U'ἠ', U'ι')},
+	{0x1F99, /* ᾙ */ _(U'ἡ', U'ι')},
+	{0x1F9A, /* ᾚ */ _(U'ἢ', U'ι')},
+	{0x1F9B, /* ᾛ */ _(U'ἣ', U'ι')},
+	{0x1F9C, /* ᾜ */ _(U'ἤ', U'ι')},
+	{0x1F9D, /* ᾝ */ _(U'ἥ', U'ι')},
+	{0x1F9E, /* ᾞ */ _(U'ἦ', U'ι')},
+	{0x1F9F, /* ᾟ */ _(U'ἧ', U'ι')},
+	{0x1FA0, /* ᾠ */ _(U'ὠ', U'ι')},
+	{0x1FA1, /* ᾡ */ _(U'ὡ', U'ι')},
+	{0x1FA2, /* ᾢ */ _(U'ὢ', U'ι')},
+	{0x1FA3, /* ᾣ */ _(U'ὣ', U'ι')},
+	{0x1FA4, /* ᾤ */ _(U'ὤ', U'ι')},
+	{0x1FA5, /* ᾥ */ _(U'ὥ', U'ι')},
+	{0x1FA6, /* ᾦ */ _(U'ὦ', U'ι')},
+	{0x1FA7, /* ᾧ */ _(U'ὧ', U'ι')},
+	{0x1FA8, /* ᾨ */ _(U'ὠ', U'ι')},
+	{0x1FA9, /* ᾩ */ _(U'ὡ', U'ι')},
+	{0x1FAA, /* ᾪ */ _(U'ὢ', U'ι')},
+	{0x1FAB, /* ᾫ */ _(U'ὣ', U'ι')},
+	{0x1FAC, /* ᾬ */ _(U'ὤ', U'ι')},
+	{0x1FAD, /* ᾭ */ _(U'ὥ', U'ι')},
+	{0x1FAE, /* ᾮ */ _(U'ὦ', U'ι')},
+	{0x1FAF, /* ᾯ */ _(U'ὧ', U'ι')},
+	{0x1FB2, /* ᾲ */ _(U'ὰ', U'ι')},
+	{0x1FB3, /* ᾳ */ _(U'α', U'ι')},
+	{0x1FB4, /* ᾴ */ _(U'ά', U'ι')},
+	{0x1FB6, /* ᾶ */ _(U'α', 0x342)},
+	{0x1FB7, /* ᾷ */ _(U'α', 0x342, U'ι')},
+	{0x1FBC, /* ᾼ */ _(U'α', U'ι')},
+	{0x1FC2, /* ῂ */ _(U'ὴ', U'ι')},
+	{0x1FC3, /* ῃ */ _(U'η', U'ι')},
+	{0x1FC4, /* ῄ */ _(U'ή', U'ι')},
+	{0x1FC6, /* ῆ */ _(U'η', 0x342)},
+	{0x1FC7, /* ῇ */ _(U'η', 0x342, U'ι')},
+	{0x1FCC, /* ῌ */ _(U'η', U'ι')},
+	{0x1FD2, /* ῒ */ _(U'ι', 0x308, 0x300)},
+	{0x1FD3, /* ΐ */ _(U'ι', 0x308, 0x301)},
+	{0x1FD6, /* ῖ */ _(U'ι', 0x342)},
+	{0x1FD7, /* ῗ */ _(U'ι', 0x308, 0x342)},
+	{0x1FE2, /* ῢ */ _(U'υ', 0x308, 0x300)},
+	{0x1FE3, /* ΰ */ _(U'υ', 0x308, 0x301)},
+	{0x1FE4, /* ῤ */ _(U'ρ', 0x313)},
+	{0x1FE6, /* ῦ */ _(U'υ', 0x342)},
+	{0x1FE7, /* ῧ */ _(U'υ', 0x308, 0x342)},
+	{0x1FF2, /* ῲ */ _(U'ὼ', U'ι')},
+	{0x1FF3, /* ῳ */ _(U'ω', U'ι')},
+	{0x1FF4, /* ῴ */ _(U'ώ', U'ι')},
+	{0x1FF6, /* ῶ */ _(U'ω', 0x342)},
+	{0x1FF7, /* ῷ */ _(U'ω', 0x342, U'ι')},
+	{0x1FFC, /* ῼ */ _(U'ω', U'ι')},
+	{0xFB00, /* ﬀ */ _('f', 'f')},
+	{0xFB01, /* ﬁ */ _('f', 'i')},
+	{0xFB02, /* ﬂ */ _('f', 'l')},
+	{0xFB03, /* ﬃ */ _('f', 'f', 'i')},
+	{0xFB04, /* ﬄ */ _('f', 'f', 'l')},
+	{0xFB05, /* ﬅ */ _('s', 't')},
+	{0xFB06, /* ﬆ */ _('s', 't')},
+	{0xFB13, /* ﬓ */ _(U'մ', U'ն')},
+	{0xFB14, /* ﬔ */ _(U'մ', U'ե')},
+	{0xFB15, /* ﬕ */ _(U'մ', U'ի')},
+	{0xFB16, /* ﬖ */ _(U'վ', U'ն')},
+	{0xFB17, /* ﬗ */ _(U'մ', U'խ')},
+};
+
+__MLIB_DEFINE_BSEARCH_KV(struct rview, lookup, M())
+
+/* Return the case folding of CH as a view over one or more runes.
+   U+0130 (İ) yields 'i' when AZ_OR_TR is true, and 'i' followed by
+   U+0307 otherwise.  Any other rune is passed to mlib_lookup_kv(); when
+   that returns a view of length zero, the result is the single rune
+   produced by uprop_get_scf(ch, az_or_tr).
+
+   NOTE(review): the views built with M() inside this function point at
+   block-scope compound literals; confirm that they remain valid for the
+   caller to read once the function has returned. */
+struct rview
+uprop_get_cf(rune ch, bool az_or_tr)
+{
+	if (ch == U'İ')
+		return az_or_tr ? M('i') : M('i', 0x307);
+	struct rview rv = mlib_lookup_kv(ch);
+	if (rv.len == 0) {
+		ch = uprop_get_scf(ch, az_or_tr);
+		return M(ch);
+	}
+	return rv;
+}
-- 
cgit v1.2.3