aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-13 00:37:32 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-13 00:37:32 +0200
commit 5cd8e9069a5e78a45932055678eae77a1de4fd66 (patch)
tree 20f97d6269f25371416a5a451cc79a0ef76403e9
parent b60237fe9105febf5792c372a5ee41084f5e8431 (diff)
Add uprop_get_lc()
-rw-r--r-- include/unicode/prop.h 10
-rw-r--r-- lib/unicode/prop/uprop_get_lc.c 45
2 files changed, 55 insertions, 0 deletions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index cf02a74..76d9200 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -12,6 +12,15 @@ struct rview {
size_t len;
};
+struct lcctx { /* Context flags passed by value to uprop_get_lc() */
+ bool az_or_tr : 1; /* Azeri or Turkish; ‘İ’ then lowercases to a bare ‘i’ */
+ bool az_tr_after_I : 1; /* After ‘I’ in Azeri or Turkish; U+0307 then maps to nothing */
+ bool az_tr_not_before_dot : 1; /* Not before a dot in Azeri or Turkish; ‘I’ then lowercases to ‘ı’ */
+ bool eow : 1; /* End of word; ‘Σ’ then lowercases to ‘ς’ instead of ‘σ’ */
+ bool lt : 1; /* Lithuanian; ‘Ì’, ‘Í’ and ‘Ĩ’ then keep U+0307 before their accent */
+ bool lt_acc_after : 1; /* Accent after ‘i’ or ‘j’ in Lithuanian; ‘I’, ‘J’ and ‘Į’ then gain U+0307 */
+};
+
struct tcctx {
bool az_or_tr : 1; /* Azeri or Turkish */
bool lt_after_i : 1; /* After ‘i’ in Lithuanian */
@@ -173,6 +182,7 @@ enum uprop_nt {
[[__mlib_uprop_attrs]] rune uprop_get_slc(rune);
[[__mlib_uprop_attrs]] rune uprop_get_stc(rune);
[[__mlib_uprop_attrs]] rune uprop_get_suc(rune);
+[[__mlib_uprop_attrs]] struct rview uprop_get_lc(rune, struct lcctx);
[[__mlib_uprop_attrs]] struct rview uprop_get_tc(rune, struct tcctx);
[[__mlib_uprop_attrs]] struct rview uprop_get_uc(rune, struct ucctx);
[[__mlib_uprop_attrs]] struct u8view uprop_get_na1(rune);
diff --git a/lib/unicode/prop/uprop_get_lc.c b/lib/unicode/prop/uprop_get_lc.c
new file mode 100644
index 0000000..4bbdc12
--- /dev/null
+++ b/lib/unicode/prop/uprop_get_lc.c
@@ -0,0 +1,45 @@
+#include "macros.h"
+#include "unicode/prop.h"
+
+#define M(...) ((struct rview)_(__VA_ARGS__)) /* struct rview compound literal over the listed runes */
+#define _(...) /* brace initializer: pointer to a const rune array, then lengthof() of that array */ \
+ {(const rune[]){__VA_ARGS__}, lengthof(((const rune[]){__VA_ARGS__}))}
+
+/* Return the lowercase mapping of ‘ch’ as a view of runes, honouring the
+   context flags the caller supplies in ‘ctx’.  Runes that match none of
+   the special cases below fall back to uprop_get_slc().
+
+   NOTE(review): M() builds its rune array as a compound literal inside
+   this function; ISO C gives such objects automatic storage duration,
+   so confirm that callers may safely read the returned view after the
+   call has returned. */
+struct rview
+uprop_get_lc(rune ch, struct lcctx ctx)
+{
+    switch (ch) {
+    case U'Σ':
+        /* Final sigma at the end of a word, medial sigma otherwise */
+        if (ctx.eow)
+            return M(U'ς');
+        return M(U'σ');
+
+    case U'İ':
+        /* Outside Azeri/Turkish the dot is kept as a combining mark */
+        if (ctx.az_or_tr)
+            return M('i');
+        return M('i', 0x307);
+
+    case 'I':
+        /* The Lithuanian rule takes precedence over the Azeri/Turkish one */
+        if (ctx.lt_acc_after)
+            return M('i', 0x307);
+        if (ctx.az_tr_not_before_dot)
+            return M(U'ı');
+        break;
+
+    case 'J':
+        if (ctx.lt_acc_after)
+            return M('j', 0x307);
+        break;
+
+    case U'Į':
+        if (ctx.lt_acc_after)
+            return M(U'į', 0x307);
+        break;
+
+    case U'Ì':
+        if (ctx.lt)
+            return M('i', 0x307, 0x300);
+        break;
+
+    case U'Í':
+        if (ctx.lt)
+            return M('i', 0x307, 0x301);
+        break;
+
+    case U'Ĩ':
+        if (ctx.lt)
+            return M('i', 0x307, 0x303);
+        break;
+
+    case 0x307:
+        /* The combining dot above maps to nothing after ‘I’ in Azeri/Turkish */
+        if (ctx.az_tr_after_I)
+            return M();
+        break;
+
+    default:
+        break;
+    }
+
+    /* No special case applied; use the simple lowercase mapping */
+    const rune simple = uprop_get_slc(ch);
+    return M(simple);
+}