diff options
-rw-r--r-- | include/unicode/prop.h | 10 | ||||
-rw-r--r-- | lib/unicode/prop/uprop_get_lc.c | 75 |
2 files changed, 85 insertions, 0 deletions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index cf02a74..76d9200 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -12,6 +12,15 @@ struct rview {
 	size_t len;
 };
 
+struct lcctx {
+	bool az_or_tr : 1;             /* Azeri or Turkish */
+	bool az_tr_after_I : 1;        /* After ‘I’ in Azeri or Turkish */
+	bool az_tr_not_before_dot : 1; /* Not before a dot in Azeri or Turkish */
+	bool eow : 1;                  /* End of word */
+	bool lt : 1;                   /* Lithuanian */
+	bool lt_acc_after : 1;         /* Accent after ‘i’ or ‘j’ in Lithuanian */
+};
+
 struct tcctx {
 	bool az_or_tr : 1;   /* Azeri or Turkish */
 	bool lt_after_i : 1; /* After ‘i’ in Lithuanian */
@@ -173,6 +182,7 @@ enum uprop_nt {
 [[__mlib_uprop_attrs]] rune uprop_get_slc(rune);
 [[__mlib_uprop_attrs]] rune uprop_get_stc(rune);
 [[__mlib_uprop_attrs]] rune uprop_get_suc(rune);
+[[__mlib_uprop_attrs]] struct rview uprop_get_lc(rune, struct lcctx);
 [[__mlib_uprop_attrs]] struct rview uprop_get_tc(rune, struct tcctx);
 [[__mlib_uprop_attrs]] struct rview uprop_get_uc(rune, struct ucctx);
 [[__mlib_uprop_attrs]] struct u8view uprop_get_na1(rune);
diff --git a/lib/unicode/prop/uprop_get_lc.c b/lib/unicode/prop/uprop_get_lc.c
new file mode 100644
--- /dev/null
+++ b/lib/unicode/prop/uprop_get_lc.c
@@ -0,0 +1,75 @@
+#include "macros.h"
+#include "unicode/prop.h"
+
+/* Build a view over every element of the named array A */
+#define RV(a) ((struct rview){(a), lengthof(a)})
+
+/*
+ * Fixed lowercase mappings.  They have static storage duration so that the
+ * views returned by uprop_get_lc() remain valid after it has returned; a
+ * block-scope compound literal would stop existing at that point.
+ */
+static const rune LC_SIGMA[]       = {U'σ'};
+static const rune LC_SIGMA_FINAL[] = {U'ς'};
+static const rune LC_I[]           = {'i'};
+static const rune LC_I_DOTLESS[]   = {U'ı'};
+static const rune LC_I_DOT[]       = {'i', 0x307};
+static const rune LC_J_DOT[]       = {'j', 0x307};
+static const rune LC_IOGONEK_DOT[] = {U'į', 0x307};
+static const rune LC_I_DOT_GRAVE[] = {'i', 0x307, 0x300};
+static const rune LC_I_DOT_ACUTE[] = {'i', 0x307, 0x301};
+static const rune LC_I_DOT_TILDE[] = {'i', 0x307, 0x303};
+
+/*
+ * Return the full lowercase mapping of CH, taking the context flags in CTX
+ * into account.
+ *
+ * The view may be empty (len 0), which means CH is dropped from the output.
+ * When no contextual rule applies the view holds the single rune given by
+ * uprop_get_slc(); that rune is kept in a per-thread buffer, so the view is
+ * only valid until the calling thread next calls uprop_get_lc().
+ */
+struct rview
+uprop_get_lc(rune ch, struct lcctx ctx)
+{
+	/* Scratch space for mappings that are only known at run time */
+	static _Thread_local rune buf[1];
+
+	if (ch == U'Σ')
+		return ctx.eow ? RV(LC_SIGMA_FINAL) : RV(LC_SIGMA);
+	if (ch == U'İ')
+		return ctx.az_or_tr ? RV(LC_I) : RV(LC_I_DOT);
+
+	/* 0x307 is inserted after the base letter in these cases */
+	if (ctx.lt_acc_after) {
+		switch (ch) {
+		case 'I':
+			return RV(LC_I_DOT);
+		case 'J':
+			return RV(LC_J_DOT);
+		case U'Į':
+			return RV(LC_IOGONEK_DOT);
+		}
+	}
+
+	/* Precomposed accents are split up with 0x307 placed before the accent */
+	if (ctx.lt) {
+		switch (ch) {
+		case U'Ì':
+			return RV(LC_I_DOT_GRAVE);
+		case U'Í':
+			return RV(LC_I_DOT_ACUTE);
+		case U'Ĩ':
+			return RV(LC_I_DOT_TILDE);
+		}
+	}
+
+	/* Empty mapping: a valid pointer with a length of zero */
+	if (ch == 0x307 && ctx.az_tr_after_I)
+		return (struct rview){buf, 0};
+	if (ch == 'I' && ctx.az_tr_not_before_dot)
+		return RV(LC_I_DOTLESS);
+
+	buf[0] = uprop_get_slc(ch);
+	return RV(buf);
+}