From 5ea567bef8836164ae6b3829762b237c9ea31bc3 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Mon, 15 Apr 2024 01:19:12 +0200 Subject: Add u8lower() --- lib/unicode/string/u8lower.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 lib/unicode/string/u8lower.c (limited to 'lib') diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c new file mode 100644 index 0000000..052217f --- /dev/null +++ b/lib/unicode/string/u8lower.c @@ -0,0 +1,46 @@ +#include "mbstring.h" +#include "unicode/prop.h" +#include "unicode/string.h" + +constexpr rune COMB_GRAVE = 0x0300; +constexpr rune COMB_ACUTE = 0x0301; +constexpr rune COMB_TILDE = 0x0303; +constexpr rune COMB_DOT_ABOVE = 0x0307; + +size_t +u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, + enum caseflags flags) +{ + struct lcctx ctx = { + .az_or_tr = flags & CF_LANG_AZ, + .lt = flags & CF_LANG_LT, + }; + + rune ch; + size_t n = 0; + + while (u8next(&ch, &src, &srcn)) { + /* TODO: Set ‘eow’ once word-segmentation is implemented */ + + rune next = 0; + if (srcn > 0) + u8tor(&next, src); + ctx.before_dot = next == COMB_DOT_ABOVE; + ctx.before_acc = next == COMB_GRAVE + || next == COMB_ACUTE + || next == COMB_TILDE; + + struct rview rv = uprop_get_lc(ch, ctx); + for (size_t i = 0; i < rv.len; i++) { + if (n >= dstn) { + char8_t buf[U8_LEN_MAX]; + n += rtou8(buf, sizeof(buf), rv.p[i]); + } else + n += rtou8(dst + n, dstn - n, rv.p[i]); + } + + ctx.after_I = ch == 'I'; + } + + return n; +} -- cgit v1.2.3