From c62ed622ee5612e155e4176767ab1028f89a3b82 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Mon, 22 Apr 2024 22:54:45 +0200 Subject: Properly lowercase ‘Σ’ to ‘ς’ when at end of word MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/unicode/string/u8lower.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c index 052217f..77b0e18 100644 --- a/lib/unicode/string/u8lower.c +++ b/lib/unicode/string/u8lower.c @@ -18,13 +18,16 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, rune ch; size_t n = 0; + struct u8view word = {}, cpy = {src, srcn}; while (u8next(&ch, &src, &srcn)) { - /* TODO: Set ‘eow’ once word-segmentation is implemented */ - rune next = 0; if (srcn > 0) u8tor(&next, src); + if (src > word.p + word.len) + u8wnext(&word, U8_ARGSP(cpy)); + + ctx.eow = src == word.p + word.len; ctx.before_dot = next == COMB_DOT_ABOVE; ctx.before_acc = next == COMB_GRAVE || next == COMB_ACUTE -- cgit v1.2.3