diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-22 22:54:45 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-22 22:54:45 +0200 |
commit | c62ed622ee5612e155e4176767ab1028f89a3b82 (patch) | |
tree | e6f9f0bb81590113411b3ad22944db0c5b1fd0aa /lib/unicode/string | |
parent | cc550bfa9b14bb81590ded2c0912c0a91fc3be89 (diff) |
Properly lowercase ‘Σ’ to ‘ς’ when at end of word
Diffstat (limited to 'lib/unicode/string')
-rw-r--r-- | lib/unicode/string/u8lower.c | 7 |
1 file changed, 5 insertions, 2 deletions
diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c index 052217f..77b0e18 100644 --- a/lib/unicode/string/u8lower.c +++ b/lib/unicode/string/u8lower.c @@ -18,13 +18,16 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, rune ch; size_t n = 0; + struct u8view word = {}, cpy = {src, srcn}; while (u8next(&ch, &src, &srcn)) { - /* TODO: Set ‘eow’ once word-segmentation is implemented */ - rune next = 0; if (srcn > 0) u8tor(&next, src); + if (src > word.p + word.len) + u8wnext(&word, U8_ARGSP(cpy)); + + ctx.eow = src == word.p + word.len; ctx.before_dot = next == COMB_DOT_ABOVE; ctx.before_acc = next == COMB_GRAVE || next == COMB_ACUTE |