diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-27 15:36:29 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-27 15:36:29 +0200 |
commit | 3c6a99ea878086781b07539ec28e60e9deea5750 (patch) | |
tree | 3c381ac716239595db234b6c4eef9f5843cd21da | |
parent | a5d222b7fffac55c2007c1bc90ef84a71b799ae9 (diff) |
Add Dutch titlecase tailoring (ijssel → IJssel)
-rw-r--r-- | README | 1 | ||||
-rw-r--r-- | lib/unicode/string/u8title.c | 22 |
2 files changed, 15 insertions, 8 deletions
```diff
@@ -113,7 +113,6 @@ FEATURES:
 
 PLANNED FEATURES:
 
-  • Dutch titlecase tailorings (unicode/string.h)
   • Line- and sentence segmentation (unicode/string.h)
   • String casefolding (unicode/string.h)
   • String collation (unicode/string.h)
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
index 536c4ed..4545fda 100644
--- a/lib/unicode/string/u8title.c
+++ b/lib/unicode/string/u8title.c
@@ -24,6 +24,7 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 	ctx_t.lt = ctx_l.lt = flags & CF_LANG_LT;
 
 	rune ch;
+	bool nl_IJ = false;
 	size_t n, before_dot_cnt, more_above_cnt;
 	struct u8view word = {}, wcpy = {src, srcn};
 	struct {
@@ -39,10 +40,6 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 	n = before_dot_cnt = more_above_cnt = 0;
 	while (u8next(&ch, &src, &srcn)) {
-		rune next = 0;
-		if (srcn > 0)
-			u8tor(&next, src);
-
 		if (src > word.p + word.len) {
 			u8wnext(&word, U8_ARGSP(wcpy));
 			ctx_t.after_soft_dotted = false;
@@ -89,11 +86,22 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 		if (state == BETWEEN && uprop_is_cased(ch))
 			state = LOWER;
 
-		struct rview rv =
-			state == LOWER ? uprop_get_lc(ch, ctx_l) : uprop_get_tc(ch, ctx_t);
-		if (state == TITLE && uprop_is_cased(ch))
+		struct rview rv = state == LOWER && !nl_IJ ? uprop_get_lc(ch, ctx_l)
+		                                           : uprop_get_tc(ch, ctx_t);
+		if (nl_IJ)
+			nl_IJ = false;
+		if (state == TITLE && uprop_is_cased(ch)) {
 			state = BETWEEN;
 
+			if (flags & CF_LANG_NL) {
+				rune next = 0;
+				if (srcn > 0)
+					u8tor(&next, src);
+				nl_IJ =
+					(ch == 'i' || ch == 'I') && (next == 'j' || next == 'J');
+			}
+		}
+
 		for (size_t i = 0; i < rv.len; i++) {
 			if (n >= dstn) {
 				char8_t buf[U8_LEN_MAX];
```