diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-24 00:28:01 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-24 00:28:01 +0200 |
commit | b5e7acf641d4ef3538803b746723b90a822ea1ad (patch) | |
tree | 08cde8ff2d9db97507b00726b15831b2417fa5e1 /lib/unicode/string | |
parent | 3b797a5f3ce1d77fa7d0ed991b52553c1b3e8757 (diff) |
Support titlecasing Dutch ‘IJ’ properly
Diffstat (limited to 'lib/unicode/string')
-rw-r--r-- | lib/unicode/string/u8title.c | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c index b704eef..dcf0b2e 100644 --- a/lib/unicode/string/u8title.c +++ b/lib/unicode/string/u8title.c @@ -23,9 +23,11 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, int w; rune ch; size_t n = 0; - bool lt_special = false; + bool lt_special, nl_special; struct u8view word = {}, cpy = {src, srcn}; + lt_special = nl_special = false; + while (w = u8next(&ch, &src, &srcn)) { rune next = 0; if (srcn > 0) @@ -39,8 +41,12 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, ctx_l.before_acc = next == COMB_GRAVE || next == COMB_ACUTE || next == COMB_TILDE; - struct rview rv = sow || lt_special ? uprop_get_tc(ch, ctx_t) - : uprop_get_lc(ch, ctx_l); + struct rview rv; + if (nl_special && (ch == 'j' || ch == 'J')) + rv = (struct rview){.p = U"J", .len = 1}; + else + rv = sow || lt_special ? uprop_get_tc(ch, ctx_t) + : uprop_get_lc(ch, ctx_l); for (size_t i = 0; i < rv.len; i++) { if (n >= dstn) { char8_t buf[U8_LEN_MAX]; @@ -49,6 +55,8 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, n += rtou8(dst + n, dstn - n, rv.p[i]); } + if (flags & CF_LANG_NL) + nl_special = sow && (ch == 'i' || ch == 'I'); if (ctx_t.lt) { /* If the rune at SOW is Soft_Dotted, then the next rune should be titlecased if it is U+0307 or if does not have ccc=0 and ccc=230. |