From b5e7acf641d4ef3538803b746723b90a822ea1ad Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Wed, 24 Apr 2024 00:28:01 +0200 Subject: Support titlecasing Dutch ‘IJ’ properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/unicode/string/u8title.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'lib/unicode') diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c index b704eef..dcf0b2e 100644 --- a/lib/unicode/string/u8title.c +++ b/lib/unicode/string/u8title.c @@ -23,9 +23,11 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, int w; rune ch; size_t n = 0; - bool lt_special = false; + bool lt_special, nl_special; struct u8view word = {}, cpy = {src, srcn}; + lt_special = nl_special = false; + while (w = u8next(&ch, &src, &srcn)) { rune next = 0; if (srcn > 0) @@ -39,8 +41,12 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, ctx_l.before_acc = next == COMB_GRAVE || next == COMB_ACUTE || next == COMB_TILDE; - struct rview rv = sow || lt_special ? uprop_get_tc(ch, ctx_t) - : uprop_get_lc(ch, ctx_l); + struct rview rv; + if (nl_special && (ch == 'j' || ch == 'J')) + rv = (struct rview){.p = U"J", .len = 1}; + else + rv = sow || lt_special ? uprop_get_tc(ch, ctx_t) + : uprop_get_lc(ch, ctx_l); for (size_t i = 0; i < rv.len; i++) { if (n >= dstn) { char8_t buf[U8_LEN_MAX]; @@ -49,6 +55,8 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, n += rtou8(dst + n, dstn - n, rv.p[i]); } + if (flags & CF_LANG_NL) + nl_special = sow && (ch == 'i' || ch == 'I'); if (ctx_t.lt) { /* If the rune at SOW is Soft_Dotted, then the next rune should be titlecased if it is U+0307 or if does not have ccc=0 and ccc=230. -- cgit v1.2.3