From b5e7acf641d4ef3538803b746723b90a822ea1ad Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Wed, 24 Apr 2024 00:28:01 +0200 Subject: Support titlecasing Dutch ‘IJ’ properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/unicode/string.h | 3 ++- lib/unicode/string/u8title.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/unicode/string.h b/include/unicode/string.h index cb19821..4b75864 100644 --- a/include/unicode/string.h +++ b/include/unicode/string.h @@ -14,7 +14,8 @@ enum [[clang::flag_enum]] caseflags { CF_LANG_AZ = 1 << 0, /* Azeri; alias for CF_LANG_TR */ CF_LANG_TR = 1 << 0, /* Turkish; alias for CF_LANG_AZ */ CF_LANG_LT = 1 << 1, /* Lithuanian */ - CF_ẞ = 1 << 2, /* Use ‘ẞ’ as the uppercase of ‘ß’ */ + CF_LANG_NL = 1 << 2, /* Dutch */ + CF_ẞ = 1 << 3, /* Use ‘ẞ’ as the uppercase of ‘ß’ */ }; /* clang-format on */ diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c index b704eef..dcf0b2e 100644 --- a/lib/unicode/string/u8title.c +++ b/lib/unicode/string/u8title.c @@ -23,9 +23,11 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, int w; rune ch; size_t n = 0; - bool lt_special = false; + bool lt_special, nl_special; struct u8view word = {}, cpy = {src, srcn}; + lt_special = nl_special = false; + while (w = u8next(&ch, &src, &srcn)) { rune next = 0; if (srcn > 0) @@ -39,8 +41,12 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, ctx_l.before_acc = next == COMB_GRAVE || next == COMB_ACUTE || next == COMB_TILDE; - struct rview rv = sow || lt_special ? uprop_get_tc(ch, ctx_t) - : uprop_get_lc(ch, ctx_l); + struct rview rv; + if (nl_special && (ch == 'j' || ch == 'J')) + rv = (struct rview){.p = U"J", .len = 1}; + else + rv = sow || lt_special ? uprop_get_tc(ch, ctx_t) + : uprop_get_lc(ch, ctx_l); for (size_t i = 0; i < rv.len; i++) { if (n >= dstn) { char8_t buf[U8_LEN_MAX]; @@ -49,6 +55,8 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, n += rtou8(dst + n, dstn - n, rv.p[i]); } + if (flags & CF_LANG_NL) + nl_special = sow && (ch == 'i' || ch == 'I'); if (ctx_t.lt) { /* If the rune at SOW is Soft_Dotted, then the next rune should be titlecased if it is U+0307 or if does not have ccc=0 and ccc=230. -- cgit v1.2.3