diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-24 00:28:01 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-24 00:28:01 +0200 |
commit | b5e7acf641d4ef3538803b746723b90a822ea1ad (patch) | |
tree | 08cde8ff2d9db97507b00726b15831b2417fa5e1 | |
parent | 3b797a5f3ce1d77fa7d0ed991b52553c1b3e8757 (diff) |
Support titlecasing Dutch ‘IJ’ properly
-rw-r--r-- | include/unicode/string.h | 3 |
-rw-r--r-- | lib/unicode/string/u8title.c | 14 |
2 files changed, 13 insertions, 4 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index cb19821..4b75864 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -14,7 +14,8 @@ enum [[clang::flag_enum]] caseflags {
 	CF_LANG_AZ = 1 << 0, /* Azeri; alias for CF_LANG_TR */
 	CF_LANG_TR = 1 << 0, /* Turkish; alias for CF_LANG_AZ */
 	CF_LANG_LT = 1 << 1, /* Lithuanian */
-	CF_ẞ = 1 << 2, /* Use ‘ẞ’ as the uppercase of ‘ß’ */
+	CF_LANG_NL = 1 << 2, /* Dutch */
+	CF_ẞ = 1 << 3, /* Use ‘ẞ’ as the uppercase of ‘ß’ */
 };
 
 /* clang-format on */
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
index b704eef..dcf0b2e 100644
--- a/lib/unicode/string/u8title.c
+++ b/lib/unicode/string/u8title.c
@@ -23,9 +23,11 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 	int w;
 	rune ch;
 	size_t n = 0;
-	bool lt_special = false;
+	bool lt_special, nl_special;
 	struct u8view word = {}, cpy = {src, srcn};
 
+	lt_special = nl_special = false;
+
 	while (w = u8next(&ch, &src, &srcn)) {
 		rune next = 0;
 		if (srcn > 0)
@@ -39,8 +41,12 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 		ctx_l.before_acc = next == COMB_GRAVE || next == COMB_ACUTE
 		                || next == COMB_TILDE;
 
-		struct rview rv = sow || lt_special ? uprop_get_tc(ch, ctx_t)
-		                                    : uprop_get_lc(ch, ctx_l);
+		struct rview rv;
+		if (nl_special && (ch == 'j' || ch == 'J'))
+			rv = (struct rview){.p = U"J", .len = 1};
+		else
+			rv = sow || lt_special ? uprop_get_tc(ch, ctx_t)
+			                       : uprop_get_lc(ch, ctx_l);
 		for (size_t i = 0; i < rv.len; i++) {
 			if (n >= dstn) {
 				char8_t buf[U8_LEN_MAX];
@@ -49,6 +55,8 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 				n += rtou8(dst + n, dstn - n, rv.p[i]);
 		}
 
+		if (flags & CF_LANG_NL)
+			nl_special = sow && (ch == 'i' || ch == 'I');
 		if (ctx_t.lt) {
 			/* If the rune at SOW is Soft_Dotted, then the next rune should be
 			   titlecased if it is U+0307 or if does not have ccc=0 and ccc=230.