about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-27 15:36:29 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-27 15:36:29 +0200
commit 3c6a99ea878086781b07539ec28e60e9deea5750 (patch)
tree 3c381ac716239595db234b6c4eef9f5843cd21da
parent a5d222b7fffac55c2007c1bc90ef84a71b799ae9 (diff)
Add Dutch titlecase tailoring (ijssel → IJssel)
-rw-r--r-- README 1
-rw-r--r-- lib/unicode/string/u8title.c 22
2 files changed, 15 insertions, 8 deletions
diff --git a/README b/README
index 422489b..98491fd 100644
--- a/README
+++ b/README
@@ -113,7 +113,6 @@ FEATURES:
PLANNED FEATURES:
- • Dutch titlecase tailorings (unicode/string.h)
• Line- and sentence segmentation (unicode/string.h)
• String casefolding (unicode/string.h)
• String collation (unicode/string.h)
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
index 536c4ed..4545fda 100644
--- a/lib/unicode/string/u8title.c
+++ b/lib/unicode/string/u8title.c
@@ -24,6 +24,7 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
ctx_t.lt = ctx_l.lt = flags & CF_LANG_LT;
rune ch;
+ bool nl_IJ = false;
size_t n, before_dot_cnt, more_above_cnt;
struct u8view word = {}, wcpy = {src, srcn};
struct {
@@ -39,10 +40,6 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
n = before_dot_cnt = more_above_cnt = 0;
while (u8next(&ch, &src, &srcn)) {
- rune next = 0;
- if (srcn > 0)
- u8tor(&next, src);
-
if (src > word.p + word.len) {
u8wnext(&word, U8_ARGSP(wcpy));
ctx_t.after_soft_dotted = false;
@@ -89,11 +86,22 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
if (state == BETWEEN && uprop_is_cased(ch))
state = LOWER;
- struct rview rv =
- state == LOWER ? uprop_get_lc(ch, ctx_l) : uprop_get_tc(ch, ctx_t);
- if (state == TITLE && uprop_is_cased(ch))
+ struct rview rv = state == LOWER && !nl_IJ ? uprop_get_lc(ch, ctx_l)
+ : uprop_get_tc(ch, ctx_t);
+ if (nl_IJ)
+ nl_IJ = false;
+ if (state == TITLE && uprop_is_cased(ch)) {
state = BETWEEN;
+ if (flags & CF_LANG_NL) {
+ rune next = 0;
+ if (srcn > 0)
+ u8tor(&next, src);
+ nl_IJ =
+ (ch == 'i' || ch == 'I') && (next == 'j' || next == 'J');
+ }
+ }
+
for (size_t i = 0; i < rv.len; i++) {
if (n >= dstn) {
char8_t buf[U8_LEN_MAX];