diff options
-rw-r--r-- | include/unicode/prop.h | 27 | ||||
-rw-r--r-- | lib/unicode/prop/uprop_get_lc.c | 30 | ||||
-rw-r--r-- | lib/unicode/prop/uprop_get_tc.c | 2 | ||||
-rw-r--r-- | lib/unicode/prop/uprop_get_uc.c | 4 | ||||
-rw-r--r-- | lib/unicode/string/u8upper.c | 11 |
5 files changed, 38 insertions, 36 deletions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h index 76d9200..8def75b 100644 --- a/include/unicode/prop.h +++ b/include/unicode/prop.h @@ -13,23 +13,28 @@ struct rview { }; struct lcctx { - bool az_or_tr : 1; /* Azeri or Turkish */ - bool az_tr_after_I : 1; /* After ‘I’ in Azeri or Turkish */ - bool az_tr_not_before_dot : 1; /* Not before a dot in Azeri or Turkish */ - bool eow : 1; /* End of word */ - bool lt : 1; /* Lithuanian */ - bool lt_acc_after : 1; /* Accent after ‘i’ or ‘j’ in Lithuanian */ + bool az_or_tr : 1; /* Azeri or Turkish */ + bool lt : 1; /* Lithuanian */ + + bool after_I : 1; /* After ‘I’ */ + bool before_acc : 1; /* Before accent on ‘i’ or ‘j’ in Lithuanian */ + bool before_dot : 1; /* Before U+0307 */ + bool eow : 1; /* End of word */ }; struct tcctx { - bool az_or_tr : 1; /* Azeri or Turkish */ - bool lt_after_i : 1; /* After ‘i’ in Lithuanian */ + bool az_or_tr : 1; /* Azeri or Turkish */ + bool lt : 1; /* Lithuanian */ + + bool after_i : 1; /* After ‘i’ */ }; struct ucctx { - bool az_or_tr : 1; /* Azeri or Turkish */ - bool cap_eszett : 1; /* Use capital eszett */ - bool lt_after_i : 1; /* After ‘i’ in Lithuanian */ + bool az_or_tr : 1; /* Azeri or Turkish */ + bool lt : 1; /* Lithuanian */ + + bool ẞ : 1; /* Uppercase ‘ß’ into ‘ẞ’ (instead of ‘SS’) */ + bool after_i : 1; /* After ‘i’ */ }; enum uprop_bpt { diff --git a/lib/unicode/prop/uprop_get_lc.c b/lib/unicode/prop/uprop_get_lc.c index 4bbdc12..7d90d8d 100644 --- a/lib/unicode/prop/uprop_get_lc.c +++ b/lib/unicode/prop/uprop_get_lc.c @@ -13,18 +13,18 @@ uprop_get_lc(rune ch, struct lcctx ctx) if (ch == U'İ') return ctx.az_or_tr ? M('i') : M('i', 0x307); - if (ctx.lt_acc_after) { - switch (ch) { - case 'I': - return M('i', 0x307); - case 'J': - return M('j', 0x307); - case U'Į': - return M(U'į', 0x307); + if (ctx.lt) { + if (ctx.before_acc) { + switch (ch) { + case 'I': + return M('i', 0x307); + case 'J': + return M('j', 0x307); + case U'Į': + return M(U'į', 0x307); + } } - } - if (ctx.lt) { switch (ch) { case U'Ì': return M('i', 0x307, 0x300); @@ -35,10 +35,12 @@ uprop_get_lc(rune ch, struct lcctx ctx) } } - if (ch == 0x307 && ctx.az_tr_after_I) - return M(); - if (ch == 'I' && ctx.az_tr_not_before_dot) - return M(U'ı'); + if (ctx.az_or_tr) { + if (ch == 0x307 && ctx.after_I) + return M(); + if (ch == 'I' && !ctx.before_dot) + return M(U'ı'); + } ch = uprop_get_slc(ch); return M(ch); diff --git a/lib/unicode/prop/uprop_get_tc.c b/lib/unicode/prop/uprop_get_tc.c index c4c8070..029ef51 100644 --- a/lib/unicode/prop/uprop_get_tc.c +++ b/lib/unicode/prop/uprop_get_tc.c @@ -66,7 +66,7 @@ uprop_get_tc(rune ch, struct tcctx ctx) { if (ch == 'i' && ctx.az_or_tr) return M(U'İ'); - if (ch == 0x307 && ctx.lt_after_i) + if (ch == 0x307 && ctx.lt && ctx.after_i) return M(); rune CH = uprop_get_stc(ch); diff --git a/lib/unicode/prop/uprop_get_uc.c b/lib/unicode/prop/uprop_get_uc.c index d3ab274..69435b8 100644 --- a/lib/unicode/prop/uprop_get_uc.c +++ b/lib/unicode/prop/uprop_get_uc.c @@ -120,10 +120,10 @@ struct rview uprop_get_uc(rune ch, struct ucctx ctx) { if (ch == U'ß') - return ctx.cap_eszett ? M(U'ẞ') : M('S', 'S'); + return ctx.ẞ ? M(U'ẞ') : M('S', 'S'); if (ch == 'i' && ctx.az_or_tr) return M(U'İ'); - if (ch == 0x307 && ctx.lt_after_i) + if (ch == 0x307 && ctx.lt && ctx.after_i) return M(); rune CH = uprop_get_suc(ch); diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c index 5ce0e77..6b041f6 100644 --- a/lib/unicode/string/u8upper.c +++ b/lib/unicode/string/u8upper.c @@ -8,17 +8,14 @@ u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, { struct ucctx ctx = { .az_or_tr = flags & CF_LANG_AZ, - .cap_eszett = flags & CF_ẞ, + .lt = flags & CF_LANG_LT, + .ẞ = flags & CF_ẞ, }; rune ch; size_t n = 0; - bool prev_was_i = false; while (u8next(&ch, &src, &srcn)) { - if (ch == 0x307 && prev_was_i && (flags & CF_LANG_LT)) - ctx.lt_after_i = true; - struct rview rv = uprop_get_uc(ch, ctx); for (size_t i = 0; i < rv.len; i++) { if (n >= dstn) { @@ -27,9 +24,7 @@ u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, } else n += rtou8(dst + n, dstn - n, rv.p[i]); } - - prev_was_i = ch == 'i'; - ctx.lt_after_i = false; + ctx.after_i = ch == 'i'; } return n; |