aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-14 23:03:25 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-14 23:03:25 +0200
commit fa481efe8e777bae2c2cca3649176d1ac7da6413 (patch)
tree 40a32e492594310b490ed1f301817b72f982d617
parent 5984a3211cc8a214305b9acbff07b3f3b78cac09 (diff)
Reorganize some things
-rw-r--r-- include/unicode/prop.h 27
-rw-r--r-- lib/unicode/prop/uprop_get_lc.c 30
-rw-r--r-- lib/unicode/prop/uprop_get_tc.c 2
-rw-r--r-- lib/unicode/prop/uprop_get_uc.c 4
-rw-r--r-- lib/unicode/string/u8upper.c 11
5 files changed, 38 insertions, 36 deletions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index 76d9200..8def75b 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -13,23 +13,28 @@ struct rview {
};
struct lcctx {
- bool az_or_tr : 1; /* Azeri or Turkish */
- bool az_tr_after_I : 1; /* After ‘I’ in Azeri or Turkish */
- bool az_tr_not_before_dot : 1; /* Not before a dot in Azeri or Turkish */
- bool eow : 1; /* End of word */
- bool lt : 1; /* Lithuanian */
- bool lt_acc_after : 1; /* Accent after ‘i’ or ‘j’ in Lithuanian */
+ bool az_or_tr : 1; /* Azeri or Turkish */
+ bool lt : 1; /* Lithuanian */
+
+ bool after_I : 1; /* After ‘I’ */
+ bool before_acc : 1; /* Before accent on ‘i’ or ‘j’ in Lithuanian */
+ bool before_dot : 1; /* Before U+0307 */
+ bool eow : 1; /* End of word */
};
struct tcctx {
- bool az_or_tr : 1; /* Azeri or Turkish */
- bool lt_after_i : 1; /* After ‘i’ in Lithuanian */
+ bool az_or_tr : 1; /* Azeri or Turkish */
+ bool lt : 1; /* Lithuanian */
+
+ bool after_i : 1; /* After ‘i’ */
};
struct ucctx {
- bool az_or_tr : 1; /* Azeri or Turkish */
- bool cap_eszett : 1; /* Use capital eszett */
- bool lt_after_i : 1; /* After ‘i’ in Lithuanian */
+ bool az_or_tr : 1; /* Azeri or Turkish */
+ bool lt : 1; /* Lithuanian */
+
+ bool ẞ : 1; /* Uppercase ‘ß’ into ‘ẞ’ (instead of ‘SS’) */
+ bool after_i : 1; /* After ‘i’ */
};
enum uprop_bpt {
diff --git a/lib/unicode/prop/uprop_get_lc.c b/lib/unicode/prop/uprop_get_lc.c
index 4bbdc12..7d90d8d 100644
--- a/lib/unicode/prop/uprop_get_lc.c
+++ b/lib/unicode/prop/uprop_get_lc.c
@@ -13,18 +13,18 @@ uprop_get_lc(rune ch, struct lcctx ctx)
if (ch == U'İ')
return ctx.az_or_tr ? M('i') : M('i', 0x307);
- if (ctx.lt_acc_after) {
- switch (ch) {
- case 'I':
- return M('i', 0x307);
- case 'J':
- return M('j', 0x307);
- case U'Į':
- return M(U'į', 0x307);
+ if (ctx.lt) {
+ if (ctx.before_acc) {
+ switch (ch) {
+ case 'I':
+ return M('i', 0x307);
+ case 'J':
+ return M('j', 0x307);
+ case U'Į':
+ return M(U'į', 0x307);
+ }
}
- }
- if (ctx.lt) {
switch (ch) {
case U'Ì':
return M('i', 0x307, 0x300);
@@ -35,10 +35,12 @@ uprop_get_lc(rune ch, struct lcctx ctx)
}
}
- if (ch == 0x307 && ctx.az_tr_after_I)
- return M();
- if (ch == 'I' && ctx.az_tr_not_before_dot)
- return M(U'ı');
+ if (ctx.az_or_tr) {
+ if (ch == 0x307 && ctx.after_I)
+ return M();
+ if (ch == 'I' && !ctx.before_dot)
+ return M(U'ı');
+ }
ch = uprop_get_slc(ch);
return M(ch);
diff --git a/lib/unicode/prop/uprop_get_tc.c b/lib/unicode/prop/uprop_get_tc.c
index c4c8070..029ef51 100644
--- a/lib/unicode/prop/uprop_get_tc.c
+++ b/lib/unicode/prop/uprop_get_tc.c
@@ -66,7 +66,7 @@ uprop_get_tc(rune ch, struct tcctx ctx)
{
if (ch == 'i' && ctx.az_or_tr)
return M(U'İ');
- if (ch == 0x307 && ctx.lt_after_i)
+ if (ch == 0x307 && ctx.lt && ctx.after_i)
return M();
rune CH = uprop_get_stc(ch);
diff --git a/lib/unicode/prop/uprop_get_uc.c b/lib/unicode/prop/uprop_get_uc.c
index d3ab274..69435b8 100644
--- a/lib/unicode/prop/uprop_get_uc.c
+++ b/lib/unicode/prop/uprop_get_uc.c
@@ -120,10 +120,10 @@ struct rview
uprop_get_uc(rune ch, struct ucctx ctx)
{
if (ch == U'ß')
- return ctx.cap_eszett ? M(U'ẞ') : M('S', 'S');
+ return ctx.ẞ ? M(U'ẞ') : M('S', 'S');
if (ch == 'i' && ctx.az_or_tr)
return M(U'İ');
- if (ch == 0x307 && ctx.lt_after_i)
+ if (ch == 0x307 && ctx.lt && ctx.after_i)
return M();
rune CH = uprop_get_suc(ch);
diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c
index 5ce0e77..6b041f6 100644
--- a/lib/unicode/string/u8upper.c
+++ b/lib/unicode/string/u8upper.c
@@ -8,17 +8,14 @@ u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
{
struct ucctx ctx = {
.az_or_tr = flags & CF_LANG_AZ,
- .cap_eszett = flags & CF_ẞ,
+ .lt = flags & CF_LANG_LT,
+ .ẞ = flags & CF_ẞ,
};
rune ch;
size_t n = 0;
- bool prev_was_i = false;
while (u8next(&ch, &src, &srcn)) {
- if (ch == 0x307 && prev_was_i && (flags & CF_LANG_LT))
- ctx.lt_after_i = true;
-
struct rview rv = uprop_get_uc(ch, ctx);
for (size_t i = 0; i < rv.len; i++) {
if (n >= dstn) {
@@ -27,9 +24,7 @@ u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
} else
n += rtou8(dst + n, dstn - n, rv.p[i]);
}
-
- prev_was_i = ch == 'i';
- ctx.lt_after_i = false;
+ ctx.after_i = ch == 'i';
}
return n;