From ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 4 May 2024 04:01:45 +0200 Subject: Go all in on string views, and fix manuals --- lib/unicode/string/u8casefold.c | 4 ++-- lib/unicode/string/u8gcnt.c | 4 ++-- lib/unicode/string/u8gnext.c | 13 ++++++------- lib/unicode/string/u8lower.c | 16 ++++++++-------- lib/unicode/string/u8title.c | 22 +++++++++++----------- lib/unicode/string/u8upper.c | 4 ++-- lib/unicode/string/u8wcnt.c | 4 ++-- lib/unicode/string/u8wcnt_human.c | 4 ++-- lib/unicode/string/u8wnext.c | 28 +++++++++++++--------------- lib/unicode/string/u8wnext_human.c | 11 +++++------ 10 files changed, 53 insertions(+), 57 deletions(-) (limited to 'lib/unicode') diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c index 6c0b61d..2ab7c7c 100644 --- a/lib/unicode/string/u8casefold.c +++ b/lib/unicode/string/u8casefold.c @@ -3,13 +3,13 @@ #include "unicode/string.h" size_t -u8casefold(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, +u8casefold(char8_t *restrict dst, size_t dstn, struct u8view sv, enum caseflags flags) { rune ch; size_t n = 0; - while (u8next(&ch, &src, &srcn)) { + while (u8next(&ch, &sv)) { struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ); for (size_t i = 0; i < rv.len; i++) { if (n >= dstn) { diff --git a/lib/unicode/string/u8gcnt.c b/lib/unicode/string/u8gcnt.c index 81a0f97..6dfc519 100644 --- a/lib/unicode/string/u8gcnt.c +++ b/lib/unicode/string/u8gcnt.c @@ -1,10 +1,10 @@ #include "unicode/string.h" size_t -u8gcnt(const char8_t *s, size_t n) +u8gcnt(struct u8view sv) { size_t m = 0; - while (u8gnext(nullptr, &s, &n)) + while (u8gnext(nullptr, &sv)) m++; return m; } diff --git a/lib/unicode/string/u8gnext.c b/lib/unicode/string/u8gnext.c index a050bd5..3b0b410 100644 --- a/lib/unicode/string/u8gnext.c +++ b/lib/unicode/string/u8gnext.c @@ -20,17 +20,17 @@ static bool u8isgbrk(rune, rune, struct gbrk_state *); _MLIB_DEFINE_BSEARCH(gbrk_prop, gbrk_prop_tbl, GBP_OTHER) size_t -u8gnext(struct u8view *g, const char8_t **s, size_t *n) +u8gnext(struct u8view *g, struct u8view *sv) { int m; rune ch1; const char8_t *p; struct gbrk_state gs = {0}; - if (*n == 0) + if (sv->len == 0) return 0; - p = *s; + p = sv->p; if (g) g->p = p; p += u8tor(&ch1, p); @@ -38,14 +38,13 @@ u8gnext(struct u8view *g, const char8_t **s, size_t *n) for (;;) { rune ch2; - if ((size_t)(p - *s) >= *n) + if ((size_t)(p - sv->p) >= sv->len) ch2 = 0; else m = u8tor(&ch2, p); if (u8isgbrk(ch1, ch2, &gs)) { - ptrdiff_t d = p - *s; - *n -= d; - *s = p; + ptrdiff_t d = p - sv->p; + VSHFT(sv, d); if (g) g->len = d; return d; diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c index 63fdae4..907077b 100644 --- a/lib/unicode/string/u8lower.c +++ b/lib/unicode/string/u8lower.c @@ -13,7 +13,7 @@ uprop_ccc_0_or_230(rune ch) } size_t -u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, +u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, enum caseflags flags) { struct lcctx ctx = { @@ -32,21 +32,21 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, n = before_dot_cnt = more_above_cnt = 0; - while (u8next(&ch, &src, &srcn)) { + while (u8next(&ch, &sv)) { rune next = 0; - if (srcn > 0) - u8tor(&next, src); + if (sv.len > 0) + u8tor(&next, sv.p); if (ctx.az_or_tr || ctx.lt) { if (before_dot_cnt == 0 || more_above_cnt == 0) { rune ch = 0; before_dot_cnt = more_above_cnt = 0; - struct u8view cpy = {src, srcn}; + struct u8view cpy = sv; do { before_dot_cnt++; more_above_cnt++; - } while (u8next(&ch, U8_ARGSP(cpy)) && !uprop_ccc_0_or_230(ch)); + } while (u8next(&ch, &cpy) && !uprop_ccc_0_or_230(ch)); if (ch != COMB_DOT_ABOVE) before_dot_cnt = 0; @@ -60,11 +60,11 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, if (final_sigma.after == 0) { rune ch = 0; - struct u8view cpy = {src, srcn}; + struct u8view cpy = sv; do final_sigma.after++; - while (u8next(&ch, U8_ARGSP(cpy)) && uprop_is_ci(ch)); + while (u8next(&ch, &cpy) && uprop_is_ci(ch)); if (!uprop_is_cased(ch)) final_sigma.after = 0; diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c index 01e9d2e..f4d9b7e 100644 --- a/lib/unicode/string/u8title.c +++ b/lib/unicode/string/u8title.c @@ -14,7 +14,7 @@ uprop_ccc_0_or_230(rune ch) } size_t -u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, +u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, enum caseflags flags) { struct tcctx ctx_t; @@ -26,7 +26,7 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, rune ch; bool nl_IJ = false; size_t n, before_dot_cnt, more_above_cnt; - struct u8view word = {}, wcpy = {src, srcn}; + struct u8view word = {}, wcpy = sv; struct { bool before; size_t after; @@ -39,9 +39,9 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, n = before_dot_cnt = more_above_cnt = 0; - while (u8next(&ch, &src, &srcn)) { - if (src > word.p + word.len) { - u8wnext(&word, U8_ARGSP(wcpy)); + while (u8next(&ch, &sv)) { + if (sv.p > word.p + word.len) { + u8wnext(&word, &wcpy); ctx_t.after_soft_dotted = false; state = TITLE; } @@ -50,12 +50,12 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, if (before_dot_cnt == 0 || more_above_cnt == 0) { rune ch = 0; before_dot_cnt = more_above_cnt = 0; - struct u8view cpy = {src, srcn}; + struct u8view cpy = sv; do { before_dot_cnt++; more_above_cnt++; - } while (u8next(&ch, U8_ARGSP(cpy)) && !uprop_ccc_0_or_230(ch)); + } while (u8next(&ch, &cpy) && !uprop_ccc_0_or_230(ch)); if (ch != COMB_DOT_ABOVE) before_dot_cnt = 0; @@ -69,11 +69,11 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, if (final_sigma.after == 0) { rune ch = 0; - struct u8view cpy = {src, srcn}; + struct u8view cpy = sv; do final_sigma.after++; - while (u8next(&ch, U8_ARGSP(cpy)) && uprop_is_ci(ch)); + while (u8next(&ch, &cpy) && uprop_is_ci(ch)); if (!uprop_is_cased(ch)) final_sigma.after = 0; @@ -95,8 +95,8 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, if (flags & CF_LANG_NL) { rune next = 0; - if (srcn > 0) - u8tor(&next, src); + if (sv.len > 0) + u8tor(&next, sv.p); nl_IJ = (ch == 'i' || ch == 'I') && (next == 'j' || next == 'J'); } diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c index 086a160..6d4026d 100644 --- a/lib/unicode/string/u8upper.c +++ b/lib/unicode/string/u8upper.c @@ -3,7 +3,7 @@ #include "unicode/string.h" size_t -u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, +u8upper(char8_t *restrict dst, size_t dstn, struct u8view sv, enum caseflags flags) { struct ucctx ctx = { @@ -15,7 +15,7 @@ u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, rune ch; size_t n = 0; - while (u8next(&ch, &src, &srcn)) { + while (u8next(&ch, &sv)) { struct rview rv = uprop_get_uc(ch, ctx); for (size_t i = 0; i < rv.len; i++) { if (n >= dstn) { diff --git a/lib/unicode/string/u8wcnt.c b/lib/unicode/string/u8wcnt.c index f1b1742..f71faf5 100644 --- a/lib/unicode/string/u8wcnt.c +++ b/lib/unicode/string/u8wcnt.c @@ -1,10 +1,10 @@ #include "unicode/string.h" size_t -u8wcnt(const char8_t *s, size_t n) +u8wcnt(struct u8view sv) { size_t m = 0; - while (u8wnext(nullptr, &s, &n)) + while (u8wnext(nullptr, &sv)) m++; return m; } diff --git a/lib/unicode/string/u8wcnt_human.c b/lib/unicode/string/u8wcnt_human.c index 6e70398..60e7f95 100644 --- a/lib/unicode/string/u8wcnt_human.c +++ b/lib/unicode/string/u8wcnt_human.c @@ -1,10 +1,10 @@ #include "unicode/string.h" size_t -u8wcnt_human(const char8_t *s, size_t n) +u8wcnt_human(struct u8view sv) { size_t m = 0; - while (u8wnext_human(nullptr, &s, &n)) + while (u8wnext_human(nullptr, &sv)) m++; return m; } diff --git a/lib/unicode/string/u8wnext.c b/lib/unicode/string/u8wnext.c index 5e893c6..6655c5d 100644 --- a/lib/unicode/string/u8wnext.c +++ b/lib/unicode/string/u8wnext.c @@ -29,22 +29,20 @@ static size_t findwbrk(struct u8view); static struct wbrk_state mkwbrkstate(struct u8view); size_t -u8wnext(struct u8view *w, const char8_t **s, size_t *n) +u8wnext(struct u8view *w, struct u8view *sv) { - ASSUME(n != nullptr); - ASSUME(s != nullptr); - ASSUME(*s != nullptr); + ASSUME(sv != nullptr); + ASSUME(sv->p != nullptr); - if (*n == 0) + if (sv->len == 0) return 0; - size_t off = findwbrk((struct u8view){*s, *n}); + size_t off = findwbrk(*sv); if (w != nullptr) - *w = (struct u8view){*s, off}; + *w = (struct u8view){sv->p, off}; - ASSUME(*n >= off); - *s += off; - *n -= off; + ASSUME(sv->len >= off); + VSHFT(sv, off); return off; } @@ -196,13 +194,13 @@ mkwbrkstate(struct u8view sv) rune ch; for (size_t i = 0; - i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.raw_v)) != 0; i++) + i < lengthof(ws.raw.next) && u8next(&ch, &ws.raw_v) != 0; i++) { ws.raw.next[i] = mlib_lookup(ch); } for (size_t i = 0; - i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.skip_v)) != 0;) + i < lengthof(ws.raw.next) && u8next(&ch, &ws.skip_v) != 0;) { ws.skip.next[i] = mlib_lookup(ch); if (!IS_IGNORE(ws.skip.next[i])) @@ -224,10 +222,10 @@ advance(struct wbrk_state *ws) ws->raw.prev[0] = ws->raw.next[0]; ws->raw.next[0] = ws->raw.next[1]; ws->raw.next[1] = - u8next(&ch, U8_ARGSP(ws->raw_v)) != 0 ? mlib_lookup(ch) : WBRK_EOT; + u8next(&ch, &ws->raw_v) != 0 ? mlib_lookup(ch) : WBRK_EOT; /* Increment the midpoint */ - u8next(nullptr, U8_ARGSP(ws->mid_v)); + u8next(nullptr, &ws->mid_v); /* Ignore ignorable properties */ if (!IS_IGNORE(ws->raw.prev[0])) { @@ -237,7 +235,7 @@ advance(struct wbrk_state *ws) ws->ri_parity = ws->ri_parity == 0 && ws->skip.prev[0] == WBRK_RI; do { - if (u8next(&ch, U8_ARGSP(ws->skip_v)) == 0) { + if (u8next(&ch, &ws->skip_v) == 0) { ws->skip.next[1] = WBRK_EOT; break; } diff --git a/lib/unicode/string/u8wnext_human.c b/lib/unicode/string/u8wnext_human.c index d85abf1..953d942 100644 --- a/lib/unicode/string/u8wnext_human.c +++ b/lib/unicode/string/u8wnext_human.c @@ -4,17 +4,16 @@ #include "unicode/string.h" size_t -u8wnext_human(struct u8view *dst, const char8_t **s, size_t *n) +u8wnext_human(struct u8view *dst, struct u8view *sv) { - ASSUME(n != nullptr); - ASSUME(s != nullptr); - ASSUME(*s != nullptr); + ASSUME(sv != nullptr); + ASSUME(sv->p != nullptr); struct u8view w; - while (u8wnext(&w, s, n)) { + while (u8wnext(&w, sv)) { rune ch; struct u8view cpy = w; - while (u8next(&ch, U8_ARGSP(cpy))) { + while (u8next(&ch, &cpy)) { if (uprop_get_gc(ch) & (GC_L | GC_N)) { if (dst != nullptr) *dst = w; -- cgit v1.2.3