diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-05-04 04:36:28 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-04 04:44:41 +0200 |
commit | 790aaa14406d45dd95dea3f7d6d00da5911c6584 (patch) | |
tree | f6e458cc3cde881893c06cba6cee94325c582112 | |
parent | 8f7f007a52f39c1e03817d417e95877526945b45 (diff) |
Replace u8split() with u8cut()
-rw-r--r-- | include/mbstring.h | 6 | ||||
-rw-r--r-- | lib/mbstring/u8cut.c | 19 | ||||
-rw-r--r-- | lib/mbstring/u8split.c | 16 | ||||
-rw-r--r-- | test/_brk-test.h | 28 | ||||
-rw-r--r-- | test/_case-test.h | 6 |
5 files changed, 32 insertions, 43 deletions
diff --git a/include/mbstring.h b/include/mbstring.h index ba654bb..d725e0d 100644 --- a/include/mbstring.h +++ b/include/mbstring.h @@ -20,12 +20,13 @@ #define u8bytec(x) (((x) & 0xC0) == 0x80) /* clang-format on */ +constexpr int U8_LEN_MAX = 4; constexpr rune U8_1B_MAX = 0x00007F; constexpr rune U8_2B_MAX = 0x0007FF; constexpr rune U8_3B_MAX = 0x00FFFF; constexpr rune U8_4B_MAX = 0x10FFFF; -constexpr int U8_LEN_MAX = 4; +constexpr rune MBEND = 0x110000; #define PRIsU8 ".*s" #define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p) @@ -43,6 +44,7 @@ int u8tor(rune *, const char8_t *); [[nodiscard]] size_t u8cspn(struct u8view, const rune *, size_t); [[nodiscard]] size_t u8len(struct u8view); [[nodiscard]] size_t u8spn(struct u8view, const rune *, size_t); -struct u8view u8split(struct u8view *, rune); +rune u8cut(struct u8view *restrict, struct u8view *restrict, const rune *, + size_t); #endif /* !MLIB_MBSTRING_H */ diff --git a/lib/mbstring/u8cut.c b/lib/mbstring/u8cut.c new file mode 100644 index 0000000..3dd9663 --- /dev/null +++ b/lib/mbstring/u8cut.c @@ -0,0 +1,19 @@ +#include "macros.h" +#include "mbstring.h" + +rune +u8cut(struct u8view *restrict x, struct u8view *restrict y, const rune *seps, + size_t n) +{ + ASSUME(y != nullptr); + ASSUME(seps != nullptr); + size_t off = u8cspn(*y, seps, n); + if (x != nullptr) { + x->p = y->p; + x->len = off; + } + VSHFT(y, off); + rune ch = MBEND; + u8next(&ch, y); + return ch; +} diff --git a/lib/mbstring/u8split.c b/lib/mbstring/u8split.c deleted file mode 100644 index c26f48b..0000000 --- a/lib/mbstring/u8split.c +++ /dev/null @@ -1,16 +0,0 @@ -#include "mbstring.h" - -struct u8view -u8split(struct u8view *rhs, rune ch) -{ - struct u8view lhs = {.p = rhs->p}; - if ((rhs->p = u8chr(*rhs, ch)) == nullptr) { - lhs.len = rhs->len; - rhs->len = 0; - } else { - lhs.len = rhs->p - lhs.p; - rhs->len -= lhs.len; - u8next(nullptr, rhs); - } - return lhs; -} diff --git a/test/_brk-test.h b/test/_brk-test.h index e468f23..3a66f23 100644 --- a/test/_brk-test.h +++ b/test/_brk-test.h @@ -19,7 +19,6 @@ #define CNTFUNC CONCAT(CONCAT(u8, BRKTYPE), cnt) static bool test(struct u8view, int); -static int hexdigits(rune); int main(int, char **argv) @@ -59,25 +58,21 @@ test(struct u8view sv, int id) typedef dynarr(char8_t) item; dynarr(item) items = {}; - struct u8view sv_cpy = sv; - - do { - rune op, ch; - u8next(&op, &sv_cpy); + rune op; + struct u8view sv_cpy = sv; + while ((op = u8cut(nullptr, &sv_cpy, U"×÷", 2)) != MBEND) { + rune ch; sscanf(sv_cpy.p, "%" SCNxRUNE, &ch); - int off = hexdigits(ch); - off = MAX(4, off); - VSHFT(&sv_cpy, off); - char8_t buf[U8_LEN_MAX] = {}; + char8_t buf[U8_LEN_MAX]; int w = rtou8(buf, sizeof(buf), ch); total += w; if (op == U'÷') DAPUSH(&items, (item){}); DAEXTEND(&items.buf[items.len - 1], buf, w); - } while (sv_cpy.len > 0); + } size_t off = 0; char8_t *p = bufalloc(nullptr, 1, total); @@ -114,14 +109,3 @@ test(struct u8view sv, int id) return true; } - -int -hexdigits(rune ch) -{ - int n = 0; - do { - ch /= 16; - n++; - } while (ch != 0); - return n; -} diff --git a/test/_case-test.h b/test/_case-test.h index 12e3a57..c594554 100644 --- a/test/_case-test.h +++ b/test/_case-test.h @@ -54,9 +54,9 @@ test(const char8_t *line, int id) { struct u8view sv = {line, strlen(line)}; struct u8view before, after, flags; - before = u8split(&sv, ';'); - after = u8split(&sv, ';'); - flags = u8split(&sv, ';'); + u8cut(&before, &sv, U";", 1); + u8cut(&after, &sv, U";", 1); + u8cut(&flags, &sv, U";", 1); enum caseflags cf = u8eq(flags, U8("ẞ")) ? CF_ẞ : u8eq(flags, U8("AZ")) ? CF_LANG_AZ |