about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:36:28 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:44:41 +0200
commit 790aaa14406d45dd95dea3f7d6d00da5911c6584 (patch)
tree f6e458cc3cde881893c06cba6cee94325c582112
parent 8f7f007a52f39c1e03817d417e95877526945b45 (diff)
Replace u8split() with u8cut()
-rw-r--r-- include/mbstring.h | 6
-rw-r--r-- lib/mbstring/u8cut.c | 19
-rw-r--r-- lib/mbstring/u8split.c | 16
-rw-r--r-- test/_brk-test.h | 28
-rw-r--r-- test/_case-test.h | 6
5 files changed, 32 insertions, 43 deletions
diff --git a/include/mbstring.h b/include/mbstring.h
index ba654bb..d725e0d 100644
--- a/include/mbstring.h
+++ b/include/mbstring.h
@@ -20,12 +20,13 @@
#define u8bytec(x) (((x) & 0xC0) == 0x80)
/* clang-format on */
+constexpr int U8_LEN_MAX = 4;
constexpr rune U8_1B_MAX = 0x00007F;
constexpr rune U8_2B_MAX = 0x0007FF;
constexpr rune U8_3B_MAX = 0x00FFFF;
constexpr rune U8_4B_MAX = 0x10FFFF;
-constexpr int U8_LEN_MAX = 4;
+constexpr rune MBEND = 0x110000;
#define PRIsU8 ".*s"
#define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p)
@@ -43,6 +44,7 @@ int u8tor(rune *, const char8_t *);
[[nodiscard]] size_t u8cspn(struct u8view, const rune *, size_t);
[[nodiscard]] size_t u8len(struct u8view);
[[nodiscard]] size_t u8spn(struct u8view, const rune *, size_t);
-struct u8view u8split(struct u8view *, rune);
+rune u8cut(struct u8view *restrict, struct u8view *restrict, const rune *,
+ size_t);
#endif /* !MLIB_MBSTRING_H */
diff --git a/lib/mbstring/u8cut.c b/lib/mbstring/u8cut.c
new file mode 100644
index 0000000..3dd9663
--- /dev/null
+++ b/lib/mbstring/u8cut.c
@@ -0,0 +1,19 @@
+#include "macros.h"
+#include "mbstring.h"
+
+rune
+u8cut(struct u8view *restrict x, struct u8view *restrict y, const rune *seps,
+ size_t n)
+{
+ ASSUME(y != nullptr);
+ ASSUME(seps != nullptr);
+ size_t off = u8cspn(*y, seps, n);
+ if (x != nullptr) {
+ x->p = y->p;
+ x->len = off;
+ }
+ VSHFT(y, off);
+ rune ch = MBEND;
+ u8next(&ch, y);
+ return ch;
+}
diff --git a/lib/mbstring/u8split.c b/lib/mbstring/u8split.c
deleted file mode 100644
index c26f48b..0000000
--- a/lib/mbstring/u8split.c
+++ /dev/null
@@ -1,16 +0,0 @@
-#include "mbstring.h"
-
-struct u8view
-u8split(struct u8view *rhs, rune ch)
-{
- struct u8view lhs = {.p = rhs->p};
- if ((rhs->p = u8chr(*rhs, ch)) == nullptr) {
- lhs.len = rhs->len;
- rhs->len = 0;
- } else {
- lhs.len = rhs->p - lhs.p;
- rhs->len -= lhs.len;
- u8next(nullptr, rhs);
- }
- return lhs;
-}
diff --git a/test/_brk-test.h b/test/_brk-test.h
index e468f23..3a66f23 100644
--- a/test/_brk-test.h
+++ b/test/_brk-test.h
@@ -19,7 +19,6 @@
#define CNTFUNC CONCAT(CONCAT(u8, BRKTYPE), cnt)
static bool test(struct u8view, int);
-static int hexdigits(rune);
int
main(int, char **argv)
@@ -59,25 +58,21 @@ test(struct u8view sv, int id)
typedef dynarr(char8_t) item;
dynarr(item) items = {};
- struct u8view sv_cpy = sv;
-
- do {
- rune op, ch;
- u8next(&op, &sv_cpy);
+ rune op;
+ struct u8view sv_cpy = sv;
+ while ((op = u8cut(nullptr, &sv_cpy, U"×÷", 2)) != MBEND) {
+ rune ch;
sscanf(sv_cpy.p, "%" SCNxRUNE, &ch);
- int off = hexdigits(ch);
- off = MAX(4, off);
- VSHFT(&sv_cpy, off);
- char8_t buf[U8_LEN_MAX] = {};
+ char8_t buf[U8_LEN_MAX];
int w = rtou8(buf, sizeof(buf), ch);
total += w;
if (op == U'÷')
DAPUSH(&items, (item){});
DAEXTEND(&items.buf[items.len - 1], buf, w);
- } while (sv_cpy.len > 0);
+ }
size_t off = 0;
char8_t *p = bufalloc(nullptr, 1, total);
@@ -114,14 +109,3 @@ test(struct u8view sv, int id)
return true;
}
-
-int
-hexdigits(rune ch)
-{
- int n = 0;
- do {
- ch /= 16;
- n++;
- } while (ch != 0);
- return n;
-}
diff --git a/test/_case-test.h b/test/_case-test.h
index 12e3a57..c594554 100644
--- a/test/_case-test.h
+++ b/test/_case-test.h
@@ -54,9 +54,9 @@ test(const char8_t *line, int id)
{
struct u8view sv = {line, strlen(line)};
struct u8view before, after, flags;
- before = u8split(&sv, ';');
- after = u8split(&sv, ';');
- flags = u8split(&sv, ';');
+ u8cut(&before, &sv, U";", 1);
+ u8cut(&after, &sv, U";", 1);
+ u8cut(&flags, &sv, U";", 1);
enum caseflags cf = u8eq(flags, U8("ẞ")) ? CF_ẞ
: u8eq(flags, U8("AZ")) ? CF_LANG_AZ