aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-05-14 23:59:05 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-05-14 23:59:05 +0200
commit 1aeb7e2b426e7a94cdd4f83c4337f44c0f5a2ca8 (patch)
tree b80d1751bebed6dd39c34fa0a1b832c714e57292
parent a624e3343e1183aa0f2d4b05afea50eef348651a (diff)
Add encoding-generic macros
-rw-r--r-- include/_uNview.h (renamed from include/_u8view.h) 10
-rw-r--r-- include/cli.h 2
-rw-r--r-- include/macros.h 2
-rw-r--r-- include/mbstring.h 32
-rw-r--r-- include/unicode/prop.h 2
-rw-r--r-- include/unicode/string.h 31
-rw-r--r-- lib/cli/optparse.c 4
-rw-r--r-- test/_brk-test.h 10
-rw-r--r-- test/_case-test.h 22
-rw-r--r-- test/norm-nfd-test.c 10
-rw-r--r-- test/wbrk-human-test.c 10
11 files changed, 98 insertions, 37 deletions
diff --git a/include/_u8view.h b/include/_uNview.h
index 5d6a9b8..9d0d5e2 100644
--- a/include/_u8view.h
+++ b/include/_uNview.h
@@ -10,4 +10,14 @@ struct u8view {
size_t len;
};
+/* Pointer/length pair describing a run of char16_t, mirroring struct u8view.
+   NOTE(review): len is presumably a count of char16_t elements rather than
+   bytes - confirm against the functions that consume this type. */
+struct u16view {
+ const char16_t *p;
+ size_t len;
+};
+
+/* Pointer/length pair describing a run of char32_t, mirroring struct u8view.
+   NOTE(review): len is presumably a count of char32_t elements rather than
+   bytes - confirm against the functions that consume this type. */
+struct u32view {
+ const char32_t *p;
+ size_t len;
+};
+
#endif /* !MLIB__U8VIEW_H */
diff --git a/include/cli.h b/include/cli.h
index 7b5b611..b2545ae 100644
--- a/include/cli.h
+++ b/include/cli.h
@@ -5,7 +5,7 @@
#include "_attrs.h"
#include "_rune.h"
-#include "_u8view.h"
+#include "_uNview.h"
struct optparser {
bool _b;
diff --git a/include/macros.h b/include/macros.h
index 32e8b7c..5c38215 100644
--- a/include/macros.h
+++ b/include/macros.h
@@ -11,6 +11,8 @@
#define streq(x, y) (!strcmp((x), (y)))
#define u8eq(x, y) (!u8cmp((x), (y)))
+/* Type-generic equality test: yields true when the comparison function chosen
+   from the type of lhs returns 0.  Only struct u8view (-> u8cmp) is listed
+   and there is no default association, so any other lhs type is rejected at
+   compile time. */
+#define ucseq(lhs, rhs) (!_Generic((lhs), struct u8view: u8cmp)((lhs), (rhs)))
+
#define _MLIB_STR(s) #s
#define _MLIB_CONCAT(x, y) x##y
diff --git a/include/mbstring.h b/include/mbstring.h
index d725e0d..947195f 100644
--- a/include/mbstring.h
+++ b/include/mbstring.h
@@ -5,10 +5,14 @@
#include "_charN_t.h"
#include "_rune.h"
-#include "_u8view.h"
+#include "_uNview.h"
-#define U8(...) \
+#define U8(...) \
((struct u8view){__VA_OPT__(u8##__VA_ARGS__, sizeof(u8##__VA_ARGS__) - 1)})
+/* U16() and U32() build a view over a UTF-16 / UTF-32 string literal; with no
+   argument they yield an empty (zero-initialized) view.
+   len is expressed in code units, matching VSHFT which advances p and shrinks
+   len by the same count.  sizeof on the literal yields BYTES, so it must be
+   divided by the code-unit size before the terminating NUL is subtracted;
+   otherwise U16("abc") would report 7 and U32("abc") 15 instead of 3. */
+#define U16(...) \
+ ((struct u16view){__VA_OPT__( \
+ u##__VA_ARGS__, sizeof(u##__VA_ARGS__) / sizeof(char16_t) - 1)})
+#define U32(...) \
+ ((struct u32view){__VA_OPT__( \
+ U##__VA_ARGS__, sizeof(U##__VA_ARGS__) / sizeof(char32_t) - 1)})
#define VSHFT(sv, n) ((sv)->p += (n), (sv)->len -= (n))
@@ -28,7 +32,7 @@ constexpr rune U8_4B_MAX = 0x10FFFF;
constexpr rune MBEND = 0x110000;
-#define PRIsU8 ".*s"
+#define PRIsSV ".*s"
#define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p)
int rtou8(char8_t *, size_t, rune);
@@ -47,4 +51,26 @@ int u8tor(rune *, const char8_t *);
rune u8cut(struct u8view *restrict, struct u8view *restrict, const rune *,
size_t);
+/* Encoding-generic macros.
+   Each ucs* macro uses _Generic to choose an implementation from the type of
+   one of its arguments and then forwards every argument to it.  Only the
+   UTF-8 (u8*) functions are listed and no default association is given, so
+   passing any other type is a compile-time error.
+   NOTE(review): ucsnext selects on struct u8view by value; if u8next takes a
+   pointer to the view (as u8cut does), the association should be
+   struct u8view * - confirm against the u8next prototype. */
+#define rtoucs(buf, bufsz, ch) \
+ _Generic((buf), char8_t *: rtou8)((buf), (bufsz), (ch))
+#define ucsnext(ch, sv) _Generic((sv), struct u8view: u8next)((ch), (sv))
+#define ucsprev(ch, sv, start) \
+ _Generic((sv), const char8_t **: u8prev)((ch), (sv), (start))
+#define ucstor(ch, p) \
+ _Generic((p), char8_t *: u8tor, const char8_t *: u8tor)((ch), (p))
+#define ucshaspfx(sv, pfx) _Generic((sv), struct u8view: u8haspfx)((sv), (pfx))
+#define ucshassfx(sv, sfx) _Generic((sv), struct u8view: u8hassfx)((sv), (sfx))
+#define ucschk(sv) _Generic((sv), struct u8view: u8chk)((sv))
+#define ucschr(sv, ch) _Generic((sv), struct u8view: u8chr)((sv), (ch))
+#define ucsrchr(sv, ch) _Generic((sv), struct u8view: u8rchr)((sv), (ch))
+#define ucscmp(lhs, rhs) _Generic((lhs), struct u8view: u8cmp)((lhs), (rhs))
+#define ucscspn(sv, delims, ndelims) \
+ _Generic((sv), struct u8view: u8cspn)((sv), (delims), (ndelims))
+#define ucslen(sv) _Generic((sv), struct u8view: u8len)((sv))
+#define ucsspn(sv, delims, ndelims) \
+ _Generic((sv), struct u8view: u8spn)((sv), (delims), (ndelims))
+/* Generic form of u8cut: the implementation is chosen from the type of y (the
+   view being cut) and all four arguments are forwarded to it.  Every
+   forwarded argument is parenthesized, as in the other ucs* macros, so the
+   expansion does not depend on how the caller spells the argument. */
+#define ucscut(x, y, seps, nseps) \
+ _Generic((y), struct u8view *: u8cut)((x), (y), (seps), (nseps))
+
#endif /* !MLIB_MBSTRING_H */
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index 0fbd479..422fa58 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -6,7 +6,7 @@
#include "_attrs.h"
#include "_rune.h"
-#include "_u8view.h"
+#include "_uNview.h"
struct rview {
const rune *p;
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 0c7ef79..a5b1cdb 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -5,7 +5,7 @@
#include "_alloc_fn.h"
#include "_charN_t.h"
-#include "_u8view.h"
+#include "_uNview.h"
/* clang-format off */
@@ -23,11 +23,9 @@ enum [[clang::flag_enum]] caseflags {
[[nodiscard]] size_t u8gcnt(struct u8view);
[[nodiscard]] size_t u8wcnt(struct u8view);
[[nodiscard]] size_t u8wcnt_human(struct u8view);
-
size_t u8gnext(struct u8view *, struct u8view *);
size_t u8wnext(struct u8view *, struct u8view *);
size_t u8wnext_human(struct u8view *, struct u8view *);
-
[[nodiscard]] char8_t *u8casefold(size_t *, struct u8view, enum caseflags,
alloc_fn, void *);
[[nodiscard]] char8_t *u8lower(size_t *, struct u8view, enum caseflags,
@@ -36,9 +34,34 @@ size_t u8wnext_human(struct u8view *, struct u8view *);
alloc_fn, void *);
[[nodiscard]] char8_t *u8upper(size_t *, struct u8view, enum caseflags,
alloc_fn, void *);
-
+[[nodiscard]] char8_t *u8norm_nfc(size_t *, struct u8view, alloc_fn, void *);
[[nodiscard]] char8_t *u8norm_nfd(size_t *, struct u8view, alloc_fn, void *);
+/* Encoding-generic macros.
+   Each ucs* macro uses _Generic on its view argument (struct u8view by value
+   for the counting, casing and normalization functions, struct u8view * for
+   the *next iterators) and forwards every argument to the matching u8*
+   function.  No default association is given, so any other argument type is
+   a compile-time error.
+   Because these are macros, an argument containing a top-level comma (for
+   example a compound literal with several initializers or a trailing comma)
+   must be wrapped in parentheses by the caller. */
+#define ucsgcnt(sv) _Generic((sv), struct u8view: u8gcnt)((sv))
+#define ucswcnt(sv) _Generic((sv), struct u8view: u8wcnt)((sv))
+#define ucswcnt_human(sv) _Generic((sv), struct u8view: u8wcnt_human)((sv))
+#define ucsgnext(g, sv) _Generic((sv), struct u8view *: u8gnext)((g), (sv))
+#define ucswnext(g, sv) _Generic((sv), struct u8view *: u8wnext)((g), (sv))
+#define ucswnext_human(g, sv) \
+ _Generic((sv), struct u8view *: u8wnext_human)((g), (sv))
+#define ucscasefold(dstn, sv, flags, alloc, ctx) \
+ _Generic((sv), struct u8view: u8casefold)((dstn), (sv), (flags), (alloc), \
+ (ctx))
+#define ucslower(dstn, sv, flags, alloc, ctx) \
+ _Generic((sv), struct u8view: u8lower)((dstn), (sv), (flags), (alloc), \
+ (ctx))
+#define ucstitle(dstn, sv, flags, alloc, ctx) \
+ _Generic((sv), struct u8view: u8title)((dstn), (sv), (flags), (alloc), \
+ (ctx))
+#define ucsupper(dstn, sv, flags, alloc, ctx) \
+ _Generic((sv), struct u8view: u8upper)((dstn), (sv), (flags), (alloc), \
+ (ctx))
+#define ucsnorm_nfc(dstn, sv, alloc, ctx) \
+ _Generic((sv), struct u8view: u8norm_nfc)((dstn), (sv), (alloc), (ctx))
+#define ucsnorm_nfd(dstn, sv, alloc, ctx) \
+ _Generic((sv), struct u8view: u8norm_nfd)((dstn), (sv), (alloc), (ctx))
+
constexpr double U8CASEFOLD_SCALE = 3;
constexpr double U8LOWER_SCALE = 1.5;
constexpr double U8LOWER_SCALE_LT = 3;
diff --git a/lib/cli/optparse.c b/lib/cli/optparse.c
index ce688cf..7134b37 100644
--- a/lib/cli/optparse.c
+++ b/lib/cli/optparse.c
@@ -104,8 +104,8 @@ rune
shortopt(struct optparser *st, const struct cli_option *opts, size_t nopts)
{
rune ch;
- const char *opt = st->_argv[st->optind];
- st->_subopt += u8tor(&ch, opt + st->_subopt + 1);
+ const char8_t *opt = st->_argv[st->optind];
+ st->_subopt += ucstor(&ch, opt + st->_subopt + 1);
if (ch == '\0') {
st->_subopt = 0;
st->optind++;
diff --git a/test/_brk-test.h b/test/_brk-test.h
index 396138b..21a6a2b 100644
--- a/test/_brk-test.h
+++ b/test/_brk-test.h
@@ -15,8 +15,8 @@
#include <unicode/string.h>
#define TESTFILE STR(BRKTYPE) "brk.in"
-#define ITERFUNC CONCAT(CONCAT(u8, BRKTYPE), next)
-#define CNTFUNC CONCAT(CONCAT(u8, BRKTYPE), cnt)
+#define ITERFUNC CONCAT(CONCAT(ucs, BRKTYPE), next)
+#define CNTFUNC CONCAT(CONCAT(ucs, BRKTYPE), cnt)
static bool test(struct u8view, int);
@@ -68,12 +68,12 @@ test(struct u8view sv, int id)
rune op;
struct u8view sv_cpy = sv;
- while ((op = u8cut(nullptr, &sv_cpy, U"×÷", 2)) != MBEND) {
+ while ((op = ucscut(nullptr, &sv_cpy, U"×÷", 2)) != MBEND) {
rune ch;
sscanf(sv_cpy.p, "%" SCNxRUNE, &ch);
char8_t buf[U8_LEN_MAX];
- int w = rtou8(buf, sizeof(buf), ch);
+ int w = rtoucs(buf, sizeof(buf), ch);
total += w;
if (op == U'÷')
@@ -103,7 +103,7 @@ test(struct u8view sv, int id)
struct u8view it1, buf_cpy = buf;
for (size_t i = 0; ITERFUNC(&it1, &buf_cpy); i++) {
item it2 = items.buf[i];
- if (!u8eq(it1, ((struct u8view){it2.buf, it2.len}))) {
+ if (!ucseq(it1, ((struct u8view){it2.buf, it2.len}))) {
warn("case %d: expected %s ‘%.*s’ but got ‘%.*s’", id,
STR(BRKTYPE_LONG), (int)it2.len, it2.buf, SV_PRI_ARGS(it1));
rv = false;
diff --git a/test/_case-test.h b/test/_case-test.h
index 701d884..24a18f1 100644
--- a/test/_case-test.h
+++ b/test/_case-test.h
@@ -14,7 +14,7 @@
#include <unicode/string.h>
#define TESTFILE STR(CASETYPE) ".in"
-#define FUNC CONCAT(u8, CASETYPE)
+#define FUNC CONCAT(ucs, CASETYPE)
static bool test(const char8_t *, int);
@@ -54,22 +54,22 @@ test(const char8_t *line, int id)
{
struct u8view mapped, sv = {line, strlen(line)};
struct u8view before, after, flags;
- u8cut(&before, &sv, U";", 1);
- u8cut(&after, &sv, U";", 1);
- u8cut(&flags, &sv, U";", 1);
+ ucscut(&before, &sv, U";", 1);
+ ucscut(&after, &sv, U";", 1);
+ ucscut(&flags, &sv, U";", 1);
- enum caseflags cf = u8eq(flags, U8("ẞ")) ? CF_ẞ
- : u8eq(flags, U8("AZ")) ? CF_LANG_AZ
- : u8eq(flags, U8("LT")) ? CF_LANG_LT
- : u8eq(flags, U8("NL")) ? CF_LANG_NL
+ enum caseflags cf = ucseq(flags, U8("ẞ")) ? CF_ẞ
+ : ucseq(flags, U8("AZ")) ? CF_LANG_AZ
+ : ucseq(flags, U8("LT")) ? CF_LANG_LT
+ : ucseq(flags, U8("NL")) ? CF_LANG_NL
: 0;
arena a = mkarena(0);
- mapped.p = FUNC(&mapped.len, before, cf, alloc_arena, &(struct arena_ctx){
+ mapped.p = FUNC(&mapped.len, before, cf, alloc_arena, &((struct arena_ctx){
.a = &a,
- });
+ }));
- if (!u8eq(mapped, after)) {
+ if (!ucseq(mapped, after)) {
warn("case %d: expected ‘%.*s’ but got ‘%.*s’", id, SV_PRI_ARGS(after),
SV_PRI_ARGS(mapped));
arena_free(&a);
diff --git a/test/norm-nfd-test.c b/test/norm-nfd-test.c
index 02fde47..95bc8d5 100644
--- a/test/norm-nfd-test.c
+++ b/test/norm-nfd-test.c
@@ -60,7 +60,7 @@ test(struct u8view sv, int id)
};
struct u8view column;
- while (u8cut(&column, &sv, U";", 1) != MBEND) {
+ while (ucscut(&column, &sv, U";", 1) != MBEND) {
dynarr(char8_t) s = {
.alloc = alloc_arena,
.ctx = &ctx,
@@ -70,10 +70,10 @@ test(struct u8view sv, int id)
struct u8view cp;
do {
rune ch;
- _ = u8cut(&cp, &column, U" ", 1);
+ _ = ucscut(&cp, &column, U" ", 1);
sscanf(cp.p, "%" SCNxRUNE, &ch);
char8_t buf[U8_LEN_MAX];
- int w = rtou8(buf, sizeof(buf), ch);
+ int w = rtoucs(buf, sizeof(buf), ch);
DAEXTEND(&s, buf, w);
} while (_ != MBEND);
@@ -83,8 +83,8 @@ test(struct u8view sv, int id)
for (size_t i = 0; i < 5; i++) {
size_t base = i < 3 ? 2 : 4;
struct u8view normd = {};
- normd.p = u8norm_nfd(&normd.len, columns.buf[i], alloc_arena, &ctx);
- if (!u8eq(columns.buf[base], normd)) {
+ normd.p = ucsnorm_nfd(&normd.len, columns.buf[i], alloc_arena, &ctx);
+ if (!ucseq(columns.buf[base], normd)) {
warn("case %d: expected c%zu to be ‘%.*s’ but got ‘%.*s’", id,
i + 1, SV_PRI_ARGS(columns.buf[base]), SV_PRI_ARGS(normd));
rv = false;
diff --git a/test/wbrk-human-test.c b/test/wbrk-human-test.c
index 6f8bf7a..24a3513 100644
--- a/test/wbrk-human-test.c
+++ b/test/wbrk-human-test.c
@@ -47,26 +47,26 @@ bool
test(struct u8view sv, int id)
{
struct u8view src;
- u8cut(&src, &sv, U";", 1);
+ ucscut(&src, &sv, U";", 1);
struct u8view w;
dynarr(struct u8view) ws = {.alloc = alloc_heap};
- while (u8cut(&w, &sv, U"|", 1) != MBEND)
+ while (ucscut(&w, &sv, U"|", 1) != MBEND)
DAPUSH(&ws, w);
if (w.len > 0)
DAPUSH(&ws, w);
/* Assert the word count is correct */
size_t n;
- if ((n = u8wcnt_human(src)) != ws.len) {
+ if ((n = ucswcnt_human(src)) != ws.len) {
warn("case %d: expected %zu words but got %zu", id, ws.len, n);
return false;
}
/* Assert the individual words are correct */
- for (size_t i = 0; u8wnext_human(&w, &src) != 0; i++) {
- if (!u8eq(w, ws.buf[i])) {
+ for (size_t i = 0; ucswnext_human(&w, &src) != 0; i++) {
+ if (!ucseq(w, ws.buf[i])) {
warn("case %d: expected word %zu to be ‘%.*s’ but got ‘%.*s’", id,
i, SV_PRI_ARGS(ws.buf[i]), SV_PRI_ARGS(w));
return false;