Go all in on string views, and fix manuals

author: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
commit: ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (patch)
tree: 90250966629653f0462cf17bc0b6f2476fb6d1fc
parent: 8b923ba5e5bb37ea26350b4c1c688b8697706609 (diff)
30 files changed, 213 insertions, 268 deletions
diff --git a/include/_qmacros.h b/include/_qmacros.h
deleted file mode 100644
index d496581..0000000
--- a/include/_qmacros.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef MLIB__QMACROS_H
-#define MLIB__QMACROS_H
-
-/* Macros for qualifier-preserving functions.  These are used to create wrappers
-   around some functions which will return a const-qualified pointer if the
-   input pointer is const-qualified, and a non-const-qualified pointer
-   otherwise.
-
-   The macros are taken from the N3020 proposal for C23. */
-
-/* clang-format off */
-#define _MLIB_PTR_IS_CONST(P) \
-	_Generic(1 ? (P) : (void *)(P), \
-	         const void *: 1, \
-	         default: 0)
-#define _MLIB_STATIC_IF(P, T, E) \
-	_Generic(&(char[!!(P) + 1]){0}, \
-	         char(*)[2]: T, \
-	         char(*)[1]: E)
-#define _MLIB_Q_PTR(T, F, S, ...) \
-	_MLIB_STATIC_IF(_MLIB_PTR_IS_CONST((S)), \
-	                (const T *)(F)(__VA_ARGS__), \
-	                      (T *)(F)(__VA_ARGS__))
-/* clang-format on */
-
-#endif /* !MLIB__QMACROS_H */
diff --git a/include/macros.h b/include/macros.h
index d4b53b4..32e8b7c 100644
--- a/include/macros.h
+++ b/include/macros.h
@@ -7,9 +7,9 @@
 
 #define lengthof(a) (sizeof(a) / sizeof(*(a)))
 
-#define memeq(...) (!memcmp(__VA_ARGS__))
-#define streq(...) (!strcmp(__VA_ARGS__))
-#define u8eq(...)  (!u8cmp(__VA_ARGS__))
+#define memeq(x, y, n) (!memcmp((x), (y), (n)))
+#define streq(x, y)    (!strcmp((x), (y)))
+#define u8eq(x, y)     (!u8cmp((x), (y)))
 
 #define _MLIB_STR(s)       #s
 #define _MLIB_CONCAT(x, y) x##y
diff --git a/include/mbstring.h b/include/mbstring.h
index d908284..ba654bb 100644
--- a/include/mbstring.h
+++ b/include/mbstring.h
@@ -4,14 +4,13 @@
 #include <stddef.h>
 
 #include "_charN_t.h"
-#include "_qmacros.h"
 #include "_rune.h"
 #include "_u8view.h"
 
 #define U8(...) \
 	((struct u8view){__VA_OPT__(u8##__VA_ARGS__, sizeof(u8##__VA_ARGS__) - 1)})
-#define U8_ARGS(s)  ((s).p), ((s).len)
-#define U8_ARGSP(s) (&(s).p), (&(s).len)
+
+#define VSHFT(sv, n) ((sv)->p += (n), (sv)->len -= (n))
 
 /* clang-format off */
 #define u8byte1(x) (((x) & 0x80) == 0x00)
@@ -29,33 +28,21 @@ constexpr rune U8_4B_MAX = 0x10FFFF;
 constexpr int U8_LEN_MAX = 4;
 
 #define PRIsU8          ".*s"
-#define U8_PRI_ARGS(sv) ((int)(sv).len), ((sv).p)
-
-[[nodiscard]] bool u8haspfx(const char8_t *, size_t, const char8_t *, size_t);
-[[nodiscard]] bool u8hassfx(const char8_t *, size_t, const char8_t *, size_t);
-
-[[nodiscard]] char8_t *u8chk(const char8_t *, size_t);
-
-[[nodiscard]] char8_t *u8chr(const char8_t *, size_t, rune);
-[[nodiscard]] char8_t *u8rchr(const char8_t *, size_t, rune);
+#define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p)
 
 int rtou8(char8_t *, size_t, rune);
-int u8tor(rune *, const char8_t *);
-
-[[nodiscard]] int u8cmp(const char8_t *, size_t, const char8_t *, size_t);
-
-int u8next(rune *, const char8_t **, size_t *);
+int u8next(rune *, struct u8view *);
 int u8prev(rune *, const char8_t **, const char8_t *);
-
-[[nodiscard]] size_t u8spn(const char8_t *, size_t, const rune *, size_t);
-[[nodiscard]] size_t u8cspn(const char8_t *, size_t, const rune *, size_t);
-
-[[nodiscard]] size_t u8len(const char8_t *, size_t);
-
-struct u8view u8split(const char8_t **, size_t *, rune);
-
-#define u8chk(s, n)      _MLIB_Q_PTR(char8_t, u8chk, (s), (s), (n))
-#define u8chr(s, n, ch)  _MLIB_Q_PTR(char8_t, u8chr, (s), (s), (n), (ch))
-#define u8rchr(s, n, ch) _MLIB_Q_PTR(char8_t, u8rchr, (s), (s), (n), (ch))
+int u8tor(rune *, const char8_t *);
+[[nodiscard]] bool u8haspfx(struct u8view, struct u8view);
+[[nodiscard]] bool u8hassfx(struct u8view, struct u8view);
+[[nodiscard]] const char8_t *u8chk(struct u8view);
+[[nodiscard]] const char8_t *u8chr(struct u8view, rune);
+[[nodiscard]] const char8_t *u8rchr(struct u8view, rune);
+[[nodiscard]] int u8cmp(struct u8view, struct u8view);
+[[nodiscard]] size_t u8cspn(struct u8view, const rune *, size_t);
+[[nodiscard]] size_t u8len(struct u8view);
+[[nodiscard]] size_t u8spn(struct u8view, const rune *, size_t);
+struct u8view u8split(struct u8view *, rune);
 
 #endif /* !MLIB_MBSTRING_H */
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 0ae49f0..bb8cafd 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -21,22 +21,22 @@ enum [[clang::flag_enum]] caseflags {
 
 /* clang-format on */
 
-[[nodiscard]] size_t u8gcnt(const char8_t *, size_t);
-[[nodiscard]] size_t u8wcnt(const char8_t *, size_t);
-[[nodiscard]] size_t u8wcnt_human(const char8_t *, size_t);
-
-size_t u8gnext(struct u8view *, const char8_t **, size_t *);
-size_t u8wnext(struct u8view *, const char8_t **, size_t *);
-size_t u8wnext_human(struct u8view *, const char8_t **, size_t *);
-
-[[mlib_warn_trunc]] size_t u8lower(char8_t *restrict, size_t, const char8_t *,
-                                   size_t, enum caseflags);
-[[mlib_warn_trunc]] size_t u8title(char8_t *restrict, size_t, const char8_t *,
-                                   size_t, enum caseflags);
-[[mlib_warn_trunc]] size_t u8upper(char8_t *restrict, size_t, const char8_t *,
-                                   size_t, enum caseflags);
-[[mlib_warn_trunc]] size_t u8casefold(char8_t *restrict, size_t,
-                                      const char8_t *, size_t, enum caseflags);
+[[nodiscard]] size_t u8gcnt(struct u8view);
+[[nodiscard]] size_t u8wcnt(struct u8view);
+[[nodiscard]] size_t u8wcnt_human(struct u8view);
+
+size_t u8gnext(struct u8view *, struct u8view *);
+size_t u8wnext(struct u8view *, struct u8view *);
+size_t u8wnext_human(struct u8view *, struct u8view *);
+
+[[mlib_warn_trunc]] size_t u8lower(char8_t *restrict, size_t, struct u8view,
+                                   enum caseflags);
+[[mlib_warn_trunc]] size_t u8title(char8_t *restrict, size_t, struct u8view,
+                                   enum caseflags);
+[[mlib_warn_trunc]] size_t u8upper(char8_t *restrict, size_t, struct u8view,
+                                   enum caseflags);
+[[mlib_warn_trunc]] size_t u8casefold(char8_t *restrict, size_t, struct u8view,
+                                      enum caseflags);
 
 constexpr double U8LOWER_SCALE = 1.5;
 constexpr double U8LOWER_SCALE_LT = 3;
diff --git a/lib/mbstring/u8chk.c b/lib/mbstring/u8chk.c
index 2566bac..20c4f3f 100644
--- a/lib/mbstring/u8chk.c
+++ b/lib/mbstring/u8chk.c
@@ -1,17 +1,15 @@
 #include "rune.h"
 #include "mbstring.h"
 
-char8_t *
-(u8chk)(const char8_t *s, size_t n)
+const char8_t *
+u8chk(struct u8view sv)
 {
-	while (n) {
-		rune ch;
-		int m = u8tor(&ch, s);
+	int w;
+	rune ch;
 
+	while (w = u8next(&ch, &sv)) {
 		if (ch == RUNE_ERROR)
-			return (char8_t *)s;
-		n -= m;
-		s += m;
+			return sv.p - w;
 	}
 
 	return nullptr;
diff --git a/lib/mbstring/u8chr.c b/lib/mbstring/u8chr.c
index 395a328..4831695 100644
--- a/lib/mbstring/u8chr.c
+++ b/lib/mbstring/u8chr.c
@@ -31,7 +31,7 @@
        TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
        SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
-static char8_t *
+static const char8_t *
 memmem2(const char8_t *h, size_t k, const char8_t *n)
 {
 	uint16_t hw, nw;
@@ -40,12 +40,12 @@ memmem2(const char8_t *h, size_t k, const char8_t *n)
 
 	for (h += 2, k -= 2; k; k--, hw = hw << 8 | *h++) {
 		if (hw == nw)
-			return (char8_t *)h - 2;
+			return h - 2;
 	}
-	return hw == nw ? (char8_t *)h - 2 : nullptr;
+	return hw == nw ? h - 2 : nullptr;
 }
 
-static char8_t *
+static const char8_t *
 memmem3(const char8_t *h, size_t k, const char8_t *n)
 {
 	uint32_t hw, nw;
@@ -54,12 +54,12 @@ memmem3(const char8_t *h, size_t k, const char8_t *n)
 
 	for (h += 3, k -= 3; k; k--, hw = (hw | *h++) << 8) {
 		if (hw == nw)
-			return (char8_t *)h - 3;
+			return h - 3;
 	}
-	return hw == nw ? (char8_t *)h - 3 : nullptr;
+	return hw == nw ? h - 3 : nullptr;
 }
 
-static char8_t *
+static const char8_t *
 memmem4(const char8_t *h, size_t k, const char8_t *n)
 {
 	uint32_t hw, nw;
@@ -68,28 +68,28 @@ memmem4(const char8_t *h, size_t k, const char8_t *n)
 
 	for (h += 4, k -= 4; k; k--, hw = hw << 8 | *h++) {
 		if (hw == nw)
-			return (char8_t *)h - 4;
+			return h - 4;
 	}
-	return hw == nw ? (char8_t *)h - 4 : nullptr;
+	return hw == nw ? h - 4 : nullptr;
 }
 
-char8_t *
-(u8chr)(const char8_t *s, size_t n, rune ch)
+const char8_t *
+u8chr(struct u8view sv, rune ch)
 {
 	char8_t buf[U8_LEN_MAX];
 	int m = rtou8(buf, sizeof(buf), ch);
 
-	if (n < (size_t)m)
+	if (sv.len < (size_t)m)
 		return nullptr;
 	switch (m) {
 	case 1:
-		return memchr(s, ch, n);
+		return memchr(sv.p, ch, sv.len);
 	case 2:
-		return memmem2(s, n, buf);
+		return memmem2(sv.p, sv.len, buf);
 	case 3:
-		return memmem3(s, n, buf);
+		return memmem3(sv.p, sv.len, buf);
 	case 4:
-		return memmem4(s, n, buf);
+		return memmem4(sv.p, sv.len, buf);
 	}
 
 	unreachable();
diff --git a/lib/mbstring/u8cmp.c b/lib/mbstring/u8cmp.c
index 8bd2400..0059020 100644
--- a/lib/mbstring/u8cmp.c
+++ b/lib/mbstring/u8cmp.c
@@ -3,7 +3,7 @@
 #include "mbstring.h"
 
 int
-u8cmp(const char8_t *x, size_t n, const char8_t *y, size_t m)
+u8cmp(struct u8view x, struct u8view y)
 {
-	return n != m ? (n > m ? +1 : -1) : memcmp(x, y, n);
+	return x.len != y.len ? (x.len > y.len ? +1 : -1) : memcmp(x.p, y.p, x.len);
 }
diff --git a/lib/mbstring/u8cspn.c b/lib/mbstring/u8cspn.c
index 4892de4..827373f 100644
--- a/lib/mbstring/u8cspn.c
+++ b/lib/mbstring/u8cspn.c
@@ -1,13 +1,13 @@
 #include "mbstring.h"
 
 size_t
-u8cspn(const char8_t *s, size_t n, const rune *p, size_t m)
+u8cspn(struct u8view sv, const rune *p, size_t n)
 {
 	rune ch;
-	size_t k, l;
+	size_t k, w;
 
-	for (k = 0; (l = u8next(&ch, &s, &n)); k += l) {
-		for (size_t i = 0; i < m; i++) {
+	for (k = 0; w = u8next(&ch, &sv); k += w) {
+		for (size_t i = 0; i < n; i++) {
 			if (p[i] == ch)
 				goto found;
 		}
diff --git a/lib/mbstring/u8haspfx.c b/lib/mbstring/u8haspfx.c
index b6cea50..c61efbb 100644
--- a/lib/mbstring/u8haspfx.c
+++ b/lib/mbstring/u8haspfx.c
@@ -4,7 +4,7 @@
 #include "mbstring.h"
 
 bool
-u8haspfx(const char8_t *s, size_t n, const char8_t *pfx, size_t m)
+u8haspfx(struct u8view sv, struct u8view pfx)
 {
-	return n >= m && memeq(s, pfx, m);
+	return sv.len >= pfx.len && memeq(sv.p, pfx.p, pfx.len);
 }
diff --git a/lib/mbstring/u8hassfx.c b/lib/mbstring/u8hassfx.c
index e31bb4b..8ea4456 100644
--- a/lib/mbstring/u8hassfx.c
+++ b/lib/mbstring/u8hassfx.c
@@ -4,7 +4,7 @@
 #include "mbstring.h"
 
 bool
-u8hassfx(const char8_t *s, size_t n, const char8_t *sfx, size_t m)
+u8hassfx(struct u8view sv, struct u8view sfx)
 {
-	return n >= m && memeq(s + n - m, sfx, m);
+	return sv.len >= sfx.len && memeq(sv.p + sv.len - sfx.len, sfx.p, sfx.len);
 }
diff --git a/lib/mbstring/u8len.c b/lib/mbstring/u8len.c
index 217ab66..23c55c5 100644
--- a/lib/mbstring/u8len.c
+++ b/lib/mbstring/u8len.c
@@ -1,10 +1,10 @@
 #include "mbstring.h"
 
 size_t
-u8len(const char8_t *s, size_t n)
+u8len(struct u8view sv)
 {
 	size_t m = 0;
-	while (u8next(nullptr, &s, &n))
+	while (u8next(nullptr, &sv))
 		m++;
 	return m;
 }
diff --git a/lib/mbstring/u8next.c b/lib/mbstring/u8next.c
index 82d2ad7..518de49 100644
--- a/lib/mbstring/u8next.c
+++ b/lib/mbstring/u8next.c
@@ -1,16 +1,15 @@
 #include "mbstring.h"
 
 int
-u8next(rune *ch, const char8_t **s, size_t *n)
+u8next(rune *ch, struct u8view *sv)
 {
-	rune _;
-	int m = 0;
+	int n = 0;
 
-	if (*n) {
-		m = u8tor(ch ? ch : &_, *s);
-		*n -= m;
-		*s += m;
+	if (sv->len) {
+		rune _;
+		n = u8tor(ch ? ch : &_, sv->p);
+		VSHFT(sv, n);
 	}
 
-	return m;
+	return n;
 }
diff --git a/lib/mbstring/u8rchr.c b/lib/mbstring/u8rchr.c
index 09aa111..825f8fd 100644
--- a/lib/mbstring/u8rchr.c
+++ b/lib/mbstring/u8rchr.c
@@ -3,17 +3,17 @@
 
 #include "mbstring.h"
 
-static char8_t *
+static const char8_t *
 memrchr1(const char8_t *s, size_t k, const char8_t *n)
 {
 	for (const char8_t *p = s + k - 1; k-- > 0; p--) {
 		if (*p == *n)
-			return (char8_t *)p;
+			return p;
 	}
 	return nullptr;
 }
 
-static char8_t *
+static const char8_t *
 memrchr2(const char8_t *h, size_t k, const char8_t *n)
 {
 	uint16_t hw, nw;
@@ -23,13 +23,13 @@ memrchr2(const char8_t *h, size_t k, const char8_t *n)
 
 	for (H -= 2, k -= 2; k; k--, hw = hw >> 8 | (*H-- << 8)) {
 		if (hw == nw)
-			return (char8_t *)H + 1;
+			return H + 1;
 	}
 
-	return hw == nw ? (char8_t *)H + 1 : nullptr;
+	return hw == nw ? H + 1 : nullptr;
 }
 
-static char8_t *
+static const char8_t *
 memrchr3(const char8_t *h, size_t k, const char8_t *n)
 {
 	uint32_t hw, nw;
@@ -41,13 +41,13 @@ memrchr3(const char8_t *h, size_t k, const char8_t *n)
 	     k--, hw = (hw >> 8 | (*H-- << 24)) & UINT32_C(0xFFFFFF00))
 	{
 		if (hw == nw)
-			return (char8_t *)H + 1;
+			return H + 1;
 	}
 
-	return hw == nw ? (char8_t *)H + 1 : nullptr;
+	return hw == nw ? H + 1 : nullptr;
 }
 
-static char8_t *
+static const char8_t *
 memrchr4(const char8_t *h, size_t k, const char8_t *n)
 {
 	uint32_t hw, nw;
@@ -57,29 +57,29 @@ memrchr4(const char8_t *h, size_t k, const char8_t *n)
 
 	for (H -= 4, k -= 4; k; k--, hw = hw >> 8 | (*H-- << 24)) {
 		if (hw == nw)
-			return (char8_t *)H + 1;
+			return H + 1;
 	}
 
-	return hw == nw ? (char8_t *)H + 1 : nullptr;
+	return hw == nw ? H + 1 : nullptr;
 }
 
-char8_t *
-(u8rchr)(const char8_t *s, size_t n, rune ch)
+const char8_t *
+u8rchr(struct u8view sv, rune ch)
 {
 	char8_t buf[U8_LEN_MAX];
-	int m = rtou8(buf, ch, sizeof(buf));
+	int n = rtou8(buf, sizeof(buf), ch);
 
-	if (n < (size_t)m)
+	if (sv.len < (size_t)n)
 		return nullptr;
-	switch (m) {
+	switch (n) {
 	case 1:
-		return (char8_t *)memrchr1(s, n, buf);
+		return memrchr1(sv.p, sv.len, buf);
 	case 2:
-		return (char8_t *)memrchr2(s, n, buf);
+		return memrchr2(sv.p, sv.len, buf);
 	case 3:
-		return (char8_t *)memrchr3(s, n, buf);
+		return memrchr3(sv.p, sv.len, buf);
 	case 4:
-		return (char8_t *)memrchr4(s, n, buf);
+		return memrchr4(sv.p, sv.len, buf);
 	}
 
 	unreachable();
diff --git a/lib/mbstring/u8split.c b/lib/mbstring/u8split.c
index 5ee3bc0..c26f48b 100644
--- a/lib/mbstring/u8split.c
+++ b/lib/mbstring/u8split.c
@@ -1,16 +1,16 @@
 #include "mbstring.h"
 
 struct u8view
-u8split(const char8_t **p, size_t *n, rune ch)
+u8split(struct u8view *rhs, rune ch)
 {
-	struct u8view lhs = {.p = *p};
-	if ((*p = u8chr(*p, *n, ch)) == nullptr) {
-		lhs.len = *n;
-		*n = 0;
+	struct u8view lhs = {.p = rhs->p};
+	if ((rhs->p = u8chr(*rhs, ch)) == nullptr) {
+		lhs.len = rhs->len;
+		rhs->len = 0;
 	} else {
-		lhs.len = *p - lhs.p;
-		*n -= lhs.len;
-		u8next(nullptr, p, n);
+		lhs.len = rhs->p - lhs.p;
+		rhs->len -= lhs.len;
+		u8next(nullptr, rhs);
 	}
 	return lhs;
 }
diff --git a/lib/mbstring/u8spn.c b/lib/mbstring/u8spn.c
index 1cf45f2..d41fcbc 100644
--- a/lib/mbstring/u8spn.c
+++ b/lib/mbstring/u8spn.c
@@ -1,15 +1,15 @@
 #include "mbstring.h"
 
 size_t
-u8spn(const char8_t *s, size_t n, const rune *p, size_t m)
+u8spn(struct u8view sv, const rune *p, size_t n)
 {
 	rune ch;
-	size_t k = 0, l;
+	size_t k = 0, w;
 
-	while ((l = u8next(&ch, &s, &n))) {
-		for (size_t i = 0; i < m; i++) {
+	while (w = u8next(&ch, &sv)) {
+		for (size_t i = 0; i < n; i++) {
 			if (p[i] == ch) {
-				k += l;
+				k += w;
 				goto found;
 			}
 		}
diff --git a/lib/optparse/optparse.c b/lib/optparse/optparse.c
index 407fa62..757dd47 100644
--- a/lib/optparse/optparse.c
+++ b/lib/optparse/optparse.c
@@ -44,11 +44,10 @@ optparse(struct optparse *st, const struct op_option *opts, size_t nopts)
 	st->optind++;
 
 	/* Skip ‘--’ */
-	opt.p += 2;
-	opt.len -= 2;
+	VSHFT(&opt, 2);
 
 	const struct op_option *o = nullptr;
-	const char8_t *eq_p = u8chr(opt.p, '=', opt.len);
+	const char8_t *eq_p = u8chr(opt, '=');
 	struct u8view opt_no_eq = {
 		.p = opt.p,
 		.len = eq_p == nullptr ? opt.len : (size_t)(eq_p - opt.p),
@@ -56,7 +55,7 @@ optparse(struct optparse *st, const struct op_option *opts, size_t nopts)
 
 	for (size_t i = 0; i < nopts; i++) {
 		struct u8view lo = opts[i].longopt;
-		if (lo.p == nullptr || !u8haspfx(U8_ARGS(lo), U8_ARGS(opt_no_eq)))
+		if (lo.p == nullptr || !u8haspfx(lo, opt_no_eq))
 			continue;
 		if (o != nullptr)
 			return error(st, OPT_MSG_INVALID, opt_no_eq);
@@ -146,7 +145,7 @@ rune
 error_s(struct optparse *st, const char *msg, struct u8view s)
 {
 	snprintf(st->errmsg, sizeof(st->errmsg), u8"%s — ‘%.*s’", msg,
-	         U8_PRI_ARGS(s));
+	         SV_PRI_ARGS(s));
 	return -1;
 }
 
diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c
index 6c0b61d..2ab7c7c 100644
--- a/lib/unicode/string/u8casefold.c
+++ b/lib/unicode/string/u8casefold.c
@@ -3,13 +3,13 @@
 #include "unicode/string.h"
 
 size_t
-u8casefold(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8casefold(char8_t *restrict dst, size_t dstn, struct u8view sv,
            enum caseflags flags)
 {
 	rune ch;
 	size_t n = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
+	while (u8next(&ch, &sv)) {
 		struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ);
 		for (size_t i = 0; i < rv.len; i++) {
 			if (n >= dstn) {
diff --git a/lib/unicode/string/u8gcnt.c b/lib/unicode/string/u8gcnt.c
index 81a0f97..6dfc519 100644
--- a/lib/unicode/string/u8gcnt.c
+++ b/lib/unicode/string/u8gcnt.c
@@ -1,10 +1,10 @@
 #include "unicode/string.h"
 
 size_t
-u8gcnt(const char8_t *s, size_t n)
+u8gcnt(struct u8view sv)
 {
 	size_t m = 0;
-	while (u8gnext(nullptr, &s, &n))
+	while (u8gnext(nullptr, &sv))
 		m++;
 	return m;
 }
diff --git a/lib/unicode/string/u8gnext.c b/lib/unicode/string/u8gnext.c
index a050bd5..3b0b410 100644
--- a/lib/unicode/string/u8gnext.c
+++ b/lib/unicode/string/u8gnext.c
@@ -20,17 +20,17 @@ static bool u8isgbrk(rune, rune, struct gbrk_state *);
 _MLIB_DEFINE_BSEARCH(gbrk_prop, gbrk_prop_tbl, GBP_OTHER)
 
 size_t
-u8gnext(struct u8view *g, const char8_t **s, size_t *n)
+u8gnext(struct u8view *g, struct u8view *sv)
 {
 	int m;
 	rune ch1;
 	const char8_t *p;
 	struct gbrk_state gs = {0};
 
-	if (*n == 0)
+	if (sv->len == 0)
 		return 0;
 
-	p = *s;
+	p = sv->p;
 	if (g)
 		g->p = p;
 	p += u8tor(&ch1, p);
@@ -38,14 +38,13 @@ u8gnext(struct u8view *g, const char8_t **s, size_t *n)
 	for (;;) {
 		rune ch2;
 
-		if ((size_t)(p - *s) >= *n)
+		if ((size_t)(p - sv->p) >= sv->len)
 			ch2 = 0;
 		else
 			m = u8tor(&ch2, p);
 		if (u8isgbrk(ch1, ch2, &gs)) {
-			ptrdiff_t d = p - *s;
-			*n -= d;
-			*s = p;
+			ptrdiff_t d = p - sv->p;
+			VSHFT(sv, d);
 			if (g)
 				g->len = d;
 			return d;
diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c
index 63fdae4..907077b 100644
--- a/lib/unicode/string/u8lower.c
+++ b/lib/unicode/string/u8lower.c
@@ -13,7 +13,7 @@ uprop_ccc_0_or_230(rune ch)
 }
 
 size_t
-u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
         enum caseflags flags)
 {
 	struct lcctx ctx = {
@@ -32,21 +32,21 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 	n = before_dot_cnt = more_above_cnt = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
+	while (u8next(&ch, &sv)) {
 		rune next = 0;
-		if (srcn > 0)
-			u8tor(&next, src);
+		if (sv.len > 0)
+			u8tor(&next, sv.p);
 
 		if (ctx.az_or_tr || ctx.lt) {
 			if (before_dot_cnt == 0 || more_above_cnt == 0) {
 				rune ch = 0;
 				before_dot_cnt = more_above_cnt = 0;
-				struct u8view cpy = {src, srcn};
+				struct u8view cpy = sv;
 
 				do {
 					before_dot_cnt++;
 					more_above_cnt++;
-				} while (u8next(&ch, U8_ARGSP(cpy)) && !uprop_ccc_0_or_230(ch));
+				} while (u8next(&ch, &cpy) && !uprop_ccc_0_or_230(ch));
 
 				if (ch != COMB_DOT_ABOVE)
 					before_dot_cnt = 0;
@@ -60,11 +60,11 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 		if (final_sigma.after == 0) {
 			rune ch = 0;
-			struct u8view cpy = {src, srcn};
+			struct u8view cpy = sv;
 
 			do
 				final_sigma.after++;
-			while (u8next(&ch, U8_ARGSP(cpy)) && uprop_is_ci(ch));
+			while (u8next(&ch, &cpy) && uprop_is_ci(ch));
 
 			if (!uprop_is_cased(ch))
 				final_sigma.after = 0;
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
index 01e9d2e..f4d9b7e 100644
--- a/lib/unicode/string/u8title.c
+++ b/lib/unicode/string/u8title.c
@@ -14,7 +14,7 @@ uprop_ccc_0_or_230(rune ch)
 }
 
 size_t
-u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
         enum caseflags flags)
 {
 	struct tcctx ctx_t;
@@ -26,7 +26,7 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 	rune ch;
 	bool nl_IJ = false;
 	size_t n, before_dot_cnt, more_above_cnt;
-	struct u8view word = {}, wcpy = {src, srcn};
+	struct u8view word = {}, wcpy = sv;
 	struct {
 		bool before;
 		size_t after;
@@ -39,9 +39,9 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 	n = before_dot_cnt = more_above_cnt = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
-		if (src > word.p + word.len) {
-			u8wnext(&word, U8_ARGSP(wcpy));
+	while (u8next(&ch, &sv)) {
+		if (sv.p > word.p + word.len) {
+			u8wnext(&word, &wcpy);
 			ctx_t.after_soft_dotted = false;
 			state = TITLE;
 		}
@@ -50,12 +50,12 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 			if (before_dot_cnt == 0 || more_above_cnt == 0) {
 				rune ch = 0;
 				before_dot_cnt = more_above_cnt = 0;
-				struct u8view cpy = {src, srcn};
+				struct u8view cpy = sv;
 
 				do {
 					before_dot_cnt++;
 					more_above_cnt++;
-				} while (u8next(&ch, U8_ARGSP(cpy)) && !uprop_ccc_0_or_230(ch));
+				} while (u8next(&ch, &cpy) && !uprop_ccc_0_or_230(ch));
 
 				if (ch != COMB_DOT_ABOVE)
 					before_dot_cnt = 0;
@@ -69,11 +69,11 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 		if (final_sigma.after == 0) {
 			rune ch = 0;
-			struct u8view cpy = {src, srcn};
+			struct u8view cpy = sv;
 
 			do
 				final_sigma.after++;
-			while (u8next(&ch, U8_ARGSP(cpy)) && uprop_is_ci(ch));
+			while (u8next(&ch, &cpy) && uprop_is_ci(ch));
 
 			if (!uprop_is_cased(ch))
 				final_sigma.after = 0;
@@ -95,8 +95,8 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 			if (flags & CF_LANG_NL) {
 				rune next = 0;
-				if (srcn > 0)
-					u8tor(&next, src);
+				if (sv.len > 0)
+					u8tor(&next, sv.p);
 				nl_IJ =
 					(ch == 'i' || ch == 'I') && (next == 'j' || next == 'J');
 			}
diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c
index 086a160..6d4026d 100644
--- a/lib/unicode/string/u8upper.c
+++ b/lib/unicode/string/u8upper.c
@@ -3,7 +3,7 @@
 #include "unicode/string.h"
 
 size_t
-u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8upper(char8_t *restrict dst, size_t dstn, struct u8view sv,
         enum caseflags flags)
 {
 	struct ucctx ctx = {
@@ -15,7 +15,7 @@ u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 	rune ch;
 	size_t n = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
+	while (u8next(&ch, &sv)) {
 		struct rview rv = uprop_get_uc(ch, ctx);
 		for (size_t i = 0; i < rv.len; i++) {
 			if (n >= dstn) {
diff --git a/lib/unicode/string/u8wcnt.c b/lib/unicode/string/u8wcnt.c
index f1b1742..f71faf5 100644
--- a/lib/unicode/string/u8wcnt.c
+++ b/lib/unicode/string/u8wcnt.c
@@ -1,10 +1,10 @@
 #include "unicode/string.h"
 
 size_t
-u8wcnt(const char8_t *s, size_t n)
+u8wcnt(struct u8view sv)
 {
 	size_t m = 0;
-	while (u8wnext(nullptr, &s, &n))
+	while (u8wnext(nullptr, &sv))
 		m++;
 	return m;
 }
diff --git a/lib/unicode/string/u8wcnt_human.c b/lib/unicode/string/u8wcnt_human.c
index 6e70398..60e7f95 100644
--- a/lib/unicode/string/u8wcnt_human.c
+++ b/lib/unicode/string/u8wcnt_human.c
@@ -1,10 +1,10 @@
 #include "unicode/string.h"
 
 size_t
-u8wcnt_human(const char8_t *s, size_t n)
+u8wcnt_human(struct u8view sv)
 {
 	size_t m = 0;
-	while (u8wnext_human(nullptr, &s, &n))
+	while (u8wnext_human(nullptr, &sv))
 		m++;
 	return m;
 }
diff --git a/lib/unicode/string/u8wnext.c b/lib/unicode/string/u8wnext.c
index 5e893c6..6655c5d 100644
--- a/lib/unicode/string/u8wnext.c
+++ b/lib/unicode/string/u8wnext.c
@@ -29,22 +29,20 @@ static size_t findwbrk(struct u8view);
 static struct wbrk_state mkwbrkstate(struct u8view);
 
 size_t
-u8wnext(struct u8view *w, const char8_t **s, size_t *n)
+u8wnext(struct u8view *w, struct u8view *sv)
 {
-	ASSUME(n != nullptr);
-	ASSUME(s != nullptr);
-	ASSUME(*s != nullptr);
+	ASSUME(sv != nullptr);
+	ASSUME(sv->p != nullptr);
 
-	if (*n == 0)
+	if (sv->len == 0)
 		return 0;
 
-	size_t off = findwbrk((struct u8view){*s, *n});
+	size_t off = findwbrk(*sv);
 	if (w != nullptr)
-		*w = (struct u8view){*s, off};
+		*w = (struct u8view){sv->p, off};
 
-	ASSUME(*n >= off);
-	*s += off;
-	*n -= off;
+	ASSUME(sv->len >= off);
+	VSHFT(sv, off);
 	return off;
 }
 
@@ -196,13 +194,13 @@ mkwbrkstate(struct u8view sv)
 
 	rune ch;
 	for (size_t i = 0;
-	     i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.raw_v)) != 0; i++)
+	     i < lengthof(ws.raw.next) && u8next(&ch, &ws.raw_v) != 0; i++)
 	{
 		ws.raw.next[i] = mlib_lookup(ch);
 	}
 
 	for (size_t i = 0;
-	     i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.skip_v)) != 0;)
+	     i < lengthof(ws.raw.next) && u8next(&ch, &ws.skip_v) != 0;)
 	{
 		ws.skip.next[i] = mlib_lookup(ch);
 		if (!IS_IGNORE(ws.skip.next[i]))
@@ -224,10 +222,10 @@ advance(struct wbrk_state *ws)
 	ws->raw.prev[0] = ws->raw.next[0];
 	ws->raw.next[0] = ws->raw.next[1];
 	ws->raw.next[1] =
-		u8next(&ch, U8_ARGSP(ws->raw_v)) != 0 ? mlib_lookup(ch) : WBRK_EOT;
+		u8next(&ch, &ws->raw_v) != 0 ? mlib_lookup(ch) : WBRK_EOT;
 
 	/* Increment the midpoint */
-	u8next(nullptr, U8_ARGSP(ws->mid_v));
+	u8next(nullptr, &ws->mid_v);
 
 	/* Ignore ignorable properties */
 	if (!IS_IGNORE(ws->raw.prev[0])) {
@@ -237,7 +235,7 @@ advance(struct wbrk_state *ws)
 		ws->ri_parity = ws->ri_parity == 0 && ws->skip.prev[0] == WBRK_RI;
 
 		do {
-			if (u8next(&ch, U8_ARGSP(ws->skip_v)) == 0) {
+			if (u8next(&ch, &ws->skip_v) == 0) {
 				ws->skip.next[1] = WBRK_EOT;
 				break;
 			}
diff --git a/lib/unicode/string/u8wnext_human.c b/lib/unicode/string/u8wnext_human.c
index d85abf1..953d942 100644
--- a/lib/unicode/string/u8wnext_human.c
+++ b/lib/unicode/string/u8wnext_human.c
@@ -4,17 +4,16 @@
 #include "unicode/string.h"
 
 size_t
-u8wnext_human(struct u8view *dst, const char8_t **s, size_t *n)
+u8wnext_human(struct u8view *dst, struct u8view *sv)
 {
-	ASSUME(n != nullptr);
-	ASSUME(s != nullptr);
-	ASSUME(*s != nullptr);
+	ASSUME(sv != nullptr);
+	ASSUME(sv->p != nullptr);
 
 	struct u8view w;
-	while (u8wnext(&w, s, n)) {
+	while (u8wnext(&w, sv)) {
 		rune ch;
 		struct u8view cpy = w;
-		while (u8next(&ch, U8_ARGSP(cpy))) {
+		while (u8next(&ch, &cpy)) {
 			if (uprop_get_gc(ch) & (GC_L | GC_N)) {
 				if (dst != nullptr)
 					*dst = w;
diff --git a/man/u8len.3 b/man/u8len.3
index f4d152f..5b51cd0 100644
--- a/man/u8len.3
+++ b/man/u8len.3
@@ -1,4 +1,4 @@
-.Dd 27 April 2024
+.Dd 4 May 2024
 .Dt U8LEN 3
 .Os
 .Sh NAME
@@ -9,38 +9,38 @@
 .Sh SYNOPSIS
 .In mbstring.h
 .Ft size_t
-.Fn u8len "const char8_t *s" "size_t n"
+.Fn u8len "struct u8view sv"
 .Sh DESCRIPTION
 The
 .Fn u8len
 function returns the number of UTF-8 encoded Unicode codepoints in the
-buffer
-.Fa s
-of length
-.Fa n
-bytes.
+string view
+.Fa sv .
 .Pp
 Invalid bytes are interpreted as having a length of 1 byte.
 .Sh RETURN VALUES
 The
 .Fn u8len
-function returns the number of codepoints in the buffer
-.Fa s .
+function returns the number of codepoints in the string view
+.Fa sv .
 .Sh EXAMPLES
 The following call to
 .Fn u8len
 will return 17 while the call to
 .Fn strlen
 will return 22 as a result of use of multibyte-characters in
-.Fa s .
+.Fa sv .
 .Bd -literal -offset indent
-struct u8view sv = U8(u8\(dq„Der Große Duden“\(dq);
-size_t blen = strlen((char *)sv.p);
-size_t cplen = u8len(U8_ARGS(sv));
+size_t n;
+struct u8view sv = U8(\(dq„Der Große Duden“\(dq);
+
+n = u8len(sv);            /* 17 */
+n = strlen((char *)sv.p); /* 22 */
 .Ed
 .Sh SEE ALSO
-.Xr u8gcnt 3 ,
 .Xr U8 3 ,
+.Xr u8gcnt 3 ,
+.Xr u8view 3type ,
 .Xr unicode 7 ,
 .Xr utf\-8 7
 .Sh STANDARDS
@@ -56,10 +56,11 @@ size_t cplen = u8len(U8_ARGS(sv));
 The return value of
 .Fn u8len
 does not necessarily represent the number of human-preceived characters
-in the given buffer;
-multiple codepoints may combine to form one human-preceived character
-that spans a single column.
-To count user-preceived codepoints
+in the given string view;
+multiple codepoints may combine to form one human-perceived character.
+These human-perceived characters may even take up multiple columns in a
+monospaced environment such as in a terminal emulator.
+To count user-perceived characters
 .Pq also known as graphemes ,
 you may want to use the
 .Xr u8gcnt 3
diff --git a/man/u8next.3 b/man/u8next.3
index 1ba39f0..68079f1 100644
--- a/man/u8next.3
+++ b/man/u8next.3
@@ -1,4 +1,4 @@
-.Dd 20 February 2024
+.Dd 4 May 2024
 .Dt U8NEXT 3
 .Os
 .Sh NAME
@@ -10,30 +10,25 @@
 .Sh SYNOPSIS
 .In mbstring.h
 .Ft int
-.Fn u8next "rune *ch" "const char8_t **s" "size_t *n"
+.Fn u8next "rune *ch" "struct u8view *sv"
 .Ft int
 .Fn u8prev "rune *ch" "const char8_t **s" "const char8_t *start"
 .Sh DESCRIPTION
 The
 .Fn u8next
-function decodes the first rune in the UTF-8 encoded string pointed to by
-.Fa s
-of length
-.Fa n
+function decodes the first rune in the UTF-8 encoded string view
+.Fa sv
 and stores the result in
 .Fa ch .
-It then updates
-.Fa s
-to point to the next codepoint in the buffer and updates the length
-.Fa n
-accordingly.
+It then shrinks
+.Fa sv
+so that the decoded rune is removed.
 .Pp
 The
 .Fn u8prev
 function takes a pointer
 .Fa start
-which points to the start of the string instead of a length,
-and updates
+which points to the start of the string and updates
 .Fa s
 to point to the previous codepoint in the buffer.
 The rune
@@ -59,19 +54,16 @@ or 0 at the end of iteration.
 The following calls to
 .Fn u8next
 iterate over and print all the codepoints in
-.Va s .
+.Va sv .
 .Bd -literal -offset indent
 #include <rune.h> /* For PRIXRUNE; see rune(3) */
 
-#define STRING u8"Ta’ Ħaġrat"
-
 int w;
 rune ch;
-const char8_t *s = STRING;
-size_t n = sizeof(STRING) - 1;
+struct u8view sv = U8("Ta’ Ħaġrat");
 
-while (w = u8next(&ch, &s, &n))
-	printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s - w);
+while (w = u8next(&ch, &sv))
+	printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, sv.p - w);
 .Ed
 .Pp
 The following example is the same as the previous,
@@ -81,23 +73,20 @@ function to iterate backwards.
 .Bd -literal -offset indent
 #include <rune.h> /* For PRIXRUNE; see rune(3) */
 
-#define STRING u8"Ta’ Ħaġrat"
-
 int w;
 rune ch;
-const char8_t *s, *start;
-size_t n = sizeof(STRING) - 1;
-
-start = STRING;
-s = start + n;
+struct u8view sv = U8("Ta’ Ħaġrat");
+const char8_t *s = sv.p + sv.len;
 
-while (w = u8prev(&ch, &s, start))
+while (w = u8prev(&ch, &s, sv.p))
 	printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s);
 .Ed
 .Sh SEE ALSO
 .Xr rune 3 ,
+.Xr U8 3 ,
 .Xr u8gnext 3 ,
 .Xr u8tor 3 ,
+.Xr u8view 3type ,
 .Xr RUNE_ERROR 3const ,
 .Xr unicode 7 ,
 .Xr utf\-8 7
diff --git a/man/u8tor.3 b/man/u8tor.3
index 6e3511e..8886193 100644
--- a/man/u8tor.3
+++ b/man/u8tor.3
@@ -1,4 +1,4 @@
-.Dd 10 March 2024
+.Dd 4 May 2024
 .Dt U8TOR 3
 .Os
 .Sh NAME
@@ -37,18 +37,20 @@ The following call to
 attempts to decode the first UTF-8 codepoint in
 .Va buf .
 .Bd -literal -offset indent
-/* Implementation of read_codepoint() omitted */
+#include <errors.h> /* For err(); see errors(3) */
+#include <rune.h> /* For PRIXRUNE; see rune(3) */
 
 rune ch;
-char8_t *buf = read_codepoint(stdin);
+char8_t *buf = u8"Γειά σου Κόσμε";
 int w = u8tor(&ch, buf);
 if (ch == RUNE_ERROR)
-	errx("Got invalid UTF-8 codepoint");
-printf("Got rune ‘%.*s’\en", w, buf);
+	err("Got invalid UTF-8 codepoint");
+printf("Got rune ‘%.*s’ (U+%04" PRIXRUNE ")\en", w, buf, ch);
 .Ed
 .Sh SEE ALSO
-.Xr errx 3mlib ,
+.Xr errors 3 ,
 .Xr rtou8 3 ,
+.Xr rune 3 ,
 .Xr u8chk 3 ,
 .Xr u8next 3 ,
 .Xr RUNE_ERROR 3const ,
diff --git a/man/usage.3 b/man/usage.3
index 92b9b43..ead0f29 100644
--- a/man/usage.3
+++ b/man/usage.3
@@ -1,4 +1,4 @@
-.Dd 27 April 2024
+.Dd 4 May 2024
 .Dt USAGE 3
 .Os
 .Sh NAME
@@ -34,9 +34,9 @@ be provided to the example executable.
 #include <optparse.h>
  
 static const struct op_option opts[] = {
-	{'a', U8(nullptr), OPT_NONE},
-	{'b', U8(nullptr), OPT_NONE},
-	{'h', U8(nullptr), OPT_NONE},
+	{'a', U8(),       OPT_NONE},
+	{'b', U8(),       OPT_NONE},
+	{'h', U8("help"), OPT_NONE},
 };
 
 int
author	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:01:45 +0200
committer	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:01:45 +0200
commit	ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (patch)
tree	90250966629653f0462cf17bc0b6f2476fb6d1fc
parent	8b923ba5e5bb37ea26350b4c1c688b8697706609 (diff)