Go all in on string views, and fix manuals

author: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
commit: ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (patch)
tree: 90250966629653f0462cf17bc0b6f2476fb6d1fc /lib/unicode/string
parent: 8b923ba5e5bb37ea26350b4c1c688b8697706609 (diff)
10 files changed, 53 insertions, 57 deletions
diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c
index 6c0b61d..2ab7c7c 100644
--- a/lib/unicode/string/u8casefold.c
+++ b/lib/unicode/string/u8casefold.c
@@ -3,13 +3,13 @@
 #include "unicode/string.h"
 
 size_t
-u8casefold(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8casefold(char8_t *restrict dst, size_t dstn, struct u8view sv,
            enum caseflags flags)
 {
 	rune ch;
 	size_t n = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
+	while (u8next(&ch, &sv)) {
 		struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ);
 		for (size_t i = 0; i < rv.len; i++) {
 			if (n >= dstn) {
diff --git a/lib/unicode/string/u8gcnt.c b/lib/unicode/string/u8gcnt.c
index 81a0f97..6dfc519 100644
--- a/lib/unicode/string/u8gcnt.c
+++ b/lib/unicode/string/u8gcnt.c
@@ -1,10 +1,10 @@
 #include "unicode/string.h"
 
 size_t
-u8gcnt(const char8_t *s, size_t n)
+u8gcnt(struct u8view sv)
 {
 	size_t m = 0;
-	while (u8gnext(nullptr, &s, &n))
+	while (u8gnext(nullptr, &sv))
 		m++;
 	return m;
 }
diff --git a/lib/unicode/string/u8gnext.c b/lib/unicode/string/u8gnext.c
index a050bd5..3b0b410 100644
--- a/lib/unicode/string/u8gnext.c
+++ b/lib/unicode/string/u8gnext.c
@@ -20,17 +20,17 @@ static bool u8isgbrk(rune, rune, struct gbrk_state *);
 _MLIB_DEFINE_BSEARCH(gbrk_prop, gbrk_prop_tbl, GBP_OTHER)
 
 size_t
-u8gnext(struct u8view *g, const char8_t **s, size_t *n)
+u8gnext(struct u8view *g, struct u8view *sv)
 {
 	int m;
 	rune ch1;
 	const char8_t *p;
 	struct gbrk_state gs = {0};
 
-	if (*n == 0)
+	if (sv->len == 0)
 		return 0;
 
-	p = *s;
+	p = sv->p;
 	if (g)
 		g->p = p;
 	p += u8tor(&ch1, p);
@@ -38,14 +38,13 @@ u8gnext(struct u8view *g, const char8_t **s, size_t *n)
 	for (;;) {
 		rune ch2;
 
-		if ((size_t)(p - *s) >= *n)
+		if ((size_t)(p - sv->p) >= sv->len)
 			ch2 = 0;
 		else
 			m = u8tor(&ch2, p);
 		if (u8isgbrk(ch1, ch2, &gs)) {
-			ptrdiff_t d = p - *s;
-			*n -= d;
-			*s = p;
+			ptrdiff_t d = p - sv->p;
+			VSHFT(sv, d);
 			if (g)
 				g->len = d;
 			return d;
diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c
index 63fdae4..907077b 100644
--- a/lib/unicode/string/u8lower.c
+++ b/lib/unicode/string/u8lower.c
@@ -13,7 +13,7 @@ uprop_ccc_0_or_230(rune ch)
 }
 
 size_t
-u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
         enum caseflags flags)
 {
 	struct lcctx ctx = {
@@ -32,21 +32,21 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 	n = before_dot_cnt = more_above_cnt = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
+	while (u8next(&ch, &sv)) {
 		rune next = 0;
-		if (srcn > 0)
-			u8tor(&next, src);
+		if (sv.len > 0)
+			u8tor(&next, sv.p);
 
 		if (ctx.az_or_tr || ctx.lt) {
 			if (before_dot_cnt == 0 || more_above_cnt == 0) {
 				rune ch = 0;
 				before_dot_cnt = more_above_cnt = 0;
-				struct u8view cpy = {src, srcn};
+				struct u8view cpy = sv;
 
 				do {
 					before_dot_cnt++;
 					more_above_cnt++;
-				} while (u8next(&ch, U8_ARGSP(cpy)) && !uprop_ccc_0_or_230(ch));
+				} while (u8next(&ch, &cpy) && !uprop_ccc_0_or_230(ch));
 
 				if (ch != COMB_DOT_ABOVE)
 					before_dot_cnt = 0;
@@ -60,11 +60,11 @@ u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 		if (final_sigma.after == 0) {
 			rune ch = 0;
-			struct u8view cpy = {src, srcn};
+			struct u8view cpy = sv;
 
 			do
 				final_sigma.after++;
-			while (u8next(&ch, U8_ARGSP(cpy)) && uprop_is_ci(ch));
+			while (u8next(&ch, &cpy) && uprop_is_ci(ch));
 
 			if (!uprop_is_cased(ch))
 				final_sigma.after = 0;
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
index 01e9d2e..f4d9b7e 100644
--- a/lib/unicode/string/u8title.c
+++ b/lib/unicode/string/u8title.c
@@ -14,7 +14,7 @@ uprop_ccc_0_or_230(rune ch)
 }
 
 size_t
-u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
         enum caseflags flags)
 {
 	struct tcctx ctx_t;
@@ -26,7 +26,7 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 	rune ch;
 	bool nl_IJ = false;
 	size_t n, before_dot_cnt, more_above_cnt;
-	struct u8view word = {}, wcpy = {src, srcn};
+	struct u8view word = {}, wcpy = sv;
 	struct {
 		bool before;
 		size_t after;
@@ -39,9 +39,9 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 	n = before_dot_cnt = more_above_cnt = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
-		if (src > word.p + word.len) {
-			u8wnext(&word, U8_ARGSP(wcpy));
+	while (u8next(&ch, &sv)) {
+		if (sv.p > word.p + word.len) {
+			u8wnext(&word, &wcpy);
 			ctx_t.after_soft_dotted = false;
 			state = TITLE;
 		}
@@ -50,12 +50,12 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 			if (before_dot_cnt == 0 || more_above_cnt == 0) {
 				rune ch = 0;
 				before_dot_cnt = more_above_cnt = 0;
-				struct u8view cpy = {src, srcn};
+				struct u8view cpy = sv;
 
 				do {
 					before_dot_cnt++;
 					more_above_cnt++;
-				} while (u8next(&ch, U8_ARGSP(cpy)) && !uprop_ccc_0_or_230(ch));
+				} while (u8next(&ch, &cpy) && !uprop_ccc_0_or_230(ch));
 
 				if (ch != COMB_DOT_ABOVE)
 					before_dot_cnt = 0;
@@ -69,11 +69,11 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 		if (final_sigma.after == 0) {
 			rune ch = 0;
-			struct u8view cpy = {src, srcn};
+			struct u8view cpy = sv;
 
 			do
 				final_sigma.after++;
-			while (u8next(&ch, U8_ARGSP(cpy)) && uprop_is_ci(ch));
+			while (u8next(&ch, &cpy) && uprop_is_ci(ch));
 
 			if (!uprop_is_cased(ch))
 				final_sigma.after = 0;
@@ -95,8 +95,8 @@ u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 
 			if (flags & CF_LANG_NL) {
 				rune next = 0;
-				if (srcn > 0)
-					u8tor(&next, src);
+				if (sv.len > 0)
+					u8tor(&next, sv.p);
 				nl_IJ =
 					(ch == 'i' || ch == 'I') && (next == 'j' || next == 'J');
 			}
diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c
index 086a160..6d4026d 100644
--- a/lib/unicode/string/u8upper.c
+++ b/lib/unicode/string/u8upper.c
@@ -3,7 +3,7 @@
 #include "unicode/string.h"
 
 size_t
-u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+u8upper(char8_t *restrict dst, size_t dstn, struct u8view sv,
         enum caseflags flags)
 {
 	struct ucctx ctx = {
@@ -15,7 +15,7 @@ u8upper(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
 	rune ch;
 	size_t n = 0;
 
-	while (u8next(&ch, &src, &srcn)) {
+	while (u8next(&ch, &sv)) {
 		struct rview rv = uprop_get_uc(ch, ctx);
 		for (size_t i = 0; i < rv.len; i++) {
 			if (n >= dstn) {
diff --git a/lib/unicode/string/u8wcnt.c b/lib/unicode/string/u8wcnt.c
index f1b1742..f71faf5 100644
--- a/lib/unicode/string/u8wcnt.c
+++ b/lib/unicode/string/u8wcnt.c
@@ -1,10 +1,10 @@
 #include "unicode/string.h"
 
 size_t
-u8wcnt(const char8_t *s, size_t n)
+u8wcnt(struct u8view sv)
 {
 	size_t m = 0;
-	while (u8wnext(nullptr, &s, &n))
+	while (u8wnext(nullptr, &sv))
 		m++;
 	return m;
 }
diff --git a/lib/unicode/string/u8wcnt_human.c b/lib/unicode/string/u8wcnt_human.c
index 6e70398..60e7f95 100644
--- a/lib/unicode/string/u8wcnt_human.c
+++ b/lib/unicode/string/u8wcnt_human.c
@@ -1,10 +1,10 @@
 #include "unicode/string.h"
 
 size_t
-u8wcnt_human(const char8_t *s, size_t n)
+u8wcnt_human(struct u8view sv)
 {
 	size_t m = 0;
-	while (u8wnext_human(nullptr, &s, &n))
+	while (u8wnext_human(nullptr, &sv))
 		m++;
 	return m;
 }
diff --git a/lib/unicode/string/u8wnext.c b/lib/unicode/string/u8wnext.c
index 5e893c6..6655c5d 100644
--- a/lib/unicode/string/u8wnext.c
+++ b/lib/unicode/string/u8wnext.c
@@ -29,22 +29,20 @@ static size_t findwbrk(struct u8view);
 static struct wbrk_state mkwbrkstate(struct u8view);
 
 size_t
-u8wnext(struct u8view *w, const char8_t **s, size_t *n)
+u8wnext(struct u8view *w, struct u8view *sv)
 {
-	ASSUME(n != nullptr);
-	ASSUME(s != nullptr);
-	ASSUME(*s != nullptr);
+	ASSUME(sv != nullptr);
+	ASSUME(sv->p != nullptr);
 
-	if (*n == 0)
+	if (sv->len == 0)
 		return 0;
 
-	size_t off = findwbrk((struct u8view){*s, *n});
+	size_t off = findwbrk(*sv);
 	if (w != nullptr)
-		*w = (struct u8view){*s, off};
+		*w = (struct u8view){sv->p, off};
 
-	ASSUME(*n >= off);
-	*s += off;
-	*n -= off;
+	ASSUME(sv->len >= off);
+	VSHFT(sv, off);
 	return off;
 }
 
@@ -196,13 +194,13 @@ mkwbrkstate(struct u8view sv)
 
 	rune ch;
 	for (size_t i = 0;
-	     i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.raw_v)) != 0; i++)
+	     i < lengthof(ws.raw.next) && u8next(&ch, &ws.raw_v) != 0; i++)
 	{
 		ws.raw.next[i] = mlib_lookup(ch);
 	}
 
 	for (size_t i = 0;
-	     i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.skip_v)) != 0;)
+	     i < lengthof(ws.raw.next) && u8next(&ch, &ws.skip_v) != 0;)
 	{
 		ws.skip.next[i] = mlib_lookup(ch);
 		if (!IS_IGNORE(ws.skip.next[i]))
@@ -224,10 +222,10 @@ advance(struct wbrk_state *ws)
 	ws->raw.prev[0] = ws->raw.next[0];
 	ws->raw.next[0] = ws->raw.next[1];
 	ws->raw.next[1] =
-		u8next(&ch, U8_ARGSP(ws->raw_v)) != 0 ? mlib_lookup(ch) : WBRK_EOT;
+		u8next(&ch, &ws->raw_v) != 0 ? mlib_lookup(ch) : WBRK_EOT;
 
 	/* Increment the midpoint */
-	u8next(nullptr, U8_ARGSP(ws->mid_v));
+	u8next(nullptr, &ws->mid_v);
 
 	/* Ignore ignorable properties */
 	if (!IS_IGNORE(ws->raw.prev[0])) {
@@ -237,7 +235,7 @@ advance(struct wbrk_state *ws)
 		ws->ri_parity = ws->ri_parity == 0 && ws->skip.prev[0] == WBRK_RI;
 
 		do {
-			if (u8next(&ch, U8_ARGSP(ws->skip_v)) == 0) {
+			if (u8next(&ch, &ws->skip_v) == 0) {
 				ws->skip.next[1] = WBRK_EOT;
 				break;
 			}
diff --git a/lib/unicode/string/u8wnext_human.c b/lib/unicode/string/u8wnext_human.c
index d85abf1..953d942 100644
--- a/lib/unicode/string/u8wnext_human.c
+++ b/lib/unicode/string/u8wnext_human.c
@@ -4,17 +4,16 @@
 #include "unicode/string.h"
 
 size_t
-u8wnext_human(struct u8view *dst, const char8_t **s, size_t *n)
+u8wnext_human(struct u8view *dst, struct u8view *sv)
 {
-	ASSUME(n != nullptr);
-	ASSUME(s != nullptr);
-	ASSUME(*s != nullptr);
+	ASSUME(sv != nullptr);
+	ASSUME(sv->p != nullptr);
 
 	struct u8view w;
-	while (u8wnext(&w, s, n)) {
+	while (u8wnext(&w, sv)) {
 		rune ch;
 		struct u8view cpy = w;
-		while (u8next(&ch, U8_ARGSP(cpy))) {
+		while (u8next(&ch, &cpy)) {
 			if (uprop_get_gc(ch) & (GC_L | GC_N)) {
 				if (dst != nullptr)
 					*dst = w;
author	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:01:45 +0200
committer	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:01:45 +0200
commit	ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (patch)
tree	90250966629653f0462cf17bc0b6f2476fb6d1fc /lib/unicode/string
parent	8b923ba5e5bb37ea26350b4c1c688b8697706609 (diff)