Fix tests and generalize *brk tests

author: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:21:39 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:21:39 +0200
commit: 8f7f007a52f39c1e03817d417e95877526945b45 (patch)
tree: d911a8887d9422920d152665e46c5fed93d12d91 /test
parent: ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (diff)
5 files changed, 147 insertions, 258 deletions
diff --git a/test/_brk-test.h b/test/_brk-test.h
new file mode 100644
index 0000000..e468f23
--- /dev/null
+++ b/test/_brk-test.h
@@ -0,0 +1,127 @@
+#ifndef BRKTYPE
+#	error "BRKTYPE is not defined!"
+#endif
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <alloc.h>
+#include <dynarr.h>
+#include <errors.h>
+#include <macros.h>
+#include <mbstring.h>
+#include <rune.h>
+#include <unicode/string.h>
+
+#define TESTFILE STR(BRKTYPE) "brk.in"
+#define ITERFUNC CONCAT(CONCAT(u8, BRKTYPE), next)
+#define CNTFUNC  CONCAT(CONCAT(u8, BRKTYPE), cnt)
+
+static bool test(struct u8view, int);
+static int hexdigits(rune);
+
+/* Feed every line of TESTFILE to test(); fail if any single case fails */
+int
+main(int, char **argv)
+{
+	mlib_setprogname(argv[0]);
+
+	FILE *stream = fopen(TESTFILE, "r");
+	if (stream == nullptr)
+		err("fopen: %s:", TESTFILE);
+
+	int status = EXIT_SUCCESS;
+	char *buf = nullptr;
+	size_t cap = 0;
+	ssize_t len;
+
+	for (int id = 1; (len = getline(&buf, &cap, stream)) > 0; id++) {
+		/* Strip the trailing newline, if there is one */
+		if (buf[len - 1] == '\n')
+			buf[--len] = '\0';
+		struct u8view sv = {buf, (size_t)len};
+		if (!test(sv, id))
+			status = EXIT_FAILURE;
+	}
+	if (ferror(stream))
+		err("getline: %s:", TESTFILE);
+
+	free(buf);
+	fclose(stream);
+	return status;
+}
+
+/* Rebuild the string described by one test line and check that CNTFUNC and
+   ITERFUNC split it at the marked ‘÷’; frees all allocations on every path */
+bool
+test(struct u8view sv, int id)
+{
+	bool ok = false;
+	size_t total = 0, off = 0, items_got;
+	char8_t *p = nullptr;
+	struct u8view buf, it1, buf_cpy, sv_cpy = sv;
+	typedef dynarr(char8_t) item;
+	dynarr(item) items = {};
+
+	do {
+		rune op, ch;
+		u8next(&op, &sv_cpy);
+		/* A bad codepoint, or a line not opening with ‘÷’, is malformed */
+		if (sscanf(sv_cpy.p, "%" SCNxRUNE, &ch) != 1
+		    || (op != U'÷' && items.len == 0)) {
+			warn("case %d: malformed test line: ‘%s’", id, sv.p);
+			goto out;
+		}
+		int n = hexdigits(ch), skip = MAX(4, n); /* skip at least 4 chars */
+		VSHFT(&sv_cpy, skip);
+		char8_t enc[U8_LEN_MAX] = {};
+		int w = rtou8(enc, sizeof(enc), ch);
+		total += w;
+		if (op == U'÷')
+			DAPUSH(&items, (item){});
+		DAEXTEND(&items.buf[items.len - 1], enc, w);
+	} while (sv_cpy.len > 0);
+
+	p = bufalloc(nullptr, 1, total);
+	da_foreach (&items, g) {
+		memcpy(p + off, g->buf, g->len);
+		off += g->len;
+	}
+	buf = buf_cpy = (struct u8view){p, total};
+
+	/* Assert the item count is correct */
+	if ((items_got = CNTFUNC(buf)) != items.len) {
+		warn("case %d: expected %zu item(s) but got %zu: ‘%s’", id, items.len,
+		     items_got, sv.p);
+		goto out;
+	}
+
+	/* Assert the individual items are correct */
+	for (size_t i = 0; ITERFUNC(&it1, &buf_cpy); i++) {
+		item it2 = items.buf[i];
+		if (!u8eq(it1, ((struct u8view){it2.buf, it2.len}))) {
+			warn("case %d: expected item ‘%.*s’ but got ‘%.*s’", id,
+			     (int)it2.len, it2.buf, SV_PRI_ARGS(it1));
+			goto out;
+		}
+	}
+	ok = true;
+out:
+	da_foreach (&items, wd)
+		free(wd->buf);
+	free(items.buf);
+	free(p);
+	return ok;
+}
+
+/* Count the hexadecimal digits needed to write CH; zero still takes one */
+int
+hexdigits(rune ch)
+{
+	int ndigits = 1;
+
+	while ((ch /= 16) != 0)
+		ndigits++;
+	return ndigits;
+}
diff --git a/test/_case-test.h b/test/_case-test.h
index 121667a..12e3a57 100644
--- a/test/_case-test.h
+++ b/test/_case-test.h
@@ -52,39 +52,37 @@ main(int, char **argv)
 bool
 test(const char8_t *line, int id)
 {
+	struct u8view sv = {line, strlen(line)};
 	struct u8view before, after, flags;
-	before.p = line;
-	after.p = strchr(line, ';') + 1;
-	before.len = after.p - before.p - 1;
-	flags.p = strchr(after.p, ';') + 1;
-	after.len = flags.p - after.p - 1;
-	flags.len = strlen(flags.p);
+	before = u8split(&sv, ';');
+	after  = u8split(&sv, ';');
+	flags  = u8split(&sv, ';');
 
-	enum caseflags cf = u8eq(U8_ARGS(flags), U8_ARGS(U8("ẞ")))  ? CF_ẞ
-	                  : u8eq(U8_ARGS(flags), U8_ARGS(U8("AZ"))) ? CF_LANG_AZ
-	                  : u8eq(U8_ARGS(flags), U8_ARGS(U8("LT"))) ? CF_LANG_LT
-	                  : u8eq(U8_ARGS(flags), U8_ARGS(U8("NL"))) ? CF_LANG_NL
-	                                                            : 0;
+	enum caseflags cf = u8eq(flags, U8("ẞ"))  ? CF_ẞ
+	                  : u8eq(flags, U8("AZ")) ? CF_LANG_AZ
+	                  : u8eq(flags, U8("LT")) ? CF_LANG_LT
+	                  : u8eq(flags, U8("NL")) ? CF_LANG_NL
+	                                          : 0;
 	char8_t *buf = bufalloc(nullptr, 1, after.len);
-	size_t bufsz = FUNC(nullptr, 0, U8_ARGS(before), cf);
+	size_t bufsz = FUNC(nullptr, 0, before, cf);
 	if (bufsz != after.len) {
 		warn("case %d: expected %scased buffer size of %zu but got %zu "
 		     "(flags=‘%.*s’)",
-		     id, STR(CASETYPE), after.len, bufsz, U8_PRI_ARGS(flags));
+		     id, STR(CASETYPE), after.len, bufsz, SV_PRI_ARGS(flags));
 		return false;
 	}
 
-	bufsz = FUNC(buf, bufsz, U8_ARGS(before), cf);
+	bufsz = FUNC(buf, bufsz, before, cf);
 	if (bufsz != after.len) {
 		warn("case %d: expected %scased length of %zu but got %zu "
 		     "(flags=‘%.*s’)",
-		     id, STR(CASETYPE), after.len, bufsz, U8_PRI_ARGS(flags));
+		     id, STR(CASETYPE), after.len, bufsz, SV_PRI_ARGS(flags));
 		return false;
 	}
 
 	if (!memeq(buf, after.p, bufsz)) {
 		warn("case %d: expected ‘%.*s’ but got ‘%.*s’ (flags=‘%.*s’)", id,
-		     U8_PRI_ARGS(after), (int)bufsz, buf, U8_PRI_ARGS(flags));
+		     SV_PRI_ARGS(after), (int)bufsz, buf, SV_PRI_ARGS(flags));
 		return false;
 	}
 
diff --git a/test/gbrk-test.c b/test/gbrk-test.c
index cf91121..4e4a602 100644
--- a/test/gbrk-test.c
+++ b/test/gbrk-test.c
@@ -1,120 +1,2 @@
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <alloc.h>
-#include <dynarr.h>
-#include <errors.h>
-#include <macros.h>
-#include <mbstring.h>
-#include <rune.h>
-#include <unicode/string.h>
-
-#define TESTFILE "gnext.in"
-
-static bool test(const char8_t *, size_t, int);
-static int hexdigits(rune);
-
-int
-main(int, char **argv)
-{
-	int rv;
-	size_t n;
-	ssize_t nr;
-	char *line;
-	FILE *fp;
-
-	rv = EXIT_SUCCESS;
-	line = nullptr;
-	mlib_setprogname(argv[0]);
-
-	if ((fp = fopen(TESTFILE, "r")) == nullptr)
-		err("fopen: %s:", TESTFILE);
-
-	for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) {
-		if (line[nr - 1] == '\n')
-			line[--nr] = '\0';
-
-		if (!test(line, (size_t)nr, id))
-			rv = EXIT_FAILURE;
-	}
-	if (ferror(fp))
-		err("getline: %s:", TESTFILE);
-
-	free(line);
-	fclose(fp);
-	return rv;
-}
-
-bool
-test(const char8_t *line, size_t n, int id)
-{
-	size_t total = 0;
-	const char8_t *line2 = line;
-
-	typedef dynarr(char8_t) graph;
-	dynarr(graph) graphs = {};
-
-	do {
-		rune op, ch;
-
-		u8next(&op, &line2, &n);
-		sscanf(line2, "%" SCNxRUNE, &ch);
-		int off = hexdigits(ch);
-		off = MAX(4, off);
-		line2 += off, n -= off;
-
-		char8_t buf[U8_LEN_MAX] = {};
-		int w = rtou8(buf, sizeof(buf), ch);
-		total += w;
-
-		if (op == U'÷')
-			DAPUSH(&graphs, (graph){});
-		DAEXTEND(&graphs.buf[graphs.len - 1], buf, w);
-	} while (n > 0);
-
-	size_t off = 0;
-	char8_t *buf = bufalloc(nullptr, 1, total);
-	da_foreach (&graphs, g) {
-		memcpy(buf + off, g->buf, g->len);
-		off += g->len;
-	}
-
-	/* Assert the grapheme count is correct */
-	size_t graphs_got = u8gcnt(buf, total);
-	if (graphs_got != graphs.len) {
-		warn("case %d: expected %zu grapheme(s) but got %zu: ‘%s’", id,
-		     graphs.len, graphs_got, line);
-		return false;
-	}
-
-	/* Assert the individual graphemes are correct */
-	struct u8view g;
-	const char8_t *buf_cpy = buf;
-	for (size_t i = 0; u8gnext(&g, &buf_cpy, &total); i++) {
-		graph g2 = graphs.buf[i];
-		if (!u8eq(g.p, g.len, g2.buf, g2.len)) {
-			warn("case %d: expected grapheme ‘%.*s’ but got ‘%.*s’", id,
-			     (int)g2.len, g2.buf, (int)g.len, g.p);
-			return false;
-		}
-	}
-
-	da_foreach (&graphs, wd)
-		free(wd->buf);
-	free(graphs.buf);
-	free(buf);
-
-	return true;
-}
-
-int
-hexdigits(rune ch)
-{
-	int n = 0;
-	do {
-		ch /= 16;
-		n++;
-	} while (ch != 0);
-	return n;
-}
+#define BRKTYPE g
+#include "_brk-test.h"
diff --git a/test/run-tests b/test/run-tests
index f191c33..82d3e00 100755
--- a/test/run-tests
+++ b/test/run-tests
@@ -26,8 +26,8 @@ download 'auxiliary/WordBreakTest.txt'
 grep '^[^#]'                         data/LowercaseTest     >lower.in
 grep '^[^#]'                         data/TitlecaseTest     >title.in
 grep '^[^#]'                         data/UppercaseTest     >upper.in
-sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gnext.in
-sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest     >wnext.in
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gbrk.in
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest     >wbrk.in
 
 for src in *.c
 do
diff --git a/test/wbrk-test.c b/test/wbrk-test.c
index 8425b5b..0690eae 100644
--- a/test/wbrk-test.c
+++ b/test/wbrk-test.c
@@ -1,120 +1,2 @@
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <alloc.h>
-#include <dynarr.h>
-#include <errors.h>
-#include <macros.h>
-#include <mbstring.h>
-#include <rune.h>
-#include <unicode/string.h>
-
-#define TESTFILE "wnext.in"
-
-static bool test(const char8_t *, size_t, int);
-static int hexdigits(rune);
-
-int
-main(int, char **argv)
-{
-	int rv;
-	size_t n;
-	ssize_t nr;
-	char *line;
-	FILE *fp;
-
-	rv = EXIT_SUCCESS;
-	line = nullptr;
-	mlib_setprogname(argv[0]);
-
-	if ((fp = fopen(TESTFILE, "r")) == nullptr)
-		err("fopen: %s:", TESTFILE);
-
-	for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) {
-		if (line[nr - 1] == '\n')
-			line[--nr] = '\0';
-
-		if (!test(line, (size_t)nr, id))
-			rv = EXIT_FAILURE;
-	}
-	if (ferror(fp))
-		err("getline: %s:", TESTFILE);
-
-	free(line);
-	fclose(fp);
-	return rv;
-}
-
-bool
-test(const char8_t *line, size_t n, int id)
-{
-	size_t total = 0;
-	const char8_t *line2 = line;
-
-	typedef dynarr(char8_t) word;
-	dynarr(word) words = {};
-
-	do {
-		rune op, ch;
-
-		u8next(&op, &line2, &n);
-		sscanf(line2, "%" SCNxRUNE, &ch);
-		int off = hexdigits(ch);
-		off = MAX(4, off);
-		line2 += off, n -= off;
-
-		char8_t buf[U8_LEN_MAX] = {};
-		int w = rtou8(buf, sizeof(buf), ch);
-		total += w;
-
-		if (op == U'÷')
-			DAPUSH(&words, (word){});
-		DAEXTEND(&words.buf[words.len - 1], buf, w);
-	} while (n > 0);
-
-	size_t off = 0;
-	char8_t *buf = bufalloc(nullptr, 1, total);
-	da_foreach (&words, wd) {
-		memcpy(buf + off, wd->buf, wd->len);
-		off += wd->len;
-	}
-
-	/* Assert the word count is correct */
-	size_t words_got = u8wcnt(buf, total);
-	if (words_got != words.len) {
-		warn("case %d: expected %zu word(s) but got %zu: ‘%s’", id, words.len,
-		     words_got, line);
-		return false;
-	}
-
-	/* Assert the individual words are correct */
-	struct u8view wd;
-	const char8_t *buf_cpy = buf;
-	for (size_t i = 0; u8wnext(&wd, &buf_cpy, &total); i++) {
-		word wd2 = words.buf[i];
-		if (!u8eq(wd.p, wd.len, wd2.buf, wd2.len)) {
-			warn("case %d: expected word ‘%.*s’ but got ‘%.*s’", id,
-			     (int)wd2.len, wd2.buf, (int)wd.len, wd.p);
-			return false;
-		}
-	}
-
-	da_foreach (&words, wd)
-		free(wd->buf);
-	free(words.buf);
-	free(buf);
-
-	return true;
-}
-
-int
-hexdigits(rune ch)
-{
-	int n = 0;
-	do {
-		ch /= 16;
-		n++;
-	} while (ch != 0);
-	return n;
-}
+#define BRKTYPE w
+#include "_brk-test.h"
author	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:21:39 +0200
committer	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:21:39 +0200
commit	8f7f007a52f39c1e03817d417e95877526945b45 (patch)
tree	d911a8887d9422920d152665e46c5fed93d12d91 /test
parent	ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (diff)