diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-04-22 21:27:07 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-22 21:27:07 +0200 | 
| commit | eda8550f79f7f836a78f5909f1dccc008511d4f8 (patch) | |
| tree | e2f801d7489a6eebeae92fef7b6b95c504668e44 /test | |
| parent | c0a983a29af17415ef29058d72f1a9cd99ddd83f (diff) | |
Add a test for word breaking
Diffstat (limited to 'test')
| -rwxr-xr-x | test/gen-test-data | 11 | ||||
| -rwxr-xr-x | test/run-tests | 33 | ||||
| -rw-r--r-- | test/wnext_test.c | 122 | 
3 files changed, 166 insertions, 0 deletions
| diff --git a/test/gen-test-data b/test/gen-test-data new file mode 100755 index 0000000..12ee11e --- /dev/null +++ b/test/gen-test-data @@ -0,0 +1,11 @@ +#!/bin/sh + +set -e + +download() +{ +	curl -sS "https://www.unicode.org/Public/15.1.0/ucd/$1" +} + +download 'auxiliary/WordBreakTest.txt' \ +| sed -En 's/\s+//g; s/÷?#.*//g; /./p' >wnext.in diff --git a/test/run-tests b/test/run-tests new file mode 100755 index 0000000..e19b611 --- /dev/null +++ b/test/run-tests @@ -0,0 +1,33 @@ +#!/bin/sh + +set -e +cd "${0%/*}" + +readonly FLAGS=' +	-std=c23 -I../include +	-Og -ggdb3 +	-Wall -Wextra -Wpedantic +	-Wno-pointer-sign +	-Wno-attributes +' + +(cd ..; ./make) + +./gen-test-data + +for src in *.c +do +	dst="${src%.*}" +	gcc $FLAGS -o "$dst" "$src" ../libmlib.a  +done + +s="$(find . -type f -executable \ +	-not -name gen-test-data \ +	-not -name run-tests \ +	-exec echo rm "*.in" {} +)" +trap "$s" EXIT + +find . -type f -executable \ +	-not -name gen-test-data \ +	-not -name run-tests \ +	-exec {} \; diff --git a/test/wnext_test.c b/test/wnext_test.c new file mode 100644 index 0000000..a137f7d --- /dev/null +++ b/test/wnext_test.c @@ -0,0 +1,122 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> + +#include "alloc.h" +#include "dynarr.h" +#include "errors.h" +#include "macros.h" +#include "mbstring.h" +#include "rune.h" +#include "unicode/string.h" + +#define TESTFILE "wnext.in" + +static bool test(const char8_t *, size_t, int); +static int hexdigits(rune); + +int +main(int, char **argv) +{ +	int rv; +	size_t n; +	ssize_t nr; +	char *line; +	FILE *fp; + +	rv = EXIT_SUCCESS; +	line = nullptr; +	mlib_setprogname(argv[0]); + +	if ((fp = fopen(TESTFILE, "r")) == nullptr) +		err("fopen: %s:", TESTFILE); + +	for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) { +		if (line[nr - 1] == '\n') +			line[--nr] = '\0'; + +		if (!test(line, (size_t)nr, id)) { +			rv = EXIT_FAILURE; +			break; +		} +	} +	if (ferror(fp)) +		err("getline: %s:", TESTFILE); + +	free(line); +	fclose(fp); +	return rv; +} + +bool +test(const char8_t *line, size_t n, int id) +{ +	size_t total = 0; +	const char8_t *line2 = line; + +	typedef dynarr(char8_t) word; +	dynarr(word) words = {}; + +	do { +		rune op, ch; + +		u8next(&op, &line2, &n); +		sscanf(line2, "%" SCNxRUNE, &ch); +		int off = hexdigits(ch); +		off = MAX(4, off); +		line2 += off, n -= off; + +		char8_t buf[U8_LEN_MAX] = {}; +		int w = rtou8(buf, sizeof(buf), ch); +		total += w; + +		if (op == U'÷') +			DAPUSH(&words, (word){}); +		DAEXTEND(&words.buf[words.len - 1], buf, w); +	} while (n > 0); + +	size_t off = 0; +	char8_t *buf = bufalloc(nullptr, 1, total); +	da_foreach (&words, wd) { +		memcpy(buf + off, wd->buf, wd->len); +		off += wd->len; +	} + +	/* Assert the word count is correct */ +	size_t words_got = u8wcnt(buf, total); +	if (words_got != words.len) { +		warn("case %d: expected %zu word(s) but got %zu: ‘%s’", id, words.len, +		     words_got, line); +		return false; +	} + +	/* Assert the individual words are correct */ +	struct u8view wd; +	const char8_t *buf_cpy = buf; +	for (size_t i = 0; u8wnext(&wd, &buf_cpy, &total); i++) { +		word wd2 = words.buf[i]; +		if (!u8eq(wd.p, wd.len, wd2.buf, wd2.len)) { +			warn("case %d: expected word ‘%.*s’ but got ‘%.*s’", id, +			     (int)wd2.len, wd2.buf, (int)wd.len, wd.p); +			return false; +		} +	} + +	da_foreach (&words, wd) +		free(wd->buf); +	free(words.buf); +	free(buf); + +	return true; +} + +int +hexdigits(rune ch) +{ +	int n = 0; +	do { +		ch /= 16; +		n++; +	} while (ch != 0); +	return n; +} |