diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-05-04 23:55:19 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-04 23:55:19 +0200 | 
| commit | e109b400133f57ce129f5e274fbb90cca0052a3b (patch) | |
| tree | f53cb5b0f58dd10916763bf8747de5b8f6e3ba57 | |
| parent | bd4d8c9f6e3fb7299f5f8e9fdb966824790aebc1 (diff) | |
Add tests for u8wnext_human()
| -rw-r--r-- | test/data/WordHumanBreakTest | 11 | ||||
| -rwxr-xr-x | test/run-tests | 13 | ||||
| -rw-r--r-- | test/wbrk-human-test.c | 78 | 
3 files changed, 96 insertions, 6 deletions
| diff --git a/test/data/WordHumanBreakTest b/test/data/WordHumanBreakTest new file mode 100644 index 0000000..8eea00f --- /dev/null +++ b/test/data/WordHumanBreakTest @@ -0,0 +1,11 @@ +# Empty input +; + +# Latin alphabet +C23 (ISO/IEC 9899:2024), the next C standard, replaces C17 (ISO/IEC 9899:2018).;C23|ISO|IEC|9899|2024|the|next|C|standard|replaces|C17|ISO|IEC|9899|2018 + +# Greek alphabet +Το ιουλιανό ημερολόγιο (365,25 ημέρες);Το|ιουλιανό|ημερολόγιο|365,25|ημέρες + +# Maltese with Arabic +Il-lingwist Malti Ġużè Aquilina kien jemmen li 'Mnajdra' (bl-Għarbi: منيدرة);Il|lingwist|Malti|Ġużè|Aquilina|kien|jemmen|li|Mnajdra|bl|Għarbi|منيدرة diff --git a/test/run-tests b/test/run-tests index f4c53c4..2562a32 100755 --- a/test/run-tests +++ b/test/run-tests @@ -23,12 +23,13 @@ readonly FLAGS='  download 'auxiliary/GraphemeBreakTest.txt'  download 'auxiliary/WordBreakTest.txt' -grep '^[^#]'                         data/CasefoldTest      >casefold.in -grep '^[^#]'                         data/LowercaseTest     >lower.in -grep '^[^#]'                         data/TitlecaseTest     >title.in -grep '^[^#]'                         data/UppercaseTest     >upper.in -sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gbrk.in -sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest     >wbrk.in +grep '^[^#]'                         data/CasefoldTest       >casefold.in +grep '^[^#]'                         data/LowercaseTest      >lower.in +grep '^[^#]'                         data/TitlecaseTest      >title.in +grep '^[^#]'                         data/UppercaseTest      >upper.in +grep '^[^#]'                         data/WordHumanBreakTest >wbrk-human.in +sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest  >gbrk.in +sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest      >wbrk.in  for src in *.c  do diff --git a/test/wbrk-human-test.c b/test/wbrk-human-test.c new file mode 100644 index 0000000..1fa9bd8 --- /dev/null +++ b/test/wbrk-human-test.c @@ -0,0 +1,78 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> + +#include <dynarr.h> +#include <errors.h> +#include <macros.h> +#include <mbstring.h> +#include <unicode/string.h> + +#define TESTFILE "wbrk-human.in" + +static bool test(struct u8view, int); + +int +main(int, char **argv) +{ +	int rv; +	size_t n; +	ssize_t nr; +	char *line; +	FILE *fp; + +	rv = EXIT_SUCCESS; +	line = nullptr; +	mlib_setprogname(argv[0]); + +	if ((fp = fopen(TESTFILE, "r")) == nullptr) +		err("fopen: %s:", TESTFILE); + +	for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) { +		if (line[nr - 1] == '\n') +			line[--nr] = '\0'; + +		if (!test((struct u8view){line, nr}, id)) +			rv = EXIT_FAILURE; +	} +	if (ferror(fp)) +		err("getline: %s:", TESTFILE); + +	free(line); +	fclose(fp); +	return rv; +} + +bool +test(struct u8view sv, int id) +{ +	struct u8view src; +	u8cut(&src, &sv, U";", 1); + +	struct u8view w; +	dynarr(struct u8view) ws = {}; + +	while (u8cut(&w, &sv, U"|", 1) != MBEND) +		DAPUSH(&ws, w); +	if (w.len > 0) +		DAPUSH(&ws, w); + +	/* Assert the word count is correct */ +	size_t n; +	if ((n = u8wcnt_human(src)) != ws.len) { +		warn("case %d: expected %zu words but got %zu", id, ws.len, n); +		return false; +	} + +	/* Assert the individual words are correct */ +	for (size_t i = 0; u8wnext_human(&w, &src) != 0; i++) { +		if (!u8eq(w, ws.buf[i])) { +			warn("case %d: expected word %zu to be ‘%.*s’ but got ‘%.*s’", id, +			     i, SV_PRI_ARGS(ws.buf[i]), SV_PRI_ARGS(w)); +			return false; +		} +	} + +	free(ws.buf); +	return true; +} |