aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- test/data/WordHumanBreakTest 11
-rwxr-xr-x test/run-tests 13
-rw-r--r-- test/wbrk-human-test.c 78
3 files changed, 96 insertions, 6 deletions
diff --git a/test/data/WordHumanBreakTest b/test/data/WordHumanBreakTest
new file mode 100644
index 0000000..8eea00f
--- /dev/null
+++ b/test/data/WordHumanBreakTest
@@ -0,0 +1,11 @@
+# Empty input
+;
+
+# Latin alphabet
+C23 (ISO/IEC 9899:2024), the next C standard, replaces C17 (ISO/IEC 9899:2018).;C23|ISO|IEC|9899|2024|the|next|C|standard|replaces|C17|ISO|IEC|9899|2018
+
+# Greek alphabet
+Το ιουλιανό ημερολόγιο (365,25 ημέρες);Το|ιουλιανό|ημερολόγιο|365,25|ημέρες
+
+# Maltese with Arabic
+Il-lingwist Malti Ġużè Aquilina kien jemmen li 'Mnajdra' (bl-Għarbi: منيدرة);Il|lingwist|Malti|Ġużè|Aquilina|kien|jemmen|li|Mnajdra|bl|Għarbi|منيدرة
diff --git a/test/run-tests b/test/run-tests
index f4c53c4..2562a32 100755
--- a/test/run-tests
+++ b/test/run-tests
@@ -23,12 +23,13 @@ readonly FLAGS='
download 'auxiliary/GraphemeBreakTest.txt'
download 'auxiliary/WordBreakTest.txt'
-grep '^[^#]' data/CasefoldTest >casefold.in
-grep '^[^#]' data/LowercaseTest >lower.in
-grep '^[^#]' data/TitlecaseTest >title.in
-grep '^[^#]' data/UppercaseTest >upper.in
-sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gbrk.in
-sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wbrk.in
+grep '^[^#]' data/CasefoldTest >casefold.in
+grep '^[^#]' data/LowercaseTest >lower.in
+grep '^[^#]' data/TitlecaseTest >title.in
+grep '^[^#]' data/UppercaseTest >upper.in
+grep '^[^#]' data/WordHumanBreakTest >wbrk-human.in
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gbrk.in
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wbrk.in
for src in *.c
do
diff --git a/test/wbrk-human-test.c b/test/wbrk-human-test.c
new file mode 100644
index 0000000..1fa9bd8
--- /dev/null
+++ b/test/wbrk-human-test.c
@@ -0,0 +1,78 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <dynarr.h>
+#include <errors.h>
+#include <macros.h>
+#include <mbstring.h>
+#include <unicode/string.h>
+
+#define TESTFILE "wbrk-human.in"
+
+static bool test(struct u8view, int);
+
+/* Read TESTFILE line by line and hand each line to test(), numbering the
+   cases from 1.  Returns EXIT_SUCCESS if every case passed and EXIT_FAILURE
+   if at least one case failed. */
+int
+main(int, char **argv)
+{
+	int rv;
+	size_t n;
+	ssize_t nr;
+	char *line;
+	FILE *fp;
+
+	rv = EXIT_SUCCESS;
+	/* Give getline() a well-defined empty buffer (null pointer, capacity 0)
+	   so that it never observes an indeterminate capacity */
+	line = nullptr;
+	n = 0;
+	mlib_setprogname(argv[0]);
+
+	if ((fp = fopen(TESTFILE, "r")) == nullptr)
+		err("fopen: %s:", TESTFILE);
+
+	for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) {
+		/* Strip the trailing newline; the final line may lack one */
+		if (line[nr - 1] == '\n')
+			line[--nr] = '\0';
+
+		/* Keep going after a failure so every failing case is reported */
+		if (!test((struct u8view){line, nr}, id))
+			rv = EXIT_FAILURE;
+	}
+	/* getline() reports both EOF and errors the same way; tell them apart */
+	if (ferror(fp))
+		err("getline: %s:", TESTFILE);
+
+	free(line);
+	fclose(fp);
+	return rv;
+}
+
+/* Run a single test case.  SV is one line of the form ‘input;w1|w2|…’: the
+   part before the first ‘;’ is the text to segment and the remainder is the
+   ‘|’-separated list of expected words.  ID is the case number used in
+   diagnostics.  Returns true if both the word count and every individual word
+   match the expectation, and false (after printing a warning) otherwise. */
+bool
+test(struct u8view sv, int id)
+{
+	/* Split off the input text; SV is left holding the expected words */
+	struct u8view src;
+	u8cut(&src, &sv, U";", 1);
+
+	struct u8view w;
+	dynarr(struct u8view) ws = {};
+
+	while (u8cut(&w, &sv, U"|", 1) != MBEND)
+		DAPUSH(&ws, w);
+	/* The final field has no trailing ‘|’; keep it unless it is empty, which
+	   is the case when no words are expected at all */
+	if (w.len > 0)
+		DAPUSH(&ws, w);
+
+	/* All exits go through ‘out’ so that the word list is always freed */
+	bool ok = false;
+
+	/* Assert the word count is correct */
+	size_t n;
+	if ((n = u8wcnt_human(src)) != ws.len) {
+		warn("case %d: expected %zu words but got %zu", id, ws.len, n);
+		goto out;
+	}
+
+	/* Assert the individual words are correct */
+	for (size_t i = 0; u8wnext_human(&w, &src) != 0; i++) {
+		if (!u8eq(w, ws.buf[i])) {
+			warn("case %d: expected word %zu to be ‘%.*s’ but got ‘%.*s’", id,
+			     i, SV_PRI_ARGS(ws.buf[i]), SV_PRI_ARGS(w));
+			goto out;
+		}
+	}
+
+	ok = true;
+out:
+	free(ws.buf);
+	return ok;
+}