From e109b400133f57ce129f5e274fbb90cca0052a3b Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 4 May 2024 23:55:19 +0200 Subject: Add tests for u8wnext_human() --- test/data/WordHumanBreakTest | 11 +++++++ test/run-tests | 13 ++++---- test/wbrk-human-test.c | 78 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 6 deletions(-) create mode 100644 test/data/WordHumanBreakTest create mode 100644 test/wbrk-human-test.c diff --git a/test/data/WordHumanBreakTest b/test/data/WordHumanBreakTest new file mode 100644 index 0000000..8eea00f --- /dev/null +++ b/test/data/WordHumanBreakTest @@ -0,0 +1,11 @@ +# Empty input +; + +# Latin alphabet +C23 (ISO/IEC 9899:2024), the next C standard, replaces C17 (ISO/IEC 9899:2018).;C23|ISO|IEC|9899|2024|the|next|C|standard|replaces|C17|ISO|IEC|9899|2018 + +# Greek alphabet +Το ιουλιανό ημερολόγιο (365,25 ημέρες);Το|ιουλιανό|ημερολόγιο|365,25|ημέρες + +# Maltese with Arabic +Il-lingwist Malti Ġużè Aquilina kien jemmen li 'Mnajdra' (bl-Għarbi: منيدرة);Il|lingwist|Malti|Ġużè|Aquilina|kien|jemmen|li|Mnajdra|bl|Għarbi|منيدرة diff --git a/test/run-tests b/test/run-tests index f4c53c4..2562a32 100755 --- a/test/run-tests +++ b/test/run-tests @@ -23,12 +23,13 @@ readonly FLAGS=' download 'auxiliary/GraphemeBreakTest.txt' download 'auxiliary/WordBreakTest.txt' -grep '^[^#]' data/CasefoldTest >casefold.in -grep '^[^#]' data/LowercaseTest >lower.in -grep '^[^#]' data/TitlecaseTest >title.in -grep '^[^#]' data/UppercaseTest >upper.in -sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gbrk.in -sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wbrk.in +grep '^[^#]' data/CasefoldTest >casefold.in +grep '^[^#]' data/LowercaseTest >lower.in +grep '^[^#]' data/TitlecaseTest >title.in +grep '^[^#]' data/UppercaseTest >upper.in +grep '^[^#]' data/WordHumanBreakTest >wbrk-human.in +sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gbrk.in +sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest 
>wbrk.in for src in *.c do
diff --git a/test/wbrk-human-test.c b/test/wbrk-human-test.c
new file mode 100644
index 0000000..1fa9bd8
--- /dev/null
+++ b/test/wbrk-human-test.c
@@ -0,0 +1,98 @@
+#define _GNU_SOURCE
+/* NOTE(review): the header names on the #include lines below are missing
+   from this copy of the patch; restore them from the repository. */
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define TESTFILE "wbrk-human.in"
+
+static bool test(struct u8view, int);
+
+/* Feed every line of TESTFILE to test() as one test case.  Returns
+   EXIT_SUCCESS only if every case passed; failures to open or read the
+   file are reported through err(). */
+int
+main(int, char **argv)
+{
+	int rv = EXIT_SUCCESS;
+	size_t n = 0;
+	ssize_t nr;
+	char *line = nullptr;
+	FILE *fp;
+
+	mlib_setprogname(argv[0]);
+
+	if ((fp = fopen(TESTFILE, "r")) == nullptr)
+		err("fopen: %s:", TESTFILE);
+
+	/* Case IDs are the 1-based line numbers of TESTFILE */
+	for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) {
+		/* Drop the trailing newline that getline() keeps */
+		if (line[nr - 1] == '\n')
+			line[--nr] = '\0';
+
+		if (!test((struct u8view){line, nr}, id))
+			rv = EXIT_FAILURE;
+	}
+	if (ferror(fp))
+		err("getline: %s:", TESTFILE);
+
+	free(line);
+	fclose(fp);
+	return rv;
+}
+
+/* Check one test case of the form ‘INPUT;WORD|WORD|…’.  Returns true if
+   u8wcnt_human() and u8wnext_human() split INPUT into exactly the listed
+   words, and false (after a warn() diagnostic naming case ID) otherwise. */
+bool
+test(struct u8view sv, int id)
+{
+	bool ok = false;
+	size_t n;
+	struct u8view src, w;
+	dynarr(struct u8view) ws = {};
+
+	/* Split the case at ‘;’: src is the text to break into words */
+	u8cut(&src, &sv, U";", 1);
+
+	/* What is left of sv is the ‘|’-separated list of expected words */
+	while (u8cut(&w, &sv, U"|", 1) != MBEND)
+		DAPUSH(&ws, w);
+	/* w now holds the result of the terminating cut; keep it unless empty */
+	if (w.len > 0)
+		DAPUSH(&ws, w);
+
+	/* Assert the word count is correct */
+	if ((n = u8wcnt_human(src)) != ws.len) {
+		warn("case %d: expected %zu words but got %zu", id, ws.len, n);
+		goto out;
+	}
+
+	/* Assert the individual words are correct */
+	for (size_t i = 0; u8wnext_human(&w, &src) != 0; i++) {
+		/* Never read past the expected words, even if the iterator
+		   yields more of them than u8wcnt_human() reported */
+		if (i >= ws.len) {
+			warn("case %d: expected %zu words but got more", id, ws.len);
+			goto out;
+		}
+		if (!u8eq(w, ws.buf[i])) {
+			warn("case %d: expected word %zu to be ‘%.*s’ but got ‘%.*s’", id,
+			     i, SV_PRI_ARGS(ws.buf[i]), SV_PRI_ARGS(w));
+			goto out;
+		}
+	}
+
+	ok = true;
+out:
+	/* Single exit so the expected-word array is released on every path */
+	free(ws.buf);
+	return ok;
+}
-- cgit v1.2.3