diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-05-04 04:21:39 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-04 04:21:39 +0200 |
commit | 8f7f007a52f39c1e03817d417e95877526945b45 (patch) | |
tree | d911a8887d9422920d152665e46c5fed93d12d91 /test | |
parent | ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (diff) |
Fix tests and generalize *brk tests
Diffstat (limited to 'test')
-rw-r--r-- | test/_brk-test.h | 127 | ||||
-rw-r--r-- | test/_case-test.h | 30 | ||||
-rw-r--r-- | test/gbrk-test.c | 122 | ||||
-rwxr-xr-x | test/run-tests | 4 | ||||
-rw-r--r-- | test/wbrk-test.c | 122 |
5 files changed, 147 insertions, 258 deletions
diff --git a/test/_brk-test.h b/test/_brk-test.h new file mode 100644 index 0000000..e468f23 --- /dev/null +++ b/test/_brk-test.h @@ -0,0 +1,127 @@ +#ifndef BRKTYPE +# error "BRKTYPE is not defined!" +#endif + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> + +#include <alloc.h> +#include <dynarr.h> +#include <errors.h> +#include <macros.h> +#include <mbstring.h> +#include <rune.h> +#include <unicode/string.h> + +#define TESTFILE STR(BRKTYPE) "brk.in" +#define ITERFUNC CONCAT(CONCAT(u8, BRKTYPE), next) +#define CNTFUNC CONCAT(CONCAT(u8, BRKTYPE), cnt) + +static bool test(struct u8view, int); +static int hexdigits(rune); + +int +main(int, char **argv) +{ + int rv; + size_t n; + ssize_t nr; + char *line; + FILE *fp; + + rv = EXIT_SUCCESS; + line = nullptr; + mlib_setprogname(argv[0]); + + if ((fp = fopen(TESTFILE, "r")) == nullptr) + err("fopen: %s:", TESTFILE); + + for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) { + if (line[nr - 1] == '\n') + line[--nr] = '\0'; + + if (!test((struct u8view){line, (size_t)nr}, id)) + rv = EXIT_FAILURE; + } + if (ferror(fp)) + err("getline: %s:", TESTFILE); + + free(line); + fclose(fp); + return rv; +} + +bool +test(struct u8view sv, int id) +{ + size_t total = 0; + + typedef dynarr(char8_t) item; + dynarr(item) items = {}; + struct u8view sv_cpy = sv; + + do { + rune op, ch; + + u8next(&op, &sv_cpy); + sscanf(sv_cpy.p, "%" SCNxRUNE, &ch); + int off = hexdigits(ch); + off = MAX(4, off); + VSHFT(&sv_cpy, off); + + char8_t buf[U8_LEN_MAX] = {}; + int w = rtou8(buf, sizeof(buf), ch); + total += w; + + if (op == U'÷') + DAPUSH(&items, (item){}); + DAEXTEND(&items.buf[items.len - 1], buf, w); + } while (sv_cpy.len > 0); + + size_t off = 0; + char8_t *p = bufalloc(nullptr, 1, total); + da_foreach (&items, g) { + memcpy(p + off, g->buf, g->len); + off += g->len; + } + + struct u8view buf = {p, total}; + + /* Assert the item count is correct */ + size_t items_got = CNTFUNC(buf); + if (items_got != items.len) { + warn("case %d: expected %zu items(s) but got %zu: ‘%s’", id, items.len, + items_got, sv.p); + return false; + } + + /* Assert the individual items are correct */ + struct u8view it1, buf_cpy = buf; + for (size_t i = 0; ITERFUNC(&it1, &buf_cpy); i++) { + item it2 = items.buf[i]; + if (!u8eq(it1, ((struct u8view){it2.buf, it2.len}))) { + warn("case %d: expected item ‘%.*s’ but got ‘%.*s’", id, + (int)it2.len, it2.buf, SV_PRI_ARGS(it1)); + return false; + } + } + + da_foreach (&items, wd) + free(wd->buf); + free(items.buf); + free(p); + + return true; +} + +int +hexdigits(rune ch) +{ + int n = 0; + do { + ch /= 16; + n++; + } while (ch != 0); + return n; +} diff --git a/test/_case-test.h b/test/_case-test.h index 121667a..12e3a57 100644 --- a/test/_case-test.h +++ b/test/_case-test.h @@ -52,39 +52,37 @@ main(int, char **argv) bool test(const char8_t *line, int id) { + struct u8view sv = {line, strlen(line)}; struct u8view before, after, flags; - before.p = line; - after.p = strchr(line, ';') + 1; - before.len = after.p - before.p - 1; - flags.p = strchr(after.p, ';') + 1; - after.len = flags.p - after.p - 1; - flags.len = strlen(flags.p); + before = u8split(&sv, ';'); + after = u8split(&sv, ';'); + flags = u8split(&sv, ';'); - enum caseflags cf = u8eq(U8_ARGS(flags), U8_ARGS(U8("ẞ"))) ? CF_ẞ - : u8eq(U8_ARGS(flags), U8_ARGS(U8("AZ"))) ? CF_LANG_AZ - : u8eq(U8_ARGS(flags), U8_ARGS(U8("LT"))) ? CF_LANG_LT - : u8eq(U8_ARGS(flags), U8_ARGS(U8("NL"))) ? CF_LANG_NL - : 0; + enum caseflags cf = u8eq(flags, U8("ẞ")) ? CF_ẞ + : u8eq(flags, U8("AZ")) ? CF_LANG_AZ + : u8eq(flags, U8("LT")) ? CF_LANG_LT + : u8eq(flags, U8("NL")) ? CF_LANG_NL + : 0; char8_t *buf = bufalloc(nullptr, 1, after.len); - size_t bufsz = FUNC(nullptr, 0, U8_ARGS(before), cf); + size_t bufsz = FUNC(nullptr, 0, before, cf); if (bufsz != after.len) { warn("case %d: expected %scased buffer size of %zu but got %zu " "(flags=‘%.*s’)", - id, STR(CASETYPE), after.len, bufsz, U8_PRI_ARGS(flags)); + id, STR(CASETYPE), after.len, bufsz, SV_PRI_ARGS(flags)); return false; } - bufsz = FUNC(buf, bufsz, U8_ARGS(before), cf); + bufsz = FUNC(buf, bufsz, before, cf); if (bufsz != after.len) { warn("case %d: expected %scased length of %zu but got %zu " "(flags=‘%.*s’)", - id, STR(CASETYPE), after.len, bufsz, U8_PRI_ARGS(flags)); + id, STR(CASETYPE), after.len, bufsz, SV_PRI_ARGS(flags)); return false; } if (!memeq(buf, after.p, bufsz)) { warn("case %d: expected ‘%.*s’ but got ‘%.*s’ (flags=‘%.*s’)", id, - U8_PRI_ARGS(after), (int)bufsz, buf, U8_PRI_ARGS(flags)); + SV_PRI_ARGS(after), (int)bufsz, buf, SV_PRI_ARGS(flags)); return false; } diff --git a/test/gbrk-test.c b/test/gbrk-test.c index cf91121..4e4a602 100644 --- a/test/gbrk-test.c +++ b/test/gbrk-test.c @@ -1,120 +1,2 @@ -#define _GNU_SOURCE -#include <stdio.h> -#include <stdlib.h> - -#include <alloc.h> -#include <dynarr.h> -#include <errors.h> -#include <macros.h> -#include <mbstring.h> -#include <rune.h> -#include <unicode/string.h> - -#define TESTFILE "gnext.in" - -static bool test(const char8_t *, size_t, int); -static int hexdigits(rune); - -int -main(int, char **argv) -{ - int rv; - size_t n; - ssize_t nr; - char *line; - FILE *fp; - - rv = EXIT_SUCCESS; - line = nullptr; - mlib_setprogname(argv[0]); - - if ((fp = fopen(TESTFILE, "r")) == nullptr) - err("fopen: %s:", TESTFILE); - - for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) { - if (line[nr - 1] == '\n') - line[--nr] = '\0'; - - if (!test(line, (size_t)nr, id)) - rv = EXIT_FAILURE; - } - if (ferror(fp)) - err("getline: %s:", TESTFILE); - - free(line); - fclose(fp); - return rv; -} - -bool -test(const char8_t *line, size_t n, int id) -{ - size_t total = 0; - const char8_t *line2 = line; - - typedef dynarr(char8_t) graph; - dynarr(graph) graphs = {}; - - do { - rune op, ch; - - u8next(&op, &line2, &n); - sscanf(line2, "%" SCNxRUNE, &ch); - int off = hexdigits(ch); - off = MAX(4, off); - line2 += off, n -= off; - - char8_t buf[U8_LEN_MAX] = {}; - int w = rtou8(buf, sizeof(buf), ch); - total += w; - - if (op == U'÷') - DAPUSH(&graphs, (graph){}); - DAEXTEND(&graphs.buf[graphs.len - 1], buf, w); - } while (n > 0); - - size_t off = 0; - char8_t *buf = bufalloc(nullptr, 1, total); - da_foreach (&graphs, g) { - memcpy(buf + off, g->buf, g->len); - off += g->len; - } - - /* Assert the grapheme count is correct */ - size_t graphs_got = u8gcnt(buf, total); - if (graphs_got != graphs.len) { - warn("case %d: expected %zu grapheme(s) but got %zu: ‘%s’", id, - graphs.len, graphs_got, line); - return false; - } - - /* Assert the individual graphemes are correct */ - struct u8view g; - const char8_t *buf_cpy = buf; - for (size_t i = 0; u8gnext(&g, &buf_cpy, &total); i++) { - graph g2 = graphs.buf[i]; - if (!u8eq(g.p, g.len, g2.buf, g2.len)) { - warn("case %d: expected grapheme ‘%.*s’ but got ‘%.*s’", id, - (int)g2.len, g2.buf, (int)g.len, g.p); - return false; - } - } - - da_foreach (&graphs, wd) - free(wd->buf); - free(graphs.buf); - free(buf); - - return true; -} - -int -hexdigits(rune ch) -{ - int n = 0; - do { - ch /= 16; - n++; - } while (ch != 0); - return n; -} +#define BRKTYPE g +#include "_brk-test.h" diff --git a/test/run-tests b/test/run-tests index f191c33..82d3e00 100755 --- a/test/run-tests +++ b/test/run-tests @@ -26,8 +26,8 @@ download 'auxiliary/WordBreakTest.txt' grep '^[^#]' data/LowercaseTest >lower.in grep '^[^#]' data/TitlecaseTest >title.in grep '^[^#]' data/UppercaseTest >upper.in -sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gnext.in -sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wnext.in +sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gbrk.in +sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wbrk.in for src in *.c do diff --git a/test/wbrk-test.c b/test/wbrk-test.c index 8425b5b..0690eae 100644 --- a/test/wbrk-test.c +++ b/test/wbrk-test.c @@ -1,120 +1,2 @@ -#define _GNU_SOURCE -#include <stdio.h> -#include <stdlib.h> - -#include <alloc.h> -#include <dynarr.h> -#include <errors.h> -#include <macros.h> -#include <mbstring.h> -#include <rune.h> -#include <unicode/string.h> - -#define TESTFILE "wnext.in" - -static bool test(const char8_t *, size_t, int); -static int hexdigits(rune); - -int -main(int, char **argv) -{ - int rv; - size_t n; - ssize_t nr; - char *line; - FILE *fp; - - rv = EXIT_SUCCESS; - line = nullptr; - mlib_setprogname(argv[0]); - - if ((fp = fopen(TESTFILE, "r")) == nullptr) - err("fopen: %s:", TESTFILE); - - for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) { - if (line[nr - 1] == '\n') - line[--nr] = '\0'; - - if (!test(line, (size_t)nr, id)) - rv = EXIT_FAILURE; - } - if (ferror(fp)) - err("getline: %s:", TESTFILE); - - free(line); - fclose(fp); - return rv; -} - -bool -test(const char8_t *line, size_t n, int id) -{ - size_t total = 0; - const char8_t *line2 = line; - - typedef dynarr(char8_t) word; - dynarr(word) words = {}; - - do { - rune op, ch; - - u8next(&op, &line2, &n); - sscanf(line2, "%" SCNxRUNE, &ch); - int off = hexdigits(ch); - off = MAX(4, off); - line2 += off, n -= off; - - char8_t buf[U8_LEN_MAX] = {}; - int w = rtou8(buf, sizeof(buf), ch); - total += w; - - if (op == U'÷') - DAPUSH(&words, (word){}); - DAEXTEND(&words.buf[words.len - 1], buf, w); - } while (n > 0); - - size_t off = 0; - char8_t *buf = bufalloc(nullptr, 1, total); - da_foreach (&words, wd) { - memcpy(buf + off, wd->buf, wd->len); - off += wd->len; - } - - /* Assert the word count is correct */ - size_t words_got = u8wcnt(buf, total); - if (words_got != words.len) { - warn("case %d: expected %zu word(s) but got %zu: ‘%s’", id, words.len, - words_got, line); - return false; - } - - /* Assert the individual words are correct */ - struct u8view wd; - const char8_t *buf_cpy = buf; - for (size_t i = 0; u8wnext(&wd, &buf_cpy, &total); i++) { - word wd2 = words.buf[i]; - if (!u8eq(wd.p, wd.len, wd2.buf, wd2.len)) { - warn("case %d: expected word ‘%.*s’ but got ‘%.*s’", id, - (int)wd2.len, wd2.buf, (int)wd.len, wd.p); - return false; - } - } - - da_foreach (&words, wd) - free(wd->buf); - free(words.buf); - free(buf); - - return true; -} - -int -hexdigits(rune ch) -{ - int n = 0; - do { - ch /= 16; - n++; - } while (ch != 0); - return n; -} +#define BRKTYPE w +#include "_brk-test.h" |