From 0453dab3f45494ab949748ccc720605d6a6e831a Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 4 May 2024 00:19:07 +0200 Subject: Add tests for u8upper() --- test/data/UppercaseTest | 23 ++++++++++++++ test/gen-test-data | 1 + test/upper-test.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 test/data/UppercaseTest create mode 100644 test/upper-test.c diff --git a/test/data/UppercaseTest b/test/data/UppercaseTest new file mode 100644 index 0000000..76612ac --- /dev/null +++ b/test/data/UppercaseTest @@ -0,0 +1,23 @@ +# Empty input +;; + +# Latin alphabet +Lorem ipsum dolor sit amet, consectetur adipiscing elit.;LOREM IPSUM DOLOR SIT AMET, CONSECTETUR ADIPISCING ELIT.; + +# Greek alphabet +Δεν υπάρχει κανείς που να αγαπάει τον ίδιο τον πόνο;ΔΕΝ ΥΠΆΡΧΕΙ ΚΑΝΕΊΣ ΠΟΥ ΝΑ ΑΓΑΠΆΕΙ ΤΟΝ ΊΔΙΟ ΤΟΝ ΠΌΝΟ; + +# Cyrillic alphabet +Слава Україні проти російської агресії!;СЛАВА УКРАЇНІ ПРОТИ РОСІЙСЬКОЇ АГРЕСІЇ!; + +# Capital ‘ß’ with CF_ẞ, and ‘SS’ otherwise +Was ich nicht weiß, macht mich nicht heiß;WAS ICH NICHT WEISS, MACHT MICH NICHT HEISS; +Was ich nicht weiß, macht mich nicht heiß;WAS ICH NICHT WEIẞ, MACHT MICH NICHT HEIẞ;ẞ + +# Uppercase ‘i’ to ‘İ’ in Azeri/Turkish +istanbul’luyum;ISTANBUL’LUYUM; +istanbul’luyum;İSTANBUL’LUYUM;AZ + +# Remove U+0307 COMBINING DOT ABOVE in Lithuanian +ràsti, mèsti, ki̇̀lo;RÀSTI, MÈSTI, Kİ̀LO; +ràsti, mèsti, ki̇̀lo;RÀSTI, MÈSTI, KÌLO;LT diff --git a/test/gen-test-data b/test/gen-test-data index b9932ae..2f9e5a2 100755 --- a/test/gen-test-data +++ b/test/gen-test-data @@ -12,5 +12,6 @@ download() download 'auxiliary/GraphemeBreakTest.txt' download 'auxiliary/WordBreakTest.txt' +grep '^[^#]' data/UppercaseTest >upper.in sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gnext.in sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wnext.in diff --git a/test/upper-test.c b/test/upper-test.c new file mode 100644 index 0000000..6881e0c --- /dev/null +++ b/test/upper-test.c @@ -0,0 +1,84 @@ +#define _GNU_SOURCE +#include +#include +#include + +#include +#include +#include +#include +#include + +#define TESTFILE "upper.in" + +static bool test(const char8_t *, int); + +int +main(int, char **argv) +{ + int rv; + size_t n; + ssize_t nr; + char *line; + FILE *fp; + + rv = EXIT_SUCCESS; + line = nullptr; + mlib_setprogname(argv[0]); + + if ((fp = fopen(TESTFILE, "r")) == nullptr) + err("fopen: %s:", TESTFILE); + + for (int id = 1; (nr = getline(&line, &n, fp)) > 0; id++) { + if (line[nr - 1] == '\n') + line[--nr] = '\0'; + + if (!test(line, id)) + rv = EXIT_FAILURE; + } + if (ferror(fp)) + err("getline: %s:", TESTFILE); + + free(line); + fclose(fp); + return rv; +} + +bool +test(const char8_t *line, int id) +{ + struct u8view before, after, flags; + before.p = line; + after.p = strchr(line, ';') + 1; + before.len = after.p - before.p - 1; + flags.p = strchr(after.p, ';') + 1; + after.len = flags.p - after.p - 1; + flags.len = strlen(flags.p); + + enum caseflags cf = 0; + if (u8eq(U8_ARGS(flags), U8_ARGS(U8("ẞ")))) + cf |= CF_ẞ; + else if (u8eq(U8_ARGS(flags), U8_ARGS(U8("AZ")))) + cf |= CF_LANG_AZ; + else if (u8eq(U8_ARGS(flags), U8_ARGS(U8("LT")))) + cf |= CF_LANG_LT; + + char8_t *buf = bufalloc(nullptr, 1, after.len); + size_t ret = u8upper(buf, after.len, U8_ARGS(before), cf); + + if (ret != after.len) { + warn("case %d: expected uppercased length of %zu but got %zu " + "(flags=‘%.*s’)", + id, after.len, ret, U8_PRI_ARGS(flags)); + return false; + } + + if (!memeq(buf, after.p, after.len)) { + warn("case %d: expected ‘%.*s’ but got ‘%.*s’ (flags=‘%.*s’)", id, + U8_PRI_ARGS(after), (int)after.len, buf, U8_PRI_ARGS(flags)); + return false; + } + + free(buf); + return true; +} -- cgit v1.2.3