aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-05-04 00:19:07 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-05-04 00:19:07 +0200
commit 0453dab3f45494ab949748ccc720605d6a6e831a (patch)
tree 2beb2924896ba29c1327a4aff670156a7aedff55
parent 1b068cedf3c113f27121182b29e096ccb3e1b9c1 (diff)
Add tests for u8upper()
-rw-r--r-- test/data/UppercaseTest 23
-rwxr-xr-x test/gen-test-data 1
-rw-r--r-- test/upper-test.c 84
3 files changed, 108 insertions, 0 deletions
diff --git a/test/data/UppercaseTest b/test/data/UppercaseTest
new file mode 100644
index 0000000..76612ac
--- /dev/null
+++ b/test/data/UppercaseTest
@@ -0,0 +1,23 @@
+# Empty input
+;;
+
+# Latin alphabet
+Lorem ipsum dolor sit amet, consectetur adipiscing elit.;LOREM IPSUM DOLOR SIT AMET, CONSECTETUR ADIPISCING ELIT.;
+
+# Greek alphabet
+Δεν υπάρχει κανείς που να αγαπάει τον ίδιο τον πόνο;ΔΕΝ ΥΠΆΡΧΕΙ ΚΑΝΕΊΣ ΠΟΥ ΝΑ ΑΓΑΠΆΕΙ ΤΟΝ ΊΔΙΟ ΤΟΝ ΠΌΝΟ;
+
+# Cyrillic alphabet
+Слава Україні проти російської агресії!;СЛАВА УКРАЇНІ ПРОТИ РОСІЙСЬКОЇ АГРЕСІЇ!;
+
+# Uppercase ‘ß’ to ‘ẞ’ with CF_ẞ, and to ‘SS’ otherwise
+Was ich nicht weiß, macht mich nicht heiß;WAS ICH NICHT WEISS, MACHT MICH NICHT HEISS;
+Was ich nicht weiß, macht mich nicht heiß;WAS ICH NICHT WEIẞ, MACHT MICH NICHT HEIẞ;ẞ
+
+# Uppercase ‘i’ to ‘İ’ in Azeri/Turkish
+istanbul’luyum;ISTANBUL’LUYUM;
+istanbul’luyum;İSTANBUL’LUYUM;AZ
+
+# Remove U+0307 COMBINING DOT ABOVE in Lithuanian
+ràsti, mèsti, ki̇̀lo;RÀSTI, MÈSTI, Kİ̀LO;
+ràsti, mèsti, ki̇̀lo;RÀSTI, MÈSTI, KÌLO;LT
diff --git a/test/gen-test-data b/test/gen-test-data
index b9932ae..2f9e5a2 100755
--- a/test/gen-test-data
+++ b/test/gen-test-data
@@ -12,5 +12,6 @@ download()
download 'auxiliary/GraphemeBreakTest.txt'
download 'auxiliary/WordBreakTest.txt'
+grep '^[^#]' data/UppercaseTest >upper.in
sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gnext.in
sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wnext.in
diff --git a/test/upper-test.c b/test/upper-test.c
new file mode 100644
index 0000000..6881e0c
--- /dev/null
+++ b/test/upper-test.c
@@ -0,0 +1,84 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <alloc.h>
+#include <errors.h>
+#include <macros.h>
+#include <mbstring.h>
+#include <unicode/string.h>
+
+#define TESTFILE "upper.in"
+
+static bool test(const char8_t *, int);
+
+/* Read TESTFILE line by line and hand each line to test() as a
+   numbered case, starting at 1.  The exit status is EXIT_FAILURE if at
+   least one case fails and EXIT_SUCCESS otherwise; failures to open or
+   read the file are reported through err(). */
+int
+main(int, char **argv)
+{
+	mlib_setprogname(argv[0]);
+
+	FILE *stream = fopen(TESTFILE, "r");
+	if (stream == nullptr)
+		err("fopen: %s:", TESTFILE);
+
+	int status = EXIT_SUCCESS;
+	int id = 1;
+	/* The buffer starts out null, so getline() is left to allocate (and
+	   grow) it; it is released once after the loop. */
+	char *record = nullptr;
+	size_t cap;
+	ssize_t len;
+
+	while ((len = getline(&record, &cap, stream)) > 0) {
+		/* Strip the line terminator so test() only sees the record */
+		if (record[len - 1] == '\n')
+			record[--len] = '\0';
+
+		if (!test(record, id))
+			status = EXIT_FAILURE;
+		id++;
+	}
+	if (ferror(stream))
+		err("getline: %s:", TESTFILE);
+
+	free(record);
+	fclose(stream);
+	return status;
+}
+
+/* Run one test case.
+
+   LINE is a NUL-terminated record of the form ‘before;after;flags’:
+   the text to uppercase, the expected result, and an optional flag
+   name.  The flag names ‘ẞ’, ‘AZ’ and ‘LT’ select CF_ẞ, CF_LANG_AZ and
+   CF_LANG_LT respectively; any other value (including the empty
+   string) runs u8upper() with no flags set.  ID is the case number
+   used in diagnostics.
+
+   Returns true if u8upper() produced exactly the expected bytes.  On a
+   length or content mismatch, or if LINE is missing one of its two ‘;’
+   separators, a warning is printed and false is returned. */
+bool
+test(const char8_t *line, int id)
+{
+	/* strchr() returns a null pointer when no ‘;’ is found, so both
+	   separators must be located before any arithmetic is done on
+	   them. */
+	const char8_t *sep1 = strchr(line, ';');
+	const char8_t *sep2 = sep1 != nullptr ? strchr(sep1 + 1, ';') : nullptr;
+	if (sep2 == nullptr) {
+		warn("case %d: malformed test record ‘%s’", id, line);
+		return false;
+	}
+
+	struct u8view before, after, flags;
+	before.p = line;
+	before.len = sep1 - line;
+	after.p = sep1 + 1;
+	after.len = sep2 - after.p;
+	flags.p = sep2 + 1;
+	flags.len = strlen(flags.p);
+
+	enum caseflags cf = 0;
+	if (u8eq(U8_ARGS(flags), U8_ARGS(U8("ẞ"))))
+		cf |= CF_ẞ;
+	else if (u8eq(U8_ARGS(flags), U8_ARGS(U8("AZ"))))
+		cf |= CF_LANG_AZ;
+	else if (u8eq(U8_ARGS(flags), U8_ARGS(U8("LT"))))
+		cf |= CF_LANG_LT;
+
+	/* The output buffer is sized to the expected result; the returned
+	   length is compared first so that the byte comparison below only
+	   runs when the lengths agree. */
+	bool ok = false;
+	char8_t *buf = bufalloc(nullptr, 1, after.len);
+	size_t ret = u8upper(buf, after.len, U8_ARGS(before), cf);
+
+	if (ret != after.len) {
+		warn("case %d: expected uppercased length of %zu but got %zu "
+		     "(flags=‘%.*s’)",
+		     id, after.len, ret, U8_PRI_ARGS(flags));
+	} else if (!memeq(buf, after.p, after.len)) {
+		warn("case %d: expected ‘%.*s’ but got ‘%.*s’ (flags=‘%.*s’)", id,
+		     U8_PRI_ARGS(after), (int)after.len, buf, U8_PRI_ARGS(flags));
+	} else
+		ok = true;
+
+	/* Single exit so the buffer is released on failures too */
+	free(buf);
+	return ok;
+}