about | summary | refs | log | tree | commit | diff
path: root/test/gen-test-data
diff options
context:
space:
mode:
author: Thomas Voss <mail@thomasvoss.com> 2024-05-03 18:37:33 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-03 18:37:33 +0200
commit: 56539c1cae81fc95608be9170c1965346ce31c09 (patch)
tree: 23ce8f06ec10290aa8bda50208a9b3e6c063746e /test/gen-test-data
parent: 50093cd4fe4461c657e9133113016db1124904d9 (diff)
Add tests for grapheme breaking
Diffstat (limited to 'test/gen-test-data')
-rwxr-xr-x test/gen-test-data 11
1 files changed, 8 insertions, 3 deletions
diff --git a/test/gen-test-data b/test/gen-test-data
index 12ee11e..b9932ae 100755
--- a/test/gen-test-data
+++ b/test/gen-test-data
@@ -4,8 +4,13 @@ set -e
download()
{
- curl -sS "https://www.unicode.org/Public/15.1.0/ucd/$1"
+ s="$(basename "$1" .txt)"
+ test -f "data/$s" ||
+ wget -q "https://www.unicode.org/Public/15.1.0/ucd/$1" -O "data/$s"
}
-download 'auxiliary/WordBreakTest.txt' \
-| sed -En 's/\s+//g; s/÷?#.*//g; /./p' >wnext.in
+download 'auxiliary/GraphemeBreakTest.txt'
+download 'auxiliary/WordBreakTest.txt'
+
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gnext.in
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wnext.in