diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-05-03 18:37:33 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-03 18:37:33 +0200 |
commit | 56539c1cae81fc95608be9170c1965346ce31c09 (patch) | |
tree | 23ce8f06ec10290aa8bda50208a9b3e6c063746e /test/gen-test-data | |
parent | 50093cd4fe4461c657e9133113016db1124904d9 (diff) |
Add tests for grapheme breaking
Diffstat (limited to 'test/gen-test-data')
-rwxr-xr-x | test/gen-test-data | 11 |
1 file changed, 8 insertions, 3 deletions
```diff
diff --git a/test/gen-test-data b/test/gen-test-data
index 12ee11e..b9932ae 100755
--- a/test/gen-test-data
+++ b/test/gen-test-data
@@ -4,8 +4,13 @@ set -e
 
 download()
 {
-	curl -sS "https://www.unicode.org/Public/15.1.0/ucd/$1"
+	s="$(basename "$1" .txt)"
+	test -f "data/$s" ||
+		wget -q "https://www.unicode.org/Public/15.1.0/ucd/$1" -O "data/$s"
 }
 
-download 'auxiliary/WordBreakTest.txt' \
-| sed -En 's/\s+//g; s/÷?#.*//g; /./p' >wnext.in
+download 'auxiliary/GraphemeBreakTest.txt'
+download 'auxiliary/WordBreakTest.txt'
+
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gnext.in
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wnext.in
```