From 56539c1cae81fc95608be9170c1965346ce31c09 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Fri, 3 May 2024 18:37:33 +0200
Subject: Add tests for grapheme breaking

---
 test/gen-test-data | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'test/gen-test-data')

diff --git a/test/gen-test-data b/test/gen-test-data
index 12ee11e..b9932ae 100755
--- a/test/gen-test-data
+++ b/test/gen-test-data
@@ -4,8 +4,13 @@ set -e
 
 download()
 {
-	curl -sS "https://www.unicode.org/Public/15.1.0/ucd/$1"
+	s="$(basename "$1" .txt)"
+	test -f "data/$s" ||
+		wget -q "https://www.unicode.org/Public/15.1.0/ucd/$1" -O "data/$s"
 }
 
-download 'auxiliary/WordBreakTest.txt' \
-| sed -En 's/\s+//g; s/÷?#.*//g; /./p' >wnext.in
+download 'auxiliary/GraphemeBreakTest.txt'
+download 'auxiliary/WordBreakTest.txt'
+
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/GraphemeBreakTest >gnext.in
+sed -En 's/\s+//g; s/÷?#.*//g; /./p' data/WordBreakTest >wnext.in
-- 
cgit v1.2.3