blob: 50028b8adbe1c17d9cea49c79c1ea99782fff12b (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
#!/bin/sh
# Fetches the Unicode 15.1.0 GraphemeBreakTest and WordBreakTest files into
# data/ when they are not already there, then writes lower.in, upper.in,
# gnext.in and wnext.in in the current directory from the files under data/.
#
# Abort at the first command that exits with a non-zero status.
set -e
# download PATH
#
# Fetch https://www.unicode.org/Public/15.1.0/ucd/PATH into data/NAME, where
# NAME is the last component of PATH with a trailing ".txt" removed.  Nothing
# is fetched when data/NAME already exists as a regular file.
#
# Arguments: $1 - path of the file below the 15.1.0 ucd/ directory
# Globals:   s (written: NAME), rc (written on failure: wget's exit status)
# Returns:   0 when data/NAME is present afterwards; otherwise the non-zero
#            status of the step that failed.
download()
{
  s="$(basename "$1" .txt)"

  # Cached copy present: skip the network round-trip.
  if test -f "data/$s"
  then
    return 0
  fi

  mkdir -p data || return

  # wget -O creates (or truncates) its output file before the outcome of the
  # transfer is known.  Writing straight to data/NAME would therefore leave an
  # empty or partial file behind after a failure, and the "already present"
  # check above would then skip the download on every later run.  Fetch into a
  # scratch file and move it into place only once wget has succeeded.
  if wget -q "https://www.unicode.org/Public/15.1.0/ucd/$1" -O "data/$s.part"
  then
    mv -f "data/$s.part" "data/$s"
  else
    rc=$?
    rm -f "data/$s.part"
    return "$rc"
  fi
}
# Make sure both break-test files are present under data/.
for suite in GraphemeBreakTest WordBreakTest
do
  download "auxiliary/$suite.txt"
done

# Case test data: keep only lines whose first character is not '#'
# (which also drops empty lines).
keep_data='^[^#]'
grep "$keep_data" data/LowercaseTest >lower.in
grep "$keep_data" data/UppercaseTest >upper.in

# Break-test data, applied per line: delete every run of whitespace, cut
# everything from a '#' (together with a '÷' directly before it, if any) to
# the end of the line, then print only lines that still contain a character.
condense='s/\s+//g; s/÷?#.*//g; /./p'
sed -En "$condense" data/GraphemeBreakTest >gnext.in
sed -En "$condense" data/WordBreakTest >wnext.in
|