blob: 2f9e5a2f285a1a215464bc65599c44bcb521c1f7 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
#!/bin/sh
# Downloads the Unicode 15.1.0 GraphemeBreakTest and WordBreakTest files
# into data/ (when not already present) and derives the upper.in, gnext.in
# and wnext.in files from the files found in data/.

# Stop at the first command that exits with a non-zero status.
set -e
# download PATH
#
# Fetch https://www.unicode.org/Public/15.1.0/ucd/PATH into data/NAME, where
# NAME is the basename of PATH with a trailing ".txt" removed.  Nothing is
# fetched when data/NAME already exists.
#
# The transfer goes to data/NAME.part first and is renamed only after wget
# succeeds: "wget -O" creates its output file even when the transfer fails,
# and a truncated data/NAME would otherwise be mistaken for a finished
# download on the next run.
#
# Globals:  s (written) - NAME; _download_rc (written on failure)
# Returns:  0 on success or when the file is already present, otherwise the
#           exit status of the failing command.
download()
{
  s="$(basename "$1" .txt)"
  test -f "data/$s" && return 0

  # The target directory may not exist yet on a fresh checkout.
  mkdir -p data || return

  wget -q "https://www.unicode.org/Public/15.1.0/ucd/$1" -O "data/$s.part" || {
    _download_rc=$?
    rm -f "data/$s.part"
    return "$_download_rc"
  }
  mv -f "data/$s.part" "data/$s"
}
# Fetch the grapheme and word break test files from the auxiliary/
# directory of the UCD, in that order.
for break_test in GraphemeBreakTest WordBreakTest
do
  download "auxiliary/$break_test.txt"
done
# strip_break_test FILE
#
# Print FILE with all whitespace removed, everything from the first "#"
# (together with a "÷" directly in front of it) dropped, and lines that end
# up empty omitted.
#
# "÷" is written as a group, "(÷)?", so that the whole multi-byte character
# is optional.  In a non-UTF-8 locale sed works on bytes, and a bare "÷?"
# makes only the last byte optional, which lets pure "#" comment lines
# through.  [[:space:]] is used instead of "\s", which is a GNU extension.
strip_break_test()
{
  sed -En 's/[[:space:]]+//g; s/(÷)?#.*//; /./p' "$1"
}

# Keep only non-empty lines that do not start with "#".
# NOTE(review): data/UppercaseTest is not fetched by the download calls
# visible in this script; presumably it is provided some other way - confirm.
grep '^[^#]' data/UppercaseTest >upper.in

strip_break_test data/GraphemeBreakTest >gnext.in
strip_break_test data/WordBreakTest >wnext.in
|