aboutsummaryrefslogtreecommitdiff
path: root/test/gen-test-data
blob: 2f9e5a2f285a1a215464bc65599c44bcb521c1f7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/bin/sh
# Generate the test input files used by the test suite: fetch the Unicode
# GraphemeBreakTest and WordBreakTest files into data/ when they are not
# already present, then write upper.in, gnext.in and wnext.in from the
# files under data/.

# Stop at the first command that fails (e.g. a failed download) instead of
# carrying on and producing fixtures from missing data.
set -e

# download PATH
#
# Fetch PATH (relative to the Unicode 15.1.0 UCD directory) into
# data/<basename of PATH without .txt>, unless that file already exists.
#
# The transfer goes to a temporary "data/<name>.part" file that is renamed
# only after wget succeeds. wget -O creates its output file even when the
# transfer fails, so writing straight to the final name would leave an
# empty or truncated file that a later run would mistake for a finished
# download.
#
# Returns 0 if the file is present afterwards, non-zero if the download
# failed (in which case nothing is left behind in data/).
download()
{
	s="$(basename "$1" .txt)"

	# Already fetched by a previous run: nothing to do.
	test -f "data/$s" && return 0

	mkdir -p data

	if wget -q "https://www.unicode.org/Public/15.1.0/ucd/$1" -O "data/$s.part"
	then
		mv "data/$s.part" "data/$s"
	else
		# Drop the partial output so the next run retries the download.
		rm -f "data/$s.part"
		return 1
	fi
}

# Fetch the break-test files published in the UCD "auxiliary" directory.
for t in GraphemeBreakTest WordBreakTest
do
	download "auxiliary/$t.txt"
done

# strip_break_test FILE
#
# Print FILE with all whitespace removed, everything from '#' to the end of
# the line dropped (together with a '÷' directly in front of the '#'), and
# lines that end up empty omitted.
#
# '÷' is written as a group, "(÷)?", so that the '?' covers the whole
# multi-byte character even when sed runs in a single-byte locale such as
# LC_ALL=C; a bare "÷?" there makes only its last byte optional, and
# comment-only lines are then no longer removed. [[:space:]] is used instead
# of the GNU-only \s escape.
strip_break_test()
{
	sed -En 's/[[:space:]]+//g; s/(÷)?#.*//g; /./p' "$1"
}

# upper.in: every non-empty line of data/UppercaseTest not starting with '#'.
grep '^[^#]'     data/UppercaseTest     >upper.in
strip_break_test data/GraphemeBreakTest >gnext.in
strip_break_test data/WordBreakTest     >wnext.in