#!/bin/sh
# Fetch the Unicode 15.1.0 break-test files into data/ (cached across runs)
# and derive the *.in fixture files from them.
set -e

# download PATH
#   PATH is relative to https://www.unicode.org/Public/15.1.0/ucd/.
#   The file is stored as data/<basename of PATH without ".txt">.
#   An existing data/ copy is reused and nothing is fetched.
#   On a wget failure the function returns wget's exit status.
download() {
  s="$(basename "$1" .txt)"
  if test -f "data/$s"; then
    return 0
  fi
  mkdir -p data
  # wget -O creates/truncates its target before the transfer completes, so
  # write to a scratch name and rename only on success; otherwise a failed
  # fetch would leave a file that the cache check above accepts next time.
  wget -q "https://www.unicode.org/Public/15.1.0/ucd/$1" -O "data/$s.part" || {
    rc=$?
    rm -f "data/$s.part"
    echo "download failed: $1" >&2
    return "$rc"
  }
  mv "data/$s.part" "data/$s"
}

# strip_break_test FILE
#   Writes FILE to stdout with all whitespace removed, everything from '#'
#   (together with a '÷' directly before it, if any) to end of line dropped,
#   and lines that end up empty suppressed.
strip_break_test() {
  sed -En 's/[[:space:]]+//g; s/÷?#.*//g; /./p' "$1"
}

download 'auxiliary/GraphemeBreakTest.txt'
download 'auxiliary/WordBreakTest.txt'

# Keep only non-empty lines whose first character is not '#'.
# NOTE(review): data/UppercaseTest is not fetched by this script; presumably
# it is maintained in data/ by other means -- confirm.
grep '^[^#]' data/UppercaseTest >upper.in

strip_break_test data/GraphemeBreakTest >gnext.in
strip_break_test data/WordBreakTest >wnext.in