From d34cd53842098bd84d7d6aa775225c66854306da Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sun, 5 May 2024 12:59:50 +0200 Subject: Make the Unicode data more useful (include U+XXXX) --- .local/bin/uni | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to '.local/bin') diff --git a/.local/bin/uni b/.local/bin/uni index 66b9950..72f9b70 100755 --- a/.local/bin/uni +++ b/.local/bin/uni @@ -5,14 +5,17 @@ set -e NOTIFY_SHORT `basename $args[0] func setup { curl 'https://www.unicode.org/Public/UNIDATA/UnicodeData.txt' - | sed -E ' - s/;[^;]*//2g - s/\<(.)([A-Z]*)/\1\L\2/2g - /^[^;]*;" { $2 = $11 } + $2 !~ /(First|Last)>$/ { + while (match($2, /[A-Z]{2,}([^)]|$)/)) { + car = substr($2, RSTART, 1) + cdr = substr($2, RSTART + 1, RLENGTH - 1) + sub(/[A-Z]{2,}([^)]|$)/, car tolower(cdr), $2) + } + printf "\\u%s;U+%s %s\n", $1, $1, $2 + } ' >$DATA } -- cgit v1.2.3