summary | refs | log | tree | commit | diff
path: root/.local/bin
diff options
context:
space:
mode:
author: Thomas Voss <mail@thomasvoss.com> 2024-05-05 12:59:50 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-05 12:59:50 +0200
commit: d34cd53842098bd84d7d6aa775225c66854306da (patch)
tree: bdd9fdb12ed378b51d52d47817a4b909da2fba65 /.local/bin
parent: 55b4077aff0cc27cca00aa9b3309abf67185296c (diff)
Make the Unicode data more useful (include U+XXXX)
Diffstat (limited to '.local/bin')
-rwxr-xr-x .local/bin/uni | 19
1 files changed, 11 insertions, 8 deletions
diff --git a/.local/bin/uni b/.local/bin/uni
index 66b9950..72f9b70 100755
--- a/.local/bin/uni
+++ b/.local/bin/uni
@@ -5,14 +5,17 @@ set -e NOTIFY_SHORT `basename $args[0]
func setup {
curl 'https://www.unicode.org/Public/UNIDATA/UnicodeData.txt'
- | sed -E '
- s/;[^;]*//2g
- s/\<(.)([A-Z]*)/\1\L\2/2g
- /^[^;]*;</d
- /Compatibility/d
- /Variation Selector/d
- s/[^;]*/\\u&/
- s/^\\u([^;]{5})/\\U000\1/
+ | awk '
+ BEGIN { FS = ";" }
+ $2 == "<control>" { $2 = $11 }
+ $2 !~ /(First|Last)>$/ {
+ while (match($2, /[A-Z]{2,}([^)]|$)/)) {
+ car = substr($2, RSTART, 1)
+ cdr = substr($2, RSTART + 1, RLENGTH - 1)
+ sub(/[A-Z]{2,}([^)]|$)/, car tolower(cdr), $2)
+ }
+ printf "\\u%s;U+%s %s\n", $1, $1, $2
+ }
' >$DATA
}