diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-10-30 01:51:14 +0100 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-10-30 01:51:14 +0100 |
commit | 042e43247f396a9000fead59d9bff87bf12806d6 (patch) | |
tree | e902784464cbe9ce3c5114d513b016523e7e4b29 /vendor/librune/gen | |
parent | 170b8a92434233241c990c3e9432786de3262bcd (diff) |
Completely revamp the grab source code
Some of the (many) changes are:
- Multithreading for significantly faster performance
- The -p/--predicate flag
- Byte offsets as the default
- No customizable colors (maybe this will come back later)
- Newer edition of mlib (formerly librune)
Diffstat (limited to 'vendor/librune/gen')
-rwxr-xr-x | vendor/librune/gen/gbrk | 114 | ||||
-rwxr-xr-x | vendor/librune/gen/mkfile | 37 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-cat | 104 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-prop | 116 | ||||
-rw-r--r-- | vendor/librune/gen/rtype-prop.awk | 61 |
5 files changed, 0 insertions, 432 deletions
diff --git a/vendor/librune/gen/gbrk b/vendor/librune/gen/gbrk deleted file mode 100755 index 1146327..0000000 --- a/vendor/librune/gen/gbrk +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/sh - -cache() -{ - name="/tmp/librune/gbrk/$(basename "$1")" - if test ! -f "$name" - then - mkdir -p /tmp/librune/gbrk - wget -q "$1" -O "$name" - fi -} - -set -e -cd "${0%/*}/.." -exec >include/internal/gbrk_lookup.h - -readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt' -readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt' -readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt' - -cache "$URL1" & -cache "$URL2" & -cache "$URL3" & -wait - -cat <<C -/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */ - -#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H -#define RUNE_INTERNAL_GBRK_LOOKUP_H - -/* IWYU pragma: private */ -/* clang-format off */ - -#include "types.h" - -typedef enum { - GBP_OTHER = 0, - - GBP_CTRL = 1 << 0, /* Control */ - GBP_EXT = 1 << 1, /* Extend */ - GBP_PIC = 1 << 2, /* Extended_Pictographic */ - GBP_PREP = 1 << 3, /* Prepend */ - GBP_RI = 1 << 4, /* Regional_Indicator */ - GBP_SM = 1 << 5, /* SpacingMark */ - GBP_ZWJ = 1 << 6, /* ZWJ */ - - GBP_HNGL_L = 1 << 7, /* Hangul L */ - GBP_HNGL_LV = 1 << 8, /* Hangul LV */ - GBP_HNGL_LVT = 1 << 9, /* Hangul LVT */ - GBP_HNGL_T = 1 << 10, /* Hangul T */ - GBP_HNGL_V = 1 << 11, /* Hangul V */ - - GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */ - GBP_INDC_EXT = 1 << 13, /* Indic Extend */ - GBP_INDC_LNK = 1 << 14, /* Indic Linker */ -} gbrk_prop; - -static const struct { - rune lo, hi; - gbrk_prop prop; -} gbrk_prop_tbl[] = { -C - -gawk ' -BEGIN { - FS = "( *#.*| +; +)" - map["Control"] = "CTRL" - map["Extend"] = "EXT" - map["Extended_Pictographic"] = "PIC" - map["Prepend"] = "PREP" - map["Regional_Indicator"] = "RI" - map["SpacingMark"] = "SM" - map["ZWJ"] = "ZWJ" - - map["L"] = "HNGL_L" - map["LV"] = "HNGL_LV" - map["LVT"] = 
"HNGL_LVT" - map["T"] = "HNGL_T" - map["V"] = "HNGL_V" - - map["InCB; Consonant"] = "INDC_CNSNT" - map["InCB; Extend"] = "INDC_EXT" - map["InCB; Linker"] = "INDC_LNK" -} - -map[$2] { - n = split($1, a, /\.\./) - lo = strtonum("0X" a[1]) - hi = strtonum("0X" a[n]) - - for (i = lo; i <= hi; i++) { - s = "GBP_" map[$2] - props[i] = props[i] ? props[i] " | " s : s - } -} - -END { - for (i = 0; i <= 0x10FFFF; i++) { - if (!props[i]) - continue - lo = i - while (props[lo] == props[i + 1]) - i++ - printf "\t{0x%06X, 0x%06X, %s},\n", lo, i, props[lo] - } -} -' /tmp/librune/gbrk/* | sort - -cat <<C -}; - -#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */ -C diff --git a/vendor/librune/gen/mkfile b/vendor/librune/gen/mkfile deleted file mode 100755 index a55b08a..0000000 --- a/vendor/librune/gen/mkfile +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh - -set -e -cd "${0%/*}/.." -exec >Makefile - -cat <<make -.POSIX: - -CC = cc -CFLAGS = -Wall -Wextra -Wpedantic -g -ggdb3 -fanalyzer -Iinclude - -make - -printf 'objs = \\\n' -find lib -name '*.c' | sort | sed ' - s/c$/o/ - s/.*/\t& \\/ - $s/ \\$// -' - -cat <<make - -all: \$(objs) - -make - -find lib -name '*.c' | sort | sed -E 's/(.*)c$/\1o: \1c/' - -cat <<make - -clean: - find lib -name '*.o' -delete - -ls: - @echo "alias ls='ls --color=auto -I \"*.o\" -I \"compile_commands.json\"'" >&2 -make diff --git a/vendor/librune/gen/rtype-cat b/vendor/librune/gen/rtype-cat deleted file mode 100755 index e35fb77..0000000 --- a/vendor/librune/gen/rtype-cat +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/sh - -cache() -{ - name="/tmp/librune/rtype/$(basename "$1")" - if test ! -f "$name" - then - mkdir -p /tmp/librune/rtype - wget -q "$1" -O "$name" - fi -} - -set -e -cd "${0%/*}/.." -exec >include/internal/rtype/cat.h - -readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt' -cache "$URL" - -cat <<C -/* This file is autogenerated by gen/rtype-cat; DO NOT EDIT. 
*/ - -#ifndef RUNE_INTERNAL_RTYPE_CAT_H -#define RUNE_INTERNAL_RTYPE_CAT_H - -/* IWYU pragma: private */ -/* clang-format off */ - -#include "../types.h" -#include "../../rtype.h" - -static const enum unicat rtype_cat_lat1_tbl[] = { -C - -gawk ' -BEGIN { - FS = ";" -} - -{ - s = "UC_" toupper($3) - lo = strtonum("0X" $1) - - if ($2 ~ /First/) { - getline - hi = strtonum("0X" $1) - } else - hi = lo - - for (i = lo; i <= hi; i++) - props[i] = s -} - -END { - for (i = 0; i <= 0xFF; i++) - print props[i] "," -} -' /tmp/librune/rtype/* | paste -d' ' - - - - - - - - | sed 's/^/\t/' - -cat <<C -}; - -static const struct { - rune lo, hi; - enum unicat val; -} rtype_cat_tbl[] = { -C - -gawk ' -BEGIN { - FS = ";" -} - -{ - s = "UC_" toupper($3) - lo = strtonum("0X" $1) - - if ($2 ~ /First/) { - getline - hi = strtonum("0X" $1) - } else - hi = lo - - for (i = lo; i <= hi; i++) - props[i] = s -} - -END { - for (i = 0x100; i <= 0x10FFFF; i++) { - if (!props[i]) - continue - lo = i - while (props[lo] == props[i + 1]) - i++ - printf "\t{0x%06X, 0x%06X, %s},\n", lo, i, props[lo] - } -} -' /tmp/librune/rtype/* | sort - -cat <<C -}; - -#endif /* !RUNE_INTERNAL_RTYPE_CAT_H */ -C diff --git a/vendor/librune/gen/rtype-prop b/vendor/librune/gen/rtype-prop deleted file mode 100755 index 4c62884..0000000 --- a/vendor/librune/gen/rtype-prop +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/sh - -cache() -{ - name="/tmp/librune/rtype/$(basename "$1")" - if test ! -f "$name" - then - mkdir -p /tmp/librune/rtype - wget -q "$1" -O "$name" - fi -} - -cd "${0%/*}/.." 
- -readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt' -readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt' - -cache "$URL1" & -cache "$URL2" & -wait - -props1=' -ASCII_Hex_Digit -Bidi_Control -Dash -Deprecated -Diacritic -Extender -Hex_Digit -ID_Compat_Math_Continue -ID_Compat_Math_Start -Ideographic -IDS_Binary_Operator -IDS_Trinary_Operator -IDS_Unary_Operator -Join_Control -Logical_Order_Exception -Noncharacter_Code_Point -Pattern_Syntax -Pattern_White_Space -Prepended_Concatenation_Mark -Quotation_Mark -Radical -Regional_Indicator -Sentence_Terminal -Soft_Dotted -Terminal_Punctuation -Unified_Ideograph -Variation_Selector -White_Space -' - -props2=' -Alphabetic -Cased -Case_Ignorable -Changes_When_Casefolded -Changes_When_Casemapped -Changes_When_Lowercased -Changes_When_Titlecased -Changes_When_Uppercased -Default_Ignorable_Code_Point -Grapheme_Base -Grapheme_Extend -ID_Continue -ID_Start -Indic_Conjunct_Break -Lowercase -Math -Uppercase -XID_Continue -XID_Start -' -# InCB - -{ - for prop in $props1 - do - lprop=$(echo $prop | tr A-Z a-z) - - printf 'rune_has_prop_%s…' $lprop >&2 - - gawk -M -v prop=$prop -f gen/rtype-prop.awk \ - /tmp/librune/rtype/PropList.txt \ - >lib/rtype/rune_has_prop_$lprop.c - - echo "[[unsequenced]] bool rune_has_prop_$lprop(rune);" - echo ' DONE' >&2 - done - - for prop in $props2 - do - lprop=$(echo $prop | tr A-Z a-z) - - printf 'rune_has_prop_%s…' $lprop >&2 - - gawk -M -v prop=$prop -f gen/rtype-prop.awk \ - /tmp/librune/rtype/DerivedCoreProperties.txt \ - >lib/rtype/rune_has_prop_$lprop.c - - echo "[[unsequenced]] bool rune_has_prop_$lprop(rune);" - echo ' DONE' >&2 - done -} | gawk ' - /PROP PREDICATES END/ { no = 0 } - FILENAME != "-" && !no { print } - FILENAME == "-" { funcs[++i] = $0 } - - /PROP PREDICATES START/ { - no = 1 - asort(funcs) - for (i = 1; i <= length(funcs); i++) - print funcs[i] - } -' - include/rtype.h \ -| sponge include/rtype.h diff --git 
a/vendor/librune/gen/rtype-prop.awk b/vendor/librune/gen/rtype-prop.awk deleted file mode 100644 index 59b4a99..0000000 --- a/vendor/librune/gen/rtype-prop.awk +++ /dev/null @@ -1,61 +0,0 @@ -BEGIN { - FS = "( *#.*| +; +)" - - print "/* This file is autogenerated by gen/rtype-prop; DO NOT EDIT. */" - print "" - print "#include \"rtype.h\"" - print "" - print "#include \"internal/common.h\"" - print "" -} - -$2 == prop || (prop == "Indic_Conjunct_Break" && $2 ~ /InCB;/) { - n = split($1, a, /\.\./) - lo = strtonum("0x" a[1]) - hi = strtonum("0x" a[n]) - - for (i = lo; i <= hi; i++) - xs[i] = 1 -} - -END { - for (i = 0; i <= 0xFF; i++) { - if (xs[i]) - mask = or(mask, lshift(1, i)) - } - print "#if BIT_LOOKUP" - printf "static const unsigned _BitInt(LATIN1_MAX + 1) mask = 0x%Xuwb;\n", \ - mask - print "#endif" - print "" - print "static const struct {" - print "\trune lo, hi;" - print "} lookup_tbl[] = {" - - for (i = 0; i <= 0x10FFFF; i++) { - if (!xs[i]) - continue - lo = i - while (xs[i + 1]) - i++ - printf "\t{0x%06X, 0x%06X},\n", lo, i - } - - print "};" - print "" - print "#define TYPE bool" - print "#define TABLE lookup_tbl" - print "#define DEFAULT false" - print "#define HAS_VALUE 0" - print "#include \"internal/rtype/lookup-func.h\"" - print "" - print "bool" - printf "rune_has_prop_%s(rune ch)\n", tolower(prop) - print "{" - print "\treturn" - print "#if BIT_LOOKUP" - print "\t\tch <= LATIN1_MAX ? (mask & ch) :" - print "#endif" - print "\t\tlookup(ch);" - print "}" -} |