From 042e43247f396a9000fead59d9bff87bf12806d6 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Wed, 30 Oct 2024 01:51:14 +0100 Subject: Completely revamp the grab source code Some of the (many) few changes are: - Multithreading for significantly faster performance - The -p/--predicate flag - Byte offsets as the default - No customizable colors (maybe this will come back later) - Newer edition of mlib (formerly librune) --- vendor/librune/lib/utf8/rtou8.c | 38 --------------- vendor/librune/lib/utf8/u8bspn.c | 22 --------- vendor/librune/lib/utf8/u8cbspn.c | 20 -------- vendor/librune/lib/utf8/u8chk.c | 20 -------- vendor/librune/lib/utf8/u8chkr.c | 9 ---- vendor/librune/lib/utf8/u8chr.c | 97 -------------------------------------- vendor/librune/lib/utf8/u8cspn.c | 20 -------- vendor/librune/lib/utf8/u8len.c | 13 ----- vendor/librune/lib/utf8/u8next.c | 16 ------- vendor/librune/lib/utf8/u8prev.c | 40 ---------------- vendor/librune/lib/utf8/u8rchr.c | 87 ---------------------------------- vendor/librune/lib/utf8/u8set.c | 24 ---------- vendor/librune/lib/utf8/u8spn.c | 22 --------- vendor/librune/lib/utf8/u8tor.c | 31 ------------ vendor/librune/lib/utf8/u8tor_uc.c | 26 ---------- vendor/librune/lib/utf8/u8wdth.c | 13 ----- 16 files changed, 498 deletions(-) delete mode 100644 vendor/librune/lib/utf8/rtou8.c delete mode 100644 vendor/librune/lib/utf8/u8bspn.c delete mode 100644 vendor/librune/lib/utf8/u8cbspn.c delete mode 100644 vendor/librune/lib/utf8/u8chk.c delete mode 100644 vendor/librune/lib/utf8/u8chkr.c delete mode 100644 vendor/librune/lib/utf8/u8chr.c delete mode 100644 vendor/librune/lib/utf8/u8cspn.c delete mode 100644 vendor/librune/lib/utf8/u8len.c delete mode 100644 vendor/librune/lib/utf8/u8next.c delete mode 100644 vendor/librune/lib/utf8/u8prev.c delete mode 100644 vendor/librune/lib/utf8/u8rchr.c delete mode 100644 vendor/librune/lib/utf8/u8set.c delete mode 100644 vendor/librune/lib/utf8/u8spn.c delete mode 100644 vendor/librune/lib/utf8/u8tor.c delete mode 100644 vendor/librune/lib/utf8/u8tor_uc.c delete mode 100644 vendor/librune/lib/utf8/u8wdth.c (limited to 'vendor/librune/lib/utf8') diff --git a/vendor/librune/lib/utf8/rtou8.c b/vendor/librune/lib/utf8/rtou8.c deleted file mode 100644 index 94cce34..0000000 --- a/vendor/librune/lib/utf8/rtou8.c +++ /dev/null @@ -1,38 +0,0 @@ -#include - -#include "utf8.h" - -#include "internal/common.h" - -int -rtou8(char8_t *s, rune ch, size_t n) -{ - if (ch <= _1B_MAX) { - if (n >= 1) - s[0] = ch; - return 1; - } else if (ch <= _2B_MAX) { - if (n >= 2) { - s[0] = (ch >> 6) | 0xC0; - s[1] = (ch & 0x3F) | 0x80; - } - return 2; - } else if (ch <= _3B_MAX) { - if (n >= 3) { - s[0] = (ch >> 12) | 0xE0; - s[1] = ((ch >> 6) & 0x3F) | 0x80; - s[2] = (ch & 0x3F) | 0x80; - } - return 3; - } else if (ch <= _4B_MAX) { - if (n >= 4) { - s[0] = (ch >> 18) | 0xF0; - s[1] = ((ch >> 12) & 0x3F) | 0x80; - s[2] = ((ch >> 6) & 0x3F) | 0x80; - s[3] = (ch & 0x3F) | 0x80; - } - return 4; - } - - unreachable(); -} diff --git a/vendor/librune/lib/utf8/u8bspn.c b/vendor/librune/lib/utf8/u8bspn.c deleted file mode 100644 index 3ccd469..0000000 --- a/vendor/librune/lib/utf8/u8bspn.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "utf8.h" - -size_t -u8bspn(const char8_t *s, size_t n, const rune *p, size_t m) -{ - rune ch; - size_t k = 0; - - while (u8next(&ch, &s, &n)) { - for (size_t i = 0; i < m; i++) { - if (p[i] == ch) { - k += u8wdth(ch); - goto found; - } - } - - break; -found:; - } - - return k; -} diff --git a/vendor/librune/lib/utf8/u8cbspn.c b/vendor/librune/lib/utf8/u8cbspn.c deleted file mode 100644 index b51c300..0000000 --- a/vendor/librune/lib/utf8/u8cbspn.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "utf8.h" - -size_t -u8cbspn(const char8_t *s, size_t n, const rune *p, size_t m) -{ - rune ch; - size_t k = 0; - - while (u8next(&ch, &s, &n)) { - for (size_t i = 0; i < m; i++) { - if (p[i] == ch) - goto found; - } - - k += u8wdth(ch); - } - -found: - return k; -} diff --git a/vendor/librune/lib/utf8/u8chk.c b/vendor/librune/lib/utf8/u8chk.c deleted file mode 100644 index 4fd1afc..0000000 --- a/vendor/librune/lib/utf8/u8chk.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "rune.h" -#define _RUNE_NO_MACRO_WRAPPER 1 -#include "utf8.h" - -#include "internal/common.h" - -char8_t * -u8chk(const char8_t *s, size_t n) -{ - while (n) { - rune ch; - int m = u8tor(&ch, s); - - if (ch == RUNE_ERROR) - return (char8_t *)s; - n -= m; - } - - return nullptr; -} diff --git a/vendor/librune/lib/utf8/u8chkr.c b/vendor/librune/lib/utf8/u8chkr.c deleted file mode 100644 index 4510f16..0000000 --- a/vendor/librune/lib/utf8/u8chkr.c +++ /dev/null @@ -1,9 +0,0 @@ -#include "rune.h" -#include "utf8.h" - -bool -u8chkr(rune ch) -{ - return !((ch >= 0xD800 && ch <= 0xDFFF) || ch == 0xFFFE || ch == 0xFFFF - || ch > RUNE_MAX); -} diff --git a/vendor/librune/lib/utf8/u8chr.c b/vendor/librune/lib/utf8/u8chr.c deleted file mode 100644 index c387300..0000000 --- a/vendor/librune/lib/utf8/u8chr.c +++ /dev/null @@ -1,97 +0,0 @@ -#include -#include -#include - -#define _RUNE_NO_MACRO_WRAPPER 1 -#include "utf8.h" - -/* NOTE: The memmem*() functions were taken directly from the memmem() - implementation on OpenBSD. As a result, these functions are licensed under - OpenBSDs 2-Clause BSD License instead of this libraries 0-Clause BSD License. - - The license for these functions is as follows: - - Copyright © 2005–2020 Rich Felker, et al. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - “Software”), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -static char8_t * -memmem2(const char8_t *h, size_t k, const char8_t *n) -{ - uint16_t hw, nw; - hw = h[0] << 8 | h[1]; - nw = n[0] << 8 | n[1]; - - for (h += 2, k -= 2; k; k--, hw = hw << 8 | *h++) { - if (hw == nw) - return (char8_t *)h - 2; - } - return hw == nw ? (char8_t *)h - 2 : nullptr; -} - -static char8_t * -memmem3(const char8_t *h, size_t k, const char8_t *n) -{ - uint32_t hw, nw; - hw = h[0] << 24 | h[1] << 16 | h[2] << 8; - nw = n[0] << 24 | n[1] << 16 | n[2] << 8; - - for (h += 3, k -= 3; k; k--, hw = (hw | *h++) << 8) { - if (hw == nw) - return (char8_t *)h - 3; - } - return hw == nw ? (char8_t *)h - 3 : nullptr; -} - -static char8_t * -memmem4(const char8_t *h, size_t k, const char8_t *n) -{ - uint32_t hw, nw; - hw = h[0] << 24 | h[1] << 16 | h[2] << 8 | h[3]; - nw = n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3]; - - for (h += 4, k -= 4; k; k--, hw = hw << 8 | *h++) { - if (hw == nw) - return (char8_t *)h - 4; - } - return hw == nw ? (char8_t *)h - 4 : nullptr; -} - -char8_t * -u8chr(const char8_t *s, rune ch, size_t n) -{ - char8_t buf[U8_LEN_MAX]; - int m = rtou8(buf, ch, sizeof(buf)); - - if (n < (size_t)m) - return nullptr; - switch (m) { - case 1: - return memchr(s, ch, n); - case 2: - return memmem2(s, n, buf); - case 3: - return memmem3(s, n, buf); - case 4: - return memmem4(s, n, buf); - } - - unreachable(); -} diff --git a/vendor/librune/lib/utf8/u8cspn.c b/vendor/librune/lib/utf8/u8cspn.c deleted file mode 100644 index 7d46a0b..0000000 --- a/vendor/librune/lib/utf8/u8cspn.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "utf8.h" - -size_t -u8cspn(const char8_t *s, size_t n, const rune *p, size_t m) -{ - rune ch; - size_t k = 0; - - while (u8next(&ch, &s, &n)) { - for (size_t i = 0; i < m; i++) { - if (p[i] == ch) - goto found; - } - - k++; - } - -found: - return k; -} diff --git a/vendor/librune/lib/utf8/u8len.c b/vendor/librune/lib/utf8/u8len.c deleted file mode 100644 index fc66ee7..0000000 --- a/vendor/librune/lib/utf8/u8len.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "utf8.h" - -size_t -u8len(const char8_t *s, size_t n) -{ - rune unused; - size_t m = 0; - - while (u8next(&unused, &s, &n)) - m++; - - return m; -} diff --git a/vendor/librune/lib/utf8/u8next.c b/vendor/librune/lib/utf8/u8next.c deleted file mode 100644 index 12c521d..0000000 --- a/vendor/librune/lib/utf8/u8next.c +++ /dev/null @@ -1,16 +0,0 @@ -#define _RUNE_NO_MACRO_WRAPPER 1 -#include "utf8.h" - -int -u8next(rune *ch, const char8_t **s, size_t *n) -{ - int m = 0; - - if (*n) { - m = u8tor_uc(ch, *s); - *n -= m; - *s += m; - } - - return m; -} diff --git a/vendor/librune/lib/utf8/u8prev.c b/vendor/librune/lib/utf8/u8prev.c deleted file mode 100644 index a219ae9..0000000 --- a/vendor/librune/lib/utf8/u8prev.c +++ /dev/null @@ -1,40 +0,0 @@ -#define _RUNE_NO_MACRO_WRAPPER 1 -#include "rune.h" -#include "utf8.h" - -#include "internal/common.h" - -int -u8prev(rune *ch, const char8_t **p, const char8_t *start) -{ - int off; - bool match = true; - const char8_t *s = *p; - ptrdiff_t d = s - start; - - if (d <= 0) { - return 0; - } else if (U1(s[-1])) { - *ch = s[-1]; - off = 1; - } else if (d > 1 && UC(s[-1]) && U2(s[-2])) { - *ch = ((s[-2] & 0x1F) << 6) | (s[-1] & 0x3F); - off = 2; - } else if (d > 2 && UC(s[-1]) && UC(s[-2]) && U3(s[-3])) { - *ch = ((s[-3] & 0x0F) << 12) | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F); - off = 3; - } else if (d > 3 && UC(s[-1]) && UC(s[-2]) && UC(s[-3]) && U4(s[-4])) { - *ch = ((s[-4] & 0x07) << 18) | ((s[-3] & 0x3F) << 12) - | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F); - off = 4; - } else - match = false; - - if (!(match && u8chkr(*ch))) { - *ch = RUNE_ERROR; - off = 1; - } - - *p -= off; - return off; -} diff --git a/vendor/librune/lib/utf8/u8rchr.c b/vendor/librune/lib/utf8/u8rchr.c deleted file mode 100644 index b2668e4..0000000 --- a/vendor/librune/lib/utf8/u8rchr.c +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include - -#define _RUNE_NO_MACRO_WRAPPER 1 -#include "utf8.h" - -static char8_t * -memrchr1(const char8_t *s, size_t k, const char8_t *n) -{ - for (const char8_t *p = s + k - 1; k-- > 0; p--) { - if (*p == *n) - return (char8_t *)p; - } - return nullptr; -} - -static char8_t * -memrchr2(const char8_t *h, size_t k, const char8_t *n) -{ - uint16_t hw, nw; - const char8_t *H = h + k - 1; - hw = H[-1] << 8 | H[-0]; - nw = n[+0] << 8 | n[+1]; - - for (H -= 2, k -= 2; k; k--, hw = hw >> 8 | (*H-- << 8)) { - if (hw == nw) - return (char8_t *)H + 1; - } - - return hw == nw ? (char8_t *)H + 1 : nullptr; -} - -static char8_t * -memrchr3(const char8_t *h, size_t k, const char8_t *n) -{ - uint32_t hw, nw; - const char8_t *H = h + k - 1; - hw = H[-2] << 24 | H[-1] << 16 | H[-0] << 8; - nw = n[+0] << 24 | n[+1] << 16 | n[+2] << 8; - - for (H -= 3, k -= 3; k; - k--, hw = (hw >> 8 | (*H-- << 24)) & UINT32_C(0xFFFFFF00)) - { - if (hw == nw) - return (char8_t *)H + 1; - } - - return hw == nw ? (char8_t *)H + 1 : nullptr; -} - -static char8_t * -memrchr4(const char8_t *h, size_t k, const char8_t *n) -{ - uint32_t hw, nw; - const char8_t *H = h + k - 1; - hw = H[-3] << 24 | H[-2] << 16 | H[-1] << 8 | H[-0]; - nw = n[+0] << 24 | n[+1] << 16 | n[+2] << 8 | n[+3]; - - for (H -= 4, k -= 4; k; k--, hw = hw >> 8 | (*H-- << 24)) { - if (hw == nw) - return (char8_t *)H + 1; - } - - return hw == nw ? (char8_t *)H + 1 : nullptr; -} - -char8_t * -u8rchr(const char8_t *s, rune ch, size_t n) -{ - char8_t buf[U8_LEN_MAX]; - int m = rtou8(buf, ch, sizeof(buf)); - - if (n < (size_t)m) - return nullptr; - switch (m) { - case 1: - return (char8_t *)memrchr1(s, n, buf); - case 2: - return (char8_t *)memrchr2(s, n, buf); - case 3: - return (char8_t *)memrchr3(s, n, buf); - case 4: - return (char8_t *)memrchr4(s, n, buf); - } - - unreachable(); -} diff --git a/vendor/librune/lib/utf8/u8set.c b/vendor/librune/lib/utf8/u8set.c deleted file mode 100644 index 6c57991..0000000 --- a/vendor/librune/lib/utf8/u8set.c +++ /dev/null @@ -1,24 +0,0 @@ -#include - -#include "utf8.h" - -#include "internal/common.h" - -size_t -u8set(char8_t *s, rune ch, size_t n) -{ - int m; - char8_t buf[U8_LEN_MAX]; - - if (n == 0) - return 0; - if (ch <= _1B_MAX) { - memset(s, ch, n); - return n; - } - m = rtou8(buf, ch, sizeof(buf)); - for (size_t i = 0; i < n; i += m) - memcpy(s + i, buf, m); - - return n - n % m; -} diff --git a/vendor/librune/lib/utf8/u8spn.c b/vendor/librune/lib/utf8/u8spn.c deleted file mode 100644 index beeb33f..0000000 --- a/vendor/librune/lib/utf8/u8spn.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "utf8.h" - -size_t -u8spn(const char8_t *s, size_t n, const rune *p, size_t m) -{ - rune ch; - size_t k = 0; - - while (u8next(&ch, &s, &n)) { - for (size_t i = 0; i < m; i++) { - if (p[i] == ch) { - k++; - goto found; - } - } - - break; -found:; - } - - return k; -} diff --git a/vendor/librune/lib/utf8/u8tor.c b/vendor/librune/lib/utf8/u8tor.c deleted file mode 100644 index 152a174..0000000 --- a/vendor/librune/lib/utf8/u8tor.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "rune.h" -#include "utf8.h" - -#include "internal/common.h" - -int -u8tor(rune *ch, const char8_t *s) -{ - int n = 0; - - if (U1(s[0])) { - *ch = s[0]; - n = 1; - } else if (U2(s[0]) && UC(s[1])) { - *ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F); - n = 2; - } else if (U3(s[0]) && UC(s[1]) && UC(s[2])) { - *ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); - n = 3; - } else if (U4(s[0]) && UC(s[1]) && UC(s[2]) && UC(s[3])) { - *ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12) - | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); - n = 4; - } - - if (n && u8chkr(*ch)) - return n; - - *ch = RUNE_ERROR; - return 1; -} diff --git a/vendor/librune/lib/utf8/u8tor_uc.c b/vendor/librune/lib/utf8/u8tor_uc.c deleted file mode 100644 index 3448b59..0000000 --- a/vendor/librune/lib/utf8/u8tor_uc.c +++ /dev/null @@ -1,26 +0,0 @@ -#include - -#include "utf8.h" - -#include "internal/common.h" - -int -u8tor_uc(rune *ch, const char8_t *s) -{ - if (U1(s[0])) { - *ch = s[0]; - return 1; - } else if (U2(s[0])) { - *ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F); - return 2; - } else if (U3(s[0])) { - *ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); - return 3; - } else if (U4(s[0])) { - *ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12) - | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); - return 4; - } - - unreachable(); -} diff --git a/vendor/librune/lib/utf8/u8wdth.c b/vendor/librune/lib/utf8/u8wdth.c deleted file mode 100644 index 0bc5785..0000000 --- a/vendor/librune/lib/utf8/u8wdth.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "utf8.h" - -#include "internal/common.h" - -int -u8wdth(rune ch) -{ - return ch <= _1B_MAX ? 1 - : ch <= _2B_MAX ? 2 - : ch <= _3B_MAX ? 3 - : ch <= _4B_MAX ? 4 - : 0; -} -- cgit v1.2.3