aboutsummaryrefslogtreecommitdiff
path: root/vendor/librune/lib/utf8
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-10-30 01:51:14 +0100
committerThomas Voss <mail@thomasvoss.com> 2024-10-30 01:51:14 +0100
commit042e43247f396a9000fead59d9bff87bf12806d6 (patch)
treee902784464cbe9ce3c5114d513b016523e7e4b29 /vendor/librune/lib/utf8
parent170b8a92434233241c990c3e9432786de3262bcd (diff)
Completely revamp the grab source code
Some of the (many) few changes are: - Multithreading for significantly faster performance - The -p/--predicate flag - Byte offsets as the default - No customizable colors (maybe this will come back later) - Newer edition of mlib (formerly librune)
Diffstat (limited to 'vendor/librune/lib/utf8')
-rw-r--r--vendor/librune/lib/utf8/rtou8.c38
-rw-r--r--vendor/librune/lib/utf8/u8bspn.c22
-rw-r--r--vendor/librune/lib/utf8/u8cbspn.c20
-rw-r--r--vendor/librune/lib/utf8/u8chk.c20
-rw-r--r--vendor/librune/lib/utf8/u8chkr.c9
-rw-r--r--vendor/librune/lib/utf8/u8chr.c97
-rw-r--r--vendor/librune/lib/utf8/u8cspn.c20
-rw-r--r--vendor/librune/lib/utf8/u8len.c13
-rw-r--r--vendor/librune/lib/utf8/u8next.c16
-rw-r--r--vendor/librune/lib/utf8/u8prev.c40
-rw-r--r--vendor/librune/lib/utf8/u8rchr.c87
-rw-r--r--vendor/librune/lib/utf8/u8set.c24
-rw-r--r--vendor/librune/lib/utf8/u8spn.c22
-rw-r--r--vendor/librune/lib/utf8/u8tor.c31
-rw-r--r--vendor/librune/lib/utf8/u8tor_uc.c26
-rw-r--r--vendor/librune/lib/utf8/u8wdth.c13
16 files changed, 0 insertions, 498 deletions
diff --git a/vendor/librune/lib/utf8/rtou8.c b/vendor/librune/lib/utf8/rtou8.c
deleted file mode 100644
index 94cce34..0000000
--- a/vendor/librune/lib/utf8/rtou8.c
+++ /dev/null
@@ -1,38 +0,0 @@
-#include <stddef.h>
-
-#include "utf8.h"
-
-#include "internal/common.h"
-
-/* Encode the rune CH as UTF-8 into S, a buffer with room for N bytes.
-   Returns the length of the encoding in bytes (1 to 4).  Nothing is written
-   when N is smaller than that length, so callers can compare the return
-   value with N to detect a too-small buffer.  CH must not exceed _4B_MAX;
-   anything larger falls through to unreachable(). */
-int
-rtou8(char8_t *s, rune ch, size_t n)
-{
-	if (ch <= _1B_MAX) {
-		if (n >= 1)
-			s[0] = ch;
-		return 1;
-	}
-
-	if (ch <= _2B_MAX) {
-		if (n < 2)
-			return 2;
-		s[0] = 0xC0 | (ch >> 6);
-		s[1] = 0x80 | (ch & 0x3F);
-		return 2;
-	}
-
-	if (ch <= _3B_MAX) {
-		if (n < 3)
-			return 3;
-		s[0] = 0xE0 | (ch >> 12);
-		s[1] = 0x80 | ((ch >> 6) & 0x3F);
-		s[2] = 0x80 | (ch & 0x3F);
-		return 3;
-	}
-
-	if (ch <= _4B_MAX) {
-		if (n < 4)
-			return 4;
-		s[0] = 0xF0 | (ch >> 18);
-		s[1] = 0x80 | ((ch >> 12) & 0x3F);
-		s[2] = 0x80 | ((ch >> 6) & 0x3F);
-		s[3] = 0x80 | (ch & 0x3F);
-		return 4;
-	}
-
-	unreachable();
-}
diff --git a/vendor/librune/lib/utf8/u8bspn.c b/vendor/librune/lib/utf8/u8bspn.c
deleted file mode 100644
index 3ccd469..0000000
--- a/vendor/librune/lib/utf8/u8bspn.c
+++ /dev/null
@@ -1,22 +0,0 @@
-#include "utf8.h"
-
-/* Return the length in bytes of the leading run of S (N bytes long) that
-   consists only of runes found in the set P, which holds M runes.  Scanning
-   stops at the first rune that is not in P or when the input is exhausted. */
-size_t
-u8bspn(const char8_t *s, size_t n, const rune *p, size_t m)
-{
-	rune ch;
-	size_t bytes = 0;
-
-	while (u8next(&ch, &s, &n)) {
-		size_t i = 0;
-
-		/* Linear search of the accept set. */
-		while (i < m && p[i] != ch)
-			i++;
-		if (i == m)
-			break;
-
-		bytes += u8wdth(ch);
-	}
-
-	return bytes;
-}
diff --git a/vendor/librune/lib/utf8/u8cbspn.c b/vendor/librune/lib/utf8/u8cbspn.c
deleted file mode 100644
index b51c300..0000000
--- a/vendor/librune/lib/utf8/u8cbspn.c
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "utf8.h"
-
-/* Return the length in bytes of the leading run of S (N bytes long) that
-   contains no rune from the set P, which holds M runes.  Scanning stops at
-   the first rune that is in P or when the input is exhausted. */
-size_t
-u8cbspn(const char8_t *s, size_t n, const rune *p, size_t m)
-{
-	rune ch;
-	size_t bytes = 0;
-
-	while (u8next(&ch, &s, &n)) {
-		for (size_t i = 0; i < m; i++) {
-			if (p[i] == ch)
-				return bytes;
-		}
-		bytes += u8wdth(ch);
-	}
-
-	return bytes;
-}
diff --git a/vendor/librune/lib/utf8/u8chk.c b/vendor/librune/lib/utf8/u8chk.c
deleted file mode 100644
index 4fd1afc..0000000
--- a/vendor/librune/lib/utf8/u8chk.c
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "rune.h"
-#define _RUNE_NO_MACRO_WRAPPER 1
-#include "utf8.h"
-
-#include "internal/common.h"
-
-/* Validate the N bytes at S by decoding them rune by rune with u8tor().
-   Returns a pointer to the first position at which decoding yields
-   RUNE_ERROR, or at which the decoded sequence would extend past the end of
-   the buffer.  Returns nullptr when the whole buffer decodes cleanly. */
-char8_t *
-u8chk(const char8_t *s, size_t n)
-{
-	while (n) {
-		rune ch;
-		int m = u8tor(&ch, s);
-
-		/* A sequence longer than the remaining input is truncated; reject it
-		   here so that the subtraction below cannot wrap around. */
-		if (ch == RUNE_ERROR || (size_t)m > n)
-			return (char8_t *)s;
-
-		/* Step past the rune just decoded; without this the same rune would
-		   be decoded again on every iteration. */
-		s += m;
-		n -= m;
-	}
-
-	return nullptr;
-}
diff --git a/vendor/librune/lib/utf8/u8chkr.c b/vendor/librune/lib/utf8/u8chkr.c
deleted file mode 100644
index 4510f16..0000000
--- a/vendor/librune/lib/utf8/u8chkr.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "rune.h"
-#include "utf8.h"
-
-/* Report whether CH is acceptable as a rune.  Rejected are values above
-   RUNE_MAX, the range 0xD800 to 0xDFFF, and the two values 0xFFFE and
-   0xFFFF. */
-bool
-u8chkr(rune ch)
-{
-	if (ch > RUNE_MAX)
-		return false;
-	if (ch >= 0xD800 && ch <= 0xDFFF)
-		return false;
-	return ch != 0xFFFE && ch != 0xFFFF;
-}
diff --git a/vendor/librune/lib/utf8/u8chr.c b/vendor/librune/lib/utf8/u8chr.c
deleted file mode 100644
index c387300..0000000
--- a/vendor/librune/lib/utf8/u8chr.c
+++ /dev/null
@@ -1,97 +0,0 @@
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-#define _RUNE_NO_MACRO_WRAPPER 1
-#include "utf8.h"
-
-/* NOTE: The memmem*() functions were taken directly from the memmem()
- implementation on OpenBSD. As a result, these functions are licensed under
- OpenBSD's 2-Clause BSD License instead of this library's 0-Clause BSD License.
-
- The license for these functions is as follows:
-
- Copyright © 2005–2020 Rich Felker, et al.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- “Software”), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-/* Find the first occurrence of the 2-byte needle N in the K bytes at H.
-   The caller must guarantee K >= 2.  A 16-bit window is slid over the
-   haystack one byte at a time and compared with the needle as an integer.
-   Returns a pointer to the match, or nullptr if there is none. */
-static char8_t *
-memmem2(const char8_t *h, size_t k, const char8_t *n)
-{
-	uint16_t needle = n[0] << 8 | n[1];
-	uint16_t window = h[0] << 8 | h[1];
-
-	h += 2;
-	k -= 2;
-	while (k) {
-		if (window == needle)
-			return (char8_t *)h - 2;
-		/* Shift the oldest byte out and the next haystack byte in. */
-		window = window << 8 | *h++;
-		k--;
-	}
-
-	/* The final window position has not been compared yet. */
-	return window == needle ? (char8_t *)h - 2 : nullptr;
-}
-
-/* Find the first occurrence of the 3-byte needle N in the K bytes at H.
-   The caller must guarantee K >= 3.  The three bytes under inspection are
-   kept in the upper 24 bits of a 32-bit window whose low byte is always
-   zero.  Returns a pointer to the match, or nullptr if there is none. */
-static char8_t *
-memmem3(const char8_t *h, size_t k, const char8_t *n)
-{
-	/* Bytes are widened to uint32_t before shifting: a plain "byte << 24"
-	   is evaluated as int and overflows for any byte >= 0x80. */
-	uint32_t nw = (uint32_t)n[0] << 24 | (uint32_t)n[1] << 16
-	            | (uint32_t)n[2] << 8;
-	uint32_t hw = (uint32_t)h[0] << 24 | (uint32_t)h[1] << 16
-	            | (uint32_t)h[2] << 8;
-
-	for (h += 3, k -= 3; k; k--, hw = (hw | *h++) << 8) {
-		if (hw == nw)
-			return (char8_t *)h - 3;
-	}
-
-	/* The final window position has not been compared yet. */
-	return hw == nw ? (char8_t *)h - 3 : nullptr;
-}
-
-/* Find the first occurrence of the 4-byte needle N in the K bytes at H.
-   The caller must guarantee K >= 4.  A 32-bit window is slid over the
-   haystack one byte at a time and compared with the needle as an integer.
-   Returns a pointer to the match, or nullptr if there is none. */
-static char8_t *
-memmem4(const char8_t *h, size_t k, const char8_t *n)
-{
-	/* Bytes are widened to uint32_t before shifting: a plain "byte << 24"
-	   is evaluated as int and overflows for any byte >= 0x80. */
-	uint32_t nw = (uint32_t)n[0] << 24 | (uint32_t)n[1] << 16
-	            | (uint32_t)n[2] << 8 | n[3];
-	uint32_t hw = (uint32_t)h[0] << 24 | (uint32_t)h[1] << 16
-	            | (uint32_t)h[2] << 8 | h[3];
-
-	for (h += 4, k -= 4; k; k--, hw = hw << 8 | *h++) {
-		if (hw == nw)
-			return (char8_t *)h - 4;
-	}
-
-	/* The final window position has not been compared yet. */
-	return hw == nw ? (char8_t *)h - 4 : nullptr;
-}
-
-/* Locate the first occurrence of the rune CH in the N bytes at S.  CH is
-   encoded with rtou8() and the search is dispatched on the length of that
-   encoding.  Returns a pointer to the first byte of the match, or nullptr
-   if CH does not occur or S is shorter than the encoding. */
-char8_t *
-u8chr(const char8_t *s, rune ch, size_t n)
-{
-	char8_t enc[U8_LEN_MAX];
-	int len = rtou8(enc, ch, sizeof(enc));
-
-	/* The helpers below require at least LEN bytes of haystack. */
-	if ((size_t)len > n)
-		return nullptr;
-
-	if (len == 1)
-		return memchr(s, ch, n);
-	if (len == 2)
-		return memmem2(s, n, enc);
-	if (len == 3)
-		return memmem3(s, n, enc);
-	if (len == 4)
-		return memmem4(s, n, enc);
-
-	unreachable();
-}
diff --git a/vendor/librune/lib/utf8/u8cspn.c b/vendor/librune/lib/utf8/u8cspn.c
deleted file mode 100644
index 7d46a0b..0000000
--- a/vendor/librune/lib/utf8/u8cspn.c
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "utf8.h"
-
-/* Return the number of runes at the start of S (N bytes long) that precede
-   the first rune found in the set P, which holds M runes.  If no rune of S
-   is in P, every rune of S is counted. */
-size_t
-u8cspn(const char8_t *s, size_t n, const rune *p, size_t m)
-{
-	rune ch;
-	size_t runes = 0;
-
-	while (u8next(&ch, &s, &n)) {
-		for (size_t i = 0; i < m; i++) {
-			if (p[i] == ch)
-				return runes;
-		}
-		runes++;
-	}
-
-	return runes;
-}
diff --git a/vendor/librune/lib/utf8/u8len.c b/vendor/librune/lib/utf8/u8len.c
deleted file mode 100644
index fc66ee7..0000000
--- a/vendor/librune/lib/utf8/u8len.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "utf8.h"
-
-/* Count the runes in the N bytes at S by stepping through the buffer with
-   u8next() until it reports that no input remains. */
-size_t
-u8len(const char8_t *s, size_t n)
-{
-	rune ignored;
-	size_t count;
-
-	for (count = 0; u8next(&ignored, &s, &n); count++)
-		;
-
-	return count;
-}
diff --git a/vendor/librune/lib/utf8/u8next.c b/vendor/librune/lib/utf8/u8next.c
deleted file mode 100644
index 12c521d..0000000
--- a/vendor/librune/lib/utf8/u8next.c
+++ /dev/null
@@ -1,16 +0,0 @@
-#define _RUNE_NO_MACRO_WRAPPER 1
-#include "utf8.h"
-
-/* Decode the rune at *S into *CH using u8tor_uc(), then advance *S past it
-   and reduce the remaining byte count *N by the same amount.  Returns the
-   number of bytes consumed, or 0 (leaving *CH, *S and *N untouched) when *N
-   is already zero. */
-int
-u8next(rune *ch, const char8_t **s, size_t *n)
-{
-	int w;
-
-	if (*n == 0)
-		return 0;
-
-	w = u8tor_uc(ch, *s);
-	*s += w;
-	*n -= w;
-	return w;
-}
diff --git a/vendor/librune/lib/utf8/u8prev.c b/vendor/librune/lib/utf8/u8prev.c
deleted file mode 100644
index a219ae9..0000000
--- a/vendor/librune/lib/utf8/u8prev.c
+++ /dev/null
@@ -1,40 +0,0 @@
-#define _RUNE_NO_MACRO_WRAPPER 1
-#include "rune.h"
-#include "utf8.h"
-
-#include "internal/common.h"
-
-/* Decode the rune that ends immediately before *P, never reading before
-   START.  On return *CH holds the rune and *P has been moved back to its
-   first byte; the return value is the number of bytes stepped over.  If the
-   preceding bytes do not form a sequence accepted by the U1/U2/U3/U4/UC
-   tests, or the decoded value fails u8chkr(), *CH is set to RUNE_ERROR and
-   *P moves back a single byte.  Returns 0 without touching *CH or *P when
-   *P is not past START. */
-int
-u8prev(rune *ch, const char8_t **p, const char8_t *start)
-{
-	const char8_t *end = *p;
-	ptrdiff_t avail = end - start;
-	int len = 0; /* 0 means no candidate sequence matched */
-
-	if (avail <= 0)
-		return 0;
-
-	/* Try each sequence length in turn; AVAIL guards every backwards read. */
-	if (U1(end[-1])) {
-		*ch = end[-1];
-		len = 1;
-	} else if (avail > 1 && UC(end[-1]) && U2(end[-2])) {
-		*ch = ((end[-2] & 0x1F) << 6) | (end[-1] & 0x3F);
-		len = 2;
-	} else if (avail > 2 && UC(end[-1]) && UC(end[-2]) && U3(end[-3])) {
-		*ch = ((end[-3] & 0x0F) << 12) | ((end[-2] & 0x3F) << 6)
-		    | (end[-1] & 0x3F);
-		len = 3;
-	} else if (avail > 3 && UC(end[-1]) && UC(end[-2]) && UC(end[-3])
-	           && U4(end[-4]))
-	{
-		*ch = ((end[-4] & 0x07) << 18) | ((end[-3] & 0x3F) << 12)
-		    | ((end[-2] & 0x3F) << 6) | (end[-1] & 0x3F);
-		len = 4;
-	}
-
-	if (len == 0 || !u8chkr(*ch)) {
-		*ch = RUNE_ERROR;
-		len = 1;
-	}
-
-	*p -= len;
-	return len;
-}
diff --git a/vendor/librune/lib/utf8/u8rchr.c b/vendor/librune/lib/utf8/u8rchr.c
deleted file mode 100644
index b2668e4..0000000
--- a/vendor/librune/lib/utf8/u8rchr.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include <stddef.h>
-#include <stdint.h>
-
-#define _RUNE_NO_MACRO_WRAPPER 1
-#include "utf8.h"
-
-/* Find the last occurrence of the single byte *N in the K bytes at S.
-   Returns a pointer to the match, or nullptr if there is none.  Indexing
-   is used instead of a decrementing pointer so that no pointer to before
-   the start of S is ever formed. */
-static char8_t *
-memrchr1(const char8_t *s, size_t k, const char8_t *n)
-{
-	while (k-- > 0) {
-		if (s[k] == *n)
-			return (char8_t *)(s + k);
-	}
-	return nullptr;
-}
-
-/* Find the last occurrence of the 2-byte needle N in the K bytes at H.
-   The caller must guarantee K >= 2.  A 16-bit window starts on the last two
-   bytes and is slid towards the front one byte at a time.  Returns a
-   pointer to the match, or nullptr if there is none. */
-static char8_t *
-memrchr2(const char8_t *h, size_t k, const char8_t *n)
-{
-	/* P always points at the first byte of the current window, so it never
-	   moves before H. */
-	const char8_t *p = h + k - 2;
-	uint16_t nw = n[0] << 8 | n[1];
-	uint16_t hw = p[0] << 8 | p[1];
-
-	for (;;) {
-		if (hw == nw)
-			return (char8_t *)p;
-		if (p == h)
-			return nullptr;
-		/* Drop the trailing byte and bring in the byte in front. */
-		p--;
-		hw = (hw >> 8) | (*p << 8);
-	}
-}
-
-/* Find the last occurrence of the 3-byte needle N in the K bytes at H.
-   The caller must guarantee K >= 3.  The three bytes under inspection are
-   kept in the upper 24 bits of a 32-bit window whose low byte is always
-   zero; the window starts on the last three bytes and is slid towards the
-   front.  Returns a pointer to the match, or nullptr if there is none. */
-static char8_t *
-memrchr3(const char8_t *h, size_t k, const char8_t *n)
-{
-	/* P always points at the first byte of the current window, so it never
-	   moves before H.  Bytes are widened to uint32_t before shifting: a
-	   plain "byte << 24" is evaluated as int and overflows for bytes
-	   >= 0x80. */
-	const char8_t *p = h + k - 3;
-	uint32_t nw = (uint32_t)n[0] << 24 | (uint32_t)n[1] << 16
-	            | (uint32_t)n[2] << 8;
-	uint32_t hw = (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16
-	            | (uint32_t)p[2] << 8;
-
-	for (;;) {
-		if (hw == nw)
-			return (char8_t *)p;
-		if (p == h)
-			return nullptr;
-		/* Bring in the byte in front; the mask discards the byte that has
-		   just slid out of the 24-bit window. */
-		p--;
-		hw = (hw >> 8 | (uint32_t)*p << 24) & UINT32_C(0xFFFFFF00);
-	}
-}
-
-/* Find the last occurrence of the 4-byte needle N in the K bytes at H.
-   The caller must guarantee K >= 4.  A 32-bit window starts on the last
-   four bytes and is slid towards the front one byte at a time.  Returns a
-   pointer to the match, or nullptr if there is none. */
-static char8_t *
-memrchr4(const char8_t *h, size_t k, const char8_t *n)
-{
-	/* P always points at the first byte of the current window, so it never
-	   moves before H.  Bytes are widened to uint32_t before shifting: a
-	   plain "byte << 24" is evaluated as int and overflows for bytes
-	   >= 0x80. */
-	const char8_t *p = h + k - 4;
-	uint32_t nw = (uint32_t)n[0] << 24 | (uint32_t)n[1] << 16
-	            | (uint32_t)n[2] << 8 | n[3];
-	uint32_t hw = (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16
-	            | (uint32_t)p[2] << 8 | p[3];
-
-	for (;;) {
-		if (hw == nw)
-			return (char8_t *)p;
-		if (p == h)
-			return nullptr;
-		/* Drop the trailing byte and bring in the byte in front. */
-		p--;
-		hw = hw >> 8 | (uint32_t)*p << 24;
-	}
-}
-
-/* Locate the last occurrence of the rune CH in the N bytes at S.  CH is
-   encoded with rtou8() and the search is dispatched on the length of that
-   encoding.  Returns a pointer to the first byte of the match, or nullptr
-   if CH does not occur or S is shorter than the encoding. */
-char8_t *
-u8rchr(const char8_t *s, rune ch, size_t n)
-{
-	char8_t enc[U8_LEN_MAX];
-	int len = rtou8(enc, ch, sizeof(enc));
-
-	/* The helpers below require at least LEN bytes of haystack. */
-	if ((size_t)len > n)
-		return nullptr;
-
-	if (len == 1)
-		return memrchr1(s, n, enc);
-	if (len == 2)
-		return memrchr2(s, n, enc);
-	if (len == 3)
-		return memrchr3(s, n, enc);
-	if (len == 4)
-		return memrchr4(s, n, enc);
-
-	unreachable();
-}
diff --git a/vendor/librune/lib/utf8/u8set.c b/vendor/librune/lib/utf8/u8set.c
deleted file mode 100644
index 6c57991..0000000
--- a/vendor/librune/lib/utf8/u8set.c
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <string.h>
-
-#include "utf8.h"
-
-#include "internal/common.h"
-
-/* Fill the N-byte buffer S with repeated copies of the UTF-8 encoding of
-   CH.  Only whole encodings are written, so up to (width - 1) trailing
-   bytes of S may be left untouched.  Returns the number of bytes written. */
-size_t
-u8set(char8_t *s, rune ch, size_t n)
-{
-	char8_t buf[U8_LEN_MAX];
-	size_t w, end;
-
-	if (n == 0)
-		return 0;
-	if (ch <= _1B_MAX) {
-		memset(s, ch, n);
-		return n;
-	}
-
-	w = (size_t)rtou8(buf, ch, sizeof(buf));
-
-	/* Stop at the last multiple of W that fits in N; copying one more
-	   encoding would write past the end of S. */
-	end = n - n % w;
-	for (size_t i = 0; i < end; i += w)
-		memcpy(s + i, buf, w);
-
-	return end;
-}
diff --git a/vendor/librune/lib/utf8/u8spn.c b/vendor/librune/lib/utf8/u8spn.c
deleted file mode 100644
index beeb33f..0000000
--- a/vendor/librune/lib/utf8/u8spn.c
+++ /dev/null
@@ -1,22 +0,0 @@
-#include "utf8.h"
-
-/* Return the number of runes at the start of S (N bytes long) that all
-   belong to the set P, which holds M runes.  Scanning stops at the first
-   rune that is not in P or when the input is exhausted. */
-size_t
-u8spn(const char8_t *s, size_t n, const rune *p, size_t m)
-{
-	rune ch;
-	size_t runes = 0;
-
-	while (u8next(&ch, &s, &n)) {
-		size_t i = 0;
-
-		/* Linear search of the accept set. */
-		while (i < m && p[i] != ch)
-			i++;
-		if (i == m)
-			break;
-
-		runes++;
-	}
-
-	return runes;
-}
diff --git a/vendor/librune/lib/utf8/u8tor.c b/vendor/librune/lib/utf8/u8tor.c
deleted file mode 100644
index 152a174..0000000
--- a/vendor/librune/lib/utf8/u8tor.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "rune.h"
-#include "utf8.h"
-
-#include "internal/common.h"
-
-/* Decode one rune from the bytes at S into *CH and return the number of
-   bytes it occupies.  No length is passed in: later bytes are read only
-   after every earlier byte has passed its U1/U2/U3/U4/UC test.  If no
-   pattern matches, or the decoded value fails u8chkr(), *CH is set to
-   RUNE_ERROR and 1 is returned. */
-int
-u8tor(rune *ch, const char8_t *s)
-{
-	int len = 0; /* 0 means no pattern matched */
-
-	if (U1(s[0])) {
-		*ch = s[0];
-		len = 1;
-	} else if (U2(s[0]) && UC(s[1])) {
-		*ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
-		len = 2;
-	} else if (U3(s[0]) && UC(s[1]) && UC(s[2])) {
-		*ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
-		len = 3;
-	} else if (U4(s[0]) && UC(s[1]) && UC(s[2]) && UC(s[3])) {
-		*ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12)
-		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
-		len = 4;
-	}
-
-	if (len == 0 || !u8chkr(*ch)) {
-		*ch = RUNE_ERROR;
-		return 1;
-	}
-
-	return len;
-}
diff --git a/vendor/librune/lib/utf8/u8tor_uc.c b/vendor/librune/lib/utf8/u8tor_uc.c
deleted file mode 100644
index 3448b59..0000000
--- a/vendor/librune/lib/utf8/u8tor_uc.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <stddef.h>
-
-#include "utf8.h"
-
-#include "internal/common.h"
-
-/* Unchecked variant of u8tor(): decode one rune from the bytes at S into
-   *CH and return its length in bytes.  Only the first byte is classified
-   (with U1/U2/U3/U4); the following bytes are neither tested nor is the
-   result range-checked.  A first byte that matches none of the four tests
-   reaches unreachable(). */
-int
-u8tor_uc(rune *ch, const char8_t *s)
-{
-	const char8_t lead = s[0];
-
-	if (U1(lead)) {
-		*ch = lead;
-		return 1;
-	}
-	if (U2(lead)) {
-		*ch = ((lead & 0x1F) << 6) | (s[1] & 0x3F);
-		return 2;
-	}
-	if (U3(lead)) {
-		*ch = ((lead & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
-		return 3;
-	}
-	if (U4(lead)) {
-		*ch = ((lead & 0x07) << 18) | ((s[1] & 0x3F) << 12)
-		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
-		return 4;
-	}
-
-	unreachable();
-}
diff --git a/vendor/librune/lib/utf8/u8wdth.c b/vendor/librune/lib/utf8/u8wdth.c
deleted file mode 100644
index 0bc5785..0000000
--- a/vendor/librune/lib/utf8/u8wdth.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "utf8.h"
-
-#include "internal/common.h"
-
-/* Return the number of bytes needed to encode CH in UTF-8, determined by
-   comparing it with the _1B_MAX to _4B_MAX limits.  Returns 0 when CH is
-   larger than _4B_MAX. */
-int
-u8wdth(rune ch)
-{
-	if (ch <= _1B_MAX)
-		return 1;
-	if (ch <= _2B_MAX)
-		return 2;
-	if (ch <= _3B_MAX)
-		return 3;
-	if (ch <= _4B_MAX)
-		return 4;
-	return 0;
-}