From 4f93f935dc7a981ca073a322425c3f5929ffb644 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Sun, 21 Jan 2024 03:03:58 +0100
Subject: Support line- & column-based match locations

---
 vendor/librune/lib/utf8/rtou8.c    |  41 ++++++++++++++
 vendor/librune/lib/utf8/u8bspn.c   |  25 +++++++++
 vendor/librune/lib/utf8/u8cbspn.c  |  23 ++++++++
 vendor/librune/lib/utf8/u8chk.c    |  32 +++++++++++
 vendor/librune/lib/utf8/u8chkr.c   |  11 +++
 vendor/librune/lib/utf8/u8chr.c    | 111 ++++++++++++++++++++++++++++++++++++++++
 vendor/librune/lib/utf8/u8cspn.c   |  23 ++++++++
 vendor/librune/lib/utf8/u8len.c    |  15 +++++
 vendor/librune/lib/utf8/u8next.c   |  19 ++++++
 vendor/librune/lib/utf8/u8prev.c   |  44 +++++++++++++++
 vendor/librune/lib/utf8/u8rchr.c   | 103 +++++++++++++++++++++++++++++++++++++
 vendor/librune/lib/utf8/u8set.c    |  33 +++++++++++
 vendor/librune/lib/utf8/u8spn.c    |  25 +++++++++
 vendor/librune/lib/utf8/u8tor.c    |  37 +++++++++++++
 vendor/librune/lib/utf8/u8tor_uc.c |  28 ++++++++++
 vendor/librune/lib/utf8/u8wdth.c   |  15 +++++
 16 files changed, 585 insertions(+)
 create mode 100644 vendor/librune/lib/utf8/rtou8.c
 create mode 100644 vendor/librune/lib/utf8/u8bspn.c
 create mode 100644 vendor/librune/lib/utf8/u8cbspn.c
 create mode 100644 vendor/librune/lib/utf8/u8chk.c
 create mode 100644 vendor/librune/lib/utf8/u8chkr.c
 create mode 100644 vendor/librune/lib/utf8/u8chr.c
 create mode 100644 vendor/librune/lib/utf8/u8cspn.c
 create mode 100644 vendor/librune/lib/utf8/u8len.c
 create mode 100644 vendor/librune/lib/utf8/u8next.c
 create mode 100644 vendor/librune/lib/utf8/u8prev.c
 create mode 100644 vendor/librune/lib/utf8/u8rchr.c
 create mode 100644 vendor/librune/lib/utf8/u8set.c
 create mode 100644 vendor/librune/lib/utf8/u8spn.c
 create mode 100644 vendor/librune/lib/utf8/u8tor.c
 create mode 100644 vendor/librune/lib/utf8/u8tor_uc.c
 create mode 100644 vendor/librune/lib/utf8/u8wdth.c

(limited to 'vendor/librune/lib/utf8')

diff --git a/vendor/librune/lib/utf8/rtou8.c b/vendor/librune/lib/utf8/rtou8.c
new file mode 100644
index 0000000..1823f08
--- /dev/null
+++ b/vendor/librune/lib/utf8/rtou8.c
@@ -0,0 +1,41 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Encode the rune CH as UTF-8 into S, a buffer with room for N bytes.  If N
+   is smaller than the encoded length nothing is written, but the encoded
+   length (1 to 4) is returned regardless, so a too-small buffer can be
+   detected by comparing the result against N.  CH must not exceed _4B_MAX;
+   larger values reach unreachable(). */
+int
+rtou8(char8_t *s, rune ch, size_t n)
+{
+	if (ch <= _1B_MAX) {
+		if (n >= 1)
+			s[0] = ch;
+		return 1;
+	} else if (ch <= _2B_MAX) {
+		if (n >= 2) {
+			s[0] = (ch >> 6) | 0xC0;
+			s[1] = (ch & 0x3F) | 0x80;
+		}
+		return 2;
+	} else if (ch <= _3B_MAX) {
+		if (n >= 3) {
+			s[0] = (ch >> 12) | 0xE0;
+			s[1] = ((ch >> 6) & 0x3F) | 0x80;
+			s[2] = (ch & 0x3F) | 0x80;
+		}
+		return 3;
+	} else if (ch <= _4B_MAX) {
+		if (n >= 4) {
+			s[0] = (ch >> 18) | 0xF0;
+			s[1] = ((ch >> 12) & 0x3F) | 0x80;
+			s[2] = ((ch >> 6) & 0x3F) | 0x80;
+			s[3] = (ch & 0x3F) | 0x80;
+		}
+		return 4;
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8bspn.c b/vendor/librune/lib/utf8/u8bspn.c
new file mode 100644
index 0000000..3ccd469
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8bspn.c
@@ -0,0 +1,25 @@
+#include "utf8.h"
+
+/* Return the length in bytes of the longest prefix of the N-byte string S
+   that consists solely of runes found in the M-element array P.  This is the
+   byte-counting counterpart of u8spn(), which counts runes. */
+size_t
+u8bspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune ch;
+	size_t k = 0;
+
+	while (u8next(&ch, &s, &n)) {
+		for (size_t i = 0; i < m; i++) {
+			if (p[i] == ch) {
+				k += u8wdth(ch);
+				goto found;
+			}
+		}
+
+		break;
+found:;
+	}
+
+	return k;
+}
diff --git a/vendor/librune/lib/utf8/u8cbspn.c b/vendor/librune/lib/utf8/u8cbspn.c
new file mode 100644
index 0000000..b51c300
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8cbspn.c
@@ -0,0 +1,23 @@
+#include "utf8.h"
+
+/* Return the length in bytes of the longest prefix of the N-byte string S
+   that contains none of the runes in the M-element array P.  This is the
+   byte-counting counterpart of u8cspn(), which counts runes. */
+size_t
+u8cbspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune ch;
+	size_t k = 0;
+
+	while (u8next(&ch, &s, &n)) {
+		for (size_t i = 0; i < m; i++) {
+			if (p[i] == ch)
+				goto found;
+		}
+
+		k += u8wdth(ch);
+	}
+
+found:
+	return k;
+}
diff --git a/vendor/librune/lib/utf8/u8chk.c b/vendor/librune/lib/utf8/u8chk.c
new file mode 100644
index 0000000..9a2cf03
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chk.c
@@ -0,0 +1,32 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Validate the N-byte string S.  Return a pointer to the first byte that does
+   not begin a well-formed sequence (as judged by u8tor()), or nullptr if all
+   of S decodes cleanly.  A sequence cut short by the end of the buffer is
+   reported as invalid without being decoded. */
+const char8_t *
+u8chk(const char8_t *s, size_t n)
+{
+	while (n) {
+		rune ch;
+		size_t m;
+
+		/* u8tor() takes no length, so make sure the sequence announced by
+		   the lead byte fits in the remaining input before decoding it. */
+		m = U1(s[0]) ? 1 : U2(s[0]) ? 2 : U3(s[0]) ? 3 : U4(s[0]) ? 4 : 0;
+		if (m == 0 || m > n)
+			return s;
+		m = u8tor(&ch, s);
+		if (ch == RUNE_ERROR)
+			return s;
+
+		/* Consume the sequence so that the loop progresses towards N == 0. */
+		s += m;
+		n -= m;
+	}
+
+	return nullptr;
+}
diff --git a/vendor/librune/lib/utf8/u8chkr.c b/vendor/librune/lib/utf8/u8chkr.c
new file mode 100644
index 0000000..4510f16
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chkr.c
@@ -0,0 +1,11 @@
+#include "rune.h"
+#include "utf8.h"
+
+/* Report whether CH is an acceptable rune: it must not lie in the range
+   0xD800-0xDFFF, be 0xFFFE or 0xFFFF, or exceed RUNE_MAX. */
+bool
+u8chkr(rune ch)
+{
+	return !((ch >= 0xD800 && ch <= 0xDFFF) || ch == 0xFFFE || ch == 0xFFFF
+	         || ch > RUNE_MAX);
+}
diff --git a/vendor/librune/lib/utf8/u8chr.c b/vendor/librune/lib/utf8/u8chr.c
new file mode 100644
index 0000000..4ecbd10
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chr.c
@@ -0,0 +1,111 @@
+#include <stdint.h>
+#include <string.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* NOTE: The memmem*() functions were taken directly from the memmem()
+   implementation on OpenBSD.  As a result, these functions are licensed under
+   OpenBSD's 2-Clause BSD License instead of this library's 0-Clause BSD
+   License.
+
+   The license for these functions is as follows:
+
+      Copyright © 2005–2020 Rich Felker, et al.
+
+      Permission is hereby granted, free of charge, to any person obtaining
+      a copy of this software and associated documentation files (the
+      “Software”), to deal in the Software without restriction, including
+      without limitation the rights to use, copy, modify, merge, publish,
+      distribute, sublicense, and/or sell copies of the Software, and to
+      permit persons to whom the Software is furnished to do so, subject to
+      the following conditions:
+
+      The above copyright notice and this permission notice shall be
+      included in all copies or substantial portions of the Software.
+
+      THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
+      EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+      MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+      IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+      CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+      TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+      SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* Find the first occurrence of the 2-byte needle N in the K-byte haystack H
+   (K >= 2) by sliding a 16-bit window over H one byte at a time. */
+static const char8_t *
+memmem2(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint16_t hw, nw;
+	hw = h[0] << 8 | h[1];
+	nw = n[0] << 8 | n[1];
+
+	for (h += 2, k -= 2; k; k--, hw = hw << 8 | *h++) {
+		if (hw == nw)
+			return h - 2;
+	}
+	return hw == nw ? h - 2 : nullptr;
+}
+
+/* As memmem2(), but for a 3-byte needle (K >= 3).  The window occupies the
+   upper 24 bits of a 32-bit word; the low byte is always zero. */
+static const char8_t *
+memmem3(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+
+	/* Widen before shifting by 24; as a promoted int, a byte >= 0x80 would
+	   overflow into the sign bit. */
+	hw = (uint32_t)h[0] << 24 | h[1] << 16 | h[2] << 8;
+	nw = (uint32_t)n[0] << 24 | n[1] << 16 | n[2] << 8;
+
+	for (h += 3, k -= 3; k; k--, hw = (hw | *h++) << 8) {
+		if (hw == nw)
+			return h - 3;
+	}
+	return hw == nw ? h - 3 : nullptr;
+}
+
+/* As memmem2(), but for a 4-byte needle (K >= 4) and a full 32-bit window. */
+static const char8_t *
+memmem4(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+
+	/* Widen before shifting by 24, for the same reason as in memmem3(). */
+	hw = (uint32_t)h[0] << 24 | h[1] << 16 | h[2] << 8 | h[3];
+	nw = (uint32_t)n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3];
+
+	for (h += 4, k -= 4; k; k--, hw = hw << 8 | *h++) {
+		if (hw == nw)
+			return h - 4;
+	}
+	return hw == nw ? h - 4 : nullptr;
+}
+
+/* Return a pointer to the first occurrence of the rune CH in the N-byte
+   string S, or nullptr if there is none.  CH is encoded with rtou8() and the
+   resulting bytes are searched for. */
+const char8_t *
+u8chr(const char8_t *s, rune ch, size_t n)
+{
+	char8_t buf[U8_LEN_MAX];
+	int m = rtou8(buf, ch, sizeof(buf));
+
+	if (n < (size_t)m)
+		return nullptr;
+	switch (m) {
+	case 1:
+		return memchr(s, ch, n);
+	case 2:
+		return memmem2(s, n, buf);
+	case 3:
+		return memmem3(s, n, buf);
+	case 4:
+		return memmem4(s, n, buf);
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8cspn.c b/vendor/librune/lib/utf8/u8cspn.c
new file mode 100644
index 0000000..7d46a0b
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8cspn.c
@@ -0,0 +1,23 @@
+#include "utf8.h"
+
+/* Return the number of runes in the longest prefix of the N-byte string S
+   that contains none of the runes in the M-element array P.  See u8cbspn()
+   for the same span measured in bytes. */
+size_t
+u8cspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune ch;
+	size_t k = 0;
+
+	while (u8next(&ch, &s, &n)) {
+		for (size_t i = 0; i < m; i++) {
+			if (p[i] == ch)
+				goto found;
+		}
+
+		k++;
+	}
+
+found:
+	return k;
+}
diff --git a/vendor/librune/lib/utf8/u8len.c b/vendor/librune/lib/utf8/u8len.c
new file mode 100644
index 0000000..fc66ee7
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8len.c
@@ -0,0 +1,15 @@
+#include "utf8.h"
+
+/* Return the number of runes in the N-byte string S.  The input is decoded
+   with u8next() and is therefore not validated. */
+size_t
+u8len(const char8_t *s, size_t n)
+{
+	rune unused;
+	size_t m = 0;
+
+	while (u8next(&unused, &s, &n))
+		m++;
+
+	return m;
+}
diff --git a/vendor/librune/lib/utf8/u8next.c b/vendor/librune/lib/utf8/u8next.c
new file mode 100644
index 0000000..8edc084
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8next.c
@@ -0,0 +1,19 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the rune at the front of the *N-byte string *S into *CH, then advance
+   *S and shrink *N by the number of bytes consumed.  Return the updated *S, or
+   nullptr (leaving everything untouched) once *N is 0.  Decoding is done by
+   u8tor_uc(), so the input is not validated. */
+const char8_t *
+u8next(rune *ch, const char8_t **s, size_t *n)
+{
+	int m;
+
+	if (*n == 0)
+		return nullptr;
+	m = u8tor_uc(ch, *s);
+	*n -= m;
+	return *s += m;
+}
diff --git a/vendor/librune/lib/utf8/u8prev.c b/vendor/librune/lib/utf8/u8prev.c
new file mode 100644
index 0000000..fac0fc7
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8prev.c
@@ -0,0 +1,44 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the rune that ends immediately before *P into *CH and move *P back to
+   its first byte.  START is the beginning of the string; nullptr is returned
+   (and nothing is modified) when *P is already at or before it.  If the bytes
+   before *P do not form a valid sequence, *CH is set to RUNE_ERROR and *P
+   moves back by a single byte.  The updated *P is returned. */
+const char8_t *
+u8prev(rune *ch, const char8_t **p, const char8_t *start)
+{
+	int off;
+	bool match = true;
+	const char8_t *s = *p;
+	ptrdiff_t d = s - start;
+
+	if (d <= 0) {
+		return nullptr;
+	} else if (U1(s[-1])) {
+		*ch = s[-1];
+		off = 1;
+	} else if (d > 1 && UC(s[-1]) && U2(s[-2])) {
+		*ch = ((s[-2] & 0x1F) << 6) | (s[-1] & 0x3F);
+		off = 2;
+	} else if (d > 2 && UC(s[-1]) && UC(s[-2]) && U3(s[-3])) {
+		*ch = ((s[-3] & 0x0F) << 12) | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F);
+		off = 3;
+	} else if (d > 3 && UC(s[-1]) && UC(s[-2]) && UC(s[-3]) && U4(s[-4])) {
+		*ch = ((s[-4] & 0x07) << 18) | ((s[-3] & 0x3F) << 12)
+		    | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F);
+		off = 4;
+	} else
+		match = false;
+
+	if (match && u8chkr(*ch))
+		return *p -= off;
+
+	/* Skip one byte so that repeated calls still make progress.  Note that
+	   `*p--` would decrement the local P rather than the caller's pointer. */
+	*ch = RUNE_ERROR;
+	return --*p;
+}
diff --git a/vendor/librune/lib/utf8/u8rchr.c b/vendor/librune/lib/utf8/u8rchr.c
new file mode 100644
index 0000000..15fff51
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8rchr.c
@@ -0,0 +1,103 @@
+#include <stdint.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Return a pointer to the last byte in the K-byte string S that equals the
+   single byte at N, or nullptr if there is none. */
+static const char8_t *
+memrchr1(const char8_t *s, size_t k, const char8_t *n)
+{
+	for (const char8_t *p = s + k; p > s;) {
+		if (*--p == *n)
+			return p;
+	}
+	return nullptr;
+}
+
+/* Find the last occurrence of the 2-byte needle N in the K-byte haystack H
+   (K >= 2) by sliding a 16-bit window backwards over H.  W always points at
+   the first byte of the window and never moves before H. */
+static const char8_t *
+memrchr2(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint16_t hw, nw;
+	const char8_t *w = h + k - 2;
+	hw = w[0] << 8 | w[1];
+	nw = n[0] << 8 | n[1];
+
+	for (; w > h; hw = hw >> 8 | (*--w << 8)) {
+		if (hw == nw)
+			return w;
+	}
+
+	return hw == nw ? w : nullptr;
+}
+
+/* As memrchr2(), but for a 3-byte needle (K >= 3).  The window occupies the
+   upper 24 bits of a 32-bit word; the low byte is always zero. */
+static const char8_t *
+memrchr3(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+	const char8_t *w = h + k - 3;
+
+	/* Widen before shifting by 24; as a promoted int, a byte >= 0x80 would
+	   overflow into the sign bit. */
+	hw = (uint32_t)w[0] << 24 | w[1] << 16 | w[2] << 8;
+	nw = (uint32_t)n[0] << 24 | n[1] << 16 | n[2] << 8;
+
+	for (; w > h;
+	     hw = (hw >> 8 | (uint32_t)*--w << 24) & UINT32_C(0xFFFFFF00))
+	{
+		if (hw == nw)
+			return w;
+	}
+
+	return hw == nw ? w : nullptr;
+}
+
+/* As memrchr2(), but for a 4-byte needle (K >= 4) and a full 32-bit window. */
+static const char8_t *
+memrchr4(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+	const char8_t *w = h + k - 4;
+
+	/* Widen before shifting by 24, for the same reason as in memrchr3(). */
+	hw = (uint32_t)w[0] << 24 | w[1] << 16 | w[2] << 8 | w[3];
+	nw = (uint32_t)n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3];
+
+	for (; w > h; hw = hw >> 8 | (uint32_t)*--w << 24) {
+		if (hw == nw)
+			return w;
+	}
+
+	return hw == nw ? w : nullptr;
+}
+
+/* Return a pointer to the last occurrence of the rune CH in the N-byte string
+   S, or nullptr if there is none.  CH is encoded with rtou8() and the
+   resulting bytes are searched for from the end of S. */
+const char8_t *
+u8rchr(const char8_t *s, rune ch, size_t n)
+{
+	char8_t buf[U8_LEN_MAX];
+	int m = rtou8(buf, ch, sizeof(buf));
+
+	if (n < (size_t)m)
+		return nullptr;
+	switch (m) {
+	case 1:
+		return memrchr1(s, n, buf);
+	case 2:
+		return memrchr2(s, n, buf);
+	case 3:
+		return memrchr3(s, n, buf);
+	case 4:
+		return memrchr4(s, n, buf);
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8set.c b/vendor/librune/lib/utf8/u8set.c
new file mode 100644
index 0000000..0dfba2c
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8set.c
@@ -0,0 +1,33 @@
+#include <string.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Fill the N-byte buffer S with as many whole copies of the UTF-8 encoding of
+   CH as fit, and return the number of bytes written.  Trailing bytes that
+   cannot hold a complete copy are left untouched.
+   NOTE: S is written to despite its const qualifier, so it must not point
+   to a const object. */
+size_t
+u8set(const char8_t *s, rune ch, size_t n)
+{
+	size_t m, end;
+	char8_t buf[U8_LEN_MAX];
+
+	if (n == 0)
+		return 0;
+	if (ch <= _1B_MAX) {
+		memset((char *)s, ch, n);
+		return n;
+	}
+	m = rtou8(buf, ch, sizeof(buf));
+
+	/* Stop at the last multiple of M so that the final copy cannot run past
+	   the end of the buffer. */
+	end = n - n % m;
+	for (size_t i = 0; i < end; i += m)
+		memcpy((char *)s + i, buf, m);
+
+	return end;
+}
diff --git a/vendor/librune/lib/utf8/u8spn.c b/vendor/librune/lib/utf8/u8spn.c
new file mode 100644
index 0000000..beeb33f
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8spn.c
@@ -0,0 +1,25 @@
+#include "utf8.h"
+
+/* Return the number of runes in the longest prefix of the N-byte string S
+   that consists solely of runes found in the M-element array P.  See
+   u8bspn() for the same span measured in bytes. */
+size_t
+u8spn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune ch;
+	size_t k = 0;
+
+	while (u8next(&ch, &s, &n)) {
+		for (size_t i = 0; i < m; i++) {
+			if (p[i] == ch) {
+				k++;
+				goto found;
+			}
+		}
+
+		break;
+found:;
+	}
+
+	return k;
+}
diff --git a/vendor/librune/lib/utf8/u8tor.c b/vendor/librune/lib/utf8/u8tor.c
new file mode 100644
index 0000000..152a174
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8tor.c
@@ -0,0 +1,37 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the UTF-8 sequence starting at S into *CH and return its length in
+   bytes.  If the lead byte and its continuation bytes do not match, or the
+   decoded value fails u8chkr(), *CH is set to RUNE_ERROR and 1 is returned.
+   S carries no length: up to 4 bytes are read as directed by the lead byte.
+   NOTE(review): overlong encodings are not rejected here; confirm whether
+   callers rely on that. */
+int
+u8tor(rune *ch, const char8_t *s)
+{
+	int n = 0;
+
+	if (U1(s[0])) {
+		*ch = s[0];
+		n = 1;
+	} else if (U2(s[0]) && UC(s[1])) {
+		*ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
+		n = 2;
+	} else if (U3(s[0]) && UC(s[1]) && UC(s[2])) {
+		*ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
+		n = 3;
+	} else if (U4(s[0]) && UC(s[1]) && UC(s[2]) && UC(s[3])) {
+		*ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12)
+		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
+		n = 4;
+	}
+
+	if (n && u8chkr(*ch))
+		return n;
+
+	*ch = RUNE_ERROR;
+	return 1;
+}
diff --git a/vendor/librune/lib/utf8/u8tor_uc.c b/vendor/librune/lib/utf8/u8tor_uc.c
new file mode 100644
index 0000000..ea57332
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8tor_uc.c
@@ -0,0 +1,28 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Unchecked variant of u8tor(): decode the sequence starting at S into *CH
+   and return its length, trusting the lead byte alone.  Continuation bytes
+   and the decoded value are not validated, and a byte that is not a valid
+   lead byte reaches unreachable(), so S must be well-formed UTF-8. */
+int
+u8tor_uc(rune *ch, const char8_t *s)
+{
+	if (U1(s[0])) {
+		*ch = s[0];
+		return 1;
+	} else if (U2(s[0])) {
+		*ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
+		return 2;
+	} else if (U3(s[0])) {
+		*ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
+		return 3;
+	} else if (U4(s[0])) {
+		*ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12)
+		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
+		return 4;
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8wdth.c b/vendor/librune/lib/utf8/u8wdth.c
new file mode 100644
index 0000000..0bc5785
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8wdth.c
@@ -0,0 +1,15 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Return the number of bytes needed to encode CH as UTF-8 (1 to 4), or 0 if
+   CH is greater than _4B_MAX. */
+int
+u8wdth(rune ch)
+{
+	return ch <= _1B_MAX ? 1
+	     : ch <= _2B_MAX ? 2
+	     : ch <= _3B_MAX ? 3
+	     : ch <= _4B_MAX ? 4
+	                     : 0;
+}
-- 
cgit v1.2.3