aboutsummaryrefslogtreecommitdiff
path: root/vendor/librune/lib
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-01-21 03:03:58 +0100
committer Thomas Voss <mail@thomasvoss.com> 2024-01-21 03:03:58 +0100
commit 4f93f935dc7a981ca073a322425c3f5929ffb644 (patch)
tree 4460586408ec7fdfcecf3ba4584f0435067125a6 /vendor/librune/lib
parent 72ea25a4d73e3e026366d4165f5bc4ec9e7418cb (diff)
Support line- & column-based match locations
Diffstat (limited to 'vendor/librune/lib')
-rw-r--r--vendor/librune/lib/gbrk/u8glen.c13
-rw-r--r--vendor/librune/lib/gbrk/u8gnext.c165
-rw-r--r--vendor/librune/lib/utf8/rtou8.c36
-rw-r--r--vendor/librune/lib/utf8/u8bspn.c22
-rw-r--r--vendor/librune/lib/utf8/u8cbspn.c20
-rw-r--r--vendor/librune/lib/utf8/u8chk.c19
-rw-r--r--vendor/librune/lib/utf8/u8chkr.c9
-rw-r--r--vendor/librune/lib/utf8/u8chr.c97
-rw-r--r--vendor/librune/lib/utf8/u8cspn.c20
-rw-r--r--vendor/librune/lib/utf8/u8len.c13
-rw-r--r--vendor/librune/lib/utf8/u8next.c14
-rw-r--r--vendor/librune/lib/utf8/u8prev.c37
-rw-r--r--vendor/librune/lib/utf8/u8rchr.c87
-rw-r--r--vendor/librune/lib/utf8/u8set.c24
-rw-r--r--vendor/librune/lib/utf8/u8spn.c22
-rw-r--r--vendor/librune/lib/utf8/u8tor.c31
-rw-r--r--vendor/librune/lib/utf8/u8tor_uc.c24
-rw-r--r--vendor/librune/lib/utf8/u8wdth.c13
18 files changed, 666 insertions, 0 deletions
diff --git a/vendor/librune/lib/gbrk/u8glen.c b/vendor/librune/lib/gbrk/u8glen.c
new file mode 100644
index 0000000..13cac7e
--- /dev/null
+++ b/vendor/librune/lib/gbrk/u8glen.c
@@ -0,0 +1,13 @@
+#include "gbrk.h"
+
+/* Count the grapheme clusters in the N-byte UTF-8 string S by stepping through
+   it with u8gnext() until that function reports the end of the input. */
+size_t
+u8glen(const char8_t *s, size_t n)
+{
+	struct u8view cluster; /* filled in by u8gnext(); never inspected here */
+	size_t count;
+
+	for (count = 0; u8gnext(&cluster, &s, &n); count++)
+		;
+
+	return count;
+}
diff --git a/vendor/librune/lib/gbrk/u8gnext.c b/vendor/librune/lib/gbrk/u8gnext.c
new file mode 100644
index 0000000..875d5cb
--- /dev/null
+++ b/vendor/librune/lib/gbrk/u8gnext.c
@@ -0,0 +1,165 @@
+#include <sys/types.h>
+
+#include "gbrk.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+#include "internal/gbrk_lookup.h"
+
+/* Number of elements in the array A; A must be a real array, not a pointer */
+#define lengthof(a) (sizeof(a) / sizeof(*(a)))
+
+/* State that u8isgbrk() carries from one rune pair to the next while scanning
+   a single grapheme cluster.  u8isgbrk() clears every field each time it
+   reports a break. */
+struct gbrk_state {
+ /* Progress towards rule GB9c: becomes GB9C_CNSNT when the left-hand rune has
+    GBP_INDC_CNSNT, and GB9C_LNK when a GBP_INDC_LNK rune is then seen */
+ enum {
+ GB9C_NONE,
+ GB9C_CNSNT,
+ GB9C_LNK,
+ } gb9c;
+ /* Set once a GBP_PIC rune has been followed by a GBP_EXT or GBP_ZWJ rune;
+    enables the GB11 checks */
+ bool gb11 : 1;
+ /* Set once two GBP_RI runes have been joined, so that a third one breaks
+    (GB12 & GB13) */
+ bool gb12 : 1;
+};
+
+/* Helpers private to this file; defined below u8gnext() */
+static bool u8isgbrk(rune, rune, struct gbrk_state *);
+static gbrk_prop getprop(rune);
+
+/* Extract the next grapheme cluster from the string *S of *N bytes.
+
+   On success G is filled in (g->p is the start of the cluster, g->len its
+   length in bytes), *S is advanced past the cluster, *N is reduced by the
+   cluster length and the new value of *S is returned.  When *N is 0 nothing
+   is modified and nullptr is returned.
+
+   The input is decoded with u8tor_uc(), which performs no validation. */
+const char8_t *
+u8gnext(struct u8view *g, const char8_t **s, size_t *n)
+{
+	if (*n == 0)
+		return nullptr;
+
+	struct gbrk_state state = {0};
+	const char8_t *cur = *s;
+	rune prev;
+
+	g->p = cur;
+	cur += u8tor_uc(&prev, cur);
+
+	for (;;) {
+		/* A rune of 0 stands for end-of-input; u8isgbrk() always breaks on
+		   it, so the width of 0 is never added to ‘cur’ */
+		rune next = 0;
+		int width = 0;
+
+		if ((size_t)(cur - *s) < *n)
+			width = u8tor_uc(&next, cur);
+
+		if (u8isgbrk(prev, next, &state)) {
+			g->len = cur - *s;
+			*n -= g->len;
+			*s = cur;
+			return cur;
+		}
+
+		prev = next;
+		cur += width;
+	}
+}
+
+/* Decide whether a grapheme cluster boundary lies between the adjacent runes A
+   and B.  Returns true when there is a break and false when the two runes
+   belong to the same cluster.
+
+   GS carries state between consecutive calls for one cluster and is reset
+   whenever a break is reported.  A rune value of 0 for either A or B always
+   yields a break; u8gnext() passes 0 for B to signal the end of the input.
+
+   The rule labels in the comments below (GB1, GB2, …) presumably refer to the
+   grapheme cluster boundary rules of Unicode UAX #29 — TODO confirm.  The
+   rules are tested in order and the first one that applies wins. */
+bool
+u8isgbrk(rune a, rune b, struct gbrk_state *gs)
+{
+ gbrk_prop ap, bp;
+
+ /* GB1 & GB2 */
+ if (!a || !b)
+ goto do_break;
+
+ /* GB3 & fast-track for pairs where both runes are below U+0300: the only
+    such pair that is kept together is CR LF, and no table lookup is done */
+ if ((a | b) < 0x300) {
+ if (a == '\r' && b == '\n')
+ return false;
+ goto do_break;
+ }
+
+ /* GB4; ‘ap’ is only assigned when A is neither CR nor LF, but in the other
+    cases we break before it is ever read */
+ if (a == '\r' || a == '\n' || ((ap = getprop(a)) & GBP_CTRL))
+ goto do_break;
+
+ /* GB5; the same reasoning applies to ‘bp’ */
+ if (b == '\r' || b == '\n' || ((bp = getprop(b)) & GBP_CTRL))
+ goto do_break;
+
+ /* Setting flags for GB9c */
+ if (ap & GBP_INDC_CNSNT)
+ gs->gb9c = GB9C_CNSNT;
+ else if ((ap & GBP_INDC_LNK) && gs->gb9c == GB9C_CNSNT)
+ gs->gb9c = GB9C_LNK;
+
+ /* GB6 */
+ if ((ap & GBP_HNGL_L)
+ && (bp & (GBP_HNGL_L | GBP_HNGL_V | GBP_HNGL_LV | GBP_HNGL_LVT)))
+ {
+ return false;
+ }
+
+ /* GB7 */
+ if ((ap & (GBP_HNGL_LV | GBP_HNGL_V)) && (bp & (GBP_HNGL_V | GBP_HNGL_T)))
+ return false;
+
+ /* GB8 */
+ if ((ap & (GBP_HNGL_LVT | GBP_HNGL_T)) && (bp & GBP_HNGL_T))
+ return false;
+
+ /* GB9; also remember for GB11 that a GBP_PIC rune started the sequence */
+ if (bp & (GBP_EXT | GBP_ZWJ)) {
+ if (ap & GBP_PIC)
+ gs->gb11 = true;
+ return false;
+ }
+
+ /* GB9a */
+ if (bp & GBP_SM)
+ return false;
+
+ /* GB9b */
+ if (ap & GBP_PREP)
+ return false;
+
+ /* GB9c */
+ if ((ap & (GBP_INDC_EXT | GBP_INDC_LNK)) && (bp & GBP_INDC_CNSNT)
+ && gs->gb9c == GB9C_LNK)
+ {
+ return false;
+ }
+
+ /* GB11 */
+ if (gs->gb11) {
+ if ((ap & GBP_EXT) && (bp & (GBP_EXT | GBP_ZWJ)))
+ return false;
+ if ((ap & GBP_ZWJ) && (bp & GBP_PIC))
+ return false;
+ }
+
+ /* GB12 & GB13; ‘gb12’ records that one pair of GBP_RI runes has already
+    been joined, so the next GBP_RI rune starts a new cluster */
+ if (ap & GBP_RI) {
+ if (gs->gb12 || !(bp & GBP_RI))
+ goto do_break;
+ gs->gb12 = true;
+ return false;
+ }
+
+ /* GB999; every break also resets the per-cluster state */
+do_break:
+ gs->gb9c = GB9C_NONE;
+ gs->gb11 = gs->gb12 = false;
+ return true;
+}
+
+/* Look up the grapheme break property of CH by binary searching
+   gbrk_prop_tbl, whose entries each cover the inclusive rune range [lo, hi].
+   Returns GBP_OTHER when no entry contains CH.  The search presumably relies
+   on the table being sorted by range — verify against the table generator. */
+gbrk_prop
+getprop(rune ch)
+{
+	ssize_t first = 0;
+	ssize_t last = lengthof(gbrk_prop_tbl) - 1;
+
+	for (ssize_t mid; first <= last;) {
+		mid = (first + last) / 2;
+
+		if (ch < gbrk_prop_tbl[mid].lo) {
+			last = mid - 1;
+			continue;
+		}
+		if (ch > gbrk_prop_tbl[mid].hi) {
+			first = mid + 1;
+			continue;
+		}
+		return gbrk_prop_tbl[mid].prop;
+	}
+
+	return GBP_OTHER;
+}
diff --git a/vendor/librune/lib/utf8/rtou8.c b/vendor/librune/lib/utf8/rtou8.c
new file mode 100644
index 0000000..1823f08
--- /dev/null
+++ b/vendor/librune/lib/utf8/rtou8.c
@@ -0,0 +1,36 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Encode the rune CH as UTF-8 into the buffer S of capacity N bytes.
+
+   The number of bytes the encoding requires (1–4) is always returned; the
+   bytes are only stored when N is large enough to hold all of them, so a
+   return value greater than N means nothing was written.  CH must not exceed
+   _4B_MAX: that case is marked unreachable. */
+int
+rtou8(char8_t *s, rune ch, size_t n)
+{
+	if (ch <= _1B_MAX) {
+		if (n >= 1)
+			s[0] = ch;
+		return 1;
+	}
+
+	if (ch <= _2B_MAX) {
+		if (n >= 2) {
+			s[0] = 0xC0 | (ch >> 6);
+			s[1] = 0x80 | (ch & 0x3F);
+		}
+		return 2;
+	}
+
+	if (ch <= _3B_MAX) {
+		if (n >= 3) {
+			s[0] = 0xE0 | (ch >> 12);
+			s[1] = 0x80 | ((ch >> 6) & 0x3F);
+			s[2] = 0x80 | (ch & 0x3F);
+		}
+		return 3;
+	}
+
+	if (ch <= _4B_MAX) {
+		if (n >= 4) {
+			s[0] = 0xF0 | (ch >> 18);
+			s[1] = 0x80 | ((ch >> 12) & 0x3F);
+			s[2] = 0x80 | ((ch >> 6) & 0x3F);
+			s[3] = 0x80 | (ch & 0x3F);
+		}
+		return 4;
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8bspn.c b/vendor/librune/lib/utf8/u8bspn.c
new file mode 100644
index 0000000..3ccd469
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8bspn.c
@@ -0,0 +1,22 @@
+#include "utf8.h"
+
+/* Return the length in bytes of the longest prefix of the N-byte string S
+   that consists solely of runes found in the M-element array P. */
+size_t
+u8bspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	size_t bytes = 0;
+	rune r;
+
+	while (u8next(&r, &s, &n)) {
+		size_t i = 0;
+
+		while (i < m && p[i] != r)
+			i++;
+		if (i == m) /* R is not in the accept set */
+			break;
+
+		bytes += u8wdth(r);
+	}
+
+	return bytes;
+}
diff --git a/vendor/librune/lib/utf8/u8cbspn.c b/vendor/librune/lib/utf8/u8cbspn.c
new file mode 100644
index 0000000..b51c300
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8cbspn.c
@@ -0,0 +1,20 @@
+#include "utf8.h"
+
+/* Return the length in bytes of the longest prefix of the N-byte string S
+   that contains none of the runes in the M-element array P. */
+size_t
+u8cbspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	size_t bytes = 0;
+	rune r;
+
+	while (u8next(&r, &s, &n)) {
+		size_t i = 0;
+
+		while (i < m && p[i] != r)
+			i++;
+		if (i < m) /* R is in the reject set */
+			break;
+
+		bytes += u8wdth(r);
+	}
+
+	return bytes;
+}
diff --git a/vendor/librune/lib/utf8/u8chk.c b/vendor/librune/lib/utf8/u8chk.c
new file mode 100644
index 0000000..9a2cf03
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chk.c
@@ -0,0 +1,19 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Validate the N-byte UTF-8 string S.
+
+   Returns a pointer to the start of the first sequence that u8tor() decodes
+   to RUNE_ERROR, or nullptr when the whole string decodes cleanly.  A sequence
+   whose decoded width would extend beyond the remaining N bytes is reported
+   as invalid as well.
+
+   NOTE(review): u8tor() takes no length, so when the buffer ends in a
+   truncated multi-byte sequence it may inspect bytes beyond S + N — confirm
+   that callers can tolerate this. */
+const char8_t *
+u8chk(const char8_t *s, size_t n)
+{
+	while (n) {
+		rune ch;
+		int m = u8tor(&ch, s);
+
+		/* The width check keeps ‘n -= m’ from wrapping around */
+		if (ch == RUNE_ERROR || (size_t)m > n)
+			return s;
+
+		/* Step over the rune that was just validated */
+		s += m;
+		n -= m;
+	}
+
+	return nullptr;
+}
diff --git a/vendor/librune/lib/utf8/u8chkr.c b/vendor/librune/lib/utf8/u8chkr.c
new file mode 100644
index 0000000..4510f16
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chkr.c
@@ -0,0 +1,9 @@
+#include "rune.h"
+#include "utf8.h"
+
+/* Report whether CH is accepted as a valid rune.  Rejected are values above
+   RUNE_MAX, the range 0xD800–0xDFFF, and the two values 0xFFFE and 0xFFFF. */
+bool
+u8chkr(rune ch)
+{
+	if (ch > RUNE_MAX)
+		return false;
+	if (ch >= 0xD800 && ch <= 0xDFFF)
+		return false;
+	return ch != 0xFFFE && ch != 0xFFFF;
+}
diff --git a/vendor/librune/lib/utf8/u8chr.c b/vendor/librune/lib/utf8/u8chr.c
new file mode 100644
index 0000000..4ecbd10
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chr.c
@@ -0,0 +1,97 @@
+#include <stdint.h>
+#include <string.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* NOTE: The memmem*() functions were taken directly from the memmem()
+ implementation on OpenBSD. As a result, these functions are licensed under
+ OpenBSD’s 2-Clause BSD License instead of this library’s 0-Clause BSD License.
+
+ The license for these functions is as follows:
+
+ Copyright © 2005–2020 Rich Felker, et al.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ “Software”), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* Find the first occurrence of the 2-byte needle N in the K-byte haystack H.
+   Returns a pointer to the match or nullptr if there is none.  K must be at
+   least 2; u8chr() guarantees this before calling. */
+static const char8_t *
+memmem2(const char8_t *h, size_t k, const char8_t *n)
+{
+ /* ‘hw’ is a sliding window over the last two haystack bytes read and ‘nw’
+    is the needle packed the same way, so one comparison tests both bytes */
+ uint16_t hw, nw;
+ hw = h[0] << 8 | h[1];
+ nw = n[0] << 8 | n[1];
+
+ /* ‘h’ always points just past the window, hence the ‘h - 2’ on a match */
+ for (h += 2, k -= 2; k; k--, hw = hw << 8 | *h++) {
+ if (hw == nw)
+ return h - 2;
+ }
+ /* The final window has not been compared by the loop yet */
+ return hw == nw ? h - 2 : nullptr;
+}
+
+/* Find the first occurrence of the 3-byte needle N in the K-byte haystack H.
+   Returns a pointer to the match or nullptr if there is none.  K must be at
+   least 3; u8chr() guarantees this before calling. */
+static const char8_t *
+memmem3(const char8_t *h, size_t k, const char8_t *n)
+{
+	/* The three bytes of interest live in the upper 24 bits of each word; the
+	   low byte is always zero.  The operands are widened to uint32_t before
+	   shifting, because shifting a promoted ‘int’ holding a byte ≥ 0x80 left
+	   by 24 would overflow a signed type, which is undefined behaviour. */
+	uint32_t hw, nw;
+	hw = (uint32_t)h[0] << 24 | (uint32_t)h[1] << 16 | (uint32_t)h[2] << 8;
+	nw = (uint32_t)n[0] << 24 | (uint32_t)n[1] << 16 | (uint32_t)n[2] << 8;
+
+	/* ‘h’ always points just past the window, hence the ‘h - 3’ on a match */
+	for (h += 3, k -= 3; k; k--, hw = (hw | *h++) << 8) {
+		if (hw == nw)
+			return h - 3;
+	}
+	/* The final window has not been compared by the loop yet */
+	return hw == nw ? h - 3 : nullptr;
+}
+
+/* Find the first occurrence of the 4-byte needle N in the K-byte haystack H.
+   Returns a pointer to the match or nullptr if there is none.  K must be at
+   least 4; u8chr() guarantees this before calling. */
+static const char8_t *
+memmem4(const char8_t *h, size_t k, const char8_t *n)
+{
+	/* Sliding 4-byte window and the needle packed the same way.  The operands
+	   are widened to uint32_t before shifting, because shifting a promoted
+	   ‘int’ holding a byte ≥ 0x80 left by 24 would overflow a signed type,
+	   which is undefined behaviour. */
+	uint32_t hw, nw;
+	hw = (uint32_t)h[0] << 24 | (uint32_t)h[1] << 16 | (uint32_t)h[2] << 8
+	   | h[3];
+	nw = (uint32_t)n[0] << 24 | (uint32_t)n[1] << 16 | (uint32_t)n[2] << 8
+	   | n[3];
+
+	/* ‘h’ always points just past the window, hence the ‘h - 4’ on a match */
+	for (h += 4, k -= 4; k; k--, hw = hw << 8 | *h++) {
+		if (hw == nw)
+			return h - 4;
+	}
+	/* The final window has not been compared by the loop yet */
+	return hw == nw ? h - 4 : nullptr;
+}
+
+/* Find the first occurrence of the rune CH in the N-byte string S.  Returns a
+   pointer to the start of its encoding, or nullptr when CH does not occur or
+   S is shorter than the encoding of CH. */
+const char8_t *
+u8chr(const char8_t *s, rune ch, size_t n)
+{
+	char8_t enc[U8_LEN_MAX];
+	int width = rtou8(enc, ch, sizeof(enc));
+
+	/* The memmem*() helpers require at least ‘width’ bytes of haystack */
+	if ((size_t)width > n)
+		return nullptr;
+
+	/* Dispatch on the encoded width to the matching byte-level search */
+	if (width == 1)
+		return memchr(s, ch, n);
+	if (width == 2)
+		return memmem2(s, n, enc);
+	if (width == 3)
+		return memmem3(s, n, enc);
+	if (width == 4)
+		return memmem4(s, n, enc);
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8cspn.c b/vendor/librune/lib/utf8/u8cspn.c
new file mode 100644
index 0000000..7d46a0b
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8cspn.c
@@ -0,0 +1,20 @@
+#include "utf8.h"
+
+/* Return the number of runes in the longest prefix of the N-byte string S
+   that contains none of the runes in the M-element array P. */
+size_t
+u8cspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	size_t runes = 0;
+	rune r;
+
+	while (u8next(&r, &s, &n)) {
+		size_t i = 0;
+
+		while (i < m && p[i] != r)
+			i++;
+		if (i < m) /* R is in the reject set */
+			break;
+
+		runes++;
+	}
+
+	return runes;
+}
diff --git a/vendor/librune/lib/utf8/u8len.c b/vendor/librune/lib/utf8/u8len.c
new file mode 100644
index 0000000..fc66ee7
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8len.c
@@ -0,0 +1,13 @@
+#include "utf8.h"
+
+/* Count the runes in the N-byte UTF-8 string S by stepping through it with
+   u8next() until that function reports the end of the input. */
+size_t
+u8len(const char8_t *s, size_t n)
+{
+	rune ignored; /* decoded by u8next(); never inspected here */
+	size_t count;
+
+	for (count = 0; u8next(&ignored, &s, &n); count++)
+		;
+
+	return count;
+}
diff --git a/vendor/librune/lib/utf8/u8next.c b/vendor/librune/lib/utf8/u8next.c
new file mode 100644
index 0000000..8edc084
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8next.c
@@ -0,0 +1,14 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the rune at the front of the string *S of *N bytes into *CH, advance
+   *S past it and reduce *N by its width.  Returns the new value of *S, or
+   nullptr — with nothing modified — when *N is 0.
+
+   Decoding uses u8tor_uc(), which performs no validation. */
+const char8_t *
+u8next(rune *ch, const char8_t **s, size_t *n)
+{
+	if (*n == 0)
+		return nullptr;
+
+	int width = u8tor_uc(ch, *s);
+
+	*n -= width;
+	*s += width;
+	return *s;
+}
diff --git a/vendor/librune/lib/utf8/u8prev.c b/vendor/librune/lib/utf8/u8prev.c
new file mode 100644
index 0000000..fac0fc7
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8prev.c
@@ -0,0 +1,37 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the rune that ends immediately before *P, never looking at bytes in
+   front of START.
+
+   On success the rune is stored in *CH, *P is moved back to the first byte
+   of its encoding and the new value of *P is returned.  When the bytes before
+   *P do not form a sequence accepted by u8chkr(), *CH is set to RUNE_ERROR and
+   *P is moved back by a single byte, so that repeated calls always make
+   progress towards START.  When *P is already at (or before) START, nullptr
+   is returned and nothing is modified. */
+const char8_t *
+u8prev(rune *ch, const char8_t **p, const char8_t *start)
+{
+	int off;
+	bool match = true;
+	const char8_t *s = *p;
+	ptrdiff_t d = s - start; /* number of bytes available before *P */
+
+	/* Try the encoded widths from shortest to longest; each test first makes
+	   sure that enough bytes are available before looking at them */
+	if (d <= 0) {
+		return nullptr;
+	} else if (U1(s[-1])) {
+		*ch = s[-1];
+		off = 1;
+	} else if (d > 1 && UC(s[-1]) && U2(s[-2])) {
+		*ch = ((s[-2] & 0x1F) << 6) | (s[-1] & 0x3F);
+		off = 2;
+	} else if (d > 2 && UC(s[-1]) && UC(s[-2]) && U3(s[-3])) {
+		*ch = ((s[-3] & 0x0F) << 12) | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F);
+		off = 3;
+	} else if (d > 3 && UC(s[-1]) && UC(s[-2]) && UC(s[-3]) && U4(s[-4])) {
+		*ch = ((s[-4] & 0x07) << 18) | ((s[-3] & 0x3F) << 12)
+		    | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F);
+		off = 4;
+	} else
+		match = false;
+
+	if (match && u8chkr(*ch))
+		return *p -= off;
+
+	*ch = RUNE_ERROR;
+	/* Step the caller’s pointer back by one byte.  The parentheses matter:
+	   ‘*p--’ would decrement the local ‘p’ and leave *P where it was. */
+	return --(*p);
+}
diff --git a/vendor/librune/lib/utf8/u8rchr.c b/vendor/librune/lib/utf8/u8rchr.c
new file mode 100644
index 0000000..15fff51
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8rchr.c
@@ -0,0 +1,87 @@
+#include <stdint.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Find the last occurrence of the single byte *N in the K-byte buffer S.
+   Returns a pointer to the match or nullptr if there is none. */
+static const char8_t *
+memrchr1(const char8_t *s, size_t k, const char8_t *n)
+{
+	/* Index from the end instead of walking a pointer backwards, so that no
+	   pointer to before the start of S is ever formed */
+	while (k-- > 0) {
+		if (s[k] == *n)
+			return s + k;
+	}
+	return nullptr;
+}
+
+/* Find the last occurrence of the 2-byte needle N in the K-byte haystack H.
+   Returns a pointer to the match or nullptr if there is none.  K must be at
+   least 2; u8rchr() guarantees this before calling. */
+static const char8_t *
+memrchr2(const char8_t *h, size_t k, const char8_t *n)
+{
+ /* ‘hw’ is a window over two adjacent haystack bytes that slides from the
+    end of the haystack towards its start; ‘nw’ is the needle packed the
+    same way so one comparison tests both bytes */
+ uint16_t hw, nw;
+ const char8_t *H = h + k - 1; /* last byte of the haystack */
+ hw = H[-1] << 8 | H[-0];
+ nw = n[+0] << 8 | n[+1];
+
+ /* ‘H’ always points just before the window, hence the ‘H + 1’ on a match */
+ for (H -= 2, k -= 2; k; k--, hw = hw >> 8 | (*H-- << 8)) {
+ if (hw == nw)
+ return H + 1;
+ }
+
+ /* The window at the very start has not been compared by the loop yet */
+ return hw == nw ? H + 1 : nullptr;
+}
+
+/* Find the last occurrence of the 3-byte needle N in the K-byte haystack H.
+   Returns a pointer to the match or nullptr if there is none.  K must be at
+   least 3; u8rchr() guarantees this before calling. */
+static const char8_t *
+memrchr3(const char8_t *h, size_t k, const char8_t *n)
+{
+	/* The three bytes of interest live in the upper 24 bits of each word; the
+	   low byte is kept at zero by the mask in the loop.  Bytes are widened to
+	   uint32_t before being shifted by 24, because shifting a promoted ‘int’
+	   holding a byte ≥ 0x80 that far overflows a signed type, which is
+	   undefined behaviour. */
+	uint32_t hw, nw;
+	const char8_t *H = h + k - 1; /* last byte of the haystack */
+	hw = (uint32_t)H[-2] << 24 | (uint32_t)H[-1] << 16 | (uint32_t)H[-0] << 8;
+	nw = (uint32_t)n[+0] << 24 | (uint32_t)n[+1] << 16 | (uint32_t)n[+2] << 8;
+
+	/* ‘H’ always points just before the window, hence the ‘H + 1’ on a match */
+	for (H -= 3, k -= 3; k;
+	     k--, hw = (hw >> 8 | ((uint32_t)*H-- << 24)) & UINT32_C(0xFFFFFF00))
+	{
+		if (hw == nw)
+			return H + 1;
+	}
+
+	/* The window at the very start has not been compared by the loop yet */
+	return hw == nw ? H + 1 : nullptr;
+}
+
+/* Find the last occurrence of the 4-byte needle N in the K-byte haystack H.
+   Returns a pointer to the match or nullptr if there is none.  K must be at
+   least 4; u8rchr() guarantees this before calling. */
+static const char8_t *
+memrchr4(const char8_t *h, size_t k, const char8_t *n)
+{
+	/* Window over four adjacent haystack bytes that slides from the end of the
+	   haystack towards its start.  Bytes are widened to uint32_t before being
+	   shifted by 24, because shifting a promoted ‘int’ holding a byte ≥ 0x80
+	   that far overflows a signed type, which is undefined behaviour. */
+	uint32_t hw, nw;
+	const char8_t *H = h + k - 1; /* last byte of the haystack */
+	hw = (uint32_t)H[-3] << 24 | (uint32_t)H[-2] << 16 | (uint32_t)H[-1] << 8
+	   | H[-0];
+	nw = (uint32_t)n[+0] << 24 | (uint32_t)n[+1] << 16 | (uint32_t)n[+2] << 8
+	   | n[+3];
+
+	/* ‘H’ always points just before the window, hence the ‘H + 1’ on a match */
+	for (H -= 4, k -= 4; k; k--, hw = hw >> 8 | ((uint32_t)*H-- << 24)) {
+		if (hw == nw)
+			return H + 1;
+	}
+
+	/* The window at the very start has not been compared by the loop yet */
+	return hw == nw ? H + 1 : nullptr;
+}
+
+/* Find the last occurrence of the rune CH in the N-byte string S.  Returns a
+   pointer to the start of its encoding, or nullptr when CH does not occur or
+   S is shorter than the encoding of CH. */
+const char8_t *
+u8rchr(const char8_t *s, rune ch, size_t n)
+{
+	char8_t enc[U8_LEN_MAX];
+	int width = rtou8(enc, ch, sizeof(enc));
+
+	/* The memrchr*() helpers require at least ‘width’ bytes of haystack */
+	if ((size_t)width > n)
+		return nullptr;
+
+	/* Dispatch on the encoded width to the matching byte-level search */
+	if (width == 1)
+		return memrchr1(s, n, enc);
+	if (width == 2)
+		return memrchr2(s, n, enc);
+	if (width == 3)
+		return memrchr3(s, n, enc);
+	if (width == 4)
+		return memrchr4(s, n, enc);
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8set.c b/vendor/librune/lib/utf8/u8set.c
new file mode 100644
index 0000000..0dfba2c
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8set.c
@@ -0,0 +1,24 @@
+#include <string.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Fill the N-byte buffer S with as many whole copies of the encoded rune CH
+   as fit, and return the number of bytes written.  When the encoding of CH is
+   wider than one byte and N is not a multiple of that width, the trailing
+   N % width bytes are left untouched.
+
+   S is declared const but is written through; the caller must pass a buffer
+   that is actually writable. */
+size_t
+u8set(const char8_t *s, rune ch, size_t n)
+{
+	int m;
+	size_t end;
+	char8_t buf[U8_LEN_MAX];
+
+	if (n == 0)
+		return 0;
+	if (ch <= _1B_MAX) {
+		memset((char *)s, ch, n);
+		return n;
+	}
+	m = rtou8(buf, ch, sizeof(buf));
+
+	/* Stop at the last whole multiple of ‘m’: copying while ‘i < n’ would
+	   let the final memcpy() run up to m - 1 bytes beyond the buffer */
+	end = n - n % m;
+	for (size_t i = 0; i < end; i += m)
+		memcpy((char *)s + i, buf, m);
+
+	return end;
+}
diff --git a/vendor/librune/lib/utf8/u8spn.c b/vendor/librune/lib/utf8/u8spn.c
new file mode 100644
index 0000000..beeb33f
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8spn.c
@@ -0,0 +1,22 @@
+#include "utf8.h"
+
+/* Return the number of runes in the longest prefix of the N-byte string S
+   that consists solely of runes found in the M-element array P. */
+size_t
+u8spn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	size_t runes = 0;
+	rune r;
+
+	while (u8next(&r, &s, &n)) {
+		size_t i = 0;
+
+		while (i < m && p[i] != r)
+			i++;
+		if (i == m) /* R is not in the accept set */
+			break;
+
+		runes++;
+	}
+
+	return runes;
+}
diff --git a/vendor/librune/lib/utf8/u8tor.c b/vendor/librune/lib/utf8/u8tor.c
new file mode 100644
index 0000000..152a174
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8tor.c
@@ -0,0 +1,31 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the UTF-8 sequence starting at S into *CH, with validation.
+
+   Returns the number of bytes the sequence occupies (1–4).  When the lead
+   byte is not followed by the expected continuation bytes, or the decoded
+   value is rejected by u8chkr(), *CH is set to RUNE_ERROR and 1 is returned.
+
+   No length is taken: up to three bytes after S[0] may be inspected,
+   depending on the lead byte. */
+int
+u8tor(rune *ch, const char8_t *s)
+{
+	int width;
+
+	if (U1(s[0])) {
+		width = 1;
+		*ch = s[0];
+	} else if (U2(s[0]) && UC(s[1])) {
+		width = 2;
+		*ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
+	} else if (U3(s[0]) && UC(s[1]) && UC(s[2])) {
+		width = 3;
+		*ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
+	} else if (U4(s[0]) && UC(s[1]) && UC(s[2]) && UC(s[3])) {
+		width = 4;
+		*ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12)
+		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
+	} else
+		width = 0; /* no well-formed sequence starts here */
+
+	/* *CH is only examined when one of the branches above assigned it */
+	if (width == 0 || !u8chkr(*ch)) {
+		*ch = RUNE_ERROR;
+		return 1;
+	}
+
+	return width;
+}
diff --git a/vendor/librune/lib/utf8/u8tor_uc.c b/vendor/librune/lib/utf8/u8tor_uc.c
new file mode 100644
index 0000000..ea57332
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8tor_uc.c
@@ -0,0 +1,24 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the UTF-8 sequence starting at S into *CH without any validation
+   (‘uc’ presumably stands for ‘unchecked’) and return its width in bytes.
+
+   Only the lead byte selects the width; the continuation bytes are used as
+   they are.  A lead byte matching none of U1–U4 is marked unreachable, so the
+   caller must only pass input that is already known to be well-formed. */
+int
+u8tor_uc(rune *ch, const char8_t *s)
+{
+	if (U1(s[0])) {
+		*ch = s[0];
+		return 1;
+	}
+
+	if (U2(s[0])) {
+		*ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
+		return 2;
+	}
+
+	if (U3(s[0])) {
+		*ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
+		return 3;
+	}
+
+	if (U4(s[0])) {
+		*ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12)
+		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
+		return 4;
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8wdth.c b/vendor/librune/lib/utf8/u8wdth.c
new file mode 100644
index 0000000..0bc5785
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8wdth.c
@@ -0,0 +1,13 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Return the number of bytes needed to encode CH as UTF-8 (1–4), or 0 when CH
+   is greater than _4B_MAX and therefore cannot be encoded. */
+int
+u8wdth(rune ch)
+{
+	if (ch <= _1B_MAX)
+		return 1;
+	if (ch <= _2B_MAX)
+		return 2;
+	if (ch <= _3B_MAX)
+		return 3;
+	if (ch <= _4B_MAX)
+		return 4;
+	return 0;
+}