diff options
Diffstat (limited to 'vendor/librune/lib/utf8')
| -rw-r--r-- | vendor/librune/lib/utf8/rtou8.c | 36 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8bspn.c | 22 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8cbspn.c | 20 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8chk.c | 19 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8chkr.c | 9 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8chr.c | 97 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8cspn.c | 20 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8len.c | 13 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8next.c | 14 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8prev.c | 37 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8rchr.c | 87 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8set.c | 24 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8spn.c | 22 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8tor.c | 31 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8tor_uc.c | 24 | ||||
| -rw-r--r-- | vendor/librune/lib/utf8/u8wdth.c | 13 | 
16 files changed, 488 insertions, 0 deletions
| diff --git a/vendor/librune/lib/utf8/rtou8.c b/vendor/librune/lib/utf8/rtou8.c
new file mode 100644
index 0000000..1823f08
--- /dev/null
+++ b/vendor/librune/lib/utf8/rtou8.c
@@ -0,0 +1,36 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Encode the rune CH as UTF-8 into S, which has room for N bytes.
+
+   Returns the number of bytes the encoding of CH occupies (1-4).  Nothing is
+   written when N is smaller than that, so a call with N == 0 merely reports
+   the required size.  CH must not exceed _4B_MAX; larger values reach
+   unreachable(). */
+int
+rtou8(char8_t *s, rune ch, size_t n)
+{
+	int len;
+
+	/* Pick the encoded length from the magnitude of the rune. */
+	if (ch <= _1B_MAX)
+		len = 1;
+	else if (ch <= _2B_MAX)
+		len = 2;
+	else if (ch <= _3B_MAX)
+		len = 3;
+	else if (ch <= _4B_MAX)
+		len = 4;
+	else
+		unreachable();
+
+	/* Too little room: report the size but leave S untouched. */
+	if (n < (size_t)len)
+		return len;
+
+	switch (len) {
+	case 1:
+		s[0] = ch;
+		break;
+	case 2:
+		s[0] = (ch >> 6) | 0xC0;
+		s[1] = (ch & 0x3F) | 0x80;
+		break;
+	case 3:
+		s[0] = (ch >> 12) | 0xE0;
+		s[1] = ((ch >> 6) & 0x3F) | 0x80;
+		s[2] = (ch & 0x3F) | 0x80;
+		break;
+	case 4:
+		s[0] = (ch >> 18) | 0xF0;
+		s[1] = ((ch >> 12) & 0x3F) | 0x80;
+		s[2] = ((ch >> 6) & 0x3F) | 0x80;
+		s[3] = (ch & 0x3F) | 0x80;
+		break;
+	}
+
+	return len;
+}
diff --git a/vendor/librune/lib/utf8/u8bspn.c b/vendor/librune/lib/utf8/u8bspn.c
new file mode 100644
index 0000000..3ccd469
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8bspn.c
@@ -0,0 +1,22 @@
+#include "utf8.h"
+
+/* Return the length in bytes of the longest prefix of S (N bytes long) that is
+   made up only of runes found in the M-element array P.  Runes are read with
+   u8next(). */
+size_t
+u8bspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune r;
+	size_t bytes = 0;
+
+	while (u8next(&r, &s, &n)) {
+		size_t i = 0;
+
+		/* Linear scan of the accept set; i == m means "not present". */
+		while (i < m && p[i] != r)
+			i++;
+		if (i == m)
+			break;
+
+		bytes += u8wdth(r);
+	}
+
+	return bytes;
+}
diff --git a/vendor/librune/lib/utf8/u8cbspn.c b/vendor/librune/lib/utf8/u8cbspn.c
new file mode 100644
index 0000000..b51c300
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8cbspn.c
@@ -0,0 +1,20 @@
+#include "utf8.h"
+
+/* Return the length in bytes of the longest prefix of S (N bytes long) that
+   contains no rune from the M-element array P.  Runes are read with
+   u8next(). */
+size_t
+u8cbspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune r;
+	size_t bytes = 0;
+
+	while (u8next(&r, &s, &n)) {
+		/* The first rune that appears in P ends the span. */
+		for (size_t i = 0; i < m; i++) {
+			if (p[i] == r)
+				return bytes;
+		}
+
+		bytes += u8wdth(r);
+	}
+
+	return bytes;
+}
diff --git a/vendor/librune/lib/utf8/u8chk.c b/vendor/librune/lib/utf8/u8chk.c
new file mode 100644
index 0000000..9a2cf03
--- /dev/null
+++ 
b/vendor/librune/lib/utf8/u8chk.c
@@ -0,0 +1,19 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Validate the N bytes at S as UTF-8.
+
+   Returns nullptr when every sequence decodes successfully with u8tor(), or
+   a pointer to the first byte of the first sequence that does not.  A
+   sequence that is cut off by the end of the buffer counts as invalid. */
+const char8_t *
+u8chk(const char8_t *s, size_t n)
+{
+	while (n) {
+		rune ch;
+		int m;
+
+		/* u8tor() takes no length, so make sure the sequence announced
+		   by the lead byte fits in the remaining N bytes before letting
+		   it look at the continuation bytes.  U1..U4 are applied to the
+		   lead byte exactly as u8tor() applies them. */
+		m = U1(*s) ? 1 : U2(*s) ? 2 : U3(*s) ? 3 : U4(*s) ? 4 : 0;
+		if (m == 0 || (size_t)m > n)
+			return s;
+
+		m = u8tor(&ch, s);
+		if (ch == RUNE_ERROR)
+			return s;
+
+		/* Step over the rune that was just accepted. */
+		s += m;
+		n -= m;
+	}
+
+	return nullptr;
+}
diff --git a/vendor/librune/lib/utf8/u8chkr.c b/vendor/librune/lib/utf8/u8chkr.c
new file mode 100644
index 0000000..4510f16
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chkr.c
@@ -0,0 +1,9 @@
+#include "rune.h"
+#include "utf8.h"
+
+/* Report whether CH is an acceptable rune: false for the surrogate range
+   U+D800..U+DFFF, for U+FFFE and U+FFFF, and for anything above RUNE_MAX;
+   true otherwise. */
+bool
+u8chkr(rune ch)
+{
+	return !((ch >= 0xD800 && ch <= 0xDFFF) || ch == 0xFFFE || ch == 0xFFFF
+	         || ch > RUNE_MAX);
+}
diff --git a/vendor/librune/lib/utf8/u8chr.c b/vendor/librune/lib/utf8/u8chr.c
new file mode 100644
index 0000000..4ecbd10
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8chr.c
@@ -0,0 +1,97 @@
+#include <stdint.h>
+#include <string.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* NOTE: The memmem*() functions were taken directly from the memmem()
+   implementation on OpenBSD.  As a result, these functions are licensed under
+   OpenBSD's 2-Clause BSD License instead of this library's 0-Clause BSD License.
+
+   The license for these functions is as follows:
+
+       Copyright © 2005–2020 Rich Felker, et al.
+
+       Permission is hereby granted, free of charge, to any person obtaining
+       a copy of this software and associated documentation files (the
+       “Software”), to deal in the Software without restriction, including
+       without limitation the rights to use, copy, modify, merge, publish,
+       distribute, sublicense, and/or sell copies of the Software, and to
+       permit persons to whom the Software is furnished to do so, subject to
+       the following conditions:
+
+       The above copyright notice and this permission notice shall be
+       included in all copies or substantial portions of the Software.
+
+       THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
+       EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+       MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+       IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+       CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+       TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+       SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* Find the first occurrence of the 2-byte needle N in the K-byte haystack H
+   by sliding a 16-bit window over it.  K must be at least 2; u8chr() checks
+   this before dispatching here. */
+static const char8_t *
+memmem2(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint16_t hw, nw;
+	hw = h[0] << 8 | h[1];
+	nw = n[0] << 8 | n[1];
+
+	for (h += 2, k -= 2; k; k--, hw = hw << 8 | *h++) {
+		if (hw == nw)
+			return h - 2;
+	}
+	return hw == nw ? h - 2 : nullptr;
+}
+
+/* As memmem2(), for a 3-byte needle.  The window lives in the upper 24 bits
+   of a 32-bit word; the low byte is always zero.  K must be at least 3. */
+static const char8_t *
+memmem3(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+	/* Widen before shifting by 24: a byte >= 0x80 promoted to int would
+	   otherwise be shifted into the sign bit. */
+	hw = (uint32_t)h[0] << 24 | h[1] << 16 | h[2] << 8;
+	nw = (uint32_t)n[0] << 24 | n[1] << 16 | n[2] << 8;
+
+	for (h += 3, k -= 3; k; k--, hw = (hw | *h++) << 8) {
+		if (hw == nw)
+			return h - 3;
+	}
+	return hw == nw ? h - 3 : nullptr;
+}
+
+/* As memmem2(), for a 4-byte needle and a full 32-bit window.  K must be at
+   least 4. */
+static const char8_t *
+memmem4(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+	/* Widen before shifting by 24; see memmem3(). */
+	hw = (uint32_t)h[0] << 24 | h[1] << 16 | h[2] << 8 | h[3];
+	nw = (uint32_t)n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3];
+
+	for (h += 4, k -= 4; k; k--, hw = hw << 8 | *h++) {
+		if (hw == nw)
+			return h - 4;
+	}
+	return hw == nw ? h - 4 : nullptr;
+}
+
+/* Return a pointer to the first occurrence of the rune CH in the N bytes at
+   S, or nullptr if there is none.  CH is encoded with rtou8() and the search
+   is done on the encoded bytes. */
+const char8_t *
+u8chr(const char8_t *s, rune ch, size_t n)
+{
+	char8_t buf[U8_LEN_MAX];
+	int m = rtou8(buf, ch, sizeof(buf));
+
+	/* A haystack shorter than the needle cannot contain it; this also
+	   guarantees the K >= m precondition of the memmem*() helpers. */
+	if (n < (size_t)m)
+		return nullptr;
+	switch (m) {
+	case 1:
+		return memchr(s, ch, n);
+	case 2:
+		return memmem2(s, n, buf);
+	case 3:
+		return memmem3(s, n, buf);
+	case 4:
+		return memmem4(s, n, buf);
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8cspn.c b/vendor/librune/lib/utf8/u8cspn.c
new file mode 100644
index 0000000..7d46a0b
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8cspn.c
@@ -0,0 +1,20 @@
+#include "utf8.h"
+
+/* Return the number of runes in the longest prefix of S (N bytes long) that
+   contains no rune from the M-element array P.  The result counts runes, not
+   bytes. */
+size_t
+u8cspn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune ch;
+	size_t k = 0;
+
+	while (u8next(&ch, &s, &n)) {
+		for (size_t i = 0; i < m; i++) {
+			if (p[i] == ch)
+				goto found;
+		}
+
+		k++;
+	}
+
+found:
+	return k;
+}
diff --git a/vendor/librune/lib/utf8/u8len.c b/vendor/librune/lib/utf8/u8len.c
new file mode 100644
index 0000000..fc66ee7
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8len.c
@@ -0,0 +1,13 @@
+#include "utf8.h"
+
+/* Return the number of runes that u8next() yields from the N bytes at S. */
+size_t
+u8len(const char8_t *s, size_t n)
+{
+	rune unused;
+	size_t m = 0;
+
+	while (u8next(&unused, &s, &n))
+		m++;
+
+	return m;
+}
diff --git a/vendor/librune/lib/utf8/u8next.c b/vendor/librune/lib/utf8/u8next.c
new file mode 100644
index 0000000..8edc084
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8next.c
@@ -0,0 +1,14 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Read the next rune from the cursor *S, which has *N bytes left.
+
+   Returns nullptr, leaving everything untouched, when *N is 0.  Otherwise the
+   rune is decoded into *CH with u8tor_uc() (no validation is performed), *S
+   is advanced past it, *N is reduced by the same number of bytes, and the new
+   value of *S is returned. */
+const char8_t *
+u8next(rune *ch, const char8_t **s, size_t *n)
+{
+	int m;
+
+	if (*n == 0)
+		return nullptr;
+	*n -= m = u8tor_uc(ch, *s);
+	return *s += m;
+}
diff --git a/vendor/librune/lib/utf8/u8prev.c b/vendor/librune/lib/utf8/u8prev.c
new file mode 100644
index 0000000..fac0fc7
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8prev.c
@@ -0,0 +1,37 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode the rune that ends immediately before the cursor *P, never looking
+   at bytes before START.
+
+   Returns nullptr, leaving *CH and *P untouched, when *P is not past START.
+   On success *CH receives the rune, *P is moved back to the rune's first
+   byte, and the new *P is returned.  On malformed input *CH is set to
+   RUNE_ERROR and *P is moved back by a single byte, mirroring the one-byte
+   skip that u8tor() performs in the forward direction. */
+const char8_t *
+u8prev(rune *ch, const char8_t **p, const char8_t *start)
+{
+	int off;
+	bool match = true;
+	const char8_t *s = *p;
+	ptrdiff_t d = s - start;
+
+	if (d <= 0) {
+		return nullptr;
+	} else if (U1(s[-1])) {
+		*ch = s[-1];
+		off = 1;
+	} else if (d > 1 && UC(s[-1]) && U2(s[-2])) {
+		*ch = ((s[-2] & 0x1F) << 6) | (s[-1] & 0x3F);
+		off = 2;
+	} else if (d > 2 && UC(s[-1]) && UC(s[-2]) && U3(s[-3])) {
+		*ch = ((s[-3] & 0x0F) << 12) | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F);
+		off = 3;
+	} else if (d > 3 && UC(s[-1]) && UC(s[-2]) && UC(s[-3]) && U4(s[-4])) {
+		*ch = ((s[-4] & 0x07) << 18) | ((s[-3] & 0x3F) << 12)
+		    | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F);
+		off = 4;
+	} else
+		match = false;
+
+	/* u8wdth() is the length rtou8() would use for *ch; a longer sequence
+	   is an overlong encoding and is rejected like any other bad input. */
+	if (match && u8wdth(*ch) == off && u8chkr(*ch))
+		return *p -= off;
+
+	*ch = RUNE_ERROR;
+	/* Move the caller's cursor, not the local parameter: `*p--' would
+	   leave *p where it was and the caller would never make progress. */
+	return --*p;
+}
diff --git a/vendor/librune/lib/utf8/u8rchr.c b/vendor/librune/lib/utf8/u8rchr.c
new file mode 100644
index 0000000..15fff51
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8rchr.c
@@ -0,0 +1,87 @@
+#include <stdint.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Find the last byte equal to *N in the K bytes at S, scanning backwards. */
+static const char8_t *
+memrchr1(const char8_t *s, size_t k, const char8_t *n)
+{
+	for (const char8_t *p = s + k - 1; k-- > 0; p--) {
+		if (*p == *n)
+			return p;
+	}
+	return nullptr;
+}
+
+/* Find the last occurrence of the 2-byte needle N in the K-byte haystack H.
+   A 16-bit window starts on the final two bytes and slides towards the
+   front.  K must be at least 2; u8rchr() checks this. */
+static const char8_t *
+memrchr2(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint16_t hw, nw;
+	const char8_t *H = h + k - 1;
+	hw = H[-1] << 8 | H[-0];
+	nw = n[+0] << 8 | n[+1];
+
+	for (H -= 2, k -= 2; k; k--, hw = hw >> 8 | (*H-- << 8)) {
+		if (hw == nw)
+			return H + 1;
+	}
+
+	return hw == nw ? H + 1 : nullptr;
+}
+
+/* As memrchr2(), for a 3-byte needle.  The window occupies the upper 24 bits
+   of a 32-bit word and the low byte is masked to zero after every step.
+   K must be at least 3. */
+static const char8_t *
+memrchr3(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+	const char8_t *H = h + k - 1;
+	/* Widen before shifting by 24: a byte >= 0x80 promoted to int would
+	   otherwise be shifted into the sign bit. */
+	hw = (uint32_t)H[-2] << 24 | H[-1] << 16 | H[-0] << 8;
+	nw = (uint32_t)n[+0] << 24 | n[+1] << 16 | n[+2] << 8;
+
+	for (H -= 3, k -= 3; k;
+	     k--, hw = (hw >> 8 | ((uint32_t)*H-- << 24)) & UINT32_C(0xFFFFFF00))
+	{
+		if (hw == nw)
+			return H + 1;
+	}
+
+	return hw == nw ? H + 1 : nullptr;
+}
+
+/* As memrchr2(), for a 4-byte needle and a full 32-bit window.  K must be at
+   least 4. */
+static const char8_t *
+memrchr4(const char8_t *h, size_t k, const char8_t *n)
+{
+	uint32_t hw, nw;
+	const char8_t *H = h + k - 1;
+	/* Widen before shifting by 24; see memrchr3(). */
+	hw = (uint32_t)H[-3] << 24 | H[-2] << 16 | H[-1] << 8 | H[-0];
+	nw = (uint32_t)n[+0] << 24 | n[+1] << 16 | n[+2] << 8 | n[+3];
+
+	for (H -= 4, k -= 4; k; k--, hw = hw >> 8 | ((uint32_t)*H-- << 24)) {
+		if (hw == nw)
+			return H + 1;
+	}
+
+	return hw == nw ? H + 1 : nullptr;
+}
+
+/* Return a pointer to the last occurrence of the rune CH in the N bytes at S,
+   or nullptr if there is none.  CH is encoded with rtou8() and the search is
+   done on the encoded bytes. */
+const char8_t *
+u8rchr(const char8_t *s, rune ch, size_t n)
+{
+	char8_t buf[U8_LEN_MAX];
+	int m = rtou8(buf, ch, sizeof(buf));
+
+	/* A haystack shorter than the needle cannot contain it; this also
+	   guarantees the K >= m precondition of the memrchr*() helpers. */
+	if (n < (size_t)m)
+		return nullptr;
+	switch (m) {
+	case 1:
+		return memrchr1(s, n, buf);
+	case 2:
+		return memrchr2(s, n, buf);
+	case 3:
+		return memrchr3(s, n, buf);
+	case 4:
+		return memrchr4(s, n, buf);
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8set.c b/vendor/librune/lib/utf8/u8set.c
new file mode 100644
index 0000000..0dfba2c
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8set.c
@@ -0,0 +1,24 @@
+#include <string.h>
+
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Fill the N-byte buffer S with as many whole copies of the rune CH as fit.
+
+   Returns the number of bytes written, which is the largest multiple of the
+   encoded length of CH that does not exceed N; any bytes beyond that are left
+   untouched.  Note that S is declared const but is written through. */
+size_t
+u8set(const char8_t *s, rune ch, size_t n)
+{
+	int m;
+	size_t end;
+	char8_t buf[U8_LEN_MAX];
+
+	if (n == 0)
+		return 0;
+	if (ch <= _1B_MAX) {
+		memset((char *)s, ch, n);
+		return n;
+	}
+	m = rtou8(buf, ch, sizeof(buf));
+
+	/* Stop at the last position where a complete sequence still fits, so
+	   that a buffer whose size is not a multiple of m is never overrun. */
+	end = n - n % m;
+	for (size_t i = 0; i < end; i += m)
+		memcpy((char *)s + i, buf, m);
+
+	return end;
+}
diff --git a/vendor/librune/lib/utf8/u8spn.c b/vendor/librune/lib/utf8/u8spn.c
new file mode 100644
index 0000000..beeb33f
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8spn.c
@@ -0,0 +1,22 @@
+#include "utf8.h"
+
+/* Return the number of runes in the longest prefix of S (N bytes long) that
+   is made up only of runes found in the M-element array P.  The result counts
+   runes, not bytes. */
+size_t
+u8spn(const char8_t *s, size_t n, const rune *p, size_t m)
+{
+	rune ch;
+	size_t k = 0;
+
+	while (u8next(&ch, &s, &n)) {
+		for (size_t i = 0; i < m; i++) {
+			if (p[i] == ch) {
+				k++;
+				goto found;
+			}
+		}
+
+		break;
+found:;
+	}
+
+	return k;
+}
diff --git a/vendor/librune/lib/utf8/u8tor.c b/vendor/librune/lib/utf8/u8tor.c
new file mode 100644
index 0000000..152a174
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8tor.c
@@ -0,0 +1,31 @@
+#include "rune.h"
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Decode one rune from the UTF-8 text at S into *CH and return the number of
+   bytes it occupies.
+
+   On malformed input *CH is set to RUNE_ERROR and 1 is returned, so a caller
+   can skip one byte and try again.  Input is rejected when the lead byte
+   matches none of U1..U4, when a continuation byte fails UC(), when the
+   sequence is longer than the decoded value needs (an overlong encoding), or
+   when the decoded value fails u8chkr().
+
+   No length is passed in.  Continuation bytes are examined one at a time and
+   the examination stops at the first one that fails UC(). */
+int
+u8tor(rune *ch, const char8_t *s)
+{
+	int n = 0;
+
+	if (U1(s[0])) {
+		*ch = s[0];
+		n = 1;
+	} else if (U2(s[0]) && UC(s[1])) {
+		*ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
+		n = 2;
+	} else if (U3(s[0]) && UC(s[1]) && UC(s[2])) {
+		*ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
+		n = 3;
+	} else if (U4(s[0]) && UC(s[1]) && UC(s[2]) && UC(s[3])) {
+		*ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12)
+		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
+		n = 4;
+	}
+
+	/* u8wdth() is the length rtou8() would use for *ch.  If the sequence
+	   just read is longer than that it is an overlong encoding, which must
+	   not be accepted as an alias for the shorter form. */
+	if (n && u8wdth(*ch) == n && u8chkr(*ch))
+		return n;
+
+	*ch = RUNE_ERROR;
+	return 1;
+}
diff --git a/vendor/librune/lib/utf8/u8tor_uc.c b/vendor/librune/lib/utf8/u8tor_uc.c
new file mode 100644
index 0000000..ea57332
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8tor_uc.c
@@ -0,0 +1,24 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Unchecked counterpart of u8tor(): decode one rune from S into *CH and
+   return its length in bytes.
+
+   Only the lead byte is classified; continuation bytes are used as they are
+   and the result is not validated.  A lead byte that matches none of U1..U4
+   reaches unreachable(), so S must already be known to hold well-formed
+   UTF-8. */
+int
+u8tor_uc(rune *ch, const char8_t *s)
+{
+	if (U1(s[0])) {
+		*ch = s[0];
+		return 1;
+	} else if (U2(s[0])) {
+		*ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
+		return 2;
+	} else if (U3(s[0])) {
+		*ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
+		return 3;
+	} else if (U4(s[0])) {
+		*ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12)
+		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
+		return 4;
+	}
+
+	unreachable();
+}
diff --git a/vendor/librune/lib/utf8/u8wdth.c b/vendor/librune/lib/utf8/u8wdth.c
new file mode 100644
index 0000000..0bc5785
--- /dev/null
+++ b/vendor/librune/lib/utf8/u8wdth.c
@@ -0,0 +1,13 @@
+#include "utf8.h"
+
+#include "internal/common.h"
+
+/* Return the number of bytes (1-4) that rtou8() uses to encode CH, or 0 when
+   CH is greater than _4B_MAX. */
+int
+u8wdth(rune ch)
+{
+	return ch <= _1B_MAX ? 1
+	     : ch <= _2B_MAX ? 2
+	     : ch <= _3B_MAX ? 3
+	     : ch <= _4B_MAX ? 4
+	                     : 0;
+}
|