diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 | 
| commit | 4f93f935dc7a981ca073a322425c3f5929ffb644 (patch) | |
| tree | 4460586408ec7fdfcecf3ba4584f0435067125a6 /vendor/librune/lib/gbrk | |
| parent | 72ea25a4d73e3e026366d4165f5bc4ec9e7418cb (diff) | |
Support line- & column-based match locations
Diffstat (limited to 'vendor/librune/lib/gbrk')
| -rw-r--r-- | vendor/librune/lib/gbrk/u8glen.c | 13 | ||||
| -rw-r--r-- | vendor/librune/lib/gbrk/u8gnext.c | 165 | 
2 files changed, 178 insertions, 0 deletions
| diff --git a/vendor/librune/lib/gbrk/u8glen.c b/vendor/librune/lib/gbrk/u8glen.c new file mode 100644 index 0000000..13cac7e --- /dev/null +++ b/vendor/librune/lib/gbrk/u8glen.c @@ -0,0 +1,13 @@ +#include "gbrk.h" + +size_t +u8glen(const char8_t *s, size_t n) +{ +	size_t m = 0; +	struct u8view unused; + +	while (u8gnext(&unused, &s, &n)) +		m++; + +	return m; +} diff --git a/vendor/librune/lib/gbrk/u8gnext.c b/vendor/librune/lib/gbrk/u8gnext.c new file mode 100644 index 0000000..875d5cb --- /dev/null +++ b/vendor/librune/lib/gbrk/u8gnext.c @@ -0,0 +1,165 @@ +#include <sys/types.h> + +#include "gbrk.h" +#include "utf8.h" + +#include "internal/common.h" +#include "internal/gbrk_lookup.h" + +#define lengthof(a) (sizeof(a) / sizeof(*(a))) + +struct gbrk_state { +	enum { +		GB9C_NONE, +		GB9C_CNSNT, +		GB9C_LNK, +	} gb9c; +	bool gb11 : 1; +	bool gb12 : 1; +}; + +static bool u8isgbrk(rune, rune, struct gbrk_state *); +static gbrk_prop getprop(rune); + +const char8_t * +u8gnext(struct u8view *g, const char8_t **s, size_t *n) +{ +	int m; +	rune ch1; +	const char8_t *p; +	struct gbrk_state gs = {0}; + +	if (*n == 0) +		return nullptr; + +	g->p = p = *s; +	p += u8tor_uc(&ch1, p); + +	for (;;) { +		rune ch2; + +		if ((size_t)(p - *s) >= *n) +			ch2 = 0; +		else +			m = u8tor_uc(&ch2, p); +		if (u8isgbrk(ch1, ch2, &gs)) { +			*n -= g->len = p - *s; +			return *s = p; +		} + +		ch1 = ch2; +		p += m; +	} +} + +bool +u8isgbrk(rune a, rune b, struct gbrk_state *gs) +{ +	gbrk_prop ap, bp; + +	/* GB1 & GB2 */ +	if (!a || !b) +		goto do_break; + +	/* GB3 & ASCII fast-track */ +	if ((a | b) < 0x300) { +		if (a == '\r' && b == '\n') +			return false; +		goto do_break; +	} + +	/* GB4 */ +	if (a == '\r' || a == '\n' || ((ap = getprop(a)) & GBP_CTRL)) +		goto do_break; + +	/* GB5 */ +	if (b == '\r' || b == '\n' || ((bp = getprop(b)) & GBP_CTRL)) +		goto do_break; + +	/* Setting flags for GB9c */ +	if (ap & GBP_INDC_CNSNT) +		gs->gb9c = GB9C_CNSNT; +	else if ((ap & GBP_INDC_LNK) && gs->gb9c == GB9C_CNSNT) +		gs->gb9c = GB9C_LNK; + +	/* GB6 */ +	if ((ap & GBP_HNGL_L) +	    && (bp & (GBP_HNGL_L | GBP_HNGL_V | GBP_HNGL_LV | GBP_HNGL_LVT))) +	{ +		return false; +	} + +	/* GB7 */ +	if ((ap & (GBP_HNGL_LV | GBP_HNGL_V)) && (bp & (GBP_HNGL_V | GBP_HNGL_T))) +		return false; + +	/* GB8 */ +	if ((ap & (GBP_HNGL_LVT | GBP_HNGL_T)) && (bp & GBP_HNGL_T)) +		return false; + +	/* GB9 */ +	if (bp & (GBP_EXT | GBP_ZWJ)) { +		if (ap & GBP_PIC) +			gs->gb11 = true; +		return false; +	} + +	/* GB9a */ +	if (bp & GBP_SM) +		return false; + +	/* GB9b */ +	if (ap & GBP_PREP) +		return false; + +	/* GB9c */ +	if ((ap & (GBP_INDC_EXT | GBP_INDC_LNK)) && (bp & GBP_INDC_CNSNT) +	    && gs->gb9c == GB9C_LNK) +	{ +		return false; +	} + +	/* GB11 */ +	if (gs->gb11) { +		if ((ap & GBP_EXT) && (bp & (GBP_EXT | GBP_ZWJ))) +			return false; +		if ((ap & GBP_ZWJ) && (bp & GBP_PIC)) +			return false; +	} + +	/* GB12 & GB13 */ +	if (ap & GBP_RI) { +		if (gs->gb12 || !(bp & GBP_RI)) +			goto do_break; +		gs->gb12 = true; +		return false; +	} + +	/* GB999 */ +do_break: +	gs->gb9c = GB9C_NONE; +	gs->gb11 = gs->gb12 = false; +	return true; +} + +gbrk_prop +getprop(rune ch) +{ +	ssize_t lo, hi; + +	lo = 0; +	hi = lengthof(gbrk_prop_tbl) - 1; + +	while (lo <= hi) { +		ssize_t i = (lo + hi) / 2; + +		if (ch < gbrk_prop_tbl[i].lo) +			hi = i - 1; +		else if (ch > gbrk_prop_tbl[i].hi) +			lo = i + 1; +		else +			return gbrk_prop_tbl[i].prop; +	} + +	return GBP_OTHER; +} |