aboutsummaryrefslogblamecommitdiff
path: root/vendor/librune/lib/gbrk/u8gnext.c
blob: 875d5cbdd2300cfc295777735e181f2c44b61af2 (plain) (tree)



































































































































































                                                                                  
#include <sys/types.h>

#include "gbrk.h"
#include "utf8.h"

#include "internal/common.h"
#include "internal/gbrk_lookup.h"

#define lengthof(a) (sizeof(a) / sizeof(*(a)))

struct gbrk_state {
	enum {
		GB9C_NONE,
		GB9C_CNSNT,
		GB9C_LNK,
	} gb9c;
	bool gb11 : 1;
	bool gb12 : 1;
};

static bool u8isgbrk(rune, rune, struct gbrk_state *);
static gbrk_prop getprop(rune);

const char8_t *
u8gnext(struct u8view *g, const char8_t **s, size_t *n)
{
	int m;
	rune ch1;
	const char8_t *p;
	struct gbrk_state gs = {0};

	if (*n == 0)
		return nullptr;

	g->p = p = *s;
	p += u8tor_uc(&ch1, p);

	for (;;) {
		rune ch2;

		if ((size_t)(p - *s) >= *n)
			ch2 = 0;
		else
			m = u8tor_uc(&ch2, p);
		if (u8isgbrk(ch1, ch2, &gs)) {
			*n -= g->len = p - *s;
			return *s = p;
		}

		ch1 = ch2;
		p += m;
	}
}

bool
u8isgbrk(rune a, rune b, struct gbrk_state *gs)
{
	gbrk_prop ap, bp;

	/* GB1 & GB2 */
	if (!a || !b)
		goto do_break;

	/* GB3 & ASCII fast-track */
	if ((a | b) < 0x300) {
		if (a == '\r' && b == '\n')
			return false;
		goto do_break;
	}

	/* GB4 */
	if (a == '\r' || a == '\n' || ((ap = getprop(a)) & GBP_CTRL))
		goto do_break;

	/* GB5 */
	if (b == '\r' || b == '\n' || ((bp = getprop(b)) & GBP_CTRL))
		goto do_break;

	/* Setting flags for GB9c */
	if (ap & GBP_INDC_CNSNT)
		gs->gb9c = GB9C_CNSNT;
	else if ((ap & GBP_INDC_LNK) && gs->gb9c == GB9C_CNSNT)
		gs->gb9c = GB9C_LNK;

	/* GB6 */
	if ((ap & GBP_HNGL_L)
	    && (bp & (GBP_HNGL_L | GBP_HNGL_V | GBP_HNGL_LV | GBP_HNGL_LVT)))
	{
		return false;
	}

	/* GB7 */
	if ((ap & (GBP_HNGL_LV | GBP_HNGL_V)) && (bp & (GBP_HNGL_V | GBP_HNGL_T)))
		return false;

	/* GB8 */
	if ((ap & (GBP_HNGL_LVT | GBP_HNGL_T)) && (bp & GBP_HNGL_T))
		return false;

	/* GB9 */
	if (bp & (GBP_EXT | GBP_ZWJ)) {
		if (ap & GBP_PIC)
			gs->gb11 = true;
		return false;
	}

	/* GB9a */
	if (bp & GBP_SM)
		return false;

	/* GB9b */
	if (ap & GBP_PREP)
		return false;

	/* GB9c */
	if ((ap & (GBP_INDC_EXT | GBP_INDC_LNK)) && (bp & GBP_INDC_CNSNT)
	    && gs->gb9c == GB9C_LNK)
	{
		return false;
	}

	/* GB11 */
	if (gs->gb11) {
		if ((ap & GBP_EXT) && (bp & (GBP_EXT | GBP_ZWJ)))
			return false;
		if ((ap & GBP_ZWJ) && (bp & GBP_PIC))
			return false;
	}

	/* GB12 & GB13 */
	if (ap & GBP_RI) {
		if (gs->gb12 || !(bp & GBP_RI))
			goto do_break;
		gs->gb12 = true;
		return false;
	}

	/* GB999 */
do_break:
	gs->gb9c = GB9C_NONE;
	gs->gb11 = gs->gb12 = false;
	return true;
}

gbrk_prop
getprop(rune ch)
{
	ssize_t lo, hi;

	lo = 0;
	hi = lengthof(gbrk_prop_tbl) - 1;

	while (lo <= hi) {
		ssize_t i = (lo + hi) / 2;

		if (ch < gbrk_prop_tbl[i].lo)
			hi = i - 1;
		else if (ch > gbrk_prop_tbl[i].hi)
			lo = i + 1;
		else
			return gbrk_prop_tbl[i].prop;
	}

	return GBP_OTHER;
}