diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-04-15 01:19:12 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-15 01:19:12 +0200 | 
| commit | 5ea567bef8836164ae6b3829762b237c9ea31bc3 (patch) | |
| tree | bc69c9d5cd2cccde6717684c16ad6b519d80e722 /lib/unicode | |
| parent | dfb50ba9e7874d095019a5d62d1a0aaf04207ac5 (diff) | |
Add u8lower()
Diffstat (limited to 'lib/unicode')
| -rw-r--r-- | lib/unicode/string/u8lower.c | 46 | 
1 files changed, 46 insertions, 0 deletions
diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c new file mode 100644 index 0000000..052217f --- /dev/null +++ b/lib/unicode/string/u8lower.c @@ -0,0 +1,46 @@ +#include "mbstring.h" +#include "unicode/prop.h" +#include "unicode/string.h" + +/* Combining-mark code points; the rune that follows the current one is compared against these in u8lower() */ +constexpr rune COMB_GRAVE     = 0x0300; +constexpr rune COMB_ACUTE     = 0x0301; +constexpr rune COMB_TILDE     = 0x0303; +constexpr rune COMB_DOT_ABOVE = 0x0307; + +/* + * Pass every rune read from src/srcn through uprop_get_lc() (presumably the + * lowercase mapping -- confirm against unicode/prop.h) and encode the + * resulting runes into dst with rtou8(). + * + * CF_LANG_AZ and CF_LANG_LT in flags set the az_or_tr and lt context fields. + * At most dstn bytes of dst are offered to rtou8(); once the running total + * reaches dstn, runes are encoded into a scratch buffer instead of dst. + * + * Returns the sum of all rtou8() return values, which may exceed dstn. + */ +size_t +u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, +        enum caseflags flags) +{ +	struct lcctx ctx = { +		.az_or_tr = flags & CF_LANG_AZ, +		.lt = flags & CF_LANG_LT, +	}; + +	rune ch; +	size_t n = 0; + +	while (u8next(&ch, &src, &srcn)) { +		/* TODO: Set ‘eow’ once word-segmentation is implemented */ + +		/* Peek at the rune after ch without advancing src; next stays 0 when no input remains */ +		rune next = 0; +		if (srcn > 0) +			u8tor(&next, src); +		ctx.before_dot = next == COMB_DOT_ABOVE; +		ctx.before_acc = next == COMB_GRAVE +		              || next == COMB_ACUTE +		              || next == COMB_TILDE; + +		struct rview rv = uprop_get_lc(ch, ctx); +		for (size_t i = 0; i < rv.len; i++) { +			if (n >= dstn) { +				/* dst is already full: encode into a throwaway buffer so n keeps accumulating */ +				char8_t buf[U8_LEN_MAX]; +				n += rtou8(buf, sizeof(buf), rv.p[i]); +			} else +				n += rtou8(dst + n, dstn - n, rv.p[i]); +		} + +		/* Remember whether the rune just handled was 'I'; the next iteration passes this in ctx */ +		ctx.after_I = ch == 'I'; +	} + +	return n; +}  |