aboutsummaryrefslogtreecommitdiff
path: root/lib/unicode/string/u8lower.c
blob: 052217f037e2885b33c69bef86bbe402cfd37833 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#include "mbstring.h"
#include "unicode/prop.h"
#include "unicode/string.h"

/* Combining marks that u8lower() compares against the rune following the
   one being lowercased:
     U+0300 COMBINING GRAVE ACCENT
     U+0301 COMBINING ACUTE ACCENT
     U+0303 COMBINING TILDE
     U+0307 COMBINING DOT ABOVE */
constexpr rune COMB_GRAVE     = 0x0300;
constexpr rune COMB_ACUTE     = 0x0301;
constexpr rune COMB_TILDE     = 0x0303;
constexpr rune COMB_DOT_ABOVE = 0x0307;

/* Lowercase the UTF-8 string SRC (SRCN bytes) into DST, which has room for
   DSTN bytes.  FLAGS selects the language-specific context that is handed to
   uprop_get_lc() (the CF_LANG_AZ and CF_LANG_LT bits are consulted).

   The return value is the running sum of what rtou8() reports for every
   output rune.  The sum keeps growing after DST has been filled, so a result
   larger than DSTN appears to mean the output was truncated and reports the
   size that would be needed.
   NOTE(review): this assumes rtou8() returns the encoded length of the rune
   and never writes more than the capacity it is given -- confirm against its
   definition. */
size_t
u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
        enum caseflags flags)
{
	size_t total = 0;
	rune cur;
	struct lcctx ctx = {
		.az_or_tr = flags & CF_LANG_AZ,
		.lt = flags & CF_LANG_LT,
	};

	while (u8next(&cur, &src, &srcn)) {
		/* TODO: Set ‘eow’ once word-segmentation is implemented */

		/* Peek at the rune after CUR without consuming it; it stays 0 when
		   CUR was the last rune of the input.
		   NOTE(review): u8tor() is not passed srcn -- confirm that it cannot
		   read past the end of SRC on a truncated sequence. */
		rune peek = 0;
		if (srcn > 0)
			u8tor(&peek, src);

		ctx.before_dot = (peek == COMB_DOT_ABOVE);
		ctx.before_acc = (peek == COMB_GRAVE
		               || peek == COMB_ACUTE
		               || peek == COMB_TILDE);

		struct rview lc = uprop_get_lc(cur, ctx);
		for (size_t k = 0; k < lc.len; k++) {
			if (total < dstn) {
				/* Still inside DST: encode into the remaining space. */
				total += rtou8(dst + total, dstn - total, lc.p[k]);
			} else {
				/* DST is exhausted: encode into a throwaway buffer so the
				   byte count keeps accumulating. */
				char8_t scratch[U8_LEN_MAX];
				total += rtou8(scratch, sizeof(scratch), lc.p[k]);
			}
		}

		/* Record for the next iteration whether this rune was a capital I. */
		ctx.after_I = (cur == 'I');
	}

	return total;
}