aboutsummaryrefslogtreecommitdiff
path: root/lib/unicode/string/u8lower.c
blob: f59cf39547abd3cd95953a866c11f44aefb022cb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include "_attrs.h"
#include "mbstring.h"
#include "unicode/prop.h"
#include "unicode/string.h"

constexpr rune COMB_DOT_ABOVE = 0x0307;

[[unsequenced, _mlib_inline]] static inline bool
uprop_ccc_0_or_230(rune ch)
{
	enum uprop_ccc x = uprop_get_ccc(ch);
	return x == 0 || x == 230;
}

size_t
u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
        enum caseflags flags)
{
	struct lcctx ctx = {
		.az_or_tr = flags & CF_LANG_AZ,
		.lt = flags & CF_LANG_LT,
	};

	rune ch;
	size_t n, before_dot_cnt, more_above_cnt;
	struct {
		bool before;
		size_t after;
	} final_sigma = {};

	n = before_dot_cnt = more_above_cnt = 0;

	while (u8next(&ch, &src, &srcn)) {
		rune next = 0;
		if (srcn > 0)
			u8tor(&next, src);

		if (ctx.az_or_tr || ctx.lt) {
			if (before_dot_cnt == 0 || more_above_cnt == 0) {
				rune ch = 0;
				before_dot_cnt = more_above_cnt = 0;
				struct u8view cpy = {src, srcn};

				do {
					before_dot_cnt++;
					more_above_cnt++;
				} while (u8next(&ch, U8_ARGSP(cpy)) && !uprop_ccc_0_or_230(ch));

				if (ch != COMB_DOT_ABOVE)
					before_dot_cnt = 0;
				if (uprop_get_ccc(ch) != 230)
					more_above_cnt = 0;
			} else {
				before_dot_cnt--;
				more_above_cnt--;
			}
		}

		if (final_sigma.after == 0) {
			rune ch;
			struct u8view cpy = {src, srcn};

			do
				final_sigma.after++;
			while (u8next(&ch, U8_ARGSP(cpy)) && uprop_is_ci(ch));

			if (!uprop_is_cased(ch))
				final_sigma.after = 0;
		} else
			final_sigma.after--;

		ctx.before_dot = before_dot_cnt > 0;
		ctx.more_above = more_above_cnt > 0;
		ctx.final_sigma = final_sigma.before && final_sigma.after == 0;

		struct rview rv = uprop_get_lc(ch, ctx);
		for (size_t i = 0; i < rv.len; i++) {
			if (n >= dstn) {
				char8_t buf[U8_LEN_MAX];
				n += rtou8(buf, sizeof(buf), rv.p[i]);
			} else
				n += rtou8(dst + n, dstn - n, rv.p[i]);
		}

		ctx.after_I = (ch == 'I') || (ctx.after_I && !uprop_ccc_0_or_230(ch));
		if (uprop_is_cased(ch))
			final_sigma.before = true;
		else if (!uprop_is_ci(ch))
			final_sigma.before = false;
	}

	return n;
}