aboutsummaryrefslogtreecommitdiff
path: root/lib/unicode/string/u8title.c
blob: f4d9b7e34bf07410a5c5df7b826221ff653c674b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "_attrs.h"
#include "mbstring.h"
#include "unicode/prop.h"
#include "unicode/string.h"

/* U+0307 COMBINING DOT ABOVE */
constexpr rune COMB_DOT_ABOVE = 0x0307;

/* Report whether the value uprop_get_ccc() yields for CH is either 0 or
   230. */
[[unsequenced, _mlib_inline]]
static inline bool
uprop_ccc_0_or_230(rune ch)
{
	const enum uprop_ccc ccc = uprop_get_ccc(ch);

	if (ccc == 0)
		return true;
	return ccc == 230;
}

/* Convert the UTF-8 text in SV to titlecase, encoding the result into DST.

   DST    output buffer
   DSTN   capacity of DST in bytes
   SV     input text; taken by value, so the caller's view is not modified
   FLAGS  language tailoring; this function reads CF_LANG_AZ, CF_LANG_LT
          and CF_LANG_NL

   Within each word (as delimited by u8wnext()) runes are mapped with
   uprop_get_tc() up to and including the first cased rune, and with
   uprop_get_lc() from the second cased rune onwards.  With CF_LANG_NL a
   word-initial ‘i’/‘I’ followed directly by ‘j’/‘J’ has both runes
   titlecased.

   Returns the sum of the lengths reported by rtou8() for every output
   rune.  Runes are still encoded (into a scratch buffer) and counted once
   DST is full, so the result can exceed DSTN — presumably this is the size
   the caller needs to hold the whole result; confirm against rtou8()'s
   contract for a too-small destination. */
size_t
u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
        enum caseflags flags)
{
	/* Zero-initialize both contexts.  ctx_l.after_I is read at the bottom
	   of the very first iteration before anything has assigned it, and the
	   structures are handed by value to uprop_get_lc()/uprop_get_tc(), so
	   no field may be left indeterminate. */
	struct tcctx ctx_t = {};
	struct lcctx ctx_l = {};

	ctx_t.az_or_tr = ctx_l.az_or_tr = flags & CF_LANG_AZ;
	ctx_t.lt = ctx_l.lt = flags & CF_LANG_LT;

	rune ch;
	bool nl_IJ = false;
	size_t n = 0, before_dot_cnt = 0, more_above_cnt = 0;
	struct u8view word = {}, wcpy = sv;
	struct {
		bool before;  /* A cased rune precedes (ignoring case-ignorables) */
		size_t after; /* >0 while a cased rune is known to follow */
	} final_sigma = {};
	enum {
		TITLE,   /* No cased rune seen yet in the current word */
		BETWEEN, /* First cased rune of the word has been titlecased */
		LOWER,   /* A second cased rune was seen; lowercase from here */
	} state = TITLE;

	while (u8next(&ch, &sv)) {
		/* sv.p now points just past CH.  Once that is beyond the end of
		   the current word, fetch the next word and restart the state
		   machine. */
		if (sv.p > word.p + word.len) {
			u8wnext(&word, &wcpy);
			ctx_t.after_soft_dotted = false;
			state = TITLE;
		}

		if (ctx_l.az_or_tr || ctx_l.lt) {
			if (before_dot_cnt == 0 || more_above_cnt == 0) {
				/* Look ahead to the next rune whose combining class is 0
				   or 230 (or to the end of the input), counting the
				   steps taken to get there. */
				rune la = 0;
				struct u8view rest = sv;

				before_dot_cnt = more_above_cnt = 0;
				do {
					before_dot_cnt++;
					more_above_cnt++;
				} while (u8next(&la, &rest) && !uprop_ccc_0_or_230(la));

				/* Each count is kept only if the rune that ended the
				   scan satisfies the corresponding condition. */
				if (la != COMB_DOT_ABOVE)
					before_dot_cnt = 0;
				if (uprop_get_ccc(la) != 230)
					more_above_cnt = 0;
			} else {
				before_dot_cnt--;
				more_above_cnt--;
			}
		}

		if (final_sigma.after == 0) {
			/* Look ahead past case-ignorable runes; the count is kept
			   only if the rune that ended the scan is cased. */
			rune la = 0;
			struct u8view rest = sv;

			do
				final_sigma.after++;
			while (u8next(&la, &rest) && uprop_is_ci(la));

			if (!uprop_is_cased(la))
				final_sigma.after = 0;
		} else
			final_sigma.after--;

		ctx_l.before_dot = before_dot_cnt > 0;
		ctx_l.more_above = more_above_cnt > 0;
		ctx_l.final_sigma = final_sigma.before && final_sigma.after == 0;

		if (state == BETWEEN && uprop_is_cased(ch))
			state = LOWER;

		/* nl_IJ forces the titlecase mapping for the ‘j’ of a Dutch ‘ij’
		   even though the state machine has already moved to LOWER. */
		struct rview rv = state == LOWER && !nl_IJ ? uprop_get_lc(ch, ctx_l)
		                                           : uprop_get_tc(ch, ctx_t);
		nl_IJ = false;

		if (state == TITLE && uprop_is_cased(ch)) {
			state = BETWEEN;

			if (flags & CF_LANG_NL) {
				/* Peek at the rune directly after CH without consuming
				   it. */
				rune next = 0;
				if (sv.len > 0)
					u8tor(&next, sv.p);
				nl_IJ =
					(ch == 'i' || ch == 'I') && (next == 'j' || next == 'J');
			}
		}

		for (size_t i = 0; i < rv.len; i++) {
			if (n >= dstn) {
				/* DST is full; encode into a scratch buffer purely to
				   keep the running length accurate. */
				char8_t buf[U8_LEN_MAX];
				n += rtou8(buf, sizeof(buf), rv.p[i]);
			} else
				n += rtou8(dst + n, dstn - n, rv.p[i]);
		}

		/* after_I becomes set on ‘I’ and stays set only while the runes
		   that follow have a combining class other than 0 or 230. */
		ctx_l.after_I =
			(ch == 'I') || (ctx_l.after_I && !uprop_ccc_0_or_230(ch));

		/* A cased rune sets ‘before’, a case-ignorable rune leaves it
		   untouched, and anything else clears it. */
		if (uprop_is_cased(ch))
			final_sigma.before = true;
		else if (!uprop_is_ci(ch))
			final_sigma.before = false;

		/* Likewise, after_soft_dotted survives only across runes whose
		   combining class is neither 0 nor 230. */
		if (uprop_is_sd(ch))
			ctx_t.after_soft_dotted = true;
		else if (uprop_ccc_0_or_230(ch))
			ctx_t.after_soft_dotted = false;
	}

	return n;
}