diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-04-23 01:13:43 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-23 01:32:43 +0200 | 
| commit | f8727410e6c83a8390eb9a4812bd8831d07d49e4 (patch) | |
| tree | c9e1ed68de1346fc8285e9a956a792481d55d003 | |
| parent | a348937d08f3cc5bf94e07c3b888992f0033bca1 (diff) | |
Add u8title()
| -rw-r--r-- | include/unicode/string.h | 14 |
| -rw-r--r-- | lib/unicode/string/u8title.c | 58 | 
2 files changed, 67 insertions, 5 deletions
```diff
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 887a216..cb19821 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -6,6 +6,8 @@
 #include "_charN_t.h"
 #include "_u8view.h"
+#define mlib_warn_trunc nodiscard("don’t forget to check for truncation")
+
 /* clang-format off */
 enum [[clang::flag_enum]] caseflags {
@@ -25,18 +27,20 @@ size_t u8gnext(struct u8view *, const char8_t **, size_t *);
 size_t u8wnext(struct u8view *, const char8_t **, size_t *);
 size_t u8wnext_human(struct u8view *, const char8_t **, size_t *);
-#define mlib_warn_trunc nodiscard("don’t forget to check for truncation")
 [[mlib_warn_trunc]] size_t u8casefold(char8_t *restrict, size_t,
                                       const char8_t *, size_t, enum caseflags);
 [[mlib_warn_trunc]] size_t u8lower(char8_t *restrict, size_t, const char8_t *,
                                    size_t, enum caseflags);
+[[mlib_warn_trunc]] size_t u8title(char8_t *restrict, size_t, const char8_t *,
+                                   size_t, enum caseflags);
 [[mlib_warn_trunc]] size_t u8upper(char8_t *restrict, size_t, const char8_t *,
                                    size_t, enum caseflags);
-#undef mlib_warn_trunc
-constexpr double U8LOWER_SCALE    = 1.5;
+constexpr double U8LOWER_SCALE = 1.5;
 constexpr double U8LOWER_SCALE_LT = 3;
-constexpr double U8TITLE_SCALE    = 3;
-constexpr double U8UPPER_SCALE    = 3;
+constexpr double U8TITLE_SCALE = 3;
+constexpr double U8UPPER_SCALE = 3;
+
+#undef mlib_warn_trunc
 #endif /* !MLIB_UNICODE_STRING_H */
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
new file mode 100644
index 0000000..a462c4e
--- /dev/null
+++ b/lib/unicode/string/u8title.c
@@ -0,0 +1,58 @@
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+
+constexpr rune COMB_GRAVE     = 0x0300;
+constexpr rune COMB_ACUTE     = 0x0301;
+constexpr rune COMB_TILDE     = 0x0303;
+constexpr rune COMB_DOT_ABOVE = 0x0307;
+
+size_t
+u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+        enum caseflags flags)
+{
+	struct tcctx ctx_t = {
+		.az_or_tr = flags & CF_LANG_AZ,
+		.lt = flags & CF_LANG_LT,
+	};
+	struct lcctx ctx_l = {
+		.az_or_tr = ctx_t.az_or_tr,
+		.lt = ctx_t.lt,
+	};
+
+	int w;
+	rune ch;
+	bool sow;
+	size_t n = 0;
+	struct u8view word = {}, cpy = {src, srcn};
+
+	while (w = u8next(&ch, &src, &srcn)) {
+		rune next = 0;
+		if (srcn > 0)
+			u8tor(&next, src);
+		if (src > word.p + word.len)
+			u8wnext(&word, U8_ARGSP(cpy));
+
+		sow = src - w == word.p;
+		ctx_l.eow = src == word.p + word.len;
+		ctx_l.before_dot = next == COMB_DOT_ABOVE;
+		ctx_l.before_acc = next == COMB_GRAVE
+		                || next == COMB_ACUTE
+		                || next == COMB_TILDE;
+
+		struct rview rv = sow ? uprop_get_tc(ch, ctx_t)
+		                      : uprop_get_lc(ch, ctx_l);
+		for (size_t i = 0; i < rv.len; i++) {
+			if (n >= dstn) {
+				char8_t buf[U8_LEN_MAX];
+				n += rtou8(buf, sizeof(buf), rv.p[i]);
+			} else
+				n += rtou8(dst + n, dstn - n, rv.p[i]);
+		}
+
+		ctx_t.after_i = ch == 'i';
+		ctx_l.after_I = ch == 'I';
+	}
+
+	return n;
+}
```