aboutsummaryrefslogtreecommitdiff
path: root/lib/unicode
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-23 01:13:43 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-23 01:32:43 +0200
commit f8727410e6c83a8390eb9a4812bd8831d07d49e4 (patch)
tree c9e1ed68de1346fc8285e9a956a792481d55d003 /lib/unicode
parent a348937d08f3cc5bf94e07c3b888992f0033bca1 (diff)
Add u8title()
Diffstat (limited to 'lib/unicode')
-rw-r--r-- lib/unicode/string/u8title.c 58
1 files changed, 58 insertions, 0 deletions
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
new file mode 100644
index 0000000..a462c4e
--- /dev/null
+++ b/lib/unicode/string/u8title.c
@@ -0,0 +1,58 @@
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+
+/*
+ * Combining marks that the casing code compares against the rune that
+ * follows the one currently being mapped.
+ */
+constexpr rune COMB_GRAVE     = 0x0300, /* U+0300 COMBINING GRAVE ACCENT */
+               COMB_ACUTE     = 0x0301, /* U+0301 COMBINING ACUTE ACCENT */
+               COMB_TILDE     = 0x0303, /* U+0303 COMBINING TILDE */
+               COMB_DOT_ABOVE = 0x0307; /* U+0307 COMBINING DOT ABOVE */
+
+/*
+ * Titlecase the UTF-8 string SRC of SRCN bytes into DST, which has room
+ * for DSTN bytes.
+ *
+ * The first rune of every word reported by u8wnext() is mapped with
+ * uprop_get_tc(); every other rune is mapped with uprop_get_lc().  FLAGS
+ * is tested for CF_LANG_AZ and CF_LANG_LT to fill in the az_or_tr and lt
+ * fields of both mapping contexts.
+ *
+ * Returns the sum of the values returned by rtou8() for every rune of the
+ * mapped output.  Once that sum reaches DSTN, further runes are encoded
+ * into a scratch buffer and only counted, so the return value may exceed
+ * DSTN (presumably the size needed for the full result, and a signal that
+ * the output was truncated -- confirm against rtou8()'s contract).
+ */
+size_t
+u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+	enum caseflags flags)
+{
+	struct tcctx ctx_t = {
+		.az_or_tr = flags & CF_LANG_AZ,
+		.lt = flags & CF_LANG_LT,
+	};
+	struct lcctx ctx_l = {
+		.az_or_tr = ctx_t.az_or_tr,
+		.lt = ctx_t.lt,
+	};
+
+	int w;
+	rune ch;
+	size_t n = 0;
+	/* 'word' is the word currently being cased; 'cpy' is a second cursor
+	   over the input that u8wnext() consumes independently of src/srcn. */
+	struct u8view word = {}, cpy = {src, srcn};
+
+	while ((w = u8next(&ch, &src, &srcn)) != 0) {
+		/* Peek at the rune after CH; stays 0 at the end of the input. */
+		rune next = 0;
+		if (srcn > 0)
+			u8tor(&next, src);
+
+		/* Fetch the next word once src has moved past the current one.
+		   Before the first word has been fetched word.p is null, and
+		   doing arithmetic on or ordering against a null pointer is
+		   undefined, so test for that case explicitly first. */
+		if (!word.p || src > word.p + word.len)
+			u8wnext(&word, U8_ARGSP(cpy));
+
+		/* src already points past CH (u8next() advanced it by w bytes),
+		   so src - w is where CH starts. */
+		bool sow = src - w == word.p;
+		ctx_l.eow = src == word.p + word.len;
+		ctx_l.before_dot = next == COMB_DOT_ABOVE;
+		ctx_l.before_acc = next == COMB_GRAVE
+		                || next == COMB_ACUTE
+		                || next == COMB_TILDE;
+
+		/* Titlecase the rune that starts a word, lowercase the rest. */
+		struct rview rv = sow ? uprop_get_tc(ch, ctx_t)
+		                      : uprop_get_lc(ch, ctx_l);
+		for (size_t i = 0; i < rv.len; i++) {
+			if (n >= dstn) {
+				/* No room left: encode into scratch space purely to
+				   keep counting the output length. */
+				char8_t buf[U8_LEN_MAX];
+				n += rtou8(buf, sizeof(buf), rv.p[i]);
+			} else {
+				/* NOTE(review): dstn - n may be smaller than the
+				   encoded length of rv.p[i]; this relies on rtou8()
+				   honouring the size it is given -- confirm. */
+				n += rtou8(dst + n, dstn - n, rv.p[i]);
+			}
+		}
+
+		/* Remember CH for the context of the next rune's mapping. */
+		ctx_t.after_i = ch == 'i';
+		ctx_l.after_I = ch == 'I';
+	}
+
+	return n;
+}