aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/unicode/string.h14
-rw-r--r--lib/unicode/string/u8title.c58
2 files changed, 67 insertions, 5 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 887a216..cb19821 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -6,6 +6,8 @@
#include "_charN_t.h"
#include "_u8view.h"
+#define mlib_warn_trunc nodiscard("don’t forget to check for truncation")
+
/* clang-format off */
enum [[clang::flag_enum]] caseflags {
@@ -25,18 +27,20 @@ size_t u8gnext(struct u8view *, const char8_t **, size_t *);
size_t u8wnext(struct u8view *, const char8_t **, size_t *);
size_t u8wnext_human(struct u8view *, const char8_t **, size_t *);
-#define mlib_warn_trunc nodiscard("don’t forget to check for truncation")
[[mlib_warn_trunc]] size_t u8casefold(char8_t *restrict, size_t,
const char8_t *, size_t, enum caseflags);
[[mlib_warn_trunc]] size_t u8lower(char8_t *restrict, size_t, const char8_t *,
size_t, enum caseflags);
+[[mlib_warn_trunc]] size_t u8title(char8_t *restrict, size_t, const char8_t *,
+ size_t, enum caseflags);
[[mlib_warn_trunc]] size_t u8upper(char8_t *restrict, size_t, const char8_t *,
size_t, enum caseflags);
-#undef mlib_warn_trunc
-constexpr double U8LOWER_SCALE = 1.5;
+constexpr double U8LOWER_SCALE = 1.5;
constexpr double U8LOWER_SCALE_LT = 3;
-constexpr double U8TITLE_SCALE = 3;
-constexpr double U8UPPER_SCALE = 3;
+constexpr double U8TITLE_SCALE = 3;
+constexpr double U8UPPER_SCALE = 3;
+
+#undef mlib_warn_trunc
#endif /* !MLIB_UNICODE_STRING_H */
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
new file mode 100644
index 0000000..a462c4e
--- /dev/null
+++ b/lib/unicode/string/u8title.c
@@ -0,0 +1,58 @@
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+
+/* Code points of the combining marks that u8title() looks for immediately
+   after the code point being mapped: U+0307 (dot above) sets .before_dot in
+   the lowercase context, and U+0300 (grave), U+0301 (acute) and U+0303
+   (tilde) set .before_acc. */
+constexpr rune COMB_GRAVE = 0x0300;
+constexpr rune COMB_ACUTE = 0x0301;
+constexpr rune COMB_TILDE = 0x0303;
+constexpr rune COMB_DOT_ABOVE = 0x0307;
+
+/* Write the titlecased form of the SRCN-byte UTF-8 string SRC to DST, which
+   has room for DSTN bytes.
+
+   The input is walked one code point at a time with u8next().  The first
+   code point of every word reported by u8wnext() is mapped with
+   uprop_get_tc(); every other code point is mapped with uprop_get_lc().
+   FLAGS selects language-specific behaviour: CF_LANG_AZ sets .az_or_tr and
+   CF_LANG_LT sets .lt in both mapping contexts.
+
+   Returns the sum of the lengths reported by rtou8() for every mapped code
+   point.  Once that sum reaches DSTN nothing more is stored through DST but
+   counting continues, so a return value larger than DSTN means the output
+   did not fit; the prototype is marked nodiscard so that callers check.
+   NOTE(review): what rtou8() stores and returns when the remaining space is
+   non-zero but too small for the encoding is not visible here -- confirm. */
+size_t
+u8title(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+        enum caseflags flags)
+{
+	struct tcctx ctx_t = {
+		.az_or_tr = flags & CF_LANG_AZ,
+		.lt = flags & CF_LANG_LT,
+	};
+	struct lcctx ctx_l = {
+		.az_or_tr = ctx_t.az_or_tr,
+		.lt = ctx_t.lt,
+	};
+
+	int w;
+	rune ch;
+	size_t n = 0;
+
+	/* WORD starts out as an empty view positioned at SRC rather than as a
+	   null view.  The first iteration still sees SRC beyond the end of WORD
+	   (u8next() has advanced it by then) and so fetches the first word, but
+	   the comparison no longer does arithmetic on a null pointer. */
+	struct u8view word = {src, 0}, cpy = {src, srcn};
+
+	while ((w = u8next(&ch, &src, &srcn)) != 0) {
+		/* Peek at the following code point; the lowercase context
+		   depends on whether it is one of the COMB_* marks. */
+		rune next = 0;
+		if (srcn > 0)
+			u8tor(&next, src);
+
+		/* SRC already points past CH, so once it lies beyond the end of
+		   WORD, CH belongs to the next word. */
+		if (src > word.p + word.len)
+			u8wnext(&word, U8_ARGSP(cpy));
+
+		/* CH starts the word when it begins exactly at WORD's first byte,
+		   and ends it when SRC sits exactly at WORD's end. */
+		bool sow = src - w == word.p;
+		ctx_l.eow = src == word.p + word.len;
+		ctx_l.before_dot = next == COMB_DOT_ABOVE;
+		ctx_l.before_acc = next == COMB_GRAVE
+		                || next == COMB_ACUTE
+		                || next == COMB_TILDE;
+
+		struct rview rv = sow ? uprop_get_tc(ch, ctx_t)
+		                      : uprop_get_lc(ch, ctx_l);
+		for (size_t i = 0; i < rv.len; i++) {
+			if (n >= dstn) {
+				/* DST is full: encode into a scratch buffer purely
+				   to learn the length so that N keeps growing. */
+				char8_t buf[U8_LEN_MAX];
+				n += rtou8(buf, sizeof(buf), rv.p[i]);
+			} else
+				n += rtou8(dst + n, dstn - n, rv.p[i]);
+		}
+
+		/* Record whether CH was 'i' / 'I' for the mapping of the next
+		   code point. */
+		ctx_t.after_i = ch == 'i';
+		ctx_l.after_I = ch == 'I';
+	}
+
+	return n;
+}