aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-15 01:19:12 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-15 01:19:12 +0200
commit 5ea567bef8836164ae6b3829762b237c9ea31bc3 (patch)
tree bc69c9d5cd2cccde6717684c16ad6b519d80e722
parent dfb50ba9e7874d095019a5d62d1a0aaf04207ac5 (diff)
Add u8lower()
-rw-r--r-- include/unicode/string.h 2
-rw-r--r-- lib/unicode/string/u8lower.c 46
2 files changed, 48 insertions, 0 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 6197235..0c1786c 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -16,6 +16,8 @@ enum [[clang::__flag_enum__]] caseflags {
size_t u8glen(const char8_t *, size_t);
size_t u8gnext(struct u8view *, const char8_t **, size_t *);
+size_t u8lower(char8_t *restrict, size_t, const char8_t *, size_t,
+ enum caseflags);
size_t u8upper(char8_t *restrict, size_t, const char8_t *, size_t,
enum caseflags);
diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c
new file mode 100644
index 0000000..052217f
--- /dev/null
+++ b/lib/unicode/string/u8lower.c
@@ -0,0 +1,46 @@
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+/* Combining marks that u8lower() compares against the rune following the current one */
+constexpr rune COMB_GRAVE = 0x0300; /* U+0300 COMBINING GRAVE ACCENT */
+constexpr rune COMB_ACUTE = 0x0301; /* U+0301 COMBINING ACUTE ACCENT */
+constexpr rune COMB_TILDE = 0x0303; /* U+0303 COMBINING TILDE */
+constexpr rune COMB_DOT_ABOVE = 0x0307; /* U+0307 COMBINING DOT ABOVE */
+/*
+ * Lowercase the input at src (srcn long) into dst.
+ *
+ * The input is read one rune at a time with u8next(); each rune is mapped
+ * with uprop_get_lc() under a small context structure, and every rune of the
+ * mapping is encoded with rtou8().
+ *
+ * flags: CF_LANG_AZ switches on ctx.az_or_tr and CF_LANG_LT switches on
+ * ctx.lt; no other flag is consulted in this function.
+ *
+ * Returns the running total of the values returned by rtou8().  Once that
+ * total reaches dstn, further runes are encoded into a scratch buffer
+ * instead of dst, but they are still added to the total.
+ *
+ * NOTE(review): this presumably lets a caller learn the required size of dst
+ * from the return value, and relies on rtou8() never writing more than the
+ * size it is handed when fewer than U8_LEN_MAX units remain in dst — confirm
+ * both against rtou8().
+ */
+size_t
+u8lower(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+ enum caseflags flags)
+{
+ struct lcctx ctx = {
+ .az_or_tr = flags & CF_LANG_AZ,
+ .lt = flags & CF_LANG_LT,
+ };
+ /* ch is the rune currently being mapped; n accumulates the rtou8() results */
+ rune ch;
+ size_t n = 0;
+
+ while (u8next(&ch, &src, &srcn)) {
+ /* TODO: Set ‘eow’ once word-segmentation is implemented */
+ /*
+ * Look at the rune that follows ch.  src is passed by value here, so
+ * the peek itself cannot move the read position; next stays 0 when no
+ * input remains.
+ */
+ rune next = 0;
+ if (srcn > 0)
+ u8tor(&next, src);
+ ctx.before_dot = next == COMB_DOT_ABOVE;
+ ctx.before_acc = next == COMB_GRAVE
+ || next == COMB_ACUTE
+ || next == COMB_TILDE;
+ /* Map ch under the context built so far, then emit every rune of the result */
+ struct rview rv = uprop_get_lc(ch, ctx);
+ for (size_t i = 0; i < rv.len; i++) {
+ if (n >= dstn) { /* no room left in dst: encode into scratch so n keeps growing */
+ char8_t buf[U8_LEN_MAX];
+ n += rtou8(buf, sizeof(buf), rv.p[i]);
+ } else
+ n += rtou8(dst + n, dstn - n, rv.p[i]);
+ }
+ /* Record for the next iteration whether the rune just handled was a capital I */
+ ctx.after_I = ch == 'I';
+ }
+
+ return n;
+}