aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-13 11:42:12 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-13 11:42:12 +0200
commit e6ddd22060e49ddd8382069ef03fcc3d2618af13 (patch)
tree 6ee599b65cde97d513081222dfd9f4441e77f8a2
parent d3a04ae6ea8c086e45c2bf51d3ebe97b724abae8 (diff)
Add u8upper()
-rw-r--r-- include/unicode/string.h 9
-rw-r--r-- lib/unicode/string/u8upper.c 36
2 files changed, 45 insertions, 0 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index c2a99c1..10e16ae 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -6,7 +6,16 @@
#include "__charN_t.h"
#include "__u8view.h"
+/* Option bits accepted by u8upper(); combine them with bitwise OR. */
+enum [[clang::__flag_enum__]] upper_flags {
+	/* Both names select the same bit; u8upper() feeds it to ctx.az_or_tr */
+	UF_LANG_AZ = 1 << 0,
+	UF_LANG_TR = UF_LANG_AZ,
+	/* Enables the U+0307-after-'i' handling in u8upper() */
+	UF_LANG_LT = 1 << 1,
+	/* Forwarded by u8upper() as ctx.cap_eszett */
+	UF_ẞ = 1 << 2,
+};
+
size_t u8glen(const char8_t *, size_t);
size_t u8gnext(struct u8view *, const char8_t **, size_t *);
+size_t u8upper(char8_t *, size_t, const char8_t *, size_t, enum upper_flags);
+
#endif /* !MLIB_UNICODE_STRING_H */
diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c
new file mode 100644
index 0000000..df4a086
--- /dev/null
+++ b/lib/unicode/string/u8upper.c
@@ -0,0 +1,36 @@
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+
+/*
+ * Uppercase the UTF-8 input SRC (SRCN bytes) into DST (capacity DSTN bytes).
+ *
+ * Every rune decoded by u8next() is mapped through uprop_get_uc() and each
+ * rune of the mapping is re-encoded with rtou8().  FLAGS adjusts the mapping:
+ *
+ *   UF_LANG_AZ / UF_LANG_TR  forwarded as ctx.az_or_tr
+ *   UF_ẞ                     forwarded as ctx.cap_eszett
+ *   UF_LANG_LT               sets ctx.lt_after_i for a U+0307 that directly
+ *                            follows an 'i'
+ *
+ * Returns the sum of all rtou8() return values.  Once that sum has reached
+ * DSTN, further runes are encoded into a throwaway buffer, so counting
+ * continues and the result may exceed DSTN.  Presumably this is the size DST
+ * would need for the complete result; confirm against rtou8()'s contract.
+ *
+ * NOTE(review): the flag tests are stored unnormalised (UF_ẞ yields 4); this
+ * assumes the struct ucctx members are bool -- verify.
+ */
+size_t
+u8upper(char8_t *dst, size_t dstn, const char8_t *src, size_t srcn,
+	enum upper_flags flags)
+{
+	struct ucctx ctx = {
+		.az_or_tr = flags & UF_LANG_AZ,
+		.cap_eszett = flags & UF_ẞ,
+	};
+	/* Sink for output that no longer fits in dst; contents are discarded */
+	char8_t scratch[U8_LEN_MAX];
+	size_t total = 0;
+	bool after_i = false;
+	rune r;
+
+	while (u8next(&r, &src, &srcn)) {
+		/* Only a COMBINING DOT ABOVE immediately after 'i' arms this.
+		   NOTE(review): other dotted letters are not considered here;
+		   confirm that is intended. */
+		ctx.lt_after_i = r == 0x307 && after_i && (flags & UF_LANG_LT);
+
+		struct rview mapped = uprop_get_uc(r, ctx);
+		for (size_t k = 0; k < mapped.len; k++) {
+			if (total < dstn) {
+				/* rtou8() receives only the remaining capacity; what
+				   it does when the rune does not fit is not visible
+				   from here. */
+				total += rtou8(dst + total, dstn - total, mapped.p[k]);
+			} else
+				total += rtou8(scratch, sizeof(scratch), mapped.p[k]);
+		}
+
+		after_i = r == 'i';
+	}
+
+	return total;
+}