From e6ddd22060e49ddd8382069ef03fcc3d2618af13 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Sat, 13 Apr 2024 11:42:12 +0200
Subject: Add u8upper()

---
 include/unicode/string.h     |  9 +++++++++
 lib/unicode/string/u8upper.c | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 lib/unicode/string/u8upper.c

diff --git a/include/unicode/string.h b/include/unicode/string.h
index c2a99c1..10e16ae 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -6,7 +6,16 @@
 #include "__charN_t.h"
 #include "__u8view.h"
 
+enum [[clang::__flag_enum__]] upper_flags { /* option bits tested by u8upper() */
+	UF_LANG_AZ = 1 << 0, /* shares its bit with UF_LANG_TR */
+	UF_LANG_TR = 1 << 0, /* same value as UF_LANG_AZ; u8upper() stores the bit in ucctx.az_or_tr */
+	UF_LANG_LT = 1 << 1, /* makes u8upper() flag U+0307 that follows U+0069 (ucctx.lt_after_i) */
+	UF_ẞ = 1 << 2, /* stored in ucctx.cap_eszett; presumably maps U+00DF to U+1E9E, confirm in uprop_get_uc() */
+};
+
 size_t u8glen(const char8_t *, size_t);
 size_t u8gnext(struct u8view *, const char8_t **, size_t *);
 
+size_t u8upper(char8_t *, size_t, const char8_t *, size_t, enum upper_flags);
+
 #endif /* !MLIB_UNICODE_STRING_H */
diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c
new file mode 100644
index 0000000..df4a086
--- /dev/null
+++ b/lib/unicode/string/u8upper.c
@@ -0,0 +1,36 @@
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+/* Uppercase the runes of src[0..srcn) and encode them into dst, which has room for dstn units. */
+size_t /* returns the sum of all rtou8() results, including output that no longer fit in dst */
+u8upper(char8_t *dst, size_t dstn, const char8_t *src, size_t srcn,
+        enum upper_flags flags) /* flags: OR of upper_flags bits selecting the tailorings below */
+{
+	struct ucctx ctx = { /* casing context passed to uprop_get_uc() for every rune */
+		.az_or_tr = flags & UF_LANG_AZ,
+		.cap_eszett = flags & UF_ẞ,
+	};
+
+	rune ch;
+	size_t n = 0; /* output length so far; keeps growing after dst is full */
+	bool prev_was_i = false; /* true when the previously read rune was U+0069 (i) */
+
+	while (u8next(&ch, &src, &srcn)) { /* presumably reads one rune and advances src/srcn; confirm in mbstring.h */
+		if (ch == 0x307 && prev_was_i && (flags & UF_LANG_LT))
+			ctx.lt_after_i = true; /* Lithuanian: COMBINING DOT ABOVE directly after i */
+		/* rv is the run of runes (rv.p, rv.len) that uprop_get_uc() yields for ch under ctx. */
+		struct rview rv = uprop_get_uc(ch, ctx);
+		for (size_t i = 0; i < rv.len; i++) {
+			if (n >= dstn) { /* dst exhausted: encode into a scratch buffer only to keep counting */
+				char8_t buf[U8_LEN_MAX];
+				n += rtou8(buf, sizeof(buf), rv.p[i]);
+			} else /* NOTE(review): dstn - n may be shorter than the encoding; assumes rtou8() respects that limit */
+				n += rtou8(dst + n, dstn - n, rv.p[i]);
+		}
+		/* NOTE(review): only U+0069 is tracked; Unicode After_Soft_Dotted covers every Soft_Dotted rune, confirm intent. */
+		prev_was_i = ch == 'i';
+		ctx.lt_after_i = false; /* the flag is set for at most one uprop_get_uc() call */
+	}
+
+	return n;
+}
-- 
cgit v1.2.3