about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/unicode/string.h 6
-rw-r--r-- lib/unicode/string/u8casefold.c 24
2 files changed, 30 insertions, 0 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 0c1786c..d80f04c 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -6,6 +6,8 @@
#include "__charN_t.h"
#include "__u8view.h"
+/* clang-format off */
+
enum [[clang::__flag_enum__]] caseflags {
CF_LANG_AZ = 1 << 0, /* Azeri; alias for UF_LANG_TR */
CF_LANG_TR = 1 << 0, /* Turkish; alias for UF_LANG_AZ */
@@ -13,9 +15,13 @@ enum [[clang::__flag_enum__]] caseflags {
CF_ẞ = 1 << 2, /* Use ‘ẞ’ as the uppercase of ‘ß’ */
};
+/* clang-format on */
+
size_t u8glen(const char8_t *, size_t);
size_t u8gnext(struct u8view *, const char8_t **, size_t *);
+size_t u8casefold(char8_t *restrict, size_t, const char8_t *, size_t, /* named dst, dstn, src, srcn in lib/unicode/string/u8casefold.c */
+ enum caseflags); /* flags; the definition only tests the CF_LANG_AZ bit (same value as CF_LANG_TR) */
size_t u8lower(char8_t *restrict, size_t, const char8_t *, size_t,
enum caseflags);
size_t u8upper(char8_t *restrict, size_t, const char8_t *, size_t,
diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c
new file mode 100644
index 0000000..6c0b61d
--- /dev/null
+++ b/lib/unicode/string/u8casefold.c
@@ -0,0 +1,24 @@
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+
+size_t /* case-folds the runes of src into dst as UTF-8; returns the summed rtou8() results */
+u8casefold(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn,
+ enum caseflags flags) /* dstn limits writes into dst; src and srcn are handed to u8next() by address */
+{
+ rune ch; /* current source rune, filled in by u8next() */
+ size_t n = 0; /* running total of rtou8() return values; doubles as the write offset into dst */
+ /* NOTE(review): n is presumably the full folded length and may exceed dstn - confirm against rtou8() */
+ while (u8next(&ch, &src, &srcn)) { /* runs until u8next() returns 0 */
+ struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ); /* only the CF_LANG_AZ bit of flags is passed on */
+ for (size_t i = 0; i < rv.len; i++) { /* encode each of the rv.len runes in the mapping */
+ if (n >= dstn) { /* no room left in dst */
+ char8_t buf[U8_LEN_MAX]; /* scratch target so nothing is stored past the end of dst */
+ n += rtou8(buf, sizeof(buf), rv.p[i]); /* keep counting; the encoded bytes are discarded */
+ } else /* NOTE(review): relies on rtou8() respecting the dstn - n limit when the rune does not fit - confirm */
+ n += rtou8(dst + n, dstn - n, rv.p[i]);
+ }
+ }
+
+ return n;
+}