diff options
-rw-r--r-- | include/unicode/string.h | 6 | ||||
-rw-r--r-- | lib/unicode/string/u8casefold.c | 24 |
2 files changed, 30 insertions, 0 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h index 0c1786c..d80f04c 100644 --- a/include/unicode/string.h +++ b/include/unicode/string.h @@ -6,6 +6,8 @@ #include "__charN_t.h" #include "__u8view.h" +/* clang-format off */ + enum [[clang::__flag_enum__]] caseflags { CF_LANG_AZ = 1 << 0, /* Azeri; alias for CF_LANG_TR */ CF_LANG_TR = 1 << 0, /* Turkish; alias for CF_LANG_AZ */ @@ -13,9 +15,13 @@ enum [[clang::__flag_enum__]] caseflags { CF_ẞ = 1 << 2, /* Use ‘ẞ’ as the uppercase of ‘ß’ */ }; +/* clang-format on */ + size_t u8glen(const char8_t *, size_t); size_t u8gnext(struct u8view *, const char8_t **, size_t *); +size_t u8casefold(char8_t *restrict, size_t, const char8_t *, size_t, + enum caseflags); size_t u8lower(char8_t *restrict, size_t, const char8_t *, size_t, enum caseflags); size_t u8upper(char8_t *restrict, size_t, const char8_t *, size_t, diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c new file mode 100644 index 0000000..6c0b61d --- /dev/null +++ b/lib/unicode/string/u8casefold.c @@ -0,0 +1,24 @@ +#include "mbstring.h" +#include "unicode/prop.h" +#include "unicode/string.h" + /* u8casefold: for each rune read from SRC (SRCN bytes) via u8next(), look up its replacement runes with uprop_get_cf() and encode them with rtou8() into DST (capacity DSTN bytes). Returns the running total N of the values returned by rtou8() -- presumably the byte length of the complete folded output, TODO confirm against rtou8(). Once N has reached DSTN nothing further is stored in DST, but the total keeps growing, so a caller can compare the result with DSTN to detect truncation. */ +size_t +u8casefold(char8_t *restrict dst, size_t dstn, const char8_t *src, size_t srcn, + enum caseflags flags) +{ + rune ch; + size_t n = 0; + + while (u8next(&ch, &src, &srcn)) { + struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ); /* CF_LANG_TR is the same bit, so either flag selects this lookup variant */ + for (size_t i = 0; i < rv.len; i++) { + if (n >= dstn) { + char8_t buf[U8_LEN_MAX]; /* scratch target: DST is already full, encode only to keep the total accurate */ + n += rtou8(buf, sizeof(buf), rv.p[i]); + } else + n += rtou8(dst + n, dstn - n, rv.p[i]); /* NOTE(review): relies on rtou8() coping with dstn - n being smaller than the encoding -- confirm */ + } + } + + return n; +} |