diff options
-rw-r--r-- | include/unicode/string.h           |  2 ++
-rw-r--r-- | lib/unicode/string/u8wcnt_human.c  | 10 ++++++++++
-rw-r--r-- | lib/unicode/string/u8wnext_human.c | 27 +++++++++++++++++++++++++++
3 files changed, 39 insertions, 0 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 9ac029a..887a216 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -19,9 +19,11 @@ enum [[clang::flag_enum]] caseflags {
 
 [[nodiscard]] size_t u8gcnt(const char8_t *, size_t);
 [[nodiscard]] size_t u8wcnt(const char8_t *, size_t);
+[[nodiscard]] size_t u8wcnt_human(const char8_t *, size_t);
 
 size_t u8gnext(struct u8view *, const char8_t **, size_t *);
 size_t u8wnext(struct u8view *, const char8_t **, size_t *);
+size_t u8wnext_human(struct u8view *, const char8_t **, size_t *);
 
 #define mlib_warn_trunc nodiscard("don’t forget to check for truncation")
 [[mlib_warn_trunc]] size_t u8casefold(char8_t *restrict, size_t,
diff --git a/lib/unicode/string/u8wcnt_human.c b/lib/unicode/string/u8wcnt_human.c
new file mode 100644
index 0000000..6e70398
--- /dev/null
+++ b/lib/unicode/string/u8wcnt_human.c
@@ -0,0 +1,10 @@
+#include "unicode/string.h"
+/* Count how many times u8wnext_human() yields a word from s (length n). */
+size_t
+u8wcnt_human(const char8_t *s, size_t n)
+{
+	size_t m = 0;
+	while (u8wnext_human(nullptr, &s, &n)) /* nullptr: the word view is not needed */
+		m++;
+	return m;
+}
diff --git a/lib/unicode/string/u8wnext_human.c b/lib/unicode/string/u8wnext_human.c
new file mode 100644
index 0000000..d85abf1
--- /dev/null
+++ b/lib/unicode/string/u8wnext_human.c
@@ -0,0 +1,27 @@
+#include "macros.h"
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+/* Next u8wnext() word with a rune whose uprop_get_gc() is in GC_L | GC_N; 0 if none. */
+size_t
+u8wnext_human(struct u8view *dst, const char8_t **s, size_t *n)
+{
+	ASSUME(n != nullptr);
+	ASSUME(s != nullptr);
+	ASSUME(*s != nullptr);
+	/* Non-matching words are dropped; u8wnext() presumably advances *s and *n. */
+	struct u8view w;
+	while (u8wnext(&w, s, n)) {
+		rune ch;
+		struct u8view cpy = w; /* iterate a copy; w is what gets returned */
+		while (u8next(&ch, U8_ARGSP(cpy))) {
+			if (uprop_get_gc(ch) & (GC_L | GC_N)) { /* presumably Letter or Number; confirm */
+				if (dst != nullptr) /* dst may be null, as in u8wcnt_human() */
+					*dst = w;
+				return w.len; /* length of the whole word, not of the part scanned */
+			}
+		}
+	}
+	/* u8wnext() returned 0 before any word matched. */
+	return 0;
+}
|