aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/unicode/string.h 2
-rw-r--r-- lib/unicode/string/u8wcnt_human.c 10
-rw-r--r-- lib/unicode/string/u8wnext_human.c 27
3 files changed, 39 insertions, 0 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 9ac029a..887a216 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -19,9 +19,11 @@ enum [[clang::flag_enum]] caseflags {
[[nodiscard]] size_t u8gcnt(const char8_t *, size_t);
[[nodiscard]] size_t u8wcnt(const char8_t *, size_t);
+[[nodiscard]] size_t u8wcnt_human(const char8_t *, size_t);
size_t u8gnext(struct u8view *, const char8_t **, size_t *);
size_t u8wnext(struct u8view *, const char8_t **, size_t *);
+size_t u8wnext_human(struct u8view *, const char8_t **, size_t *);
#define mlib_warn_trunc nodiscard("don’t forget to check for truncation")
[[mlib_warn_trunc]] size_t u8casefold(char8_t *restrict, size_t,
diff --git a/lib/unicode/string/u8wcnt_human.c b/lib/unicode/string/u8wcnt_human.c
new file mode 100644
index 0000000..6e70398
--- /dev/null
+++ b/lib/unicode/string/u8wcnt_human.c
@@ -0,0 +1,10 @@
+#include "unicode/string.h"
+
+/* Count how many times u8wnext_human() yields a non-zero result when it is
+   called repeatedly on the string described by S and N.  No view is
+   requested from u8wnext_human(); only its return value is inspected. */
+size_t
+u8wcnt_human(const char8_t *s, size_t n)
+{
+	size_t cnt;
+
+	/* S and N are local copies, so advancing them here does not affect
+	   the caller. */
+	for (cnt = 0; u8wnext_human(nullptr, &s, &n) != 0; cnt++)
+		;
+	return cnt;
+}
diff --git a/lib/unicode/string/u8wnext_human.c b/lib/unicode/string/u8wnext_human.c
new file mode 100644
index 0000000..d85abf1
--- /dev/null
+++ b/lib/unicode/string/u8wnext_human.c
@@ -0,0 +1,27 @@
+#include "macros.h"
+#include "mbstring.h"
+#include "unicode/prop.h"
+#include "unicode/string.h"
+
+/* Step through the segments produced by u8wnext() until one is found that
+   contains at least one rune whose uprop_get_gc() value shares a bit with
+   GC_L | GC_N (presumably the letter and number general categories — confirm
+   against unicode/prop.h).
+
+   On a match the segment is stored in *DST, unless DST is null, and the
+   segment length is returned.  If u8wnext() runs out of input before any
+   segment matches, 0 is returned and *DST is left untouched.
+
+   S and N are handed straight to u8wnext(), so they are advanced past every
+   segment that was examined, including the rejected ones. */
+size_t
+u8wnext_human(struct u8view *dst, const char8_t **s, size_t *n)
+{
+	ASSUME(n != nullptr);
+	ASSUME(s != nullptr);
+	ASSUME(*s != nullptr);
+
+	for (;;) {
+		struct u8view word;
+		if (u8wnext(&word, s, n) == 0)
+			break;
+
+		/* Decode from a scratch copy so WORD is still whole if we need
+		   to hand it back to the caller. */
+		struct u8view rest = word;
+		bool found = false;
+		rune r;
+
+		while (!found && u8next(&r, U8_ARGSP(rest)) != 0)
+			found = (uprop_get_gc(r) & (GC_L | GC_N)) != 0;
+
+		if (!found)
+			continue;
+
+		if (dst != nullptr)
+			*dst = word;
+		return word.len;
+	}
+
+	return 0;
+}