From 1aeb7e2b426e7a94cdd4f83c4337f44c0f5a2ca8 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 14 May 2024 23:59:05 +0200 Subject: Add encoding-generic macros --- include/_u8view.h | 13 ------------- include/_uNview.h | 23 +++++++++++++++++++++++ include/cli.h | 2 +- include/macros.h | 2 ++ include/mbstring.h | 32 +++++++++++++++++++++++++++++--- include/unicode/prop.h | 2 +- include/unicode/string.h | 31 +++++++++++++++++++++++++++---- 7 files changed, 83 insertions(+), 22 deletions(-) delete mode 100644 include/_u8view.h create mode 100644 include/_uNview.h (limited to 'include') diff --git a/include/_u8view.h b/include/_u8view.h deleted file mode 100644 index 5d6a9b8..0000000 --- a/include/_u8view.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef MLIB__U8VIEW_H -#define MLIB__U8VIEW_H - -#include <stddef.h> - -#include "_charN_t.h" - -struct u8view { - const char8_t *p; - size_t len; -}; - -#endif /* !MLIB__U8VIEW_H */ diff --git a/include/_uNview.h b/include/_uNview.h new file mode 100644 index 0000000..9d0d5e2 --- /dev/null +++ b/include/_uNview.h @@ -0,0 +1,23 @@ +#ifndef MLIB__U8VIEW_H +#define MLIB__U8VIEW_H + +#include <stddef.h> + +#include "_charN_t.h" + +struct u8view { + const char8_t *p; + size_t len; +}; + +struct u16view { + const char16_t *p; + size_t len; +}; + +struct u32view { + const char32_t *p; + size_t len; +}; + +#endif /* !MLIB__U8VIEW_H */ diff --git a/include/cli.h b/include/cli.h index 7b5b611..b2545ae 100644 --- a/include/cli.h +++ b/include/cli.h @@ -5,7 +5,7 @@ #include "_attrs.h" #include "_rune.h" -#include "_u8view.h" +#include "_uNview.h" struct optparser { bool _b; diff --git a/include/macros.h b/include/macros.h index 32e8b7c..5c38215 100644 --- a/include/macros.h +++ b/include/macros.h @@ -11,6 +11,8 @@ #define streq(x, y) (!strcmp((x), (y))) #define u8eq(x, y) (!u8cmp((x), (y))) +#define ucseq(lhs, rhs) (!_Generic((lhs), struct u8view: u8cmp)((lhs), (rhs))) + #define _MLIB_STR(s) #s #define _MLIB_CONCAT(x, y) x##y diff --git a/include/mbstring.h 
b/include/mbstring.h index d725e0d..947195f 100644 --- a/include/mbstring.h +++ b/include/mbstring.h @@ -5,10 +5,14 @@ #include "_charN_t.h" #include "_rune.h" -#include "_u8view.h" +#include "_uNview.h" -#define U8(...) \ +#define U8(...) \ ((struct u8view){__VA_OPT__(u8##__VA_ARGS__, sizeof(u8##__VA_ARGS__) - 1)}) +#define U16(...) \ + ((struct u16view){__VA_OPT__(u##__VA_ARGS__, sizeof(u##__VA_ARGS__) - 1)}) +#define U32(...) \ + ((struct u32view){__VA_OPT__(U##__VA_ARGS__, sizeof(U##__VA_ARGS__) - 1)}) #define VSHFT(sv, n) ((sv)->p += (n), (sv)->len -= (n)) @@ -28,7 +32,7 @@ constexpr rune U8_4B_MAX = 0x10FFFF; constexpr rune MBEND = 0x110000; -#define PRIsU8 ".*s" +#define PRIsSV ".*s" #define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p) int rtou8(char8_t *, size_t, rune); @@ -47,4 +51,26 @@ int u8tor(rune *, const char8_t *); rune u8cut(struct u8view *restrict, struct u8view *restrict, const rune *, size_t); +/* Encoding-generic macros */ +#define rtoucs(buf, bufsz, ch) \ + _Generic((buf), char8_t *: rtou8)((buf), (bufsz), (ch)) +#define ucsnext(ch, sv) _Generic((sv), struct u8view: u8next)((ch), (sv)) +#define ucsprev(ch, sv, start) \ + _Generic((sv), const char8_t **: u8prev)((ch), (sv), (start)) +#define ucstor(ch, p) \ + _Generic((p), char8_t *: u8tor, const char8_t *: u8tor)((ch), (p)) +#define ucshaspfx(sv, pfx) _Generic((sv), struct u8view: u8haspfx)((sv), (pfx)) +#define ucshassfx(sv, sfx) _Generic((sv), struct u8view: u8hassfx)((sv), (sfx)) +#define ucschk(sv) _Generic((sv), struct u8view: u8chk)((sv)) +#define ucschr(sv, ch) _Generic((sv), struct u8view: u8chr)((sv), (ch)) +#define ucsrchr(sv, ch) _Generic((sv), struct u8view: u8rchr)((sv), (ch)) +#define ucscmp(lhs, rhs) _Generic((lhs), struct u8view: u8cmp)((lhs), (rhs)) +#define ucscspn(sv, delims, ndelims) \ + _Generic((sv), struct u8view: u8cspn)((sv), (delims), (ndelims)) +#define ucslen(sv) _Generic((sv), struct u8view: u8len)((sv)) +#define ucsspn(sv, delims, ndelims) \ + _Generic((sv), struct 
u8view: u8spn)((sv), (delims), (ndelims)) +#define ucscut(x, y, seps, nseps) \ + _Generic((y), struct u8view *: u8cut)(x, y, seps, nseps) + #endif /* !MLIB_MBSTRING_H */ diff --git a/include/unicode/prop.h b/include/unicode/prop.h index 0fbd479..422fa58 100644 --- a/include/unicode/prop.h +++ b/include/unicode/prop.h @@ -6,7 +6,7 @@ #include "_attrs.h" #include "_rune.h" -#include "_u8view.h" +#include "_uNview.h" struct rview { const rune *p; diff --git a/include/unicode/string.h b/include/unicode/string.h index 0c7ef79..a5b1cdb 100644 --- a/include/unicode/string.h +++ b/include/unicode/string.h @@ -5,7 +5,7 @@ #include "_alloc_fn.h" #include "_charN_t.h" -#include "_u8view.h" +#include "_uNview.h" /* clang-format off */ @@ -23,11 +23,9 @@ enum [[clang::flag_enum]] caseflags { [[nodiscard]] size_t u8gcnt(struct u8view); [[nodiscard]] size_t u8wcnt(struct u8view); [[nodiscard]] size_t u8wcnt_human(struct u8view); - size_t u8gnext(struct u8view *, struct u8view *); size_t u8wnext(struct u8view *, struct u8view *); size_t u8wnext_human(struct u8view *, struct u8view *); - [[nodiscard]] char8_t *u8casefold(size_t *, struct u8view, enum caseflags, alloc_fn, void *); [[nodiscard]] char8_t *u8lower(size_t *, struct u8view, enum caseflags, @@ -36,9 +34,34 @@ size_t u8wnext_human(struct u8view *, struct u8view *); alloc_fn, void *); [[nodiscard]] char8_t *u8upper(size_t *, struct u8view, enum caseflags, alloc_fn, void *); - +[[nodiscard]] char8_t *u8norm_nfc(size_t *, struct u8view, alloc_fn, void *); [[nodiscard]] char8_t *u8norm_nfd(size_t *, struct u8view, alloc_fn, void *); +/* Encoding-generic macros */ +#define ucsgcnt(sv) _Generic((sv), struct u8view: u8gcnt)((sv)) +#define ucswcnt(sv) _Generic((sv), struct u8view: u8wcnt)((sv)) +#define ucswcnt_human(sv) _Generic((sv), struct u8view: u8wcnt_human)((sv)) +#define ucsgnext(g, sv) _Generic((sv), struct u8view *: u8gnext)((g), (sv)) +#define ucswnext(g, sv) _Generic((sv), struct u8view *: u8wnext)((g), (sv)) +#define 
ucswnext_human(g, sv) \ + _Generic((sv), struct u8view *: u8wnext_human)((g), (sv)) +#define ucscasefold(dstn, sv, flags, alloc, ctx) \ + _Generic((sv), struct u8view: u8casefold)((dstn), (sv), (flags), (alloc), \ + (ctx)) +#define ucslower(dstn, sv, flags, alloc, ctx) \ + _Generic((sv), struct u8view: u8lower)((dstn), (sv), (flags), (alloc), \ + (ctx)) +#define ucstitle(dstn, sv, flags, alloc, ctx) \ + _Generic((sv), struct u8view: u8title)((dstn), (sv), (flags), (alloc), \ + (ctx)) +#define ucsupper(dstn, sv, flags, alloc, ctx) \ + _Generic((sv), struct u8view: u8upper)((dstn), (sv), (flags), (alloc), \ + (ctx)) +#define ucsnorm_nfc(dstn, sv, alloc, ctx) \ + _Generic((sv), struct u8view: u8norm_nfc)((dstn), (sv), (alloc), (ctx)) +#define ucsnorm_nfd(dstn, sv, alloc, ctx) \ + _Generic((sv), struct u8view: u8norm_nfd)((dstn), (sv), (alloc), (ctx)) + constexpr double U8CASEFOLD_SCALE = 3; constexpr double U8LOWER_SCALE = 1.5; constexpr double U8LOWER_SCALE_LT = 3; -- cgit v1.2.3