diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-05-09 03:09:53 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-09 03:09:53 +0200 |
commit | 99b1b6e74b9ded856466eccb998c762a3a7f1f45 (patch) | |
tree | 50ac470fd46416b99c8d5788f4b02bb0a12daa58 | |
parent | 085b77730a553ce9769fcc76b68772dbde564004 (diff) |
Make the rest of the casemapping functions take custom allocators
-rw-r--r-- | include/unicode/string.h | 21 | ||||
-rw-r--r-- | lib/unicode/string/u8casefold.c | 32 | ||||
-rw-r--r-- | lib/unicode/string/u8lower.c | 32 | ||||
-rw-r--r-- | lib/unicode/string/u8title.c | 31 | ||||
-rw-r--r-- | test/_case-test.h | 26 | ||||
-rwxr-xr-x | test/run-tests | 1 |
6 files changed, 81 insertions, 62 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h index 7da4385..e3d93ad 100644 --- a/include/unicode/string.h +++ b/include/unicode/string.h @@ -7,8 +7,6 @@ #include "_charN_t.h" #include "_u8view.h" -#define mlib_warn_trunc nodiscard("don’t forget to check for truncation") - /* clang-format off */ enum [[clang::flag_enum]] caseflags { @@ -30,20 +28,19 @@ size_t u8gnext(struct u8view *, struct u8view *); size_t u8wnext(struct u8view *, struct u8view *); size_t u8wnext_human(struct u8view *, struct u8view *); -[[mlib_warn_trunc]] -size_t u8casefold(char8_t *restrict, size_t, struct u8view, enum caseflags); -[[mlib_warn_trunc]] -size_t u8lower(char8_t *restrict, size_t, struct u8view, enum caseflags); -[[mlib_warn_trunc]] -size_t u8title(char8_t *restrict, size_t, struct u8view, enum caseflags); -[[nodiscard]] -char8_t *u8upper(size_t *, struct u8view, enum caseflags, alloc_fn, void *); +[[nodiscard]] char8_t *u8casefold(size_t *, struct u8view, enum caseflags, + alloc_fn, void *); +[[nodiscard]] char8_t *u8lower(size_t *, struct u8view, enum caseflags, + alloc_fn, void *); +[[nodiscard]] char8_t *u8title(size_t *, struct u8view, enum caseflags, + alloc_fn, void *); +[[nodiscard]] char8_t *u8upper(size_t *, struct u8view, enum caseflags, + alloc_fn, void *); +constexpr double U8CASEFOLD_SCALE = 3; constexpr double U8LOWER_SCALE = 1.5; constexpr double U8LOWER_SCALE_LT = 3; constexpr double U8TITLE_SCALE = 3; constexpr double U8UPPER_SCALE = 3; -#undef mlib_warn_trunc - #endif /* !MLIB_UNICODE_STRING_H */ diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c index 2ab7c7c..aba08f3 100644 --- a/lib/unicode/string/u8casefold.c +++ b/lib/unicode/string/u8casefold.c @@ -1,24 +1,32 @@ +#include <errno.h> +#include <stdckdint.h> + #include "mbstring.h" #include "unicode/prop.h" #include "unicode/string.h" -size_t -u8casefold(char8_t *restrict dst, size_t dstn, struct u8view sv, - enum caseflags flags) +char8_t * +u8casefold(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc, + void *alloc_ctx) { + size_t bufsz; + if (ckd_mul(&bufsz, sv.len, (size_t)U8CASEFOLD_SCALE)) { + errno = EOVERFLOW; + return nullptr; + } + + char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t)); + if (dst == nullptr) + return nullptr; + rune ch; size_t n = 0; - while (u8next(&ch, &sv)) { struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ); - for (size_t i = 0; i < rv.len; i++) { - if (n >= dstn) { - char8_t buf[U8_LEN_MAX]; - n += rtou8(buf, sizeof(buf), rv.p[i]); - } else - n += rtou8(dst + n, dstn - n, rv.p[i]); - } + for (size_t i = 0; i < rv.len; i++) + n += rtou8(dst + n, bufsz - n, rv.p[i]); } - return n; + *dstn = n; + return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t)); } diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c index 907077b..47c32f5 100644 --- a/lib/unicode/string/u8lower.c +++ b/lib/unicode/string/u8lower.c @@ -1,3 +1,6 @@ +#include <errno.h> +#include <stdckdint.h> + #include "_attrs.h" #include "mbstring.h" #include "unicode/prop.h" @@ -12,9 +15,9 @@ uprop_ccc_0_or_230(rune ch) return x == 0 || x == 230; } -size_t -u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, - enum caseflags flags) +char8_t * +u8lower(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc, + void *alloc_ctx) { struct lcctx ctx = { .az_or_tr = flags & CF_LANG_AZ, @@ -32,6 +35,17 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, n = before_dot_cnt = more_above_cnt = 0; + size_t bufsz; + /* TODO: Also use U8LOWER_SCALE */ + if (ckd_mul(&bufsz, sv.len, (size_t)U8LOWER_SCALE_LT)) { + errno = EOVERFLOW; + return nullptr; + } + + char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t)); + if (dst == nullptr) + return nullptr; + while (u8next(&ch, &sv)) { rune next = 0; if (sv.len > 0) @@ -76,13 +90,8 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, ctx.final_sigma = final_sigma.before && final_sigma.after == 0; struct rview rv = uprop_get_lc(ch, ctx); - for (size_t i = 0; i < rv.len; i++) { - if (n >= dstn) { - char8_t buf[U8_LEN_MAX]; - n += rtou8(buf, sizeof(buf), rv.p[i]); - } else - n += rtou8(dst + n, dstn - n, rv.p[i]); - } + for (size_t i = 0; i < rv.len; i++) + n += rtou8(dst + n, bufsz - n, rv.p[i]); ctx.after_I = (ch == 'I') || (ctx.after_I && !uprop_ccc_0_or_230(ch)); if (uprop_is_cased(ch)) @@ -91,5 +100,6 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, final_sigma.before = false; } - return n; + *dstn = n; + return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t)); } diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c index f4d9b7e..1adf110 100644 --- a/lib/unicode/string/u8title.c +++ b/lib/unicode/string/u8title.c @@ -1,3 +1,6 @@ +#include <errno.h> +#include <stdckdint.h> + #include "_attrs.h" #include "mbstring.h" #include "unicode/prop.h" @@ -13,9 +16,9 @@ uprop_ccc_0_or_230(rune ch) return x == 0 || x == 230; } -size_t -u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, - enum caseflags flags) +char8_t * +u8title(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc, + void *alloc_ctx) { struct tcctx ctx_t; struct lcctx ctx_l; @@ -39,6 +42,16 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, n = before_dot_cnt = more_above_cnt = 0; + size_t bufsz; + if (ckd_mul(&bufsz, sv.len, (size_t)U8TITLE_SCALE)) { + errno = EOVERFLOW; + return nullptr; + } + + char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t)); + if (dst == nullptr) + return nullptr; + while (u8next(&ch, &sv)) { if (sv.p > word.p + word.len) { u8wnext(&word, &wcpy); @@ -102,13 +115,8 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, } } - for (size_t i = 0; i < rv.len; i++) { - if (n >= dstn) { - char8_t buf[U8_LEN_MAX]; - n += rtou8(buf, sizeof(buf), rv.p[i]); - } else - n += rtou8(dst + n, dstn - n, rv.p[i]); - } + for (size_t i = 0; i < rv.len; i++) + n += rtou8(dst + n, bufsz - n, rv.p[i]); ctx_l.after_I = (ch == 'I') || (ctx_l.after_I && !uprop_ccc_0_or_230(ch)); @@ -123,5 +131,6 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, ctx_t.after_soft_dotted = false; } - return n; + *dstn = n; + return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t)); } diff --git a/test/_case-test.h b/test/_case-test.h index 8f18b69..684c6ec 100644 --- a/test/_case-test.h +++ b/test/_case-test.h @@ -52,7 +52,7 @@ main(int, char **argv) bool test(const char8_t *line, int id) { - struct u8view sv = {line, strlen(line)}; + struct u8view mapped, sv = {line, strlen(line)}; struct u8view before, after, flags; u8cut(&before, &sv, U";", 1); u8cut(&after, &sv, U";", 1); @@ -63,27 +63,21 @@ test(const char8_t *line, int id) : u8eq(flags, U8("LT")) ? CF_LANG_LT : u8eq(flags, U8("NL")) ? CF_LANG_NL : 0; - char8_t *buf = bufalloc(nullptr, 1, after.len); - size_t bufsz = FUNC(nullptr, 0, before, cf); - if (bufsz != after.len) { - warn("case %d: expected %s buffer size of %zu but got %zu", - id, STR(CASETYPE_VERB), after.len, bufsz); - return false; - } - bufsz = FUNC(buf, bufsz, before, cf); - if (bufsz != after.len) { - warn("case %d: expected %s length of %zu but got %zu", - id, STR(CASETYPE_VERB), after.len, bufsz); + arena a = mkarena(0); + mapped.p = FUNC(&mapped.len, before, cf, alloc_arena, &a); + + if (mapped.p == nullptr) { + warn("case %d: got null %s buffer", id, STR(CASETYPE_VERB)); return false; } - if (!memeq(buf, after.p, bufsz)) { - warn("case %d: expected ‘%.*s’ but got ‘%.*s’", - id, SV_PRI_ARGS(after), (int)bufsz, buf); + if (!u8eq(mapped, after)) { + warn("case %d: expected ‘%.*s’ but got ‘%.*s’", id, SV_PRI_ARGS(after), + SV_PRI_ARGS(mapped)); return false; } - free(buf); + arena_free(&a); return true; } diff --git a/test/run-tests b/test/run-tests index 2562a32..f8554c0 100755 --- a/test/run-tests +++ b/test/run-tests @@ -16,6 +16,7 @@ readonly FLAGS=' -Wall -Wextra -Wpedantic -Wno-pointer-sign -Wno-attributes + -fsanitize=address,undefined ' (cd ..; ./make) |