From 99b1b6e74b9ded856466eccb998c762a3a7f1f45 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Thu, 9 May 2024 03:09:53 +0200 Subject: Make the rest of the casemapping functions take custom allocators --- lib/unicode/string/u8casefold.c | 32 ++++++++++++++++++++------------ lib/unicode/string/u8lower.c | 32 +++++++++++++++++++++----------- lib/unicode/string/u8title.c | 31 ++++++++++++++++++++----------- 3 files changed, 61 insertions(+), 34 deletions(-) (limited to 'lib/unicode') diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c index 2ab7c7c..aba08f3 100644 --- a/lib/unicode/string/u8casefold.c +++ b/lib/unicode/string/u8casefold.c @@ -1,24 +1,32 @@ +#include <errno.h> +#include <stdckdint.h> + #include "mbstring.h" #include "unicode/prop.h" #include "unicode/string.h" -size_t -u8casefold(char8_t *restrict dst, size_t dstn, struct u8view sv, - enum caseflags flags) +char8_t * +u8casefold(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc, + void *alloc_ctx) { + size_t bufsz; + if (ckd_mul(&bufsz, sv.len, (size_t)U8CASEFOLD_SCALE)) { + errno = EOVERFLOW; + return nullptr; + } + + char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t)); + if (dst == nullptr) + return nullptr; + rune ch; size_t n = 0; - while (u8next(&ch, &sv)) { struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ); - for (size_t i = 0; i < rv.len; i++) { - if (n >= dstn) { - char8_t buf[U8_LEN_MAX]; - n += rtou8(buf, sizeof(buf), rv.p[i]); - } else - n += rtou8(dst + n, dstn - n, rv.p[i]); - } + for (size_t i = 0; i < rv.len; i++) + n += rtou8(dst + n, bufsz - n, rv.p[i]); } - return n; + *dstn = n; + return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t)); } diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c index 907077b..47c32f5 100644 --- a/lib/unicode/string/u8lower.c +++ b/lib/unicode/string/u8lower.c @@ -1,3 +1,6 @@ +#include <errno.h> +#include <stdckdint.h> + #include "_attrs.h" #include "mbstring.h" #include "unicode/prop.h" @@ -12,9 +15,9 @@ 
uprop_ccc_0_or_230(rune ch) return x == 0 || x == 230; } -size_t -u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, - enum caseflags flags) +char8_t * +u8lower(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc, + void *alloc_ctx) { struct lcctx ctx = { .az_or_tr = flags & CF_LANG_AZ, @@ -32,6 +35,17 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, n = before_dot_cnt = more_above_cnt = 0; + size_t bufsz; + /* TODO: Also use U8LOWER_SCALE */ + if (ckd_mul(&bufsz, sv.len, (size_t)U8LOWER_SCALE_LT)) { + errno = EOVERFLOW; + return nullptr; + } + + char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t)); + if (dst == nullptr) + return nullptr; + while (u8next(&ch, &sv)) { rune next = 0; if (sv.len > 0) @@ -76,13 +90,8 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, ctx.final_sigma = final_sigma.before && final_sigma.after == 0; struct rview rv = uprop_get_lc(ch, ctx); - for (size_t i = 0; i < rv.len; i++) { - if (n >= dstn) { - char8_t buf[U8_LEN_MAX]; - n += rtou8(buf, sizeof(buf), rv.p[i]); - } else - n += rtou8(dst + n, dstn - n, rv.p[i]); - } + for (size_t i = 0; i < rv.len; i++) + n += rtou8(dst + n, bufsz - n, rv.p[i]); ctx.after_I = (ch == 'I') || (ctx.after_I && !uprop_ccc_0_or_230(ch)); if (uprop_is_cased(ch)) @@ -91,5 +100,6 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv, final_sigma.before = false; } - return n; + *dstn = n; + return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t)); } diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c index f4d9b7e..1adf110 100644 --- a/lib/unicode/string/u8title.c +++ b/lib/unicode/string/u8title.c @@ -1,3 +1,6 @@ +#include <errno.h> +#include <stdckdint.h> + #include "_attrs.h" #include "mbstring.h" #include "unicode/prop.h" @@ -13,9 +16,9 @@ uprop_ccc_0_or_230(rune ch) return x == 0 || x == 230; } -size_t -u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, - enum caseflags flags) +char8_t * +u8title(size_t *dstn, struct 
u8view sv, enum caseflags flags, alloc_fn alloc, + void *alloc_ctx) { struct tcctx ctx_t; struct lcctx ctx_l; @@ -39,6 +42,16 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, n = before_dot_cnt = more_above_cnt = 0; + size_t bufsz; + if (ckd_mul(&bufsz, sv.len, (size_t)U8TITLE_SCALE)) { + errno = EOVERFLOW; + return nullptr; + } + + char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t)); + if (dst == nullptr) + return nullptr; + while (u8next(&ch, &sv)) { if (sv.p > word.p + word.len) { u8wnext(&word, &wcpy); @@ -102,13 +115,8 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, } } - for (size_t i = 0; i < rv.len; i++) { - if (n >= dstn) { - char8_t buf[U8_LEN_MAX]; - n += rtou8(buf, sizeof(buf), rv.p[i]); - } else - n += rtou8(dst + n, dstn - n, rv.p[i]); - } + for (size_t i = 0; i < rv.len; i++) + n += rtou8(dst + n, bufsz - n, rv.p[i]); ctx_l.after_I = (ch == 'I') || (ctx_l.after_I && !uprop_ccc_0_or_230(ch)); @@ -123,5 +131,6 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv, ctx_t.after_soft_dotted = false; } - return n; + *dstn = n; + return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t)); } -- cgit v1.2.3