about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Thomas Voss <mail@thomasvoss.com> 2024-05-09 03:09:53 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-09 03:09:53 +0200
commit: 99b1b6e74b9ded856466eccb998c762a3a7f1f45 (patch)
tree: 50ac470fd46416b99c8d5788f4b02bb0a12daa58 /lib
parent: 085b77730a553ce9769fcc76b68772dbde564004 (diff)
Make the rest of the casemapping functions take custom allocators
Diffstat (limited to 'lib')
-rw-r--r-- lib/unicode/string/u8casefold.c | 32
-rw-r--r-- lib/unicode/string/u8lower.c | 32
-rw-r--r-- lib/unicode/string/u8title.c | 31
3 files changed, 61 insertions, 34 deletions
diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c
index 2ab7c7c..aba08f3 100644
--- a/lib/unicode/string/u8casefold.c
+++ b/lib/unicode/string/u8casefold.c
@@ -1,24 +1,32 @@
+#include <errno.h>
+#include <stdckdint.h>
+
#include "mbstring.h"
#include "unicode/prop.h"
#include "unicode/string.h"
-size_t
-u8casefold(char8_t *restrict dst, size_t dstn, struct u8view sv,
- enum caseflags flags)
+char8_t *
+u8casefold(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc,
+ void *alloc_ctx)
{
+ size_t bufsz;
+ if (ckd_mul(&bufsz, sv.len, (size_t)U8CASEFOLD_SCALE)) {
+ errno = EOVERFLOW;
+ return nullptr;
+ }
+
+ char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t));
+ if (dst == nullptr)
+ return nullptr;
+
rune ch;
size_t n = 0;
-
while (u8next(&ch, &sv)) {
struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ);
- for (size_t i = 0; i < rv.len; i++) {
- if (n >= dstn) {
- char8_t buf[U8_LEN_MAX];
- n += rtou8(buf, sizeof(buf), rv.p[i]);
- } else
- n += rtou8(dst + n, dstn - n, rv.p[i]);
- }
+ for (size_t i = 0; i < rv.len; i++)
+ n += rtou8(dst + n, bufsz - n, rv.p[i]);
}
- return n;
+ *dstn = n;
+ return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t));
}
diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c
index 907077b..47c32f5 100644
--- a/lib/unicode/string/u8lower.c
+++ b/lib/unicode/string/u8lower.c
@@ -1,3 +1,6 @@
+#include <errno.h>
+#include <stdckdint.h>
+
#include "_attrs.h"
#include "mbstring.h"
#include "unicode/prop.h"
@@ -12,9 +15,9 @@ uprop_ccc_0_or_230(rune ch)
return x == 0 || x == 230;
}
-size_t
-u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
- enum caseflags flags)
+char8_t *
+u8lower(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc,
+ void *alloc_ctx)
{
struct lcctx ctx = {
.az_or_tr = flags & CF_LANG_AZ,
@@ -32,6 +35,17 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
n = before_dot_cnt = more_above_cnt = 0;
+ size_t bufsz;
+ /* TODO: Also use U8LOWER_SCALE */
+ if (ckd_mul(&bufsz, sv.len, (size_t)U8LOWER_SCALE_LT)) {
+ errno = EOVERFLOW;
+ return nullptr;
+ }
+
+ char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t));
+ if (dst == nullptr)
+ return nullptr;
+
while (u8next(&ch, &sv)) {
rune next = 0;
if (sv.len > 0)
@@ -76,13 +90,8 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
ctx.final_sigma = final_sigma.before && final_sigma.after == 0;
struct rview rv = uprop_get_lc(ch, ctx);
- for (size_t i = 0; i < rv.len; i++) {
- if (n >= dstn) {
- char8_t buf[U8_LEN_MAX];
- n += rtou8(buf, sizeof(buf), rv.p[i]);
- } else
- n += rtou8(dst + n, dstn - n, rv.p[i]);
- }
+ for (size_t i = 0; i < rv.len; i++)
+ n += rtou8(dst + n, bufsz - n, rv.p[i]);
ctx.after_I = (ch == 'I') || (ctx.after_I && !uprop_ccc_0_or_230(ch));
if (uprop_is_cased(ch))
@@ -91,5 +100,6 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
final_sigma.before = false;
}
- return n;
+ *dstn = n;
+ return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t));
}
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
index f4d9b7e..1adf110 100644
--- a/lib/unicode/string/u8title.c
+++ b/lib/unicode/string/u8title.c
@@ -1,3 +1,6 @@
+#include <errno.h>
+#include <stdckdint.h>
+
#include "_attrs.h"
#include "mbstring.h"
#include "unicode/prop.h"
@@ -13,9 +16,9 @@ uprop_ccc_0_or_230(rune ch)
return x == 0 || x == 230;
}
-size_t
-u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
- enum caseflags flags)
+char8_t *
+u8title(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc,
+ void *alloc_ctx)
{
struct tcctx ctx_t;
struct lcctx ctx_l;
@@ -39,6 +42,16 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
n = before_dot_cnt = more_above_cnt = 0;
+ size_t bufsz;
+ if (ckd_mul(&bufsz, sv.len, (size_t)U8TITLE_SCALE)) {
+ errno = EOVERFLOW;
+ return nullptr;
+ }
+
+ char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t));
+ if (dst == nullptr)
+ return nullptr;
+
while (u8next(&ch, &sv)) {
if (sv.p > word.p + word.len) {
u8wnext(&word, &wcpy);
@@ -102,13 +115,8 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
}
}
- for (size_t i = 0; i < rv.len; i++) {
- if (n >= dstn) {
- char8_t buf[U8_LEN_MAX];
- n += rtou8(buf, sizeof(buf), rv.p[i]);
- } else
- n += rtou8(dst + n, dstn - n, rv.p[i]);
- }
+ for (size_t i = 0; i < rv.len; i++)
+ n += rtou8(dst + n, bufsz - n, rv.p[i]);
ctx_l.after_I =
(ch == 'I') || (ctx_l.after_I && !uprop_ccc_0_or_230(ch));
@@ -123,5 +131,6 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
ctx_t.after_soft_dotted = false;
}
- return n;
+ *dstn = n;
+ return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t));
}