about summary refs log tree commit diff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-05-09 03:09:53 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-05-09 03:09:53 +0200
commit 99b1b6e74b9ded856466eccb998c762a3a7f1f45 (patch)
tree 50ac470fd46416b99c8d5788f4b02bb0a12daa58
parent 085b77730a553ce9769fcc76b68772dbde564004 (diff)
Make the rest of the casemapping functions take custom allocators
-rw-r--r-- include/unicode/string.h 21
-rw-r--r-- lib/unicode/string/u8casefold.c 32
-rw-r--r-- lib/unicode/string/u8lower.c 32
-rw-r--r-- lib/unicode/string/u8title.c 31
-rw-r--r-- test/_case-test.h 26
-rwxr-xr-x test/run-tests 1
6 files changed, 81 insertions, 62 deletions
diff --git a/include/unicode/string.h b/include/unicode/string.h
index 7da4385..e3d93ad 100644
--- a/include/unicode/string.h
+++ b/include/unicode/string.h
@@ -7,8 +7,6 @@
#include "_charN_t.h"
#include "_u8view.h"
-#define mlib_warn_trunc nodiscard("don’t forget to check for truncation")
-
/* clang-format off */
enum [[clang::flag_enum]] caseflags {
@@ -30,20 +28,19 @@ size_t u8gnext(struct u8view *, struct u8view *);
size_t u8wnext(struct u8view *, struct u8view *);
size_t u8wnext_human(struct u8view *, struct u8view *);
-[[mlib_warn_trunc]]
-size_t u8casefold(char8_t *restrict, size_t, struct u8view, enum caseflags);
-[[mlib_warn_trunc]]
-size_t u8lower(char8_t *restrict, size_t, struct u8view, enum caseflags);
-[[mlib_warn_trunc]]
-size_t u8title(char8_t *restrict, size_t, struct u8view, enum caseflags);
-[[nodiscard]]
-char8_t *u8upper(size_t *, struct u8view, enum caseflags, alloc_fn, void *);
+[[nodiscard]] char8_t *u8casefold(size_t *, struct u8view, enum caseflags,
+ alloc_fn, void *);
+[[nodiscard]] char8_t *u8lower(size_t *, struct u8view, enum caseflags,
+ alloc_fn, void *);
+[[nodiscard]] char8_t *u8title(size_t *, struct u8view, enum caseflags,
+ alloc_fn, void *);
+[[nodiscard]] char8_t *u8upper(size_t *, struct u8view, enum caseflags,
+ alloc_fn, void *);
+constexpr double U8CASEFOLD_SCALE = 3;
constexpr double U8LOWER_SCALE = 1.5;
constexpr double U8LOWER_SCALE_LT = 3;
constexpr double U8TITLE_SCALE = 3;
constexpr double U8UPPER_SCALE = 3;
-#undef mlib_warn_trunc
-
#endif /* !MLIB_UNICODE_STRING_H */
diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c
index 2ab7c7c..aba08f3 100644
--- a/lib/unicode/string/u8casefold.c
+++ b/lib/unicode/string/u8casefold.c
@@ -1,24 +1,32 @@
+#include <errno.h>
+#include <stdckdint.h>
+
#include "mbstring.h"
#include "unicode/prop.h"
#include "unicode/string.h"
-size_t
-u8casefold(char8_t *restrict dst, size_t dstn, struct u8view sv,
- enum caseflags flags)
+char8_t *
+u8casefold(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc,
+ void *alloc_ctx)
{
+ size_t bufsz;
+ if (ckd_mul(&bufsz, sv.len, (size_t)U8CASEFOLD_SCALE)) {
+ errno = EOVERFLOW;
+ return nullptr;
+ }
+
+ char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t));
+ if (dst == nullptr)
+ return nullptr;
+
rune ch;
size_t n = 0;
-
while (u8next(&ch, &sv)) {
struct rview rv = uprop_get_cf(ch, flags & CF_LANG_AZ);
- for (size_t i = 0; i < rv.len; i++) {
- if (n >= dstn) {
- char8_t buf[U8_LEN_MAX];
- n += rtou8(buf, sizeof(buf), rv.p[i]);
- } else
- n += rtou8(dst + n, dstn - n, rv.p[i]);
- }
+ for (size_t i = 0; i < rv.len; i++)
+ n += rtou8(dst + n, bufsz - n, rv.p[i]);
}
- return n;
+ *dstn = n;
+ return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t));
}
diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c
index 907077b..47c32f5 100644
--- a/lib/unicode/string/u8lower.c
+++ b/lib/unicode/string/u8lower.c
@@ -1,3 +1,6 @@
+#include <errno.h>
+#include <stdckdint.h>
+
#include "_attrs.h"
#include "mbstring.h"
#include "unicode/prop.h"
@@ -12,9 +15,9 @@ uprop_ccc_0_or_230(rune ch)
return x == 0 || x == 230;
}
-size_t
-u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
- enum caseflags flags)
+char8_t *
+u8lower(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc,
+ void *alloc_ctx)
{
struct lcctx ctx = {
.az_or_tr = flags & CF_LANG_AZ,
@@ -32,6 +35,17 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
n = before_dot_cnt = more_above_cnt = 0;
+ size_t bufsz;
+ /* TODO: Also use U8LOWER_SCALE */
+ if (ckd_mul(&bufsz, sv.len, (size_t)U8LOWER_SCALE_LT)) {
+ errno = EOVERFLOW;
+ return nullptr;
+ }
+
+ char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t));
+ if (dst == nullptr)
+ return nullptr;
+
while (u8next(&ch, &sv)) {
rune next = 0;
if (sv.len > 0)
@@ -76,13 +90,8 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
ctx.final_sigma = final_sigma.before && final_sigma.after == 0;
struct rview rv = uprop_get_lc(ch, ctx);
- for (size_t i = 0; i < rv.len; i++) {
- if (n >= dstn) {
- char8_t buf[U8_LEN_MAX];
- n += rtou8(buf, sizeof(buf), rv.p[i]);
- } else
- n += rtou8(dst + n, dstn - n, rv.p[i]);
- }
+ for (size_t i = 0; i < rv.len; i++)
+ n += rtou8(dst + n, bufsz - n, rv.p[i]);
ctx.after_I = (ch == 'I') || (ctx.after_I && !uprop_ccc_0_or_230(ch));
if (uprop_is_cased(ch))
@@ -91,5 +100,6 @@ u8lower(char8_t *restrict dst, size_t dstn, struct u8view sv,
final_sigma.before = false;
}
- return n;
+ *dstn = n;
+ return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t));
}
diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c
index f4d9b7e..1adf110 100644
--- a/lib/unicode/string/u8title.c
+++ b/lib/unicode/string/u8title.c
@@ -1,3 +1,6 @@
+#include <errno.h>
+#include <stdckdint.h>
+
#include "_attrs.h"
#include "mbstring.h"
#include "unicode/prop.h"
@@ -13,9 +16,9 @@ uprop_ccc_0_or_230(rune ch)
return x == 0 || x == 230;
}
-size_t
-u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
- enum caseflags flags)
+char8_t *
+u8title(size_t *dstn, struct u8view sv, enum caseflags flags, alloc_fn alloc,
+ void *alloc_ctx)
{
struct tcctx ctx_t;
struct lcctx ctx_l;
@@ -39,6 +42,16 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
n = before_dot_cnt = more_above_cnt = 0;
+ size_t bufsz;
+ if (ckd_mul(&bufsz, sv.len, (size_t)U8TITLE_SCALE)) {
+ errno = EOVERFLOW;
+ return nullptr;
+ }
+
+ char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, alignof(char8_t));
+ if (dst == nullptr)
+ return nullptr;
+
while (u8next(&ch, &sv)) {
if (sv.p > word.p + word.len) {
u8wnext(&word, &wcpy);
@@ -102,13 +115,8 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
}
}
- for (size_t i = 0; i < rv.len; i++) {
- if (n >= dstn) {
- char8_t buf[U8_LEN_MAX];
- n += rtou8(buf, sizeof(buf), rv.p[i]);
- } else
- n += rtou8(dst + n, dstn - n, rv.p[i]);
- }
+ for (size_t i = 0; i < rv.len; i++)
+ n += rtou8(dst + n, bufsz - n, rv.p[i]);
ctx_l.after_I =
(ch == 'I') || (ctx_l.after_I && !uprop_ccc_0_or_230(ch));
@@ -123,5 +131,6 @@ u8title(char8_t *restrict dst, size_t dstn, struct u8view sv,
ctx_t.after_soft_dotted = false;
}
- return n;
+ *dstn = n;
+ return alloc(alloc_ctx, dst, bufsz, n, alignof(char8_t));
}
diff --git a/test/_case-test.h b/test/_case-test.h
index 8f18b69..684c6ec 100644
--- a/test/_case-test.h
+++ b/test/_case-test.h
@@ -52,7 +52,7 @@ main(int, char **argv)
bool
test(const char8_t *line, int id)
{
- struct u8view sv = {line, strlen(line)};
+ struct u8view mapped, sv = {line, strlen(line)};
struct u8view before, after, flags;
u8cut(&before, &sv, U";", 1);
u8cut(&after, &sv, U";", 1);
@@ -63,27 +63,21 @@ test(const char8_t *line, int id)
: u8eq(flags, U8("LT")) ? CF_LANG_LT
: u8eq(flags, U8("NL")) ? CF_LANG_NL
: 0;
- char8_t *buf = bufalloc(nullptr, 1, after.len);
- size_t bufsz = FUNC(nullptr, 0, before, cf);
- if (bufsz != after.len) {
- warn("case %d: expected %s buffer size of %zu but got %zu",
- id, STR(CASETYPE_VERB), after.len, bufsz);
- return false;
- }
- bufsz = FUNC(buf, bufsz, before, cf);
- if (bufsz != after.len) {
- warn("case %d: expected %s length of %zu but got %zu",
- id, STR(CASETYPE_VERB), after.len, bufsz);
+ arena a = mkarena(0);
+ mapped.p = FUNC(&mapped.len, before, cf, alloc_arena, &a);
+
+ if (mapped.p == nullptr) {
+ warn("case %d: got null %s buffer", id, STR(CASETYPE_VERB));
return false;
}
- if (!memeq(buf, after.p, bufsz)) {
- warn("case %d: expected ‘%.*s’ but got ‘%.*s’",
- id, SV_PRI_ARGS(after), (int)bufsz, buf);
+ if (!u8eq(mapped, after)) {
+ warn("case %d: expected ‘%.*s’ but got ‘%.*s’", id, SV_PRI_ARGS(after),
+ SV_PRI_ARGS(mapped));
return false;
}
- free(buf);
+ arena_free(&a);
return true;
}
diff --git a/test/run-tests b/test/run-tests
index 2562a32..f8554c0 100755
--- a/test/run-tests
+++ b/test/run-tests
@@ -16,6 +16,7 @@ readonly FLAGS='
-Wall -Wextra -Wpedantic
-Wno-pointer-sign
-Wno-attributes
+ -fsanitize=address,undefined
'
(cd ..; ./make)