From d874d01e8e9a30f0073a6e559cbae07244dec7bf Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Thu, 3 Oct 2024 00:36:26 +0200 Subject: Huge library overhaul --- lib/unicode/string/u8casefold.c | 9 ++++----- lib/unicode/string/u8lower.c | 9 ++++----- lib/unicode/string/u8norm.c | 41 ++++++++++++++++++++++------------------- lib/unicode/string/u8title.c | 9 ++++----- lib/unicode/string/u8upper.c | 9 ++++----- 5 files changed, 38 insertions(+), 39 deletions(-) (limited to 'lib/unicode') diff --git a/lib/unicode/string/u8casefold.c b/lib/unicode/string/u8casefold.c index e3a3402..fc9ed54 100644 --- a/lib/unicode/string/u8casefold.c +++ b/lib/unicode/string/u8casefold.c @@ -1,3 +1,4 @@ +#include #include #include @@ -7,11 +8,9 @@ #include "unicode/string.h" char8_t * -u8casefold(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, - void *alloc_ctx) +u8casefold(size_t *dstn, u8view_t sv, caseflags_t flags, allocator_t mem) { ASSUME(dstn != nullptr); - ASSUME(alloc != nullptr); size_t bufsz; if (ckd_mul(&bufsz, sv.len, (size_t)U8CASEFOLD_SCALE)) { @@ -19,7 +18,7 @@ u8casefold(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, return nullptr; } - char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, 1, alignof(char8_t)); + char8_t *dst = new(mem, typeof(*dst), bufsz); rune ch; size_t n = 0; @@ -30,5 +29,5 @@ u8casefold(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, } *dstn = n; - return alloc(alloc_ctx, dst, bufsz, n, 1, alignof(char8_t)); + return resz(mem, dst, bufsz, n); } diff --git a/lib/unicode/string/u8lower.c b/lib/unicode/string/u8lower.c index 2b1ec36..e553f98 100644 --- a/lib/unicode/string/u8lower.c +++ b/lib/unicode/string/u8lower.c @@ -2,6 +2,7 @@ #include #include "_attrs.h" +#include "alloc.h" #include "macros.h" #include "mbstring.h" #include "unicode/prop.h" @@ -17,11 +18,9 @@ uprop_ccc_0_or_230(rune ch) } char8_t * -u8lower(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, - void *alloc_ctx) +u8lower(size_t *dstn, u8view_t sv, caseflags_t flags, allocator_t mem) { ASSUME(dstn != nullptr); - ASSUME(alloc != nullptr); struct lcctx ctx = { .az_or_tr = flags & CF_LANG_AZ, @@ -46,7 +45,7 @@ u8lower(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, return nullptr; } - char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, 1, alignof(char8_t)); + char8_t *dst = new(mem, typeof(*dst), bufsz); while (u8next(&ch, &sv)) { rune next = 0; @@ -103,5 +102,5 @@ u8lower(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, } *dstn = n; - return alloc(alloc_ctx, dst, bufsz, n, 1, alignof(char8_t)); + return resz(mem, dst, bufsz, n); } diff --git a/lib/unicode/string/u8norm.c b/lib/unicode/string/u8norm.c index 02156ea..c60fa5b 100644 --- a/lib/unicode/string/u8norm.c +++ b/lib/unicode/string/u8norm.c @@ -1,6 +1,10 @@ +#include #include +#include +#include #include +#include "alloc.h" #include "macros.h" #include "mbstring.h" #include "unicode/_cm.h" @@ -28,22 +32,20 @@ constexpr int TCNT = 28; constexpr int NCNT = VCNT * TCNT; constexpr int SCNT = LCNT * NCNT; -static void decomp(char8_t *, size_t *, size_t, rune, enum normform); +static void decomp(char8_t *, size_t *, size_t, rune, normform_t); static void compbuf(char8_t *, size_t *); static const qcfn qc_lookup[] = { - [NF_NFC] = (qcfn)uprop_get_nfc_qc, - [NF_NFD] = (qcfn)uprop_get_nfd_qc, + [NF_NFC] = (qcfn)uprop_get_nfc_qc, + [NF_NFD] = (qcfn)uprop_get_nfd_qc, [NF_NFKC] = (qcfn)uprop_get_nfkc_qc, [NF_NFKD] = (qcfn)uprop_get_nfkd_qc, }; char8_t * -u8norm(size_t *dstn, u8view_t src, alloc_fn alloc, void *ctx, - enum normform nf) +u8norm(size_t *dstn, u8view_t src, allocator_t mem, normform_t nf) { ASSUME(dstn != nullptr); - ASSUME(alloc != nullptr); ASSUME(BETWEEN(0, nf, 4)); { @@ -57,30 +59,31 @@ u8norm(size_t *dstn, u8view_t src, alloc_fn alloc, void *ctx, } *dstn = src.len; - char8_t *dst = alloc(ctx, nullptr, 0, src.len, 1, alignof(char8_t)); + char8_t *dst = new(mem, typeof(*dst), src.len); return memcpy(dst, src.p, src.len); } no: - /* Pre-allocate a buffer with some initial capacity; there is no need to - check for overflow when computing bufsz because alloc() will handle the - overflow error for us. */ int scale = (nf & 0b10) ? NFKD_SCALE : NFD_SCALE; - size_t bufsz = src.len * scale; - char8_t *dst = alloc(ctx, nullptr, 0, src.len, scale, alignof(char8_t)); + ptrdiff_t bufsz; + if (ckd_mul(&bufsz, src.len, scale)) { + errno = EOVERFLOW; + return nullptr; + } + char8_t *dst = new(mem, typeof(*dst), bufsz); *dstn = 0; for (rune ch; ucsnext(&ch, &src) != 0; decomp(dst, dstn, bufsz, ch, nf)) ; if (nf & 0b01) compbuf(dst, dstn); - return alloc(ctx, dst, src.len, *dstn, 1, alignof(char8_t)); + return resz(mem, dst, bufsz, *dstn); } #define WRITE(ch) *dstn += rtoucs(dst + *dstn, bufsz - *dstn, (ch)) void -decomp(char8_t *dst, size_t *dstn, size_t bufsz, rune ch, enum normform nf) +decomp(char8_t *dst, size_t *dstn, size_t bufsz, rune ch, normform_t nf) { if (uprop_get_hst(ch) != HST_NA) { int si = ch - SBASE; @@ -96,8 +99,8 @@ decomp(char8_t *dst, size_t *dstn, size_t bufsz, rune ch, enum normform nf) WRITE(v); if (t != TBASE) WRITE(t); - } else if (((nf & 0b10) && uprop_get_dt(ch) != DT_NONE) - || ((nf & 0b10) == 0 && uprop_get_dt(ch) == DT_CAN)) + } else if (((nf & 0b10) != 0 && uprop_get_dt(ch) != DT_NONE) + || ((nf & 0b10) == 0 && uprop_get_dt(ch) == DT_CAN)) { struct rview rv = uprop_get_dm(ch); for (size_t i = 0; i < rv.len; i++) @@ -170,12 +173,12 @@ compbuf(char8_t *dst, size_t *dstn) /* Try Hangul composition */ if (comp == 0) { if (BETWEEN(LBASE, L, LBASE + LCNT - 1) - && BETWEEN(VBASE, C, VBASE + VCNT - 1)) + && BETWEEN(VBASE, C, VBASE + VCNT - 1)) { comp = SBASE + ((L - LBASE) * NCNT + (C - VBASE) * TCNT); } else if (BETWEEN(TBASE, C, TBASE + TCNT - 1) - && BETWEEN(SBASE, L, SBASE + SCNT - 1) - && ((L - SBASE) % TCNT) == 0) + && BETWEEN(SBASE, L, SBASE + SCNT - 1) + && ((L - SBASE) % TCNT) == 0) { comp = L + (C - TBASE); } diff --git a/lib/unicode/string/u8title.c b/lib/unicode/string/u8title.c index 3a85f4d..c0637ff 100644 --- a/lib/unicode/string/u8title.c +++ b/lib/unicode/string/u8title.c @@ -2,6 +2,7 @@ #include #include "_attrs.h" +#include "alloc.h" #include "macros.h" #include "mbstring.h" #include "unicode/prop.h" @@ -18,11 +19,9 @@ uprop_ccc_0_or_230(rune ch) } char8_t * -u8title(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, - void *alloc_ctx) +u8title(size_t *dstn, u8view_t sv, caseflags_t flags, allocator_t mem) { ASSUME(dstn != nullptr); - ASSUME(alloc != nullptr); struct tcctx ctx_t; struct lcctx ctx_l; @@ -52,7 +51,7 @@ u8title(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, return nullptr; } - char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, 1, alignof(char8_t)); + char8_t *dst = new(mem, typeof(*dst), bufsz); while (u8next(&ch, &sv)) { if (sv.p > word.p + word.len) { @@ -134,5 +133,5 @@ u8title(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, } *dstn = n; - return alloc(alloc_ctx, dst, bufsz, n, 1, alignof(char8_t)); + return resz(mem, dst, bufsz, n); } diff --git a/lib/unicode/string/u8upper.c b/lib/unicode/string/u8upper.c index a77fcd8..ad10ef7 100644 --- a/lib/unicode/string/u8upper.c +++ b/lib/unicode/string/u8upper.c @@ -1,17 +1,16 @@ #include #include +#include "alloc.h" #include "macros.h" #include "mbstring.h" #include "unicode/prop.h" #include "unicode/string.h" char8_t * -u8upper(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, - void *alloc_ctx) +u8upper(size_t *dstn, u8view_t sv, caseflags_t flags, allocator_t mem) { ASSUME(dstn != nullptr); - ASSUME(alloc != nullptr); struct ucctx ctx = { .az_or_tr = flags & CF_LANG_AZ, @@ -25,7 +24,7 @@ u8upper(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, return nullptr; } - char8_t *dst = alloc(alloc_ctx, nullptr, 0, bufsz, 1, alignof(char8_t)); + char8_t *dst = new(mem, typeof(*dst), bufsz); rune ch; size_t n = 0; @@ -43,5 +42,5 @@ u8upper(size_t *dstn, u8view_t sv, enum caseflags flags, alloc_fn alloc, } *dstn = n; - return alloc(alloc_ctx, dst, bufsz, n, 1, alignof(char8_t)); + return resz(mem, dst, bufsz, n); } -- cgit v1.2.3