aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/unicode/prop.h4
-rw-r--r--lib/unicode/string/u8norm.c32
2 files changed, 32 insertions, 4 deletions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index 647f461..a7e90ad 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -864,9 +864,9 @@ enum uprop_lb : uint_least8_t {
};
enum uprop_nfc_qc : uint_least8_t { /* NFC quick-check values; with Maybe listed last the implicit values are N = 0, Y = 1, M = 2 */
- NFC_QC_M, /* Maybe */
NFC_QC_N, /* No */
NFC_QC_Y, /* Yes */
+ NFC_QC_M, /* Maybe */
};
enum uprop_nfd_qc : uint_least8_t {
@@ -875,9 +875,9 @@ enum uprop_nfd_qc : uint_least8_t {
};
enum uprop_nfkc_qc : uint_least8_t { /* NFKC quick-check values; with Maybe listed last the implicit values are N = 0, Y = 1, M = 2 */
- NFKC_QC_M, /* Maybe */
NFKC_QC_N, /* No */
NFKC_QC_Y, /* Yes */
+ NFKC_QC_M, /* Maybe */
};
enum uprop_nfkd_qc : uint_least8_t {
diff --git a/lib/unicode/string/u8norm.c b/lib/unicode/string/u8norm.c
index 128a67a..91c6aa5 100644
--- a/lib/unicode/string/u8norm.c
+++ b/lib/unicode/string/u8norm.c
@@ -1,3 +1,4 @@
+#include <inttypes.h>
#include <string.h>
#include "macros.h"
@@ -8,8 +9,9 @@
#define BETWEEN(x, y, z) ((x) <= (y) && (y) <= (z))
-static void decomp(char8_t *, size_t *, size_t, rune, enum normform);
-static void compbuf(char8_t *, size_t *);
+typedef uint_least8_t (*qcfn)(rune); /* common signature for the quick-check property getters: takes a rune, returns the property value as a uint_least8_t */
+
+constexpr uint_least8_t YES = 1; /* u8norm() takes the slow path as soon as a getter returns anything other than this; NOTE(review): presumably equals every *_QC_Y enumerator -- confirm for the NFD/NFKD enums */
/* Computed using gen/scale-norm.c */
constexpr int NFD_SCALE = 3;
@@ -26,6 +28,16 @@ constexpr int TCNT = 28;
constexpr int NCNT = VCNT * TCNT;
constexpr int SCNT = LCNT * NCNT;
+static void decomp(char8_t *, size_t *, size_t, rune, enum normform);
+static void compbuf(char8_t *, size_t *);
+
+static const qcfn qc_lookup[] = { /* quick-check getter for each normalization form; u8norm() indexes this with its nf argument */
+ [NF_NFC] = (qcfn)uprop_get_nfc_qc, /* each getter is cast to the shared qcfn type so one table can hold all four */
+ [NF_NFD] = (qcfn)uprop_get_nfd_qc,
+ [NF_NFKC] = (qcfn)uprop_get_nfkc_qc,
+ [NF_NFKD] = (qcfn)uprop_get_nfkd_qc,
+}; /* NOTE(review): u8norm() only assumes BETWEEN(0, nf, 4), which admits nf == 4 -- verify every accepted nf value has an entry here */
+
char8_t *
u8norm(size_t *dstn, struct u8view src, alloc_fn alloc, void *ctx,
enum normform nf)
@@ -34,6 +46,22 @@ u8norm(size_t *dstn, struct u8view src, alloc_fn alloc, void *ctx,
ASSUME(alloc != nullptr);
ASSUME(BETWEEN(0, nf, 4));
+ {
+ qcfn f = qc_lookup[nf];
+ struct u8view sv = src;
+ enum uprop_ccc prvcc = 0, curcc;
+ for (rune ch; ucsnext(&ch, &sv) != 0; prvcc = curcc) {
+ curcc = uprop_get_ccc(ch);
+ if ((prvcc > curcc && curcc != CCC_NR) || (f(ch) != YES))
+ goto no;
+ }
+
+ *dstn = src.len;
+ char8_t *dst = alloc(ctx, nullptr, 0, src.len, 1, alignof(char8_t));
+ return memcpy(dst, src.p, src.len);
+ }
+
+no:
/* Pre-allocate a buffer with some initial capacity; there is no need to
check for overflow when computing bufsz because alloc() will handle the
overflow error for us. */