From 93e6bb1bf6b12eaf3c07eb1b2b702aa1e95be48a Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 8 Jun 2024 01:37:17 +0200 Subject: Genesis commit --- .exrc | 1 + .gitignore | 5 + .gitmodules | 3 + LICENSE | 14 ++ README | 1 + cbs.h | 596 +++++++++++++++++++++++++++++++++++++++++++++++++++ make.c | 232 ++++++++++++++++++++ src/errors.c | 27 +++ src/errors.h | 6 + src/lexer.c | 34 +++ src/lexer.h | 21 ++ src/main.c | 68 ++++++ src/unicode-avx2.c | 152 +++++++++++++ src/unicode-neon.c | 147 +++++++++++++ src/unicode-sse4_1.c | 158 ++++++++++++++ src/unicode.c | 59 +++++ src/unicode.h | 17 ++ 17 files changed, 1541 insertions(+) create mode 100644 .exrc create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 LICENSE create mode 100644 README create mode 100644 cbs.h create mode 100644 make.c create mode 100644 src/errors.c create mode 100644 src/errors.h create mode 100644 src/lexer.c create mode 100644 src/lexer.h create mode 100644 src/main.c create mode 100644 src/unicode-avx2.c create mode 100644 src/unicode-neon.c create mode 100644 src/unicode-sse4_1.c create mode 100644 src/unicode.c create mode 100644 src/unicode.h diff --git a/.exrc b/.exrc new file mode 100644 index 0000000..432baf7 --- /dev/null +++ b/.exrc @@ -0,0 +1 @@ +set makeprg=./make diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8892127 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.cache/ +compile_commands.json +make +*.o +oryx diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..cd5a7dd --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/mlib"] + path = vendor/mlib + url = https://github.com/Mango0x45/mlib.git diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b946725 --- /dev/null +++ b/LICENSE @@ -0,0 +1,14 @@ +BSD Zero Clause License + +Copyright © 2024 Thomas Voss + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. 
+ +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. diff --git a/README b/README new file mode 100644 index 0000000..fe2ab13 --- /dev/null +++ b/README @@ -0,0 +1 @@ +Oryx — Programming Made Better diff --git a/cbs.h b/cbs.h new file mode 100644 index 0000000..492150c --- /dev/null +++ b/cbs.h @@ -0,0 +1,596 @@ +#ifndef C_BUILD_SYSTEM_H +#define C_BUILD_SYSTEM_H + +#define _GNU_SOURCE +#include +#include + +#include +#include +#include +#ifndef CBS_NO_THREADS +# include +#endif +#include +#include +#include +#include +#include +#include +#include + +#define _vtoxs(...) ((char *[]){__VA_ARGS__}) + +#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) + +struct strs { + char **buf; + size_t len, cap; +}; + +enum pkg_config_flags { + PC_CFLAGS = 1 << 0, + PC_LIBS = 1 << 1, + PC_SHARED = 1 << 2, + PC_STATIC = 1 << 3, +}; + +void cbsinit(int, char **); +static void rebuild(const char *); /* Always call via macro wrapper */ +#define rebuild() rebuild(__FILE__) + +static void strsfree(struct strs *); +static void strszero(struct strs *); +static void strspush(struct strs *, char **, size_t); +static void strspushenv(struct strs *, const char *, char **, size_t); +#define strspushl(xs, ...) \ + strspush((xs), _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__))) +#define strspushenvl(xs, ev, ...) 
\ + strspushenv((xs), (ev), _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__))) + +static bool fexists(const char *); +static int fmdcmp(const char *, const char *); +static bool fmdolder(const char *, const char *); +static bool fmdnewer(const char *, const char *); +static bool foutdated(const char *, char **, size_t); +#define foutdatedl(s, ...) \ + foutdated(s, _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__))) + +static int cmdexec(struct strs); +static pid_t cmdexec_async(struct strs); +static int cmdexec_read(struct strs, char **, size_t *); +static int cmdwait(pid_t); +static void cmdput(struct strs); +static void cmdfput(FILE *, struct strs); + +static char *swpext(const char *, const char *); +static bool pcquery(struct strs *, const char *, int); +static bool binexists(const char *); +static int nproc(void); + +#ifndef CBS_NO_THREADS +typedef void tjob(void *); +typedef void tjob_free(void *); + +struct _tqueue { + void *arg; + tjob *fn; + tjob_free *free; + struct _tqueue *next; +}; + +typedef struct { + bool stop; + size_t tcnt, left; + pthread_t *thrds; + pthread_cond_t cnd; + pthread_mutex_t mtx; + struct _tqueue *head, *tail; +} tpool; + +static void tpinit(tpool *, size_t); +static void tpfree(tpool *); +static void tpwait(tpool *); +static void tpenq(tpool *, tjob *, void *, tjob_free *); +#endif /* !CBS_NO_THREADS */ + +static int _cbs_argc; +static char **_cbs_argv; + +/* Implementation */ + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#ifdef __APPLE__ +# define st_mtim st_mtimespec +#endif + +void +cbsinit(int argc, char **argv) +{ + _cbs_argc = argc; + _cbs_argv = malloc(sizeof(char *) * (argc + 1)); + assert(_cbs_argv != NULL); + for (int i = 0; i < argc; i++) { + _cbs_argv[i] = strdup(argv[i]); + assert(_cbs_argv[i] != NULL); + } + _cbs_argv[argc] = NULL; + + char *s = strrchr(_cbs_argv[0], '/'); + if (s != NULL) { + s[0] = 0; + assert(chdir(_cbs_argv[0]) != -1); + s[0] = 
'/'; + } +} + +void +(rebuild)(const char *path) +{ + char *src, *dst; + + if ((src = strrchr(path, '/')) != NULL) + src++; + else + src = (char *)path; + + if ((dst = strrchr(*_cbs_argv, '/')) != NULL) + dst++; + else + dst = *_cbs_argv; + + if (!foutdatedl(dst, src)) + return; + + struct strs xs = {0}; + strspushenvl(&xs, "CC", "cc"); +#ifndef CBS_NO_THREADS + strspushl(&xs, "-lpthread"); +#endif + strspushl(&xs, "-o", dst, src); + cmdput(xs); + assert(cmdexec(xs) == EXIT_SUCCESS); + + execvp(*_cbs_argv, _cbs_argv); + assert(!"failed to execute process"); +} + +void +strsfree(struct strs *xs) +{ + free(xs->buf); + xs->buf = NULL; + xs->len = xs->cap = 0; +} + +void +strszero(struct strs *xs) +{ + xs->len = 0; +} + +void +strspush(struct strs *xs, char **ys, size_t n) +{ + if (n == 0) + return; + + if (xs->len + n >= xs->cap) { + xs->cap = (xs->len + n) * 2; + xs->buf = realloc(xs->buf, sizeof(char *) * (xs->cap + 1)); + assert(xs->buf != NULL); + } + memcpy(xs->buf + xs->len, ys, n * sizeof(char *)); + xs->len += n; + + assert(xs->len <= xs->cap); + xs->buf[xs->len] = NULL; +} + +void +strspushenv(struct strs *xs, const char *ev, char **ys, size_t n) +{ + /* NOTE: Do your best to NOT modify any pushed envvar! */ + char *p = getenv(ev); + if (p == NULL || *p == 0) + strspush(xs, ys, n); + else + strspush(xs, &p, 1); +} + +bool +fexists(const char *f) +{ + return !access(f, F_OK); +} + +int +fmdcmp(const char *lhs, const char *rhs) +{ + struct stat sbl, sbr; + + assert(stat(lhs, &sbl) != -1); + assert(stat(rhs, &sbr) != -1); + + return sbl.st_mtim.tv_sec == sbr.st_mtim.tv_sec + ? 
sbl.st_mtim.tv_nsec - sbr.st_mtim.tv_nsec + : sbl.st_mtim.tv_sec - sbr.st_mtim.tv_sec; +} + +bool +fmdnewer(const char *lhs, const char *rhs) +{ + return fmdcmp(lhs, rhs) > 0; +} + +bool +fmdolder(const char *lhs, const char *rhs) +{ + return fmdcmp(lhs, rhs) < 0; +} + +bool +foutdated(const char *src, char **deps, size_t n) +{ + if (!fexists(src)) + return true; + for (size_t i = 0; i < n; i++) { + if (fmdolder(src, deps[i])) + return true; + } + return false; +} + +int +cmdexec(struct strs xs) +{ + return cmdwait(cmdexec_async(xs)); +} + +pid_t +cmdexec_async(struct strs xs) +{ + pid_t pid = fork(); + assert(pid != -1); + if (pid == 0) { + execvp(xs.buf[0], xs.buf); + assert(!"failed to execute process"); + } + return pid; +} + +int +cmdexec_read(struct strs xs, char **p, size_t *n) +{ /* Runs xs with its stdout piped back; on return *p is the malloc'd, NUL-terminated output (NULL when there was none; caller frees) and *n its length. Returns cmdwait()'s result. */ + enum { + R, + W, + }; + int fds[2]; + + assert(pipe(fds) != -1); + + pid_t pid = fork(); + assert(pid != -1); + + if (pid == 0) { + close(fds[R]); + close(STDOUT_FILENO); + assert(dup2(fds[W], STDOUT_FILENO) != -1); + execvp(xs.buf[0], xs.buf); + assert(!"failed to execute process"); + } + + close(fds[W]); + + struct stat sb; + assert(fstat(fds[R], &sb) != -1); + + *p = NULL, *n = 0; + char *buf = malloc(sb.st_blksize); + assert(buf != NULL); + + for (;;) { + ssize_t nr; + if ((nr = read(fds[R], buf, sb.st_blksize)) == 0) + break; + assert(nr != -1); + + *p = realloc(*p, *n + nr + 1); + assert(*p != NULL); + + memcpy(*p + *n, buf, nr); + *n += nr; + } + + close(fds[R]); + if (*p != NULL) /* terminate the collected output (room reserved by the +1 above), not the scratch buffer */ + (*p)[*n] = 0; + free(buf); + + return cmdwait(pid); +} + +int +cmdwait(pid_t pid) +{ + int ws; + assert(waitpid(pid, &ws, 0) != -1); + if (WIFEXITED(ws)) + return WEXITSTATUS(ws); + return WIFEXITED(ws) ? 
WEXITSTATUS(ws) : 256; +} + +/* import shlex + + s = '#define _SHELL_SAFE "' + for c in map(chr, range(128)): + if not shlex._find_unsafe(c): + s += c + print(s + '"') */ +#define _SHELL_SAFE \ + "%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" + +void +cmdput(struct strs xs) +{ + cmdfput(stdout, xs); +} + +void +cmdfput(FILE *fp, struct strs xs) +{ + flockfile(fp); + for (size_t i = 0; i < xs.len; i++) { + bool safe = true; + char *p, *q; + + p = q = xs.buf[i]; + for (; *q; q++) { + if (!strchr(_SHELL_SAFE, *q)) { + safe = false; + break; + } + } + + if (safe) + fputs(p, fp); + else { + putc('\'', fp); + for (q = p; *q; q++) { + if (*q == '\'') + fputs("'\"'\"'", fp); + else + putc(*q, fp); + } + putc('\'', fp); + } + + putc(i == xs.len - 1 ? '\n' : ' ', fp); + } + funlockfile(fp); +} + +bool +pcquery(struct strs *xs, const char *lib, int flags) +{ + struct strs ys = {0}; + + strspushl(&ys, "pkg-config", "--silence-errors"); + if (flags & PC_CFLAGS) + strspushl(&ys, "--cflags"); + if (flags & PC_LIBS) + strspushl(&ys, "--libs"); + if (flags & PC_SHARED) + strspushl(&ys, "--shared"); + if (flags & PC_STATIC) + strspushl(&ys, "--static"); + strspushl(&ys, (char *)lib); + + char *buf; + size_t bufsz; + int ec = cmdexec_read(ys, &buf, &bufsz); + strsfree(&ys); + if (ec != EXIT_SUCCESS) + return false; + + /* Remove trailing newline */ + buf[bufsz - 1] = 0; + + wordexp_t we; + assert(wordexp(buf, &we, WRDE_NOCMD) == 0); + + char **words = malloc(sizeof(char *) * we.we_wordc); + assert(words != NULL); + for (size_t i = 0; i < we.we_wordc; i++) + assert((words[i] = strdup(we.we_wordv[i])) != NULL); + + strspush(xs, words, we.we_wordc); + wordfree(&we); + free(buf); + return true; +} + +bool +binexists(const char *s) +{ + const char *path = getenv("PATH"); + assert(path != NULL); + + char *p = strdup(path), *it; + assert(p != NULL); + + for (it = strtok(p, ":"); it != NULL; it = strtok(NULL, ":")) { + static char buf[PATH_MAX]; + memset(buf, 0, 
sizeof(buf)); + snprintf(buf, sizeof(buf), "%s/%s", it, s); + if (fexists(buf)) { + free(p); + return true; + } + } + + free(p); + return false; +} + +int +nproc(void) +{ +#ifdef _SC_NPROCESSORS_ONLN + return (int)sysconf(_SC_NPROCESSORS_ONLN); +#else + errno = 0; + return -1; +#endif +} + +char * +swpext(const char *file, const char *ext) +{ + const char *p = strrchr(file, '.'); + if (p == NULL) { + p = strdup(file); + assert(p != NULL); + return (char *)p; + } + + size_t noextlen = p - file; + char *s = malloc(noextlen + strlen(ext) + 2); + assert(s != NULL); + sprintf(s, "%.*s.%s", (int)noextlen, file, ext); + return s; +} + +#ifndef CBS_NO_THREADS +static struct _tqueue * +_tpdeq(tpool *tp) +{ + struct _tqueue *q = tp->head; + + if (q != NULL) { + tp->head = tp->head->next; + if (!tp->head) + tp->tail = NULL; + } + + return q; +} + +static void * +_tpwork(void *arg) +{ + tpool *tp = arg; + + while (!tp->stop) { + struct _tqueue *q; + + pthread_mutex_lock(&tp->mtx); + while (!tp->stop && !tp->head) + pthread_cond_wait(&tp->cnd, &tp->mtx); + if (tp->stop) { + pthread_mutex_unlock(&tp->mtx); + break; + } + + q = _tpdeq(tp); + pthread_mutex_unlock(&tp->mtx); + + q->fn(q->arg); + if (q->free) + q->free(q->arg); + free(q); + + pthread_mutex_lock(&tp->mtx); + tp->left--; + pthread_cond_broadcast(&tp->cnd); + pthread_mutex_unlock(&tp->mtx); + } + + return NULL; +} + +void +tpinit(tpool *tp, size_t n) +{ + tp->tcnt = n; + tp->stop = false; + tp->left = 0; + tp->head = tp->tail = NULL; + tp->thrds = malloc(sizeof(pthread_t) * n); + assert(tp->thrds != NULL); + pthread_cond_init(&tp->cnd, NULL); + pthread_mutex_init(&tp->mtx, NULL); + for (size_t i = 0; i < n; i++) + assert(pthread_create(tp->thrds + i, NULL, _tpwork, tp) == 0); +} + +void +tpfree(tpool *tp) +{ + tp->stop = true; + + pthread_mutex_lock(&tp->mtx); + pthread_cond_broadcast(&tp->cnd); + pthread_mutex_unlock(&tp->mtx); + + for (size_t i = 0; i < tp->tcnt; i++) + pthread_join(tp->thrds[i], NULL); + + 
free(tp->thrds); + while (tp->head != NULL) { + struct _tqueue *q = _tpdeq(tp); + if (q->free) + q->free(q->arg); + free(q); + } + + pthread_cond_destroy(&tp->cnd); + pthread_mutex_destroy(&tp->mtx); +} + +void +tpwait(tpool *tp) +{ + pthread_mutex_lock(&tp->mtx); + while (!tp->stop && tp->left) + pthread_cond_wait(&tp->cnd, &tp->mtx); + pthread_mutex_unlock(&tp->mtx); +} + +void +tpenq(tpool *tp, tjob *fn, void *arg, tjob_free *free) +{ + struct _tqueue *q = malloc(sizeof(*q)); + assert(q != NULL); + *q = (struct _tqueue){ + .fn = fn, + .arg = arg, + .free = free, + }; + + pthread_mutex_lock(&tp->mtx); + if (tp->tail) + tp->tail->next = q; + if (!tp->head) + tp->head = q; + tp->tail = q; + tp->left++; + pthread_cond_signal(&tp->cnd); + pthread_mutex_unlock(&tp->mtx); +} +#endif /* !CBS_NO_THREADS */ + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + +#ifdef __APPLE__ +# undef st_mtim +#endif + +#endif /* !C_BUILD_SYSTEM_H */ diff --git a/make.c b/make.c new file mode 100644 index 0000000..306c4b7 --- /dev/null +++ b/make.c @@ -0,0 +1,232 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cbs.h" + +#define TARGET "oryx" + +enum { + SIMD_AVX2 = 1 << 0, + SIMD_NEON = 1 << 1, + SIMD_SSE4_1 = 1 << 2, +}; + +static char *cflags_all[] = { + "-pipe", + "-std=c99", + "-Wall", + "-Wextra", + "-Wno-attributes", + "-Wno-parentheses", + "-Wno-pointer-sign", + "-Wpedantic", + "-Wvla", +#if __GLIBC__ + "-D_GNU_SOURCE", +#endif +}; + +static char *cflags_dbg[] = { + "-DDEBUG=1", "-fsanitize=address,undefined", "-g3", "-ggdb3", "-O0", +}; + +static char *cflags_rls[] = { + "-DNDEBUG=1", "-flto", "-O3", +#ifndef __APPLE__ + "-march=native", "-mtune=native", +#endif +}; + +static char *argv0; +static bool fflag, rflag; +static int simd_flags; + +static void cc(void *); +static void ld(void); +static bool tagvalid(const char *); +static void ckd_cpu_flags(void); +static int globerr(const char *, int); + +static 
void +usage(void) +{ + fprintf(stderr, + "Usage: %s [-fr]\n" + " %s clean\n", + argv0, argv0); + exit(EXIT_FAILURE); +} + +int +main(int argc, char **argv) +{ + cbsinit(argc, argv); + rebuild(); + + argv0 = argv[0]; + + int opt; + while ((opt = getopt(argc, argv, "fr")) != -1) { + switch (opt) { + case 'f': + fflag = true; + break; + case 'r': + rflag = true; + break; + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + struct strs cmd = {0}; + + if (argc > 0) { + if (strcmp("clean", *argv) == 0) { + strspushl(&cmd, "find", ".", "-name", TARGET, "-or", "-name", "*.o", + "-delete"); + cmdput(cmd); + cmdexec(cmd); + } else { + fprintf(stderr, "%s: invalid subcommand — ‘%s’\n", argv0, *argv); + usage(); + } + + return EXIT_SUCCESS; + } + + ckd_cpu_flags(); + + glob_t g; + assert(glob("src/*.c", 0, globerr, &g) == 0); + + int procs = nproc(); + if (procs == -1) + procs = 8; + + tpool tp; + tpinit(&tp, procs); + for (size_t i = 0; i < g.gl_pathc; i++) + tpenq(&tp, cc, g.gl_pathv[i], NULL); + tpwait(&tp); + tpfree(&tp); + + ld(); + + globfree(&g); + strsfree(&cmd); + return EXIT_SUCCESS; +} + +void +cc(void *arg) +{ + if (!tagvalid(arg)) + return; + + struct strs cmd = {0}; + char *dst = swpext(arg, "o"), *src = arg; + + if (!fflag && !foutdatedl(dst, src)) + goto out; + + strspushenvl(&cmd, "CC", "cc"); + strspush(&cmd, cflags_all, lengthof(cflags_all)); + if (rflag) + strspushenv(&cmd, "CFLAGS", cflags_rls, lengthof(cflags_rls)); + else + strspushenv(&cmd, "CFLAGS", cflags_dbg, lengthof(cflags_dbg)); + if (simd_flags != 0) + strspushl(&cmd, "-DORYX_SIMD=1"); + strspushl(&cmd, "-o", dst, "-c", src); + + cmdput(cmd); + cmdexec(cmd); + strsfree(&cmd); +out: + free(dst); +} + +void +ld(void) +{ + glob_t g; + bool dobuild = fflag; + struct strs cmd = {0}; + + strspushenvl(&cmd, "CC", "cc"); + strspush(&cmd, cflags_all, lengthof(cflags_all)); + if (rflag) + strspushenv(&cmd, "CFLAGS", cflags_rls, lengthof(cflags_rls)); + else + strspushenv(&cmd, "CFLAGS", 
cflags_dbg, lengthof(cflags_dbg)); + strspushl(&cmd, "-o", TARGET); + + assert(glob("src/*.o", 0, globerr, &g) == 0); + for (size_t i = 0; i < g.gl_pathc; i++) { + if (!tagvalid(g.gl_pathv[i])) + continue; + if (foutdatedl(TARGET, g.gl_pathv[i])) + dobuild = true; + strspushl(&cmd, g.gl_pathv[i]); + } + + if (dobuild) { + cmdput(cmd); + cmdexec(cmd); + } + + globfree(&g); + strsfree(&cmd); +} + +bool +tagvalid(const char *file) +{ + if (strstr(file, "-avx2.") != NULL && (simd_flags & SIMD_AVX2) == 0) + return false; + if (strstr(file, "-neon.") != NULL && (simd_flags & SIMD_NEON) == 0) + return false; + if (strstr(file, "-sse4_1.") != NULL && (simd_flags & SIMD_SSE4_1) == 0) + return false; + return true; +} + +void +ckd_cpu_flags(void) +{ + if (!rflag) + return; +#if __GNUC__ && __x86_64__ + uint32_t eax, ebx, ecx, edx; /* CPUID overwrites all four registers, so all four must be declared as outputs */ + + asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(7), "c"(0)); + if (ebx & (1 << 5)) { + simd_flags |= SIMD_AVX2; + return; + } + + asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(1), "c"(0)); + if (ecx & (1 << 19)) + simd_flags |= SIMD_SSE4_1; +#elif __ARM_NEON + simd_flags |= SIMD_NEON; +#endif +} + +int +globerr(const char *s, int e) +{ + fprintf(stderr, "glob: %s: %s\n", s, strerror(e)); + exit(EXIT_FAILURE); +} diff --git a/src/errors.c b/src/errors.c new file mode 100644 index 0000000..49eb11d --- /dev/null +++ b/src/errors.c @@ -0,0 +1,27 @@ +#include +#include +#include +#include +#include + +#include "errors.h" + +void +err(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + + int save = errno; + flockfile(stderr); + + fputs("oryx: ", stderr); + vfprintf(stderr, fmt, ap); + if (fmt[strlen(fmt) - 1] == ':') + fprintf(stderr, " %s", strerror(save)); + fputc('\n', stderr); + fflush(stderr); + funlockfile(stderr); + va_end(ap); + exit(EXIT_FAILURE); +} diff --git a/src/errors.h b/src/errors.h new file mode 100644 index 0000000..69c8ea0 --- /dev/null +++ b/src/errors.h @@ -0,0 +1,6 @@ +#ifndef ORYX_ERRORS_H +#define ORYX_ERRORS_H + +void err(const char *, ...); + +#endif /* !ORYX_ERRORS_H */ diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..970202a --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,34 @@ +#include +#include +#include + +#include "errors.h" +#include "lexer.h" +#include "unicode.h" + +struct lexeme * +lexstring(const char *code, size_t codesz, size_t *lcnt) +{ + struct { + struct lexeme *p; + size_t len, buf; + } data = {0}; + +#if ORYX_SIMD + if (!utf8_validate_simd(code, codesz)) { +#endif + size_t off = utf8_validate_off(code, codesz); + if (off != 0) + err("Invalid UTF-8 at byte-offset %zu", off - 1); +#if ORYX_SIMD + } +#endif + + const char *end = code + codesz; + while (code < end) { + rune ch = utf8_decode(&code); + } + + *lcnt = data.len; + return data.p; +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..7271498 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,21 @@ +#ifndef ORYX_LEXER_H +#define ORYX_LEXER_H + +#include +#include + +enum { + LEXIDENT, +}; + +typedef uint8_t lexeme_kind; + +struct lexeme { + lexeme_kind kind; + const char *p; + size_t len; +}; + +struct lexeme *lexstring(const char *, size_t, size_t *); + +#endif /* !ORYX_LEXER_H */ diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..23b0471 --- /dev/null +++ b/src/main.c @@ -0,0 +1,68 @@ +#include + +#include +#include +#include +#include + +#include "errors.h" +#include "lexer.h" + +static char *readfile(const char *, size_t *); + +int +main(int 
argc, char **argv) +{ + if (argc != 2) { + fputs("Usage: oryx file\n", stderr); + exit(EXIT_FAILURE); + } + + struct { + char *p; + size_t len; + } file = { + .p = readfile(argv[1], &file.len), + }; + + struct { + struct lexeme *p; + size_t len; + } toks = { + .p = lexstring(file.p, file.len, &toks.len), + }; + +#if DEBUG + free(file.p); + free(toks.p); +#endif + return EXIT_SUCCESS; +} + +char * +readfile(const char *filename, size_t *n) +{ + int fd = open(filename, O_RDONLY); + if (fd == -1) + err("open: %s", filename); + + struct stat sb; + if (fstat(fd, &sb) == -1) + err("fstat: %s", filename); + + char *p = malloc(sb.st_size + 4); + if (p == NULL) + err("malloc:"); + + ssize_t nr; + for (size_t off = 0; (nr = read(fd, p + off, sb.st_size - off)) > 0; off += nr) + ; /* each read is capped at the bytes still free in p, so it cannot run past the st_size + 4 allocation */ + if (nr == -1) + err("read: %s", filename); + for (int i = 0; i < 4; i++) + p[sb.st_size + i] = 0; + + *n = sb.st_size; + close(fd); + return p; +} diff --git a/src/unicode-avx2.c b/src/unicode-avx2.c new file mode 100644 index 0000000..6507ca2 --- /dev/null +++ b/src/unicode-avx2.c @@ -0,0 +1,152 @@ +#include +#include + +#include "unicode.h" + +#pragma GCC diagnostic ignored "-Woverflow" + +static const int8_t _first_len_tbl[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3, +}; + +static const int8_t _first_range_tbl[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, +}; + +static const int8_t _range_min_tbl[] = { + 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, 0xC2, 0x7F, 0x7F, + 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, + 0x90, 0x80, 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, +}; +static const int8_t _range_max_tbl[] = { + 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, 0xF4, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, + 0xBF, 0x8F, 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +}; + +static const int8_t _df_ee_tbl[] = { 
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, +}; + +static const int8_t _ef_fe_tbl[] = { + 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static inline __m256i +push_last_byte_of_a_to_b(__m256i a, __m256i b) +{ + return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15); +} + +static inline __m256i +push_last_2bytes_of_a_to_b(__m256i a, __m256i b) +{ + return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 14); +} + +static inline __m256i +push_last_3bytes_of_a_to_b(__m256i a, __m256i b) +{ + return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 13); +} + +bool +utf8_validate_simd(const char *data, size_t len) +{ + const unsigned char *s = data; + if (len >= 32) { + __m256i prev_input = _mm256_set1_epi8(0); + __m256i prev_first_len = _mm256_set1_epi8(0); + + const __m256i first_len_tbl = _mm256_loadu_si256( + (const __m256i *)_first_len_tbl); + const __m256i first_range_tbl = _mm256_loadu_si256( + (const __m256i *)_first_range_tbl); + const __m256i range_min_tbl = _mm256_loadu_si256( + (const __m256i *)_range_min_tbl); + const __m256i range_max_tbl = _mm256_loadu_si256( + (const __m256i *)_range_max_tbl); + const __m256i df_ee_tbl = _mm256_loadu_si256( + (const __m256i *)_df_ee_tbl); + const __m256i ef_fe_tbl = _mm256_loadu_si256( + (const __m256i *)_ef_fe_tbl); + + __m256i error1 = _mm256_set1_epi8(0); + __m256i error2 = _mm256_set1_epi8(0); + + while (len >= 32) { + const __m256i input = _mm256_loadu_si256((const __m256i *)s); + + const __m256i high_nibbles = _mm256_and_si256( + _mm256_srli_epi16(input, 4), _mm256_set1_epi8(0x0F)); + + __m256i first_len = _mm256_shuffle_epi8(first_len_tbl, + high_nibbles); + + __m256i range = _mm256_shuffle_epi8(first_range_tbl, high_nibbles); + + range = _mm256_or_si256( + range, push_last_byte_of_a_to_b(prev_first_len, first_len)); + + __m256i tmp1, tmp2; + + tmp1 = 
push_last_2bytes_of_a_to_b(prev_first_len, first_len); + tmp2 = _mm256_subs_epu8(tmp1, _mm256_set1_epi8(1)); + + range = _mm256_or_si256(range, tmp2); + + tmp1 = push_last_3bytes_of_a_to_b(prev_first_len, first_len); + tmp2 = _mm256_subs_epu8(tmp1, _mm256_set1_epi8(2)); + range = _mm256_or_si256(range, tmp2); + + __m256i shift1, pos, range2; + + shift1 = push_last_byte_of_a_to_b(prev_input, input); + pos = _mm256_sub_epi8(shift1, _mm256_set1_epi8(0xEF)); + + tmp1 = _mm256_subs_epu8(pos, _mm256_set1_epi8(240)); + range2 = _mm256_shuffle_epi8(df_ee_tbl, tmp1); + tmp2 = _mm256_adds_epu8(pos, _mm256_set1_epi8(112)); + range2 = _mm256_add_epi8(range2, + _mm256_shuffle_epi8(ef_fe_tbl, tmp2)); + + range = _mm256_add_epi8(range, range2); + + __m256i minv = _mm256_shuffle_epi8(range_min_tbl, range); + __m256i maxv = _mm256_shuffle_epi8(range_max_tbl, range); + + error1 = _mm256_or_si256(error1, _mm256_cmpgt_epi8(minv, input)); + error2 = _mm256_or_si256(error2, _mm256_cmpgt_epi8(input, maxv)); + + prev_input = input; + prev_first_len = first_len; + + s += 32; + len -= 32; + } + + __m256i error = _mm256_or_si256(error1, error2); + if (!_mm256_testz_si256(error, error)) + return false; + + int32_t token4 = _mm256_extract_epi32(prev_input, 7); + const int8_t *token = (const int8_t *)&token4; + int lookahead = 0; + if (token[3] > (int8_t)0xBF) + lookahead = 1; + else if (token[2] > (int8_t)0xBF) + lookahead = 2; + else if (token[1] > (int8_t)0xBF) + lookahead = 3; + + s -= lookahead; + len += lookahead; + } + + /* Check remaining bytes with naïve method */ + return utf8_validate_off(s, len) == 0; +} diff --git a/src/unicode-neon.c b/src/unicode-neon.c new file mode 100644 index 0000000..2791117 --- /dev/null +++ b/src/unicode-neon.c @@ -0,0 +1,147 @@ +#include +#include + +#include "unicode.h" + +#pragma GCC diagnostic ignored "-Woverflow" + +static const uint8_t _first_len_tbl[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3, +}; + +static const uint8_t 
_first_range_tbl[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, +}; + +static const uint8_t _range_min_tbl[] = { + 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, + 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +}; +static const uint8_t _range_max_tbl[] = { + 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, + 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const uint8_t _range_adjust_tbl[] = { + 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, +}; + +bool +utf8_validate_simd(const char *data, size_t len) +{ + const unsigned char *s = data; + if (len >= 32) { + uint8x16_t prev_input = vdupq_n_u8(0); + uint8x16_t prev_first_len = vdupq_n_u8(0); + + const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl); + const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl); + const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl); + const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl); + const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl); + + const uint8x16_t const_1 = vdupq_n_u8(1); + const uint8x16_t const_2 = vdupq_n_u8(2); + const uint8x16_t const_e0 = vdupq_n_u8(0xE0); + + uint8x16_t error1 = vdupq_n_u8(0); + uint8x16_t error2 = vdupq_n_u8(0); + uint8x16_t error3 = vdupq_n_u8(0); + uint8x16_t error4 = vdupq_n_u8(0); + + while (len >= 32) { +#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8) + /* GCC doesn't support vld1q_u8_x2 until version 8 */ + const uint8x16_t input_a = vld1q_u8(s); /* load from the advancing cursor s; data never moves */ + const uint8x16_t input_b = vld1q_u8(s + 16); +#else + /* Forces a double load on Clang */ + const uint8x16x2_t input_pair = vld1q_u8_x2(s); + const uint8x16_t input_a = input_pair.val[0]; + const uint8x16_t input_b = input_pair.val[1]; +#endif + + const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4); + const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4); + + const uint8x16_t first_len_a = vqtbl1q_u8(first_len_tbl, + high_nibbles_a); + const uint8x16_t 
first_len_b = vqtbl1q_u8(first_len_tbl, + high_nibbles_b); + + uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a); + uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b); + + range_a = vorrq_u8(range_a, + vextq_u8(prev_first_len, first_len_a, 15)); + range_b = vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15)); + + uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b; + tmp1_a = vextq_u8(prev_first_len, first_len_a, 14); + tmp1_a = vqsubq_u8(tmp1_a, const_1); + range_a = vorrq_u8(range_a, tmp1_a); + + tmp1_b = vextq_u8(first_len_a, first_len_b, 14); + tmp1_b = vqsubq_u8(tmp1_b, const_1); + range_b = vorrq_u8(range_b, tmp1_b); + + tmp2_a = vextq_u8(prev_first_len, first_len_a, 13); + tmp2_a = vqsubq_u8(tmp2_a, const_2); + range_a = vorrq_u8(range_a, tmp2_a); + + tmp2_b = vextq_u8(first_len_a, first_len_b, 13); + tmp2_b = vqsubq_u8(tmp2_b, const_2); + range_b = vorrq_u8(range_b, tmp2_b); + + uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15); + uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0); + range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a)); + + uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15); + uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0); + range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b)); + + uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a); + uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a); + + uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b); + uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b); + + error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a)); + error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a)); + + error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b)); + error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b)); + + prev_input = input_b; + prev_first_len = first_len_b; + + s += 32; + len -= 32; + } + error1 = vorrq_u8(error1, error2); + error1 = vorrq_u8(error1, error3); + error1 = vorrq_u8(error1, error4); + + if (vmaxvq_u8(error1)) + return false; /* the function returns bool: -1 would convert to true and report invalid input as valid */ + + uint32_t 
/* src/unicode-sse4_1.c: SSE4.1 implementation of utf8_validate_simd().

   The lookup tables below are declared as int8_t because every comparison
   in the kernel is a *signed* byte comparison (_mm_cmplt_epi8 and
   _mm_cmpgt_epi8).  Initialisers such as 0x80 therefore intentionally
   wrap to negative values, which is what the pragma silences. */
#pragma GCC diagnostic ignored "-Woverflow"

/* Indexed by the high nibble of a byte: number of continuation bytes that
   must follow it (0 for 0x0_-0xB_, 1 for 0xC_/0xD_, 2 for 0xE_, 3 for
   0xF_). */
static const int8_t _first_len_tbl[] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
};

/* Indexed by the high nibble of a byte: initial range index, 8 for any
   byte whose high nibble is 0xC-0xF and 0 otherwise. */
static const int8_t _first_range_tbl[] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
};

/* Indexed by the final range index: smallest and largest byte value that
   is acceptable at that position.  Entries 9-15 have min > max as signed
   bytes, so any byte that maps to them is flagged as an error. */
static const int8_t _range_min_tbl[] = {
	0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
	0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
};
static const int8_t _range_max_tbl[] = {
	0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
	0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
};

/* Range-index adjustments derived from the *previous* byte.  After 0xEF is
   subtracted from it, the two tables are probed so that a previous byte of
   0xE0, 0xED, 0xF0 or 0xF4 contributes 2, 3, 3 or 4 respectively and every
   other value contributes 0. */
static const int8_t _df_ee_tbl[] = {
	0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
};
static const int8_t _ef_fe_tbl[] = {
	0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/* Validate that the LEN bytes at DATA are well-formed UTF-8.

   Returns true when the input is valid and false otherwise.  (The
   function returns bool; it does not use a 0 / -1 convention.)

   Inputs of 32 bytes or more are checked 32 bytes per iteration with
   SSE4.1; whatever remains, plus up to three bytes of an unfinished
   sequence at the end of the last vector, is handed to
   utf8_validate_off(). */
bool
utf8_validate_simd(const char *data, size_t len)
{
	/* Explicit cast: char and unsigned char pointers are not
	   assignment-compatible. */
	const unsigned char *s = (const unsigned char *)data;

	if (len >= 32) {
		/* State carried from one 16-byte vector to the next */
		__m128i prev_input = _mm_set1_epi8(0);
		__m128i prev_first_len = _mm_set1_epi8(0);

		const __m128i first_len_tbl = _mm_loadu_si128(
			(const __m128i *)_first_len_tbl);
		const __m128i first_range_tbl = _mm_loadu_si128(
			(const __m128i *)_first_range_tbl);
		const __m128i range_min_tbl = _mm_loadu_si128(
			(const __m128i *)_range_min_tbl);
		const __m128i range_max_tbl = _mm_loadu_si128(
			(const __m128i *)_range_max_tbl);
		const __m128i df_ee_tbl = _mm_loadu_si128(
			(const __m128i *)_df_ee_tbl);
		const __m128i ef_fe_tbl = _mm_loadu_si128(
			(const __m128i *)_ef_fe_tbl);

		/* Accumulates out-of-range lanes over the whole loop; it is
		   only inspected once, after the loop. */
		__m128i error = _mm_set1_epi8(0);

		while (len >= 32) {
			/************************* block 1 ************************/
			const __m128i input_a = _mm_loadu_si128((const __m128i *)s);

			/* There is no 8-bit shift, so shift 16-bit lanes and
			   mask off the bits that leaked in from the neighbour. */
			__m128i high_nibbles = _mm_and_si128(
				_mm_srli_epi16(input_a, 4), _mm_set1_epi8(0x0F));

			__m128i first_len_a = _mm_shuffle_epi8(first_len_tbl,
			                                       high_nibbles);

			__m128i range_a = _mm_shuffle_epi8(first_range_tbl,
			                                   high_nibbles);

			/* OR in first_len of the byte 1 position back ... */
			range_a = _mm_or_si128(
				range_a,
				_mm_alignr_epi8(first_len_a, prev_first_len, 15));

			/* ... 2 positions back, minus 1 (saturating) ... */
			__m128i tmp;
			tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
			range_a = _mm_or_si128(range_a, tmp);

			/* ... and 3 positions back, minus 2 (saturating). */
			tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
			range_a = _mm_or_si128(range_a, tmp);

			/* Adjust the index according to the previous byte
			   (see _df_ee_tbl / _ef_fe_tbl). */
			__m128i shift1, pos, range2;
			shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
			pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
			tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
			range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
			tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
			range2 = _mm_add_epi8(range2,
			                      _mm_shuffle_epi8(ef_fe_tbl, tmp));

			range_a = _mm_add_epi8(range_a, range2);

			/* Each byte must lie within [min, max] of its range */
			__m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
			__m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);

			tmp = _mm_or_si128(_mm_cmplt_epi8(input_a, minv),
			                   _mm_cmpgt_epi8(input_a, maxv));
			error = _mm_or_si128(error, tmp);

			/************************* block 2 ************************/
			/* Same steps as block 1, with block 1 as the "previous"
			   vector. */
			const __m128i input_b = _mm_loadu_si128(
				(const __m128i *)(s + 16));

			high_nibbles = _mm_and_si128(_mm_srli_epi16(input_b, 4),
			                             _mm_set1_epi8(0x0F));

			__m128i first_len_b = _mm_shuffle_epi8(first_len_tbl,
			                                       high_nibbles);

			__m128i range_b = _mm_shuffle_epi8(first_range_tbl,
			                                   high_nibbles);

			range_b = _mm_or_si128(
				range_b,
				_mm_alignr_epi8(first_len_b, first_len_a, 15));

			tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
			range_b = _mm_or_si128(range_b, tmp);

			tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
			range_b = _mm_or_si128(range_b, tmp);

			shift1 = _mm_alignr_epi8(input_b, input_a, 15);
			pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
			tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
			range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
			tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
			range2 = _mm_add_epi8(range2,
			                      _mm_shuffle_epi8(ef_fe_tbl, tmp));

			range_b = _mm_add_epi8(range_b, range2);

			minv = _mm_shuffle_epi8(range_min_tbl, range_b);
			maxv = _mm_shuffle_epi8(range_max_tbl, range_b);

			tmp = _mm_or_si128(_mm_cmplt_epi8(input_b, minv),
			                   _mm_cmpgt_epi8(input_b, maxv));
			error = _mm_or_si128(error, tmp);

			/********************* next iteration *********************/
			prev_input = input_b;
			prev_first_len = first_len_b;

			s += 32;
			len -= 32;
		}

		if (!_mm_testz_si128(error, error))
			return false;

		/* A sequence may straddle the end of the last vector.  Look at
		   its final three bytes and step back to the last one that is
		   not a continuation byte (as a signed comparison, > 0xBF
		   matches 0x00-0x7F and 0xC0-0xFF), so that the scalar
		   validator below restarts at a sequence boundary. */
		int32_t token4 = _mm_extract_epi32(prev_input, 3);
		const int8_t *token = (const int8_t *)&token4;
		int lookahead = 0;
		if (token[3] > (int8_t)0xBF)
			lookahead = 1;
		else if (token[2] > (int8_t)0xBF)
			lookahead = 2;
		else if (token[1] > (int8_t)0xBF)
			lookahead = 3;

		s -= lookahead;
		len += lookahead;
	}

	/* utf8_validate_off() returns 0 when it finds no error */
	return utf8_validate_off((const char *)s, len) == 0;
}
/* Branchless UTF-8 decoding and validation by Christopher Wellons.

   You can find the original source with comments at
   https://github.com/skeeto/branchless-utf8.

   This unit holds the declarations of src/unicode.h followed by the
   definitions of src/unicode.c.  The include guard makes the declarations
   a no-op wherever unicode.h has already been included. */

#ifndef ORYX_UNICODE_H
#define ORYX_UNICODE_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* A Unicode code point, and a macro for spelling rune constants */
#define RUNE_C(x) UINT32_C(x)
typedef uint32_t rune;

/* Decode one code point from *BUF and advance *BUF past it.  Always reads
   4 bytes starting at *BUF; see the definition for details. */
rune utf8_decode(const char **buf);

/* Return 0 if the LEN bytes at BUF are valid UTF-8, otherwise the 1-based
   byte offset of the first invalid sequence. */
size_t utf8_validate_off(const char *buf, size_t len);

#if ORYX_SIMD
/* Return true if the LEN bytes at DATA are valid UTF-8 */
bool utf8_validate_simd(const char *data, size_t len);
#endif

#endif /* !ORYX_UNICODE_H */

/* Sequence length indexed by the top 5 bits of the lead byte; 0 marks a
   byte that cannot start a sequence (a continuation byte or 0xF8-0xFF). */
static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                               0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
/* Smallest code point that may be encoded with a sequence of a given
   length.  Entry 0 is larger than any decodable value, so an invalid lead
   byte always trips the "too small" check. */
static const rune mins[] = {RUNE_C(4194304), 0, 128, 2048, RUNE_C(65536)};
/* Payload bits of the lead byte, per sequence length */
static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
/* Right shift that discards the bits of bytes beyond the sequence */
static const int shiftc[] = {0, 18, 12, 6, 0};
/* Right shift that discards the error bits of bytes beyond the sequence */
static const int shifte[] = {0, 6, 4, 2, 0};

/* Assemble the code point of an N-byte sequence from the 4 bytes at S.
   All 4 bytes are read regardless of N. */
static rune
decode4(const unsigned char *s, int n)
{
	rune c = (rune)(s[0] & masks[n]) << 18;
	c |= (rune)(s[1] & 0x3f) << 12;
	c |= (rune)(s[2] & 0x3f) << 6;
	c |= (rune)(s[3] & 0x3f) << 0;
	return c >> shiftc[n];
}

/* Decode the code point at *BUF and advance *BUF to the next sequence.

   No validation is performed.  If the lead byte cannot start a sequence,
   *BUF is advanced by exactly one byte and the returned value is not
   meaningful.

   The function takes no length and unconditionally reads (*BUF)[0..3], so
   the caller must guarantee that 4 bytes are readable at *BUF, e.g. by
   keeping 3 bytes of padding after the text. */
rune
utf8_decode(const char **buf)
{
	const unsigned char *s = (const unsigned char *)*buf;
	int n = lengths[s[0] >> 3];

	*buf += n + !n;
	return decode4(s, n);
}

/* Check that the LEN bytes at BUF are well-formed UTF-8.

   Returns 0 if they are.  Otherwise returns the 1-based byte offset of the
   start of the first sequence that is invalid: a bad lead byte, a missing
   or malformed continuation byte, an overlong encoding, a surrogate, a
   value above U+10FFFF, or a sequence cut short by the end of the buffer.

   No byte at or beyond BUF + LEN is read, so BUF needs no padding. */
size_t
utf8_validate_off(const char *buf, size_t len)
{
	const unsigned char *bytes = (const unsigned char *)buf;
	size_t off = 0;

	while (off < len) {
		const unsigned char *s = bytes + off;
		unsigned char tail[4];
		size_t rem = len - off;

		/* The decoder always looks at 4 bytes.  Near the end of the
		   buffer, work on a zero-padded copy instead of reading out of
		   bounds.  A zero is not a continuation byte, so a sequence
		   truncated by the end of the buffer is reported as invalid. */
		if (rem < sizeof tail) {
			for (size_t i = 0; i < sizeof tail; i++)
				tail[i] = i < rem ? s[i] : 0;
			s = tail;
		}

		int n = lengths[s[0] >> 3];
		rune c = decode4(s, n);

		int e = (c < mins[n]) << 6;    /* overlong, or invalid lead byte */
		e |= ((c >> 11) == 0x1B) << 7; /* surrogate, U+D800-U+DFFF */
		e |= (c > 0x10FFFF) << 8;      /* beyond the Unicode range */
		e |= (s[1] & 0xC0) >> 2;       /* top two bits of each of the */
		e |= (s[2] & 0xC0) >> 4;       /* three trailing bytes */
		e |= (s[3]) >> 6;
		e ^= 0x2A;        /* top bits equal to 10 cancel out to zero */
		e >>= shifte[n];  /* ignore bytes that are not part of the sequence */
		if (e != 0)
			return off + 1;

		/* n is never 0 here: an invalid lead byte always sets bit 6 */
		off += (size_t)(n + !n);
	}

	return 0;
}