diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-08 01:37:17 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-08 01:40:42 +0200 |
commit | 93e6bb1bf6b12eaf3c07eb1b2b702aa1e95be48a (patch) | |
tree | 5ed75f5ece5145923cef2598410ad8cd2f1968ae |
Genesis commit
-rw-r--r-- | .exrc | 1 | ||||
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | LICENSE | 14 | ||||
-rw-r--r-- | README | 1 | ||||
-rw-r--r-- | cbs.h | 596 | ||||
-rw-r--r-- | make.c | 232 | ||||
-rw-r--r-- | src/errors.c | 27 | ||||
-rw-r--r-- | src/errors.h | 6 | ||||
-rw-r--r-- | src/lexer.c | 34 | ||||
-rw-r--r-- | src/lexer.h | 21 | ||||
-rw-r--r-- | src/main.c | 68 | ||||
-rw-r--r-- | src/unicode-avx2.c | 152 | ||||
-rw-r--r-- | src/unicode-neon.c | 147 | ||||
-rw-r--r-- | src/unicode-sse4_1.c | 158 | ||||
-rw-r--r-- | src/unicode.c | 59 | ||||
-rw-r--r-- | src/unicode.h | 17 |
17 files changed, 1541 insertions, 0 deletions
@@ -0,0 +1 @@ +set makeprg=./make diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8892127 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.cache/ +compile_commands.json +make +*.o +oryx diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..cd5a7dd --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/mlib"] + path = vendor/mlib + url = https://github.com/Mango0x45/mlib.git @@ -0,0 +1,14 @@ +BSD Zero Clause License + +Copyright © 2024 Thomas Voss + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. @@ -0,0 +1 @@ +Oryx — Programming Made Better @@ -0,0 +1,596 @@ +#ifndef C_BUILD_SYSTEM_H +#define C_BUILD_SYSTEM_H + +#define _GNU_SOURCE +#include <sys/stat.h> +#include <sys/wait.h> + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#ifndef CBS_NO_THREADS +# include <pthread.h> +#endif +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wordexp.h> + +#define _vtoxs(...) 
((char *[]){__VA_ARGS__}) + +#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) + +struct strs { + char **buf; + size_t len, cap; +}; + +enum pkg_config_flags { + PC_CFLAGS = 1 << 0, + PC_LIBS = 1 << 1, + PC_SHARED = 1 << 2, + PC_STATIC = 1 << 3, +}; + +void cbsinit(int, char **); +static void rebuild(const char *); /* Always call via macro wrapper */ +#define rebuild() rebuild(__FILE__) + +static void strsfree(struct strs *); +static void strszero(struct strs *); +static void strspush(struct strs *, char **, size_t); +static void strspushenv(struct strs *, const char *, char **, size_t); +#define strspushl(xs, ...) \ + strspush((xs), _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__))) +#define strspushenvl(xs, ev, ...) \ + strspushenv((xs), (ev), _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__))) + +static bool fexists(const char *); +static int fmdcmp(const char *, const char *); +static bool fmdolder(const char *, const char *); +static bool fmdnewer(const char *, const char *); +static bool foutdated(const char *, char **, size_t); +#define foutdatedl(s, ...) 
/* Private copy of the command line, kept so that rebuild() can re-execute the
   build script with the exact same arguments after recompiling it. */
static int _cbs_argc;
static char **_cbs_argv;

/*
 * cbsinit — initialise the build system.
 *
 * Duplicates argv into _cbs_argv (NULL-terminated) and, when argv[0] contains
 * a directory component, changes the working directory to that directory so
 * that relative paths in the build script resolve next to the script itself.
 *
 * argc must be at least 1.  Allocation failures abort via assert(); a failing
 * chdir() prints a diagnostic and exits with EXIT_FAILURE.
 */
void
cbsinit(int argc, char **argv)
{
	assert(argc > 0 && argv != NULL && argv[0] != NULL);

	_cbs_argc = argc;
	_cbs_argv = malloc(sizeof(char *) * ((size_t)argc + 1));
	assert(_cbs_argv != NULL);
	for (int i = 0; i < argc; i++) {
		size_t n = strlen(argv[i]) + 1;
		_cbs_argv[i] = malloc(n);
		assert(_cbs_argv[i] != NULL);
		memcpy(_cbs_argv[i], argv[i], n);
	}
	_cbs_argv[argc] = NULL;

	char *s = strrchr(_cbs_argv[0], '/');
	if (s == NULL)
		return;

	/* The call must not live inside assert(): with NDEBUG it would be
	   compiled out and the directory change silently skipped. */
	int rc;
	if (s == _cbs_argv[0]) {
		/* argv[0] is "/name": the directory is the root itself, not "" */
		rc = chdir("/");
	} else {
		/* Temporarily cut argv[0] at the last slash to get the dirname */
		*s = 0;
		rc = chdir(_cbs_argv[0]);
		*s = '/';
	}
	if (rc == -1) {
		perror("chdir");
		exit(EXIT_FAILURE);
	}
}
CBS_NO_THREADS + strspushl(&xs, "-lpthread"); +#endif + strspushl(&xs, "-o", dst, src); + cmdput(xs); + assert(cmdexec(xs) == EXIT_SUCCESS); + + execvp(*_cbs_argv, _cbs_argv); + assert(!"failed to execute process"); +} + +void +strsfree(struct strs *xs) +{ + free(xs->buf); + xs->buf = NULL; + xs->len = xs->cap = 0; +} + +void +strszero(struct strs *xs) +{ + xs->len = 0; +} + +void +strspush(struct strs *xs, char **ys, size_t n) +{ + if (n == 0) + return; + + if (xs->len + n >= xs->cap) { + xs->cap = (xs->len + n) * 2; + xs->buf = realloc(xs->buf, sizeof(char *) * (xs->cap + 1)); + assert(xs->buf != NULL); + } + memcpy(xs->buf + xs->len, ys, n * sizeof(char *)); + xs->len += n; + + assert(xs->len <= xs->cap); + xs->buf[xs->len] = NULL; +} + +void +strspushenv(struct strs *xs, const char *ev, char **ys, size_t n) +{ + /* NOTE: Do your best to NOT modify any pushed envvar! */ + char *p = getenv(ev); + if (p == NULL || *p == 0) + strspush(xs, ys, n); + else + strspush(xs, &p, 1); +} + +bool +fexists(const char *f) +{ + return !access(f, F_OK); +} + +int +fmdcmp(const char *lhs, const char *rhs) +{ + struct stat sbl, sbr; + + assert(stat(lhs, &sbl) != -1); + assert(stat(rhs, &sbr) != -1); + + return sbl.st_mtim.tv_sec == sbr.st_mtim.tv_sec + ? 
sbl.st_mtim.tv_nsec - sbr.st_mtim.tv_nsec + : sbl.st_mtim.tv_sec - sbr.st_mtim.tv_sec; +} + +bool +fmdnewer(const char *lhs, const char *rhs) +{ + return fmdcmp(lhs, rhs) > 0; +} + +bool +fmdolder(const char *lhs, const char *rhs) +{ + return fmdcmp(lhs, rhs) < 0; +} + +bool +foutdated(const char *src, char **deps, size_t n) +{ + if (!fexists(src)) + return true; + for (size_t i = 0; i < n; i++) { + if (fmdolder(src, deps[i])) + return true; + } + return false; +} + +int +cmdexec(struct strs xs) +{ + return cmdwait(cmdexec_async(xs)); +} + +pid_t +cmdexec_async(struct strs xs) +{ + pid_t pid = fork(); + assert(pid != -1); + if (pid == 0) { + execvp(xs.buf[0], xs.buf); + assert(!"failed to execute process"); + } + return pid; +} + +int +cmdexec_read(struct strs xs, char **p, size_t *n) +{ + enum { + R, + W, + }; + int fds[2]; + + assert(pipe(fds) != -1); + + pid_t pid = fork(); + assert(pid != -1); + + if (pid == 0) { + close(fds[R]); + close(STDOUT_FILENO); + assert(dup2(fds[W], STDOUT_FILENO) != -1); + execvp(xs.buf[0], xs.buf); + assert(!"failed to execute process"); + } + + close(fds[W]); + + struct stat sb; + assert(fstat(fds[R], &sb) != -1); + + *p = NULL, *n = 0; + char *buf = malloc(sb.st_blksize); + assert(buf != NULL); + + for (;;) { + ssize_t nr; + if ((nr = read(fds[R], buf, sb.st_blksize)) == 0) + break; + assert(nr != -1); + + *p = realloc(*p, *n + nr + 1); + assert(*p != NULL); + + memcpy(*p + *n, buf, nr); + *n += nr; + } + + close(fds[R]); + if (buf != NULL) + buf[*n] = 0; + free(buf); + + return cmdwait(pid); +} + +int +cmdwait(pid_t pid) +{ + int ws; + assert(waitpid(pid, &ws, 0) != -1); + if (WIFEXITED(ws)) + return WEXITSTATUS(ws); + return WIFEXITED(ws) ? 
WEXITSTATUS(ws) : 256; +} + +/* import shlex + + s = '#define _SHELL_SAFE "' + for c in map(chr, range(128)): + if not shlex._find_unsafe(c): + s += c + print(s + '"') */ +#define _SHELL_SAFE \ + "%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" + +void +cmdput(struct strs xs) +{ + cmdfput(stdout, xs); +} + +void +cmdfput(FILE *fp, struct strs xs) +{ + flockfile(fp); + for (size_t i = 0; i < xs.len; i++) { + bool safe = true; + char *p, *q; + + p = q = xs.buf[i]; + for (; *q; q++) { + if (!strchr(_SHELL_SAFE, *q)) { + safe = false; + break; + } + } + + if (safe) + fputs(p, fp); + else { + putc('\'', fp); + for (q = p; *q; q++) { + if (*q == '\'') + fputs("'\"'\"'", fp); + else + putc(*q, fp); + } + putc('\'', fp); + } + + putc(i == xs.len - 1 ? '\n' : ' ', fp); + } + funlockfile(fp); +} + +bool +pcquery(struct strs *xs, const char *lib, int flags) +{ + struct strs ys = {0}; + + strspushl(&ys, "pkg-config", "--silence-errors"); + if (flags & PC_CFLAGS) + strspushl(&ys, "--cflags"); + if (flags & PC_LIBS) + strspushl(&ys, "--libs"); + if (flags & PC_SHARED) + strspushl(&ys, "--shared"); + if (flags & PC_STATIC) + strspushl(&ys, "--static"); + strspushl(&ys, (char *)lib); + + char *buf; + size_t bufsz; + int ec = cmdexec_read(ys, &buf, &bufsz); + strsfree(&ys); + if (ec != EXIT_SUCCESS) + return false; + + /* Remove trailing newline */ + buf[bufsz - 1] = 0; + + wordexp_t we; + assert(wordexp(buf, &we, WRDE_NOCMD) == 0); + + char **words = malloc(sizeof(char *) * we.we_wordc); + assert(words != NULL); + for (size_t i = 0; i < we.we_wordc; i++) + assert((words[i] = strdup(we.we_wordv[i])) != NULL); + + strspush(xs, words, we.we_wordc); + wordfree(&we); + free(buf); + return true; +} + +bool +binexists(const char *s) +{ + const char *path = getenv("PATH"); + assert(path != NULL); + + char *p = strdup(path), *it; + assert(p != NULL); + + for (it = strtok(p, ":"); it != NULL; it = strtok(NULL, ":")) { + static char buf[PATH_MAX]; + memset(buf, 0, 
/*
 * swpext — return a newly allocated copy of file with its extension replaced
 * by ext (given without the leading dot).
 *
 * Only the final path component is searched for an extension, so dots in
 * directory names ("./make", "build.d/obj") are left alone.  When the final
 * component has no dot, an unmodified copy of file is returned.
 *
 * The caller owns the result and must free() it.
 */
char *
swpext(const char *file, const char *ext)
{
	/* Restrict the search for '.' to the basename */
	const char *base = strrchr(file, '/');
	base = base != NULL ? base + 1 : file;

	const char *dot = strrchr(base, '.');
	if (dot == NULL) {
		size_t len = strlen(file) + 1;
		char *copy = malloc(len);
		assert(copy != NULL);
		memcpy(copy, file, len);
		return copy;
	}

	size_t stemlen = (size_t)(dot - file);
	size_t extlen = strlen(ext);

	/* stem + '.' + ext + NUL */
	char *s = malloc(stemlen + extlen + 2);
	assert(s != NULL);
	memcpy(s, file, stemlen);
	s[stemlen] = '.';
	memcpy(s + stemlen + 1, ext, extlen + 1);
	return s;
}
free(tp->thrds); + while (tp->head != NULL) { + struct _tqueue *q = _tpdeq(tp); + if (q->free) + q->free(q->arg); + free(q); + } + + pthread_cond_destroy(&tp->cnd); + pthread_mutex_destroy(&tp->mtx); +} + +void +tpwait(tpool *tp) +{ + pthread_mutex_lock(&tp->mtx); + while (!tp->stop && tp->left) + pthread_cond_wait(&tp->cnd, &tp->mtx); + pthread_mutex_unlock(&tp->mtx); +} + +void +tpenq(tpool *tp, tjob *fn, void *arg, tjob_free *free) +{ + struct _tqueue *q = malloc(sizeof(*q)); + assert(q != NULL); + *q = (struct _tqueue){ + .fn = fn, + .arg = arg, + .free = free, + }; + + pthread_mutex_lock(&tp->mtx); + if (tp->tail) + tp->tail->next = q; + if (!tp->head) + tp->head = q; + tp->tail = q; + tp->left++; + pthread_cond_signal(&tp->cnd); + pthread_mutex_unlock(&tp->mtx); +} +#endif /* !CBS_NO_THREADS */ + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + +#ifdef __APPLE__ +# undef st_mtim +#endif + +#endif /* !C_BUILD_SYSTEM_H */ @@ -0,0 +1,232 @@ +#define _GNU_SOURCE +#include <assert.h> +#include <glob.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "cbs.h" + +#define TARGET "oryx" + +enum { + SIMD_AVX2 = 1 << 0, + SIMD_NEON = 1 << 1, + SIMD_SSE4_1 = 1 << 2, +}; + +static char *cflags_all[] = { + "-pipe", + "-std=c99", + "-Wall", + "-Wextra", + "-Wno-attributes", + "-Wno-parentheses", + "-Wno-pointer-sign", + "-Wpedantic", + "-Wvla", +#if __GLIBC__ + "-D_GNU_SOURCE", +#endif +}; + +static char *cflags_dbg[] = { + "-DDEBUG=1", "-fsanitize=address,undefined", "-g3", "-ggdb3", "-O0", +}; + +static char *cflags_rls[] = { + "-DNDEBUG=1", "-flto", "-O3", +#ifndef __APPLE__ + "-march=native", "-mtune=native", +#endif +}; + +static char *argv0; +static bool fflag, rflag; +static int simd_flags; + +static void cc(void *); +static void ld(void); +static bool tagvalid(const char *); +static void ckd_cpu_flags(void); +static int globerr(const char *, int); + +static void 
+usage(void) +{ + fprintf(stderr, + "Usage: %s [-fr]\n" + " %s clean\n", + argv0, argv0); + exit(EXIT_FAILURE); +} + +int +main(int argc, char **argv) +{ + cbsinit(argc, argv); + rebuild(); + + argv0 = argv[0]; + + int opt; + while ((opt = getopt(argc, argv, "fr")) != -1) { + switch (opt) { + case 'f': + fflag = true; + break; + case 'r': + rflag = true; + break; + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + struct strs cmd = {0}; + + if (argc > 0) { + if (strcmp("clean", *argv) == 0) { + strspushl(&cmd, "find", ".", "-name", TARGET, "-or", "-name", "*.o", + "-delete"); + cmdput(cmd); + cmdexec(cmd); + } else { + fprintf(stderr, "%s: invalid subcommand — ‘%s’\n", argv0, *argv); + usage(); + } + + return EXIT_SUCCESS; + } + + ckd_cpu_flags(); + + glob_t g; + assert(glob("src/*.c", 0, globerr, &g) == 0); + + int procs = nproc(); + if (procs == -1) + procs = 8; + + tpool tp; + tpinit(&tp, procs); + for (size_t i = 0; i < g.gl_pathc; i++) + tpenq(&tp, cc, g.gl_pathv[i], NULL); + tpwait(&tp); + tpfree(&tp); + + ld(); + + globfree(&g); + strsfree(&cmd); + return EXIT_SUCCESS; +} + +void +cc(void *arg) +{ + if (!tagvalid(arg)) + return; + + struct strs cmd = {0}; + char *dst = swpext(arg, "o"), *src = arg; + + if (!fflag && !foutdatedl(dst, src)) + goto out; + + strspushenvl(&cmd, "CC", "cc"); + strspush(&cmd, cflags_all, lengthof(cflags_all)); + if (rflag) + strspushenv(&cmd, "CFLAGS", cflags_rls, lengthof(cflags_rls)); + else + strspushenv(&cmd, "CFLAGS", cflags_dbg, lengthof(cflags_dbg)); + if (simd_flags != 0) + strspushl(&cmd, "-DORYX_SIMD=1"); + strspushl(&cmd, "-o", dst, "-c", src); + + cmdput(cmd); + cmdexec(cmd); + strsfree(&cmd); +out: + free(dst); +} + +void +ld(void) +{ + glob_t g; + bool dobuild = fflag; + struct strs cmd = {0}; + + strspushenvl(&cmd, "CC", "cc"); + strspush(&cmd, cflags_all, lengthof(cflags_all)); + if (rflag) + strspushenv(&cmd, "CFLAGS", cflags_rls, lengthof(cflags_rls)); + else + strspushenv(&cmd, "CFLAGS", 
cflags_dbg, lengthof(cflags_dbg)); + strspushl(&cmd, "-o", TARGET); + + assert(glob("src/*.o", 0, globerr, &g) == 0); + for (size_t i = 0; i < g.gl_pathc; i++) { + if (!tagvalid(g.gl_pathv[i])) + continue; + if (foutdatedl(TARGET, g.gl_pathv[i])) + dobuild = true; + strspushl(&cmd, g.gl_pathv[i]); + } + + if (dobuild) { + cmdput(cmd); + cmdexec(cmd); + } + + globfree(&g); + strsfree(&cmd); +} + +bool +tagvalid(const char *file) +{ + if (strstr(file, "-avx2.") != NULL && (simd_flags & SIMD_AVX2) == 0) + return false; + if (strstr(file, "-neon.") != NULL && (simd_flags & SIMD_NEON) == 0) + return false; + if (strstr(file, "-sse4_1.") != NULL && (simd_flags & SIMD_SSE4_1) == 0) + return false; + return true; +} + +void +ckd_cpu_flags(void) +{ + if (!rflag) + return; +#if __GNUC__ && __x86_64__ + uint32_t exx; + + asm volatile("cpuid" : "=b"(exx) : "a"(7), "c"(0)); + if (exx & (1 << 5)) { + simd_flags |= SIMD_AVX2; + return; + } + + asm volatile("cpuid" : "=c"(exx) : "a"(1), "c"(0)); + if (exx & (1 << 19)) + simd_flags |= SIMD_SSE4_1; +#elif __ARM_NEON + simd_flags |= SIMD_NEON; +#endif +} + +int +globerr(const char *s, int e) +{ + fprintf(stderr, "glob: %s: %s\n", s, strerror(e)); + exit(EXIT_FAILURE); +} diff --git a/src/errors.c b/src/errors.c new file mode 100644 index 0000000..49eb11d --- /dev/null +++ b/src/errors.c @@ -0,0 +1,27 @@ +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "errors.h" + +void +err(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + + int save = errno; + flockfile(stderr); + + fputs("oryx: ", stderr); + vfprintf(stderr, fmt, ap); + if (fmt[strlen(fmt) - 1] == ':') + fprintf(stderr, " %s", strerror(save)); + fputc('\n', stderr); + fflush(stderr); + funlockfile(stderr); + va_end(ap); + exit(EXIT_FAILURE); +} diff --git a/src/errors.h b/src/errors.h new file mode 100644 index 0000000..69c8ea0 --- /dev/null +++ b/src/errors.h @@ -0,0 +1,6 @@ +#ifndef ORYX_ERRORS_H +#define ORYX_ERRORS_H + +void err(const char *, ...); + +#endif /* !ORYX_ERRORS_H */ diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..970202a --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,34 @@ +#include <inttypes.h> +#include <stddef.h> +#include <stdio.h> + +#include "errors.h" +#include "lexer.h" +#include "unicode.h" + +struct lexeme * +lexstring(const char *code, size_t codesz, size_t *lcnt) +{ + struct { + struct lexeme *p; + size_t len, buf; + } data = {0}; + +#if ORYX_SIMD + if (!utf8_validate_simd(code, codesz)) { +#endif + size_t off = utf8_validate_off(code, codesz); + if (off != 0) + err("Invalid UTF-8 at byte-offset %zu", off - 1); +#if ORYX_SIMD + } +#endif + + const char *end = code + codesz; + while (code < end) { + rune ch = utf8_decode(&code); + } + + *lcnt = data.len; + return data.p; +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..7271498 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,21 @@ +#ifndef ORYX_LEXER_H +#define ORYX_LEXER_H + +#include <stddef.h> +#include <stdint.h> + +enum { + LEXIDENT, +}; + +typedef uint8_t lexeme_kind; + +struct lexeme { + lexeme_kind kind; + const char *p; + size_t len; +}; + +struct lexeme *lexstring(const char *, size_t, size_t *); + +#endif /* !ORYX_LEXER_H */ diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..23b0471 --- /dev/null +++ b/src/main.c @@ -0,0 +1,68 @@ +#include <sys/stat.h> + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + 
static char *readfile(const char *, size_t *);

/*
 * Compiler entry point: read the file named on the command line and run the
 * lexer over it.
 */
int
main(int argc, char **argv)
{
	if (argc != 2) {
		fputs("Usage: oryx file\n", stderr);
		exit(EXIT_FAILURE);
	}

	/* The lengths are filled in through out-parameters, so the structs are
	   assigned after declaration.  Writing to a member from inside the
	   initializer of the same object is not sequenced against the implicit
	   zero-initialization of that member. */
	struct {
		char *p;
		size_t len;
	} file;
	file.p = readfile(argv[1], &file.len);

	struct {
		struct lexeme *p;
		size_t len;
	} toks;
	toks.p = lexstring(file.p, file.len, &toks.len);

#if DEBUG
	free(file.p);
	free(toks.p);
#endif
	return EXIT_SUCCESS;
}

/*
 * readfile — read the whole of filename into a freshly allocated buffer.
 *
 * The buffer is followed by 4 zero bytes of padding.  *n receives the number
 * of bytes actually read, not counting the padding.  Any failure is reported
 * through err(), which does not return.
 */
char *
readfile(const char *filename, size_t *n)
{
	/* err() appends strerror(errno) only when the format ends in ':' */
	int fd = open(filename, O_RDONLY);
	if (fd == -1)
		err("open: %s:", filename);

	struct stat sb;
	if (fstat(fd, &sb) == -1)
		err("fstat: %s:", filename);

	size_t size = (size_t)sb.st_size;
	char *p = malloc(size + 4);
	if (p == NULL)
		err("malloc:");

	/* Never ask read() for more than what is left of the buffer, so a file
	   that grows after the fstat() cannot overflow it */
	size_t off = 0;
	while (off < size) {
		ssize_t nr = read(fd, p + off, size - off);
		if (nr == -1)
			err("read: %s:", filename);
		if (nr == 0)
			break;
		off += (size_t)nr;
	}

	for (int i = 0; i < 4; i++)
		p[off + i] = 0;

	*n = off;
	close(fd);
	return p;
}
0xBF, 0xBF, 0xBF, 0xBF, 0x9F, + 0xBF, 0x8F, 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +}; + +static const int8_t _df_ee_tbl[] = { + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, +}; + +static const int8_t _ef_fe_tbl[] = { + 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static inline __m256i +push_last_byte_of_a_to_b(__m256i a, __m256i b) +{ + return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15); +} + +static inline __m256i +push_last_2bytes_of_a_to_b(__m256i a, __m256i b) +{ + return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 14); +} + +static inline __m256i +push_last_3bytes_of_a_to_b(__m256i a, __m256i b) +{ + return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 13); +} + +bool +utf8_validate_simd(const char *data, size_t len) +{ + const unsigned char *s = data; + if (len >= 32) { + __m256i prev_input = _mm256_set1_epi8(0); + __m256i prev_first_len = _mm256_set1_epi8(0); + + const __m256i first_len_tbl = _mm256_loadu_si256( + (const __m256i *)_first_len_tbl); + const __m256i first_range_tbl = _mm256_loadu_si256( + (const __m256i *)_first_range_tbl); + const __m256i range_min_tbl = _mm256_loadu_si256( + (const __m256i *)_range_min_tbl); + const __m256i range_max_tbl = _mm256_loadu_si256( + (const __m256i *)_range_max_tbl); + const __m256i df_ee_tbl = _mm256_loadu_si256( + (const __m256i *)_df_ee_tbl); + const __m256i ef_fe_tbl = _mm256_loadu_si256( + (const __m256i *)_ef_fe_tbl); + + __m256i error1 = _mm256_set1_epi8(0); + __m256i error2 = _mm256_set1_epi8(0); + + while (len >= 32) { + const __m256i input = _mm256_loadu_si256((const __m256i *)s); + + const __m256i high_nibbles = _mm256_and_si256( + _mm256_srli_epi16(input, 4), _mm256_set1_epi8(0x0F)); + + __m256i first_len = _mm256_shuffle_epi8(first_len_tbl, + high_nibbles); + + __m256i range = _mm256_shuffle_epi8(first_range_tbl, high_nibbles); + 
+ range = _mm256_or_si256( + range, push_last_byte_of_a_to_b(prev_first_len, first_len)); + + __m256i tmp1, tmp2; + + tmp1 = push_last_2bytes_of_a_to_b(prev_first_len, first_len); + tmp2 = _mm256_subs_epu8(tmp1, _mm256_set1_epi8(1)); + + range = _mm256_or_si256(range, tmp2); + + tmp1 = push_last_3bytes_of_a_to_b(prev_first_len, first_len); + tmp2 = _mm256_subs_epu8(tmp1, _mm256_set1_epi8(2)); + range = _mm256_or_si256(range, tmp2); + + __m256i shift1, pos, range2; + + shift1 = push_last_byte_of_a_to_b(prev_input, input); + pos = _mm256_sub_epi8(shift1, _mm256_set1_epi8(0xEF)); + + tmp1 = _mm256_subs_epu8(pos, _mm256_set1_epi8(240)); + range2 = _mm256_shuffle_epi8(df_ee_tbl, tmp1); + tmp2 = _mm256_adds_epu8(pos, _mm256_set1_epi8(112)); + range2 = _mm256_add_epi8(range2, + _mm256_shuffle_epi8(ef_fe_tbl, tmp2)); + + range = _mm256_add_epi8(range, range2); + + __m256i minv = _mm256_shuffle_epi8(range_min_tbl, range); + __m256i maxv = _mm256_shuffle_epi8(range_max_tbl, range); + + error1 = _mm256_or_si256(error1, _mm256_cmpgt_epi8(minv, input)); + error2 = _mm256_or_si256(error2, _mm256_cmpgt_epi8(input, maxv)); + + prev_input = input; + prev_first_len = first_len; + + s += 32; + len -= 32; + } + + __m256i error = _mm256_or_si256(error1, error2); + if (!_mm256_testz_si256(error, error)) + return false; + + int32_t token4 = _mm256_extract_epi32(prev_input, 7); + const int8_t *token = (const int8_t *)&token4; + int lookahead = 0; + if (token[3] > (int8_t)0xBF) + lookahead = 1; + else if (token[2] > (int8_t)0xBF) + lookahead = 2; + else if (token[1] > (int8_t)0xBF) + lookahead = 3; + + s -= lookahead; + len += lookahead; + } + + /* Check remaining bytes with naïve method */ + return utf8_validate_off(s, len) == 0; +} diff --git a/src/unicode-neon.c b/src/unicode-neon.c new file mode 100644 index 0000000..2791117 --- /dev/null +++ b/src/unicode-neon.c @@ -0,0 +1,147 @@ +#include <arm_neon.h> +#include <stdint.h> + +#include "unicode.h" + +#pragma GCC diagnostic ignored 
"-Woverflow" + +static const uint8_t _first_len_tbl[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3, +}; + +static const uint8_t _first_range_tbl[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, +}; + +static const uint8_t _range_min_tbl[] = { + 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, + 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +}; +static const uint8_t _range_max_tbl[] = { + 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, + 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const uint8_t _range_adjust_tbl[] = { + 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, +}; + +bool +utf8_validate_simd(const char *data, size_t len) +{ + const unsigned char *s = data; + if (len >= 32) { + uint8x16_t prev_input = vdupq_n_u8(0); + uint8x16_t prev_first_len = vdupq_n_u8(0); + + const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl); + const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl); + const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl); + const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl); + const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl); + + const uint8x16_t const_1 = vdupq_n_u8(1); + const uint8x16_t const_2 = vdupq_n_u8(2); + const uint8x16_t const_e0 = vdupq_n_u8(0xE0); + + uint8x16_t error1 = vdupq_n_u8(0); + uint8x16_t error2 = vdupq_n_u8(0); + uint8x16_t error3 = vdupq_n_u8(0); + uint8x16_t error4 = vdupq_n_u8(0); + + while (len >= 32) { +#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8) + /* GCC doesn't support vldq1_u8_x2 until version 8 */ + const uint8x16_t input_a = vld1q_u8(data); + const uint8x16_t input_b = vld1q_u8(data + 16); +#else + /* Forces a double load on Clang */ + const uint8x16x2_t input_pair = vld1q_u8_x2(s); + const uint8x16_t input_a = input_pair.val[0]; + const uint8x16_t input_b = input_pair.val[1]; +#endif + + const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4); + const uint8x16_t 
high_nibbles_b = vshrq_n_u8(input_b, 4); + + const uint8x16_t first_len_a = vqtbl1q_u8(first_len_tbl, + high_nibbles_a); + const uint8x16_t first_len_b = vqtbl1q_u8(first_len_tbl, + high_nibbles_b); + + uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a); + uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b); + + range_a = vorrq_u8(range_a, + vextq_u8(prev_first_len, first_len_a, 15)); + range_b = vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15)); + + uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b; + tmp1_a = vextq_u8(prev_first_len, first_len_a, 14); + tmp1_a = vqsubq_u8(tmp1_a, const_1); + range_a = vorrq_u8(range_a, tmp1_a); + + tmp1_b = vextq_u8(first_len_a, first_len_b, 14); + tmp1_b = vqsubq_u8(tmp1_b, const_1); + range_b = vorrq_u8(range_b, tmp1_b); + + tmp2_a = vextq_u8(prev_first_len, first_len_a, 13); + tmp2_a = vqsubq_u8(tmp2_a, const_2); + range_a = vorrq_u8(range_a, tmp2_a); + + tmp2_b = vextq_u8(first_len_a, first_len_b, 13); + tmp2_b = vqsubq_u8(tmp2_b, const_2); + range_b = vorrq_u8(range_b, tmp2_b); + + uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15); + uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0); + range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a)); + + uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15); + uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0); + range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b)); + + uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a); + uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a); + + uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b); + uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b); + + error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a)); + error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a)); + + error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b)); + error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b)); + + prev_input = input_b; + prev_first_len = first_len_b; + + s += 32; + len -= 32; + } + error1 = vorrq_u8(error1, 
error2); + error1 = vorrq_u8(error1, error3); + error1 = vorrq_u8(error1, error4); + + if (vmaxvq_u8(error1)) + return -1; + + uint32_t token4; + vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3); + + const int8_t *token = (const int8_t *)&token4; + int lookahead = 0; + if (token[3] > (int8_t)0xBF) + lookahead = 1; + else if (token[2] > (int8_t)0xBF) + lookahead = 2; + else if (token[1] > (int8_t)0xBF) + lookahead = 3; + + s -= lookahead; + len += lookahead; + } + + return utf8_validate_off(s, len) == 0; +}
diff --git a/src/unicode-sse4_1.c b/src/unicode-sse4_1.c
new file mode 100644
index 0000000..17a46a8
--- /dev/null
+++ b/src/unicode-sse4_1.c
@@ -0,0 +1,158 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <x86intrin.h>
+
+#include "unicode.h"
+
+/* The tables below store byte values above 0x7F in int8_t, and the code
+   passes such values to _mm_set1_epi8(); silence the resulting warnings. */
+#pragma GCC diagnostic ignored "-Woverflow"
+
+/* 16-entry lookup tables; each is loaded into a vector and indexed with
+   _mm_shuffle_epi8().  The first two are indexed by the high nibble of each
+   input byte; only nibbles 0xC..0xF (bytes 0xC0..0xFF) have non-zero
+   entries. */
+static const int8_t _first_len_tbl[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
+};
+
+static const int8_t _first_range_tbl[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
+};
+
+/* Bounds per range index: an input byte with range index r is flagged as an
+   error when it compares (as a signed byte) below _range_min_tbl[r] or above
+   _range_max_tbl[r]. */
+static const int8_t _range_min_tbl[] = {
+	0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
+	0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
+};
+static const int8_t _range_max_tbl[] = {
+	0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
+	0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+};
+
+/* Adjustments added to a byte's range index; both are looked up from a value
+   derived from the preceding input byte. */
+static const int8_t _df_ee_tbl[] = {
+	0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
+};
+static const int8_t _ef_fe_tbl[] = {
+	0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Return true if the LEN bytes at DATA are valid UTF-8 and false otherwise.
+
+   Input is consumed 32 bytes per iteration as two 16-byte blocks, with range
+   violations accumulated in a single error vector that is tested once after
+   the loop.  Whatever remains (fewer than 32 bytes, plus up to three bytes of
+   look-behind) is checked by utf8_validate_off().  Inputs shorter than 32
+   bytes go straight to utf8_validate_off(). */
+bool
+utf8_validate_simd(const char *data, size_t len)
+{
+	const unsigned char *s = (const unsigned char *)data;
+	if (len >= 32) {
+		__m128i prev_input = _mm_set1_epi8(0);
+		__m128i prev_first_len = _mm_set1_epi8(0);
+
+		const __m128i first_len_tbl = _mm_loadu_si128(
+			(const __m128i *)_first_len_tbl);
+		const __m128i first_range_tbl = _mm_loadu_si128(
+			(const __m128i *)_first_range_tbl);
+		const __m128i range_min_tbl = _mm_loadu_si128(
+			(const __m128i *)_range_min_tbl);
+		const __m128i range_max_tbl = _mm_loadu_si128(
+			(const __m128i *)_range_max_tbl);
+		const __m128i df_ee_tbl = _mm_loadu_si128((const __m128i *)_df_ee_tbl);
+		const __m128i ef_fe_tbl = _mm_loadu_si128((const __m128i *)_ef_fe_tbl);
+
+		__m128i error = _mm_set1_epi8(0);
+
+		while (len >= 32) {
+			/***************************** block 1 ****************************/
+			const __m128i input_a = _mm_loadu_si128((const __m128i *)s);
+
+			__m128i high_nibbles = _mm_and_si128(_mm_srli_epi16(input_a, 4),
+			                                     _mm_set1_epi8(0x0F));
+
+			__m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
+
+			__m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
+
+			range_a = _mm_or_si128(
+				range_a, _mm_alignr_epi8(first_len_a, prev_first_len, 15));
+
+			__m128i tmp;
+			tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
+			range_a = _mm_or_si128(range_a, tmp);
+
+			tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
+			range_a = _mm_or_si128(range_a, tmp);
+
+			__m128i shift1, pos, range2;
+			shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
+			pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
+			tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
+			range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
+			tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
+			range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
+
+			range_a = _mm_add_epi8(range_a, range2);
+
+			__m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
+			__m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
+
+			tmp = _mm_or_si128(_mm_cmplt_epi8(input_a, minv),
+			                   _mm_cmpgt_epi8(input_a, maxv));
+			error = _mm_or_si128(error, tmp);
+
+			/***************************** block 2 ****************************/
+			const __m128i input_b = _mm_loadu_si128((const __m128i *)(s + 16));
+
+			high_nibbles = _mm_and_si128(_mm_srli_epi16(input_b, 4),
+			                             _mm_set1_epi8(0x0F));
+
+			__m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
+
+			__m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
+
+			range_b = _mm_or_si128(
+				range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
+
+			tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
+			range_b = _mm_or_si128(range_b, tmp);
+
+			tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
+			range_b = _mm_or_si128(range_b, tmp);
+
+			shift1 = _mm_alignr_epi8(input_b, input_a, 15);
+			pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
+			tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
+			range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
+			tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
+			range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
+
+			range_b = _mm_add_epi8(range_b, range2);
+
+			minv = _mm_shuffle_epi8(range_min_tbl, range_b);
+			maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
+
+			tmp = _mm_or_si128(_mm_cmplt_epi8(input_b, minv),
+			                   _mm_cmpgt_epi8(input_b, maxv));
+			error = _mm_or_si128(error, tmp);
+
+			/************************ next iteration **************************/
+			prev_input = input_b;
+			prev_first_len = first_len_b;
+
+			s += 32;
+			len -= 32;
+		}
+
+		if (!_mm_testz_si128(error, error))
+			return false;
+
+		/* token[] holds the last four bytes consumed by the loop.  Step
+		   back to the nearest of the final three bytes that is not a
+		   continuation byte (0x80..0xBF compare <= (int8_t)0xBF as signed
+		   bytes), so that a sequence straddling the SIMD/scalar boundary
+		   is re-validated in full by the scalar pass below. */
+		int32_t token4 = _mm_extract_epi32(prev_input, 3);
+		const int8_t *token = (const int8_t *)&token4;
+		int lookahead = 0;
+		if (token[3] > (int8_t)0xBF)
+			lookahead = 1;
+		else if (token[2] > (int8_t)0xBF)
+			lookahead = 2;
+		else if (token[1] > (int8_t)0xBF)
+			lookahead = 3;
+
+		s -= lookahead;
+		len += lookahead;
+	}
+
+	/* utf8_validate_off() returns 0 when it finds no invalid sequence. */
+	return utf8_validate_off((const char *)s, len) == 0;
+}
diff --git a/src/unicode.c b/src/unicode.c
new file mode 100644
index 0000000..e1faa55
--- /dev/null
+++ b/src/unicode.c
@@ -0,0 +1,59 @@
+#include "unicode.h"
+
+/* Branchless UTF-8 decoding and validation by Christopher Wellons.
+
+   You can find the original source with comments at
+   https://github.com/skeeto/branchless-utf8.
*/
+
+/* Decoder tables.  lengths[] is indexed by the top five bits of the lead
+   byte and gives the sequence length in bytes (0 for a byte that cannot
+   start a sequence).  The remaining tables are indexed by that length:
+   mins[] is the smallest rune that may be encoded with that length, masks[]
+   selects the payload bits of the lead byte, shiftc[] discards the unused
+   low bits of the assembled rune, and shifte[] discards the error bits that
+   belong to bytes beyond the end of the sequence. */
+static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                               0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
+static const rune mins[] = {RUNE_C(4194304), 0, 128, 2048, RUNE_C(65536)};
+static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
+static const int shiftc[] = {0, 18, 12, 6, 0};
+static const int shifte[] = {0, 6, 4, 2, 0};
+
+/* Decode the rune that starts at *BUF and advance *BUF past it; a lead byte
+   that cannot start a sequence is skipped as a single byte.  No validation
+   is performed.
+
+   The three bytes after the lead byte are read unconditionally, whatever
+   the sequence length, so the caller must guarantee that they are
+   readable. */
+rune
+utf8_decode(const char **buf)
+{
+	const unsigned char *s = (const unsigned char *)*buf;
+	int len = lengths[s[0] >> 3];
+	*buf = (const char *)(s + len + !len);
+
+	rune c = (rune)(s[0] & masks[len]) << 18;
+	c |= (rune)(s[1] & 0x3f) << 12;
+	c |= (rune)(s[2] & 0x3f) << 6;
+	c |= (rune)(s[3] & 0x3f) << 0;
+	return c >> shiftc[len];
+}
+
+/* Validate the LEN bytes at BUF as UTF-8.  Return 0 if the whole buffer is
+   valid; otherwise return the 1-based byte offset of the start of the first
+   invalid sequence.  No byte outside BUF[0..LEN) is read. */
+size_t
+utf8_validate_off(const char *buf, size_t len)
+{
+	const char *start = buf, *end = start + len;
+	while (buf < end) {
+		const unsigned char *s = (const unsigned char *)buf;
+
+		/* The decoder below always looks at four bytes.  Within three
+		   bytes of the end, decode from a zero-padded copy instead, so
+		   that nothing past the buffer is read.  The zero padding also
+		   makes a sequence cut short by the end of the buffer fail the
+		   continuation-byte check rather than borrow bytes from
+		   whatever follows the buffer. */
+		unsigned char tail[4] = {0};
+		size_t avail = (size_t)(end - buf);
+		if (avail < sizeof(tail)) {
+			for (size_t i = 0; i < avail; i++)
+				tail[i] = s[i];
+			s = tail;
+		}
+
+		int n = lengths[s[0] >> 3];
+
+		rune c = (rune)(s[0] & masks[n]) << 18;
+		c |= (rune)(s[1] & 0x3f) << 12;
+		c |= (rune)(s[2] & 0x3f) << 6;
+		c |= (rune)(s[3] & 0x3f) << 0;
+		c >>= shiftc[n];
+
+		/* Accumulate error flags; any bit left set after the final shift
+		   means the sequence is invalid. */
+		int e = (c < mins[n]) << 6;    /* overlong, or invalid lead byte */
+		e |= ((c >> 11) == 0x1B) << 7; /* surrogate half, U+D800..U+DFFF */
+		e |= (c > 0x10FFFF) << 8;      /* beyond U+10FFFF */
+		e |= (s[1] & 0xC0) >> 2;
+		e |= (s[2] & 0xC0) >> 4;
+		e |= (s[3]) >> 6;
+		e ^= 0x2A; /* top two bits of each continuation byte must be 10 */
+		e >>= shifte[n];
+		if (e != 0)
+			return (size_t)(buf - start) + 1;
+
+		/* A sequence that passed the checks has all N of its bytes
+		   inside the buffer, so this never steps past END. */
+		buf += n + !n;
+	}
+
+	return 0;
+}
diff --git a/src/unicode.h b/src/unicode.h
new file mode 100644
index 0000000..701c8c7
--- /dev/null
+++ b/src/unicode.h
@@ -0,0 +1,17 @@
+#ifndef ORYX_UNICODE_H
+#define ORYX_UNICODE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define RUNE_C(x) UINT32_C(x)
+typedef uint32_t rune;
+
+rune utf8_decode(const char **);
+size_t utf8_validate_off(const char *, size_t);
+#if ORYX_SIMD
+bool utf8_validate_simd(const char *, size_t);
+#endif
+
+#endif /* !ORYX_UNICODE_H */
|