aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.exrc1
-rw-r--r--.gitignore5
-rw-r--r--.gitmodules3
-rw-r--r--LICENSE14
-rw-r--r--README1
-rw-r--r--cbs.h596
-rw-r--r--make.c232
-rw-r--r--src/errors.c27
-rw-r--r--src/errors.h6
-rw-r--r--src/lexer.c34
-rw-r--r--src/lexer.h21
-rw-r--r--src/main.c68
-rw-r--r--src/unicode-avx2.c152
-rw-r--r--src/unicode-neon.c147
-rw-r--r--src/unicode-sse4_1.c158
-rw-r--r--src/unicode.c59
-rw-r--r--src/unicode.h17
17 files changed, 1541 insertions, 0 deletions
diff --git a/.exrc b/.exrc
new file mode 100644
index 0000000..432baf7
--- /dev/null
+++ b/.exrc
@@ -0,0 +1 @@
+set makeprg=./make
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8892127
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.cache/
+compile_commands.json
+make
+*.o
+oryx
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..cd5a7dd
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "vendor/mlib"]
+ path = vendor/mlib
+ url = https://github.com/Mango0x45/mlib.git
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..b946725
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,14 @@
+BSD Zero Clause License
+
+Copyright © 2024 Thomas Voss
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
diff --git a/README b/README
new file mode 100644
index 0000000..fe2ab13
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+Oryx — Programming Made Better
diff --git a/cbs.h b/cbs.h
new file mode 100644
index 0000000..492150c
--- /dev/null
+++ b/cbs.h
@@ -0,0 +1,596 @@
+#ifndef C_BUILD_SYSTEM_H
+#define C_BUILD_SYSTEM_H
+
+#define _GNU_SOURCE
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#ifndef CBS_NO_THREADS
+# include <pthread.h>
+#endif
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wordexp.h>
+
+#define _vtoxs(...) ((char *[]){__VA_ARGS__})
+
+#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
+
+/* Growable array of strings, typically a command line being assembled.
+   buf holds len entries; strspush() keeps buf[len] set to NULL so that buf
+   can be handed to execvp() directly.  cap is the capacity bookkeeping used
+   by strspush() when deciding whether to grow buf. */
+struct strs {
+ char **buf;
+ size_t len, cap;
+};
+
+/* Bit flags for the FLAGS argument of pcquery(); they may be OR'd together.
+   Each one makes pcquery() pass the like-named option to pkg-config. */
+enum pkg_config_flags {
+ PC_CFLAGS = 1 << 0, /* --cflags */
+ PC_LIBS = 1 << 1, /* --libs */
+ PC_SHARED = 1 << 2, /* --shared */
+ PC_STATIC = 1 << 3, /* --static */
+};
+
+void cbsinit(int, char **);
+static void rebuild(const char *); /* Always call via macro wrapper */
+#define rebuild() rebuild(__FILE__)
+
+static void strsfree(struct strs *);
+static void strszero(struct strs *);
+static void strspush(struct strs *, char **, size_t);
+static void strspushenv(struct strs *, const char *, char **, size_t);
+#define strspushl(xs, ...) \
+ strspush((xs), _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__)))
+#define strspushenvl(xs, ev, ...) \
+ strspushenv((xs), (ev), _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__)))
+
+static bool fexists(const char *);
+static int fmdcmp(const char *, const char *);
+static bool fmdolder(const char *, const char *);
+static bool fmdnewer(const char *, const char *);
+static bool foutdated(const char *, char **, size_t);
+#define foutdatedl(s, ...) \
+ foutdated(s, _vtoxs(__VA_ARGS__), lengthof(_vtoxs(__VA_ARGS__)))
+
+static int cmdexec(struct strs);
+static pid_t cmdexec_async(struct strs);
+static int cmdexec_read(struct strs, char **, size_t *);
+static int cmdwait(pid_t);
+static void cmdput(struct strs);
+static void cmdfput(FILE *, struct strs);
+
+static char *swpext(const char *, const char *);
+static bool pcquery(struct strs *, const char *, int);
+static bool binexists(const char *);
+static int nproc(void);
+
+#ifndef CBS_NO_THREADS
+typedef void tjob(void *);
+typedef void tjob_free(void *);
+
+/* One pending job in a thread pool's singly linked work queue */
+struct _tqueue {
+ void *arg; /* Argument handed to fn (and to free, if set) */
+ tjob *fn; /* The job itself */
+ tjob_free *free; /* Optional destructor for arg; may be NULL */
+ struct _tqueue *next; /* Next queued job, or NULL at the tail */
+};
+
+/* A fixed-size pool of worker threads draining a FIFO job queue */
+typedef struct {
+ bool stop; /* Set by tpfree() to make the workers exit */
+ size_t tcnt, left; /* Thread count; jobs enqueued but not yet finished */
+ pthread_t *thrds; /* The tcnt worker threads */
+ pthread_cond_t cnd; /* Signalled on enqueue, job completion and stop */
+ pthread_mutex_t mtx; /* Guards the queue and the left counter */
+ struct _tqueue *head, *tail; /* Job queue; both NULL when empty */
+} tpool;
+
+static void tpinit(tpool *, size_t);
+static void tpfree(tpool *);
+static void tpwait(tpool *);
+static void tpenq(tpool *, tjob *, void *, tjob_free *);
+#endif /* !CBS_NO_THREADS */
+
+static int _cbs_argc;
+static char **_cbs_argv;
+
+/* Implementation */
+
+#ifdef __GNUC__
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#ifdef __APPLE__
+# define st_mtim st_mtimespec
+#endif
+
+/* Initialise the build system.  Call this first thing in main(), before
+   rebuild().
+
+   A private, NULL-terminated copy of ARGV is stored so that rebuild() can
+   re-execute the build script with its original arguments, and the working
+   directory is changed to the directory part of argv[0] (if it has one) so
+   that relative paths resolve from the script's location. */
+void
+cbsinit(int argc, char **argv)
+{
+	_cbs_argc = argc;
+	_cbs_argv = malloc(sizeof(*_cbs_argv) * (argc + 1));
+	assert(_cbs_argv != NULL);
+	for (int i = 0; i < argc; i++) {
+		_cbs_argv[i] = strdup(argv[i]);
+		assert(_cbs_argv[i] != NULL);
+	}
+	_cbs_argv[argc] = NULL;
+
+	/* An empty argv has no program path to derive a directory from */
+	if (argc < 1)
+		return;
+
+	char *s = strrchr(_cbs_argv[0], '/');
+	if (s != NULL) {
+		/* For ‘/make’ the directory is the root, not the empty string */
+		const char *dir = s == _cbs_argv[0] ? "/" : _cbs_argv[0];
+
+		s[0] = 0;
+		/* Keep the call outside assert() so it is not compiled out
+		   when NDEBUG is defined */
+		int rc = chdir(dir);
+		assert(rc != -1);
+		(void)rc;
+		s[0] = '/';
+	}
+}
+
+/* Recompile and re-execute the build script if its source file PATH is newer
+   than the running executable.  Always invoked through the rebuild() macro,
+   which supplies __FILE__; the name is parenthesised here so that the macro
+   is not expanded.  Returns normally when the executable is up to date. */
+void
+(rebuild)(const char *path)
+{
+	char *src, *dst;
+
+	/* Only basenames are used: cbsinit() has already changed into the
+	   directory of the executable */
+	if ((src = strrchr(path, '/')) != NULL)
+		src++;
+	else
+		src = (char *)path;
+
+	if ((dst = strrchr(*_cbs_argv, '/')) != NULL)
+		dst++;
+	else
+		dst = *_cbs_argv;
+
+	if (!foutdatedl(dst, src))
+		return;
+
+	struct strs xs = {0};
+	strspushenvl(&xs, "CC", "cc");
+	strspushl(&xs, "-o", dst, src);
+#ifndef CBS_NO_THREADS
+	/* Libraries must follow the files that reference them; the linker
+	   resolves symbols in command-line order */
+	strspushl(&xs, "-lpthread");
+#endif
+	cmdput(xs);
+
+	/* Run the compiler outside of assert() so that NDEBUG cannot remove
+	   the rebuild itself */
+	int ec = cmdexec(xs);
+	assert(ec == EXIT_SUCCESS);
+	(void)ec;
+
+	execvp(*_cbs_argv, _cbs_argv);
+	assert(!"failed to execute process");
+}
+
+/* Release the storage owned by XS and reset it to the empty state.  The
+   strings themselves are not freed. */
+void
+strsfree(struct strs *xs)
+{
+	free(xs->buf);
+	*xs = (struct strs){0};
+}
+
+/* Empty XS without releasing its buffer so that it can be refilled cheaply.
+   The buffer is kept NULL-terminated, as strspush() does, so that an emptied
+   list does not still look like the previous command. */
+void
+strszero(struct strs *xs)
+{
+	xs->len = 0;
+	if (xs->buf != NULL)
+		xs->buf[0] = NULL;
+}
+
+/* Append the N strings of YS to XS, growing the buffer as needed.  Only the
+   pointers are copied.  The buffer is always left NULL-terminated. */
+void
+strspush(struct strs *xs, char **ys, size_t n)
+{
+	if (n == 0)
+		return;
+
+	const size_t need = xs->len + n;
+
+	if (need >= xs->cap) {
+		xs->cap = need * 2;
+		/* One extra slot for the terminating NULL */
+		xs->buf = realloc(xs->buf, (xs->cap + 1) * sizeof(char *));
+		assert(xs->buf != NULL);
+	}
+
+	memcpy(&xs->buf[xs->len], ys, sizeof(char *) * n);
+	xs->len = need;
+
+	assert(xs->len <= xs->cap);
+	xs->buf[need] = NULL;
+}
+
+/* Append the value of the environment variable EV to XS as a single string;
+   if EV is unset or empty, append the N fallback strings of YS instead. */
+void
+strspushenv(struct strs *xs, const char *ev, char **ys, size_t n)
+{
+	/* NOTE: the pointer returned by getenv() is pushed as-is, so do your
+	   best to NOT modify any pushed envvar! */
+	char *val = getenv(ev);
+	bool usable = val != NULL && val[0] != '\0';
+
+	if (usable)
+		strspush(xs, &val, 1);
+	else
+		strspush(xs, ys, n);
+}
+
+/* Return whether the file F exists */
+bool
+fexists(const char *f)
+{
+	return access(f, F_OK) == 0;
+}
+
+/* Compare the modification times of the files LHS and RHS.  Returns a
+   negative value if LHS is older, a positive value if it is newer, and 0 if
+   both were modified at the same instant.  Both files must exist. */
+int
+fmdcmp(const char *lhs, const char *rhs)
+{
+	struct stat sbl, sbr;
+
+	/* The calls live outside assert() so NDEBUG cannot remove them */
+	int rl = stat(lhs, &sbl);
+	int rr = stat(rhs, &sbr);
+	assert(rl != -1);
+	assert(rr != -1);
+	(void)rl;
+	(void)rr;
+
+	/* Compare instead of subtracting: the difference of two time_t or
+	   long values need not fit in an int */
+	if (sbl.st_mtim.tv_sec != sbr.st_mtim.tv_sec)
+		return sbl.st_mtim.tv_sec < sbr.st_mtim.tv_sec ? -1 : 1;
+	if (sbl.st_mtim.tv_nsec != sbr.st_mtim.tv_nsec)
+		return sbl.st_mtim.tv_nsec < sbr.st_mtim.tv_nsec ? -1 : 1;
+	return 0;
+}
+
+/* Return whether LHS was modified more recently than RHS */
+bool
+fmdnewer(const char *lhs, const char *rhs)
+{
+	int order = fmdcmp(lhs, rhs);
+	return order > 0;
+}
+
+/* Return whether LHS was modified less recently than RHS */
+bool
+fmdolder(const char *lhs, const char *rhs)
+{
+	int order = fmdcmp(lhs, rhs);
+	return order < 0;
+}
+
+/* Return whether the file SRC needs to be rebuilt: it is outdated if it does
+   not exist or if any of the N files in DEPS is newer than it.  The
+   dependencies are checked in order and checking stops at the first newer
+   one. */
+bool
+foutdated(const char *src, char **deps, size_t n)
+{
+	if (!fexists(src))
+		return true;
+
+	bool stale = false;
+	for (char **dep = deps; !stale && dep < deps + n; dep++)
+		stale = fmdolder(src, *dep);
+	return stale;
+}
+
+/* Run the command XS and wait for it to finish.  Returns what cmdwait()
+   reports for the child. */
+int
+cmdexec(struct strs xs)
+{
+	pid_t child = cmdexec_async(xs);
+	return cmdwait(child);
+}
+
+/* Start the command XS in a child process without waiting for it.  Returns
+   the child's PID, which should eventually be passed to cmdwait(). */
+pid_t
+cmdexec_async(struct strs xs)
+{
+	pid_t pid = fork();
+	assert(pid != -1);
+	if (pid == 0) {
+		execvp(xs.buf[0], xs.buf);
+		assert(!"failed to execute process");
+		/* Only reached when NDEBUG disables the assertion above: the
+		   child must never return into the parent's code */
+		_exit(127);
+	}
+	return pid;
+}
+
+/* Run the command XS, capture everything it writes to standard output, and
+   wait for it to finish.
+
+   On return *P points to a malloc()ed, NUL-terminated buffer holding the
+   output (an empty string if there was none) and *N holds its length without
+   the terminator.  The caller must free() *P.  Returns what cmdwait()
+   reports for the child. */
+int
+cmdexec_read(struct strs xs, char **p, size_t *n)
+{
+	enum {
+		R,
+		W,
+	};
+	int fds[2];
+
+	/* System calls are kept outside assert() so NDEBUG cannot remove
+	   them */
+	int rc = pipe(fds);
+	assert(rc != -1);
+
+	pid_t pid = fork();
+	assert(pid != -1);
+
+	if (pid == 0) {
+		close(fds[R]);
+		/* dup2() closes the old stdout itself */
+		rc = dup2(fds[W], STDOUT_FILENO);
+		assert(rc != -1);
+		if (fds[W] != STDOUT_FILENO)
+			close(fds[W]);
+		execvp(xs.buf[0], xs.buf);
+		assert(!"failed to execute process");
+		/* Never fall through into the parent's code under NDEBUG */
+		_exit(127);
+	}
+
+	close(fds[W]);
+
+	struct stat sb;
+	rc = fstat(fds[R], &sb);
+	assert(rc != -1);
+	(void)rc;
+
+	*p = NULL, *n = 0;
+	char *buf = malloc(sb.st_blksize);
+	assert(buf != NULL);
+
+	for (;;) {
+		ssize_t nr = read(fds[R], buf, sb.st_blksize);
+		if (nr == 0)
+			break;
+		if (nr == -1 && errno == EINTR)
+			continue;
+		assert(nr != -1);
+
+		/* One extra byte for the terminating NUL */
+		*p = realloc(*p, *n + nr + 1);
+		assert(*p != NULL);
+
+		memcpy(*p + *n, buf, nr);
+		*n += nr;
+	}
+
+	close(fds[R]);
+	free(buf);
+
+	/* Terminate the result, not the scratch buffer, and hand back an
+	   empty string rather than NULL when nothing was written */
+	if (*p == NULL) {
+		*p = malloc(1);
+		assert(*p != NULL);
+	}
+	(*p)[*n] = 0;
+
+	return cmdwait(pid);
+}
+
+/* Wait for the child process PID to terminate.  Returns its exit status if
+   it exited normally, or 256 (a value no exit status can take) if it was
+   terminated in any other way, for example by a signal. */
+int
+cmdwait(pid_t pid)
+{
+	int ws;
+	pid_t rc;
+
+	/* Retry when interrupted by a signal; the call is kept outside
+	   assert() so NDEBUG cannot remove it */
+	do
+		rc = waitpid(pid, &ws, 0);
+	while (rc == -1 && errno == EINTR);
+	assert(rc != -1);
+	(void)rc;
+
+	return WIFEXITED(ws) ? WEXITSTATUS(ws) : 256;
+}
+
+/* import shlex
+
+ s = '#define _SHELL_SAFE "'
+ for c in map(chr, range(128)):
+ if not shlex._find_unsafe(c):
+ s += c
+ print(s + '"') */
+#define _SHELL_SAFE \
+ "%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
+
+/* Print the command XS to standard output; see cmdfput() */
+void
+cmdput(struct strs xs)
+{
+	FILE *out = stdout;
+	cmdfput(out, xs);
+}
+
+/* Print the command XS to FP on one line, arguments separated by spaces and
+   quoted where necessary so that the line can be pasted into a POSIX shell.
+   The stream is locked for the duration so that lines printed by different
+   threads do not interleave.  Nothing is printed for an empty command. */
+void
+cmdfput(FILE *fp, struct strs xs)
+{
+	flockfile(fp);
+	for (size_t i = 0; i < xs.len; i++) {
+		const char *arg = xs.buf[i];
+
+		/* An empty argument must be quoted too, or it would vanish
+		   from the printed command */
+		bool safe = *arg != 0 && arg[strspn(arg, _SHELL_SAFE)] == 0;
+
+		if (safe)
+			fputs(arg, fp);
+		else {
+			putc('\'', fp);
+			for (const char *q = arg; *q; q++) {
+				/* A single quote cannot appear inside single
+				   quotes: close, emit "'", reopen */
+				if (*q == '\'')
+					fputs("'\"'\"'", fp);
+				else
+					putc(*q, fp);
+			}
+			putc('\'', fp);
+		}
+
+		putc(i + 1 == xs.len ? '\n' : ' ', fp);
+	}
+	funlockfile(fp);
+}
+
+/* Ask pkg-config about the library LIB and append the flags it reports to
+   XS.  FLAGS is a bitwise OR of enum pkg_config_flags values selecting what
+   to query.  Returns false if pkg-config failed (for instance because LIB is
+   unknown) and true otherwise.  The appended strings are heap-allocated and
+   owned by the caller. */
+bool
+pcquery(struct strs *xs, const char *lib, int flags)
+{
+	struct strs ys = {0};
+
+	strspushl(&ys, "pkg-config", "--silence-errors");
+	if (flags & PC_CFLAGS)
+		strspushl(&ys, "--cflags");
+	if (flags & PC_LIBS)
+		strspushl(&ys, "--libs");
+	if (flags & PC_SHARED)
+		strspushl(&ys, "--shared");
+	if (flags & PC_STATIC)
+		strspushl(&ys, "--static");
+	strspushl(&ys, (char *)lib);
+
+	char *buf = NULL;
+	size_t bufsz = 0;
+	int ec = cmdexec_read(ys, &buf, &bufsz);
+	strsfree(&ys);
+	if (ec != EXIT_SUCCESS) {
+		free(buf);
+		return false;
+	}
+
+	/* No output means no flags; there is nothing to index into */
+	if (buf == NULL || bufsz == 0) {
+		free(buf);
+		return true;
+	}
+
+	/* cmdexec_read() allocates one byte more than it stores, so the
+	   terminator fits.  Then drop the trailing newline, if any. */
+	buf[bufsz] = 0;
+	if (buf[bufsz - 1] == '\n')
+		buf[bufsz - 1] = 0;
+
+	/* Split the output the way a shell would, minus command
+	   substitution */
+	wordexp_t we;
+	int rc = wordexp(buf, &we, WRDE_NOCMD);
+	assert(rc == 0);
+	(void)rc;
+	free(buf);
+
+	if (we.we_wordc > 0) {
+		char **words = malloc(sizeof(*words) * we.we_wordc);
+		assert(words != NULL);
+		for (size_t i = 0; i < we.we_wordc; i++) {
+			words[i] = strdup(we.we_wordv[i]);
+			assert(words[i] != NULL);
+		}
+
+		/* strspush() copies the pointers, so only the temporary
+		   array is released here */
+		strspush(xs, words, we.we_wordc);
+		free(words);
+	}
+
+	wordfree(&we);
+	return true;
+}
+
+/* Return whether a file named S exists in one of the directories listed in
+   the PATH environment variable.  PATH must be set.  Safe to call from
+   several threads at once. */
+bool
+binexists(const char *s)
+{
+	const char *path = getenv("PATH");
+	assert(path != NULL);
+
+	/* strtok_r() writes to its input, so work on a copy */
+	char *p = strdup(path);
+	assert(p != NULL);
+
+	bool found = false;
+	char *save = NULL;
+
+	for (char *it = strtok_r(p, ":", &save); it != NULL;
+	     it = strtok_r(NULL, ":", &save))
+	{
+		char buf[PATH_MAX];
+		int len = snprintf(buf, sizeof(buf), "%s/%s", it, s);
+
+		/* A truncated path names a different file; skip it */
+		if (len < 0 || (size_t)len >= sizeof(buf))
+			continue;
+		if (fexists(buf)) {
+			found = true;
+			break;
+		}
+	}
+
+	free(p);
+	return found;
+}
+
+/* Return the number of processors currently online as reported by
+   sysconf().  Where that cannot be queried, return -1 with errno set to 0. */
+int
+nproc(void)
+{
+#ifndef _SC_NPROCESSORS_ONLN
+	errno = 0;
+	return -1;
+#else
+	long online = sysconf(_SC_NPROCESSORS_ONLN);
+	return (int)online;
+#endif
+}
+
+/* Return a newly allocated copy of the path FILE with its extension replaced
+   by EXT (given without the leading dot).  If the final path component has
+   no extension the copy is returned unchanged.  The caller must free() the
+   result. */
+char *
+swpext(const char *file, const char *ext)
+{
+	/* Only look for the dot in the final path component, so that a
+	   directory such as ‘build.d/’ is not mistaken for an extension */
+	const char *base = strrchr(file, '/');
+	base = base != NULL ? base + 1 : file;
+
+	const char *dot = strrchr(base, '.');
+	if (dot == NULL) {
+		char *dup = strdup(file);
+		assert(dup != NULL);
+		return dup;
+	}
+
+	size_t noextlen = dot - file;
+	size_t size = noextlen + strlen(ext) + 2; /* ‘.’ and the NUL */
+	char *s = malloc(size);
+	assert(s != NULL);
+	snprintf(s, size, "%.*s.%s", (int)noextlen, file, ext);
+	return s;
+}
+
+#ifndef CBS_NO_THREADS
+/* Detach and return the job at the front of TP's queue, or NULL if the queue
+   is empty.  Performs no locking of its own. */
+static struct _tqueue *
+_tpdeq(tpool *tp)
+{
+	struct _tqueue *job = tp->head;
+
+	if (job == NULL)
+		return NULL;
+
+	tp->head = job->next;
+	if (tp->head == NULL)
+		tp->tail = NULL;
+	return job;
+}
+
+/* Worker thread entry point; ARG is the owning tpool.  Repeatedly takes a
+   job off the queue and runs it until the pool is told to stop.  Always
+   returns NULL. */
+static void *
+_tpwork(void *arg)
+{
+	tpool *tp = arg;
+
+	/* tp->stop is only ever examined with the mutex held; testing it in
+	   the loop condition as well would be an unsynchronised read */
+	for (;;) {
+		struct _tqueue *q;
+
+		pthread_mutex_lock(&tp->mtx);
+		while (!tp->stop && !tp->head)
+			pthread_cond_wait(&tp->cnd, &tp->mtx);
+		if (tp->stop) {
+			pthread_mutex_unlock(&tp->mtx);
+			break;
+		}
+
+		q = _tpdeq(tp);
+		pthread_mutex_unlock(&tp->mtx);
+
+		/* Run the job without holding the lock */
+		q->fn(q->arg);
+		if (q->free)
+			q->free(q->arg);
+		free(q);
+
+		/* Report completion; this wakes tpwait() */
+		pthread_mutex_lock(&tp->mtx);
+		tp->left--;
+		pthread_cond_broadcast(&tp->cnd);
+		pthread_mutex_unlock(&tp->mtx);
+	}
+
+	return NULL;
+}
+
+/* Initialise the thread pool TP and start N worker threads.  Release it with
+   tpfree(). */
+void
+tpinit(tpool *tp, size_t n)
+{
+	int rc;
+
+	tp->tcnt = n;
+	tp->stop = false;
+	tp->left = 0;
+	tp->head = tp->tail = NULL;
+	/* malloc(0) may legitimately return NULL; never request 0 bytes */
+	tp->thrds = malloc(sizeof(*tp->thrds) * (n > 0 ? n : 1));
+	assert(tp->thrds != NULL);
+
+	/* The calls are kept outside assert() so NDEBUG cannot remove them */
+	rc = pthread_cond_init(&tp->cnd, NULL);
+	assert(rc == 0);
+	rc = pthread_mutex_init(&tp->mtx, NULL);
+	assert(rc == 0);
+	for (size_t i = 0; i < n; i++) {
+		rc = pthread_create(&tp->thrds[i], NULL, _tpwork, tp);
+		assert(rc == 0);
+	}
+	(void)rc;
+}
+
+/* Shut down the thread pool TP and release its resources.  Workers finish
+   the job they are currently running and then exit; jobs still waiting in
+   the queue are discarded without being run (their free callback, if any, is
+   still invoked).  Call tpwait() first if all jobs must complete. */
+void
+tpfree(tpool *tp)
+{
+	/* Set the flag with the mutex held so the workers, which test it
+	   under the same mutex, are guaranteed to observe it */
+	pthread_mutex_lock(&tp->mtx);
+	tp->stop = true;
+	pthread_cond_broadcast(&tp->cnd);
+	pthread_mutex_unlock(&tp->mtx);
+
+	for (size_t i = 0; i < tp->tcnt; i++)
+		pthread_join(tp->thrds[i], NULL);
+
+	free(tp->thrds);
+	while (tp->head != NULL) {
+		struct _tqueue *q = _tpdeq(tp);
+		if (q->free)
+			q->free(q->arg);
+		free(q);
+	}
+
+	pthread_cond_destroy(&tp->cnd);
+	pthread_mutex_destroy(&tp->mtx);
+}
+
+/* Block until every job enqueued on TP has finished, or until the pool has
+   been told to stop */
+void
+tpwait(tpool *tp)
+{
+	pthread_mutex_lock(&tp->mtx);
+	for (;;) {
+		if (tp->stop || tp->left == 0)
+			break;
+		pthread_cond_wait(&tp->cnd, &tp->mtx);
+	}
+	pthread_mutex_unlock(&tp->mtx);
+}
+
+/* Queue the job FN on the pool TP; a worker will call FN(ARG).  If FREE is
+   not NULL it is called with ARG once the job has run, or when the job is
+   discarded by tpfree(). */
+void
+tpenq(tpool *tp, tjob *fn, void *arg, tjob_free *free)
+{
+	struct _tqueue *job = malloc(sizeof(struct _tqueue));
+	assert(job != NULL);
+	job->arg = arg;
+	job->fn = fn;
+	job->free = free;
+	job->next = NULL;
+
+	pthread_mutex_lock(&tp->mtx);
+	/* Append at the tail; the first job also becomes the head */
+	if (!tp->head)
+		tp->head = job;
+	if (tp->tail)
+		tp->tail->next = job;
+	tp->tail = job;
+	tp->left++;
+	pthread_cond_signal(&tp->cnd);
+	pthread_mutex_unlock(&tp->mtx);
+}
+#endif /* !CBS_NO_THREADS */
+
+#ifdef __GNUC__
+# pragma GCC diagnostic pop
+#endif
+
+#ifdef __APPLE__
+# undef st_mtim
+#endif
+
+#endif /* !C_BUILD_SYSTEM_H */
diff --git a/make.c b/make.c
new file mode 100644
index 0000000..306c4b7
--- /dev/null
+++ b/make.c
@@ -0,0 +1,232 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <glob.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "cbs.h"
+
+#define TARGET "oryx"
+
+/* Bits of simd_flags: the SIMD instruction sets to build for.  Set by
+   ckd_cpu_flags() and consulted by tagvalid(). */
+enum {
+ SIMD_AVX2 = 1 << 0,
+ SIMD_NEON = 1 << 1,
+ SIMD_SSE4_1 = 1 << 2,
+};
+
+/* Compiler flags passed on every compile and link, regardless of build type */
+static char *cflags_all[] = {
+ "-pipe",
+ "-std=c99",
+ "-Wall",
+ "-Wextra",
+ "-Wno-attributes",
+ "-Wno-parentheses",
+ "-Wno-pointer-sign",
+ "-Wpedantic",
+ "-Wvla",
+#if __GLIBC__
+ "-D_GNU_SOURCE",
+#endif
+};
+
+/* Flags for debug builds (the default); used only when the CFLAGS
+   environment variable is unset or empty */
+static char *cflags_dbg[] = {
+ "-DDEBUG=1", "-fsanitize=address,undefined", "-g3", "-ggdb3", "-O0",
+};
+
+/* Flags for release builds (-r); used only when the CFLAGS environment
+   variable is unset or empty */
+static char *cflags_rls[] = {
+ "-DNDEBUG=1", "-flto", "-O3",
+#ifndef __APPLE__
+ "-march=native", "-mtune=native",
+#endif
+};
+
+static char *argv0;
+static bool fflag, rflag;
+static int simd_flags;
+
+static void cc(void *);
+static void ld(void);
+static bool tagvalid(const char *);
+static void ckd_cpu_flags(void);
+static int globerr(const char *, int);
+
+/* Print the usage synopsis to standard error and exit with a failure
+   status */
+static void
+usage(void)
+{
+	const char *prog = argv0;
+
+	fprintf(stderr,
+	        "Usage: %s [-fr]\n"
+	        " %s clean\n",
+	        prog, prog);
+	exit(EXIT_FAILURE);
+}
+
+/* Build script entry point.
+
+   Without arguments, compile every source file under src/ in parallel and
+   link the result.  -f forces everything to be rebuilt and -r selects a
+   release build.  The ‘clean’ subcommand deletes the target and all object
+   files instead. */
+int
+main(int argc, char **argv)
+{
+	cbsinit(argc, argv);
+	rebuild();
+
+	argv0 = argv[0];
+
+	int opt;
+	while ((opt = getopt(argc, argv, "fr")) != -1) {
+		switch (opt) {
+		case 'f':
+			fflag = true;
+			break;
+		case 'r':
+			rflag = true;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc > 0) {
+		int ec = EXIT_SUCCESS;
+
+		if (strcmp("clean", *argv) == 0) {
+			struct strs cmd = {0};
+
+			/* The parentheses matter: the implicit ‘and’ binds
+			   tighter than -or, so without them -delete would
+			   apply to the object files only and the target
+			   would never be removed */
+			strspushl(&cmd, "find", ".", "(", "-name", TARGET, "-or",
+			          "-name", "*.o", ")", "-delete");
+			cmdput(cmd);
+			ec = cmdexec(cmd);
+			strsfree(&cmd);
+		} else {
+			fprintf(stderr, "%s: invalid subcommand — ‘%s’\n", argv0, *argv);
+			usage();
+		}
+
+		return ec == EXIT_SUCCESS ? EXIT_SUCCESS : EXIT_FAILURE;
+	}
+
+	ckd_cpu_flags();
+
+	/* Keep the call outside assert() so NDEBUG cannot remove it */
+	glob_t g;
+	int rc = glob("src/*.c", 0, globerr, &g);
+	assert(rc == 0);
+	(void)rc;
+
+	/* Fall back to a fixed worker count if the CPU count is unknown */
+	int procs = nproc();
+	if (procs < 1)
+		procs = 8;
+
+	tpool tp;
+	tpinit(&tp, procs);
+	for (size_t i = 0; i < g.gl_pathc; i++)
+		tpenq(&tp, cc, g.gl_pathv[i], NULL);
+	tpwait(&tp);
+	tpfree(&tp);
+
+	ld();
+
+	globfree(&g);
+	return EXIT_SUCCESS;
+}
+
+/* Thread pool job: compile the source file ARG (a path string) into an
+   object file next to it, unless the object file is already up to date and
+   -f was not given.  Files tagged for a SIMD instruction set that is not
+   enabled are skipped.  A failed compilation terminates the build. */
+void
+cc(void *arg)
+{
+	if (!tagvalid(arg))
+		return;
+
+	struct strs cmd = {0};
+	char *dst = swpext(arg, "o"), *src = arg;
+
+	if (!fflag && !foutdatedl(dst, src))
+		goto out;
+
+	strspushenvl(&cmd, "CC", "cc");
+	strspush(&cmd, cflags_all, lengthof(cflags_all));
+	if (rflag)
+		strspushenv(&cmd, "CFLAGS", cflags_rls, lengthof(cflags_rls));
+	else
+		strspushenv(&cmd, "CFLAGS", cflags_dbg, lengthof(cflags_dbg));
+	if (simd_flags != 0)
+		strspushl(&cmd, "-DORYX_SIMD=1");
+	strspushl(&cmd, "-o", dst, "-c", src);
+
+	cmdput(cmd);
+	if (cmdexec(cmd) != EXIT_SUCCESS) {
+		/* Do not go on to link stale or missing objects.  This runs
+		   on a worker thread and several jobs may fail at once, so
+		   flush by hand and use _exit() instead of calling exit()
+		   concurrently. */
+		fflush(NULL);
+		_exit(EXIT_FAILURE);
+	}
+	strsfree(&cmd);
+out:
+	free(dst);
+}
+
+/* Link every object file under src/ that belongs to the current
+   configuration into TARGET.  Linking is skipped if TARGET is newer than all
+   of them, unless -f was given.  A failed link terminates the build with a
+   failure status. */
+void
+ld(void)
+{
+	glob_t g;
+	bool dobuild = fflag;
+	struct strs cmd = {0};
+
+	strspushenvl(&cmd, "CC", "cc");
+	strspush(&cmd, cflags_all, lengthof(cflags_all));
+	if (rflag)
+		strspushenv(&cmd, "CFLAGS", cflags_rls, lengthof(cflags_rls));
+	else
+		strspushenv(&cmd, "CFLAGS", cflags_dbg, lengthof(cflags_dbg));
+	strspushl(&cmd, "-o", TARGET);
+
+	/* Keep the call outside assert() so NDEBUG cannot remove it */
+	int rc = glob("src/*.o", 0, globerr, &g);
+	assert(rc == 0);
+	(void)rc;
+
+	for (size_t i = 0; i < g.gl_pathc; i++) {
+		/* Objects left over from a different SIMD configuration must
+		   not be linked in */
+		if (!tagvalid(g.gl_pathv[i]))
+			continue;
+		if (foutdatedl(TARGET, g.gl_pathv[i]))
+			dobuild = true;
+		strspushl(&cmd, g.gl_pathv[i]);
+	}
+
+	if (dobuild) {
+		cmdput(cmd);
+		if (cmdexec(cmd) != EXIT_SUCCESS)
+			exit(EXIT_FAILURE);
+	}
+
+	globfree(&g);
+	strsfree(&cmd);
+}
+
+/* Return whether FILE takes part in the current build.  A file whose name
+   contains the tag of a SIMD instruction set (‘-avx2.’, ‘-neon.’ or
+   ‘-sse4_1.’) only does so when that instruction set is enabled in
+   simd_flags; untagged files always do. */
+bool
+tagvalid(const char *file)
+{
+	static const struct {
+		const char *tag;
+		int flag;
+	} tags[] = {
+		{"-avx2.", SIMD_AVX2},
+		{"-neon.", SIMD_NEON},
+		{"-sse4_1.", SIMD_SSE4_1},
+	};
+
+	for (size_t i = 0; i < sizeof(tags) / sizeof(*tags); i++) {
+		bool tagged = strstr(file, tags[i].tag) != NULL;
+		bool enabled = (simd_flags & tags[i].flag) != 0;
+		if (tagged && !enabled)
+			return false;
+	}
+	return true;
+}
+
+/* Detect which SIMD instruction set to build for and record it in
+   simd_flags.  Detection only happens for release builds (-r); otherwise
+   simd_flags is left untouched.  On x86-64 AVX2 is preferred over SSE4.1. */
+void
+ckd_cpu_flags(void)
+{
+	if (!rflag)
+		return;
+#if __GNUC__ && __x86_64__
+	uint32_t eax, ebx, ecx, edx;
+
+	/* CPUID overwrites EAX, EBX, ECX and EDX; all four must be declared
+	   as outputs or the compiler will assume they are preserved.
+	   Leaf 0 reports the highest supported basic leaf in EAX. */
+	asm volatile("cpuid"
+	             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+	             : "a"(0), "c"(0));
+
+	if (eax >= 7) {
+		/* Leaf 7, subleaf 0: EBX bit 5 is AVX2 */
+		asm volatile("cpuid"
+		             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+		             : "a"(7), "c"(0));
+		if (ebx & (UINT32_C(1) << 5)) {
+			simd_flags |= SIMD_AVX2;
+			return;
+		}
+	}
+
+	/* Leaf 1: ECX bit 19 is SSE4.1 */
+	asm volatile("cpuid"
+	             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+	             : "a"(1), "c"(0));
+	if (ecx & (UINT32_C(1) << 19))
+		simd_flags |= SIMD_SSE4_1;
+#elif __ARM_NEON
+	simd_flags |= SIMD_NEON;
+#endif
+}
+
+/* Error callback for glob(): report the path S that could not be read along
+   with the reason for errno value E, then exit with a failure status.
+   Never returns. */
+int
+globerr(const char *s, int e)
+{
+	const char *reason = strerror(e);
+
+	fprintf(stderr, "glob: %s: %s\n", s, reason);
+	exit(EXIT_FAILURE);
+}
diff --git a/src/errors.c b/src/errors.c
new file mode 100644
index 0000000..49eb11d
--- /dev/null
+++ b/src/errors.c
@@ -0,0 +1,27 @@
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "errors.h"
+
+/* Print ‘oryx: ’ followed by the printf-style message FMT to standard error
+   and exit with a failure status.  If FMT ends in a colon, a space and the
+   strerror() text for the value errno had on entry are appended.  Never
+   returns. */
+void
+err(const char *fmt, ...)
+{
+	/* Save errno before anything else gets a chance to change it */
+	int save = errno;
+	size_t len = strlen(fmt);
+
+	va_list ap;
+	va_start(ap, fmt);
+
+	flockfile(stderr);
+
+	fputs("oryx: ", stderr);
+	vfprintf(stderr, fmt, ap);
+	/* Guard the index: an empty format has no last character */
+	if (len > 0 && fmt[len - 1] == ':')
+		fprintf(stderr, " %s", strerror(save));
+	fputc('\n', stderr);
+	fflush(stderr);
+	funlockfile(stderr);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
diff --git a/src/errors.h b/src/errors.h
new file mode 100644
index 0000000..69c8ea0
--- /dev/null
+++ b/src/errors.h
@@ -0,0 +1,6 @@
+#ifndef ORYX_ERRORS_H
+#define ORYX_ERRORS_H
+
+/* Print ‘oryx: ’ and a printf-style message to standard error, then exit
+   with EXIT_FAILURE.  If the format string ends in ‘:’, the strerror() text
+   for errno is appended. */
+void err(const char *, ...);
+
+#endif /* !ORYX_ERRORS_H */
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..970202a
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,34 @@
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "errors.h"
+#include "lexer.h"
+#include "unicode.h"
+
+/* Lex the CODESZ bytes at CODE.  The number of lexemes is stored in *LCNT
+   and the lexemes themselves are returned.  Input that fails UTF-8
+   validation is reported through err(), which does not return. */
+struct lexeme *
+lexstring(const char *code, size_t codesz, size_t *lcnt)
+{
+ struct {
+ struct lexeme *p;
+ size_t len, buf;
+ } data = {0};
+
+ /* When built with ORYX_SIMD the vectorised validator runs first, and the
+    scalar validator below only runs if it rejects the input.  Without
+    ORYX_SIMD the scalar validator always runs. */
+#if ORYX_SIMD
+ if (!utf8_validate_simd(code, codesz)) {
+#endif
+ /* NOTE(review): utf8_validate_off() presumably returns 0 for valid
+    input and otherwise the 1-based offset of the offending byte, hence
+    the ‘off - 1’ — confirm against unicode.c */
+ size_t off = utf8_validate_off(code, codesz);
+ if (off != 0)
+ err("Invalid UTF-8 at byte-offset %zu", off - 1);
+#if ORYX_SIMD
+ }
+#endif
+
+ /* NOTE(review): the runes are decoded but nothing is done with them yet,
+    so data is returned exactly as initialised: a NULL pointer and a
+    count of 0 */
+ const char *end = code + codesz;
+ while (code < end) {
+ rune ch = utf8_decode(&code);
+ }
+
+ *lcnt = data.len;
+ return data.p;
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..7271498
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,21 @@
+#ifndef ORYX_LEXER_H
+#define ORYX_LEXER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+enum {
+ LEXIDENT,
+};
+
+typedef uint8_t lexeme_kind;
+
+/* A single token produced by the lexer */
+struct lexeme {
+ lexeme_kind kind; /* One of the LEX* constants */
+ /* NOTE(review): p and len presumably delimit the token's text within
+    the source buffer — confirm once lexstring() produces lexemes */
+ const char *p;
+ size_t len;
+};
+
+struct lexeme *lexstring(const char *, size_t, size_t *);
+
+#endif /* !ORYX_LEXER_H */
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..23b0471
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,68 @@
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "errors.h"
+#include "lexer.h"
+
+static char *readfile(const char *, size_t *);
+
+/* Program entry point: read the file named by the sole argument and lex it */
+int
+main(int argc, char **argv)
+{
+	if (argc != 2) {
+		fputs("Usage: oryx file\n", stderr);
+		exit(EXIT_FAILURE);
+	}
+
+	/* The length is an out-parameter of the call producing the pointer.
+	   Making that call inside the struct's own initialiser would leave it
+	   unspecified whether the implicit zeroing of ‘len’ happens before
+	   or after the callee stores to it, so assign after declaring. */
+	struct {
+		char *p;
+		size_t len;
+	} file;
+	file.p = readfile(argv[1], &file.len);
+
+	struct {
+		struct lexeme *p;
+		size_t len;
+	} toks;
+	toks.p = lexstring(file.p, file.len, &toks.len);
+
+	/* Only bother freeing in debug builds, where it keeps leak checkers
+	   quiet; the process is about to exit anyway */
+#if DEBUG
+	free(file.p);
+	free(toks.p);
+#endif
+	return EXIT_SUCCESS;
+}
+
+/* Read the file FILENAME into a newly allocated buffer, store the number of
+   bytes read in *N and return the buffer.  The data is followed by 4 NUL
+   bytes that are not counted in *N.  Any failure is reported through err(),
+   which does not return.  The caller must free() the result.
+
+   At most the size reported by fstat() is read, so files without a
+   meaningful size (pipes, for instance) yield an empty buffer. */
+char *
+readfile(const char *filename, size_t *n)
+{
+	/* The trailing colon makes err() append the strerror() text */
+	int fd = open(filename, O_RDONLY);
+	if (fd == -1)
+		err("open: %s:", filename);
+
+	struct stat sb;
+	if (fstat(fd, &sb) == -1)
+		err("fstat: %s:", filename);
+
+	size_t size = sb.st_size;
+	/* NOTE(review): the 4 bytes of padding are presumably there so that
+	   UTF-8 decoding may read past the final byte — confirm */
+	char *p = malloc(size + 4);
+	if (p == NULL)
+		err("malloc:");
+
+	/* Never ask for more than the buffer has room for, even if the file
+	   grows while it is being read */
+	size_t off = 0;
+	while (off < size) {
+		ssize_t nr = read(fd, p + off, size - off);
+		if (nr == -1)
+			err("read: %s:", filename);
+		if (nr == 0)
+			break; /* The file shrank; keep what was read */
+		off += nr;
+	}
+	for (int i = 0; i < 4; i++)
+		p[off + i] = 0;
+
+	*n = off;
+	close(fd);
+	return p;
+}
diff --git a/src/unicode-avx2.c b/src/unicode-avx2.c
new file mode 100644
index 0000000..6507ca2
--- /dev/null
+++ b/src/unicode-avx2.c
@@ -0,0 +1,152 @@
+#include <stdint.h>
+#include <x86intrin.h>
+
+#include "unicode.h"
+
+#pragma GCC diagnostic ignored "-Woverflow"
+
+static const int8_t _first_len_tbl[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
+};
+
+static const int8_t _first_range_tbl[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
+};
+
+static const int8_t _range_min_tbl[] = {
+ 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, 0xC2, 0x7F, 0x7F,
+ 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80,
+ 0x90, 0x80, 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
+};
+static const int8_t _range_max_tbl[] = {
+ 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, 0xF4, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F,
+ 0xBF, 0x8F, 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+};
+
+static const int8_t _df_ee_tbl[] = {
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
+};
+
+static const int8_t _ef_fe_tbl[] = {
+ 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Return B shifted up by one byte, with the last byte of A moved in at the
+   front */
+static inline __m256i
+push_last_byte_of_a_to_b(__m256i a, __m256i b)
+{
+	/* Low lane: high half of A; high lane: low half of B */
+	const __m256i prev_lanes = _mm256_permute2x128_si256(a, b, 0x21);
+	return _mm256_alignr_epi8(b, prev_lanes, 15);
+}
+
+/* Return B shifted up by two bytes, with the last two bytes of A moved in at
+   the front */
+static inline __m256i
+push_last_2bytes_of_a_to_b(__m256i a, __m256i b)
+{
+	/* Low lane: high half of A; high lane: low half of B */
+	const __m256i prev_lanes = _mm256_permute2x128_si256(a, b, 0x21);
+	return _mm256_alignr_epi8(b, prev_lanes, 14);
+}
+
+/* Return B shifted up by three bytes, with the last three bytes of A moved
+   in at the front */
+static inline __m256i
+push_last_3bytes_of_a_to_b(__m256i a, __m256i b)
+{
+	/* Low lane: high half of A; high lane: low half of B */
+	const __m256i prev_lanes = _mm256_permute2x128_si256(a, b, 0x21);
+	return _mm256_alignr_epi8(b, prev_lanes, 13);
+}
+
+/* Return whether the LEN bytes at DATA are valid UTF-8, using AVX2.
+
+   The input is consumed in 32-byte blocks.  For every byte an index into the
+   range tables is computed from the lead bytes preceding it, and the byte is
+   then checked against the minimum and maximum the tables give for that
+   index.  Whatever remains after the last full block is handed to the
+   scalar utf8_validate_off(). */
+bool
+utf8_validate_simd(const char *data, size_t len)
+{
+ const unsigned char *s = data;
+ if (len >= 32) {
+ /* State carried over from the previous block; zero before the first */
+ __m256i prev_input = _mm256_set1_epi8(0);
+ __m256i prev_first_len = _mm256_set1_epi8(0);
+
+ const __m256i first_len_tbl = _mm256_loadu_si256(
+ (const __m256i *)_first_len_tbl);
+ const __m256i first_range_tbl = _mm256_loadu_si256(
+ (const __m256i *)_first_range_tbl);
+ const __m256i range_min_tbl = _mm256_loadu_si256(
+ (const __m256i *)_range_min_tbl);
+ const __m256i range_max_tbl = _mm256_loadu_si256(
+ (const __m256i *)_range_max_tbl);
+ const __m256i df_ee_tbl = _mm256_loadu_si256(
+ (const __m256i *)_df_ee_tbl);
+ const __m256i ef_fe_tbl = _mm256_loadu_si256(
+ (const __m256i *)_ef_fe_tbl);
+
+ /* Errors are accumulated and only inspected once after the loop */
+ __m256i error1 = _mm256_set1_epi8(0);
+ __m256i error2 = _mm256_set1_epi8(0);
+
+ while (len >= 32) {
+ const __m256i input = _mm256_loadu_si256((const __m256i *)s);
+
+ /* There is no 8-bit shift, so shift 16-bit units and mask */
+ const __m256i high_nibbles = _mm256_and_si256(
+ _mm256_srli_epi16(input, 4), _mm256_set1_epi8(0x0F));
+
+ /* Table lookup keyed on each byte's high nibble */
+ __m256i first_len = _mm256_shuffle_epi8(first_len_tbl,
+ high_nibbles);
+
+ __m256i range = _mm256_shuffle_epi8(first_range_tbl, high_nibbles);
+
+ /* Fold in the first_len values of the one, two and three preceding
+    bytes, saturating-decremented by 0, 1 and 2 respectively */
+ range = _mm256_or_si256(
+ range, push_last_byte_of_a_to_b(prev_first_len, first_len));
+
+ __m256i tmp1, tmp2;
+
+ tmp1 = push_last_2bytes_of_a_to_b(prev_first_len, first_len);
+ tmp2 = _mm256_subs_epu8(tmp1, _mm256_set1_epi8(1));
+
+ range = _mm256_or_si256(range, tmp2);
+
+ tmp1 = push_last_3bytes_of_a_to_b(prev_first_len, first_len);
+ tmp2 = _mm256_subs_epu8(tmp1, _mm256_set1_epi8(2));
+ range = _mm256_or_si256(range, tmp2);
+
+ __m256i shift1, pos, range2;
+
+ /* Adjust the index based on the value of the preceding byte.
+    NOTE(review): this looks like the special-casing of the bytes
+    following E0, ED, F0 and F4 — confirm against the tables. */
+ shift1 = push_last_byte_of_a_to_b(prev_input, input);
+ pos = _mm256_sub_epi8(shift1, _mm256_set1_epi8(0xEF));
+
+ tmp1 = _mm256_subs_epu8(pos, _mm256_set1_epi8(240));
+ range2 = _mm256_shuffle_epi8(df_ee_tbl, tmp1);
+ tmp2 = _mm256_adds_epu8(pos, _mm256_set1_epi8(112));
+ range2 = _mm256_add_epi8(range2,
+ _mm256_shuffle_epi8(ef_fe_tbl, tmp2));
+
+ range = _mm256_add_epi8(range, range2);
+
+ /* Look up the bounds for each byte's index */
+ __m256i minv = _mm256_shuffle_epi8(range_min_tbl, range);
+ __m256i maxv = _mm256_shuffle_epi8(range_max_tbl, range);
+
+ /* The comparisons are signed */
+ error1 = _mm256_or_si256(error1, _mm256_cmpgt_epi8(minv, input));
+ error2 = _mm256_or_si256(error2, _mm256_cmpgt_epi8(input, maxv));
+
+ prev_input = input;
+ prev_first_len = first_len;
+
+ s += 32;
+ len -= 32;
+ }
+
+ __m256i error = _mm256_or_si256(error1, error2);
+ if (!_mm256_testz_si256(error, error))
+ return false;
+
+ /* Inspect the last bytes of the final block.  Compared as signed
+    values, only continuation bytes (0x80–0xBF) are not greater than
+    0xBF.  Back up to the last byte that is not a continuation byte, at
+    most three bytes, so that the scalar check below starts on a
+    character boundary. */
+ int32_t token4 = _mm256_extract_epi32(prev_input, 7);
+ const int8_t *token = (const int8_t *)&token4;
+ int lookahead = 0;
+ if (token[3] > (int8_t)0xBF)
+ lookahead = 1;
+ else if (token[2] > (int8_t)0xBF)
+ lookahead = 2;
+ else if (token[1] > (int8_t)0xBF)
+ lookahead = 3;
+
+ s -= lookahead;
+ len += lookahead;
+ }
+
+ /* Check remaining bytes with naïve method */
+ return utf8_validate_off(s, len) == 0;
+}
diff --git a/src/unicode-neon.c b/src/unicode-neon.c
new file mode 100644
index 0000000..2791117
--- /dev/null
+++ b/src/unicode-neon.c
@@ -0,0 +1,147 @@
+#include <arm_neon.h>
+#include <stdint.h>
+
+#include "unicode.h"
+
+#pragma GCC diagnostic ignored "-Woverflow"
+
+static const uint8_t _first_len_tbl[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
+};
+
+static const uint8_t _first_range_tbl[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
+};
+
+static const uint8_t _range_min_tbl[] = {
+ 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
+ 0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+};
+static const uint8_t _range_max_tbl[] = {
+ 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
+ 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const uint8_t _range_adjust_tbl[] = {
+ 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
+};
+
+/* Return whether the LEN bytes at DATA are valid UTF-8, using NEON.
+
+   The input is consumed in 32-byte blocks, handled as two 16-byte halves.
+   For every byte an index into the range tables is computed from the lead
+   bytes preceding it, and the byte is then checked against the minimum and
+   maximum the tables give for that index.  Whatever remains after the last
+   full block is handed to the scalar utf8_validate_off(). */
+bool
+utf8_validate_simd(const char *data, size_t len)
+{
+	const unsigned char *s = (const unsigned char *)data;
+	if (len >= 32) {
+		/* State carried over from the previous block; zero before
+		   the first */
+		uint8x16_t prev_input = vdupq_n_u8(0);
+		uint8x16_t prev_first_len = vdupq_n_u8(0);
+
+		const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl);
+		const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl);
+		const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl);
+		const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl);
+		const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl);
+
+		const uint8x16_t const_1 = vdupq_n_u8(1);
+		const uint8x16_t const_2 = vdupq_n_u8(2);
+		const uint8x16_t const_e0 = vdupq_n_u8(0xE0);
+
+		/* Errors are accumulated and only inspected once after the
+		   loop */
+		uint8x16_t error1 = vdupq_n_u8(0);
+		uint8x16_t error2 = vdupq_n_u8(0);
+		uint8x16_t error3 = vdupq_n_u8(0);
+		uint8x16_t error4 = vdupq_n_u8(0);
+
+		while (len >= 32) {
+#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8)
+			/* GCC doesn't support vld1q_u8_x2 until version 8.
+			   Load from the cursor S, which advances; DATA
+			   always points at the start of the input. */
+			const uint8x16_t input_a = vld1q_u8(s);
+			const uint8x16_t input_b = vld1q_u8(s + 16);
+#else
+			/* Forces a double load on Clang */
+			const uint8x16x2_t input_pair = vld1q_u8_x2(s);
+			const uint8x16_t input_a = input_pair.val[0];
+			const uint8x16_t input_b = input_pair.val[1];
+#endif
+
+			const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4);
+			const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4);
+
+			/* Table lookups keyed on each byte's high nibble */
+			const uint8x16_t first_len_a = vqtbl1q_u8(first_len_tbl,
+			                                          high_nibbles_a);
+			const uint8x16_t first_len_b = vqtbl1q_u8(first_len_tbl,
+			                                          high_nibbles_b);
+
+			uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a);
+			uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b);
+
+			/* Fold in the first_len values of the one, two and
+			   three preceding bytes, saturating-decremented by
+			   0, 1 and 2 respectively */
+			range_a = vorrq_u8(range_a,
+			                   vextq_u8(prev_first_len, first_len_a, 15));
+			range_b = vorrq_u8(range_b,
+			                   vextq_u8(first_len_a, first_len_b, 15));
+
+			uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b;
+			tmp1_a = vextq_u8(prev_first_len, first_len_a, 14);
+			tmp1_a = vqsubq_u8(tmp1_a, const_1);
+			range_a = vorrq_u8(range_a, tmp1_a);
+
+			tmp1_b = vextq_u8(first_len_a, first_len_b, 14);
+			tmp1_b = vqsubq_u8(tmp1_b, const_1);
+			range_b = vorrq_u8(range_b, tmp1_b);
+
+			tmp2_a = vextq_u8(prev_first_len, first_len_a, 13);
+			tmp2_a = vqsubq_u8(tmp2_a, const_2);
+			range_a = vorrq_u8(range_a, tmp2_a);
+
+			tmp2_b = vextq_u8(first_len_a, first_len_b, 13);
+			tmp2_b = vqsubq_u8(tmp2_b, const_2);
+			range_b = vorrq_u8(range_b, tmp2_b);
+
+			/* Adjust the index based on the value of the
+			   preceding byte */
+			uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15);
+			uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0);
+			range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a));
+
+			uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15);
+			uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0);
+			range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b));
+
+			/* Look up the bounds for each byte's index */
+			uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a);
+			uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a);
+
+			uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b);
+			uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b);
+
+			error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a));
+			error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a));
+
+			error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b));
+			error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b));
+
+			prev_input = input_b;
+			prev_first_len = first_len_b;
+
+			s += 32;
+			len -= 32;
+		}
+		error1 = vorrq_u8(error1, error2);
+		error1 = vorrq_u8(error1, error3);
+		error1 = vorrq_u8(error1, error4);
+
+		/* The function returns bool: a ‘-1’ here would convert to
+		   true and report invalid input as valid */
+		if (vmaxvq_u8(error1))
+			return false;
+
+		/* Inspect the last bytes of the final block.  Compared as
+		   signed values, only continuation bytes (0x80–0xBF) are not
+		   greater than 0xBF.  Back up to the last byte that is not a
+		   continuation byte, at most three bytes, so that the scalar
+		   check below starts on a character boundary. */
+		uint32_t token4;
+		vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3);
+
+		const int8_t *token = (const int8_t *)&token4;
+		int lookahead = 0;
+		if (token[3] > (int8_t)0xBF)
+			lookahead = 1;
+		else if (token[2] > (int8_t)0xBF)
+			lookahead = 2;
+		else if (token[1] > (int8_t)0xBF)
+			lookahead = 3;
+
+		s -= lookahead;
+		len += lookahead;
+	}
+
+	/* Check the remaining bytes with the scalar validator */
+	return utf8_validate_off(s, len) == 0;
+}
diff --git a/src/unicode-sse4_1.c b/src/unicode-sse4_1.c
new file mode 100644
index 0000000..17a46a8
--- /dev/null
+++ b/src/unicode-sse4_1.c
@@ -0,0 +1,158 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <x86intrin.h>
+
+#include "unicode.h"
+
+#pragma GCC diagnostic ignored "-Woverflow"
+/* Indexed by a byte's high nibble: how many continuation bytes follow it. */
+static const int8_t _first_len_tbl[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
+};
+/* Indexed by a byte's high nibble: range index 8 for 0xC-0xF, otherwise 0. */
+static const int8_t _first_range_tbl[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
+};
+/* Inclusive bounds per range index (signed compare); 9-15 always fail. */
+static const int8_t _range_min_tbl[] = {
+ 0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
+ 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
+};
+static const int8_t _range_max_tbl[] = {
+ 0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
+ 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+};
+/* Range-index adjustment after 0xE0/0xED (df_ee) and 0xF0/0xF4 (ef_fe). */
+static const int8_t _df_ee_tbl[] = {
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
+};
+static const int8_t _ef_fe_tbl[] = {
+ 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Return true if the LEN bytes at DATA are valid UTF-8, false otherwise */
+bool
+utf8_validate_simd(const char *data, size_t len)
+{
+	const unsigned char *s = (const unsigned char *)data;
+	if (len >= 32) {
+		__m128i prev_input = _mm_set1_epi8(0);
+		__m128i prev_first_len = _mm_set1_epi8(0);
+
+		const __m128i first_len_tbl = _mm_loadu_si128(
+			(const __m128i *)_first_len_tbl);
+		const __m128i first_range_tbl = _mm_loadu_si128(
+			(const __m128i *)_first_range_tbl);
+		const __m128i range_min_tbl = _mm_loadu_si128(
+			(const __m128i *)_range_min_tbl);
+		const __m128i range_max_tbl = _mm_loadu_si128(
+			(const __m128i *)_range_max_tbl);
+		const __m128i df_ee_tbl = _mm_loadu_si128((const __m128i *)_df_ee_tbl);
+		const __m128i ef_fe_tbl = _mm_loadu_si128((const __m128i *)_ef_fe_tbl);
+		/* Accumulates out-of-range bytes; only tested after the loop */
+		__m128i error = _mm_set1_epi8(0);
+		/* Each iteration validates 32 bytes as two 16-byte blocks */
+		while (len >= 32) {
+			/***************************** block 1 ****************************/
+			const __m128i input_a = _mm_loadu_si128((const __m128i *)s);
+
+			__m128i high_nibbles = _mm_and_si128(_mm_srli_epi16(input_a, 4),
+				_mm_set1_epi8(0x0F));
+			/* Per byte: number of continuation bytes expected after it */
+			__m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
+			/* Range index: 8 for high nibbles 0xC-0xF, otherwise 0 */
+			__m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
+			/* OR in what the previous 1, 2 and 3 bytes still expect to follow */
+			range_a = _mm_or_si128(
+				range_a, _mm_alignr_epi8(first_len_a, prev_first_len, 15));
+
+			__m128i tmp;
+			tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
+			range_a = _mm_or_si128(range_a, tmp);
+
+			tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
+			range_a = _mm_or_si128(range_a, tmp);
+			/* Bump the index after 0xE0/0xED (df_ee_tbl), 0xF0/0xF4 (ef_fe_tbl) */
+			__m128i shift1, pos, range2;
+			shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
+			pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
+			tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
+			range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
+			tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
+			range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
+
+			range_a = _mm_add_epi8(range_a, range2);
+			/* Look up the inclusive bounds for each byte's range index */
+			__m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
+			__m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
+			/* Signed compares; a byte outside [minv, maxv] marks an error */
+			tmp = _mm_or_si128(_mm_cmplt_epi8(input_a, minv),
+				_mm_cmpgt_epi8(input_a, maxv));
+			error = _mm_or_si128(error, tmp);
+
+			/***************************** block 2 ****************************/
+			const __m128i input_b = _mm_loadu_si128((const __m128i *)(s + 16));
+			/* Same steps as block 1; block 1 supplies the preceding bytes */
+			high_nibbles = _mm_and_si128(_mm_srli_epi16(input_b, 4),
+				_mm_set1_epi8(0x0F));
+
+			__m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
+
+			__m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
+
+			range_b = _mm_or_si128(
+				range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
+
+			tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
+			range_b = _mm_or_si128(range_b, tmp);
+
+			tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
+			tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
+			range_b = _mm_or_si128(range_b, tmp);
+
+			shift1 = _mm_alignr_epi8(input_b, input_a, 15);
+			pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
+			tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
+			range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
+			tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
+			range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
+
+			range_b = _mm_add_epi8(range_b, range2);
+
+			minv = _mm_shuffle_epi8(range_min_tbl, range_b);
+			maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
+
+			tmp = _mm_or_si128(_mm_cmplt_epi8(input_b, minv),
+				_mm_cmpgt_epi8(input_b, maxv));
+			error = _mm_or_si128(error, tmp);
+
+			/************************ next iteration **************************/
+			prev_input = input_b;
+			prev_first_len = first_len_b;
+
+			s += 32;
+			len -= 32;
+		}
+
+		if (!_mm_testz_si128(error, error))
+			return false;
+		/* Back up to the last non-continuation byte in the final 3, if any */
+		int32_t token4 = _mm_extract_epi32(prev_input, 3);
+		const int8_t *token = (const int8_t *)&token4;
+		int lookahead = 0;
+		if (token[3] > (int8_t)0xBF)
+			lookahead = 1;
+		else if (token[2] > (int8_t)0xBF)
+			lookahead = 2;
+		else if (token[1] > (int8_t)0xBF)
+			lookahead = 3;
+
+		s -= lookahead;
+		len += lookahead;
+	}
+
+	return utf8_validate_off((const char *)s, len) == 0;
+}
diff --git a/src/unicode.c b/src/unicode.c
new file mode 100644
index 0000000..e1faa55
--- /dev/null
+++ b/src/unicode.c
@@ -0,0 +1,59 @@
+#include "unicode.h"
+
+/* Branchless UTF-8 decoding and validation by Christopher Wellons; the
+   original source with comments is at
+   https://github.com/skeeto/branchless-utf8.
+   'lengths' maps the top five bits of a lead byte to a sequence length (0 if
+   the byte cannot start one); the other tables are indexed by that length. */
+static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
+static const rune mins[] = {RUNE_C(4194304), 0, 128, 2048, RUNE_C(65536)};
+static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
+static const int shiftc[] = {0, 18, 12, 6, 0};
+static const int shifte[] = {0, 6, 4, 2, 0};
+
+/* Decode the rune at *BUF and advance *BUF past it (by one byte if the lead
+   byte is invalid).  Always reads four bytes, whatever the sequence length. */
+rune
+utf8_decode(const char **buf)
+{
+	const unsigned char *s = (const unsigned char *)*buf;
+	int len = lengths[s[0] >> 3];
+	*buf += len + !len;
+
+	rune c = (rune)(s[0] & masks[len]) << 18 | (rune)(s[1] & 0x3f) << 12
+	       | (rune)(s[2] & 0x3f) << 6 | (rune)(s[3] & 0x3f);
+	return c >> shiftc[len];
+}
+
+/* Validate the LEN bytes at BUF as UTF-8 without reading outside of them.
+   Return 0 if they are valid, else the 1-based offset of the first byte of
+   the first invalid or truncated sequence. */
+size_t
+utf8_validate_off(const char *buf, size_t len)
+{
+	for (size_t i = 0; i < len;) {
+		unsigned char s[4] = {0}; /* zero padding makes a cut-off sequence fail */
+		for (size_t j = 0; j < sizeof(s) && j < len - i; j++)
+			s[j] = (unsigned char)buf[i + j];
+		int n = lengths[s[0] >> 3];
+		rune c = (rune)(s[0] & masks[n]) << 18;
+		c |= (rune)(s[1] & 0x3f) << 12;
+		c |= (rune)(s[2] & 0x3f) << 6;
+		c |= (rune)(s[3] & 0x3f) << 0;
+		c >>= shiftc[n];
+
+		int e = (c < mins[n]) << 6;    /* overlong, or invalid lead byte */
+		e |= ((c >> 11) == 0x1B) << 7; /* surrogate, U+D800..U+DFFF */
+		e |= (c > 0x10FFFF) << 8;      /* beyond the last code point */
+		e |= (s[1] & 0xC0) >> 2;
+		e |= (s[2] & 0xC0) >> 4;
+		e |= (s[3]) >> 6;
+		e ^= 0x2A; /* clears the bits of tail bytes of the form 10xxxxxx */
+		e >>= shifte[n];
+		if (e != 0)
+			return i + 1;
+		i += (size_t)(n + !n);
+	}
+	return 0;
+}
diff --git a/src/unicode.h b/src/unicode.h
new file mode 100644
index 0000000..701c8c7
--- /dev/null
+++ b/src/unicode.h
@@ -0,0 +1,17 @@
+#ifndef ORYX_UNICODE_H
+#define ORYX_UNICODE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define RUNE_C(x) UINT32_C(x)
+typedef uint32_t rune; /* a Unicode code point, as returned by utf8_decode() */
+
+rune utf8_decode(const char **buf); /* decodes one rune and advances *buf */
+/* Return 0 if valid, else the 1-based offset of the first invalid sequence */
+size_t utf8_validate_off(const char *buf, size_t len);
+#if ORYX_SIMD
+bool utf8_validate_simd(const char *data, size_t len); /* true if valid */
+#endif
+#endif /* !ORYX_UNICODE_H */