From 79e6af86ca526d5fb56af6f6ca3da713e3a5e9f9 Mon Sep 17 00:00:00 2001
From: Thomas Voss <mail@thomasvoss.com>
Date: Tue, 13 Feb 2024 13:02:28 +0100
Subject: Genesis commit

---
 src/c8asm/assembler.c  |  74 ++++++
 src/c8asm/assembler.h  |  10 +
 src/c8asm/common.h     |  16 ++
 src/c8asm/grammar.ebnf |  46 ++++
 src/c8asm/instr.gperf  |  33 +++
 src/c8asm/lexer.c      | 204 ++++++++++++++++
 src/c8asm/lexer.h      |  28 +++
 src/c8asm/lookup.h     | 190 +++++++++++++++
 src/c8asm/main.c       | 109 +++++++++
 src/c8asm/parser.c     | 627 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/c8asm/parser.h     | 122 ++++++++++
 11 files changed, 1459 insertions(+)
 create mode 100644 src/c8asm/assembler.c
 create mode 100644 src/c8asm/assembler.h
 create mode 100644 src/c8asm/common.h
 create mode 100644 src/c8asm/grammar.ebnf
 create mode 100644 src/c8asm/instr.gperf
 create mode 100644 src/c8asm/lexer.c
 create mode 100644 src/c8asm/lexer.h
 create mode 100644 src/c8asm/lookup.h
 create mode 100644 src/c8asm/main.c
 create mode 100644 src/c8asm/parser.c
 create mode 100644 src/c8asm/parser.h

(limited to 'src/c8asm')

diff --git a/src/c8asm/assembler.c b/src/c8asm/assembler.c
new file mode 100644
index 0000000..0d02555
--- /dev/null
+++ b/src/c8asm/assembler.c
@@ -0,0 +1,74 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <da.h>
+#include <mbstring.h>
+
+#include "assembler.h"
+#include "cerr.h"
+#include "common.h"
+#include "parser.h"
+
+/* TODO: Remove */
+#ifndef unreachable
+#	define unreachable() __builtin_unreachable()
+#endif
+
+#define E_LEXISTS "label ‘%.*s’ has already been declared"
+
+struct label {
+	uint16_t addr;
+	struct u8view sv;
+};
+
+struct labels {
+	struct label *buf;
+	size_t len, cap;
+};
+
+static bool u8eq(struct u8view, struct u8view);
+static void pushlabel(struct labels *, struct label);
+
+static size_t i;
+
+bool
+u8eq(struct u8view x, struct u8view y)
+{
+	return x.len == y.len && memcmp(x.p, y.p, x.len) == 0;
+}
+
+void
+pushlabel(struct labels *dst, struct label lbl)
+{
+	da_foreach (dst, stored) {
+		if (u8eq(stored->sv, lbl.sv)) {
+			die_with_off(filename, lbl.sv.p - baseptr, E_LEXISTS,
+			             U8_PRI_ARGS(lbl.sv));
+		}
+	}
+
+	dapush(dst, lbl);
+}
+
+/* Walk the AST of one file and assign an address to every label in it */
+void
+assemble([[maybe_unused]] FILE *stream, struct ast ast)
+{
+	static struct labels locals, globals;
+
+	/* Old labels point into the previous file’s freed buffer; start over */
+	i = 0;
+	locals.len = globals.len = 0;
+
+	da_foreach (&ast, node) {
+		if (node->kind == D_LABEL) {
+			struct label lbl = {.addr = i, .sv = node->name};
+			pushlabel(node->name.p[0] == '.' ? &locals : &globals, lbl);
+		} else if (node->kind == D_INSTR)
+			i += node->instr.kind == I_DB ? node->instr.len : 2;
+		else
+			unreachable();
+	}
+}
diff --git a/src/c8asm/assembler.h b/src/c8asm/assembler.h
new file mode 100644
index 0000000..7b37a53
--- /dev/null
+++ b/src/c8asm/assembler.h
@@ -0,0 +1,10 @@
+#ifndef AHOY_C8ASM_ASSEMBLER_H
+#define AHOY_C8ASM_ASSEMBLER_H
+
+#include <stdio.h>
+
+#include "parser.h"
+
+void assemble(FILE *, struct ast);
+
+#endif /* !AHOY_C8ASM_ASSEMBLER_H */
diff --git a/src/c8asm/common.h b/src/c8asm/common.h
new file mode 100644
index 0000000..4f905b3
--- /dev/null
+++ b/src/c8asm/common.h
@@ -0,0 +1,16 @@
+#ifndef AHOY_C8ASM_COMMON_H
+#define AHOY_C8ASM_COMMON_H
+
+#include <mbstring.h>
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+
+#define lengthof(a) (sizeof(a) / sizeof(*(a)))
+#define streq(x, y) (!strcmp(x, y))
+
+extern size_t filesize;
+extern const char *filename;
+extern const char8_t *baseptr;
+
+#endif /* !AHOY_C8ASM_COMMON_H */
diff --git a/src/c8asm/grammar.ebnf b/src/c8asm/grammar.ebnf
new file mode 100644
index 0000000..59ba9fc
--- /dev/null
+++ b/src/c8asm/grammar.ebnf
@@ -0,0 +1,46 @@
+program = {line};
+line = {label}, [operation], EOL;
+label = IDENT, ':';
+
+operation = add  | and  | bcd | call
+          | cls  | drw  | hex | jp
+          | ld   | or   | ret | rnd
+          | rstr | se   | shl | shr
+          | sknp | skp  | sne | stor
+          | sub  | subn | sys | xor
+          | db;
+
+add  = "add", ((vreg, vreg) | (vreg, BYTE) | ("i", vreg));
+and  = "and", vreg, vreg;
+bcd  = "bcd", vreg;
+call = "call", addr;
+cls  = "cls";
+drw  = "drw", vreg, vreg, NIBBL;
+hex  = "hex", vreg;
+jp   = "jp", ["v0"], addr;
+ld   = "ld", ((vreg, (vreg | BYTE | "dt" | "k"))
+            | ("i", addr)
+            | ("dt", vreg)
+            | ("st", vreg));
+or   = "or", vreg, vreg;
+ret  = "ret";
+rnd  = "rnd", vreg, BYTE;
+rstr = "rstr", vreg;
+se   = "se", vreg, (vreg | BYTE);
+shl  = "shl", vreg;
+shr  = "shr", vreg;
+sknp = "sknp", vreg;
+skp  = "skp", vreg;
+sne  = "sne", vreg, (vreg | BYTE);
+stor = "stor", vreg;
+sub  = "sub", vreg, vreg;
+subn = "subn", vreg, vreg;
+sys  = "sys", addr;
+xor  = "xor", vreg, vreg;
+db   = "db", {(BYTE | STRING)};
+
+addr = ADDR | IDENT;
+vreg = "v0" | "v1" | "v2" | "v3"
+     | "v4" | "v5" | "v6" | "v7"
+     | "v8" | "v9" | "va" | "vb"
+     | "vc" | "vd" | "ve" | "vf";
diff --git a/src/c8asm/instr.gperf b/src/c8asm/instr.gperf
new file mode 100644
index 0000000..fac4e57
--- /dev/null
+++ b/src/c8asm/instr.gperf
@@ -0,0 +1,33 @@
+%compare-lengths
+%define initializer-suffix ,nullptr
+%define lookup-function-name oplookup
+%includes
+%readonly-tables
+%struct-type
+struct opf_pair { char *name; void (*pfn)(void); };
+%%
+add,  parseop_add
+and,  parseop_and
+bcd,  parseop_bcd
+call, parseop_call
+cls,  parseop_cls
+db,   parseop_db
+drw,  parseop_drw
+hex,  parseop_hex
+jp,   parseop_jp
+ld,   parseop_ld
+or,   parseop_or
+ret,  parseop_ret
+rnd,  parseop_rnd
+rstr, parseop_rstr
+se,   parseop_se
+shl,  parseop_shl
+shr,  parseop_shr
+sknp, parseop_sknp
+skp,  parseop_skp
+sne,  parseop_sne
+stor, parseop_stor
+sub,  parseop_sub
+subn, parseop_subn
+sys,  parseop_sys
+xor,  parseop_xor
diff --git a/src/c8asm/lexer.c b/src/c8asm/lexer.c
new file mode 100644
index 0000000..effc32e
--- /dev/null
+++ b/src/c8asm/lexer.c
@@ -0,0 +1,204 @@
+#include <da.h>
+#include <mbstring.h>
+#include <rtype.h>
+
+#include "cerr.h"
+#include "common.h"
+#include "lexer.h"
+
+#define ISDIGIT(n)   ((n) >= '0' && (n) <= '9')
+#define U8MOV(sv, n) ((sv)->p += (n), (sv)->len -= (n))
+
+#define E_BASE         "integer with invalid base specifier ‘%.*s’"
+#define E_EXTRA        "unknown extraneous character ‘%.*s’"
+#define E_IDENTCHAR    "illegal character in identifier ‘%.*s’"
+#define E_IDENTLOST    "local label missing identifier"
+#define E_IDENTSCHAR   "illegal first character in identifier ‘%.*s’"
+#define E_UNTERMINATED "unterminated string literal ‘%.*s%.*s’"
+#define E_UTF8         "invalid UTF-8 byte near ‘%02X’"
+
+#define EOLS     U"\n\v\f\r\x85\u2028\u2029"
+#define NUMCHARS U"'0123456789abcdefABCDEF"
+
+static void lexline(struct tokens *, struct u8view *);
+static bool skipws(struct u8view *);
+
+const char *
+tokrepr(tokkind k)
+{
+	return (const char *[]){
+		[T_COLON] = "colon",      [T_EOL] = "end of line",
+		[T_IDENT] = "identifier", [T_NUMBER] = "number",
+		[T_STRING] = "string",
+	}[k];
+}
+
+struct tokens
+lexfile(struct u8view sv)
+{
+	const char8_t *s;
+	struct tokens toks;
+
+	if (s = u8chk(sv.p, sv.len))
+		die_with_off(filename, s - sv.p, E_UTF8, *s);
+
+	dainit(&toks, 256);
+
+	while (sv.len) {
+		size_t len = u8cbspn(sv.p, sv.len, EOLS, lengthof(EOLS) - 1);
+		struct u8view line = {
+			.p = sv.p,
+			.len = len,
+		};
+
+		lexline(&toks, &line);
+
+		/* Skip trailing EOL */
+		if (sv.len > len)
+			len += u8rlen(sv.p + len);
+
+		U8MOV(&sv, len);
+	}
+
+	return toks;
+}
+
+void
+lexline(struct tokens *toks, struct u8view *sv)
+{
+#define die_with_off(...) \
+	die_with_off(filename, sv->p - baseptr - w, __VA_ARGS__);
+
+	struct token tok;
+
+	for (;;) {
+		int w;
+		rune ch;
+
+		if (!skipws(sv))
+			goto end;
+
+		tok.sv.p = sv->p;
+		tok.sv.len = w = u8next(&ch, &sv->p, &sv->len);
+
+		if (ISDIGIT(ch)) {
+			size_t off, m = 10;
+
+			tok.kind = T_NUMBER;
+			tok.base = 10;
+
+			if (ch == '0') {
+				w = u8next(&ch, &sv->p, &sv->len);
+				if (!w || rprop_is_pat_ws(ch)) {
+					sv->p -= w;
+					sv->len += w;
+					goto out;
+				}
+				tok.sv.len++;
+
+				switch (ch) {
+				case 'b':
+					tok.base = m = 2;
+					break;
+				case 'o':
+					tok.base = m = 8;
+					break;
+				case 'd':
+					/* Implicitly base-10 already */
+					break;
+				case 'x':
+					/* m = 22 because A–F can be both upper- or lowercase */
+					tok.base = 16;
+					m = 22;
+					break;
+				default:
+					if (!ISDIGIT(ch))
+						die_with_off(E_BASE, w, sv->p - w);
+				}
+			}
+
+out:
+			/* +1 to support the digit separator */
+			tok.sv.len += off = u8bspn(sv->p, sv->len, NUMCHARS, m + 1);
+			U8MOV(sv, off);
+		} else if (ch == '.' || ch == '_' || rprop_is_xids(ch)) {
+			tok.kind = T_IDENT;
+			if (ch == '.') {
+				if (!sv->len)
+					die_with_off(E_IDENTLOST);
+
+				tok.sv.len += w = u8next(&ch, &sv->p, &sv->len);
+				if (rprop_is_pat_ws(ch))
+					die_with_off(E_IDENTLOST);
+				if (ch != '_' && !rprop_is_xids(ch)) {
+					die_with_off(E_IDENTSCHAR, w, sv->p - w);
+				}
+			}
+
+			while (w = u8next(&ch, &sv->p, &sv->len)) {
+				if (ch == ':' || rprop_is_pat_ws(ch)) {
+					U8MOV(sv, -w);
+					break;
+				}
+				if (!rprop_is_xidc(ch))
+					die_with_off(E_IDENTCHAR, w, sv->p - w);
+
+				tok.sv.len += w;
+			}
+		} else if (ch == '"') {
+			tok.kind = T_STRING;
+			while (w = u8next(&ch, &sv->p, &sv->len)) {
+				tok.sv.len += w;
+				if (ch == '"')
+					goto found;
+			}
+			die_with_off(E_UNTERMINATED, (int)MIN(tok.sv.len, 20), tok.sv.p,
+			             tok.sv.len > 20 ? (int)lengthof(u8"…") - 1 : 0, u8"…");
+found:
+		} else if (ch == ':') {
+			tok.kind = T_COLON;
+		} else if (ch == ';') {
+			goto end;
+		} else {
+			die_with_off(E_EXTRA, w, sv->p - w);
+		}
+
+		/* The colon is the only token that isn’t whitespace separated */
+		if (ch != ':' && sv->len) {
+			w = u8next(&ch, &sv->p, &sv->len);
+			if (!w || !rprop_is_pat_ws(ch))
+				die_with_off(E_EXTRA, w, sv->p - w);
+		}
+
+		dapush(toks, tok);
+	}
+
+end:;
+	tok = (struct token){
+		.kind = T_EOL,
+		.sv.p = sv->p,
+		.sv.len = 0,
+	};
+	dapush(toks, tok);
+
+#undef die_with_off
+}
+
+bool
+skipws(struct u8view *sv)
+{
+	rune ch;
+
+	if (!sv->len)
+		return false;
+
+	for (int w = u8tor_uc(&ch, sv->p); rprop_is_pat_ws(ch);
+	     w = u8tor_uc(&ch, sv->p))
+	{
+		U8MOV(sv, w);
+		if (!sv->len)
+			return false;
+	}
+
+	return true;
+}
diff --git a/src/c8asm/lexer.h b/src/c8asm/lexer.h
new file mode 100644
index 0000000..ef20cef
--- /dev/null
+++ b/src/c8asm/lexer.h
@@ -0,0 +1,28 @@
+#ifndef AHOY_C8ASM_LEXER_H
+#define AHOY_C8ASM_LEXER_H
+
+#include <mbstring.h>
+
+typedef enum [[clang::flag_enum]] {
+	T_COLON = 1 << 0,
+	T_EOL = 1 << 1,
+	T_IDENT = 1 << 2,
+	T_NUMBER = 1 << 3,
+	T_STRING = 1 << 4,
+} tokkind;
+
+struct token {
+	tokkind kind;
+	struct u8view sv;
+	int base; /* For number literals */
+};
+
+struct tokens {
+	struct token *buf;
+	size_t len, cap;
+};
+
+const char *tokrepr(tokkind);
+struct tokens lexfile(struct u8view);
+
+#endif /* !AHOY_C8ASM_LEXER_H */
diff --git a/src/c8asm/lookup.h b/src/c8asm/lookup.h
new file mode 100644
index 0000000..26bc141
--- /dev/null
+++ b/src/c8asm/lookup.h
@@ -0,0 +1,190 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf --output-file src/c8asm/lookup.h src/c8asm/instr.gperf  */
+/* Computed positions: -k'1-3' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
+#endif
+
+#line 7 "src/c8asm/instr.gperf"
+struct opf_pair { char *name; void (*pfn)(void); };
+#include <string.h>
+
+#define TOTAL_KEYWORDS 25
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 4
+#define MIN_HASH_VALUE 2
+#define MAX_HASH_VALUE 49
+/* maximum key range = 48, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register size_t len)
+{
+  static const unsigned char asso_values[] =
+    {
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50,  5, 15, 15,
+      10,  0, 50, 50, 10, 50,  0,  5, 20, 50,
+       5,  5, 18, 50,  0,  0,  0, 30, 50,  8,
+       3,  3, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+      50, 50, 50, 50, 50, 50
+    };
+  register unsigned int hval = len;
+
+  switch (hval)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[2]];
+      /*FALLTHROUGH*/
+      case 2:
+        hval += asso_values[(unsigned char)str[1]];
+      /*FALLTHROUGH*/
+      case 1:
+        hval += asso_values[(unsigned char)str[0]];
+        break;
+    }
+  return hval;
+}
+
+const struct opf_pair *
+oplookup (register const char *str, register size_t len)
+{
+  static const unsigned char lengthtable[] =
+    {
+       0,  0,  2,  3,  4,  0,  3,  2,  3,  4,  0,  3,  0,  3,
+       4,  0,  3,  0,  3,  0,  2,  3,  0,  3,  0,  0,  3,  2,
+       3,  0,  0,  0,  2,  3,  0,  0,  0,  0,  3,  0,  0,  0,
+       0,  3,  4,  0,  0,  0,  3,  4
+    };
+  static const struct opf_pair wordlist[] =
+    {
+      {"",nullptr}, {"",nullptr},
+#line 23 "src/c8asm/instr.gperf"
+      {"se",   parseop_se},
+#line 20 "src/c8asm/instr.gperf"
+      {"ret",  parseop_ret},
+#line 22 "src/c8asm/instr.gperf"
+      {"rstr", parseop_rstr},
+      {"",nullptr},
+#line 32 "src/c8asm/instr.gperf"
+      {"sys",  parseop_sys},
+#line 19 "src/c8asm/instr.gperf"
+      {"or",   parseop_or},
+#line 28 "src/c8asm/instr.gperf"
+      {"sne",  parseop_sne},
+#line 29 "src/c8asm/instr.gperf"
+      {"stor", parseop_stor},
+      {"",nullptr},
+#line 33 "src/c8asm/instr.gperf"
+      {"xor",  parseop_xor},
+      {"",nullptr},
+#line 25 "src/c8asm/instr.gperf"
+      {"shr",  parseop_shr},
+#line 26 "src/c8asm/instr.gperf"
+      {"sknp", parseop_sknp},
+      {"",nullptr},
+#line 16 "src/c8asm/instr.gperf"
+      {"hex",  parseop_hex},
+      {"",nullptr},
+#line 21 "src/c8asm/instr.gperf"
+      {"rnd",  parseop_rnd},
+      {"",nullptr},
+#line 17 "src/c8asm/instr.gperf"
+      {"jp",   parseop_jp},
+#line 15 "src/c8asm/instr.gperf"
+      {"drw",  parseop_drw},
+      {"",nullptr},
+#line 10 "src/c8asm/instr.gperf"
+      {"and",  parseop_and},
+      {"",nullptr}, {"",nullptr},
+#line 27 "src/c8asm/instr.gperf"
+      {"skp",  parseop_skp},
+#line 14 "src/c8asm/instr.gperf"
+      {"db",   parseop_db},
+#line 9 "src/c8asm/instr.gperf"
+      {"add",  parseop_add},
+      {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 18 "src/c8asm/instr.gperf"
+      {"ld",   parseop_ld},
+#line 24 "src/c8asm/instr.gperf"
+      {"shl",  parseop_shl},
+      {"",nullptr}, {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 13 "src/c8asm/instr.gperf"
+      {"cls",  parseop_cls},
+      {"",nullptr}, {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 11 "src/c8asm/instr.gperf"
+      {"bcd",  parseop_bcd},
+#line 12 "src/c8asm/instr.gperf"
+      {"call", parseop_call},
+      {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 30 "src/c8asm/instr.gperf"
+      {"sub",  parseop_sub},
+#line 31 "src/c8asm/instr.gperf"
+      {"subn", parseop_subn}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register unsigned int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE)
+        if (len == lengthtable[key])
+          {
+            register const char *s = wordlist[key].name;
+
+            if (*str == *s && !memcmp (str + 1, s + 1, len - 1))
+              return &wordlist[key];
+          }
+    }
+  return 0;
+}
diff --git a/src/c8asm/main.c b/src/c8asm/main.c
new file mode 100644
index 0000000..edc595b
--- /dev/null
+++ b/src/c8asm/main.c
@@ -0,0 +1,109 @@
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <builder.h>
+#include <da.h>
+
+#include "assembler.h"
+#include "cerr.h"
+#include "common.h"
+#include "lexer.h"
+#include "parser.h"
+
+static void asmfile(int, const char *);
+
+size_t filesize;
+const char *filename;
+const char8_t *baseptr;
+
+int
+main(int argc, char **argv)
+{
+	int opt;
+	const struct option longopts[] = {
+		{"help",  no_argument, nullptr, 'h'},
+		{nullptr, no_argument, nullptr, 0  },
+	};
+
+	cerrinit(*argv);
+	while ((opt = getopt_long(argc, argv, "h", longopts, nullptr)) != -1) {
+		switch (opt) {
+		case 'h':
+			execlp("man", "man", "1", argv[0], nullptr);
+			die("execlp: man 1 %s", argv[0]);
+		default:
+			fprintf(stderr, "Usage: %s [file ...]\n", argv[0]);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (!argc)
+		asmfile(STDIN_FILENO, "-");
+	for (int i = 0; i < argc; i++) {
+		if (streq("-", argv[i]))
+			asmfile(STDIN_FILENO, "-");
+		else {
+			int fd;
+			if ((fd = open(argv[i], O_RDONLY)) == -1)
+				die("open: %s", argv[i]);
+			asmfile(fd, argv[i]);
+			close(fd);
+		}
+	}
+
+	return EXIT_SUCCESS;
+}
+
+void
+asmfile(int fd, const char *fn)
+{
+	char *buf;
+	size_t blksize;
+	ssize_t nr;
+	struct ast ast;
+	struct stat st;
+	struct u8str sb;
+	struct tokens toks;
+
+	filename = fn;
+
+	if (fstat(fd, &st) == -1)
+		die("fstat: %s", filename);
+	blksize = MAX(st.st_blksize, BUFSIZ);
+	if (!(buf = malloc(blksize)))
+		die("malloc");
+
+	/* Load the contents of the file into sb */
+	u8strinit(&sb, S_ISREG(st.st_mode) ? (size_t)st.st_size : blksize);
+	while ((nr = read(fd, buf, blksize)) > 0) {
+		struct u8view v = {
+			.p = buf,
+			.len = nr,
+		};
+		if (!u8strpush(&sb, v))
+			die("u8strpush");
+	}
+	if (nr == -1)
+		die("read: %s", filename);
+
+	free(buf);
+	filesize = sb.len;
+	baseptr = u8strfit(&sb)->p;
+	assemble(stdout, ast = parsefile(toks = lexfile(u8strtou8(sb))));
+
+	da_foreach (&ast, node) {
+		if (node->kind == D_INSTR && node->instr.kind == I_DB)
+			free(node->instr.buf);
+	}
+
+	free(toks.buf);
+	u8strfree(sb);
+}
diff --git a/src/c8asm/parser.c b/src/c8asm/parser.c
new file mode 100644
index 0000000..b746bea
--- /dev/null
+++ b/src/c8asm/parser.c
@@ -0,0 +1,627 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <da.h>
+
+#include "cerr.h"
+#include "common.h"
+#include "lexer.h"
+#include "parser.h"
+
+/* TODO: Remove */
+#ifndef unreachable
+#	define unreachable() __builtin_unreachable()
+#endif
+
+#define E_BADLABEL  "identifier ‘%.*s’ cannot be used as a label"
+#define E_EARLY     "expected %s but input ended prematurely"
+#define E_EXPECTED2 "expected %s but got %s"
+#define E_EXPECTED  "expected %s but got %s ‘%.*s’"
+#define E_INSTR     "got unknown instruction ‘%.*s’"
+#define E_TOOLARGE  "expected %s but got out-of-range integer ‘%.*s’"
+
+#define die_with_off(P, ...) die_with_off(filename, (P)-baseptr, __VA_ARGS__)
+#define memeq(X, Y, N)       (!memcmp(X, Y, N))
+
+enum numsize {
+	NS_NIBBLE = 0xF,
+	NS_BYTE = 0xFF,
+	NS_ADDR = 0xFFF,
+};
+
+enum regtype {
+	RT_NONE,
+	RT_DT,
+	RT_I,
+	RT_K,
+	RT_ST,
+	RT_VX,
+};
+
+static bool parselabel(void);
+static void parseline(void);
+static void parseop(void);
+static struct raw_addr parseaddr(struct token);
+static struct token reqnext(const char *, tokkind);
+
+static uint16_t hexval(char);
+static uint16_t parsenum(struct token, enum numsize);
+static enum regtype regtype(struct u8view);
+
+static void parseop_add(void), parseop_and(void), parseop_bcd(void),
+	parseop_call(void), parseop_cls(void), parseop_db(void), parseop_drw(void),
+	parseop_hex(void), parseop_jp(void), parseop_ld(void), parseop_or(void),
+	parseop_ret(void), parseop_rnd(void), parseop_rstr(void), parseop_se(void),
+	parseop_shl(void), parseop_shr(void), parseop_sknp(void), parseop_skp(void),
+	parseop_sne(void), parseop_stor(void), parseop_sub(void),
+	parseop_subn(void), parseop_sys(void), parseop_xor(void);
+#include "lookup.h"
+
+static size_t i;
+static struct ast ast;
+static struct tokens *tokens;
+
+struct ast
+parsefile(struct tokens toks)
+{
+	ast.len = i = 0;
+	tokens = &toks;
+
+	while (i < toks.len)
+		parseline();
+
+	/* We can safely not do this, but GCC disagrees.  This gets GCC to not
+	   complain about dangling pointers. */
+	tokens = nullptr;
+
+	return ast;
+}
+
+void
+parseline(void)
+{
+	while (parselabel())
+		;
+	parseop();
+	reqnext("end of line", T_EOL);
+}
+
+bool
+parselabel(void)
+{
+	if (tokens->len - i >= 2 && tokens->buf[i].kind == T_IDENT
+	    && tokens->buf[i + 1].kind == T_COLON)
+	{
+		struct dir lbl = {
+			.kind = D_LABEL,
+			.name = tokens->buf[i].sv,
+		};
+		if (regtype(lbl.name) != RT_NONE)
+			die_with_off(lbl.name.p, E_BADLABEL, U8_PRI_ARGS(lbl.name));
+		dapush(&ast, lbl);
+		i += 2;
+		return true;
+	}
+
+	return false;
+}
+
+void
+parseop(void)
+{
+	const struct opf_pair *op;
+	struct token tok = reqnext("instruction or end of line", T_IDENT | T_EOL);
+
+	if (tok.kind == T_EOL) {
+		i--;
+		return;
+	}
+
+	if (!(op = oplookup(tok.sv.p, tok.sv.len)))
+		die_with_off(tok.sv.p, E_INSTR, U8_PRI_ARGS(tok.sv));
+	op->pfn();
+}
+
+/* Numbers are range-checked against NS_ADDR; identifiers become label refs */
+struct raw_addr
+parseaddr(struct token tok)
+{
+	if (tok.kind == T_NUMBER)
+		return (struct raw_addr){.val = parsenum(tok, NS_ADDR)};
+	if (tok.kind != T_IDENT) /* callers only request T_IDENT | T_NUMBER */
+		unreachable();
+	if (regtype(tok.sv) != RT_NONE) /* register names cannot be labels */
+		die_with_off(tok.sv.p, E_BADLABEL, U8_PRI_ARGS(tok.sv));
+	return (struct raw_addr){.label = true, .sv = tok.sv};
+}
+
+enum regtype
+regtype(struct u8view v)
+{
+	if (v.len == 0 || v.len > 2)
+		return RT_NONE;
+	if (v.len == 1)
+		return v.p[0] == 'i' ? RT_I : v.p[0] == 'k' ? RT_K : RT_NONE;
+	if (memeq(v.p, "dt", 2))
+		return RT_DT;
+	if (memeq(v.p, "st", 2))
+		return RT_ST;
+	return v.p[0] == 'v'
+	            && ((v.p[1] >= '0' && v.p[1] <= '9')
+	                || (v.p[1] >= 'a' && v.p[1] <= 'f'))
+	         ? RT_VX
+	         : RT_NONE;
+}
+
+uint16_t
+hexval(char ch)
+{
+	return ch >= '0' && ch <= '9' ? ch - '0'
+	     : ch >= 'a' && ch <= 'f' ? ch - 'a' + 10
+	                              : (unreachable(), 0);
+}
+
+/* Convert the number token ‘tok’ to its value in base ‘tok.base’, dying with
+   E_TOOLARGE if that value would exceed ‘size’. */
+uint16_t
+parsenum(struct token tok, enum numsize size)
+{
+	uint16_t acc = 0, cutoff = size, cutlim;
+	struct u8view v = tok.sv;
+
+	cutlim = cutoff % tok.base;
+	cutoff /= tok.base;
+
+	if (v.len >= 2 && v.p[0] == '0' && v.p[1] > '9') { /* base prefix */
+		v.p += 2;
+		v.len -= 2;
+	}
+
+	for (; v.len; v.p++, v.len--) {
+		char ch = *v.p; /* read here so we never look past the view */
+		if (ch == '\'')
+			continue;
+		else if (ch >= '0' && ch <= '9')
+			ch -= '0';
+		else if (ch >= 'a' && ch <= 'f')
+			ch -= 'a' - 10;
+		else if (ch >= 'A' && ch <= 'F')
+			ch -= 'A' - 10;
+		else
+			unreachable();
+
+		if (acc > cutoff || (acc == cutoff && ch > cutlim)) {
+			const char *s = size == NS_NIBBLE ? "nibble"
+			              : size == NS_BYTE   ? "byte"
+			              : size == NS_ADDR   ? "address"
+			                                  : (unreachable(), nullptr);
+			die_with_off(tok.sv.p, E_TOOLARGE, s, U8_PRI_ARGS(tok.sv));
+		}
+
+		acc *= tok.base;
+		acc += ch;
+	}
+
+	return acc;
+}
+
+struct token
+reqnext(const char *want, tokkind msk)
+{
+	struct token t;
+	if (i >= tokens->len)
+		die_with_off(baseptr + filesize - 1, E_EARLY, want);
+
+	if ((t = tokens->buf[i++]).kind & msk)
+		return t;
+	if (t.kind == T_EOL)
+		die_with_off(t.sv.p, E_EXPECTED2, want, tokrepr(t.kind));
+	die_with_off(t.sv.p, E_EXPECTED, want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
+}
+
+#define I(...) ((struct dir){.kind = D_INSTR, .instr = (__VA_ARGS__)})
+
+/* Common implementations of instructions that always take 1 or 2 v-registers */
+#define ONE_VREG(T) \
+	do { \
+		struct instr ins = {.kind = (T)}; \
+		struct token tok = reqnext("v-register", T_IDENT); \
+		if (regtype(tok.sv) != RT_VX) { /* regtype is not a bitmask */ \
+			die_with_off(tok.sv.p, E_EXPECTED, "v-register", \
+			             tokrepr(tok.kind), U8_PRI_ARGS(tok.sv)); \
+		} \
+		ins.args[ins.len++].val = hexval(tok.sv.p[1]); \
+		dapush(&ast, I(ins)); \
+	} while (false)
+#define TWO_VREG(T) \
+	do { \
+		struct instr ins = {.kind = (T)}; \
+		struct token lhs = reqnext("v-register", T_IDENT); \
+		struct token rhs = reqnext("v-register", T_IDENT); \
+		if (regtype(lhs.sv) != RT_VX) { /* regtype is not a bitmask */ \
+			die_with_off(lhs.sv.p, E_EXPECTED, "v-register", \
+			             tokrepr(lhs.kind), U8_PRI_ARGS(lhs.sv)); \
+		} \
+		if (regtype(rhs.sv) != RT_VX) { \
+			die_with_off(rhs.sv.p, E_EXPECTED, "v-register", \
+			             tokrepr(rhs.kind), U8_PRI_ARGS(rhs.sv)); \
+		} \
+		ins.args[ins.len++].val = hexval(lhs.sv.p[1]); \
+		ins.args[ins.len++].val = hexval(rhs.sv.p[1]); \
+		dapush(&ast, I(ins)); \
+	} while (false)
+
+void
+parseop_add(void)
+{
+	enum regtype rt;
+	struct instr ins = {};
+	struct token tok = reqnext("v- or i-register", T_IDENT);
+
+	switch (rt = regtype(tok.sv)) {
+	case RT_VX:
+		ins.args[ins.len++].val = hexval(tok.sv.p[1]);
+		tok = reqnext("byte or v-register", T_IDENT | T_NUMBER);
+
+		if (tok.kind == T_NUMBER) {
+			ins.kind = I_ADD_VX_B;
+			ins.args[ins.len++].val = parsenum(tok, NS_BYTE);
+		} else if (regtype(tok.sv) != RT_VX) {
+			die_with_off(tok.sv.p, E_EXPECTED, "v-register", tokrepr(tok.kind),
+			             U8_PRI_ARGS(tok.sv));
+		} else {
+			ins.kind = I_ADD_VX_VY;
+			ins.args[ins.len++].val = hexval(tok.sv.p[1]);
+		}
+		break;
+	case RT_I:
+		ins.kind = I_ADD_I_VX;
+		tok = reqnext("v-register", T_IDENT);
+		if (regtype(tok.sv) != RT_VX) {
+			die_with_off(tok.sv.p, E_EXPECTED, "v-register", tokrepr(tok.kind),
+			             U8_PRI_ARGS(tok.sv));
+		}
+		ins.args[ins.len++].val = hexval(tok.sv.p[1]);
+		break;
+	default:
+		die_with_off(tok.sv.p, E_EXPECTED, "v- or i-register",
+		             tokrepr(tok.kind), U8_PRI_ARGS(tok.sv));
+	}
+
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_and(void)
+{
+	TWO_VREG(I_AND);
+}
+
+void
+parseop_bcd(void)
+{
+	ONE_VREG(I_BCD);
+}
+
+void
+parseop_call(void)
+{
+	struct instr ins = {.kind = I_CALL};
+	struct token tok = reqnext("address", T_IDENT | T_NUMBER);
+	ins.args[ins.len++] = parseaddr(tok);
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_cls(void)
+{
+	dapush(&ast, I((struct instr){.kind = I_CLS}));
+}
+
+/* Parse the operands of ‘db’: at least one byte or string literal, repeated
+   until the end of the line, each pushed onto ‘ins’ with dapush. */
+void
+parseop_db(void)
+{
+	struct instr ins = {.kind = I_DB};
+	do {
+		struct token tok = reqnext("byte or string", T_NUMBER | T_STRING);
+		if (tok.kind == T_NUMBER) {
+			dapush(&ins, parsenum(tok, NS_BYTE));
+		} else if (tok.kind == T_STRING) {
+			/* Skip the enclosing quotes; ‘j’ so that ‘i’ isn’t shadowed */
+			for (size_t j = 1; j < tok.sv.len - 1; j++)
+				dapush(&ins, tok.sv.p[j]);
+		} else {
+			unreachable();
+		}
+	} while (i < tokens->len && tokens->buf[i].kind != T_EOL);
+
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_drw(void)
+{
+	struct instr ins = {.kind = I_DRW};
+	struct token op1, op2, op3;
+
+	op1 = reqnext("v-register", T_IDENT);
+	op2 = reqnext("v-register", T_IDENT);
+	op3 = reqnext("nibble", T_NUMBER);
+
+	if (regtype(op1.sv) != RT_VX) {
+		die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+		             U8_PRI_ARGS(op1.sv));
+	}
+	if (regtype(op2.sv) != RT_VX) {
+		die_with_off(op2.sv.p, E_EXPECTED, "v-register", tokrepr(op2.kind),
+		             U8_PRI_ARGS(op2.sv));
+	}
+
+	ins.args[ins.len++].val = hexval(op1.sv.p[1]);
+	ins.args[ins.len++].val = hexval(op2.sv.p[1]);
+	ins.args[ins.len++].val = parsenum(op3, NS_NIBBLE);
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_hex(void)
+{
+	ONE_VREG(I_HEX);
+}
+
+void
+parseop_jp(void)
+{
+	enum regtype rt;
+	struct instr ins = {};
+	struct token op = reqnext("v0-register or address", T_IDENT | T_NUMBER);
+
+	if (op.kind == T_IDENT)
+		rt = regtype(op.sv);
+	if (op.kind == T_NUMBER || (op.kind == T_IDENT && rt == RT_NONE)) {
+		ins.kind = I_JP_ADDR;
+		ins.args[ins.len++] = parseaddr(op);
+	} else if (op.kind == T_IDENT) {
+		ins.kind = I_JP_V0_ADDR;
+		if (op.sv.len != 2 || !memeq(op.sv.p, "v0", 2)) {
+			die_with_off(op.sv.p, E_EXPECTED, "v0-register or address",
+			             tokrepr(op.kind), U8_PRI_ARGS(op.sv));
+		}
+		ins.args[ins.len++] = parseaddr(reqnext("address", T_NUMBER | T_IDENT));
+	} else
+		unreachable();
+
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_ld(void)
+{
+	enum regtype rt;
+	struct instr ins = {};
+	struct token op = reqnext("v-, i-, dt-, or st-register", T_IDENT);
+
+	switch (rt = regtype(op.sv)) {
+	case RT_DT:
+	case RT_ST:
+		ins.kind = rt == RT_DT ? I_LD_DT : I_LD_ST;
+		op = reqnext("v-register", T_IDENT);
+		if (regtype(op.sv) != RT_VX) {
+			die_with_off(op.sv.p, E_EXPECTED, "v-register", tokrepr(op.kind),
+			             U8_PRI_ARGS(op.sv));
+		}
+		ins.args[ins.len++].val = hexval(op.sv.p[1]);
+		break;
+
+	case RT_I:
+		ins.kind = I_LD_I;
+		ins.args[ins.len++] = parseaddr(reqnext("address", T_NUMBER | T_IDENT));
+		break;
+
+	case RT_VX:
+		ins.args[ins.len++].val = hexval(op.sv.p[1]);
+		op = reqnext("v-, k-, or dt-register, or byte", T_IDENT | T_NUMBER);
+
+		switch (op.kind) {
+		case T_IDENT:
+			switch (rt = regtype(op.sv)) {
+			case RT_DT:
+				ins.kind = I_LD_VX_DT;
+				break;
+			case RT_K:
+				ins.kind = I_LD_VX_K;
+				break;
+			case RT_VX:
+				ins.kind = I_LD_VX_VY;
+				ins.args[ins.len++].val = hexval(op.sv.p[1]);
+				break;
+			default:
+				die_with_off(op.sv.p, E_EXPECTED,
+				             "v-, k-, or dt-register, or byte",
+				             tokrepr(op.kind), U8_PRI_ARGS(op.sv));
+			}
+
+			break;
+		case T_NUMBER:
+			ins.kind = I_LD_VX_BYTE;
+			ins.args[ins.len++].val = parsenum(op, NS_BYTE);
+			break;
+		default:
+			unreachable();
+		}
+		break;
+
+	default:
+		die_with_off(op.sv.p, E_EXPECTED, "v-, i-, dt-, or st-register",
+		             tokrepr(op.kind), U8_PRI_ARGS(op.sv));
+	}
+
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_or(void)
+{
+	TWO_VREG(I_OR);
+}
+
+void
+parseop_ret(void)
+{
+	dapush(&ast, I((struct instr){.kind = I_RET}));
+}
+
+void
+parseop_rnd(void)
+{
+	struct instr ins = {.kind = I_RND};
+	struct token op1, op2;
+
+	op1 = reqnext("v-register", T_IDENT);
+	op2 = reqnext("byte", T_NUMBER);
+
+	if (regtype(op1.sv) != RT_VX) {
+		die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+		             U8_PRI_ARGS(op1.sv));
+	}
+
+	ins.args[ins.len++].val = hexval(op1.sv.p[1]);
+	ins.args[ins.len++].val = parsenum(op2, NS_BYTE);
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_rstr(void)
+{
+	ONE_VREG(I_RSTR);
+}
+
+void
+parseop_se(void)
+{
+	struct instr ins = {};
+	struct token op1, op2;
+
+	op1 = reqnext("v-register", T_IDENT);
+	op2 = reqnext("byte or v-register", T_IDENT | T_NUMBER);
+
+	if (regtype(op1.sv) != RT_VX) {
+		die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+		             U8_PRI_ARGS(op1.sv));
+	}
+	ins.args[ins.len++].val = hexval(op1.sv.p[1]);
+
+	switch (op2.kind) {
+	case T_IDENT:
+		if (regtype(op2.sv) != RT_VX) {
+			die_with_off(op2.sv.p, E_EXPECTED, "v-register", tokrepr(op2.kind),
+			             U8_PRI_ARGS(op2.sv));
+		}
+		ins.kind = I_SE_VX_VY;
+		ins.args[ins.len++].val = hexval(op2.sv.p[1]);
+		break;
+	case T_NUMBER:
+		ins.kind = I_SE_VX_B;
+		ins.args[ins.len++].val = parsenum(op2, NS_BYTE);
+		break;
+	default:
+		unreachable();
+	}
+
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_shl(void)
+{
+	ONE_VREG(I_SHL);
+}
+
+void
+parseop_shr(void)
+{
+	ONE_VREG(I_SHR);
+}
+
+void
+parseop_sknp(void)
+{
+	ONE_VREG(I_SKNP);
+}
+
+void
+parseop_skp(void)
+{
+	ONE_VREG(I_SKP);
+}
+
+void
+parseop_sne(void)
+{
+	struct instr ins = {};
+	struct token op1, op2;
+
+	op1 = reqnext("v-register", T_IDENT);
+	op2 = reqnext("byte or v-register", T_IDENT | T_NUMBER);
+
+	if (regtype(op1.sv) != RT_VX) {
+		die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+		             U8_PRI_ARGS(op1.sv));
+	}
+	ins.args[ins.len++].val = hexval(op1.sv.p[1]);
+
+	switch (op2.kind) {
+	case T_IDENT:
+		if (regtype(op2.sv) != RT_VX) {
+			die_with_off(op2.sv.p, E_EXPECTED, "v-register", tokrepr(op2.kind),
+			             U8_PRI_ARGS(op2.sv));
+		}
+		ins.kind = I_SNE_VX_VY;
+		ins.args[ins.len++].val = hexval(op2.sv.p[1]);
+		break;
+	case T_NUMBER:
+		ins.kind = I_SNE_VX_B;
+		ins.args[ins.len++].val = parsenum(op2, NS_BYTE);
+		break;
+	default:
+		unreachable();
+	}
+
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_stor(void)
+{
+	ONE_VREG(I_STOR);
+}
+
+void
+parseop_sub(void)
+{
+	TWO_VREG(I_SUB);
+}
+
+void
+parseop_subn(void)
+{
+	TWO_VREG(I_SUBN);
+}
+
+void
+parseop_sys(void)
+{
+	struct instr ins = {.kind = I_SYS};
+	ins.args[ins.len++] = parseaddr(reqnext("address", T_NUMBER | T_IDENT));
+	dapush(&ast, I(ins));
+}
+
+void
+parseop_xor(void)
+{
+	TWO_VREG(I_XOR);
+}
diff --git a/src/c8asm/parser.h b/src/c8asm/parser.h
new file mode 100644
index 0000000..392b003
--- /dev/null
+++ b/src/c8asm/parser.h
@@ -0,0 +1,122 @@
+#ifndef AHOY_C8ASM_PARSER_H
+#define AHOY_C8ASM_PARSER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <mbstring.h>
+
+struct tokens;
+
+typedef enum {
+	I_ADD_I_VX,
+	I_ADD_VX_B,
+	I_ADD_VX_VY,
+	I_AND,
+	I_BCD,
+	I_CALL,
+	I_CLS,
+	I_DB,
+	I_DRW,
+	I_HEX,
+	I_JP_ADDR,
+	I_JP_V0_ADDR,
+	I_LD_DT,
+	I_LD_I,
+	I_LD_ST,
+	I_LD_VX_BYTE,
+	I_LD_VX_DT,
+	I_LD_VX_K,
+	I_LD_VX_VY,
+	I_OR,
+	I_RET,
+	I_RND,
+	I_RSTR,
+	I_SE_VX_B,
+	I_SE_VX_VY,
+	I_SHL,
+	I_SHR,
+	I_SKNP,
+	I_SKP,
+	I_SNE_VX_B,
+	I_SNE_VX_VY,
+	I_STOR,
+	I_SUB,
+	I_SUBN,
+	I_SYS,
+	I_XOR,
+} instrkind;
+
+typedef enum {
+	R_V0,
+	R_V1,
+	R_V2,
+	R_V3,
+	R_V4,
+	R_V5,
+	R_V6,
+	R_V7,
+	R_V8,
+	R_V9,
+	R_VA,
+	R_VB,
+	R_VC,
+	R_VD,
+	R_VE,
+	R_VF,
+	R_I,
+	R_K,
+	R_DT,
+	R_ST,
+} reg;
+
+typedef enum {
+	D_INSTR,
+	D_LABEL,
+} dirkind;
+
+/* Arguments can always be represented by a uint16_t, however the parser is not
+   responsible for assigning addresses to labels.  As a result an arg at this
+   stage can be either a uint16_t or the name of a label. */
+struct raw_addr {
+	bool label;
+	union {
+		uint16_t val;
+		struct u8view sv;
+	};
+};
+
+struct instr {
+	instrkind kind;
+
+	/* The most arguments any instruction can take is 3, so it’s more efficient
+	   to just store the arguments in a fixed-size array.  The only exception is
+	   the ‘db’ instruction which takes a variable-number of arguments, so in
+	   that case we use a dynamic array. */
+	union {
+		struct raw_addr args[3];
+		struct {
+			uint8_t *buf;
+			size_t cap;
+		};
+	};
+
+	size_t len;
+};
+
+struct dir {
+	dirkind kind;
+	union {
+		struct u8view name;
+		struct instr instr;
+	};
+};
+
+struct ast {
+	struct dir *buf;
+	size_t len, cap;
+};
+
+struct ast parsefile(struct tokens);
+
+#endif /* !AHOY_C8ASM_PARSER_H */
-- 
cgit v1.2.3