aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-02-13 13:02:28 +0100
committerThomas Voss <mail@thomasvoss.com> 2024-02-13 13:11:47 +0100
commit79e6af86ca526d5fb56af6f6ca3da713e3a5e9f9 (patch)
tree752f1c26d1f122dcf58374ac78db109c9578be45 /src
Genesis commit
Diffstat (limited to 'src')
-rw-r--r--src/NOTES7
-rw-r--r--src/c8asm/assembler.c74
-rw-r--r--src/c8asm/assembler.h10
-rw-r--r--src/c8asm/common.h16
-rw-r--r--src/c8asm/grammar.ebnf46
-rw-r--r--src/c8asm/instr.gperf33
-rw-r--r--src/c8asm/lexer.c204
-rw-r--r--src/c8asm/lexer.h28
-rw-r--r--src/c8asm/lookup.h190
-rw-r--r--src/c8asm/main.c109
-rw-r--r--src/c8asm/parser.c627
-rw-r--r--src/c8asm/parser.h122
-rw-r--r--src/common/cerr.c73
-rw-r--r--src/common/cerr.h19
14 files changed, 1558 insertions, 0 deletions
diff --git a/src/NOTES b/src/NOTES
new file mode 100644
index 0000000..d155ab1
--- /dev/null
+++ b/src/NOTES
@@ -0,0 +1,7 @@
+The shl and shr instructions are often documented to take both a Vx and a
+Vy argument. It’s a bit unclear what the behaviour here is meant to be.
+The original CHIP-8 implementation shifted Vy by 1 and stored the result
+in Vx. Some GitHub projects shift the value in Vx by the value in Vy.
+Ahoy takes the approach of SCHIP where Vx is shifted by 1 and Vy is
+ignored. The Ahoy assembler does not recognize a second Vy argument to
+either instruction.
diff --git a/src/c8asm/assembler.c b/src/c8asm/assembler.c
new file mode 100644
index 0000000..0d02555
--- /dev/null
+++ b/src/c8asm/assembler.c
@@ -0,0 +1,74 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <da.h>
+#include <mbstring.h>
+
+#include "assembler.h"
+#include "cerr.h"
+#include "common.h"
+#include "parser.h"
+
+/* TODO: Remove */
+#ifndef unreachable
+# define unreachable() __builtin_unreachable()
+#endif
+
+#define E_LEXISTS "label ‘%.*s’ has already been declared"
+
+struct label {
+ uint16_t addr;
+ struct u8view sv;
+};
+
+struct labels {
+ struct label *buf;
+ size_t len, cap;
+};
+
+static bool u8eq(struct u8view, struct u8view);
+static void pushlabel(struct labels *, struct label);
+
+static size_t i;
+
+/* Report whether the views x and y hold the same bytes: the lengths must match
+   and the contents must compare equal. */
+bool
+u8eq(struct u8view x, struct u8view y)
+{
+	if (x.len != y.len)
+		return false;
+	return !memcmp(x.p, y.p, y.len);
+}
+
+void /* Append lbl to dst; a label whose name is already stored in dst is reported with E_LEXISTS. */
+pushlabel(struct labels *dst, struct label lbl)
+{
+ da_foreach (dst, stored) { /* presumably visits every label already held in dst -- confirm against da.h */
+ if (u8eq(stored->sv, lbl.sv)) { /* names are compared bytewise */
+ die_with_off(filename, lbl.sv.p - baseptr, E_LEXISTS, /* position passed is the offset of the duplicate name from baseptr */
+ U8_PRI_ARGS(lbl.sv));
+ }
+ }
+
+ dapush(dst, lbl);
+}
+
+void /* Walk the AST once, recording every label together with the running byte counter i; stream is not used yet. */
+assemble([[maybe_unused]] FILE *stream, struct ast ast)
+{
+ static struct labels locals, globals; /* static, so both lists persist between calls */
+
+ da_foreach (&ast, node) {
+ if (node->kind == D_LABEL) {
+ struct label lbl = {
+ .addr = i, /* counter value at the point where the label appears */
+ .sv = node->name,
+ };
+ pushlabel(node->name.p[0] == '.' ? &locals : &globals, lbl); /* names starting with '.' are local labels */
+ } else if (node->kind == D_INSTR)
+ i += node->instr.kind == I_DB ? node->instr.len : 2; /* db advances by its byte count, anything else by 2 */
+ else
+ unreachable();
+ }
+
+ locals.len = 0; /* NOTE(review): only locals is emptied; globals and the file-level i carry over to the next call -- confirm this is intended */
+}
diff --git a/src/c8asm/assembler.h b/src/c8asm/assembler.h
new file mode 100644
index 0000000..7b37a53
--- /dev/null
+++ b/src/c8asm/assembler.h
@@ -0,0 +1,10 @@
+#ifndef AHOY_C8ASM_ASSEMBLER_H
+#define AHOY_C8ASM_ASSEMBLER_H
+
+#include <stdio.h>
+
+#include "parser.h"
+
+void assemble(FILE *, struct ast);
+
+#endif /* !AHOY_C8ASM_ASSEMBLER_H */
diff --git a/src/c8asm/common.h b/src/c8asm/common.h
new file mode 100644
index 0000000..4f905b3
--- /dev/null
+++ b/src/c8asm/common.h
@@ -0,0 +1,16 @@
+#ifndef AHOY_C8ASM_COMMON_H
+#define AHOY_C8ASM_COMMON_H
+
+#include <mbstring.h>
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+
+#define lengthof(a) (sizeof(a) / sizeof(*(a)))
+#define streq(x, y) (!strcmp(x, y))
+
+extern size_t filesize;
+extern const char *filename;
+extern const char8_t *baseptr;
+
+#endif /* !AHOY_C8ASM_COMMON_H */
diff --git a/src/c8asm/grammar.ebnf b/src/c8asm/grammar.ebnf
new file mode 100644
index 0000000..59ba9fc
--- /dev/null
+++ b/src/c8asm/grammar.ebnf
@@ -0,0 +1,46 @@
+program = {line};
+line = {label}, [operation], EOL;
+label = IDENT, ':';
+
+operation = add | and | bcd | call
+ | cls | drw | hex | jp
+ | ld | or | ret | rnd
+ | rstr | se | shl | shr
+ | sknp | skp | sne | stor
+ | sub | subn | sys | xor
+ | db;
+
+add = "add", ((vreg, vreg) | (vreg, BYTE) | ("i", vreg));
+and = "and", vreg, vreg;
+bcd = "bcd", vreg;
+call = "call", addr;
+cls = "cls";
+drw = "drw", vreg, vreg, NIBBL;
+hex = "hex", vreg;
+jp = "jp", ["v0"], addr;
+ld = "ld", ((vreg, (vreg | BYTE | "dt" | "k"))
+ | ("i", addr)
+ | ("dt", vreg)
+ | ("st", vreg));
+or = "or", vreg, vreg;
+ret = "ret";
+rnd = "rnd", vreg, BYTE;
+rstr = "rstr", vreg;
+se = "se", vreg, (vreg | BYTE);
+shl = "shl", vreg;
+shr = "shr", vreg;
+sknp = "sknp", vreg;
+skp = "skp", vreg;
+sne = "sne", vreg, (vreg | BYTE);
+stor = "stor", vreg;
+sub = "sub", vreg, vreg;
+subn = "subn", vreg, vreg;
+sys = "sys", addr;
+xor = "xor", vreg, vreg;
+db = "db", {(BYTE | STRING)};
+
+addr = ADDR | IDENT;
+vreg = "v0" | "v1" | "v2" | "v3"
+ | "v4" | "v5" | "v6" | "v7"
+ | "v8" | "v9" | "va" | "vb"
+ | "vc" | "vd" | "ve" | "vf";
diff --git a/src/c8asm/instr.gperf b/src/c8asm/instr.gperf
new file mode 100644
index 0000000..fac4e57
--- /dev/null
+++ b/src/c8asm/instr.gperf
@@ -0,0 +1,33 @@
+%compare-lengths
+%define initializer-suffix ,nullptr
+%define lookup-function-name oplookup
+%includes
+%readonly-tables
+%struct-type
+struct opf_pair { char *name; void (*pfn)(void); };
+%%
+add, parseop_add
+and, parseop_and
+bcd, parseop_bcd
+call, parseop_call
+cls, parseop_cls
+db, parseop_db
+drw, parseop_drw
+hex, parseop_hex
+jp, parseop_jp
+ld, parseop_ld
+or, parseop_or
+ret, parseop_ret
+rnd, parseop_rnd
+rstr, parseop_rstr
+se, parseop_se
+shl, parseop_shl
+shr, parseop_shr
+sknp, parseop_sknp
+skp, parseop_skp
+sne, parseop_sne
+stor, parseop_stor
+sub, parseop_sub
+subn, parseop_subn
+sys, parseop_sys
+xor, parseop_xor
diff --git a/src/c8asm/lexer.c b/src/c8asm/lexer.c
new file mode 100644
index 0000000..effc32e
--- /dev/null
+++ b/src/c8asm/lexer.c
@@ -0,0 +1,204 @@
+#include <da.h>
+#include <mbstring.h>
+#include <rtype.h>
+
+#include "cerr.h"
+#include "common.h"
+#include "lexer.h"
+
+#define ISDIGIT(n) ((n) >= '0' && (n) <= '9')
+#define U8MOV(sv, n) ((sv)->p += (n), (sv)->len -= (n))
+
+#define E_BASE "integer with invalid base specifier ‘%.*s’"
+#define E_EXTRA "unknown extraneous character ‘%.*s’"
+#define E_IDENTCHAR "illegal character in identifier ‘%.*s’"
+#define E_IDENTLOST "local label missing identifier"
+#define E_IDENTSCHAR "illegal first character in identifier ‘%.*s’"
+#define E_UNTERMINATED "unterminated string literal ‘%.*s%.*s’"
+#define E_UTF8 "invalid UTF-8 byte near ‘%02X’"
+
+#define EOLS U"\n\v\f\r\x85\u2028\u2029"
+#define NUMCHARS U"'0123456789abcdefABCDEF"
+
+static void lexline(struct tokens *, struct u8view *);
+static bool skipws(struct u8view *);
+
+/* Return the human-readable name used in diagnostics for the single token
+   kind k.  Values that do not name exactly one token kind yield a null
+   pointer. */
+const char *
+tokrepr(tokkind k)
+{
+	switch (k) {
+	case T_COLON:
+		return "colon";
+	case T_EOL:
+		return "end of line";
+	case T_IDENT:
+		return "identifier";
+	case T_NUMBER:
+		return "number";
+	case T_STRING:
+		return "string";
+	default:
+		return nullptr;
+	}
+}
+
+/* Lex the whole input sv and return the resulting token array.  The input is
+   first checked with u8chk(); a failure is reported with E_UTF8 at the
+   offending byte.  The input is then cut into lines at any of the EOLS
+   characters and every line is handed to lexline(). */
+struct tokens
+lexfile(struct u8view sv)
+{
+	const char8_t *bad;
+	struct tokens toks;
+
+	bad = u8chk(sv.p, sv.len);
+	if (bad)
+		die_with_off(filename, bad - sv.p, E_UTF8, *bad);
+
+	dainit(&toks, 256);
+
+	while (sv.len) {
+		/* Length of the current line, not counting its terminator */
+		size_t n = u8cbspn(sv.p, sv.len, EOLS, lengthof(EOLS) - 1);
+		struct u8view line = {.p = sv.p, .len = n};
+
+		lexline(&toks, &line);
+
+		/* Step over the EOL character too, unless the input ended without one */
+		if (n < sv.len)
+			n += u8rlen(sv.p + n);
+
+		U8MOV(&sv, n);
+	}
+
+	return toks;
+}
+
+void /* Lex the single line in sv (given without its EOL) and append its tokens, followed by one T_EOL token, to toks. */
+lexline(struct tokens *toks, struct u8view *sv)
+{
+#define die_with_off(...) \
+ die_with_off(filename, sv->p - baseptr - w, __VA_ARGS__); /* local shorthand: the offset reported is w bytes before sv->p */
+
+ struct token tok;
+
+ for (;;) {
+ int w; /* value returned by the most recent u8next() call; used by the local die_with_off */
+ rune ch;
+
+ if (!skipws(sv))
+ goto end;
+
+ tok.sv.p = sv->p;
+ tok.sv.len = w = u8next(&ch, &sv->p, &sv->len);
+
+ if (ISDIGIT(ch)) {
+ size_t off, m = 10; /* m + 1 is the count passed to u8bspn() with NUMCHARS below */
+
+ tok.kind = T_NUMBER;
+ tok.base = 10;
+
+ if (ch == '0') {
+ w = u8next(&ch, &sv->p, &sv->len);
+ if (!w || rprop_is_pat_ws(ch)) { /* a lone "0": undo the read and finish the token */
+ sv->p -= w;
+ sv->len += w;
+ goto out;
+ }
+ tok.sv.len++; /* counts the character after the leading '0' as one byte */
+
+ switch (ch) {
+ case 'b':
+ tok.base = m = 2;
+ break;
+ case 'o':
+ tok.base = m = 8;
+ break;
+ case 'd':
+ /* Implicitly base-10 already */
+ break;
+ case 'x':
+ /* m = 22 because A–F can be both upper- or lowercase */
+ tok.base = 16;
+ m = 22;
+ break;
+ default:
+ if (!ISDIGIT(ch)) /* a digit here is accepted and the number stays base 10 */
+ die_with_off(E_BASE, w, sv->p - w);
+ }
+ }
+
+out:
+ /* +1 to support the digit separator */
+ tok.sv.len += off = u8bspn(sv->p, sv->len, NUMCHARS, m + 1);
+ U8MOV(sv, off);
+ } else if (ch == '.' || ch == '_' || rprop_is_xids(ch)) {
+ tok.kind = T_IDENT;
+ if (ch == '.') { /* a leading '.' must be followed by a regular identifier start */
+ if (!sv->len)
+ die_with_off(E_IDENTLOST);
+
+ tok.sv.len += w = u8next(&ch, &sv->p, &sv->len);
+ if (rprop_is_pat_ws(ch))
+ die_with_off(E_IDENTLOST);
+ if (ch != '_' && !rprop_is_xids(ch)) {
+ die_with_off(E_IDENTSCHAR, w, sv->p - w);
+ }
+ }
+
+ while (w = u8next(&ch, &sv->p, &sv->len)) {
+ if (ch == ':' || rprop_is_pat_ws(ch)) {
+ U8MOV(sv, -w); /* put the terminating rune back for the code below */
+ break;
+ }
+ if (!rprop_is_xidc(ch))
+ die_with_off(E_IDENTCHAR, w, sv->p - w);
+
+ tok.sv.len += w;
+ }
+ } else if (ch == '"') {
+ tok.kind = T_STRING;
+ while (w = u8next(&ch, &sv->p, &sv->len)) {
+ tok.sv.len += w;
+ if (ch == '"') /* the token keeps both quote characters */
+ goto found;
+ }
+ die_with_off(E_UNTERMINATED, (int)MIN(tok.sv.len, 20), tok.sv.p, /* prints at most 20 bytes of the literal, then an ellipsis if it was longer */
+ tok.sv.len > 20 ? (int)lengthof(u8"…") - 1 : 0, u8"…");
+found:
+ } else if (ch == ':') {
+ tok.kind = T_COLON;
+ } else if (ch == ';') { /* ';' ends lexing of the line; the rest is not tokenized */
+ goto end;
+ } else {
+ die_with_off(E_EXTRA, w, sv->p - w);
+ }
+
+ /* The colon is the only token that isn’t whitespace separated */
+ if (ch != ':' && sv->len) {
+ w = u8next(&ch, &sv->p, &sv->len);
+ if (!w || !rprop_is_pat_ws(ch))
+ die_with_off(E_EXTRA, w, sv->p - w);
+ }
+
+ dapush(toks, tok);
+ }
+
+end:;
+ tok = (struct token){ /* every line ends with an empty T_EOL token */
+ .kind = T_EOL,
+ .sv.p = sv->p,
+ .sv.len = 0,
+ };
+ dapush(toks, tok);
+
+#undef die_with_off
+}
+
+/* Advance sv past leading whitespace (as classified by rprop_is_pat_ws()).
+   Returns true when a non-whitespace rune is left at the front of sv and false
+   when sv has been exhausted. */
+bool
+skipws(struct u8view *sv)
+{
+	while (sv->len) {
+		rune ch;
+		int w = u8tor_uc(&ch, sv->p);
+
+		if (!rprop_is_pat_ws(ch))
+			return true;
+		U8MOV(sv, w);
+	}
+
+	return false;
+}
diff --git a/src/c8asm/lexer.h b/src/c8asm/lexer.h
new file mode 100644
index 0000000..ef20cef
--- /dev/null
+++ b/src/c8asm/lexer.h
@@ -0,0 +1,28 @@
+#ifndef AHOY_C8ASM_LEXER_H
+#define AHOY_C8ASM_LEXER_H
+
+#include <mbstring.h>
+
+typedef enum [[clang::flag_enum]] {
+ T_COLON = 1 << 0,
+ T_EOL = 1 << 1,
+ T_IDENT = 1 << 2,
+ T_NUMBER = 1 << 3,
+ T_STRING = 1 << 4,
+} tokkind;
+
+struct token {
+ tokkind kind;
+ struct u8view sv;
+ int base; /* For number literals */
+};
+
+struct tokens {
+ struct token *buf;
+ size_t len, cap;
+};
+
+const char *tokrepr(tokkind);
+struct tokens lexfile(struct u8view);
+
+#endif /* !AHOY_C8ASM_LEXER_H */
diff --git a/src/c8asm/lookup.h b/src/c8asm/lookup.h
new file mode 100644
index 0000000..26bc141
--- /dev/null
+++ b/src/c8asm/lookup.h
@@ -0,0 +1,190 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf --output-file src/c8asm/lookup.h src/c8asm/instr.gperf */
+/* Computed positions: -k'1-3' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
+#endif
+
+#line 7 "src/c8asm/instr.gperf"
+struct opf_pair { char *name; void (*pfn)(void); };
+#include <string.h>
+
+#define TOTAL_KEYWORDS 25
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 4
+#define MIN_HASH_VALUE 2
+#define MAX_HASH_VALUE 49
+/* maximum key range = 48, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register size_t len)
+{
+ static const unsigned char asso_values[] =
+ {
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 5, 15, 15,
+ 10, 0, 50, 50, 10, 50, 0, 5, 20, 50,
+ 5, 5, 18, 50, 0, 0, 0, 30, 50, 8,
+ 3, 3, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50
+ };
+ register unsigned int hval = len;
+
+ switch (hval)
+ {
+ default:
+ hval += asso_values[(unsigned char)str[2]];
+ /*FALLTHROUGH*/
+ case 2:
+ hval += asso_values[(unsigned char)str[1]];
+ /*FALLTHROUGH*/
+ case 1:
+ hval += asso_values[(unsigned char)str[0]];
+ break;
+ }
+ return hval;
+}
+
+const struct opf_pair *
+oplookup (register const char *str, register size_t len)
+{
+ static const unsigned char lengthtable[] =
+ {
+ 0, 0, 2, 3, 4, 0, 3, 2, 3, 4, 0, 3, 0, 3,
+ 4, 0, 3, 0, 3, 0, 2, 3, 0, 3, 0, 0, 3, 2,
+ 3, 0, 0, 0, 2, 3, 0, 0, 0, 0, 3, 0, 0, 0,
+ 0, 3, 4, 0, 0, 0, 3, 4
+ };
+ static const struct opf_pair wordlist[] =
+ {
+ {"",nullptr}, {"",nullptr},
+#line 23 "src/c8asm/instr.gperf"
+ {"se", parseop_se},
+#line 20 "src/c8asm/instr.gperf"
+ {"ret", parseop_ret},
+#line 22 "src/c8asm/instr.gperf"
+ {"rstr", parseop_rstr},
+ {"",nullptr},
+#line 32 "src/c8asm/instr.gperf"
+ {"sys", parseop_sys},
+#line 19 "src/c8asm/instr.gperf"
+ {"or", parseop_or},
+#line 28 "src/c8asm/instr.gperf"
+ {"sne", parseop_sne},
+#line 29 "src/c8asm/instr.gperf"
+ {"stor", parseop_stor},
+ {"",nullptr},
+#line 33 "src/c8asm/instr.gperf"
+ {"xor", parseop_xor},
+ {"",nullptr},
+#line 25 "src/c8asm/instr.gperf"
+ {"shr", parseop_shr},
+#line 26 "src/c8asm/instr.gperf"
+ {"sknp", parseop_sknp},
+ {"",nullptr},
+#line 16 "src/c8asm/instr.gperf"
+ {"hex", parseop_hex},
+ {"",nullptr},
+#line 21 "src/c8asm/instr.gperf"
+ {"rnd", parseop_rnd},
+ {"",nullptr},
+#line 17 "src/c8asm/instr.gperf"
+ {"jp", parseop_jp},
+#line 15 "src/c8asm/instr.gperf"
+ {"drw", parseop_drw},
+ {"",nullptr},
+#line 10 "src/c8asm/instr.gperf"
+ {"and", parseop_and},
+ {"",nullptr}, {"",nullptr},
+#line 27 "src/c8asm/instr.gperf"
+ {"skp", parseop_skp},
+#line 14 "src/c8asm/instr.gperf"
+ {"db", parseop_db},
+#line 9 "src/c8asm/instr.gperf"
+ {"add", parseop_add},
+ {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 18 "src/c8asm/instr.gperf"
+ {"ld", parseop_ld},
+#line 24 "src/c8asm/instr.gperf"
+ {"shl", parseop_shl},
+ {"",nullptr}, {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 13 "src/c8asm/instr.gperf"
+ {"cls", parseop_cls},
+ {"",nullptr}, {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 11 "src/c8asm/instr.gperf"
+ {"bcd", parseop_bcd},
+#line 12 "src/c8asm/instr.gperf"
+ {"call", parseop_call},
+ {"",nullptr}, {"",nullptr}, {"",nullptr},
+#line 30 "src/c8asm/instr.gperf"
+ {"sub", parseop_sub},
+#line 31 "src/c8asm/instr.gperf"
+ {"subn", parseop_subn}
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register unsigned int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE)
+ if (len == lengthtable[key])
+ {
+ register const char *s = wordlist[key].name;
+
+ if (*str == *s && !memcmp (str + 1, s + 1, len - 1))
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/src/c8asm/main.c b/src/c8asm/main.c
new file mode 100644
index 0000000..edc595b
--- /dev/null
+++ b/src/c8asm/main.c
@@ -0,0 +1,109 @@
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <builder.h>
+#include <da.h>
+
+#include "assembler.h"
+#include "cerr.h"
+#include "common.h"
+#include "lexer.h"
+#include "parser.h"
+
+static void asmfile(int, const char *);
+
+size_t filesize;
+const char *filename;
+const char8_t *baseptr;
+
+int /* Entry point: handle -h/--help, then assemble each file operand (standard input when there are none, or for "-"). */
+main(int argc, char **argv)
+{
+ int opt;
+ const struct option longopts[] = {
+ {"help", no_argument, nullptr, 'h'},
+ {nullptr, no_argument, nullptr, 0 },
+ };
+
+ cerrinit(*argv);
+ while ((opt = getopt_long(argc, argv, "h", longopts, nullptr)) != -1) {
+ switch (opt) {
+ case 'h':
+ execlp("man", "man", "1", argv[0], nullptr); /* help is the manual page: run man 1 <program name> */
+ die("execlp: man 1 %s", argv[0]); /* execlp() only returns on failure */
+ default:
+ fprintf(stderr, "Usage: %s [file ...]\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ argc -= optind; /* skip the options so that argv holds only file operands */
+ argv += optind;
+
+ if (!argc)
+ asmfile(STDIN_FILENO, "-");
+ for (int i = 0; i < argc; i++) {
+ if (streq("-", argv[i]))
+ asmfile(STDIN_FILENO, "-");
+ else {
+ int fd;
+ if ((fd = open(argv[i], O_RDONLY)) == -1)
+ die("open: %s", argv[i]);
+ asmfile(fd, argv[i]);
+ close(fd);
+ }
+ }
+
+ return EXIT_SUCCESS;
+}
+
+void /* Read everything from fd, then lex, parse and assemble it; fn is the name used in diagnostics. */
+asmfile(int fd, const char *fn)
+{
+ char *buf;
+ size_t blksize;
+ ssize_t nr;
+ struct ast ast;
+ struct stat st;
+ struct u8str sb;
+ struct tokens toks;
+
+ filename = fn; /* global read by the error-reporting calls */
+
+ if (fstat(fd, &st) == -1)
+ die("fstat: %s", filename);
+ blksize = MAX(st.st_blksize, BUFSIZ); /* read-buffer size: the larger of the file's block size and BUFSIZ */
+ if (!(buf = malloc(blksize)))
+ die("malloc");
+
+ /* Load the contents of the file into sb */
+ u8strinit(&sb, S_ISREG(st.st_mode) ? (size_t)st.st_size : blksize); /* regular files pass their size, anything else one block */
+ while ((nr = read(fd, buf, blksize)) > 0) {
+ struct u8view v = {
+ .p = buf,
+ .len = nr,
+ };
+ if (!u8strpush(&sb, v))
+ die("u8strpush");
+ }
+ if (nr == -1)
+ die("read: %s", filename);
+
+ free(buf);
+ filesize = sb.len; /* globals used to turn token pointers into file offsets */
+ baseptr = u8strfit(&sb)->p;
+ assemble(stdout, ast = parsefile(toks = lexfile(u8strtou8(sb))));
+
+ da_foreach (&ast, node) {
+ if (node->kind == D_INSTR && node->instr.kind == I_DB) /* only db instructions carry a separately allocated buffer */
+ free(node->instr.buf);
+ }
+
+ free(toks.buf);
+ u8strfree(sb);
+}
diff --git a/src/c8asm/parser.c b/src/c8asm/parser.c
new file mode 100644
index 0000000..b746bea
--- /dev/null
+++ b/src/c8asm/parser.c
@@ -0,0 +1,627 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <da.h>
+
+#include "cerr.h"
+#include "common.h"
+#include "lexer.h"
+#include "parser.h"
+
+/* TODO: Remove */
+#ifndef unreachable
+# define unreachable() __builtin_unreachable()
+#endif
+
+#define E_BADLABEL "identifier ‘%.*s’ cannot be used as a label"
+#define E_EARLY "expected %s but input ended prematurely"
+#define E_EXPECTED2 "expected %s but got %s"
+#define E_EXPECTED "expected %s but got %s ‘%.*s’"
+#define E_INSTR "got unknown instruction ‘%.*s’"
+#define E_TOOLARGE "expected %s but got out-of-range integer ‘%.*s’"
+
+#define die_with_off(P, ...) die_with_off(filename, (P)-baseptr, __VA_ARGS__)
+#define memeq(X, Y, N) (!memcmp(X, Y, N))
+
+enum numsize {
+ NS_NIBBLE = 0xF,
+ NS_BYTE = 0xFF,
+ NS_ADDR = 0xFFF,
+};
+
+enum regtype {
+ RT_NONE,
+ RT_DT,
+ RT_I,
+ RT_K,
+ RT_ST,
+ RT_VX,
+};
+
+static bool parselabel(void);
+static void parseline(void);
+static void parseop(void);
+static struct raw_addr parseaddr(struct token);
+static struct token reqnext(const char *, tokkind);
+
+static uint16_t hexval(char);
+static uint16_t parsenum(struct token, enum numsize);
+static enum regtype regtype(struct u8view);
+
+static void parseop_add(void), parseop_and(void), parseop_bcd(void),
+ parseop_call(void), parseop_cls(void), parseop_db(void), parseop_drw(void),
+ parseop_hex(void), parseop_jp(void), parseop_ld(void), parseop_or(void),
+ parseop_ret(void), parseop_rnd(void), parseop_rstr(void), parseop_se(void),
+ parseop_shl(void), parseop_shr(void), parseop_sknp(void), parseop_skp(void),
+ parseop_sne(void), parseop_stor(void), parseop_sub(void),
+ parseop_subn(void), parseop_sys(void), parseop_xor(void);
+#include "lookup.h"
+
+static size_t i;
+static struct ast ast;
+static struct tokens *tokens;
+
+/* Parse the token array toks line by line and return the resulting AST.  The
+   AST is a file-level object: it is emptied at the start of every call and
+   refilled. */
+struct ast
+parsefile(struct tokens toks)
+{
+	i = 0;
+	ast.len = 0;
+	tokens = &toks;
+
+	for (;;) {
+		if (i >= toks.len)
+			break;
+		parseline();
+	}
+
+	/* toks lives in this frame, so the file-level pointer must not outlive
+	   the call.  Nothing would read it afterwards anyway, but clearing it
+	   also keeps GCC from warning about a dangling pointer. */
+	tokens = nullptr;
+
+	return ast;
+}
+
+/* Parse one line: any number of labels, an optional instruction, and the
+   mandatory end-of-line token. */
+void
+parseline(void)
+{
+	for (;;) {
+		if (!parselabel())
+			break;
+	}
+
+	parseop();
+	reqnext("end of line", T_EOL);
+}
+
+/* Try to parse a label, i.e. an identifier immediately followed by a colon.
+   On success the label is appended to the AST, both tokens are consumed and
+   true is returned; otherwise nothing is consumed and false is returned.  A
+   label that spells a register name is rejected with E_BADLABEL. */
+bool
+parselabel(void)
+{
+	struct dir lbl;
+
+	if (tokens->len - i < 2)
+		return false;
+	if (tokens->buf[i].kind != T_IDENT || tokens->buf[i + 1].kind != T_COLON)
+		return false;
+
+	lbl = (struct dir){
+		.kind = D_LABEL,
+		.name = tokens->buf[i].sv,
+	};
+	if (regtype(lbl.name) != RT_NONE)
+		die_with_off(lbl.name.p, E_BADLABEL, U8_PRI_ARGS(lbl.name));
+
+	dapush(&ast, lbl);
+	i += 2;
+	return true;
+}
+
+/* Parse the optional instruction of a line.  The mnemonic is looked up in the
+   gperf table and the matching parseop_*() handler parses the operands; an
+   unknown mnemonic is reported with E_INSTR. */
+void
+parseop(void)
+{
+	struct token tok = reqnext("instruction or end of line", T_IDENT | T_EOL);
+
+	if (tok.kind == T_EOL) {
+		/* No instruction on this line: give the EOL back to parseline() */
+		i--;
+		return;
+	}
+
+	const struct opf_pair *op = oplookup(tok.sv.p, tok.sv.len);
+	if (!op)
+		die_with_off(tok.sv.p, E_INSTR, U8_PRI_ARGS(tok.sv));
+	op->pfn();
+}
+
+/* Turn the operand token tok into a raw address.  A T_NUMBER token is parsed
+   as a number that must fit in NS_ADDR (0xFFF).  A T_IDENT token is kept as a
+   label reference for later resolution, unless it spells a register name, in
+   which case it is rejected with E_BADLABEL.  Every caller in this file
+   obtains tok from reqnext() with the mask T_IDENT | T_NUMBER, so no other
+   kind can arrive here. */
+struct raw_addr
+parseaddr(struct token tok)
+{
+	if (tok.kind == T_NUMBER)
+		return (struct raw_addr){.val = parsenum(tok, NS_ADDR)};
+
+	/* Label operands are lexed as identifiers.  Testing for T_STRING here
+	   (which no caller can pass) sent every label straight to
+	   unreachable(). */
+	if (tok.kind == T_IDENT) {
+		if (regtype(tok.sv) != RT_NONE)
+			die_with_off(tok.sv.p, E_BADLABEL, U8_PRI_ARGS(tok.sv));
+		return (struct raw_addr){.label = true, .sv = tok.sv};
+	}
+
+	unreachable();
+}
+
+/* Classify the identifier v as a register name.  ‘i’ and ‘k’ are the
+   one-letter registers, ‘dt’ and ‘st’ the timers, and ‘v’ followed by one
+   lowercase hexadecimal digit is a v-register.  Anything else is RT_NONE. */
+enum regtype
+regtype(struct u8view v)
+{
+	if (v.len == 1) {
+		if (v.p[0] == 'i')
+			return RT_I;
+		if (v.p[0] == 'k')
+			return RT_K;
+		return RT_NONE;
+	}
+	if (v.len != 2)
+		return RT_NONE;
+
+	if (memeq(v.p, "dt", 2))
+		return RT_DT;
+	if (memeq(v.p, "st", 2))
+		return RT_ST;
+	if (v.p[0] != 'v')
+		return RT_NONE;
+
+	if (v.p[1] >= '0' && v.p[1] <= '9')
+		return RT_VX;
+	if (v.p[1] >= 'a' && v.p[1] <= 'f')
+		return RT_VX;
+	return RT_NONE;
+}
+
+/* Return the value of the lowercase hexadecimal digit ch.  The caller must
+   have checked that ch is in ‘0’–‘9’ or ‘a’–‘f’; any other input is
+   unreachable. */
+uint16_t
+hexval(char ch)
+{
+	if (ch >= '0' && ch <= '9')
+		return ch - '0';
+	if (ch >= 'a' && ch <= 'f')
+		return ch - 'a' + 10;
+	unreachable();
+}
+
+/* Convert the number token tok to its value, using the base recorded by the
+   lexer in tok.base.  An optional two-character base prefix (‘0’ followed by
+   a non-digit such as ‘x’) is skipped and the digit separator ‘'’ is ignored.
+   size is the largest value permitted; a literal that would exceed it is
+   reported with E_TOOLARGE.  The overflow test is the strtol-style
+   cutoff/cutlim check, performed before each multiplication so that acc
+   never exceeds size. */
+uint16_t
+parsenum(struct token tok, enum numsize size)
+{
+	uint16_t acc, cutoff, cutlim;
+	struct u8view v = tok.sv;
+
+	acc = 0;
+	cutoff = size;
+	cutlim = cutoff % tok.base;
+	cutoff /= tok.base;
+
+	if (v.len >= 2 && v.p[0] == '0' && v.p[1] > '9') {
+		v.p += 2;
+		v.len -= 2;
+	}
+
+	/* The byte is fetched inside the loop, after v.len has been checked.
+	   Fetching it in the for-clauses read one byte past the end of the token
+	   (and of the file buffer, when the token is the last thing in it). */
+	for (; v.len; v.p++, v.len--) {
+		char ch = *v.p;
+
+		if (ch == '\'')
+			continue;
+		else if (ch >= '0' && ch <= '9')
+			ch -= '0';
+		else if (ch >= 'a' && ch <= 'f')
+			ch -= 'a' - 10;
+		else if (ch >= 'A' && ch <= 'F')
+			ch -= 'A' - 10;
+		else
+			unreachable();
+
+		if (acc > cutoff || (acc == cutoff && ch > cutlim)) {
+			const char *s = size == NS_NIBBLE ? "nibble"
+			              : size == NS_BYTE   ? "byte"
+			              : size == NS_ADDR   ? "address"
+			                                  : (unreachable(), nullptr);
+			die_with_off(tok.sv.p, E_TOOLARGE, s, U8_PRI_ARGS(tok.sv));
+		}
+
+		acc *= tok.base;
+		acc += ch;
+	}
+
+	return acc;
+}
+
+/* Consume and return the next token, which must be of one of the kinds in the
+   mask msk.  want is the description of the expected input that is used in
+   the diagnostics: E_EARLY when the tokens have run out, E_EXPECTED2 when an
+   end of line was found instead, and E_EXPECTED otherwise. */
+struct token
+reqnext(const char *want, tokkind msk)
+{
+	struct token t;
+
+	if (i >= tokens->len)
+		die_with_off(baseptr + filesize - 1, E_EARLY, want);
+
+	t = tokens->buf[i];
+	i++;
+
+	if (t.kind & msk)
+		return t;
+
+	/* An EOL token has no text worth quoting, hence the shorter message */
+	if (t.kind == T_EOL)
+		die_with_off(t.sv.p, E_EXPECTED2, want, tokrepr(t.kind));
+	die_with_off(t.sv.p, E_EXPECTED, want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
+}
+
+#define I(...) ((struct dir){.kind = D_INSTR, .instr = (__VA_ARGS__)})
+
+/* Common implementations of instructions that always take 1 or 2 v-registers.
+   T is the instrkind to emit.  Each operand must be an identifier for which
+   regtype() returns exactly RT_VX; the register number is then taken from the
+   second character of the name.
+
+   enum regtype is a plain enumeration, not a set of flags, so the check has
+   to be a comparison.  Masking with ~RT_VX let RT_NONE, RT_DT and RT_ST pass
+   as v-registers, after which hexval() was handed a byte that need not be a
+   hexadecimal digit. */
+#define ONE_VREG(T) \
+	do { \
+		struct instr ins = {.kind = (T)}; \
+		struct token tok = reqnext("v-register", T_IDENT); \
+		if (regtype(tok.sv) != RT_VX) { \
+			die_with_off(tok.sv.p, E_EXPECTED, "v-register", \
+			             tokrepr(tok.kind), U8_PRI_ARGS(tok.sv)); \
+		} \
+		ins.args[ins.len++].val = hexval(tok.sv.p[1]); \
+		dapush(&ast, I(ins)); \
+	} while (false)
+#define TWO_VREG(T) \
+	do { \
+		struct instr ins = {.kind = (T)}; \
+		struct token lhs = reqnext("v-register", T_IDENT); \
+		struct token rhs = reqnext("v-register", T_IDENT); \
+		if (regtype(lhs.sv) != RT_VX) { \
+			die_with_off(lhs.sv.p, E_EXPECTED, "v-register", \
+			             tokrepr(lhs.kind), U8_PRI_ARGS(lhs.sv)); \
+		} \
+		if (regtype(rhs.sv) != RT_VX) { \
+			die_with_off(rhs.sv.p, E_EXPECTED, "v-register", \
+			             tokrepr(rhs.kind), U8_PRI_ARGS(rhs.sv)); \
+		} \
+		ins.args[ins.len++].val = hexval(lhs.sv.p[1]); \
+		ins.args[ins.len++].val = hexval(rhs.sv.p[1]); \
+		dapush(&ast, I(ins)); \
+	} while (false)
+
+/* Parse the operands of ‘add’.  Three forms exist: a v-register followed by a
+   byte (I_ADD_VX_B), a v-register followed by a v-register (I_ADD_VX_VY), and
+   the i-register followed by a v-register (I_ADD_I_VX). */
+void
+parseop_add(void)
+{
+	struct instr ins = {};
+	struct token tok = reqnext("v- or i-register", T_IDENT);
+
+	switch (regtype(tok.sv)) {
+	case RT_I:
+		ins.kind = I_ADD_I_VX;
+		tok = reqnext("v-register", T_IDENT);
+		if (regtype(tok.sv) != RT_VX) {
+			die_with_off(tok.sv.p, E_EXPECTED, "v-register", tokrepr(tok.kind),
+			             U8_PRI_ARGS(tok.sv));
+		}
+		ins.args[ins.len++].val = hexval(tok.sv.p[1]);
+		break;
+	case RT_VX:
+		ins.args[ins.len++].val = hexval(tok.sv.p[1]);
+		tok = reqnext("byte or v-register", T_IDENT | T_NUMBER);
+
+		if (tok.kind == T_NUMBER) {
+			ins.kind = I_ADD_VX_B;
+			ins.args[ins.len++].val = parsenum(tok, NS_BYTE);
+		} else if (regtype(tok.sv) == RT_VX) {
+			ins.kind = I_ADD_VX_VY;
+			ins.args[ins.len++].val = hexval(tok.sv.p[1]);
+		} else {
+			die_with_off(tok.sv.p, E_EXPECTED, "v-register", tokrepr(tok.kind),
+			             U8_PRI_ARGS(tok.sv));
+		}
+		break;
+	default:
+		die_with_off(tok.sv.p, E_EXPECTED, "v- or i-register",
+		             tokrepr(tok.kind), U8_PRI_ARGS(tok.sv));
+	}
+
+	dapush(&ast, I(ins));
+}
+
+void /* ‘and’: two v-register operands, handled by TWO_VREG */
+parseop_and(void)
+{
+ TWO_VREG(I_AND);
+}
+
+void /* ‘bcd’: one v-register operand, handled by ONE_VREG */
+parseop_bcd(void)
+{
+ ONE_VREG(I_BCD);
+}
+
+/* Parse the operand of ‘call’: a single address, given either as a number or
+   as a label. */
+void
+parseop_call(void)
+{
+	struct instr ins = {.kind = I_CALL};
+
+	ins.args[0] = parseaddr(reqnext("address", T_IDENT | T_NUMBER));
+	ins.len = 1;
+	dapush(&ast, I(ins));
+}
+
+void /* ‘cls’: no operands; an I_CLS instruction without arguments is appended to the AST */
+parseop_cls(void)
+{
+ dapush(&ast, I((struct instr){.kind = I_CLS}));
+}
+
+/* Parse the operands of ‘db’: one or more bytes and/or string literals, up to
+   the end of the line.  The bytes are collected in the instruction’s dynamic
+   buffer; a string contributes its contents without the surrounding
+   quotes. */
+void
+parseop_db(void)
+{
+	struct instr ins = {.kind = I_DB};
+
+	for (;;) {
+		struct token tok = reqnext("byte or string", T_NUMBER | T_STRING);
+
+		switch (tok.kind) {
+		case T_NUMBER:
+			dapush(&ins, parsenum(tok, NS_BYTE));
+			break;
+		case T_STRING:
+			/* Index 0 and the last index hold the quote characters */
+			for (size_t j = 1; j < tok.sv.len - 1; j++)
+				dapush(&ins, tok.sv.p[j]);
+			break;
+		default:
+			unreachable();
+		}
+
+		if (i >= tokens->len || tokens->buf[i].kind == T_EOL)
+			break;
+	}
+
+	dapush(&ast, I(ins));
+}
+
+void /* ‘drw’: two v-registers followed by a nibble */
+parseop_drw(void)
+{
+ struct instr ins = {.kind = I_DRW};
+ struct token op1, op2, op3;
+
+ op1 = reqnext("v-register", T_IDENT); /* all three tokens are fetched before any register name is validated */
+ op2 = reqnext("v-register", T_IDENT);
+ op3 = reqnext("nibble", T_NUMBER);
+
+ if (regtype(op1.sv) != RT_VX) {
+ die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+ U8_PRI_ARGS(op1.sv));
+ }
+ if (regtype(op2.sv) != RT_VX) {
+ die_with_off(op2.sv.p, E_EXPECTED, "v-register", tokrepr(op2.kind),
+ U8_PRI_ARGS(op2.sv));
+ }
+
+ ins.args[ins.len++].val = hexval(op1.sv.p[1]); /* register number comes from the second character of the name */
+ ins.args[ins.len++].val = hexval(op2.sv.p[1]);
+ ins.args[ins.len++].val = parsenum(op3, NS_NIBBLE); /* must not exceed 0xF */
+ dapush(&ast, I(ins));
+}
+
+void /* ‘hex’: one v-register operand, handled by ONE_VREG */
+parseop_hex(void)
+{
+ ONE_VREG(I_HEX);
+}
+
+void /* ‘jp’: either a lone address (I_JP_ADDR) or the register v0 followed by an address (I_JP_V0_ADDR) */
+parseop_jp(void)
+{
+ enum regtype rt; /* only assigned, and only read, when op is an identifier */
+ struct instr ins = {};
+ struct token op = reqnext("v0-register or address", T_IDENT | T_NUMBER);
+
+ if (op.kind == T_IDENT)
+ rt = regtype(op.sv);
+ if (op.kind == T_NUMBER || (op.kind == T_IDENT && rt == RT_NONE)) { /* a number, or an identifier that is not a register name, is the address itself */
+ ins.kind = I_JP_ADDR;
+ ins.args[ins.len++] = parseaddr(op);
+ } else if (op.kind == T_IDENT) { /* a register name: only v0 is accepted here */
+ ins.kind = I_JP_V0_ADDR;
+ if (op.sv.len != 2 || !memeq(op.sv.p, "v0", 2)) {
+ die_with_off(op.sv.p, E_EXPECTED, "v0-register or address",
+ tokrepr(op.kind), U8_PRI_ARGS(op.sv));
+ }
+ ins.args[ins.len++] = parseaddr(reqnext("address", T_NUMBER | T_IDENT)); /* v0 itself is not stored as an argument */
+ } else
+ unreachable();
+
+ dapush(&ast, I(ins));
+}
+
+void /* ‘ld’: the destination register picks the form -- dt/st take a v-register, i takes an address, and a v-register takes a v-register, dt, k or a byte */
+parseop_ld(void)
+{
+ enum regtype rt;
+ struct instr ins = {};
+ struct token op = reqnext("v-, i-, dt-, or st-register", T_IDENT);
+
+ switch (rt = regtype(op.sv)) {
+ case RT_DT:
+ case RT_ST:
+ ins.kind = rt == RT_DT ? I_LD_DT : I_LD_ST;
+ op = reqnext("v-register", T_IDENT);
+ if (regtype(op.sv) != RT_VX) {
+ die_with_off(op.sv.p, E_EXPECTED, "v-register", tokrepr(op.kind),
+ U8_PRI_ARGS(op.sv));
+ }
+ ins.args[ins.len++].val = hexval(op.sv.p[1]);
+ break;
+
+ case RT_I:
+ ins.kind = I_LD_I;
+ ins.args[ins.len++] = parseaddr(reqnext("address", T_NUMBER | T_IDENT));
+ break;
+
+ case RT_VX:
+ ins.args[ins.len++].val = hexval(op.sv.p[1]); /* destination register number */
+ op = reqnext("v-, k-, or dt-register, or byte", T_IDENT | T_NUMBER);
+
+ switch (op.kind) {
+ case T_IDENT:
+ switch (rt = regtype(op.sv)) {
+ case RT_DT:
+ ins.kind = I_LD_VX_DT; /* the dt and k sources add no argument */
+ break;
+ case RT_K:
+ ins.kind = I_LD_VX_K;
+ break;
+ case RT_VX:
+ ins.kind = I_LD_VX_VY;
+ ins.args[ins.len++].val = hexval(op.sv.p[1]);
+ break;
+ default:
+ die_with_off(op.sv.p, E_EXPECTED,
+ "v-, k-, or dt-register, or byte",
+ tokrepr(op.kind), U8_PRI_ARGS(op.sv));
+ }
+
+ break;
+ case T_NUMBER:
+ ins.kind = I_LD_VX_BYTE;
+ ins.args[ins.len++].val = parsenum(op, NS_BYTE);
+ break;
+ default:
+ unreachable(); /* reqnext() only returns kinds in the requested mask */
+ }
+ break;
+
+ default:
+ die_with_off(op.sv.p, E_EXPECTED, "v-, i-, dt-, or st-register",
+ tokrepr(op.kind), U8_PRI_ARGS(op.sv));
+ }
+
+ dapush(&ast, I(ins));
+}
+
+void /* ‘or’: two v-register operands, handled by TWO_VREG */
+parseop_or(void)
+{
+ TWO_VREG(I_OR);
+}
+
+void /* ‘ret’: no operands; an I_RET instruction without arguments is appended to the AST */
+parseop_ret(void)
+{
+ dapush(&ast, I((struct instr){.kind = I_RET}));
+}
+
+void /* ‘rnd’: a v-register followed by a byte */
+parseop_rnd(void)
+{
+ struct instr ins = {.kind = I_RND};
+ struct token op1, op2;
+
+ op1 = reqnext("v-register", T_IDENT); /* both tokens are fetched before the register name is validated */
+ op2 = reqnext("byte", T_NUMBER);
+
+ if (regtype(op1.sv) != RT_VX) {
+ die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+ U8_PRI_ARGS(op1.sv));
+ }
+
+ ins.args[ins.len++].val = hexval(op1.sv.p[1]);
+ ins.args[ins.len++].val = parsenum(op2, NS_BYTE); /* must not exceed 0xFF */
+ dapush(&ast, I(ins));
+}
+
+void /* ‘rstr’: one v-register operand, handled by ONE_VREG */
+parseop_rstr(void)
+{
+ ONE_VREG(I_RSTR);
+}
+
+void /* ‘se’: a v-register followed by either a v-register (I_SE_VX_VY) or a byte (I_SE_VX_B) */
+parseop_se(void)
+{
+ struct instr ins = {};
+ struct token op1, op2;
+
+ op1 = reqnext("v-register", T_IDENT); /* both tokens are fetched before either is validated */
+ op2 = reqnext("byte or v-register", T_IDENT | T_NUMBER);
+
+ if (regtype(op1.sv) != RT_VX) {
+ die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+ U8_PRI_ARGS(op1.sv));
+ }
+ ins.args[ins.len++].val = hexval(op1.sv.p[1]);
+
+ switch (op2.kind) { /* the kind of the second operand selects the instruction form */
+ case T_IDENT:
+ if (regtype(op2.sv) != RT_VX) {
+ die_with_off(op2.sv.p, E_EXPECTED, "v-register", tokrepr(op2.kind),
+ U8_PRI_ARGS(op2.sv));
+ }
+ ins.kind = I_SE_VX_VY;
+ ins.args[ins.len++].val = hexval(op2.sv.p[1]);
+ break;
+ case T_NUMBER:
+ ins.kind = I_SE_VX_B;
+ ins.args[ins.len++].val = parsenum(op2, NS_BYTE);
+ break;
+ default:
+ unreachable(); /* reqnext() only returns kinds in the requested mask */
+ }
+
+ dapush(&ast, I(ins));
+}
+
+void /* ‘shl’: one v-register operand only (no second Vy operand is accepted), handled by ONE_VREG */
+parseop_shl(void)
+{
+ ONE_VREG(I_SHL);
+}
+
+void /* ‘shr’: one v-register operand only (no second Vy operand is accepted), handled by ONE_VREG */
+parseop_shr(void)
+{
+ ONE_VREG(I_SHR);
+}
+
+void /* ‘sknp’: one v-register operand, handled by ONE_VREG */
+parseop_sknp(void)
+{
+ ONE_VREG(I_SKNP);
+}
+
+void /* ‘skp’: one v-register operand, handled by ONE_VREG */
+parseop_skp(void)
+{
+ ONE_VREG(I_SKP);
+}
+
+void /* ‘sne’: a v-register followed by either a v-register (I_SNE_VX_VY) or a byte (I_SNE_VX_B) */
+parseop_sne(void)
+{
+ struct instr ins = {};
+ struct token op1, op2;
+
+ op1 = reqnext("v-register", T_IDENT); /* both tokens are fetched before either is validated */
+ op2 = reqnext("byte or v-register", T_IDENT | T_NUMBER);
+
+ if (regtype(op1.sv) != RT_VX) {
+ die_with_off(op1.sv.p, E_EXPECTED, "v-register", tokrepr(op1.kind),
+ U8_PRI_ARGS(op1.sv));
+ }
+ ins.args[ins.len++].val = hexval(op1.sv.p[1]);
+
+ switch (op2.kind) { /* the kind of the second operand selects the instruction form */
+ case T_IDENT:
+ if (regtype(op2.sv) != RT_VX) {
+ die_with_off(op2.sv.p, E_EXPECTED, "v-register", tokrepr(op2.kind),
+ U8_PRI_ARGS(op2.sv));
+ }
+ ins.kind = I_SNE_VX_VY;
+ ins.args[ins.len++].val = hexval(op2.sv.p[1]);
+ break;
+ case T_NUMBER:
+ ins.kind = I_SNE_VX_B;
+ ins.args[ins.len++].val = parsenum(op2, NS_BYTE);
+ break;
+ default:
+ unreachable(); /* reqnext() only returns kinds in the requested mask */
+ }
+
+ dapush(&ast, I(ins));
+}
+
+void /* ‘stor’: one v-register operand, handled by ONE_VREG */
+parseop_stor(void)
+{
+ ONE_VREG(I_STOR);
+}
+
+void /* ‘sub’: two v-register operands, handled by TWO_VREG */
+parseop_sub(void)
+{
+ TWO_VREG(I_SUB);
+}
+
+void /* ‘subn’: two v-register operands, handled by TWO_VREG */
+parseop_subn(void)
+{
+ TWO_VREG(I_SUBN);
+}
+
+/* Parse the operand of ‘sys’: a single address, given either as a number or
+   as a label. */
+void
+parseop_sys(void)
+{
+	struct token tok = reqnext("address", T_NUMBER | T_IDENT);
+	struct instr ins = {.kind = I_SYS};
+
+	ins.args[0] = parseaddr(tok);
+	ins.len = 1;
+	dapush(&ast, I(ins));
+}
+
+void /* ‘xor’: two v-register operands, handled by TWO_VREG */
+parseop_xor(void)
+{
+ TWO_VREG(I_XOR);
+}
diff --git a/src/c8asm/parser.h b/src/c8asm/parser.h
new file mode 100644
index 0000000..392b003
--- /dev/null
+++ b/src/c8asm/parser.h
@@ -0,0 +1,122 @@
+#ifndef AHOY_C8ASM_PARSER_H
+#define AHOY_C8ASM_PARSER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <mbstring.h>
+
+struct tokens;
+
+/* Every instruction form the parser can produce.  A mnemonic with a single
+   operand shape gets one enumerator (e.g. I_XOR); a mnemonic whose encoding
+   depends on its operands gets one enumerator per shape, the suffix naming
+   the operands (e.g. ‘sne’ yields I_SNE_VX_VY for two v-registers and
+   I_SNE_VX_B for a v-register and a byte).
+
+   NOTE(review): other files may depend on the order of these enumerators;
+   check before reordering. */
+typedef enum {
+ I_ADD_I_VX,
+ I_ADD_VX_B,
+ I_ADD_VX_VY,
+ I_AND,
+ I_BCD,
+ I_CALL,
+ I_CLS,
+ I_DB,
+ I_DRW,
+ I_HEX,
+ I_JP_ADDR,
+ I_JP_V0_ADDR,
+ I_LD_DT,
+ I_LD_I,
+ I_LD_ST,
+ I_LD_VX_BYTE,
+ I_LD_VX_DT,
+ I_LD_VX_K,
+ I_LD_VX_VY,
+ I_OR,
+ I_RET,
+ I_RND,
+ I_RSTR,
+ I_SE_VX_B,
+ I_SE_VX_VY,
+ I_SHL,
+ I_SHR,
+ I_SKNP,
+ I_SKP,
+ I_SNE_VX_B,
+ I_SNE_VX_VY,
+ I_STOR,
+ I_SUB,
+ I_SUBN,
+ I_SYS,
+ I_XOR,
+} instrkind;
+
+/* Register identifiers.  R_V0 through R_VF come first, so the value of each
+   of those enumerators equals the register’s index (R_V0 == 0 … R_VF == 15).
+   They are followed by R_I, R_K, R_DT and R_ST — presumably the index
+   register, the keypad pseudo-register and the delay and sound timers; verify
+   against the lexer/parser. */
+typedef enum {
+ R_V0,
+ R_V1,
+ R_V2,
+ R_V3,
+ R_V4,
+ R_V5,
+ R_V6,
+ R_V7,
+ R_V8,
+ R_V9,
+ R_VA,
+ R_VB,
+ R_VC,
+ R_VD,
+ R_VE,
+ R_VF,
+ R_I,
+ R_K,
+ R_DT,
+ R_ST,
+} reg;
+
+/* Discriminator stored in struct dir.  Presumably D_INSTR marks an
+   instruction and D_LABEL a label declaration — TODO confirm against the
+   code that builds and consumes struct dir. */
+typedef enum {
+ D_INSTR,
+ D_LABEL,
+} dirkind;
+
+/* Arguments can always be represented by a uint16_t, however the parser is not
+   responsible for assigning addresses to labels. As a result an arg at this
+   stage can be either a uint16_t or the name of a label. */
+struct raw_addr {
+ /* Selects the active union member: presumably true means SV holds a label
+    name and false means VAL holds a number — TODO confirm in parseaddr() */
+ bool label;
+ union {
+ uint16_t val;
+ struct u8view sv;
+ };
+};
+
+/* A single parsed instruction: which form it is, plus its operands. */
+struct instr {
+ instrkind kind;
+
+ /* The most arguments any instruction can take is 3, so it’s more efficient
+ to just store the arguments in a fixed-size array. The only exception is
+ the ‘db’ instruction which takes a variable-number of arguments, so in
+ that case we use a dynamic array. */
+ union {
+ struct raw_addr args[3];
+ struct {
+ uint8_t *buf;
+ size_t cap;
+ };
+ };
+
+ /* Number of entries of ARGS in use; the parseop_*() functions increment it
+    as they append operands.  For ‘db’ it presumably counts the bytes stored
+    in BUF, with CAP the allocated capacity — TODO confirm. */
+ size_t len;
+};
+
+/* One element of the AST.  KIND tags the anonymous union: presumably D_LABEL
+   selects NAME (the label’s identifier) and D_INSTR selects INSTR — TODO
+   confirm against the code that constructs these. */
+struct dir {
+ dirkind kind;
+ union {
+ struct u8view name;
+ struct instr instr;
+ };
+};
+
+/* The parser’s output: a growable array of struct dir.  The parser appends to
+   it with dapush(); BUF, LEN and CAP presumably follow the field layout the
+   da.h macros expect — verify before renaming any of them. */
+struct ast {
+ struct dir *buf;
+ size_t len, cap;
+};
+
+/* Parser entry point: takes a struct tokens by value and returns a struct ast.
+   NOTE(review): error behaviour is not visible from this header; the
+   parseop_*() helpers in parser.c report bad operands via die_with_off(). */
+struct ast parsefile(struct tokens);
+
+#endif /* !AHOY_C8ASM_PARSER_H */
diff --git a/src/common/cerr.c b/src/common/cerr.c
new file mode 100644
index 0000000..0032795
--- /dev/null
+++ b/src/common/cerr.c
@@ -0,0 +1,73 @@
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "cerr.h"
+
+/* ANSI SGR escape sequences: switch bold on, and reset all attributes */
+#define SGR_BOLD "\33[1m"
+#define SGR_DONE "\33[0m"
+
+/* Module state, written only by cerrinit() and read by the die*() functions:
+   whether the message prefix is emboldened, and the program name that is
+   printed in front of every message. */
+static bool color;
+static const char *progname;
+
+/* Initialise the diagnostics module.  S is the name the program was invoked
+   as (presumably argv[0]); everything up to and including its last ‘/’ is
+   dropped and the remainder becomes the prefix of every message.  S is not
+   copied, so it must outlive all later calls into this module.
+
+   Bold output is enabled only when standard error is a terminal and the
+   NO_COLOR environment variable is unset or empty.  The terminal check is
+   made on stderr rather than stdout because stderr is the stream every
+   diagnostic is written to: with a stdout check, escape sequences leak into
+   a redirected stderr, and are dropped whenever stdout is piped. */
+void
+cerrinit(const char *s)
+{
+	const char *p = strrchr(s, '/');
+	progname = p ? p + 1 : s;
+
+	if (isatty(STDERR_FILENO)) {
+		const char *ev = getenv("NO_COLOR");
+		if (!ev || !*ev)
+			color = true;
+	}
+}
+
+/* Print “PROGNAME: MESSAGE: STRERROR” to standard error and exit with
+   EXIT_FAILURE.  MESSAGE is FMT formatted printf-style with the trailing
+   arguments; STRERROR describes the value errno had on entry.  The program
+   name and its colon are emboldened when colour is enabled. */
+void
+die(const char *fmt, ...)
+{
+	/* Capture errno first; the stdio calls below are free to clobber it */
+	int saved = errno;
+	const char *bold = color ? SGR_BOLD : "";
+	const char *reset = color ? SGR_DONE : "";
+	va_list args;
+
+	fprintf(stderr, "%s%s:%s ", bold, progname, reset);
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	fprintf(stderr, ": %s\n", strerror(saved));
+
+	exit(EXIT_FAILURE);
+}
+
+/* Print “PROGNAME: MESSAGE” followed by a newline to standard error and exit
+   with EXIT_FAILURE.  MESSAGE is FMT formatted printf-style with the trailing
+   arguments.  Unlike die(), errno is not consulted.  The program name and its
+   colon are emboldened when colour is enabled. */
+void
+diex(const char *fmt, ...)
+{
+	const char *bold = color ? SGR_BOLD : "";
+	const char *reset = color ? SGR_DONE : "";
+	va_list args;
+
+	fprintf(stderr, "%s%s:%s ", bold, progname, reset);
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	putc('\n', stderr);
+
+	exit(EXIT_FAILURE);
+}
+
+/* Print “PROGNAME:FILE:OFF: MESSAGE” followed by a newline to standard error
+   and exit with EXIT_FAILURE.  MESSAGE is FMT formatted printf-style with the
+   trailing arguments.  The whole “PROGNAME:FILE:OFF:” prefix is emboldened
+   when colour is enabled. */
+void
+die_with_off(const char *file, size_t off, const char *fmt, ...)
+{
+	const char *bold = color ? SGR_BOLD : "";
+	const char *reset = color ? SGR_DONE : "";
+	va_list args;
+
+	fprintf(stderr, "%s%s:%s:%zu:%s ", bold, progname, file, off, reset);
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	putc('\n', stderr);
+
+	exit(EXIT_FAILURE);
+}
diff --git a/src/common/cerr.h b/src/common/cerr.h
new file mode 100644
index 0000000..de58b1d
--- /dev/null
+++ b/src/common/cerr.h
@@ -0,0 +1,19 @@
+#ifndef AHOY_COMMON_CERR_H
+#define AHOY_COMMON_CERR_H
+
+#include <stddef.h>
+
+/* clang-format off */
+
+/* Must be called before any of the functions below.  Records the basename of
+   the given program name for use as the message prefix, and decides whether
+   that prefix is printed in bold (only on a terminal, and only when NO_COLOR
+   is unset or empty).  The string is not copied. */
+[[gnu::nonnull]] void cerrinit(const char *);
+
+/* Print “progname: <formatted message>: <strerror(errno)>” to standard error
+   and exit with EXIT_FAILURE. */
+[[noreturn, gnu::nonnull, gnu::format(printf, 1, 2)]]
+void die(const char *, ...);
+
+/* Like die(), but without the trailing errno description. */
+[[noreturn, gnu::nonnull, gnu::format(printf, 1, 2)]]
+void diex(const char *, ...);
+
+/* Print “progname:<file>:<offset>: <formatted message>” to standard error and
+   exit with EXIT_FAILURE.  The arguments are the file name, the offset to
+   report, and then the printf-style format with its arguments. */
+[[noreturn, gnu::nonnull, gnu::format(printf, 3, 4)]]
+void die_with_off(const char *, size_t, const char *, ...);
+
+#endif /* !AHOY_COMMON_CERR_H */