5 files changed, 46 insertions, 28 deletions
diff --git a/src/c8asm/common.h b/src/c8asm/common.h
index f5899f0..49af0f7 100644
--- a/src/c8asm/common.h
+++ b/src/c8asm/common.h
@@ -6,5 +6,6 @@
 extern size_t filesize;
 extern const char *filename;
 extern const char8_t *baseptr;
+extern struct u8view filebuf;
 
 #endif /* !AHOY_C8ASM_COMMON_H */
diff --git a/src/c8asm/lexer.c b/src/c8asm/lexer.c
index 04fb8ad..3d2a3bf 100644
--- a/src/c8asm/lexer.c
+++ b/src/c8asm/lexer.c
@@ -10,12 +10,15 @@
 #define ISDIGIT(n)   ((n) >= '0' && (n) <= '9')
 #define U8MOV(sv, n) ((sv)->p += (n), (sv)->len -= (n))
 
-#define E_BASE         "integer with invalid base specifier ‘%.*s’"
-#define E_EXTRA        "unknown extraneous character ‘%.*s’"
-#define E_IDENTCHAR    "illegal character in identifier ‘%.*s’"
+#define die_at_pos_with_code(HL, OFF, ...) \
+	die_at_pos_with_code(filename, filebuf, (HL), (OFF), __VA_ARGS__)
+
+#define E_BASE         "integer with invalid base specifier"
+#define E_EXTRA        "unknown extraneous character"
+#define E_IDENTCCHAR   "illegal character in identifier"
 #define E_IDENTLOST    "local label missing identifier"
-#define E_IDENTSCHAR   "illegal first character in identifier ‘%.*s’"
-#define E_UNTERMINATED "unterminated string literal ‘%.*s%.*s’"
+#define E_IDENTSCHAR   "illegal first character in identifier"
+#define E_UNTERMINATED "unterminated string literal"
 #define E_UTF8         "invalid UTF-8 byte near ‘%02X’"
 
 #define EOLS     U"\n\v\f\r\x85\u2028\u2029"
@@ -35,10 +38,11 @@ tokrepr(tokkind k)
 }
 
 struct tokens
-lexfile(struct u8view sv)
+lexfile(void)
 {
 	const char8_t *s;
 	struct tokens toks;
+	struct u8view sv = filebuf;
 
 	if (s = u8chk(sv.p, sv.len))
 		die_with_off(filename, s - sv.p, E_UTF8, *s);
@@ -67,8 +71,8 @@ lexfile(struct u8view sv)
 void
 lexline(struct tokens *toks, struct u8view *sv)
 {
-#define die_with_off(...) \
-	die_with_off(filename, sv->p - baseptr - w, __VA_ARGS__);
+#define _die_at_pos_with_code(HL, ...) \
+	die_at_pos_with_code((HL), sv->p - baseptr - w, __VA_ARGS__)
 
 	struct token tok;
 
@@ -113,7 +117,7 @@ lexline(struct tokens *toks, struct u8view *sv)
 					break;
 				default:
 					if (!ISDIGIT(ch))
-						die_with_off(E_BASE, w, sv->p - w);
+						_die_at_pos_with_code(tok.sv, E_BASE);
 				}
 			}
 
@@ -124,14 +128,12 @@ out:
 		} else if (ch == '.' || ch == '_' || rprop_is_xids(ch)) {
 			tok.kind = T_IDENT;
 			if (ch == '.') {
-				if (!sv->len)
-					die_with_off(E_IDENTLOST);
-
 				tok.sv.len += w = u8next(&ch, &sv->p, &sv->len);
-				if (rprop_is_pat_ws(ch))
-					die_with_off(E_IDENTLOST);
+				if (!w || rprop_is_pat_ws(ch))
+					_die_at_pos_with_code(tok.sv, E_IDENTLOST);
 				if (ch != '_' && !rprop_is_xids(ch)) {
-					die_with_off(E_IDENTSCHAR, w, sv->p - w);
+					U8MOV(&tok.sv, 1);
+					_die_at_pos_with_code(tok.sv, E_IDENTSCHAR);
 				}
 			}
 
@@ -140,8 +142,13 @@ out:
 					U8MOV(sv, -w);
 					break;
 				}
-				if (!rprop_is_xidc(ch))
-					die_with_off(E_IDENTCHAR, w, sv->p - w);
+				if (!rprop_is_xidc(ch)) {
+					struct u8view hl = {
+						.p = sv->p - w,
+						.len = w,
+					};
+					_die_at_pos_with_code(hl, E_IDENTCCHAR);
+				}
 
 				tok.sv.len += w;
 			}
@@ -152,22 +159,24 @@ out:
 				if (ch == '"')
 					goto found;
 			}
-			die_with_off(E_UNTERMINATED, (int)MIN(tok.sv.len, 20), tok.sv.p,
-			             tok.sv.len > 20 ? (int)lengthof(u8"…") - 1 : 0, u8"…");
+			_die_at_pos_with_code(tok.sv, E_UNTERMINATED);
 found:
 		} else if (ch == ':') {
 			tok.kind = T_COLON;
 		} else if (ch == ';') {
 			goto end;
 		} else {
-			die_with_off(E_EXTRA, w, sv->p - w);
+			struct u8view hl = {.p = sv->p - w, .len = w};
+			_die_at_pos_with_code(hl, E_EXTRA);
 		}
 
 		/* The colon is the only token that isn’t whitespace separated */
 		if (ch != ':' && sv->len) {
 			w = u8next(&ch, &sv->p, &sv->len);
-			if (!w || !rprop_is_pat_ws(ch))
-				die_with_off(E_EXTRA, w, sv->p - w);
+			if (!w || !rprop_is_pat_ws(ch)) {
+				struct u8view hl = {.p = sv->p - w, .len = w};
+				_die_at_pos_with_code(hl, E_EXTRA);
+			}
 		}
 
 		dapush(toks, tok);
@@ -181,7 +190,7 @@ end:;
 	};
 	dapush(toks, tok);
 
-#undef die_with_off
+#undef _die_at_pos_with_code
 }
 
 bool
diff --git a/src/c8asm/lexer.h b/src/c8asm/lexer.h
index ef20cef..46692a8 100644
--- a/src/c8asm/lexer.h
+++ b/src/c8asm/lexer.h
@@ -23,6 +23,6 @@ struct tokens {
 };
 
 const char *tokrepr(tokkind);
-struct tokens lexfile(struct u8view);
+struct tokens lexfile(void);
 
 #endif /* !AHOY_C8ASM_LEXER_H */
diff --git a/src/c8asm/main.c b/src/c8asm/main.c
index 5ee9c9e..b286778 100644
--- a/src/c8asm/main.c
+++ b/src/c8asm/main.c
@@ -21,6 +21,7 @@ static void asmfile(int, const char *);
 size_t filesize;
 const char *filename;
 const char8_t *baseptr;
+struct u8view filebuf;
 
 int
 main(int argc, char **argv)
@@ -100,8 +101,9 @@ asmfile(int fd, const char *fn)
 
 	free(buf);
 	filesize = sb.len;
 	baseptr = u8strfit(&sb)->p;
+	filebuf = u8strtou8(sb);
-	assemble(stdout, ast = parsefile(toks = lexfile(u8strtou8(sb))));
+	assemble(stdout, ast = parsefile(toks = lexfile()));
 
 	da_foreach (&ast, node) {
 		if (node->kind == D_INSTR && node->instr.kind == I_DB)
diff --git a/src/c8asm/parser.c b/src/c8asm/parser.c
index e0f3660..5a68fad 100644
--- a/src/c8asm/parser.c
+++ b/src/c8asm/parser.c
@@ -206,14 +206,20 @@ struct token
 reqnext(const char *want, tokkind msk)
 {
 	struct token t;
+
 	if (i >= tokens->len)
 		die_with_off(baseptr + filesize - 1, E_EARLY, want);
 
 	if ((t = tokens->buf[i++]).kind & msk)
 		return t;
-	if (t.kind == T_EOL)
-		die_with_off(t.sv.p, E_EXPECTED2, want, tokrepr(t.kind));
-	die_with_off(t.sv.p, E_EXPECTED, want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
+	if (t.kind == T_EOL) {
+		die_at_pos_with_code(filename, filebuf, (struct u8view){},
+		                     t.sv.p - baseptr, E_EXPECTED2, want,
+		                     tokrepr(t.kind));
+	}
+
+	die_at_pos_with_code(filename, filebuf, t.sv, t.sv.p - baseptr, E_EXPECTED,
+	                     want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
 }
 
 #define I(...) ((struct dir){.kind = D_INSTR, .instr = (__VA_ARGS__)})