aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-02-19 01:36:55 +0100
committer Thomas Voss <mail@thomasvoss.com> 2024-02-19 01:36:55 +0100
commit 6eec862df222c8d060e8482dd09d0f91197ee9fc (patch)
tree 233c425622b379c3a719051351a1d5137df73e11 /src
parent 623c781706175767a81287f48fc399bf6aee5a43 (diff)
Better error messages in the lexer
Diffstat (limited to 'src')
-rw-r--r-- src/c8asm/common.h 1
-rw-r--r-- src/c8asm/lexer.c 55
-rw-r--r-- src/c8asm/lexer.h 2
-rw-r--r-- src/c8asm/main.c 4
-rw-r--r-- src/c8asm/parser.c 12
-rw-r--r-- src/common/cerr.c 103
-rw-r--r-- src/common/cerr.h 7
7 files changed, 148 insertions, 36 deletions
diff --git a/src/c8asm/common.h b/src/c8asm/common.h
index f5899f0..49af0f7 100644
--- a/src/c8asm/common.h
+++ b/src/c8asm/common.h
@@ -6,5 +6,6 @@
extern size_t filesize;
extern const char *filename;
extern const char8_t *baseptr;
+extern struct u8view filebuf;
#endif /* !AHOY_C8ASM_COMMON_H */
diff --git a/src/c8asm/lexer.c b/src/c8asm/lexer.c
index 04fb8ad..3d2a3bf 100644
--- a/src/c8asm/lexer.c
+++ b/src/c8asm/lexer.c
@@ -10,12 +10,15 @@
#define ISDIGIT(n) ((n) >= '0' && (n) <= '9')
#define U8MOV(sv, n) ((sv)->p += (n), (sv)->len -= (n))
-#define E_BASE "integer with invalid base specifier ‘%.*s’"
-#define E_EXTRA "unknown extraneous character ‘%.*s’"
-#define E_IDENTCHAR "illegal character in identifier ‘%.*s’"
+#define die_at_pos_with_code(HL, OFF, ...) \
+ die_at_pos_with_code(filename, filebuf, (HL), (OFF), __VA_ARGS__)
+
+#define E_BASE "integer with invalid base specifier"
+#define E_EXTRA "unknown extraneous character"
+#define E_IDENTCCHAR "illegal character in identifier"
#define E_IDENTLOST "local label missing identifier"
-#define E_IDENTSCHAR "illegal first character in identifier ‘%.*s’"
-#define E_UNTERMINATED "unterminated string literal ‘%.*s%.*s’"
+#define E_IDENTSCHAR "illegal first character in identifier"
+#define E_UNTERMINATED "unterminated string literal"
#define E_UTF8 "invalid UTF-8 byte near ‘%02X’"
#define EOLS U"\n\v\f\r\x85\u2028\u2029"
@@ -35,10 +38,11 @@ tokrepr(tokkind k)
}
struct tokens
-lexfile(struct u8view sv)
+lexfile(void)
{
const char8_t *s;
struct tokens toks;
+ struct u8view sv = filebuf;
if (s = u8chk(sv.p, sv.len))
die_with_off(filename, s - sv.p, E_UTF8, *s);
@@ -67,8 +71,8 @@ lexfile(struct u8view sv)
void
lexline(struct tokens *toks, struct u8view *sv)
{
-#define die_with_off(...) \
- die_with_off(filename, sv->p - baseptr - w, __VA_ARGS__);
+#define _die_at_pos_with_code(HL, ...) \
+ die_at_pos_with_code((HL), sv->p - baseptr - w, __VA_ARGS__)
struct token tok;
@@ -113,7 +117,7 @@ lexline(struct tokens *toks, struct u8view *sv)
break;
default:
if (!ISDIGIT(ch))
- die_with_off(E_BASE, w, sv->p - w);
+ _die_at_pos_with_code(tok.sv, E_BASE);
}
}
@@ -124,14 +128,12 @@ out:
} else if (ch == '.' || ch == '_' || rprop_is_xids(ch)) {
tok.kind = T_IDENT;
if (ch == '.') {
- if (!sv->len)
- die_with_off(E_IDENTLOST);
-
tok.sv.len += w = u8next(&ch, &sv->p, &sv->len);
- if (rprop_is_pat_ws(ch))
- die_with_off(E_IDENTLOST);
+ if (!w || rprop_is_pat_ws(ch))
+ _die_at_pos_with_code(tok.sv, E_IDENTLOST);
if (ch != '_' && !rprop_is_xids(ch)) {
- die_with_off(E_IDENTSCHAR, w, sv->p - w);
+ U8MOV(&tok.sv, 1);
+ _die_at_pos_with_code(tok.sv, E_IDENTSCHAR);
}
}
@@ -140,8 +142,13 @@ out:
U8MOV(sv, -w);
break;
}
- if (!rprop_is_xidc(ch))
- die_with_off(E_IDENTCHAR, w, sv->p - w);
+ if (!rprop_is_xidc(ch)) {
+ struct u8view hl = {
+ .p = sv->p - w,
+ .len = w,
+ };
+ _die_at_pos_with_code(hl, E_IDENTCCHAR);
+ }
tok.sv.len += w;
}
@@ -152,22 +159,24 @@ out:
if (ch == '"')
goto found;
}
- die_with_off(E_UNTERMINATED, (int)MIN(tok.sv.len, 20), tok.sv.p,
- tok.sv.len > 20 ? (int)lengthof(u8"…") - 1 : 0, u8"…");
+ _die_at_pos_with_code(tok.sv, E_UNTERMINATED);
found:
} else if (ch == ':') {
tok.kind = T_COLON;
} else if (ch == ';') {
goto end;
} else {
- die_with_off(E_EXTRA, w, sv->p - w);
+ struct u8view hl = {.p = sv->p - w, .len = w};
+ _die_at_pos_with_code(hl, E_EXTRA);
}
/* The colon is the only token that isn’t whitespace separated */
if (ch != ':' && sv->len) {
w = u8next(&ch, &sv->p, &sv->len);
- if (!w || !rprop_is_pat_ws(ch))
- die_with_off(E_EXTRA, w, sv->p - w);
+ if (!w || !rprop_is_pat_ws(ch)) {
+ struct u8view hl = {.p = sv->p - w, .len = w};
+ _die_at_pos_with_code(hl, E_EXTRA);
+ }
}
dapush(toks, tok);
@@ -181,7 +190,7 @@ end:;
};
dapush(toks, tok);
-#undef die_with_off
+#undef _die_at_pos_with_code
}
bool
diff --git a/src/c8asm/lexer.h b/src/c8asm/lexer.h
index ef20cef..46692a8 100644
--- a/src/c8asm/lexer.h
+++ b/src/c8asm/lexer.h
@@ -23,6 +23,6 @@ struct tokens {
};
const char *tokrepr(tokkind);
-struct tokens lexfile(struct u8view);
+struct tokens lexfile();
#endif /* !AHOY_C8ASM_LEXER_H */
diff --git a/src/c8asm/main.c b/src/c8asm/main.c
index 5ee9c9e..b286778 100644
--- a/src/c8asm/main.c
+++ b/src/c8asm/main.c
@@ -21,6 +21,7 @@ static void asmfile(int, const char *);
size_t filesize;
const char *filename;
const char8_t *baseptr;
+struct u8view filebuf;
int
main(int argc, char **argv)
@@ -100,8 +101,9 @@ asmfile(int fd, const char *fn)
free(buf);
filesize = sb.len;
+ filebuf = u8strtou8(sb);
baseptr = u8strfit(&sb)->p;
- assemble(stdout, ast = parsefile(toks = lexfile(u8strtou8(sb))));
+ assemble(stdout, ast = parsefile(toks = lexfile()));
da_foreach (&ast, node) {
if (node->kind == D_INSTR && node->instr.kind == I_DB)
diff --git a/src/c8asm/parser.c b/src/c8asm/parser.c
index e0f3660..5a68fad 100644
--- a/src/c8asm/parser.c
+++ b/src/c8asm/parser.c
@@ -206,14 +206,20 @@ struct token
reqnext(const char *want, tokkind msk)
{
struct token t;
+
if (i >= tokens->len)
die_with_off(baseptr + filesize - 1, E_EARLY, want);
if ((t = tokens->buf[i++]).kind & msk)
return t;
- if (t.kind == T_EOL)
- die_with_off(t.sv.p, E_EXPECTED2, want, tokrepr(t.kind));
- die_with_off(t.sv.p, E_EXPECTED, want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
+ if (t.kind == T_EOL) {
+ die_at_pos_with_code(filename, filebuf, (struct u8view){},
+ t.sv.p - baseptr, E_EXPECTED2, want,
+ tokrepr(t.kind));
+ }
+
+ die_at_pos_with_code(filename, filebuf, t.sv, t.sv.p - baseptr, E_EXPECTED,
+ want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
}
#define I(...) ((struct dir){.kind = D_INSTR, .instr = (__VA_ARGS__)})
diff --git a/src/common/cerr.c b/src/common/cerr.c
index 8570247..819df4a 100644
--- a/src/common/cerr.c
+++ b/src/common/cerr.c
@@ -1,19 +1,33 @@
#include <errno.h>
#include <stdarg.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <mbstring.h>
+
#include "cerr.h"
+#include "macros.h"
+#include "mbstring.h"
+
+#define TAB_AS_SPC " "
+
+#define EOLS8 u8"\n\v\f\r\x85\u2028\u2029"
+#define EOLS32 U"\n\v\f\r\x85\u2028\u2029"
#define SGR_BOLD "\33[1m"
#define SGR_DONE "\33[0m"
+#define SGR_WARN "\33[1;35m"
+#define SGR_ERR "\33[1;31m"
+
+int sizelen(size_t);
static bool color;
static const char *progname;
-static const char *_bold, *_done;
+static const char *_bold, *_done, *_warn, *_err;
void
cerrinit(const char *s)
@@ -23,17 +37,16 @@ cerrinit(const char *s)
if (isatty(STDOUT_FILENO)) {
const char *ev = getenv("NO_COLOR");
- if (!ev || !*ev)
- color = true;
+ color = !ev || !*ev;
}
if (color) {
_bold = SGR_BOLD;
_done = SGR_DONE;
- } else {
- _bold = "";
- _done = "";
- }
+ _warn = SGR_WARN;
+ _err = SGR_ERR;
+ } else
+ _bold = _done = _warn = _err = "";
}
void
@@ -71,10 +84,84 @@ die_with_off(const char *file, size_t off, const char *fmt, ...)
va_list ap;
va_start(ap, fmt);
- fprintf(stderr, "%s%s:%s:%zu:%s ", _bold, progname, file, off, _done);
+ fprintf(stderr, "%s%s: %s:%zu:%s %serror:%s ", _bold, progname, file, off,
+ _done, _err, _done);
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+ va_end(ap);
+
+ exit(EXIT_FAILURE);
+}
+
+/* Report a fatal error together with the offending source line, then exit.
+
+   FILE is the name printed in the message header, SV is the buffer that is
+   searched for the line, HL is the span to highlight (a null HL.p selects
+   the plain, unhighlighted form), and OFF is an offset from SV.p that is
+   both printed in the header and used to locate the line. FMT and the
+   variadic arguments are a printf-style message.
+
+   Everything is written to stderr and the process exits with EXIT_FAILURE,
+   so this function never returns. */
+void
+die_at_pos_with_code(const char *file, struct u8view sv, struct u8view hl,
+ size_t off, const char *fmt, ...)
+{
+ int w;
+ rune _;
+ size_t lb;
+ va_list ap;
+ const char8_t *prv, *end;
+
+ /* Header: ‘progname: file:off: error: ’ followed by the caller’s message */
+ va_start(ap, fmt);
+ fprintf(stderr, "%s%s: %s:%zu:%s %serror:%s ", _bold, progname, file, off,
+ _done, _err, _done);
vfprintf(stderr, fmt, ap);
fputc('\n', stderr);
va_end(ap);
+ /* Step through SV one chunk at a time until END passes SV.p + OFF. PRV is
+ left at the start of the last chunk visited and LB counts the chunks, so
+ LB is at least 1 here. NOTE(review): this presumes u8cbspn() returns
+ the length of the run free of EOLS32 runes and u8tor_uc() the width of
+ the rune that follows, making each chunk a line and LB a 1-based line
+ number — confirm against mbstring.h. */
+ for (lb = 0, prv = end = sv.p; end <= sv.p + off; lb++) {
+ prv = end;
+ end += u8cbspn(end, sv.p + sv.len - end, EOLS32, lengthof(EOLS32) - 1);
+ end += u8tor_uc(&_, end);
+ }
+
+ /* Presumably moves END back over the line terminator consumed above so
+ that [PRV, END) is the line’s text only — confirm u8prev() semantics. */
+ u8prev(&_, &end, prv);
+ /* Gutter width: digits in the line number, but never narrower than 4 */
+ w = sizelen(lb);
+ w = MAX(w, 4);
+
+ fprintf(stderr, " %*zu │ ", w, lb);
+
+ /* The following is really ugly, but it works! */
+ if (hl.p) {
+ /* W2 is the byte distance from the start of the line to the highlight;
+ PFX views those same bytes for the marker row further down. */
+ ptrdiff_t w2 = hl.p - prv;
+ struct u8view pfx = {prv, w2};
+
+ /* Text before the highlight, written byte by byte with each tab
+ replaced by TAB_AS_SPC. */
+ for (ptrdiff_t i = 0; i < w2; i++) {
+ if (prv[i] == '\t')
+ fputs(TAB_AS_SPC, stderr);
+ else
+ fputc(prv[i], stderr);
+ }
+ /* The highlighted span wrapped in _err/_done, then the remainder of
+ the line up to END; after that the empty gutter of the marker row. */
+ fprintf(stderr, "%s%.*s%s%.*s\n", _err, U8_PRI_ARGS(hl), _done,
+ (int)(end - (prv + w2 + hl.len)), prv + w2 + hl.len);
+ fprintf(stderr, " %*c │ ", w, ' ');
+
+ /* Pad the marker row: TAB_AS_SPC for each tab in the prefix and a
+ single space for every other rune. Unlike the byte loop above, this
+ one iterates by rune. */
+ while (u8next(&_, &pfx.p, &pfx.len)) {
+ if (_ == '\t')
+ fputs(TAB_AS_SPC, stderr);
+ else
+ fputc(' ', stderr);
+ }
+
+ /* ‘^’ marks the first rune of the highlight; the first u8next() call
+ consumes that rune and each later successful call emits one ‘~’. */
+ fprintf(stderr, "%s^", _err);
+ for (u8next(&_, &hl.p, &hl.len); u8next(&_, &hl.p, &hl.len);
+ fputc('~', stderr))
+ ;
+ fprintf(stderr, "%s\n", _done);
+ } else {
+ /* No highlight: print the line as-is, then an empty gutter row */
+ fprintf(stderr, "%.*s\n", (int)(end - prv), prv);
+ fprintf(stderr, " %*c │\n", w, ' ');
+ }
+
exit(EXIT_FAILURE);
}
+
+/* Return the number of decimal digits needed to print X. Zero is printed as
+   the single digit ‘0’, so the result is never less than 1; this keeps the
+   value usable as a printf field width for any input. */
+int
+sizelen(size_t x)
+{
+	int n = 1;
+
+	/* One digit is always needed; every further factor of ten adds one */
+	while (x >= 10) {
+		x /= 10;
+		n++;
+	}
+	return n;
+}
diff --git a/src/common/cerr.h b/src/common/cerr.h
index 869c9e2..5476f96 100644
--- a/src/common/cerr.h
+++ b/src/common/cerr.h
@@ -3,6 +3,8 @@
#include <stddef.h>
+#include <mbstring.h>
+
void cerrinit(const char *);
[[noreturn, gnu::nonnull, gnu::format(printf, 1, 2)]]
@@ -14,4 +16,9 @@ void diex(const char *, ...);
[[noreturn, gnu::nonnull, gnu::format(printf, 3, 4)]]
void die_with_off(const char *, size_t, const char *, ...);
+[[noreturn, gnu::nonnull, gnu::format(printf, 5, 6)]]
+void
+die_at_pos_with_code(const char *, struct u8view, struct u8view, size_t,
+ const char *, ...);
+
#endif /* !AHOY_COMMON_CERR_H */