From 6eec862df222c8d060e8482dd09d0f91197ee9fc Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Mon, 19 Feb 2024 01:36:55 +0100
Subject: Better error messages in the lexer

---
 src/c8asm/common.h |   1 +
 src/c8asm/lexer.c  |  55 ++++++++++++++++------------
 src/c8asm/lexer.h  |   2 +-
 src/c8asm/main.c   |   4 ++-
 src/c8asm/parser.c |  12 +++++--
 src/common/cerr.c  | 103 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 src/common/cerr.h  |   7 ++++
 7 files changed, 148 insertions(+), 36 deletions(-)

(limited to 'src')

diff --git a/src/c8asm/common.h b/src/c8asm/common.h
index f5899f0..49af0f7 100644
--- a/src/c8asm/common.h
+++ b/src/c8asm/common.h
@@ -6,5 +6,6 @@
 extern size_t filesize;
 extern const char *filename;
 extern const char8_t *baseptr;
+extern struct u8view filebuf;
 
 #endif /* !AHOY_C8ASM_COMMON_H */
diff --git a/src/c8asm/lexer.c b/src/c8asm/lexer.c
index 04fb8ad..3d2a3bf 100644
--- a/src/c8asm/lexer.c
+++ b/src/c8asm/lexer.c
@@ -10,12 +10,15 @@
 #define ISDIGIT(n) ((n) >= '0' && (n) <= '9')
 #define U8MOV(sv, n) ((sv)->p += (n), (sv)->len -= (n))
 
-#define E_BASE "integer with invalid base specifier ‘%.*s’"
-#define E_EXTRA "unknown extraneous character ‘%.*s’"
-#define E_IDENTCHAR "illegal character in identifier ‘%.*s’"
+#define die_at_pos_with_code(HL, OFF, ...) \
+	die_at_pos_with_code(filename, filebuf, (HL), (OFF), __VA_ARGS__)
+
+#define E_BASE "integer with invalid base specifier"
+#define E_EXTRA "unknown extraneous character"
+#define E_IDENTCCHAR "illegal character in identifier"
 #define E_IDENTLOST "local label missing identifier"
-#define E_IDENTSCHAR "illegal first character in identifier ‘%.*s’"
-#define E_UNTERMINATED "unterminated string literal ‘%.*s%.*s’"
+#define E_IDENTSCHAR "illegal first character in identifier"
+#define E_UNTERMINATED "unterminated string literal"
 #define E_UTF8 "invalid UTF-8 byte near ‘%02X’"
 
 #define EOLS U"\n\v\f\r\x85\u2028\u2029"
@@ -35,10 +38,11 @@ tokrepr(tokkind k)
 }
 
 struct tokens
-lexfile(struct u8view sv)
+lexfile(void)
 {
 	const char8_t *s;
 	struct tokens toks;
+	struct u8view sv = filebuf;
 
 	if (s = u8chk(sv.p, sv.len))
 		die_with_off(filename, s - sv.p, E_UTF8, *s);
@@ -67,8 +71,8 @@ lexfile(struct u8view sv)
 void
 lexline(struct tokens *toks, struct u8view *sv)
 {
-#define die_with_off(...) \
-	die_with_off(filename, sv->p - baseptr - w, __VA_ARGS__);
+#define _die_at_pos_with_code(HL, ...) \
+	die_at_pos_with_code((HL), sv->p - baseptr - w, __VA_ARGS__)
 
 	struct token tok;
 
@@ -113,7 +117,7 @@ lexline(struct tokens *toks, struct u8view *sv)
 				break;
 			default:
 				if (!ISDIGIT(ch))
-					die_with_off(E_BASE, w, sv->p - w);
+					_die_at_pos_with_code(tok.sv, E_BASE);
 			}
 		}
 
@@ -124,14 +128,12 @@ out:
 	} else if (ch == '.' || ch == '_' || rprop_is_xids(ch)) {
 		tok.kind = T_IDENT;
 		if (ch == '.') {
-			if (!sv->len)
-				die_with_off(E_IDENTLOST);
-
 			tok.sv.len += w = u8next(&ch, &sv->p, &sv->len);
-			if (rprop_is_pat_ws(ch))
-				die_with_off(E_IDENTLOST);
+			if (!w || rprop_is_pat_ws(ch))
+				_die_at_pos_with_code(tok.sv, E_IDENTLOST);
 			if (ch != '_' && !rprop_is_xids(ch)) {
-				die_with_off(E_IDENTSCHAR, w, sv->p - w);
+				U8MOV(&tok.sv, 1);
+				_die_at_pos_with_code(tok.sv, E_IDENTSCHAR);
 			}
 		}
 
@@ -140,8 +142,13 @@ out:
 				U8MOV(sv, -w);
 				break;
 			}
-			if (!rprop_is_xidc(ch))
-				die_with_off(E_IDENTCHAR, w, sv->p - w);
+			if (!rprop_is_xidc(ch)) {
+				struct u8view hl = {
+					.p = sv->p - w,
+					.len = w,
+				};
+				_die_at_pos_with_code(hl, E_IDENTCCHAR);
+			}
 			tok.sv.len += w;
 		}
 
@@ -152,22 +159,24 @@ out:
 			if (ch == '"')
 				goto found;
 		}
-		die_with_off(E_UNTERMINATED, (int)MIN(tok.sv.len, 20), tok.sv.p,
-		             tok.sv.len > 20 ? (int)lengthof(u8"…") - 1 : 0, u8"…");
+		_die_at_pos_with_code(tok.sv, E_UNTERMINATED);
 found:
 	} else if (ch == ':') {
 		tok.kind = T_COLON;
 	} else if (ch == ';') {
 		goto end;
 	} else {
-		die_with_off(E_EXTRA, w, sv->p - w);
+		struct u8view hl = {.p = sv->p - w, .len = w};
+		_die_at_pos_with_code(hl, E_EXTRA);
 	}
 
 	/* The colon is the only token that isn’t whitespace separated */
 	if (ch != ':' && sv->len) {
 		w = u8next(&ch, &sv->p, &sv->len);
-		if (!w || !rprop_is_pat_ws(ch))
-			die_with_off(E_EXTRA, w, sv->p - w);
+		if (!w || !rprop_is_pat_ws(ch)) {
+			struct u8view hl = {.p = sv->p - w, .len = w};
+			_die_at_pos_with_code(hl, E_EXTRA);
+		}
 	}
 
 	dapush(toks, tok);
@@ -181,7 +190,7 @@ end:;
 	};
 	dapush(toks, tok);
 
-#undef die_with_off
+#undef _die_at_pos_with_code
 }
 
 bool
diff --git a/src/c8asm/lexer.h b/src/c8asm/lexer.h
index ef20cef..46692a8 100644
--- a/src/c8asm/lexer.h
+++ b/src/c8asm/lexer.h
@@ -23,6 +23,6 @@ struct tokens {
 };
 
 const char *tokrepr(tokkind);
-struct tokens lexfile(struct u8view);
+struct tokens lexfile();
 
 #endif /* !AHOY_C8ASM_LEXER_H */
diff --git a/src/c8asm/main.c b/src/c8asm/main.c
index 5ee9c9e..b286778 100644
--- a/src/c8asm/main.c
+++ b/src/c8asm/main.c
@@ -21,6 +21,7 @@ static void asmfile(int, const char *);
 size_t filesize;
 const char *filename;
 const char8_t *baseptr;
+struct u8view filebuf;
 
 int
 main(int argc, char **argv)
@@ -100,8 +101,9 @@ asmfile(int fd, const char *fn)
 	free(buf);
 
 	filesize = sb.len;
+	filebuf = u8strtou8(sb);
 	baseptr = u8strfit(&sb)->p;
-	assemble(stdout, ast = parsefile(toks = lexfile(u8strtou8(sb))));
+	assemble(stdout, ast = parsefile(toks = lexfile()));
 
 	da_foreach (&ast, node) {
 		if (node->kind == D_INSTR && node->instr.kind == I_DB)
diff --git a/src/c8asm/parser.c b/src/c8asm/parser.c
index e0f3660..5a68fad 100644
--- a/src/c8asm/parser.c
+++ b/src/c8asm/parser.c
@@ -206,14 +206,20 @@ struct token
 reqnext(const char *want, tokkind msk)
 {
 	struct token t;
+
 	if (i >= tokens->len)
 		die_with_off(baseptr + filesize - 1, E_EARLY, want);
 	if ((t = tokens->buf[i++]).kind & msk)
 		return t;
-	if (t.kind == T_EOL)
-		die_with_off(t.sv.p, E_EXPECTED2, want, tokrepr(t.kind));
 
-	die_with_off(t.sv.p, E_EXPECTED, want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
+	if (t.kind == T_EOL) {
+		die_at_pos_with_code(filename, filebuf, (struct u8view){},
+		                     t.sv.p - baseptr, E_EXPECTED2, want,
+		                     tokrepr(t.kind));
+	}
+
+	die_at_pos_with_code(filename, filebuf, t.sv, t.sv.p - baseptr, E_EXPECTED,
+	                     want, tokrepr(t.kind), U8_PRI_ARGS(t.sv));
 }
 
 #define I(...) ((struct dir){.kind = D_INSTR, .instr = (__VA_ARGS__)})
diff --git a/src/common/cerr.c b/src/common/cerr.c
index 8570247..819df4a 100644
--- a/src/common/cerr.c
+++ b/src/common/cerr.c
@@ -1,19 +1,33 @@
 #include
 #include
+#include
 #include
 #include
 #include
 
 #include
 
+#include
+
 #include "cerr.h"
+#include "macros.h"
+#include "mbstring.h"
+
+#define TAB_AS_SPC " "
+
+#define EOLS8 u8"\n\v\f\r\x85\u2028\u2029"
+#define EOLS32 U"\n\v\f\r\x85\u2028\u2029"
 
 #define SGR_BOLD "\33[1m"
 #define SGR_DONE "\33[0m"
+#define SGR_WARN "\33[1;35m"
+#define SGR_ERR "\33[1;31m"
+
+int sizelen(size_t);
 
 static bool color;
 static const char *progname;
-static const char *_bold, *_done;
+static const char *_bold, *_done, *_warn, *_err;
 
 void
 cerrinit(const char *s)
@@ -23,17 +37,16 @@ cerrinit(const char *s)
 
 	if (isatty(STDOUT_FILENO)) {
 		const char *ev = getenv("NO_COLOR");
-		if (!ev || !*ev)
-			color = true;
+		color = !ev || !*ev;
 	}
 
 	if (color) {
 		_bold = SGR_BOLD;
 		_done = SGR_DONE;
-	} else {
-		_bold = "";
-		_done = "";
-	}
+		_warn = SGR_WARN;
+		_err = SGR_ERR;
+	} else
+		_bold = _done = _warn = _err = "";
 }
 
 void
@@ -71,10 +84,84 @@ die_with_off(const char *file, size_t off, const char *fmt, ...)
 	va_list ap;
 
 	va_start(ap, fmt);
-	fprintf(stderr, "%s%s:%s:%zu:%s ", _bold, progname, file, off, _done);
+	fprintf(stderr, "%s%s: %s:%zu:%s %serror:%s ", _bold, progname, file, off,
+	        _done, _err, _done);
+	vfprintf(stderr, fmt, ap);
+	fputc('\n', stderr);
+	va_end(ap);
+
+	exit(EXIT_FAILURE);
+}
+
+void
+die_at_pos_with_code(const char *file, struct u8view sv, struct u8view hl,
+                     size_t off, const char *fmt, ...)
+{
+	int w;
+	rune _;
+	size_t lb;
+	va_list ap;
+	const char8_t *prv, *end;
+
+	va_start(ap, fmt);
+	fprintf(stderr, "%s%s: %s:%zu:%s %serror:%s ", _bold, progname, file, off,
+	        _done, _err, _done);
 	vfprintf(stderr, fmt, ap);
 	fputc('\n', stderr);
 	va_end(ap);
 
+	for (lb = 0, prv = end = sv.p; end <= sv.p + off; lb++) {
+		prv = end;
+		end += u8cbspn(end, sv.p + sv.len - end, EOLS32, lengthof(EOLS32) - 1);
+		end += u8tor_uc(&_, end);
+	}
+
+	u8prev(&_, &end, prv);
+	w = sizelen(lb);
+	w = MAX(w, 4);
+
+	fprintf(stderr, " %*zu │ ", w, lb);
+
+	/* The following is really ugly, but it works! */
+	if (hl.p) {
+		ptrdiff_t w2 = hl.p - prv;
+		struct u8view pfx = {prv, w2};
+
+		for (ptrdiff_t i = 0; i < w2; i++) {
+			if (prv[i] == '\t')
+				fputs(TAB_AS_SPC, stderr);
+			else
+				fputc(prv[i], stderr);
+		}
+		fprintf(stderr, "%s%.*s%s%.*s\n", _err, U8_PRI_ARGS(hl), _done,
+		        (int)(end - (prv + w2 + hl.len)), prv + w2 + hl.len);
+		fprintf(stderr, " %*c │ ", w, ' ');
+
+		while (u8next(&_, &pfx.p, &pfx.len)) {
+			if (_ == '\t')
+				fputs(TAB_AS_SPC, stderr);
+			else
+				fputc(' ', stderr);
+		}
+
+		fprintf(stderr, "%s^", _err);
+		for (u8next(&_, &hl.p, &hl.len); u8next(&_, &hl.p, &hl.len);
+		     fputc('~', stderr))
+			;
+		fprintf(stderr, "%s\n", _done);
+	} else {
+		fprintf(stderr, "%.*s\n", (int)(end - prv), prv);
+		fprintf(stderr, " %*c │\n", w, ' ');
+	}
+
 	exit(EXIT_FAILURE);
 }
+
+int
+sizelen(size_t x)
+{
+	int n;
+	for (n = 0; x; x /= 10, n++)
+		;
+	return n;
+}
diff --git a/src/common/cerr.h b/src/common/cerr.h
index 869c9e2..5476f96 100644
--- a/src/common/cerr.h
+++ b/src/common/cerr.h
@@ -3,6 +3,8 @@
 
 #include
 
+#include
+
 void cerrinit(const char *);
 
 [[noreturn, gnu::nonnull, gnu::format(printf, 1, 2)]]
@@ -14,4 +16,9 @@ void diex(const char *, ...);
 [[noreturn, gnu::nonnull, gnu::format(printf, 3, 4)]]
 void die_with_off(const char *, size_t, const char *, ...);
 
+[[noreturn, gnu::nonnull, gnu::format(printf, 5, 6)]]
+void
+die_at_pos_with_code(const char *, struct u8view, struct u8view, size_t,
+                     const char *, ...);
+
 #endif /* !AHOY_COMMON_CERR_H */
-- 
cgit v1.2.3