diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 02:04:37 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 02:04:37 +0200 |
commit | 5e384049170dcb9e7e8aae5e98729bbc26661f90 (patch) | |
tree | 26dfca606d4c55ca1917f7dd5da9eb211db82275 /src/analyzer.c | |
parent | 81ad5bea2fab5eec6c4d9a017f1acc5a53dbde47 (diff) |
Huge changes to static analysis
Diffstat (limited to 'src/analyzer.c')
-rw-r--r-- | src/analyzer.c | 261 |
1 files changed, 161 insertions, 100 deletions
diff --git a/src/analyzer.c b/src/analyzer.c index d563b84..e842859 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -10,33 +10,64 @@ #include "common.h" #include "errors.h" #include "parser.h" +#include "strview.h" #include "types.h" -struct environ { - struct declaration { - idx_t_ astidx; - } *buf; - size_t len, cap; -}; +/* A hashmap mapping symbol names to their indicies in the AST */ +typedef struct symtab { + struct symtab *child[4]; + struct strview key; + idx_t_ val; +} symtab; -struct evstack { - struct environ *buf; +/* A dynamic array of scopes */ +struct scopes { + struct scope *buf; size_t len, cap; }; +/* Analyzer context; keeps track of the state of static analysis */ struct azctx { + /* An arena allocator */ + arena *a; + + /* The return type of the function being analyzed */ struct type fnret; + + /* The name of the symbol being declared. This is necessary to allow + for ‘X :: X’ to be treated as shadowing and not a circular + definition */ struct strview decl; + + /* If we need to check for return statements. Only true for the + outer body-block of a function that returns a value. */ bool chkrets; + + /* The index of the current scope in the scopes array */ + idx_t_ si; }; -static void analyzeast(struct evstack, struct type *, struct ast, - struct lexemes) +static void analyzeast(struct scope *, struct type *, struct ast, + struct lexemes, arena *) __attribute__((nonnull)); -typedef idx_t_ analyzer(struct azctx, struct evstack, struct type *, struct ast, +/* Perform a pass over the entire AST and return an array of symbol + tables, one for each scope in the program */ +static struct scope *gensymtabs(struct ast, struct lexemes, arena *) + __attribute__((returns_nonnull, nonnull)); + +/* Find all the unordered symbols in the scope delimited by the inclusive + indicies BEG and END in the AST, and accumulate them into a symbol + table appended to the symbol table list. UP is the index of the + previous scopes symbol table in the symbol table list. */ +static void find_unordered_syms(struct scopes *, struct ast, struct lexemes, + idx_t_ up, idx_t_ beg, idx_t_ end, arena *) + __attribute__((nonnull)); + +typedef idx_t_ analyzer(struct azctx, struct scope *, struct type *, struct ast, struct lexemes, idx_t_) __attribute__((nonnull)); + static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt; static const struct type *typegrab(struct ast, struct lexemes, idx_t_) @@ -44,24 +75,70 @@ static const struct type *typegrab(struct ast, struct lexemes, idx_t_) static bool typecompat(struct type, struct type); static bool returns(struct ast, idx_t_); +/* Index the symbol table M with the key SV, returning a pointer to the + value. If no entry exists and A is non-null, a pointer to a newly + allocated (and zeroed) value is returned, NULL otherwise. */ +static idx_t_ *symtab_insert(symtab **m, struct strview sv, arena *a) + __attribute__((nonnull(1))); + /* Defined in primitives.gperf */ const struct type *typelookup(const uchar *, size_t) __attribute__((nonnull)); -struct type * -analyzeprog(struct ast ast, struct lexemes toks) +void +analyzeprog(struct ast ast, struct lexemes toks, arena *a, struct type **types, + struct scope **scps) +{ + *types = bufalloc(NULL, ast.len, sizeof(**types)); + memset(*types, 0, ast.len * sizeof(**types)); + + *scps = gensymtabs(ast, toks, a); + analyzeast(*scps, *types, ast, toks, a); +} + +struct scope * +gensymtabs(struct ast ast, struct lexemes toks, arena *a) { - arena a = NULL; - struct evstack evs = {.cap = 16}; - evs.buf = bufalloc(NULL, evs.cap, sizeof(*evs.buf)); - struct type *types = bufalloc(NULL, ast.len, sizeof(*types)); - memset(types, 0, ast.len * sizeof(*types)); + struct scopes scps = {.cap = 32}; + scps.buf = bufalloc(NULL, scps.cap, sizeof(*scps.buf)); + find_unordered_syms(&scps, ast, toks, 0, 0, ast.len - 1, a); + return scps.buf; +} - analyzeast(evs, types, ast, toks); +void +find_unordered_syms(struct scopes *scps, struct ast ast, struct lexemes toks, + idx_t_ up, idx_t_ beg, idx_t_ end, arena *a) +{ + if (scps->len == scps->cap) { + scps->cap *= 2; + scps->buf = bufalloc(scps->buf, scps->cap, sizeof(*scps->buf)); + } - arena_free(&a); - free(evs.buf); - return types; + struct scope *scp = scps->buf + scps->len++; + *scp = (struct scope){ + .i = beg, + .up = up, + .map = NULL, + }; + + for (idx_t_ i = beg; likely(i <= end); i++) { + bool globl_var = ast.kinds[i] <= _AST_DECLS_END && beg == 0; + bool const_var = (ast.kinds[i] | 1) == ASTPCDECL; + + if (globl_var || const_var) { + struct strview sv = toks.strs[ast.lexemes[i]]; + idx_t_ *p = symtab_insert(&scp->map, sv, a); + if (*p != 0) { + err("analyzer: Symbol ‘%.*s’ declared multiple times", + SV_PRI_ARGS(sv)); + } + *p = i; + } else if (ast.kinds[i] == ASTBLK) { + struct pair p = ast.kids[i]; + find_unordered_syms(scps, ast, toks, beg, p.lhs, p.rhs, a); + i = p.rhs; + } + } } const struct type * @@ -75,39 +152,31 @@ typegrab(struct ast ast, struct lexemes toks, idx_t_ i) } void -analyzeast(struct evstack evs, struct type *types, struct ast ast, - struct lexemes toks) +analyzeast(struct scope *scps, struct type *types, struct ast ast, + struct lexemes toks, arena *a) { - struct environ ev = {.cap = 16}; - ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf)); - - for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { - assert(ast.kinds[i] <= _AST_DECLS_END); - if (ev.len == ev.cap) { - ev.cap *= 2; - ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf)); - } - ev.buf[ev.len++] = (struct declaration){.astidx = i}; - } - - assert(evs.cap > 0); - evs.buf[0] = ev; - evs.len = 1; - - struct azctx ctx = {0}; + struct azctx ctx = {.a = a}; for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { assert(ast.kinds[i] <= _AST_DECLS_END); - analyzedecl(ctx, evs, types, ast, toks, i); + analyzedecl(ctx, scps, types, ast, toks, i); } - - free(ev.buf); } idx_t_ -analyzedecl(struct azctx ctx, struct evstack evs, struct type *types, +analyzedecl(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { - ctx.decl = toks.strs[ast.lexemes[i]]; + struct strview sv = toks.strs[ast.lexemes[i]]; + if (ctx.si > 0 && (ast.kinds[i] | 1) == ASTPDECL) { + idx_t_ *ip = symtab_insert(&scps[ctx.si].map, sv, ctx.a); + if (*ip == 0) + *ip = i; + else { + err("analyzer: Variable ‘%.*s’ declared multiple times", + SV_PRI_ARGS(sv)); + } + } + types[i].kind = TYPE_CHECKING; struct pair p = ast.kids[i]; @@ -121,7 +190,8 @@ analyzedecl(struct azctx ctx, struct evstack evs, struct type *types, if (p.lhs != AST_EMPTY) ltype = *typegrab(ast, toks, p.lhs); if (p.rhs != AST_EMPTY) { - ni = analyzeexpr(ctx, evs, types, ast, toks, p.rhs); + ctx.decl = sv; + ni = analyzeexpr(ctx, scps, types, ast, toks, p.rhs); rtype = types[p.rhs]; } else ni = fwdnode(ast, i); @@ -136,13 +206,13 @@ analyzedecl(struct azctx ctx, struct evstack evs, struct type *types, } idx_t_ -analyzestmt(struct azctx ctx, struct evstack evs, struct type *types, +analyzestmt(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: - return analyzedecl(ctx, evs, types, ast, toks, i); + return analyzedecl(ctx, scps, types, ast, toks, i); case ASTRET: { idx_t_ expr = ast.kids[i].rhs; if (expr == AST_EMPTY) { @@ -152,19 +222,18 @@ analyzestmt(struct azctx ctx, struct evstack evs, struct type *types, } else if (ctx.fnret.kind == TYPE_UNSET) err("analyzer: Function has no return value"); - idx_t_ ni = analyzeexpr(ctx, evs, types, ast, toks, ast.kids[i].rhs); + idx_t_ ni = analyzeexpr(ctx, scps, types, ast, toks, ast.kids[i].rhs); if (!typecompat(ctx.fnret, types[ast.kids[i].rhs])) err("analyzer: Return type mismatch"); return ni; } default: - assert(!"unreachable"); __builtin_unreachable(); } } idx_t_ -analyzeexpr(struct azctx ctx, struct evstack evs, struct type *types, +analyzeexpr(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { switch (ast.kinds[i]) { @@ -176,49 +245,44 @@ analyzeexpr(struct azctx ctx, struct evstack evs, struct type *types, case ASTIDENT: { struct strview sv = toks.strs[ast.lexemes[i]]; - for (size_t lvl = evs.len; lvl-- > 0;) { - struct environ ev = evs.buf[lvl]; - /* TODO: Binary search */ - for (size_t j = 0; j < ev.len; j++) { - struct strview sv2 = toks.strs[ast.lexemes[ev.buf[j].astidx]]; - if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0) - continue; - - /* We need this to allow shadowing in situations like - FOO :FOO */ - if (lvl == evs.len - 1 && sv.len == ctx.decl.len - && memcmp(sv.p, ctx.decl.p, sv.len) == 0) - { - break; - } + /* Variable shadowing */ + if (strview_eq(sv, ctx.decl) && ctx.si > 0) + ctx.si--; - switch (types[j].kind) { + for (idx_t_ lvl = ctx.si;;) { + struct scope scp = scps[lvl]; + idx_t_ *ip = symtab_insert(&scp.map, sv, NULL); + + if (ip == NULL) { + if (lvl == 0) + break; + lvl = scp.up; + } else { + switch (types[*ip].kind) { case TYPE_UNSET: - evs.len = lvl + 1; - analyzedecl(ctx, evs, types, ast, toks, ev.buf[j].astidx); + ctx.si = lvl; + analyzedecl(ctx, scps, types, ast, toks, *ip); break; case TYPE_CHECKING: - err("analyzer: Circular definition of ‘%.*s’", (int)sv.len, - sv.p); + err("analyzer: Circular definition of ‘%.*s’", SV_PRI_ARGS(sv)); } - types[i] = types[j]; + types[i] = types[*ip]; return i + 1; } } - err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p); + err("analyzer: Unknown symbol ‘%.*s’", SV_PRI_ARGS(sv)); } case ASTFN: - return analyzefn(ctx, evs, types, ast, toks, i); + return analyzefn(ctx, scps, types, ast, toks, i); default: - err("analyzer: Unexpected AST kind %u", ast.kinds[i]); __builtin_unreachable(); } } idx_t_ -analyzefn(struct azctx ctx, struct evstack evs, struct type *types, +analyzefn(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { struct type t = {.kind = TYPE_FN}; @@ -232,33 +296,17 @@ analyzefn(struct azctx ctx, struct evstack evs, struct type *types, } else ctx.fnret.kind = TYPE_UNSET; types[i] = t; - return analyzeblk(ctx, evs, types, ast, toks, p.rhs); + return analyzeblk(ctx, scps, types, ast, toks, p.rhs); } idx_t_ -analyzeblk(struct azctx ctx, struct evstack evs, struct type *types, +analyzeblk(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { - struct environ ev = {.cap = 16}; - ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf)); - struct pair p = ast.kids[i]; - for (idx_t_ i = p.lhs; likely(i < p.rhs); i = fwdnode(ast, i)) { - if (ast.kinds[i] != ASTCDECL) - continue; - if (ev.len == ev.cap) { - ev.cap *= 2; - ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf)); - } - ev.buf[ev.len++] = (struct declaration){.astidx = i}; - } - - if (evs.len == evs.cap) { - evs.cap *= 2; - evs.buf = bufalloc(evs.buf, evs.cap, sizeof(*evs.buf)); - } - evs.buf[evs.len++] = ev; + while (scps[ctx.si].i != p.lhs) + ctx.si++; bool chkrets = ctx.chkrets, hasret = false; ctx.chkrets = false; @@ -266,13 +314,11 @@ analyzeblk(struct azctx ctx, struct evstack evs, struct type *types, for (i = p.lhs; i <= p.rhs;) { if (chkrets && returns(ast, i)) hasret = true; - i = analyzestmt(ctx, evs, types, ast, toks, i); + i = analyzestmt(ctx, scps, types, ast, toks, i); } - if (chkrets && !hasret) err("analyzer: Function doesn’t return on all paths"); - free(ev.buf); return i; } @@ -312,3 +358,18 @@ typecompat(struct type lhs, struct type rhs) return lhs.issigned == rhs.issigned && lhs.isfloat == rhs.isfloat && lhs.size == rhs.size; } + +idx_t_ * +symtab_insert(symtab **m, struct strview k, arena *a) +{ + for (uint64_t h = strview_hash(k); *m; h <<= 2) { + if (strview_eq(k, (*m)->key)) + return &(*m)->val; + m = &(*m)->child[h >> 62]; + } + if (a == NULL) + return NULL; + *m = arena_new(a, symtab, 1); + (*m)->key = k; + return &(*m)->val; +} |