#include #include #include #include #include #include #include #include #include "alloc.h" #include "analyzer.h" #include "common.h" #include "errors.h" #include "parser.h" #include "strview.h" #include "types.h" /* Mapping of symbol names to their indicies in the AST */ typedef struct symtab { struct symtab *child[4]; strview_t key; idx_t val; } symtab_t; typedef struct { scope_t *buf; size_t len, cap; } scopes_t; struct azctx { arena_t *a; /* The return type of the function being analyzed */ type_t fnret; /* The name of the symbol being declared. This is necessary to allow for ‘X :: X’ to be treated as shadowing and not a circular definition */ strview_t decl; /* The index of the current scope in the scopes array */ idx_t si; /* If we need to check for return statements. Only true for the outer body-block of a function that returns a value. */ bool chkrets; }; struct cfctx { arena_t *a; strview_t decl; idx_t si; }; /* Perform static analysis over the AST */ static void analyzeast(scope_t *, type_t *, ast_t, aux_t, lexemes_t, arena_t *) __attribute__((nonnull)); /* Perform constant folding over the AST */ static void constfold(mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, arena_t *) __attribute__((nonnull)); /* Perform a pass over the entire AST and return an array of symbol tables, one for each scope in the program */ static scope_t *gensymtabs(ast_t, aux_t, lexemes_t, arena_t *) __attribute__((returns_nonnull, nonnull)); /* Find all the unordered symbols in the scope delimited by the inclusive indicies BEG and END in the AST, and accumulate them into a symbol table appended to the symbol table list. UP is the index of the previous scopes symbol table in the symbol table list. */ static void find_unordered_syms(scopes_t *, ast_t, aux_t, lexemes_t, idx_t up, idx_t beg, idx_t end, arena_t *) __attribute__((nonnull)); typedef idx_t analyzer(struct azctx, scope_t *, type_t *, ast_t, aux_t, lexemes_t, idx_t) __attribute__((nonnull)); typedef idx_t constfolder(struct cfctx, mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, idx_t) __attribute__((nonnull)); static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt; static constfolder constfoldblk, constfolddecl, constfoldexpr, constfoldstmt; static const type_t *typegrab(ast_t, lexemes_t, idx_t) __attribute__((returns_nonnull)); static bool typecompat(type_t, type_t); static bool returns(ast_t, idx_t); /* Index the symbol table M with the key SV, returning a pointer to the value. If no entry exists and A is non-null, a pointer to a newly allocated (and zeroed) value is returned, NULL otherwise. */ static idx_t *symtab_insert(symtab **m, strview_t sv, arena_t *a) __attribute__((nonnull(1))); /* Defined in primitives.gperf */ const type_t *typelookup(const uchar *, size_t) __attribute__((nonnull)); void analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, type_t **types, scope_t **scps, mpq_t **folds) { *types = bufalloc(NULL, ast.len, sizeof(**types)); memset(*types, 0, ast.len * sizeof(**types)); *scps = gensymtabs(ast, aux, toks, a); analyzeast(*scps, *types, ast, aux, toks, a); *folds = bufalloc(NULL, ast.len, sizeof(**folds)); memset(*folds, 0, ast.len * sizeof(**folds)); constfold(*folds, *scps, *types, ast, toks, a); } scope_t * gensymtabs(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a) { scopes_t scps = {.cap = 32}; scps.buf = bufalloc(NULL, scps.cap, sizeof(*scps.buf)); find_unordered_syms(&scps, ast, aux, toks, 0, 0, ast.len - 1, a); return scps.buf; } void find_unordered_syms(scopes_t *scps, ast_t ast, aux_t aux, lexemes_t toks, idx_t up, idx_t beg, idx_t end, arena_t *a) { if (scps->len == scps->cap) { scps->cap *= 2; scps->buf = bufalloc(scps->buf, scps->cap, sizeof(*scps->buf)); } scope_t *scp = scps->buf + scps->len++; *scp = (scope_t){ .i = beg, .up = up, .map = NULL, }; for (idx_t i = beg; likely(i <= end); i++) { bool isstatic = ast.kinds[i] <= _AST_DECLS_END && aux.buf[ast.kids[i].lhs].decl.isstatic; bool isconst = ast.kinds[i] == ASTCDECL; if (isstatic || isconst) { strview_t sv = toks.strs[ast.lexemes[i]]; idx_t *p = symtab_insert(&scp->map, sv, a); if (*p != 0) { err("analyzer: Symbol ‘%.*s’ declared multiple times", SV_PRI_ARGS(sv)); } *p = i; } else if (ast.kinds[i] == ASTBLK) { pair_t p = ast.kids[i]; find_unordered_syms(scps, ast, aux, toks, beg, p.lhs, p.rhs, a); i = p.rhs; } } } const type_t * typegrab(ast_t ast, lexemes_t toks, idx_t i) { strview_t sv = toks.strs[ast.lexemes[i]]; const type_t *tp = typelookup(sv.p, sv.len); if (tp == NULL) err("analyzer: Unknown type ‘%.*s’", (int)sv.len, sv.p); return tp; } void analyzeast(scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, arena_t *a) { struct azctx ctx = {.a = a}; for (idx_t i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { assert(ast.kinds[i] <= _AST_DECLS_END); analyzedecl(ctx, scps, types, ast, aux, toks, i); } } idx_t analyzedecl(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { strview_t sv = toks.strs[ast.lexemes[i]]; if (ctx.si > 0 && ast.kinds[i] == ASTDECL) { idx_t *ip = symtab_insert(&scps[ctx.si].map, sv, ctx.a); if (*ip == 0) *ip = i; else { err("analyzer: Variable ‘%.*s’ declared multiple times", SV_PRI_ARGS(sv)); } } types[i].kind = TYPE_CHECKING; pair_t p = ast.kids[i]; type_t ltype, rtype; ltype.kind = TYPE_UNSET; idx_t typeidx = aux.buf[p.lhs].decl.type; assert(typeidx != AST_EMPTY || p.rhs != AST_EMPTY); idx_t ni; if (typeidx != AST_EMPTY) ltype = *typegrab(ast, toks, typeidx); if (p.rhs != AST_EMPTY) { ctx.decl = sv; ni = analyzeexpr(ctx, scps, types, ast, aux, toks, p.rhs); rtype = types[p.rhs]; } else ni = fwdnode(ast, i); if (ltype.kind == TYPE_UNSET) ltype = rtype; else if (!typecompat(ltype, rtype)) err("analyzer: Type mismatch"); types[i] = ltype; return ni; } idx_t analyzestmt(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: return analyzedecl(ctx, scps, types, ast, aux, toks, i); case ASTRET: { idx_t expr = ast.kids[i].rhs; if (expr == AST_EMPTY) { if (ctx.fnret.kind != TYPE_UNSET) err("analyzer: Missing return value"); return i + 1; } else if (ctx.fnret.kind == TYPE_UNSET) err("analyzer: Function has no return value"); idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, expr); if (!typecompat(ctx.fnret, types[expr])) err("analyzer: Return type mismatch"); types[i] = ctx.fnret; return ni; } default: __builtin_unreachable(); } } idx_t analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTNUMLIT: types[i].kind = TYPE_NUM; types[i].size = 0; types[i].issigned = true; return i + 1; case ASTIDENT: { strview_t sv = toks.strs[ast.lexemes[i]]; /* Variable shadowing */ if (strview_eq(sv, ctx.decl) && ctx.si > 0) ctx.si--; for (idx_t lvl = ctx.si;;) { scope_t scp = scps[lvl]; idx_t *ip = symtab_insert(&scp.map, sv, NULL); if (ip == NULL) { if (lvl == 0) break; lvl = scp.up; } else { switch (types[*ip].kind) { case TYPE_UNSET: ctx.si = lvl; analyzedecl(ctx, scps, types, ast, aux, toks, *ip); break; case TYPE_CHECKING: err("analyzer: Circular definition of ‘%.*s’", SV_PRI_ARGS(sv)); } types[i] = types[*ip]; return i + 1; } } err("analyzer: Unknown symbol ‘%.*s’", SV_PRI_ARGS(sv)); } case ASTFN: return analyzefn(ctx, scps, types, ast, aux, toks, i); default: __builtin_unreachable(); } } idx_t analyzefn(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { type_t t = {.kind = TYPE_FN}; pair_t p = ast.kids[i]; idx_t proto = p.lhs; if (ast.kids[proto].rhs != AST_EMPTY) { t.ret = typegrab(ast, toks, ast.kids[proto].rhs); ctx.fnret = *t.ret; ctx.chkrets = true; } else ctx.fnret.kind = TYPE_UNSET; types[i] = t; return analyzeblk(ctx, scps, types, ast, aux, toks, p.rhs); } idx_t analyzeblk(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { pair_t p = ast.kids[i]; while (scps[ctx.si].i != p.lhs) ctx.si++; bool chkrets = ctx.chkrets, hasret = false; ctx.chkrets = false; for (i = p.lhs; i <= p.rhs;) { if (chkrets && returns(ast, i)) hasret = true; i = analyzestmt(ctx, scps, types, ast, aux, toks, i); } if (chkrets && !hasret) err("analyzer: Function doesn’t return on all paths"); return i; } idx_t constfoldstmt(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: return constfolddecl(ctx, folds, scps, types, ast, toks, i); case ASTRET: return constfoldexpr(ctx, folds, scps, types, ast, toks, ast.kids[i].rhs); default: __builtin_unreachable(); } } idx_t constfoldblk(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { pair_t p = ast.kids[i]; while (scps[ctx.si].i != p.lhs) ctx.si++; for (i = p.lhs; i <= p.rhs; i = constfoldstmt(ctx, folds, scps, types, ast, toks, i)) ; return i; } idx_t constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { /* Check if this expression has already been constant folded. This works because when an mpq_t is initialized via mpq_init(), it is set to 0/1 meaning that the denominator pointer can’t be NULL. */ if ((*folds[i])._mp_den._mp_d != NULL) return fwdnode(ast, i); switch (ast.kinds[i]) { case ASTNUMLIT: { mpq_init(folds[i]); /* TODO: Temporary allocator */ strview_t sv = toks.strs[ast.lexemes[i]]; char *buf = bufalloc(NULL, sv.len + 1, 1); size_t len = 0; for (size_t i = 0; i < sv.len; i++) { if (isdigit(sv.p[i])) buf[len++] = sv.p[i]; } buf[len] = 0; int ret = mpq_set_str(folds[i], buf, 10); assert(ret == 0); free(buf); return fwdnode(ast, i); } case ASTIDENT: { strview_t sv = toks.strs[ast.lexemes[i]]; /* Variable shadowing */ if (strview_eq(sv, ctx.decl) && ctx.si > 0) ctx.si--; for (idx_t lvl = ctx.si;;) { scope_t scp = scps[lvl]; idx_t *ip = symtab_insert(&scp.map, sv, NULL); if (ip == NULL) { assert(lvl != 0); lvl = scp.up; } else { switch (ast.kinds[*ip]) { case ASTDECL: break; case ASTCDECL: { idx_t expr = ast.kids[*ip].rhs; assert(expr != AST_EMPTY); #if DEBUG mpq_init(folds[i]); mpq_set(folds[i], folds[expr]); #else *folds[i] = *folds[expr]; #endif if ((*folds[i])._mp_den._mp_d == NULL) { ctx.si = lvl; (void)constfolddecl(ctx, folds, scps, types, ast, toks, *ip); #if DEBUG mpq_init(folds[i]); mpq_set(folds[i], folds[expr]); #else *folds[i] = *folds[expr]; #endif assert((*folds[i])._mp_den._mp_d != NULL); } break; } default: __builtin_unreachable(); } return fwdnode(ast, i); } } } case ASTFN: return constfoldblk(ctx, folds, scps, types, ast, toks, ast.kids[i].rhs); default: __builtin_unreachable(); } } idx_t constfolddecl(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { if (ast.kids[i].rhs == AST_EMPTY) return fwdnode(ast, i); ctx.decl = toks.strs[ast.lexemes[i]]; return constfoldexpr(ctx, folds, scps, types, ast, toks, ast.kids[i].rhs); } void constfold(mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, arena_t *a) { struct cfctx ctx = {.a = a}; for (idx_t i = 0; likely(i < ast.len);) { assert(ast.kinds[i] <= _AST_DECLS_END); i = constfolddecl(ctx, folds, scps, types, ast, toks, i); } } bool returns(ast_t ast, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: return false; case ASTRET: return true; } __builtin_unreachable(); } bool typecompat(type_t lhs, type_t rhs) { /* Function types are compatible if they have the same parameter- and return types */ if (lhs.kind == TYPE_FN && rhs.kind == TYPE_FN) return lhs.paramcnt == rhs.paramcnt && lhs.ret == rhs.ret; if (lhs.kind == TYPE_FN || rhs.kind == TYPE_FN) return false; /* At this point we only have numeric types left */ /* Untyped numeric types are compatible with all numeric types */ if (lhs.size == 0 || rhs.size == 0) return true; /* Two typed numeric types are only compatible if they have the same size and sign and are either both integral or both floats */ return lhs.issigned == rhs.issigned && lhs.isfloat == rhs.isfloat && lhs.size == rhs.size; } idx_t * symtab_insert(symtab **m, strview_t k, arena_t *a) { for (uint64_t h = strview_hash(k); *m; h <<= 2) { if (strview_eq(k, (*m)->key)) return &(*m)->val; m = &(*m)->child[h >> 62]; } if (a == NULL) return NULL; *m = arena_new(a, symtab, 1); (*m)->key = k; return &(*m)->val; }