#include #include #include #include #include #include #include #include #include #include #include "alloc.h" #include "analyzer.h" #include "common.h" #include "errors.h" #include "parser.h" #include "strview.h" #include "symtab.h" #include "types.h" #define LOG2_10 (3.321928) #define MP_BITCNT_MAX ((mp_bitcnt_t)-1) /* In debug builds we want to actually alloc a new mpq_t so that it’s easier to free memory without doing a double free */ #if DEBUG # define MPQCPY(x, y) do { mpq_init(x); mpq_set(x, y); } while (false) #else # define MPQCPY(x, y) (*(x) = *(y)) #endif typedef struct { scope_t *buf; size_t len, cap; } scopes_t; struct azctx { arena_t *a; /* The return type of the function being analyzed */ type_t fnret; /* The name of the symbol being declared. This is necessary to allow for ‘X :: X’ to be treated as shadowing and not a circular definition */ strview_t decl; /* The index of the current scope in the scopes array */ idx_t si; /* If we need to check for return statements. Only true for the outer body-block of a function that returns a value. */ bool chkrets; }; struct cfctx { arena_t *a; scratch_t *s; strview_t decl; idx_t si; }; /* Perform static analysis over the AST */ static void analyzeast(scope_t *, type_t *, ast_t, aux_t, lexemes_t, arena_t *) __attribute__((nonnull)); /* Perform constant folding over the AST */ static void constfold(mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, arena_t *, scratch_t *) __attribute__((nonnull)); /* Perform a pass over the entire AST and return an array of symbol tables, one for each scope in the program */ static scope_t *gensymtabs(ast_t, aux_t, lexemes_t, arena_t *) __attribute__((returns_nonnull, nonnull)); /* Find all the unordered symbols in the scope delimited by the inclusive indicies BEG and END in the AST, and accumulate them into a symbol table appended to the symbol table list. UP is the index of the previous scopes symbol table in the symbol table list. */ static void find_unordered_syms(scopes_t *, ast_t, aux_t, lexemes_t, idx_t up, idx_t beg, idx_t end, arena_t *) __attribute__((nonnull)); typedef idx_t analyzer(struct azctx, scope_t *, type_t *, ast_t, aux_t, lexemes_t, idx_t) __attribute__((nonnull)); typedef idx_t constfolder(struct cfctx, mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, idx_t) __attribute__((nonnull)); static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt; static constfolder constfoldblk, constfolddecl, constfoldexpr, constfoldstmt; static const type_t *typegrab(ast_t, lexemes_t, idx_t) __attribute__((returns_nonnull)); static bool typecompat(type_t, type_t); static bool returns(ast_t, idx_t); /* Defined in primitives.gperf */ const type_t *typelookup(const uchar *, size_t) __attribute__((nonnull)); void analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, type_t **types, scope_t **scps, mpq_t **folds) { if ((*types = calloc(ast.len, sizeof(**types))) == NULL) err("calloc:"); *scps = gensymtabs(ast, aux, toks, a); analyzeast(*scps, *types, ast, aux, toks, a); scratch_t s = {0}; if ((*folds = calloc(ast.len, sizeof(**folds))) == NULL) err("calloc:"); constfold(*folds, *scps, *types, ast, toks, a, &s); } scope_t * gensymtabs(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a) { scopes_t scps = {.cap = 32}; scps.buf = bufalloc(NULL, scps.cap, sizeof(*scps.buf)); find_unordered_syms(&scps, ast, aux, toks, 0, 0, ast.len - 1, a); return scps.buf; } void find_unordered_syms(scopes_t *scps, ast_t ast, aux_t aux, lexemes_t toks, idx_t up, idx_t beg, idx_t end, arena_t *a) { if (scps->len == scps->cap) { scps->cap *= 2; scps->buf = bufalloc(scps->buf, scps->cap, sizeof(*scps->buf)); } scope_t *scp = scps->buf + scps->len++; *scp = (scope_t){ .i = beg, .up = up, .map = NULL, }; for (idx_t i = beg; likely(i <= end); i++) { bool isstatic = ast.kinds[i] <= _AST_DECLS_END && aux.buf[ast.kids[i].lhs].decl.isstatic; bool isconst = ast.kinds[i] == ASTCDECL; if (isstatic || isconst) { strview_t sv = toks.strs[ast.lexemes[i]]; symval_t *p = symtab_insert(&scp->map, sv, a); if (p->exists) { err("analyzer: Symbol ‘%.*s’ declared multiple times", SV_PRI_ARGS(sv)); } p->i = i; p->exists = true; } else if (ast.kinds[i] == ASTBLK) { pair_t p = ast.kids[i]; find_unordered_syms(scps, ast, aux, toks, beg, p.lhs, p.rhs, a); i = p.rhs; } } } const type_t * typegrab(ast_t ast, lexemes_t toks, idx_t i) { strview_t sv = toks.strs[ast.lexemes[i]]; const type_t *tp = typelookup(sv.p, sv.len); if (tp == NULL) err("analyzer: Unknown type ‘%.*s’", (int)sv.len, sv.p); return tp; } void analyzeast(scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, arena_t *a) { struct azctx ctx = {.a = a}; for (idx_t i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { assert(ast.kinds[i] <= _AST_DECLS_END); analyzedecl(ctx, scps, types, ast, aux, toks, i); } } idx_t analyzedecl(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { strview_t sv = toks.strs[ast.lexemes[i]]; bool isstatic = ast.kinds[i] <= _AST_DECLS_END && aux.buf[ast.kids[i].lhs].decl.isstatic; bool isconst = ast.kinds[i] == ASTCDECL; if (!isconst && !isstatic) { symval_t *sym = symtab_insert(&scps[ctx.si].map, sv, ctx.a); if (sym->exists) { err("analyzer: Variable ‘%.*s’ declared multiple times", SV_PRI_ARGS(sv)); } else { sym->i = i; sym->exists = true; } } types[i].kind = TYPE_CHECKING; pair_t p = ast.kids[i]; type_t ltype = {0}, rtype = {0}; idx_t typeidx = aux.buf[p.lhs].decl.type; assert(typeidx != AST_EMPTY || p.rhs != AST_EMPTY); idx_t ni; if (typeidx != AST_EMPTY) ltype = *typegrab(ast, toks, typeidx); if (p.rhs != AST_EMPTY) { ctx.decl = sv; ni = analyzeexpr(ctx, scps, types, ast, aux, toks, p.rhs); rtype = types[p.rhs]; } else ni = fwdnode(ast, i); if (ltype.kind == TYPE_UNSET) { ltype = rtype; if (ast.kinds[i] == ASTDECL && rtype.size == 0) ltype.size = 8; } else if (!typecompat(ltype, rtype)) err("analyzer: Type mismatch"); types[i] = ltype; return ni; } idx_t analyzestmt(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: return analyzedecl(ctx, scps, types, ast, aux, toks, i); case ASTRET: { idx_t expr = ast.kids[i].rhs; if (expr == AST_EMPTY) { if (ctx.fnret.kind != TYPE_UNSET) err("analyzer: Missing return value"); return i + 1; } else if (ctx.fnret.kind == TYPE_UNSET) err("analyzer: Function has no return value"); idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, expr); if (!typecompat(ctx.fnret, types[expr])) err("analyzer: Return type mismatch"); types[i] = ctx.fnret; return ni; } default: __builtin_unreachable(); } } idx_t analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTNUMLIT: { strview_t sv = toks.strs[ast.lexemes[i]]; types[i].kind = TYPE_NUM; types[i].size = 0; types[i].issigned = true; types[i].isfloat = memchr(sv.p, '.', sv.len) != NULL; return fwdnode(ast, i); } case ASTIDENT: { strview_t sv = toks.strs[ast.lexemes[i]]; /* Variable shadowing */ if (strview_eq(sv, ctx.decl) && ctx.si > 0) ctx.si--; for (idx_t lvl = ctx.si;;) { scope_t scp = scps[lvl]; symval_t *sym = symtab_insert(&scp.map, sv, NULL); if (sym == NULL) { if (lvl == 0) break; lvl = scp.up; } else { switch (types[sym->i].kind) { case TYPE_UNSET: ctx.si = lvl; analyzedecl(ctx, scps, types, ast, aux, toks, sym->i); break; case TYPE_CHECKING: err("analyzer: Circular definition of ‘%.*s’", SV_PRI_ARGS(sv)); } types[i] = types[sym->i]; return fwdnode(ast, i); } } err("analyzer: Unknown symbol ‘%.*s’", SV_PRI_ARGS(sv)); } case ASTUNNEG: { idx_t ni, rhs; rhs = ast.kids[i].rhs; ni = analyzeexpr(ctx, scps, types, ast, aux, toks, rhs); type_t t = types[rhs]; if (t.kind != TYPE_NUM || !t.issigned) err("analyzer: Unary negation is reserved for signed numeric types"); types[i] = t; return ni; } case ASTBINADD: case ASTBINSUB: case ASTBINMUL: case ASTBINDIV: case ASTBINMOD: { idx_t lhs, rhs; lhs = ast.kids[i].lhs; rhs = ast.kids[i].rhs; analyzeexpr(ctx, scps, types, ast, aux, toks, lhs); idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, rhs); if (!typecompat(types[lhs], types[rhs])) err("analyzer: Binary oprand type mismatch"); if (ast.kinds[i] == ASTBINMOD) { if (types[lhs].kind != TYPE_NUM || types[lhs].isfloat) err("analyzer: Remainder is not defined for non-integer types"); if (types[rhs].kind != TYPE_NUM || types[rhs].isfloat) err("analyzer: Remainder is not defined for non-integer types"); } /* In the expression ‘x ⋆ y’ where ⋆ is a binary operator, the expression has type T if both x and y have type T. If one of x or y is a sized type and the other isn’t, then the expression has the type of the sized type. Additionally if both x and y are unsized types but one is a floating-point type and the other isn’t, the type of the expression is an unsized floating-point type. */ types[i] = types[lhs].size != 0 ? types[lhs] : types[rhs]; types[i].isfloat = types[lhs].isfloat || types[rhs].isfloat; return ni; } case ASTFN: return analyzefn(ctx, scps, types, ast, aux, toks, i); default: __builtin_unreachable(); } } idx_t analyzefn(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { type_t t = {.kind = TYPE_FN}; pair_t p = ast.kids[i]; idx_t proto = p.lhs; if (ast.kids[proto].rhs != AST_EMPTY) { t.ret = typegrab(ast, toks, ast.kids[proto].rhs); ctx.fnret = *t.ret; ctx.chkrets = true; } else ctx.fnret.kind = TYPE_UNSET; types[i] = t; return analyzeblk(ctx, scps, types, ast, aux, toks, p.rhs); } idx_t analyzeblk(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, idx_t i) { pair_t p = ast.kids[i]; while (scps[ctx.si].i != p.lhs) ctx.si++; bool chkrets = ctx.chkrets, hasret = false; ctx.chkrets = false; for (i = p.lhs; i <= p.rhs;) { if (chkrets && returns(ast, i)) hasret = true; i = analyzestmt(ctx, scps, types, ast, aux, toks, i); } if (chkrets && !hasret) err("analyzer: Function doesn’t return on all paths"); return i; } idx_t constfoldstmt(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: return constfolddecl(ctx, folds, scps, types, ast, toks, i); case ASTRET: return constfoldexpr(ctx, folds, scps, types, ast, toks, ast.kids[i].rhs); default: __builtin_unreachable(); } } idx_t constfoldblk(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { pair_t p = ast.kids[i]; while (scps[ctx.si].i != p.lhs) ctx.si++; for (i = p.lhs; i <= p.rhs; i = constfoldstmt(ctx, folds, scps, types, ast, toks, i)) ; return i; } idx_t constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { if (MPQ_IS_INIT(folds[i])) return fwdnode(ast, i); switch (ast.kinds[i]) { case ASTNUMLIT: { mpq_init(folds[i]); strview_t sv = toks.strs[ast.lexemes[i]]; char *buf = tmpalloc(ctx.s, sv.len + 1, 1); size_t len = 0; bool isfloat = false; for (size_t i = 0; i < sv.len; i++) { if (sv.p[i] == '.') { isfloat = true; buf[len++] = sv.p[i]; } if (isdigit(sv.p[i])) buf[len++] = sv.p[i]; } buf[len] = 0; if (isfloat) { /* TODO: Is this correct? It seems to work… but I don’t know if there is a better way to do this; sometimes the precision is a few bits more than it could be. */ mpf_t x; double prec = ceil((sv.len - 1) * LOG2_10); mpf_init2(x, MIN(MP_BITCNT_MAX, (mp_bitcnt_t)prec)); #if DEBUG int ret = #endif mpf_set_str(x, buf, 10); assert(ret == 0); mpq_set_f(folds[i], x); mpf_clear(x); } else { #if DEBUG int ret = #endif mpq_set_str(folds[i], buf, 10); assert(ret == 0); } return fwdnode(ast, i); } case ASTIDENT: { strview_t sv = toks.strs[ast.lexemes[i]]; /* Variable shadowing */ if (strview_eq(sv, ctx.decl) && ctx.si > 0) ctx.si--; for (idx_t lvl = ctx.si;;) { scope_t scp = scps[lvl]; symval_t *sym = symtab_insert(&scp.map, sv, NULL); if (sym == NULL) { assert(lvl != 0); lvl = scp.up; } else { switch (ast.kinds[sym->i]) { case ASTDECL: break; case ASTCDECL: { idx_t expr = ast.kids[sym->i].rhs; assert(expr != AST_EMPTY); MPQCPY(folds[i], folds[expr]); if (!MPQ_IS_INIT(folds[i])) { ctx.si = lvl; (void)constfolddecl(ctx, folds, scps, types, ast, toks, sym->i); MPQCPY(folds[i], folds[expr]); assert(MPQ_IS_INIT(folds[i])); } break; } default: __builtin_unreachable(); } return fwdnode(ast, i); } } } case ASTUNNEG: { idx_t rhs = ast.kids[i].rhs; idx_t ni = constfoldexpr(ctx, folds, scps, types, ast, toks, rhs); mpq_t *x = folds + rhs; if (MPQ_IS_INIT(*x)) { MPQCPY(folds[i], *x); mpq_neg(folds[i], folds[i]); } return ni; } case ASTBINADD: case ASTBINSUB: case ASTBINMUL: { static void (*const mpq_fns[UINT8_MAX])(mpq_t, const mpq_t, const mpq_t) = { ['+'] = mpq_add, ['-'] = mpq_sub, ['*'] = mpq_mul, }; idx_t lhs, rhs; lhs = ast.kids[i].lhs; rhs = ast.kids[i].rhs; (void)constfoldexpr(ctx, folds, scps, types, ast, toks, lhs); idx_t ni = constfoldexpr(ctx, folds, scps, types, ast, toks, rhs); if (MPQ_IS_INIT(folds[lhs]) && MPQ_IS_INIT(folds[rhs])) { mpq_init(folds[i]); mpq_fns[ast.kinds[i]](folds[i], folds[lhs], folds[rhs]); } return ni; } case ASTBINDIV: { idx_t lhs, rhs; lhs = ast.kids[i].lhs; rhs = ast.kids[i].rhs; (void)constfoldexpr(ctx, folds, scps, types, ast, toks, lhs); idx_t ni = constfoldexpr(ctx, folds, scps, types, ast, toks, rhs); if (MPQ_IS_INIT(folds[lhs]) && MPQ_IS_INIT(folds[rhs])) { mpq_init(folds[i]); if (types[i].isfloat) mpq_div(folds[i], folds[lhs], folds[rhs]); else { mpz_tdiv_q(mpq_numref(folds[i]), mpq_numref(folds[lhs]), mpq_numref(folds[rhs])); } } return ni; } case ASTBINMOD: { idx_t lhs, rhs; lhs = ast.kids[i].lhs; rhs = ast.kids[i].rhs; (void)constfoldexpr(ctx, folds, scps, types, ast, toks, lhs); idx_t ni = constfoldexpr(ctx, folds, scps, types, ast, toks, rhs); if (MPQ_IS_INIT(folds[lhs]) && MPQ_IS_INIT(folds[rhs])) { mpq_init(folds[i]); mpz_tdiv_r(mpq_numref(folds[i]), mpq_numref(folds[lhs]), mpq_numref(folds[rhs])); } return ni; } case ASTFN: return constfoldblk(ctx, folds, scps, types, ast, toks, ast.kids[i].rhs); default: __builtin_unreachable(); } } idx_t constfolddecl(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, idx_t i) { if (ast.kids[i].rhs == AST_EMPTY) return fwdnode(ast, i); ctx.decl = toks.strs[ast.lexemes[i]]; return constfoldexpr(ctx, folds, scps, types, ast, toks, ast.kids[i].rhs); } void constfold(mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, arena_t *a, scratch_t *s) { struct cfctx ctx = {.a = a, .s = s}; for (idx_t i = 0; likely(i < ast.len);) { assert(ast.kinds[i] <= _AST_DECLS_END); i = constfolddecl(ctx, folds, scps, types, ast, toks, i); } } bool returns(ast_t ast, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: return false; case ASTRET: return true; } __builtin_unreachable(); } bool typecompat(type_t lhs, type_t rhs) { /* Function types are compatible if they have the same parameter- and return types */ if (lhs.kind == TYPE_FN && rhs.kind == TYPE_FN) return lhs.paramcnt == rhs.paramcnt && lhs.ret == rhs.ret; if (lhs.kind == TYPE_FN || rhs.kind == TYPE_FN) return false; /* At this point we only have numeric types left */ /* Untyped numeric types are compatible with all numeric types */ if (lhs.size == 0 || rhs.size == 0) return true; /* Two typed numeric types are only compatible if they have the same size and sign and are either both integral or both floats */ return lhs.issigned == rhs.issigned && lhs.isfloat == rhs.isfloat && lhs.size == rhs.size; }