From f05c9eddc9f4dff41016b5363925e59c2de671e2 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 2 Jul 2024 00:34:12 +0200 Subject: Completely rework how types are handled --- cbs.h | 18 +- grammar.ebnf | 75 ++++++ make.c | 27 +- src/analyzer.c | 682 ++++++++++++++++++++++++++++----------------------- src/analyzer.h | 22 +- src/codegen.c | 43 ++-- src/codegen.h | 2 +- src/common.h | 2 + src/main.c | 5 +- src/primitives.gperf | 42 ---- src/strview.h | 3 +- src/symtab.c | 21 ++ src/symtab.h | 26 +- test.yx | 133 ++++++++++ test/arena.c | 3 +- 15 files changed, 678 insertions(+), 426 deletions(-) create mode 100644 grammar.ebnf delete mode 100644 src/primitives.gperf create mode 100644 test.yx diff --git a/cbs.h b/cbs.h index 0295bbf..6618636 100644 --- a/cbs.h +++ b/cbs.h @@ -195,10 +195,22 @@ strspushenv(struct strs *xs, const char *ev, char **ys, size_t n) { /* NOTE: Do your best to NOT modify any pushed envvar! */ char *p = getenv(ev); - if (p == NULL || *p == 0) + if (p == NULL || *p == 0) { strspush(xs, ys, n); - else - strspush(xs, &p, 1); + return; + } + + wordexp_t we; + assert(wordexp(p, &we, WRDE_NOCMD) == 0); + + /* TODO: Memory leak! 
*/ + for (size_t i = 0; i < we.we_wordc; i++) { + char *w = strdup(we.we_wordv[i]); + assert(w != NULL); + strspushl(xs, w); + } + + wordfree(&we); } bool diff --git a/grammar.ebnf b/grammar.ebnf new file mode 100644 index 0000000..a5f5e50 --- /dev/null +++ b/grammar.ebnf @@ -0,0 +1,75 @@ +(* vi: ft=ebnf + *) + +program + : {'pub' declaration} + ; + +declaration + : mutdecl + | constdecl + ; + +assignment + : expression '=' expression ';' + ; + +mutdecl + : IDENT ':' type ';' + | IDENT ':' [type] '=' (expression | ellipsis) ';' + ; + +constdecl + : IDENT ':' [type] ':' (expression ';' | function) + ; + +function + : prototype '{' statement* '}' + ; + +prototype + : '(' ')' [type] + ; + +statement + | declaration + | assignment + | 'return' [expression] ';' + ; + +expression + : IDENT + | NUMERIC + | unop expression + | expression binop expression + | '(' expression ')' + ; + +unop + : '-' + | '&' + | '+' + | '~' + ; + +binop + : '+' + | '%' + | '&' + | '*' + | '-' + | '/' + | '<<' + | '>>' + | '|' + | '~' + ; + +type + : IDENT + ; + +ellipsis + : '…' + | '...' 
+ ; diff --git a/make.c b/make.c index 4692189..5d9212b 100644 --- a/make.c +++ b/make.c @@ -61,7 +61,7 @@ static void mkgmp(int); static bool tagvalid(const char *); static void chk_cpu_flags(void); static int globerr(const char *, int); -static tjob cc, cc_test, gperf; +static tjob cc, cc_test; static void usage(void) @@ -158,14 +158,7 @@ main(int argc, char **argv) glob_t g; - /* GNU Perf files */ - assert(glob("src/*.gperf", 0, globerr, &g) == 0); - for (size_t i = 0; i < g.gl_pathc; i++) - tpenq(&tp, gperf, g.gl_pathv[i], NULL); - tpwait(&tp); - /* C files */ - globfree(&g); assert(glob("src/*.c", 0, globerr, &g) == 0); for (size_t i = 0; i < g.gl_pathc; i++) tpenq(&tp, cc, g.gl_pathv[i], NULL); @@ -267,24 +260,6 @@ out: free(dst); } -void -gperf(void *arg) -{ - struct strs cmd = {0}; - char *dst = swpext(arg, "gen.c"), *src = arg; - - if (!fflag && fmdnewer(dst, src)) - goto out; - - strspushl(&cmd, "gperf", src, "--output-file", dst); - - cmdput(cmd); - cmdexec(cmd); - strsfree(&cmd); -out: - free(dst); -} - void ld(void) { diff --git a/src/analyzer.c b/src/analyzer.c index e7199ca..52c6535 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -36,10 +36,19 @@ typedef struct { } scopes_t; struct azctx { - arena_t *a; + arena_t *a; + scratch_t *s; + + ast_t ast; + aux_t aux; + lexemes_t toks; + mpq_t *folds; + scopes_t scps; + type_t **types; + typetab_t *ttab; /* The return type of the function being analyzed */ - type_t fnret; + type_t *fnret; /* The name of the symbol being declared. 
This is necessary to allow for ‘X :: X’ to be treated as shadowing and not a circular @@ -54,229 +63,275 @@ struct azctx { bool chkrets; }; -struct cfctx { - arena_t *a; - scratch_t *s; - strview_t decl; - idx_t si; -}; - /* Perform static analysis over the AST */ -static void analyzeast(scope_t *, type_t *, ast_t, aux_t, lexemes_t, arena_t *) +static void analyzeast(struct azctx *) __attribute__((nonnull)); /* Perform constant folding over the AST */ -static void constfold(mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, arena_t *, - scratch_t *) +static void constfold(struct azctx *) __attribute__((nonnull)); -/* Perform a pass over the entire AST and return an array of symbol - tables, one for each scope in the program */ -static scope_t *gensymtabs(ast_t, aux_t, lexemes_t, arena_t *) - __attribute__((returns_nonnull, nonnull)); +/* Perform a pass over the entire AST and generate an array of symbol + tables, one for each scope in the program. These tables are stored in + the provided CTX. */ +static void gensymtabs(struct azctx *ctx) + __attribute__((nonnull)); /* Find all the unordered symbols in the scope delimited by the inclusive indicies BEG and END in the AST, and accumulate them into a symbol table appended to the symbol table list. UP is the index of the previous scopes symbol table in the symbol table list. 
*/ -static void find_unordered_syms(scopes_t *, ast_t, aux_t, lexemes_t, idx_t up, - idx_t beg, idx_t end, arena_t *) - __attribute__((nonnull)); +static void find_unordered_syms(struct azctx *, idx_t up, idx_t beg, idx_t end); -typedef idx_t analyzer(struct azctx, scope_t *, type_t *, ast_t, aux_t, - lexemes_t, idx_t) +typedef idx_t analyzer(struct azctx *, idx_t) __attribute__((nonnull)); -typedef idx_t constfolder(struct cfctx, mpq_t *, scope_t *, type_t *, ast_t, - lexemes_t, idx_t) +static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt, + constfoldblk, constfolddecl, constfoldstmt; + +/* Perform constant-folding on the expression at index I in the AST, and + assert that the resulting constant can be represented by type T. */ +static idx_t constfoldexpr(struct azctx *, type_t *T, idx_t i) __attribute__((nonnull)); -static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt; -static constfolder constfoldblk, constfolddecl, constfoldstmt; -static idx_t constfoldexpr(struct cfctx, mpq_t *, scope_t *, type_t *, ast_t, - lexemes_t, type_t, idx_t) +/* Assert if the types T1 and T2 are compatible with each other */ +static bool typecompat(type_t *t1, type_t *t2) __attribute__((nonnull)); -static const type_t *typegrab(ast_t, lexemes_t, idx_t) - __attribute__((returns_nonnull)); -static bool typecompat(type_t, type_t); -static bool returns(ast_t, idx_t); +/* Check if the statement at node I in the AST returns from the function */ +static bool returns(ast_t, idx_t i); -/* Defined in primitives.gperf */ -const type_t *typelookup(const uchar *, size_t) - __attribute__((nonnull)); +enum { + PRIM_INT8, + PRIM_INT16, + PRIM_INT32, + PRIM_INT64, + PRIM_INT128, + PRIM_INT, -void -analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, type_t **types, - scope_t **scps, mpq_t **folds) + PRIM_UINT8, + PRIM_UINT16, + PRIM_UINT32, + PRIM_UINT64, + PRIM_UINT128, + PRIM_UINT, + + PRIM_RUNE, + + PRIM_F16, + PRIM_F32, + PRIM_F64, + 
PRIM_F128, + + _PRIM_CNT, +}; + +static struct { + strview_t name; + type_t t; +} primitives[] = { + [PRIM_INT] = {SVC("int"), {.kind = TYPE_NUM, .size = 8, .issigned = true}}, + [PRIM_INT8] = {SVC("i8"), {.kind = TYPE_NUM, .size = 1, .issigned = true}}, + [PRIM_INT16] = {SVC("i16"), {.kind = TYPE_NUM, .size = 2, .issigned = true}}, + [PRIM_INT32] = {SVC("i32"), {.kind = TYPE_NUM, .size = 4, .issigned = true}}, + [PRIM_INT64] = {SVC("i64"), {.kind = TYPE_NUM, .size = 8, .issigned = true}}, + [PRIM_INT128] = {SVC("i128"), {.kind = TYPE_NUM, .size = 16, .issigned = true}}, + + [PRIM_UINT] = {SVC("uint"), {.kind = TYPE_NUM, .size = 8}}, + [PRIM_UINT8] = {SVC("u8"), {.kind = TYPE_NUM, .size = 1}}, + [PRIM_UINT16] = {SVC("u16"), {.kind = TYPE_NUM, .size = 2}}, + [PRIM_UINT32] = {SVC("u32"), {.kind = TYPE_NUM, .size = 4}}, + [PRIM_UINT64] = {SVC("u64"), {.kind = TYPE_NUM, .size = 8}}, + [PRIM_UINT128] = {SVC("u128"), {.kind = TYPE_NUM, .size = 16}}, + + [PRIM_RUNE] = {SVC("rune"), {.kind = TYPE_NUM, .size = 4, .issigned = true}}, + + [PRIM_F16] = {SVC("f16"), {.kind = TYPE_NUM, .size = 2, .isfloat = true}}, + [PRIM_F32] = {SVC("f32"), {.kind = TYPE_NUM, .size = 4, .isfloat = true}}, + [PRIM_F64] = {SVC("f64"), {.kind = TYPE_NUM, .size = 8, .isfloat = true}}, + [PRIM_F128] = {SVC("f128"), {.kind = TYPE_NUM, .size = 16, .isfloat = true}}, +}; + +static type_t NOT_CHECKED = {.kind = TYPE_CHECKING}; +static type_t UNTYPED_INT = {.kind = TYPE_NUM, .size = 0}; +static type_t UNTYPED_FLT = {.kind = TYPE_NUM, .size = 0, .isfloat = true}; + +type_t ** +analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, scope_t **scps, + mpq_t **folds) { - if ((*types = calloc(ast.len, sizeof(**types))) == NULL) + struct azctx ctx = { + .a = a, + .s = &(scratch_t){0}, + .ast = ast, + .aux = aux, + .toks = toks, + }; + + for (size_t i = 0; i < lengthof(primitives); i++) { + *typetab_insert(&ctx.ttab, primitives[i].name, a) = + (type_t *)&primitives[i].t; + } + + if ((ctx.types = 
calloc(ctx.ast.len, sizeof(*ctx.types))) == NULL) err("calloc:"); - *scps = gensymtabs(ast, aux, toks, a); - analyzeast(*scps, *types, ast, aux, toks, a); + gensymtabs(&ctx); + analyzeast(&ctx); - scratch_t s = {0}; - if ((*folds = calloc(ast.len, sizeof(**folds))) == NULL) + if ((ctx.folds = calloc(ctx.ast.len, sizeof(**ctx.folds))) == NULL) err("calloc:"); - constfold(*folds, *scps, *types, ast, toks, a, &s); + constfold(&ctx); + *scps = ctx.scps.buf; + *folds = ctx.folds; + return ctx.types; } -scope_t * -gensymtabs(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a) +void +gensymtabs(struct azctx *ctx) { - scopes_t scps = {.cap = 32}; - scps.buf = bufalloc(NULL, scps.cap, sizeof(*scps.buf)); - find_unordered_syms(&scps, ast, aux, toks, 0, 0, ast.len - 1, a); - return scps.buf; + ctx->scps.cap = 32; + ctx->scps.buf = bufalloc(NULL, ctx->scps.cap, sizeof(*ctx->scps.buf)); + find_unordered_syms(ctx, 0, 0, ctx->ast.len - 1); } void -find_unordered_syms(scopes_t *scps, ast_t ast, aux_t aux, lexemes_t toks, - idx_t up, idx_t beg, idx_t end, arena_t *a) +find_unordered_syms(struct azctx *ctx, idx_t up, idx_t beg, idx_t end) { - if (scps->len == scps->cap) { - scps->cap *= 2; - scps->buf = bufalloc(scps->buf, scps->cap, sizeof(*scps->buf)); + if (ctx->scps.len == ctx->scps.cap) { + ctx->scps.cap *= 2; + ctx->scps.buf = bufalloc(ctx->scps.buf, ctx->scps.cap, + sizeof(*ctx->scps.buf)); } - scope_t *scp = scps->buf + scps->len++; - *scp = (scope_t){ - .i = beg, - .up = up, - .map = NULL, - }; + scope_t *scp = ctx->scps.buf + ctx->scps.len++; + *scp = (scope_t){.i = beg, .up = up}; for (idx_t i = beg; likely(i <= end); i++) { - bool isstatic = ast.kinds[i] <= _AST_DECLS_END - && aux.buf[ast.kids[i].lhs].decl.isstatic; - bool isconst = ast.kinds[i] == ASTCDECL; + bool isstatic = ctx->ast.kinds[i] <= _AST_DECLS_END + && ctx->aux.buf[ctx->ast.kids[i].lhs].decl.isstatic; + bool isconst = ctx->ast.kinds[i] == ASTCDECL; if (isstatic || isconst) { - strview_t sv = 
toks.strs[ast.lexemes[i]]; - symval_t *p = symtab_insert(&scp->map, sv, a); + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; + symval_t *p = symtab_insert(&scp->map, sv, ctx->a); if (p->exists) { err("analyzer: Symbol ‘%.*s’ declared multiple times", SV_PRI_ARGS(sv)); } p->i = i; p->exists = true; - } else if (ast.kinds[i] == ASTBLK) { - pair_t p = ast.kids[i]; - find_unordered_syms(scps, ast, aux, toks, beg, p.lhs, p.rhs, a); + } else if (ctx->ast.kinds[i] == ASTBLK) { + pair_t p = ctx->ast.kids[i]; + find_unordered_syms(ctx, beg, p.lhs, p.rhs); i = p.rhs; } } } -const type_t * -typegrab(ast_t ast, lexemes_t toks, idx_t i) -{ - strview_t sv = toks.strs[ast.lexemes[i]]; - const type_t *tp = typelookup(sv.p, sv.len); - if (tp == NULL) - err("analyzer: Unknown type ‘%.*s’", (int)sv.len, sv.p); - return tp; -} - void -analyzeast(scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, - arena_t *a) +analyzeast(struct azctx *ctx) { - struct azctx ctx = {.a = a}; - for (idx_t i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { - assert(ast.kinds[i] <= _AST_DECLS_END); - analyzedecl(ctx, scps, types, ast, aux, toks, i); + for (idx_t i = 0; likely(i < ctx->ast.len); i = fwdnode(ctx->ast, i)) { + assert(ctx->ast.kinds[i] <= _AST_DECLS_END); + (void)analyzedecl(ctx, i); } } idx_t -analyzedecl(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, - aux_t aux, lexemes_t toks, idx_t i) +analyzedecl(struct azctx *ctx, idx_t i) { - strview_t sv = toks.strs[ast.lexemes[i]]; + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; - bool isconst = ast.kinds[i] == ASTCDECL; - bool isundef = aux.buf[ast.kids[i].lhs].decl.isundef; - bool isstatic = ast.kinds[i] <= _AST_DECLS_END - && aux.buf[ast.kids[i].lhs].decl.isstatic; + bool isconst = ctx->ast.kinds[i] == ASTCDECL; + bool isundef = ctx->aux.buf[ctx->ast.kids[i].lhs].decl.isundef; + bool isstatic = ctx->ast.kinds[i] <= _AST_DECLS_END + && ctx->aux.buf[ctx->ast.kids[i].lhs].decl.isstatic; if (isstatic && isundef) 
err("analyzer: Static variables may not be undefined"); if (!isconst && !isstatic) { - symval_t *sym = symtab_insert(&scps[ctx.si].map, sv, ctx.a); + symval_t *sym = symtab_insert(&ctx->scps.buf[ctx->si].map, sv, ctx->a); if (sym->exists) { err("analyzer: Variable ‘%.*s’ declared multiple times", SV_PRI_ARGS(sv)); } else { + printf("inserted %.*s\n", SV_PRI_ARGS(sv)); sym->i = i; sym->exists = true; } } - types[i].kind = TYPE_CHECKING; + ctx->types[i] = &NOT_CHECKED; - pair_t p = ast.kids[i]; - type_t ltype = {0}, rtype = {0}; + pair_t p = ctx->ast.kids[i]; + type_t *ltype, *rtype; + ltype = rtype = NULL; - idx_t typeidx = aux.buf[p.lhs].decl.type; + idx_t typeidx = ctx->aux.buf[p.lhs].decl.type; assert(typeidx != AST_EMPTY || p.rhs != AST_EMPTY); idx_t ni; - if (typeidx != AST_EMPTY) - ltype = *typegrab(ast, toks, typeidx); + if (typeidx != AST_EMPTY) { + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[typeidx]]; + type_t **t = typetab_insert(&ctx->ttab, sv, NULL); + if (t == NULL) + err("analyzer: Undeclared type ‘%.*s’", SV_PRI_ARGS(sv)); + ltype = *t; + } if (p.rhs != AST_EMPTY) { - ctx.decl = sv; - ni = analyzeexpr(ctx, scps, types, ast, aux, toks, p.rhs); - rtype = types[p.rhs]; + struct azctx nctx = *ctx; + nctx.decl = sv; + ni = analyzeexpr(&nctx, p.rhs); + rtype = ctx->types[p.rhs]; } else - ni = fwdnode(ast, i); + ni = fwdnode(ctx->ast, i); - if (ltype.kind == TYPE_UNSET) { + if (ltype == NULL) { ltype = rtype; - if (ast.kinds[i] == ASTDECL && rtype.size == 0) - ltype.size = 8; - } else if (!typecompat(ltype, rtype)) + if (ctx->ast.kinds[i] == ASTDECL && rtype->size == 0) + ltype = &primitives[rtype == &UNTYPED_INT ? 
PRIM_INT : PRIM_F64].t; + } else if (rtype != NULL && !typecompat(ltype, rtype)) err("analyzer: Type mismatch"); - types[i] = ltype; + ctx->types[i] = ltype; return ni; } idx_t -analyzestmt(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, - aux_t aux, lexemes_t toks, idx_t i) +analyzestmt(struct azctx *ctx, idx_t i) { - switch (ast.kinds[i]) { + switch (ctx->ast.kinds[i]) { case ASTDECL: case ASTCDECL: - return analyzedecl(ctx, scps, types, ast, aux, toks, i); + return analyzedecl(ctx, i); case ASTASIGN: { - pair_t p = ast.kids[i]; - (void)analyzeexpr(ctx, scps, types, ast, aux, toks, p.lhs); + pair_t p = ctx->ast.kids[i]; + (void)analyzeexpr(ctx, p.lhs); /* TODO: Allow assignments to expressions returning pointer types */ - if (ast.kinds[p.lhs] != ASTIDENT) + if (ctx->ast.kinds[p.lhs] != ASTIDENT) err("analyzer: Assignments may only be made to identifiers"); - idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, p.rhs); - if (!typecompat(types[p.lhs], types[p.rhs])) + idx_t ni = analyzeexpr(ctx, p.rhs); + if (!typecompat(ctx->types[p.lhs], ctx->types[p.rhs])) err("analyzer: Assignment type mismatch"); - types[i] = types[p.lhs]; + ctx->types[i] = ctx->types[p.lhs]; return ni; } case ASTRET: { - idx_t expr = ast.kids[i].rhs; + idx_t expr = ctx->ast.kids[i].rhs; if (expr == AST_EMPTY) { - if (ctx.fnret.kind != TYPE_UNSET) + if (ctx->fnret->kind != TYPE_UNSET) err("analyzer: Missing return value"); return i + 1; - } else if (ctx.fnret.kind == TYPE_UNSET) + } else if (ctx->fnret->kind == TYPE_UNSET) err("analyzer: Function has no return value"); - idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, expr); - if (!typecompat(ctx.fnret, types[expr])) + idx_t ni = analyzeexpr(ctx, expr); + printf("%p %p\n", (void *)ctx->fnret, (void *)ctx->types[expr]); + if (!typecompat(ctx->fnret, ctx->types[expr])) err("analyzer: Return type mismatch"); - types[i] = ctx.fnret; + ctx->types[i] = ctx->fnret; return ni; } default: @@ -285,27 +340,28 @@ analyzestmt(struct 
azctx ctx, scope_t *scps, type_t *types, ast_t ast, } idx_t -analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, - aux_t aux, lexemes_t toks, idx_t i) +analyzeexpr(struct azctx *ctx, idx_t i) { - switch (ast.kinds[i]) { + /* Create local copy */ + struct azctx nctx = *ctx; + ctx = &nctx; + + switch (ctx->ast.kinds[i]) { case ASTNUMLIT: { - strview_t sv = toks.strs[ast.lexemes[i]]; - types[i].kind = TYPE_NUM; - types[i].size = 0; - types[i].issigned = true; - types[i].isfloat = memchr(sv.p, '.', sv.len) != NULL; - return fwdnode(ast, i); + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; + ctx->types[i] = memchr(sv.p, '.', sv.len) != NULL ? &UNTYPED_FLT + : &UNTYPED_INT; + return fwdnode(ctx->ast, i); } case ASTIDENT: { - strview_t sv = toks.strs[ast.lexemes[i]]; + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; /* Variable shadowing */ - if (strview_eq(sv, ctx.decl) && ctx.si > 0) - ctx.si--; + if (strview_eq(sv, ctx->decl) && ctx->si > 0) + ctx->si--; - for (idx_t lvl = ctx.si;;) { - scope_t scp = scps[lvl]; + for (idx_t lvl = ctx->si;;) { + scope_t scp = ctx->scps.buf[lvl]; symval_t *sym = symtab_insert(&scp.map, sv, NULL); if (sym == NULL) { @@ -313,18 +369,18 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, break; lvl = scp.up; } else { - switch (types[sym->i].kind) { + switch (ctx->types[sym->i]->kind) { case TYPE_UNSET: - ctx.si = lvl; - analyzedecl(ctx, scps, types, ast, aux, toks, sym->i); + ctx->si = lvl; + (void)analyzedecl(ctx, sym->i); break; case TYPE_CHECKING: err("analyzer: Circular definition of ‘%.*s’", SV_PRI_ARGS(sv)); } - types[i] = types[sym->i]; - return fwdnode(ast, i); + ctx->types[i] = ctx->types[sym->i]; + return fwdnode(ctx->ast, i); } } @@ -333,16 +389,21 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, case ASTUNCMPL: case ASTUNNEG: { idx_t ni, rhs; - rhs = ast.kids[i].rhs; - ni = analyzeexpr(ctx, scps, types, ast, aux, toks, rhs); - type_t t = types[rhs]; - if 
(ast.kinds[i] == ASTUNNEG && (t.kind != TYPE_NUM || !t.issigned)) + rhs = ctx->ast.kids[i].rhs; + ni = analyzeexpr(ctx, rhs); + type_t *t = ctx->types[rhs]; + if (ctx->ast.kinds[i] == ASTUNNEG + && (t->kind != TYPE_NUM || !t->issigned)) + { err("analyzer: Unary negation is reserved for signed numeric " "types"); - else if (ast.kinds[i] == ASTUNCMPL && (t.kind != TYPE_NUM || t.isfloat)) + } else if (ctx->ast.kinds[i] == ASTUNCMPL + && (t->kind != TYPE_NUM || t->isfloat)) + { err("analyzer: Unary negation is reserved for numeric integer " "types"); - types[i] = t; + } + ctx->types[i] = t; return ni; } case ASTBINADD: @@ -356,13 +417,14 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, case ASTBINSUB: case ASTBINXOR: { idx_t lhs, rhs; - lhs = ast.kids[i].lhs; - rhs = ast.kids[i].rhs; - analyzeexpr(ctx, scps, types, ast, aux, toks, lhs); - idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, rhs); - - bool isshift = ast.kinds[i] == ASTBINSHL || ast.kinds[i] == ASTBINSHR; - if (!isshift && !typecompat(types[lhs], types[rhs])) + lhs = ctx->ast.kids[i].lhs; + rhs = ctx->ast.kids[i].rhs; + (void)analyzeexpr(ctx, lhs); + idx_t ni = analyzeexpr(ctx, rhs); + + bool isshift = ctx->ast.kinds[i] == ASTBINSHL + || ctx->ast.kinds[i] == ASTBINSHR; + if (!isshift && !typecompat(ctx->types[lhs], ctx->types[rhs])) err("analyzer: Binary oprand type mismatch"); static const bool int_only[UINT8_MAX + 1] = { @@ -370,9 +432,10 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, [ASTBINSHL] = true, [ASTBINSHR] = true, [ASTBINXOR] = true, }; - if (int_only[ast.kinds[i]] - && (types[lhs].kind != TYPE_NUM || types[lhs].isfloat - || types[rhs].kind != TYPE_NUM || types[rhs].isfloat)) + if (int_only[ctx->ast.kinds[i]] + && (ctx->types[lhs]->kind != TYPE_NUM || ctx->types[lhs]->isfloat + || ctx->types[rhs]->kind != TYPE_NUM + || ctx->types[rhs]->isfloat)) { err("analyzer: Operation not defined for non-integer types"); } @@ -390,54 +453,66 @@ 
analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, Expressions for these operators always take the type of x, and y can be any integer type. */ if (isshift) - types[i] = types[lhs]; + ctx->types[i] = ctx->types[lhs]; else { - types[i] = types[lhs].size != 0 ? types[lhs] : types[rhs]; - types[i].isfloat = types[lhs].isfloat || types[rhs].isfloat; + ctx->types[i] = ctx->types[lhs]->size != 0 ? ctx->types[lhs] + : ctx->types[rhs]; + ctx->types[i]->isfloat = ctx->types[lhs]->isfloat + || ctx->types[rhs]->isfloat; } return ni; } case ASTFN: - return analyzefn(ctx, scps, types, ast, aux, toks, i); + return analyzefn(ctx, i); default: __builtin_unreachable(); } } idx_t -analyzefn(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, - lexemes_t toks, idx_t i) +analyzefn(struct azctx *ctx, idx_t i) { type_t t = {.kind = TYPE_FN}; - pair_t p = ast.kids[i]; + pair_t p = ctx->ast.kids[i]; + + /* Create local copy */ + struct azctx nctx = *ctx; + ctx = &nctx; idx_t proto = p.lhs; - if (ast.kids[proto].rhs != AST_EMPTY) { - t.ret = typegrab(ast, toks, ast.kids[proto].rhs); - ctx.fnret = *t.ret; - ctx.chkrets = true; + idx_t ret = ctx->ast.kids[proto].rhs; + if (ret != AST_EMPTY) { + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[ret]]; + type_t **p = typetab_insert(&ctx->ttab, sv, NULL); + if (p == NULL) + err("analyzer: Undeclared type ‘%.*s’", SV_PRI_ARGS(sv)); + ctx->fnret = t.ret = *p; + ctx->chkrets = true; } else - ctx.fnret.kind = TYPE_UNSET; - types[i] = t; - return analyzeblk(ctx, scps, types, ast, aux, toks, p.rhs); + ctx->fnret = NULL; + *(ctx->types[i] = arena_new(ctx->a, type_t, 1)) = t; + return analyzeblk(ctx, p.rhs); } idx_t -analyzeblk(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, - lexemes_t toks, idx_t i) +analyzeblk(struct azctx *ctx, idx_t i) { - pair_t p = ast.kids[i]; + /* Create local copy */ + struct azctx nctx = *ctx; + ctx = &nctx; + + pair_t p = ctx->ast.kids[i]; - while (scps[ctx.si].i != 
p.lhs) - ctx.si++; + while (ctx->scps.buf[ctx->si].i != p.lhs) + ctx->si++; - bool chkrets = ctx.chkrets, hasret = false; - ctx.chkrets = false; + bool chkrets = ctx->chkrets, hasret = false; + ctx->chkrets = false; for (i = p.lhs; i <= p.rhs;) { - if (chkrets && returns(ast, i)) + if (chkrets && returns(ctx->ast, i)) hasret = true; - i = analyzestmt(ctx, scps, types, ast, aux, toks, i); + i = analyzestmt(ctx, i); } if (chkrets && !hasret) err("analyzer: Function doesn’t return on all paths"); @@ -446,54 +521,52 @@ analyzeblk(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, } idx_t -constfoldstmt(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, - ast_t ast, lexemes_t toks, idx_t i) +constfoldstmt(struct azctx *ctx, idx_t i) { - switch (ast.kinds[i]) { + switch (ctx->ast.kinds[i]) { case ASTDECL: case ASTCDECL: - return constfolddecl(ctx, folds, scps, types, ast, toks, i); + return constfolddecl(ctx, i); case ASTASIGN: case ASTRET: - return constfoldexpr(ctx, folds, scps, types, ast, toks, - types[i], ast.kids[i].rhs); + return constfoldexpr(ctx, ctx->types[i], ctx->ast.kids[i].rhs); default: __builtin_unreachable(); } } idx_t -constfoldblk(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, - ast_t ast, lexemes_t toks, idx_t i) +constfoldblk(struct azctx *ctx, idx_t i) { - pair_t p = ast.kids[i]; - while (scps[ctx.si].i != p.lhs) - ctx.si++; - for (i = p.lhs; i <= p.rhs; - i = constfoldstmt(ctx, folds, scps, types, ast, toks, i)) + /* Create local copy */ + struct azctx nctx = *ctx; + ctx = &nctx; + + pair_t p = ctx->ast.kids[i]; + while (ctx->scps.buf[ctx->si].i != p.lhs) + ctx->si++; + for (i = p.lhs; i <= p.rhs; i = constfoldstmt(ctx, i)) ; - return i; } idx_t -constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, - ast_t ast, lexemes_t toks, type_t T, idx_t i) +constfoldexpr(struct azctx *ctx, type_t *T, idx_t i) { - if (MPQ_IS_INIT(folds[i])) - return fwdnode(ast, i); + if 
(MPQ_IS_INIT(ctx->folds[i])) + return fwdnode(ctx->ast, i); idx_t ni; + struct azctx nctx; - switch (ast.kinds[i]) { + switch (ctx->ast.kinds[i]) { case ASTFN: - return constfoldblk(ctx, folds, scps, types, ast, toks, - ast.kids[i].rhs); + return constfoldblk(ctx, ctx->ast.kids[i].rhs); case ASTNUMLIT: { - mpq_init(folds[i]); + mpq_init(ctx->folds[i]); - strview_t sv = toks.strs[ast.lexemes[i]]; - char *buf = tmpalloc(ctx.s, sv.len + 1, 1); + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; + char *buf = tmpalloc(ctx->s, sv.len + 1, 1); size_t len = 0; bool isfloat = false; @@ -520,48 +593,51 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, #endif mpf_set_str(x, buf, 10); assert(ret == 0); - mpq_set_f(folds[i], x); + mpq_set_f(ctx->folds[i], x); mpf_clear(x); } else { #if DEBUG int ret = #endif - mpq_set_str(folds[i], buf, 10); + mpq_set_str(ctx->folds[i], buf, 10); assert(ret == 0); } - ni = fwdnode(ast, i); + ni = fwdnode(ctx->ast, i); break; } case ASTIDENT: { - strview_t sv = toks.strs[ast.lexemes[i]]; + /* Create local copy */ + nctx = *ctx; + ctx = &nctx; + + strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; /* Variable shadowing */ - if (strview_eq(sv, ctx.decl) && ctx.si > 0) - ctx.si--; + if (strview_eq(sv, ctx->decl) && ctx->si > 0) + ctx->si--; - for (idx_t lvl = ctx.si;;) { - scope_t scp = scps[lvl]; + for (idx_t lvl = ctx->si;;) { + scope_t scp = ctx->scps.buf[lvl]; symval_t *sym = symtab_insert(&scp.map, sv, NULL); if (sym == NULL) { assert(lvl != 0); lvl = scp.up; } else { - switch (ast.kinds[sym->i]) { + switch (ctx->ast.kinds[sym->i]) { case ASTDECL: - return fwdnode(ast, i); + return fwdnode(ctx->ast, i); case ASTCDECL: { - idx_t expr = ast.kids[sym->i].rhs; + idx_t expr = ctx->ast.kids[sym->i].rhs; assert(expr != AST_EMPTY); - MPQCPY(folds[i], folds[expr]); - if (!MPQ_IS_INIT(folds[i])) { - ctx.si = lvl; - (void)constfolddecl(ctx, folds, scps, types, ast, toks, - sym->i); - MPQCPY(folds[i], folds[expr]); - 
assert(MPQ_IS_INIT(folds[i])); + MPQCPY(ctx->folds[i], ctx->folds[expr]); + if (!MPQ_IS_INIT(ctx->folds[i])) { + ctx->si = lvl; + (void)constfolddecl(ctx, sym->i); + MPQCPY(ctx->folds[i], ctx->folds[expr]); + assert(MPQ_IS_INIT(ctx->folds[i])); } - ni = fwdnode(ast, i); + ni = fwdnode(ctx->ast, i); goto out; } default: @@ -573,19 +649,18 @@ out: break; } case ASTUNCMPL: { - ni = constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], - ast.kids[i].rhs); - if (MPQ_IS_INIT(folds[ast.kids[i].rhs])) + ni = constfoldexpr(ctx, ctx->types[i], ctx->ast.kids[i].rhs); + if (MPQ_IS_INIT(ctx->folds[ctx->ast.kids[i].rhs])) err("analyzer: Cannot perform bitwise complement of constant"); break; } case ASTUNNEG: { - idx_t rhs = ast.kids[i].rhs; - ni = constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], rhs); - mpq_t *x = folds + rhs; + idx_t rhs = ctx->ast.kids[i].rhs; + ni = constfoldexpr(ctx, ctx->types[i], rhs); + mpq_t *x = ctx->folds + rhs; if (MPQ_IS_INIT(*x)) { - MPQCPY(folds[i], *x); - mpq_neg(folds[i], folds[i]); + MPQCPY(ctx->folds[i], *x); + mpq_neg(ctx->folds[i], ctx->folds[i]); } break; } @@ -599,31 +674,32 @@ out: ['*'] = mpq_mul, }; idx_t lhs, rhs; - lhs = ast.kids[i].lhs; - rhs = ast.kids[i].rhs; - (void)constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], lhs); - ni = constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], rhs); - if (MPQ_IS_INIT(folds[lhs]) && MPQ_IS_INIT(folds[rhs])) { - mpq_init(folds[i]); - mpq_fns[ast.kinds[i]](folds[i], folds[lhs], folds[rhs]); + lhs = ctx->ast.kids[i].lhs; + rhs = ctx->ast.kids[i].rhs; + (void)constfoldexpr(ctx, ctx->types[i], lhs); + ni = constfoldexpr(ctx, ctx->types[i], rhs); + if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { + mpq_init(ctx->folds[i]); + mpq_fns[ctx->ast.kinds[i]](ctx->folds[i], ctx->folds[lhs], + ctx->folds[rhs]); } break; } case ASTBINDIV: { idx_t lhs, rhs; - lhs = ast.kids[i].lhs; - rhs = ast.kids[i].rhs; + lhs = ctx->ast.kids[i].lhs; + rhs = 
ctx->ast.kids[i].rhs; - (void)constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], lhs); - ni = constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], rhs); + (void)constfoldexpr(ctx, ctx->types[i], lhs); + ni = constfoldexpr(ctx, ctx->types[i], rhs); - if (MPQ_IS_INIT(folds[lhs]) && MPQ_IS_INIT(folds[rhs])) { - mpq_init(folds[i]); - if (types[i].isfloat) - mpq_div(folds[i], folds[lhs], folds[rhs]); + if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { + mpq_init(ctx->folds[i]); + if (ctx->types[i]->isfloat) + mpq_div(ctx->folds[i], ctx->folds[lhs], ctx->folds[rhs]); else { - mpz_tdiv_q(mpq_numref(folds[i]), mpq_numref(folds[lhs]), - mpq_numref(folds[rhs])); + mpz_tdiv_q(mpq_numref(ctx->folds[i]), mpq_numref(ctx->folds[lhs]), + mpq_numref(ctx->folds[rhs])); } } break; @@ -641,18 +717,18 @@ out: }; idx_t lhs, rhs; - lhs = ast.kids[i].lhs; - rhs = ast.kids[i].rhs; - - (void)constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], lhs); - ni = constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], rhs); - - if (MPQ_IS_INIT(folds[lhs]) && MPQ_IS_INIT(folds[rhs])) { - assert(MPQ_IS_WHOLE(folds[lhs])); - assert(MPQ_IS_WHOLE(folds[rhs])); - mpq_init(folds[i]); - mpz_fns[ast.kinds[i]](mpq_numref(folds[i]), mpq_numref(folds[lhs]), - mpq_numref(folds[rhs])); + lhs = ctx->ast.kids[i].lhs; + rhs = ctx->ast.kids[i].rhs; + + (void)constfoldexpr(ctx, ctx->types[i], lhs); + ni = constfoldexpr(ctx, ctx->types[i], rhs); + + if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { + assert(MPQ_IS_WHOLE(ctx->folds[lhs])); + assert(MPQ_IS_WHOLE(ctx->folds[rhs])); + mpq_init(ctx->folds[i]); + mpz_fns[ctx->ast.kinds[i]](mpq_numref(ctx->folds[i]), mpq_numref(ctx->folds[lhs]), + mpq_numref(ctx->folds[rhs])); } break; } @@ -665,28 +741,28 @@ out: }; idx_t lhs, rhs; - lhs = ast.kids[i].lhs; - rhs = ast.kids[i].rhs; + lhs = ctx->ast.kids[i].lhs; + rhs = ctx->ast.kids[i].rhs; - (void)constfoldexpr(ctx, folds, scps, types, ast, toks, types[lhs], 
lhs); - ni = constfoldexpr(ctx, folds, scps, types, ast, toks, types[rhs], rhs); + (void)constfoldexpr(ctx, ctx->types[lhs], lhs); + ni = constfoldexpr(ctx, ctx->types[rhs], rhs); - if (MPQ_IS_INIT(folds[rhs])) { - if (mpq_sgn(folds[rhs]) == -1) + if (MPQ_IS_INIT(ctx->folds[rhs])) { + if (mpq_sgn(ctx->folds[rhs]) == -1) err("analyzer: Cannot shift by negative value"); } - if (MPQ_IS_INIT(folds[lhs]) && MPQ_IS_INIT(folds[rhs])) { + if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { mpz_ptr cur_z, lhs_z, rhs_z; - cur_z = mpq_numref(folds[i]); - lhs_z = mpq_numref(folds[lhs]); - rhs_z = mpq_numref(folds[rhs]); + cur_z = mpq_numref(ctx->folds[i]); + lhs_z = mpq_numref(ctx->folds[lhs]); + rhs_z = mpq_numref(ctx->folds[rhs]); - mpq_init(folds[i]); + mpq_init(ctx->folds[i]); if (mpz_cmp_ui(rhs_z, ULONG_MAX) > 0) err("analyzer: Shift oprand too large"); mp_bitcnt_t shftcnt = mpz_get_ui(rhs_z); - mpz_fns[ast.kinds[i]](cur_z, lhs_z, shftcnt); + mpz_fns[ctx->ast.kinds[i]](cur_z, lhs_z, shftcnt); } break; } @@ -694,23 +770,21 @@ out: __builtin_unreachable(); } - if (MPQ_IS_INIT(folds[i]) && !T.issigned && mpq_sgn(folds[i]) == -1) + if (MPQ_IS_INIT(ctx->folds[i]) && !T->issigned && mpq_sgn(ctx->folds[i]) == -1) err("analyzer: Cannot convert negative value to unsigned type"); - if (T.size != 0 && !T.isfloat && MPQ_IS_INIT(folds[i])) { - if (!MPQ_IS_WHOLE(folds[i])) + if (T->size != 0 && !T->isfloat && MPQ_IS_INIT(ctx->folds[i])) { + if (!MPQ_IS_WHOLE(ctx->folds[i])) err("analyzer: Invalid integer"); int cmp; - mpz_ptr num = mpq_numref(folds[i]); - /* TODO: Can we make the first branch work when the type has the - same size as an unsigned long? 
*/ - if (T.size < sizeof(unsigned long)) { - unsigned long x = 1UL << (T.size * 8 - T.issigned); + mpz_ptr num = mpq_numref(ctx->folds[i]); + if (T->size < sizeof(unsigned long)) { + unsigned long x = 1UL << (T->size * 8 - T->issigned); cmp = mpz_cmp_ui(num, x - 1); } else { mpz_t x; - mp_bitcnt_t bits = T.size * 8 - T.issigned; + mp_bitcnt_t bits = T->size * 8 - T->issigned; mpz_init_set_ui(x, 1); mpz_mul_2exp(x, x, bits); mpz_sub_ui(x, x, 1); @@ -725,25 +799,24 @@ out: } idx_t -constfolddecl(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, - ast_t ast, lexemes_t toks, idx_t i) +constfolddecl(struct azctx *ctx, idx_t i) { - if (ast.kids[i].rhs == AST_EMPTY) - return fwdnode(ast, i); - ctx.decl = toks.strs[ast.lexemes[i]]; - return constfoldexpr(ctx, folds, scps, types, ast, toks, types[i], - ast.kids[i].rhs); + if (ctx->ast.kids[i].rhs == AST_EMPTY) + return fwdnode(ctx->ast, i); + + /* Create local copy */ + struct azctx nctx = *ctx; + ctx = &nctx; + ctx->decl = ctx->toks.strs[ctx->ast.lexemes[i]]; + + return constfoldexpr(ctx, ctx->types[i], ctx->ast.kids[i].rhs); } void -constfold(mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, - arena_t *a, scratch_t *s) +constfold(struct azctx *ctx) { - struct cfctx ctx = {.a = a, .s = s}; - for (idx_t i = 0; likely(i < ast.len);) { - assert(ast.kinds[i] <= _AST_DECLS_END); - i = constfolddecl(ctx, folds, scps, types, ast, toks, i); - } + for (idx_t i = 0; likely(i < ctx->ast.len); i = constfolddecl(ctx, i)) + assert(ctx->ast.kinds[i] <= _AST_DECLS_END); } bool @@ -761,23 +834,26 @@ returns(ast_t ast, idx_t i) } bool -typecompat(type_t lhs, type_t rhs) +typecompat(type_t *lhs, type_t *rhs) { + if (lhs == rhs) + return true; + /* Function types are compatible if they have the same parameter- and return types */ - if (lhs.kind == TYPE_FN && rhs.kind == TYPE_FN) - return lhs.paramcnt == rhs.paramcnt && lhs.ret == rhs.ret; - if (lhs.kind == TYPE_FN || rhs.kind == TYPE_FN) + if (lhs->kind == 
TYPE_FN && rhs->kind == TYPE_FN) + return lhs->paramcnt == rhs->paramcnt && lhs->ret == rhs->ret; + if (lhs->kind == TYPE_FN || rhs->kind == TYPE_FN) return false; /* At this point we only have numeric types left */ /* Untyped numeric types are compatible with all numeric types */ - if (lhs.size == 0 || rhs.size == 0) + if (lhs->size == 0 || rhs->size == 0) return true; /* Two typed numeric types are only compatible if they have the same size and sign and are either both integral or both floats */ - return lhs.issigned == rhs.issigned && lhs.isfloat == rhs.isfloat - && lhs.size == rhs.size; + return lhs->issigned == rhs->issigned && lhs->isfloat == rhs->isfloat + && lhs->size == rhs->size; } diff --git a/src/analyzer.h b/src/analyzer.h index 12c1b26..c6fd80f 100644 --- a/src/analyzer.h +++ b/src/analyzer.h @@ -38,25 +38,7 @@ typedef struct { symtab_t *map; } scope_t; -/* A variable type */ -typedef struct type { - uint8_t kind; - - union { - struct { - uint8_t size; /* number of bytes */ - bool isfloat; - bool issigned; - }; - struct { - const struct type *params, *ret; - idx_t paramcnt; - }; - }; -} type_t; - -void analyzeprog(ast_t, aux_t, lexemes_t, arena_t *, type_t **, scope_t **, - mpq_t **) - __attribute__((nonnull)); +type_t **analyzeprog(ast_t, aux_t, lexemes_t, arena_t *, scope_t **, mpq_t **) + __attribute__((returns_nonnull, nonnull)); #endif /* !ORYX_ANALYZER_H */ diff --git a/src/codegen.c b/src/codegen.c index f391bda..a263db2 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -17,8 +17,6 @@ #include "parser.h" #include "strview.h" -#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) - /* Cheers to Lernö for this */ #define LLVM_TARGET_INIT(x) \ do { \ @@ -38,7 +36,7 @@ struct cgctx { mpq_t *folds; scope_t *scps; - type_t *types; + type_t **types; ast_t ast; aux_t aux; lexemes_t toks; @@ -54,15 +52,16 @@ struct cgctx { }; static void codegenast(struct cgctx); -static LLVMTypeRef type2llvm(struct cgctx, type_t); +static LLVMTypeRef type2llvm(struct 
cgctx, type_t *) + __attribute__((nonnull)); static symval_t *symtab_get_from_scopes(struct cgctx ctx, strview_t sv); extern bool lflag, sflag; extern const char *oflag; void -codegen(const char *file, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, - aux_t aux, lexemes_t toks) +codegen(const char *file, mpq_t *folds, scope_t *scps, type_t **types, + ast_t ast, aux_t aux, lexemes_t toks) { LLVM_TARGET_INIT(AArch64); LLVM_TARGET_INIT(X86); @@ -142,13 +141,13 @@ codegen(const char *file, mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, static idx_t codegendecl(struct cgctx ctx, idx_t); idx_t -codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) +codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv) { /* If true, implies numeric constant */ - if (MPQ_IS_INIT(ctx.folds[i]) && !type.isfloat) { + if (MPQ_IS_INIT(ctx.folds[i]) && !T->isfloat) { char buf[40 /* The max value of a u128 is length 39 */]; mpz_get_str(buf, 10, mpq_numref(ctx.folds[i])); - *outv = LLVMConstIntOfString(type2llvm(ctx, type), buf, 10); + *outv = LLVMConstIntOfString(type2llvm(ctx, T), buf, 10); return fwdnode(ctx.ast, i); } else if (MPQ_IS_INIT(ctx.folds[i]) /* && type.isfloat */) { char *s, *buf; @@ -158,7 +157,7 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) mp_bitcnt_t prec; /* TODO: Is this even correct? 
*/ - switch (type.size) { + switch (T->size) { case 2: prec = 5; break; @@ -187,7 +186,7 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) for (size_t i = e; i < len; i++) buf[i + 1] = s[i]; buf[len + 1] = 0; - *outv = LLVMConstRealOfString(type2llvm(ctx, type), buf); + *outv = LLVMConstRealOfString(type2llvm(ctx, T), buf); free(s); mpf_clear(x); @@ -248,13 +247,13 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) (void)codegentypedexpr(ctx, lhs, ctx.types[i], &vl); idx_t ni = codegentypedexpr(ctx, rhs, ctx.types[i], &vr); - if (ctx.ast.kinds[i] >= ASTBINSHL && ctx.types[rhs].size != 0) { + if (ctx.ast.kinds[i] >= ASTBINSHL && ctx.types[rhs]->size != 0) { vr = LLVMBuildIntCast2(ctx.bob, vr, type2llvm(ctx, ctx.types[lhs]), false, "cast"); } struct binop bo = binoptbl[ctx.ast.kinds[i]]; - *outv = bo.fn[ctx.types[i].isfloat ? 2 : ctx.types[i].issigned]( + *outv = bo.fn[ctx.types[i]->isfloat ? 2 : ctx.types[i]->issigned]( ctx.bob, vl, vr, bo.name); return ni; } @@ -364,9 +363,9 @@ codegenfunc(struct cgctx ctx, idx_t i, strview_t sv) } arena_snapshot_restore(ctx.a, snap); - LLVMTypeRef ret = ctx.types[i].ret == NULL + LLVMTypeRef ret = ctx.types[i]->ret == NULL ? 
LLVMVoidTypeInContext(ctx.ctx) - : type2llvm(ctx, *ctx.types[i].ret); + : type2llvm(ctx, ctx.types[i]->ret); LLVMTypeRef ft = LLVMFunctionType(ret, NULL, 0, false); ctx.func = LLVMAddFunction(ctx.mod, name, ft); @@ -449,17 +448,17 @@ codegenast(struct cgctx ctx) } LLVMTypeRef -type2llvm(struct cgctx ctx, type_t t) +type2llvm(struct cgctx ctx, type_t *T) { - switch (t.kind) { + switch (T->kind) { case TYPE_FN: err("codegen: %s: Not implemented for function types", __func__); case TYPE_NUM: - assert(t.size != 0); - assert((unsigned)t.size * 8 <= 128); - if (!t.isfloat) - return LLVMIntTypeInContext(ctx.ctx, t.size * 8); - switch (t.size) { + assert(T->size != 0); + assert((unsigned)T->size * 8 <= 128); + if (!T->isfloat) + return LLVMIntTypeInContext(ctx.ctx, T->size * 8); + switch (T->size) { case 2: return LLVMHalfTypeInContext(ctx.ctx); case 4: diff --git a/src/codegen.h b/src/codegen.h index 338ade4..e214067 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -7,7 +7,7 @@ #include "lexer.h" #include "parser.h" -void codegen(const char *, mpq_t *, scope_t *, type_t *, ast_t, aux_t, +void codegen(const char *, mpq_t *, scope_t *, type_t **, ast_t, aux_t, lexemes_t) __attribute__((nonnull)); diff --git a/src/common.h b/src/common.h index 6285a15..c34892b 100644 --- a/src/common.h +++ b/src/common.h @@ -31,4 +31,6 @@ #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define MAX(x, y) ((x) > (y) ? 
(x) : (y)) +#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) + #endif /* !ORYX_COMMON_H */ diff --git a/src/main.c b/src/main.c index cc6cfd7..1555d17 100644 --- a/src/main.c +++ b/src/main.c @@ -74,13 +74,12 @@ usage: aux_t aux; mpq_t *folds; - type_t *types; scope_t *scps; arena_t a = NULL; lexemes_t toks = lexstring(src, srclen); ast_t ast = parsetoks(toks, &aux); - analyzeprog(ast, aux, toks, &a, &types, &scps, &folds); + type_t **types = analyzeprog(ast, aux, toks, &a, &scps, &folds); codegen(argv[0], folds, scps, types, ast, aux, toks); #if DEBUG @@ -91,8 +90,8 @@ usage: free(folds); free(scps); - free(src); free(types); + free(src); lexemes_free(toks); ast_free(ast); aux_free(aux); diff --git a/src/primitives.gperf b/src/primitives.gperf deleted file mode 100644 index bc2cb2e..0000000 --- a/src/primitives.gperf +++ /dev/null @@ -1,42 +0,0 @@ -%compare-strncmp -%includes -%language=ANSI-C -%readonly-tables -%struct-type - -%{ -#include - -#include "analyzer.h" -#include "types.h" - -#pragma GCC diagnostic ignored "-Wmissing-field-initializers" -#pragma GCC diagnostic ignored "-Wunused-parameter" -%} - -struct typeslot { char *name; struct type inner; }; -%% -i8, { TYPE_NUM, {.size = 1, .issigned=true, .isfloat=false} } -i16, { TYPE_NUM, {.size = 2, .issigned=true, .isfloat=false} } -i32, { TYPE_NUM, {.size = 4, .issigned=true, .isfloat=false} } -i64, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=false} } -i128, { TYPE_NUM, {.size = 16, .issigned=true, .isfloat=false} } -int, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=false} } -u8, { TYPE_NUM, {.size = 1, .issigned=false, .isfloat=false} } -u16, { TYPE_NUM, {.size = 2, .issigned=false, .isfloat=false} } -u32, { TYPE_NUM, {.size = 4, .issigned=false, .isfloat=false} } -u64, { TYPE_NUM, {.size = 8, .issigned=false, .isfloat=false} } -u128, { TYPE_NUM, {.size = 16, .issigned=false, .isfloat=false} } -uint, { TYPE_NUM, {.size = 8, .issigned=false, .isfloat=false} } -rune, { TYPE_NUM, {.size = 4, 
.issigned=true, .isfloat=false} } -f16, { TYPE_NUM, {.size = 2, .issigned=true, .isfloat=true } } -f32, { TYPE_NUM, {.size = 4, .issigned=true, .isfloat=true } } -f64, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=true } } -f128, { TYPE_NUM, {.size = 16, .issigned=true, .isfloat=true } } -%% -const struct type * -typelookup(const uchar *p, size_t len) -{ - const struct typeslot *tp = in_word_set(p, len); - return tp == NULL ? NULL : &tp->inner; -} diff --git a/src/strview.h b/src/strview.h index d6629ee..7eafdcc 100644 --- a/src/strview.h +++ b/src/strview.h @@ -17,7 +17,8 @@ typedef struct { #define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p) /* Convert the string-literal S into a string-view */ -#define SV(s) ((strview_t){s, sizeof(s) - 1}) +#define SV(s) ((strview_t){s, sizeof(s) - 1}) +#define SVC(s) {s, sizeof(s) - 1} /* Return the hash of SV */ uint64_t strview_hash(strview_t sv); diff --git a/src/symtab.c b/src/symtab.c index 279fdb9..8fcba3c 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -11,6 +11,12 @@ struct symtab { symval_t val; }; +struct typetab { + typetab_t *child[4]; + strview_t key; + type_t *val; +}; + symval_t * symtab_insert(symtab_t **m, strview_t sv, arena_t *a) { @@ -25,3 +31,18 @@ symtab_insert(symtab_t **m, strview_t sv, arena_t *a) (*m)->key = sv; return &(*m)->val; } + +type_t ** +typetab_insert(typetab_t **m, strview_t sv, arena_t *a) +{ + for (uint64_t h = strview_hash(sv); *m; h <<= 2) { + if (strview_eq(sv, (*m)->key)) + return &(*m)->val; + m = &(*m)->child[h >> 62]; + } + if (a == NULL) + return NULL; + *m = arena_new(a, typetab_t, 1); + (*m)->key = sv; + return &(*m)->val; +} diff --git a/src/symtab.h b/src/symtab.h index 29c4868..448ffd9 100644 --- a/src/symtab.h +++ b/src/symtab.h @@ -1,5 +1,5 @@ -#ifndef ORYX_SYMTAB_H -#define ORYX_SYMTAB_H +#ifndef ORYX_TABLES_H +#define ORYX_TABLES_H #include @@ -10,6 +10,7 @@ #include "types.h" typedef struct symtab symtab_t; +typedef struct typetab typetab_t; typedef struct { bool exists; 
@@ -17,10 +18,29 @@ typedef struct { LLVMValueRef v; } symval_t; +typedef struct type { + uint8_t kind; + + union { + struct { + uint8_t size; /* number of bytes */ + bool isfloat; + bool issigned; + }; + struct { + struct type *params, *ret; + idx_t paramcnt; + }; + }; +} type_t; + /* Index the symbol table M with the key SV, returning a pointer to the value. If no entry exists and A is non-null, a pointer to a newly allocated (and zeroed) value is returned, NULL otherwise. */ symval_t *symtab_insert(symtab_t **m, strview_t sv, arena_t *a) __attribute__((nonnull(1))); -#endif /* !ORYX_SYMTAB_H */ +type_t **typetab_insert(typetab_t **m, strview_t sv, arena_t *a) + __attribute__((nonnull(1))); + +#endif /* !ORYX_TABLES_H */ diff --git a/test.yx b/test.yx new file mode 100644 index 0000000..346f095 --- /dev/null +++ b/test.yx @@ -0,0 +1,133 @@ +X :: 5; +Y: u8 : X; +Z :: Y; + +ZZ :: 127; + +my_global: i8 = ZZ; +no_init: u128; + +another_global := 123'456'789; + +uninit :: () { + x: int = …; + y: u32 = ...; +} + +pub main :: () { + no_init: u128; + x := no_init; + + no_init_undef: int = …; +} + +pub bar :: () int { + hello_world_this_is_my_var := 126; + hello_world_this_is_my_var′ := 127; + hello_world_this_is_my_var″ := 128; + return hello_world_this_is_my_var′; +} + +foo :: () int { + baz :: () int { + X :: X; + return X; + } + + y := 5; + x := y; + return x; +} + +rune :: () { + ch: rune = 8224; /* U+2020 DAGGER */ +} + +float_test :: () { + π :: 3.14159265358979323846264338327950288419716939937510582097494459; + a: f16 = π; + b: f32 = π; + c: f64 = π; + d: f128 = π; + + f := 3.14; +} + +/* Yes we have comments in this language, + /* You can even nest them! */ */ + +/* There are no line-comments, because why have 2 styles when you can have 1? */ + +neg_test :: () int { + x := 420; + return -x; +} + +/* This method should return 1337, because it’s not an increment but + actually parsed as (+ (+ x)), which is equivalent to ‘x’. 
*/ +some_math :: () int { + x := 1337; + return ++x; +} + +complex_math :: () uint { + x: uint = 42; + y: uint = 123; + z := (x + y) / x; + return x + y / x; +} + +div :: () f64 { + return 5 / 2; +} + +remainder :: () int { + x := 5; + y := 2; + z := 5 % 2; + return x % y; +} + +xor :: () int { + x := 42; + y := 123; + z := ~y; + return x + z ~ y; +} + +shl_kinda_sus :: () u8 { + x: u8 = 1; + y: u16 : 1<<8 - 1; + return x<> 256; +} + +bit_fidling :: () int { + x := 122; + y := x | 1; + return y; +} + +assignment :: () int { + x := 5; + y := 4; + x = x + y; + y = 69; + return x + y; +} + +some_global: int; +mutate_global :: () { + some_global = 42; +} + +float_div :: () f64 { + x := 5.0; + y := 2.0; + return x / y; +} diff --git a/test/arena.c b/test/arena.c index 5efd162..e09aceb 100644 --- a/test/arena.c +++ b/test/arena.c @@ -6,10 +6,9 @@ #include #include "alloc.h" +#include "common.h" #include "test-internal.h" -#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) - static void make_and_free(void), make_and_resize(void), free_empty_arena(void), -- cgit v1.2.3