From f883a252b108bd9c4fadb1a78daec85440cc7f08 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 18 Jun 2024 14:33:06 +0200 Subject: Do more work on the typechecker and compiler --- src/analyzer.c | 166 ++++++++++++++++++++++++++++++++++----------------- src/analyzer.h | 19 ++---- src/codegen.c | 84 +++++++++++++++++++------- src/codegen.h | 4 +- src/main.c | 5 +- src/primitives.gperf | 20 +++---- 6 files changed, 193 insertions(+), 105 deletions(-) (limited to 'src') diff --git a/src/analyzer.c b/src/analyzer.c index d02096e..43b1f61 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -14,8 +14,7 @@ struct environ { idx_t_ up; - struct constdecl { - struct type type; + struct declaration { idx_t_ astidx; } *buf; size_t len, cap; @@ -30,14 +29,24 @@ static struct environ *create_environments(struct ast, struct lexemes, struct environs *, idx_t_, idx_t_, arena *) __attribute__((returns_nonnull, nonnull)); -static void typecheck_environment(struct environs, struct ast, struct lexemes, - idx_t_); -static struct type typecheck_constdecl(struct environs, struct ast, - struct lexemes, idx_t_, idx_t_); -static struct type typecheck_expr(struct environs, struct ast, struct lexemes, - idx_t_, idx_t_); -static struct type typecheck_fn(struct environs, struct ast, struct lexemes, - idx_t_, idx_t_); +static void typecheckast(struct environs, struct type *, struct ast, + struct lexemes) + __attribute__((nonnull)); +static idx_t_ typecheckdecl(struct environs, struct type *, struct ast, + struct lexemes, idx_t_, idx_t_) + __attribute__((nonnull)); +static idx_t_ typecheckstmt(struct environs, struct type *, struct ast, + struct lexemes, idx_t_, idx_t_) + __attribute__((nonnull)); +static idx_t_ typecheckexpr(struct environs, struct type *, struct ast, + struct lexemes, idx_t_, idx_t_) + __attribute__((nonnull)); +static idx_t_ typecheckfn(struct environs, struct type *, struct ast, + struct lexemes, idx_t_, idx_t_) + __attribute__((nonnull)); +static idx_t_ typecheckblk(struct environs, struct type *, struct ast, + struct lexemes, idx_t_, idx_t_) + __attribute__((nonnull)); static const struct type *typegrab(struct ast, struct lexemes, idx_t_) __attribute__((returns_nonnull)); static bool typecompat(struct type, struct type); @@ -46,18 +55,21 @@ static bool typecompat(struct type, struct type); const struct type *typelookup(const uchar *, size_t) __attribute__((nonnull)); -void +struct type * analyzeast(struct ast ast, struct lexemes toks) { arena a = NULL; struct environs evs = {0}; + struct type *types = bufalloc(NULL, ast.len, sizeof(*types)); + memset(types, 0, ast.len * sizeof(*types)); create_environments(ast, toks, &evs, 0, ast.len - 1, &a); - for (size_t i = 0; i < evs.len; i++) - typecheck_environment(evs, ast, toks, i); + typecheckast(evs, types, ast, toks); arena_free(&a); free(evs.buf); + + return types; } struct environ * @@ -73,25 +85,29 @@ create_environments(struct ast ast, struct lexemes toks, struct environs *evs, struct environ *ev = evs->buf + evs->len++; *ev = (struct environ){.cap = 16}; - ev->buf = arena_new(a, struct constdecl, ev->cap); + ev->buf = arena_new(a, struct declaration, ev->cap); for (idx_t_ i = beg; likely(i <= end); i++) { switch (ast.kinds[i]) { + case ASTDECL: + if (beg != 0) + break; + __attribute__((fallthrough)); case ASTCDECL: { - struct constdecl cd = {.astidx = i}; + struct declaration cd = {.astidx = i}; struct strview sv = toks.strs[ast.lexemes[i]]; /* TODO: Sorted insert and existence check */ for (size_t i = 0; i < ev->len; i++) { struct strview sv2 = toks.strs[ast.lexemes[ev->buf[i].astidx]]; if (sv.len == sv2.len && memcmp(sv.p, sv2.p, sv.len) == 0) { - err("analyzer: Constant ‘%.*s’ declared multiple times", + err("analyzer: Symbol ‘%.*s’ declared multiple times", (int)sv.len, sv.p); } } if (ev->len == ev->cap) { - ev->buf = arena_grow(a, ev->buf, struct constdecl, ev->cap, + ev->buf = arena_grow(a, ev->buf, struct declaration, ev->cap, ev->cap * 2); ev->cap *= 2; } @@ -114,12 +130,18 @@ create_environments(struct ast ast, struct lexemes toks, struct environs *evs, } void -typecheck_environment(struct environs evs, struct ast ast, struct lexemes toks, - idx_t_ i) +typecheckast(struct environs evs, struct type *types, struct ast ast, + struct lexemes toks) { - struct environ ev = evs.buf[i]; - for (size_t j = 0; j < ev.len; j++) - typecheck_constdecl(evs, ast, toks, j, i); + for (idx_t_ i = 0; likely(i < ast.len);) { + assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL); + if (types[i].kind == TYPE_UNSET) + i = typecheckdecl(evs, types, ast, toks, 0, i); + else { + while (++i < ast.len && ast.kinds[i] != ASTDECL && ast.kinds[i] != ASTCDECL) + ; + } + } } const struct type * @@ -132,17 +154,13 @@ typegrab(struct ast ast, struct lexemes toks, idx_t_ i) return tp; } -struct type -typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks, - idx_t_ i, idx_t_ evi) +idx_t_ +typecheckdecl(struct environs evs, struct type *types, struct ast ast, + struct lexemes toks, idx_t_ evi, idx_t_ i) { - struct environ ev = evs.buf[evi]; - struct constdecl cd = ev.buf[i]; - if (cd.type.kind != TYPE_UNSET) - return cd.type; - ev.buf[i].type.kind = TYPE_CHECKING; + types[i].kind = TYPE_CHECKING; - struct pair p = ast.kids[cd.astidx]; + struct pair p = ast.kids[i]; struct type ltype, rtype; ltype.kind = TYPE_UNSET; @@ -150,38 +168,66 @@ typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks, if (p.lhs != AST_EMPTY) ltype = *typegrab(ast, toks, p.lhs); - rtype = typecheck_expr(evs, ast, toks, p.rhs, evi); + idx_t_ ni = typecheckexpr(evs, types, ast, toks, evi, p.rhs); + rtype = types[p.rhs]; if (ltype.kind == TYPE_UNSET) ltype = rtype; else if (!typecompat(ltype, rtype)) err("analyzer: Type mismatch"); - return ev.buf[i].type = ltype; + types[i] = ltype; + return ni; } -struct type -typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks, - idx_t_ i, idx_t_ evi) +idx_t_ +typecheckstmt(struct environs evs, struct type *types, struct ast ast, + struct lexemes toks, idx_t_ evi, idx_t_ i) +{ + switch (ast.kinds[i]) { + case ASTDECL: + case ASTCDECL: + return typecheckdecl(evs, types, ast, toks, evi, i); + case ASTRET: { + idx_t_ ni = typecheckexpr(evs, types, ast, toks, evi, ast.kids[i].rhs); + types[i] = types[ast.kids[i].rhs]; + return ni; + } + } + + assert(!"unreachable"); + __builtin_unreachable(); +} + +idx_t_ +typecheckexpr(struct environs evs, struct type *types, struct ast ast, + struct lexemes toks, idx_t_ evi, idx_t_ i) { switch (ast.kinds[i]) { case ASTNUMLIT: - return (struct type){.kind = TYPE_INT_UNTYPED, .issigned = true}; + types[i].kind = TYPE_NUM; + types[i].size = 0; + types[i].issigned = true; + return i + 1; case ASTIDENT: { struct environ ev = evs.buf[evi]; struct strview sv = toks.strs[ast.lexemes[i]]; for (;;) { - for (size_t i = 0; i < ev.len; i++) { - struct strview sv2 = toks.strs[ast.lexemes[ev.buf[i].astidx]]; + for (size_t j = 0; j < ev.len; j++) { + struct strview sv2 = toks.strs[ast.lexemes[ev.buf[j].astidx]]; if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0) continue; - struct type t = typecheck_constdecl(evs, ast, toks, i, evi); - if (t.kind == TYPE_CHECKING) { - err("analyzer: Circular dependency for type ‘%.*s’", - (int)sv2.len, sv2.p); + switch (types[j].kind) { + case TYPE_UNSET: + typecheckdecl(evs, types, ast, toks, evi, j); + break; + case TYPE_CHECKING: + err("analyzer: Circular definition of ‘%.*s’", (int)sv2.len, + sv2.p); } - return t; + types[i] = types[j]; + return i + 1; } if (evi == 0) err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p); @@ -189,28 +235,38 @@ typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks, } } case ASTFN: - return typecheck_fn(evs, ast, toks, i, evi); + return typecheckfn(evs, types, ast, toks, evi, i); default: err("analyzer: Unexpected AST kind %u", ast.kinds[i]); __builtin_unreachable(); } } -struct type -typecheck_fn(struct environs evs, struct ast ast, struct lexemes toks, - idx_t_ i, idx_t_ evi) +idx_t_ +typecheckfn(struct environs evs, struct type *types, struct ast ast, + struct lexemes toks, idx_t_ evi, idx_t_ i) { struct type t = {.kind = TYPE_FN}; struct pair p = ast.kids[i]; idx_t_ proto = p.lhs; - if (ast.kids[proto].rhs == AST_EMPTY) - return t; - - t.ret = typegrab(ast, toks, ast.kids[proto].rhs); - return t; + if (ast.kids[proto].rhs != AST_EMPTY) + t.ret = typegrab(ast, toks, ast.kids[proto].rhs); + types[i] = t; + idx_t_ ni = typecheckblk(evs, types, ast, toks, evi, p.rhs); + // if (!typecompat(types[i], types[p.rhs])) + // err("analyzer: Type mismatch"); + return ni; +} - /* TODO: Typecheck function body */ +idx_t_ +typecheckblk(struct environs evs, struct type *types, struct ast ast, + struct lexemes toks, idx_t_ evi, idx_t_ i) +{ + struct pair p = ast.kids[i]; + for (i = p.lhs; i <= p.rhs;) + i = typecheckstmt(evs, types, ast, toks, evi, i); + return i; } bool @@ -223,7 +279,7 @@ typecompat(struct type lhs, struct type rhs) return true; /* TODO: Need to actually parse it! 256 should not coerce to i8. */ - if (rhs.kind == TYPE_INT_UNTYPED) + if (rhs.kind == TYPE_NUM) return true; if (lhs.issigned != rhs.issigned) diff --git a/src/analyzer.h b/src/analyzer.h index f44cbac..5e28903 100644 --- a/src/analyzer.h +++ b/src/analyzer.h @@ -10,22 +10,10 @@ enum { TYPE_UNSET = 0, TYPE_CHECKING = 1, - /* Signed integers */ - TYPE_I8, - TYPE_I16, - TYPE_I32, - TYPE_I64, - TYPE_INT, - TYPE_INT_UNTYPED, - - /* Unsigned integers */ - TYPE_U8, - TYPE_U16, - TYPE_U32, - TYPE_U64, - TYPE_UINT, + TYPE_NUM, /* Floating point numbers */ + TYPE_F16, TYPE_F32, TYPE_F64, @@ -52,6 +40,7 @@ struct type { idx_t_ paramcnt; }; -void analyzeast(struct ast, struct lexemes); +struct type *analyzeast(struct ast, struct lexemes) + __attribute__((returns_nonnull)); #endif /* !ORYX_ANALYZER_H */ diff --git a/src/codegen.c b/src/codegen.c index a3a1b87..a5d0f46 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -12,18 +12,25 @@ #include "parser.h" #include "types.h" -static size_t codegenstmt(LLVMBuilderRef, struct ast, struct lexemes, - size_t); -static size_t codegenexpr(LLVMBuilderRef, struct ast, struct lexemes, - size_t, LLVMValueRef *) +static size_t codegenstmt(LLVMBuilderRef, LLVMValueRef *, struct ast, + struct lexemes, size_t); +static size_t codegenexpr(LLVMBuilderRef, LLVMValueRef *, struct ast, + struct lexemes, size_t, LLVMValueRef *) __attribute__((nonnull)); +static LLVMTypeRef type2llvm(struct type); + void -codegen(struct ast ast, struct lexemes toks) +codegen(const char *file, struct type *types, struct ast ast, + struct lexemes toks) { LLVMModuleRef mod = LLVMModuleCreateWithName("oryx"); + LLVMSetSourceFileName(mod, file, strlen(file)); LLVMBuilderRef builder = LLVMCreateBuilder(); + LLVMValueRef *declvals = bufalloc(NULL, ast.len, sizeof(*declvals)); + memset(declvals, 0, ast.len * sizeof(*declvals)); + for (size_t i = 0; i < ast.len;) { switch (ast.kinds[i]) { case ASTDECL: { @@ -32,10 +39,11 @@ codegen(struct ast ast, struct lexemes toks) char *name = bufalloc(NULL, sv.len + 1, 1); ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0; + LLVMTypeRef T = type2llvm(types[i]); LLVMValueRef globl, val; - globl = LLVMAddGlobal(mod, LLVMInt64Type(), name); - i = codegenexpr(builder, ast, toks, ast.kids[i].rhs, &val); - LLVMSetInitializer(globl, val); + globl = LLVMAddGlobal(mod, T, name); + i = codegenexpr(builder, declvals, ast, toks, ast.kids[i].rhs, &val); + LLVMSetInitializer(globl, LLVMConstTrunc(val, T)); free(name); break; } @@ -77,7 +85,7 @@ codegen(struct ast ast, struct lexemes toks) free(fnname); for (i = ast.kids[body].lhs; i <= ast.kids[body].rhs;) - i = codegenstmt(builder, ast, toks, i); + i = codegenstmt(builder, declvals, ast, toks, i); break; } default: @@ -85,6 +93,7 @@ codegen(struct ast ast, struct lexemes toks) } } + free(declvals); LLVMDisposeBuilder(builder); char *error = NULL; @@ -96,8 +105,8 @@ codegen(struct ast ast, struct lexemes toks) } size_t -codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks, - size_t i) +codegenstmt(LLVMBuilderRef builder, LLVMValueRef *declvals, struct ast ast, + struct lexemes toks, size_t i) { switch (ast.kinds[i]) { case ASTRET: @@ -106,7 +115,7 @@ codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks, return i + 1; } LLVMValueRef v; - i = codegenexpr(builder, ast, toks, ast.kids[i].rhs, &v); + i = codegenexpr(builder, declvals, ast, toks, ast.kids[i].rhs, &v); LLVMBuildRet(builder, v); return i; } @@ -116,8 +125,8 @@ codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks, } size_t -codegenexpr(LLVMBuilderRef builder, struct ast ast, struct lexemes toks, - size_t i, LLVMValueRef *v) +codegenexpr(LLVMBuilderRef builder, LLVMValueRef *declvals, struct ast ast, + struct lexemes toks, size_t i, LLVMValueRef *v) { (void)builder; switch (ast.kinds[i]) { @@ -125,20 +134,51 @@ codegenexpr(LLVMBuilderRef builder, struct ast ast, struct lexemes toks, /* TODO: Arbitrary precision? */ struct strview sv = toks.strs[ast.lexemes[i]]; + bool has_sep = memchr(sv.p, '\'', sv.len) != NULL; + /* TODO: Temporary one-time-use allocator? */ - size_t len = 0; - char *p = bufalloc(NULL, sv.len, 1); - for (size_t i = 0; i < sv.len; i++) { - if (sv.p[i] != '\'') - p[len++] = sv.p[i]; - } + if (has_sep) { + size_t len = 0; + char *p = bufalloc(NULL, sv.len, 1); + for (size_t i = 0; i < sv.len; i++) { + if (sv.p[i] != '\'') + p[len++] = sv.p[i]; + } - *v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), p, len, 10); - free(p); + *v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), p, len, 10); + free(p); + } else + *v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), sv.p, sv.len, 10); return i + 1; } + case ASTIDENT: + err("codegen: %s: Not implemented", __func__); } assert(!"unreachable"); __builtin_unreachable(); } + +LLVMTypeRef +type2llvm(struct type t) +{ + switch (t.kind) { + case TYPE_UNSET: + case TYPE_CHECKING: + assert(!"codegen: Hit TYPE_UNSET or TYPE_CHECKING"); + __builtin_unreachable(); + case TYPE_FN: + case TYPE_F16: + case TYPE_F32: + case TYPE_F64: + err("codegen: %s: Not implemented", __func__); + case TYPE_NUM: + assert(t.issigned); + /* TODO: Arbitrary precision */ + if (t.size == 0) + t.size = 8; + return LLVMIntType(t.size * 8); + default: + __builtin_unreachable(); + } +} diff --git a/src/codegen.h b/src/codegen.h index 517b94d..74bd4fb 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -1,9 +1,11 @@ #ifndef ORYX_CODEGEN_H #define ORYX_CODEGEN_H +#include "analyzer.h" #include "lexer.h" #include "parser.h" -void codegen(struct ast ast, struct lexemes toks); +void codegen(const char *, struct type *, struct ast, struct lexemes) + __attribute__((nonnull)); #endif /* !ORYX_CODEGEN_H */ diff --git a/src/main.c b/src/main.c index b32a723..2cfc1d1 100644 --- a/src/main.c +++ b/src/main.c @@ -29,10 +29,11 @@ main(int argc, char **argv) struct lexemes toks = lexstring(src, srclen); struct ast ast = parsetoks(toks); - analyzeast(ast, toks); - codegen(ast, toks); + struct type *types = analyzeast(ast, toks); + codegen(argv[1], types, ast, toks); #if DEBUG + free(types); free(src); lexemes_free(toks); ast_free(ast); diff --git a/src/primitives.gperf b/src/primitives.gperf index 449a030..7594b29 100644 --- a/src/primitives.gperf +++ b/src/primitives.gperf @@ -16,16 +16,16 @@ struct typeslot { char *name; struct type inner; }; %% -i8, { TYPE_I8, 1, true } -i16, { TYPE_I16, 2, true } -i32, { TYPE_I32, 4, true } -i64, { TYPE_I64, 8, true } -int, { TYPE_INT, 8, true } -u8, { TYPE_U8, 1, false } -u16, { TYPE_U16, 2, false } -u32, { TYPE_U32, 4, false } -u64, { TYPE_U64, 8, false } -uint, { TYPE_UINT, 8, false } +i8, { TYPE_NUM, 1, true } +i16, { TYPE_NUM, 2, true } +i32, { TYPE_NUM, 4, true } +i64, { TYPE_NUM, 8, true } +int, { TYPE_NUM, 8, true } +u8, { TYPE_NUM, 1, false } +u16, { TYPE_NUM, 2, false } +u32, { TYPE_NUM, 4, false } +u64, { TYPE_NUM, 8, false } +uint, { TYPE_NUM, 8, false } rune, { TYPE_RUNE, 4, true } %% const struct type * -- cgit v1.2.3