about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-06-18 14:33:06 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-06-18 14:33:06 +0200
commit f883a252b108bd9c4fadb1a78daec85440cc7f08 (patch)
tree 5feb4a1f26b717e7a7f83393058a2d13344acdbd
parent cc8060636ee0a341f68db9ad8fda4ebaabaf49cf (diff)
Do more work on the typechecker and compiler
-rw-r--r-- src/analyzer.c 166
-rw-r--r-- src/analyzer.h 19
-rw-r--r-- src/codegen.c 84
-rw-r--r-- src/codegen.h 4
-rw-r--r-- src/main.c 5
-rw-r--r-- src/primitives.gperf 20
6 files changed, 193 insertions, 105 deletions
diff --git a/src/analyzer.c b/src/analyzer.c
index d02096e..43b1f61 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -14,8 +14,7 @@
struct environ {
idx_t_ up;
- struct constdecl {
- struct type type;
+ struct declaration {
idx_t_ astidx;
} *buf;
size_t len, cap;
@@ -30,14 +29,24 @@ static struct environ *create_environments(struct ast, struct lexemes,
struct environs *, idx_t_, idx_t_,
arena *)
__attribute__((returns_nonnull, nonnull));
-static void typecheck_environment(struct environs, struct ast, struct lexemes,
- idx_t_);
-static struct type typecheck_constdecl(struct environs, struct ast,
- struct lexemes, idx_t_, idx_t_);
-static struct type typecheck_expr(struct environs, struct ast, struct lexemes,
- idx_t_, idx_t_);
-static struct type typecheck_fn(struct environs, struct ast, struct lexemes,
- idx_t_, idx_t_);
+static void typecheckast(struct environs, struct type *, struct ast,
+ struct lexemes)
+ __attribute__((nonnull));
+static idx_t_ typecheckdecl(struct environs, struct type *, struct ast,
+ struct lexemes, idx_t_, idx_t_)
+ __attribute__((nonnull));
+static idx_t_ typecheckstmt(struct environs, struct type *, struct ast,
+ struct lexemes, idx_t_, idx_t_)
+ __attribute__((nonnull));
+static idx_t_ typecheckexpr(struct environs, struct type *, struct ast,
+ struct lexemes, idx_t_, idx_t_)
+ __attribute__((nonnull));
+static idx_t_ typecheckfn(struct environs, struct type *, struct ast,
+ struct lexemes, idx_t_, idx_t_)
+ __attribute__((nonnull));
+static idx_t_ typecheckblk(struct environs, struct type *, struct ast,
+ struct lexemes, idx_t_, idx_t_)
+ __attribute__((nonnull));
static const struct type *typegrab(struct ast, struct lexemes, idx_t_)
__attribute__((returns_nonnull));
static bool typecompat(struct type, struct type);
@@ -46,18 +55,21 @@ static bool typecompat(struct type, struct type);
const struct type *typelookup(const uchar *, size_t)
__attribute__((nonnull));
-void
+struct type *
analyzeast(struct ast ast, struct lexemes toks)
{
arena a = NULL;
struct environs evs = {0};
+ struct type *types = bufalloc(NULL, ast.len, sizeof(*types));
+ memset(types, 0, ast.len * sizeof(*types));
create_environments(ast, toks, &evs, 0, ast.len - 1, &a);
- for (size_t i = 0; i < evs.len; i++)
- typecheck_environment(evs, ast, toks, i);
+ typecheckast(evs, types, ast, toks);
arena_free(&a);
free(evs.buf);
+
+ return types;
}
struct environ *
@@ -73,25 +85,29 @@ create_environments(struct ast ast, struct lexemes toks, struct environs *evs,
struct environ *ev = evs->buf + evs->len++;
*ev = (struct environ){.cap = 16};
- ev->buf = arena_new(a, struct constdecl, ev->cap);
+ ev->buf = arena_new(a, struct declaration, ev->cap);
for (idx_t_ i = beg; likely(i <= end); i++) {
switch (ast.kinds[i]) {
+ case ASTDECL:
+ if (beg != 0)
+ break;
+ __attribute__((fallthrough));
case ASTCDECL: {
- struct constdecl cd = {.astidx = i};
+ struct declaration cd = {.astidx = i};
struct strview sv = toks.strs[ast.lexemes[i]];
/* TODO: Sorted insert and existence check */
for (size_t i = 0; i < ev->len; i++) {
struct strview sv2 = toks.strs[ast.lexemes[ev->buf[i].astidx]];
if (sv.len == sv2.len && memcmp(sv.p, sv2.p, sv.len) == 0) {
- err("analyzer: Constant ‘%.*s’ declared multiple times",
+ err("analyzer: Symbol ‘%.*s’ declared multiple times",
(int)sv.len, sv.p);
}
}
if (ev->len == ev->cap) {
- ev->buf = arena_grow(a, ev->buf, struct constdecl, ev->cap,
+ ev->buf = arena_grow(a, ev->buf, struct declaration, ev->cap,
ev->cap * 2);
ev->cap *= 2;
}
@@ -114,12 +130,18 @@ create_environments(struct ast ast, struct lexemes toks, struct environs *evs,
}
void
-typecheck_environment(struct environs evs, struct ast ast, struct lexemes toks,
- idx_t_ i)
+typecheckast(struct environs evs, struct type *types, struct ast ast,
+ struct lexemes toks)
{
- struct environ ev = evs.buf[i];
- for (size_t j = 0; j < ev.len; j++)
- typecheck_constdecl(evs, ast, toks, j, i);
+ for (idx_t_ i = 0; likely(i < ast.len);) {
+ assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
+ if (types[i].kind == TYPE_UNSET)
+ i = typecheckdecl(evs, types, ast, toks, 0, i);
+ else {
+ while (++i < ast.len && ast.kinds[i] != ASTDECL && ast.kinds[i] != ASTCDECL)
+ ;
+ }
+ }
}
const struct type *
@@ -132,17 +154,13 @@ typegrab(struct ast ast, struct lexemes toks, idx_t_ i)
return tp;
}
-struct type
-typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks,
- idx_t_ i, idx_t_ evi)
+idx_t_
+typecheckdecl(struct environs evs, struct type *types, struct ast ast,
+ struct lexemes toks, idx_t_ evi, idx_t_ i)
{
- struct environ ev = evs.buf[evi];
- struct constdecl cd = ev.buf[i];
- if (cd.type.kind != TYPE_UNSET)
- return cd.type;
- ev.buf[i].type.kind = TYPE_CHECKING;
+ types[i].kind = TYPE_CHECKING;
- struct pair p = ast.kids[cd.astidx];
+ struct pair p = ast.kids[i];
struct type ltype, rtype;
ltype.kind = TYPE_UNSET;
@@ -150,38 +168,66 @@ typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks,
if (p.lhs != AST_EMPTY)
ltype = *typegrab(ast, toks, p.lhs);
- rtype = typecheck_expr(evs, ast, toks, p.rhs, evi);
+ idx_t_ ni = typecheckexpr(evs, types, ast, toks, evi, p.rhs);
+ rtype = types[p.rhs];
if (ltype.kind == TYPE_UNSET)
ltype = rtype;
else if (!typecompat(ltype, rtype))
err("analyzer: Type mismatch");
- return ev.buf[i].type = ltype;
+ types[i] = ltype;
+ return ni;
}
-struct type
-typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks,
- idx_t_ i, idx_t_ evi)
+idx_t_
+typecheckstmt(struct environs evs, struct type *types, struct ast ast,
+ struct lexemes toks, idx_t_ evi, idx_t_ i)
+{
+ switch (ast.kinds[i]) {
+ case ASTDECL:
+ case ASTCDECL:
+ return typecheckdecl(evs, types, ast, toks, evi, i);
+ case ASTRET: {
+ idx_t_ ni = typecheckexpr(evs, types, ast, toks, evi, ast.kids[i].rhs);
+ types[i] = types[ast.kids[i].rhs];
+ return ni;
+ }
+ }
+
+ assert(!"unreachable");
+ __builtin_unreachable();
+}
+
+idx_t_
+typecheckexpr(struct environs evs, struct type *types, struct ast ast,
+ struct lexemes toks, idx_t_ evi, idx_t_ i)
{
switch (ast.kinds[i]) {
case ASTNUMLIT:
- return (struct type){.kind = TYPE_INT_UNTYPED, .issigned = true};
+ types[i].kind = TYPE_NUM;
+ types[i].size = 0;
+ types[i].issigned = true;
+ return i + 1;
case ASTIDENT: {
struct environ ev = evs.buf[evi];
struct strview sv = toks.strs[ast.lexemes[i]];
for (;;) {
- for (size_t i = 0; i < ev.len; i++) {
- struct strview sv2 = toks.strs[ast.lexemes[ev.buf[i].astidx]];
+ for (size_t j = 0; j < ev.len; j++) {
+ struct strview sv2 = toks.strs[ast.lexemes[ev.buf[j].astidx]];
if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0)
continue;
- struct type t = typecheck_constdecl(evs, ast, toks, i, evi);
- if (t.kind == TYPE_CHECKING) {
- err("analyzer: Circular dependency for type ‘%.*s’",
- (int)sv2.len, sv2.p);
+ switch (types[j].kind) {
+ case TYPE_UNSET:
+ typecheckdecl(evs, types, ast, toks, evi, j);
+ break;
+ case TYPE_CHECKING:
+ err("analyzer: Circular definition of ‘%.*s’", (int)sv2.len,
+ sv2.p);
}
- return t;
+ types[i] = types[j];
+ return i + 1;
}
if (evi == 0)
err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p);
@@ -189,28 +235,38 @@ typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks,
}
}
case ASTFN:
- return typecheck_fn(evs, ast, toks, i, evi);
+ return typecheckfn(evs, types, ast, toks, evi, i);
default:
err("analyzer: Unexpected AST kind %u", ast.kinds[i]);
__builtin_unreachable();
}
}
-struct type
-typecheck_fn(struct environs evs, struct ast ast, struct lexemes toks,
- idx_t_ i, idx_t_ evi)
+idx_t_
+typecheckfn(struct environs evs, struct type *types, struct ast ast,
+ struct lexemes toks, idx_t_ evi, idx_t_ i)
{
struct type t = {.kind = TYPE_FN};
struct pair p = ast.kids[i];
idx_t_ proto = p.lhs;
- if (ast.kids[proto].rhs == AST_EMPTY)
- return t;
-
- t.ret = typegrab(ast, toks, ast.kids[proto].rhs);
- return t;
+ if (ast.kids[proto].rhs != AST_EMPTY)
+ t.ret = typegrab(ast, toks, ast.kids[proto].rhs);
+ types[i] = t;
+ idx_t_ ni = typecheckblk(evs, types, ast, toks, evi, p.rhs);
+ // if (!typecompat(types[i], types[p.rhs]))
+ // err("analyzer: Type mismatch");
+ return ni;
+}
- /* TODO: Typecheck function body */
+idx_t_
+typecheckblk(struct environs evs, struct type *types, struct ast ast,
+ struct lexemes toks, idx_t_ evi, idx_t_ i)
+{
+ struct pair p = ast.kids[i];
+ for (i = p.lhs; i <= p.rhs;)
+ i = typecheckstmt(evs, types, ast, toks, evi, i);
+ return i;
}
bool
@@ -223,7 +279,7 @@ typecompat(struct type lhs, struct type rhs)
return true;
/* TODO: Need to actually parse it! 256 should not coerce to i8. */
- if (rhs.kind == TYPE_INT_UNTYPED)
+ if (rhs.kind == TYPE_NUM)
return true;
if (lhs.issigned != rhs.issigned)
diff --git a/src/analyzer.h b/src/analyzer.h
index f44cbac..5e28903 100644
--- a/src/analyzer.h
+++ b/src/analyzer.h
@@ -10,22 +10,10 @@ enum {
TYPE_UNSET = 0,
TYPE_CHECKING = 1,
- /* Signed integers */
- TYPE_I8,
- TYPE_I16,
- TYPE_I32,
- TYPE_I64,
- TYPE_INT,
- TYPE_INT_UNTYPED,
-
- /* Unsigned integers */
- TYPE_U8,
- TYPE_U16,
- TYPE_U32,
- TYPE_U64,
- TYPE_UINT,
+ TYPE_NUM,
/* Floating point numbers */
+ TYPE_F16,
TYPE_F32,
TYPE_F64,
@@ -52,6 +40,7 @@ struct type {
idx_t_ paramcnt;
};
-void analyzeast(struct ast, struct lexemes);
+struct type *analyzeast(struct ast, struct lexemes)
+ __attribute__((returns_nonnull));
#endif /* !ORYX_ANALYZER_H */
diff --git a/src/codegen.c b/src/codegen.c
index a3a1b87..a5d0f46 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -12,18 +12,25 @@
#include "parser.h"
#include "types.h"
-static size_t codegenstmt(LLVMBuilderRef, struct ast, struct lexemes,
- size_t);
-static size_t codegenexpr(LLVMBuilderRef, struct ast, struct lexemes,
- size_t, LLVMValueRef *)
+static size_t codegenstmt(LLVMBuilderRef, LLVMValueRef *, struct ast,
+ struct lexemes, size_t);
+static size_t codegenexpr(LLVMBuilderRef, LLVMValueRef *, struct ast,
+ struct lexemes, size_t, LLVMValueRef *)
__attribute__((nonnull));
+static LLVMTypeRef type2llvm(struct type);
+
void
-codegen(struct ast ast, struct lexemes toks)
+codegen(const char *file, struct type *types, struct ast ast,
+ struct lexemes toks)
{
LLVMModuleRef mod = LLVMModuleCreateWithName("oryx");
+ LLVMSetSourceFileName(mod, file, strlen(file));
LLVMBuilderRef builder = LLVMCreateBuilder();
+ LLVMValueRef *declvals = bufalloc(NULL, ast.len, sizeof(*declvals));
+ memset(declvals, 0, ast.len * sizeof(*declvals));
+
for (size_t i = 0; i < ast.len;) {
switch (ast.kinds[i]) {
case ASTDECL: {
@@ -32,10 +39,11 @@ codegen(struct ast ast, struct lexemes toks)
char *name = bufalloc(NULL, sv.len + 1, 1);
((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0;
+ LLVMTypeRef T = type2llvm(types[i]);
LLVMValueRef globl, val;
- globl = LLVMAddGlobal(mod, LLVMInt64Type(), name);
- i = codegenexpr(builder, ast, toks, ast.kids[i].rhs, &val);
- LLVMSetInitializer(globl, val);
+ globl = LLVMAddGlobal(mod, T, name);
+ i = codegenexpr(builder, declvals, ast, toks, ast.kids[i].rhs, &val);
+ LLVMSetInitializer(globl, LLVMConstTrunc(val, T));
free(name);
break;
}
@@ -77,7 +85,7 @@ codegen(struct ast ast, struct lexemes toks)
free(fnname);
for (i = ast.kids[body].lhs; i <= ast.kids[body].rhs;)
- i = codegenstmt(builder, ast, toks, i);
+ i = codegenstmt(builder, declvals, ast, toks, i);
break;
}
default:
@@ -85,6 +93,7 @@ codegen(struct ast ast, struct lexemes toks)
}
}
+ free(declvals);
LLVMDisposeBuilder(builder);
char *error = NULL;
@@ -96,8 +105,8 @@ codegen(struct ast ast, struct lexemes toks)
}
size_t
-codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
- size_t i)
+codegenstmt(LLVMBuilderRef builder, LLVMValueRef *declvals, struct ast ast,
+ struct lexemes toks, size_t i)
{
switch (ast.kinds[i]) {
case ASTRET:
@@ -106,7 +115,7 @@ codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
return i + 1;
}
LLVMValueRef v;
- i = codegenexpr(builder, ast, toks, ast.kids[i].rhs, &v);
+ i = codegenexpr(builder, declvals, ast, toks, ast.kids[i].rhs, &v);
LLVMBuildRet(builder, v);
return i;
}
@@ -116,8 +125,8 @@ codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
}
size_t
-codegenexpr(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
- size_t i, LLVMValueRef *v)
+codegenexpr(LLVMBuilderRef builder, LLVMValueRef *declvals, struct ast ast,
+ struct lexemes toks, size_t i, LLVMValueRef *v)
{
(void)builder;
switch (ast.kinds[i]) {
@@ -125,20 +134,51 @@ codegenexpr(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
/* TODO: Arbitrary precision? */
struct strview sv = toks.strs[ast.lexemes[i]];
+ bool has_sep = memchr(sv.p, '\'', sv.len) != NULL;
+
/* TODO: Temporary one-time-use allocator? */
- size_t len = 0;
- char *p = bufalloc(NULL, sv.len, 1);
- for (size_t i = 0; i < sv.len; i++) {
- if (sv.p[i] != '\'')
- p[len++] = sv.p[i];
- }
+ if (has_sep) {
+ size_t len = 0;
+ char *p = bufalloc(NULL, sv.len, 1);
+ for (size_t i = 0; i < sv.len; i++) {
+ if (sv.p[i] != '\'')
+ p[len++] = sv.p[i];
+ }
- *v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), p, len, 10);
- free(p);
+ *v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), p, len, 10);
+ free(p);
+ } else
+ *v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), sv.p, sv.len, 10);
return i + 1;
}
+ case ASTIDENT:
+ err("codegen: %s: Not implemented", __func__);
}
assert(!"unreachable");
__builtin_unreachable();
}
+
+LLVMTypeRef
+type2llvm(struct type t)
+{
+ switch (t.kind) {
+ case TYPE_UNSET:
+ case TYPE_CHECKING:
+ assert(!"codegen: Hit TYPE_UNSET or TYPE_CHECKING");
+ __builtin_unreachable();
+ case TYPE_FN:
+ case TYPE_F16:
+ case TYPE_F32:
+ case TYPE_F64:
+ err("codegen: %s: Not implemented", __func__);
+ case TYPE_NUM:
+ assert(t.issigned);
+ /* TODO: Arbitrary precision */
+ if (t.size == 0)
+ t.size = 8;
+ return LLVMIntType(t.size * 8);
+ default:
+ __builtin_unreachable();
+ }
+}
diff --git a/src/codegen.h b/src/codegen.h
index 517b94d..74bd4fb 100644
--- a/src/codegen.h
+++ b/src/codegen.h
@@ -1,9 +1,11 @@
#ifndef ORYX_CODEGEN_H
#define ORYX_CODEGEN_H
+#include "analyzer.h"
#include "lexer.h"
#include "parser.h"
-void codegen(struct ast ast, struct lexemes toks);
+void codegen(const char *, struct type *, struct ast, struct lexemes)
+ __attribute__((nonnull));
#endif /* !ORYX_CODEGEN_H */
diff --git a/src/main.c b/src/main.c
index b32a723..2cfc1d1 100644
--- a/src/main.c
+++ b/src/main.c
@@ -29,10 +29,11 @@ main(int argc, char **argv)
struct lexemes toks = lexstring(src, srclen);
struct ast ast = parsetoks(toks);
- analyzeast(ast, toks);
- codegen(ast, toks);
+ struct type *types = analyzeast(ast, toks);
+ codegen(argv[1], types, ast, toks);
#if DEBUG
+ free(types);
free(src);
lexemes_free(toks);
ast_free(ast);
diff --git a/src/primitives.gperf b/src/primitives.gperf
index 449a030..7594b29 100644
--- a/src/primitives.gperf
+++ b/src/primitives.gperf
@@ -16,16 +16,16 @@
struct typeslot { char *name; struct type inner; };
%%
-i8, { TYPE_I8, 1, true }
-i16, { TYPE_I16, 2, true }
-i32, { TYPE_I32, 4, true }
-i64, { TYPE_I64, 8, true }
-int, { TYPE_INT, 8, true }
-u8, { TYPE_U8, 1, false }
-u16, { TYPE_U16, 2, false }
-u32, { TYPE_U32, 4, false }
-u64, { TYPE_U64, 8, false }
-uint, { TYPE_UINT, 8, false }
+i8, { TYPE_NUM, 1, true }
+i16, { TYPE_NUM, 2, true }
+i32, { TYPE_NUM, 4, true }
+i64, { TYPE_NUM, 8, true }
+int, { TYPE_NUM, 8, true }
+u8, { TYPE_NUM, 1, false }
+u16, { TYPE_NUM, 2, false }
+u32, { TYPE_NUM, 4, false }
+u64, { TYPE_NUM, 8, false }
+uint, { TYPE_NUM, 8, false }
rune, { TYPE_RUNE, 4, true }
%%
const struct type *