From f883a252b108bd9c4fadb1a78daec85440cc7f08 Mon Sep 17 00:00:00 2001
From: Thomas Voss <mail@thomasvoss.com>
Date: Tue, 18 Jun 2024 14:33:06 +0200
Subject: Do more work on the typechecker and compiler

---
 src/analyzer.c       | 166 ++++++++++++++++++++++++++++++++++-----------------
 src/analyzer.h       |  19 ++----
 src/codegen.c        |  84 +++++++++++++++++++-------
 src/codegen.h        |   4 +-
 src/main.c           |   5 +-
 src/primitives.gperf |  20 +++----
 6 files changed, 193 insertions(+), 105 deletions(-)

(limited to 'src')

diff --git a/src/analyzer.c b/src/analyzer.c
index d02096e..43b1f61 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -14,8 +14,7 @@
 
 struct environ {
 	idx_t_ up;
-	struct constdecl {
-		struct type type;
+	struct declaration {
 		idx_t_ astidx;
 	} *buf;
 	size_t len, cap;
@@ -30,14 +29,24 @@ static struct environ *create_environments(struct ast, struct lexemes,
                                            struct environs *, idx_t_, idx_t_,
                                            arena *)
 	__attribute__((returns_nonnull, nonnull));
-static void typecheck_environment(struct environs, struct ast, struct lexemes,
-                                  idx_t_);
-static struct type typecheck_constdecl(struct environs, struct ast,
-                                       struct lexemes, idx_t_, idx_t_);
-static struct type typecheck_expr(struct environs, struct ast, struct lexemes,
-                                  idx_t_, idx_t_);
-static struct type typecheck_fn(struct environs, struct ast, struct lexemes,
-                                idx_t_, idx_t_);
+static void typecheckast(struct environs, struct type *, struct ast,
+                         struct lexemes)
+	__attribute__((nonnull));
+static idx_t_ typecheckdecl(struct environs, struct type *, struct ast,
+                            struct lexemes, idx_t_, idx_t_)
+	__attribute__((nonnull));
+static idx_t_ typecheckstmt(struct environs, struct type *, struct ast,
+                            struct lexemes, idx_t_, idx_t_)
+	__attribute__((nonnull));
+static idx_t_ typecheckexpr(struct environs, struct type *, struct ast,
+                            struct lexemes, idx_t_, idx_t_)
+	__attribute__((nonnull));
+static idx_t_ typecheckfn(struct environs, struct type *, struct ast,
+                          struct lexemes, idx_t_, idx_t_)
+	__attribute__((nonnull));
+static idx_t_ typecheckblk(struct environs, struct type *, struct ast,
+                           struct lexemes, idx_t_, idx_t_)
+	__attribute__((nonnull));
 static const struct type *typegrab(struct ast, struct lexemes, idx_t_)
 	__attribute__((returns_nonnull));
 static bool typecompat(struct type, struct type);
@@ -46,18 +55,21 @@ static bool typecompat(struct type, struct type);
 const struct type *typelookup(const uchar *, size_t)
 	__attribute__((nonnull));
 
-void
+struct type *
 analyzeast(struct ast ast, struct lexemes toks)
 {
 	arena a = NULL;
 	struct environs evs = {0};
+	struct type *types = bufalloc(NULL, ast.len, sizeof(*types));
+	memset(types, 0, ast.len * sizeof(*types));
 
 	create_environments(ast, toks, &evs, 0, ast.len - 1, &a);
-	for (size_t i = 0; i < evs.len; i++)
-		typecheck_environment(evs, ast, toks, i);
+	typecheckast(evs, types, ast, toks);
 
 	arena_free(&a);
 	free(evs.buf);
+
+	return types;
 }
 
 struct environ *
@@ -73,25 +85,29 @@ create_environments(struct ast ast, struct lexemes toks, struct environs *evs,
 
 	struct environ *ev = evs->buf + evs->len++;
 	*ev = (struct environ){.cap = 16};
-	ev->buf = arena_new(a, struct constdecl, ev->cap);
+	ev->buf = arena_new(a, struct declaration, ev->cap);
 
 	for (idx_t_ i = beg; likely(i <= end); i++) {
 		switch (ast.kinds[i]) {
+		case ASTDECL:
+			if (beg != 0)
+				break;
+			__attribute__((fallthrough));
 		case ASTCDECL: {
-			struct constdecl cd = {.astidx = i};
+			struct declaration cd = {.astidx = i};
 			struct strview sv = toks.strs[ast.lexemes[i]];
 
 			/* TODO: Sorted insert and existence check */
 			for (size_t i = 0; i < ev->len; i++) {
 				struct strview sv2 = toks.strs[ast.lexemes[ev->buf[i].astidx]];
 				if (sv.len == sv2.len && memcmp(sv.p, sv2.p, sv.len) == 0) {
-					err("analyzer: Constant ‘%.*s’ declared multiple times",
+					err("analyzer: Symbol ‘%.*s’ declared multiple times",
 					    (int)sv.len, sv.p);
 				}
 			}
 
 			if (ev->len == ev->cap) {
-				ev->buf = arena_grow(a, ev->buf, struct constdecl, ev->cap,
+				ev->buf = arena_grow(a, ev->buf, struct declaration, ev->cap,
 				                     ev->cap * 2);
 				ev->cap *= 2;
 			}
@@ -114,12 +130,18 @@ create_environments(struct ast ast, struct lexemes toks, struct environs *evs,
 }
 
 void
-typecheck_environment(struct environs evs, struct ast ast, struct lexemes toks,
-                      idx_t_ i)
+typecheckast(struct environs evs, struct type *types, struct ast ast,
+             struct lexemes toks)
 {
-	struct environ ev = evs.buf[i];
-	for (size_t j = 0; j < ev.len; j++)
-		typecheck_constdecl(evs, ast, toks, j, i);
+	for (idx_t_ i = 0; likely(i < ast.len);) {
+		assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
+		if (types[i].kind == TYPE_UNSET)
+			i = typecheckdecl(evs, types, ast, toks, 0, i);
+		else {
+			while (++i < ast.len && ast.kinds[i] != ASTDECL && ast.kinds[i] != ASTCDECL)
+				;
+		}
+	}
 }
 
 const struct type *
@@ -132,17 +154,13 @@ typegrab(struct ast ast, struct lexemes toks, idx_t_ i)
 	return tp;
 }
 
-struct type
-typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks,
-                    idx_t_ i, idx_t_ evi)
+idx_t_
+typecheckdecl(struct environs evs, struct type *types, struct ast ast,
+              struct lexemes toks, idx_t_ evi, idx_t_ i)
 {
-	struct environ ev = evs.buf[evi];
-	struct constdecl cd = ev.buf[i];
-	if (cd.type.kind != TYPE_UNSET)
-		return cd.type;
-	ev.buf[i].type.kind = TYPE_CHECKING;
+	types[i].kind = TYPE_CHECKING;
 
-	struct pair p = ast.kids[cd.astidx];
+	struct pair p = ast.kids[i];
 	struct type ltype, rtype;
 	ltype.kind = TYPE_UNSET;
 
@@ -150,38 +168,66 @@ typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks,
 
 	if (p.lhs != AST_EMPTY)
 		ltype = *typegrab(ast, toks, p.lhs);
-	rtype = typecheck_expr(evs, ast, toks, p.rhs, evi);
+	idx_t_ ni = typecheckexpr(evs, types, ast, toks, evi, p.rhs);
+	rtype = types[p.rhs];
 
 	if (ltype.kind == TYPE_UNSET)
 		ltype = rtype;
 	else if (!typecompat(ltype, rtype))
 		err("analyzer: Type mismatch");
 
-	return ev.buf[i].type = ltype;
+	types[i] = ltype;
+	return ni;
 }
 
-struct type
-typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks,
-               idx_t_ i, idx_t_ evi)
+idx_t_
+typecheckstmt(struct environs evs, struct type *types, struct ast ast,
+              struct lexemes toks, idx_t_ evi, idx_t_ i)
+{
+	switch (ast.kinds[i]) {
+	case ASTDECL:
+	case ASTCDECL:
+		return typecheckdecl(evs, types, ast, toks, evi, i);
+	case ASTRET: {
+		idx_t_ ni = typecheckexpr(evs, types, ast, toks, evi, ast.kids[i].rhs);
+		types[i] = types[ast.kids[i].rhs];
+		return ni;
+	}
+	}
+
+	assert(!"unreachable");
+	__builtin_unreachable();
+}
+
+idx_t_
+typecheckexpr(struct environs evs, struct type *types, struct ast ast,
+              struct lexemes toks, idx_t_ evi, idx_t_ i)
 {
 	switch (ast.kinds[i]) {
 	case ASTNUMLIT:
-		return (struct type){.kind = TYPE_INT_UNTYPED, .issigned = true};
+		types[i].kind = TYPE_NUM;
+		types[i].size = 0;
+		types[i].issigned = true;
+		return i + 1;
 	case ASTIDENT: {
 		struct environ ev = evs.buf[evi];
 		struct strview sv = toks.strs[ast.lexemes[i]];
 
 		for (;;) {
-			for (size_t i = 0; i < ev.len; i++) {
-				struct strview sv2 = toks.strs[ast.lexemes[ev.buf[i].astidx]];
+			for (size_t j = 0; j < ev.len; j++) {
+				struct strview sv2 = toks.strs[ast.lexemes[ev.buf[j].astidx]];
 				if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0)
 					continue;
-				struct type t = typecheck_constdecl(evs, ast, toks, i, evi);
-				if (t.kind == TYPE_CHECKING) {
-					err("analyzer: Circular dependency for type ‘%.*s’",
-					    (int)sv2.len, sv2.p);
+				switch (types[j].kind) {
+				case TYPE_UNSET:
+					typecheckdecl(evs, types, ast, toks, evi, j);
+					break;
+				case TYPE_CHECKING:
+					err("analyzer: Circular definition of ‘%.*s’", (int)sv2.len,
+					    sv2.p);
 				}
-				return t;
+				types[i] = types[j];
+				return i + 1;
 			}
 			if (evi == 0)
 				err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p);
@@ -189,28 +235,38 @@ typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks,
 		}
 	}
 	case ASTFN:
-		return typecheck_fn(evs, ast, toks, i, evi);
+		return typecheckfn(evs, types, ast, toks, evi, i);
 	default:
 		err("analyzer: Unexpected AST kind %u", ast.kinds[i]);
 		__builtin_unreachable();
 	}
 }
 
-struct type
-typecheck_fn(struct environs evs, struct ast ast, struct lexemes toks,
-             idx_t_ i, idx_t_ evi)
+idx_t_
+typecheckfn(struct environs evs, struct type *types, struct ast ast,
+            struct lexemes toks, idx_t_ evi, idx_t_ i)
 {
 	struct type t = {.kind = TYPE_FN};
 	struct pair p = ast.kids[i];
 
 	idx_t_ proto = p.lhs;
-	if (ast.kids[proto].rhs == AST_EMPTY)
-		return t;
-
-	t.ret = typegrab(ast, toks, ast.kids[proto].rhs);
-	return t;
+	if (ast.kids[proto].rhs != AST_EMPTY)
+		t.ret = typegrab(ast, toks, ast.kids[proto].rhs);
+	types[i] = t;
+	idx_t_ ni = typecheckblk(evs, types, ast, toks, evi, p.rhs);
+	// if (!typecompat(types[i], types[p.rhs]))
+	// 	err("analyzer: Type mismatch");
+	return ni;
+}
 
-	/* TODO: Typecheck function body */
+idx_t_
+typecheckblk(struct environs evs, struct type *types, struct ast ast,
+             struct lexemes toks, idx_t_ evi, idx_t_ i)
+{
+	struct pair p = ast.kids[i];
+	for (i = p.lhs; i <= p.rhs;)
+		i = typecheckstmt(evs, types, ast, toks, evi, i);
+	return i;
 }
 
 bool
@@ -223,7 +279,7 @@ typecompat(struct type lhs, struct type rhs)
 		return true;
 
 	/* TODO: Need to actually parse it!  256 should not coerce to i8. */
-	if (rhs.kind == TYPE_INT_UNTYPED)
+	if (rhs.kind == TYPE_NUM)
 		return true;
 
 	if (lhs.issigned != rhs.issigned)
diff --git a/src/analyzer.h b/src/analyzer.h
index f44cbac..5e28903 100644
--- a/src/analyzer.h
+++ b/src/analyzer.h
@@ -10,22 +10,10 @@ enum {
 	TYPE_UNSET = 0,
 	TYPE_CHECKING = 1,
 
-	/* Signed integers */
-	TYPE_I8,
-	TYPE_I16,
-	TYPE_I32,
-	TYPE_I64,
-	TYPE_INT,
-	TYPE_INT_UNTYPED,
-
-	/* Unsigned integers */
-	TYPE_U8,
-	TYPE_U16,
-	TYPE_U32,
-	TYPE_U64,
-	TYPE_UINT,
+	TYPE_NUM,
 
 	/* Floating point numbers */
+	TYPE_F16,
 	TYPE_F32,
 	TYPE_F64,
 
@@ -52,6 +40,7 @@ struct type {
 	idx_t_ paramcnt;
 };
 
-void analyzeast(struct ast, struct lexemes);
+struct type *analyzeast(struct ast, struct lexemes)
+	__attribute__((returns_nonnull));
 
 #endif /* !ORYX_ANALYZER_H */
diff --git a/src/codegen.c b/src/codegen.c
index a3a1b87..a5d0f46 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -12,18 +12,25 @@
 #include "parser.h"
 #include "types.h"
 
-static size_t codegenstmt(LLVMBuilderRef, struct ast, struct lexemes,
-                          size_t);
-static size_t codegenexpr(LLVMBuilderRef, struct ast, struct lexemes,
-                          size_t, LLVMValueRef *)
+static size_t codegenstmt(LLVMBuilderRef, LLVMValueRef *, struct ast,
+                          struct lexemes, size_t);
+static size_t codegenexpr(LLVMBuilderRef, LLVMValueRef *, struct ast,
+                          struct lexemes, size_t, LLVMValueRef *)
 	__attribute__((nonnull));
 
+static LLVMTypeRef type2llvm(struct type);
+
 void
-codegen(struct ast ast, struct lexemes toks)
+codegen(const char *file, struct type *types, struct ast ast,
+        struct lexemes toks)
 {
 	LLVMModuleRef mod = LLVMModuleCreateWithName("oryx");
+	LLVMSetSourceFileName(mod, file, strlen(file));
 	LLVMBuilderRef builder = LLVMCreateBuilder();
 
+	LLVMValueRef *declvals = bufalloc(NULL, ast.len, sizeof(*declvals));
+	memset(declvals, 0, ast.len * sizeof(*declvals));
+
 	for (size_t i = 0; i < ast.len;) {
 		switch (ast.kinds[i]) {
 		case ASTDECL: {
@@ -32,10 +39,11 @@ codegen(struct ast ast, struct lexemes toks)
 			char *name = bufalloc(NULL, sv.len + 1, 1);
 			((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0;
 
+			LLVMTypeRef T = type2llvm(types[i]);
 			LLVMValueRef globl, val;
-			globl = LLVMAddGlobal(mod, LLVMInt64Type(), name);
-			i = codegenexpr(builder, ast, toks, ast.kids[i].rhs, &val);
-			LLVMSetInitializer(globl, val);
+			globl = LLVMAddGlobal(mod, T, name);
+			i = codegenexpr(builder, declvals, ast, toks, ast.kids[i].rhs, &val);
+			LLVMSetInitializer(globl, LLVMConstTrunc(val, T));
 			free(name);
 			break;
 		}
@@ -77,7 +85,7 @@ codegen(struct ast ast, struct lexemes toks)
 			free(fnname);
 
 			for (i = ast.kids[body].lhs; i <= ast.kids[body].rhs;)
-				i = codegenstmt(builder, ast, toks, i);
+				i = codegenstmt(builder, declvals, ast, toks, i);
 			break;
 		}
 		default:
@@ -85,6 +93,7 @@ codegen(struct ast ast, struct lexemes toks)
 		}
 	}
 
+	free(declvals);
 	LLVMDisposeBuilder(builder);
 
 	char *error = NULL;
@@ -96,8 +105,8 @@ codegen(struct ast ast, struct lexemes toks)
 }
 
 size_t
-codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
-            size_t i)
+codegenstmt(LLVMBuilderRef builder, LLVMValueRef *declvals, struct ast ast,
+            struct lexemes toks, size_t i)
 {
 	switch (ast.kinds[i]) {
 	case ASTRET:
@@ -106,7 +115,7 @@ codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
 			return i + 1;
 		}
 		LLVMValueRef v;
-		i = codegenexpr(builder, ast, toks, ast.kids[i].rhs, &v);
+		i = codegenexpr(builder, declvals, ast, toks, ast.kids[i].rhs, &v);
 		LLVMBuildRet(builder, v);
 		return i;
 	}
@@ -116,8 +125,8 @@ codegenstmt(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
 }
 
 size_t
-codegenexpr(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
-            size_t i, LLVMValueRef *v)
+codegenexpr(LLVMBuilderRef builder, LLVMValueRef *declvals, struct ast ast,
+            struct lexemes toks, size_t i, LLVMValueRef *v)
 {
 	(void)builder;
 	switch (ast.kinds[i]) {
@@ -125,20 +134,51 @@ codegenexpr(LLVMBuilderRef builder, struct ast ast, struct lexemes toks,
 		/* TODO: Arbitrary precision? */
 		struct strview sv = toks.strs[ast.lexemes[i]];
 
+		bool has_sep = memchr(sv.p, '\'', sv.len) != NULL;
+
 		/* TODO: Temporary one-time-use allocator? */
-		size_t len = 0;
-		char *p = bufalloc(NULL, sv.len, 1);
-		for (size_t i = 0; i < sv.len; i++) {
-			if (sv.p[i] != '\'')
-				p[len++] = sv.p[i];
-		}
+		if (has_sep) {
+			size_t len = 0;
+			char *p = bufalloc(NULL, sv.len, 1);
+			for (size_t i = 0; i < sv.len; i++) {
+				if (sv.p[i] != '\'')
+					p[len++] = sv.p[i];
+			}
 
-		*v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), p, len, 10);
-		free(p);
+			*v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), p, len, 10);
+			free(p);
+		} else
+			*v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), sv.p, sv.len, 10);
 		return i + 1;
 	}
+	case ASTIDENT:
+		err("codegen: %s: Not implemented", __func__);
 	}
 
 	assert(!"unreachable");
 	__builtin_unreachable();
 }
+
+LLVMTypeRef
+type2llvm(struct type t)
+{
+	switch (t.kind) {
+	case TYPE_UNSET:
+	case TYPE_CHECKING:
+		assert(!"codegen: Hit TYPE_UNSET or TYPE_CHECKING");
+		__builtin_unreachable();
+	case TYPE_FN:
+	case TYPE_F16:
+	case TYPE_F32:
+	case TYPE_F64:
+		err("codegen: %s: Not implemented", __func__);
+	case TYPE_NUM:
+		assert(t.issigned);
+		/* TODO: Arbitrary precision */
+		if (t.size == 0)
+			t.size = 8;
+		return LLVMIntType(t.size * 8);
+	default:
+		__builtin_unreachable();
+	}
+}
diff --git a/src/codegen.h b/src/codegen.h
index 517b94d..74bd4fb 100644
--- a/src/codegen.h
+++ b/src/codegen.h
@@ -1,9 +1,11 @@
 #ifndef ORYX_CODEGEN_H
 #define ORYX_CODEGEN_H
 
+#include "analyzer.h"
 #include "lexer.h"
 #include "parser.h"
 
-void codegen(struct ast ast, struct lexemes toks);
+void codegen(const char *, struct type *, struct ast, struct lexemes)
+	__attribute__((nonnull));
 
 #endif /* !ORYX_CODEGEN_H */
diff --git a/src/main.c b/src/main.c
index b32a723..2cfc1d1 100644
--- a/src/main.c
+++ b/src/main.c
@@ -29,10 +29,11 @@ main(int argc, char **argv)
 
 	struct lexemes toks = lexstring(src, srclen);
 	struct ast ast = parsetoks(toks);
-	analyzeast(ast, toks);
-	codegen(ast, toks);
+	struct type *types = analyzeast(ast, toks);
+	codegen(argv[1], types, ast, toks);
 
 #if DEBUG
+	free(types);
 	free(src);
 	lexemes_free(toks);
 	ast_free(ast);
diff --git a/src/primitives.gperf b/src/primitives.gperf
index 449a030..7594b29 100644
--- a/src/primitives.gperf
+++ b/src/primitives.gperf
@@ -16,16 +16,16 @@
 
 struct typeslot { char *name; struct type inner; };
 %%
-i8,   { TYPE_I8,   1, true  }
-i16,  { TYPE_I16,  2, true  }
-i32,  { TYPE_I32,  4, true  }
-i64,  { TYPE_I64,  8, true  }
-int,  { TYPE_INT,  8, true  }
-u8,   { TYPE_U8,   1, false }
-u16,  { TYPE_U16,  2, false }
-u32,  { TYPE_U32,  4, false }
-u64,  { TYPE_U64,  8, false }
-uint, { TYPE_UINT, 8, false }
+i8,   { TYPE_NUM,  1, true  }
+i16,  { TYPE_NUM,  2, true  }
+i32,  { TYPE_NUM,  4, true  }
+i64,  { TYPE_NUM,  8, true  }
+int,  { TYPE_NUM,  8, true  }
+u8,   { TYPE_NUM,  1, false }
+u16,  { TYPE_NUM,  2, false }
+u32,  { TYPE_NUM,  4, false }
+u64,  { TYPE_NUM,  8, false }
+uint, { TYPE_NUM,  8, false }
 rune, { TYPE_RUNE, 4, true  }
 %%
 const struct type *
-- 
cgit v1.2.3