Various parser fixes

author: Thomas Voss <mail@thomasvoss.com> 2024-06-19 20:16:49 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-06-19 20:16:49 +0200
commit: cb358312c86c6fe13631fb4022cefccee8bba91e (patch)
tree: 6bec6a2a98a6df93084b39f6a2e51e163764e9fc
parent: 4486b6eccd96784cc9423b41e5b6dce7c9a3d740 (diff)
5 files changed, 192 insertions, 96 deletions
diff --git a/src/analyzer.c b/src/analyzer.c
index ae3e247..1b9e9cb 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -72,44 +72,6 @@ analyzeast(struct ast ast, struct lexemes toks)
 	return types;
 }
 
-static idx_t_ fwdnode(struct ast, idx_t_);
-
-idx_t_
-fwdnode(struct ast ast, idx_t_ i)
-{
-	while (likely(i < ast.len)) {
-		switch (ast.kinds[i]) {
-		case ASTBLK:
-			i = ast.kids[i].lhs == AST_EMPTY ? i + 1 : ast.kids[i].rhs;
-			break;
-		case ASTDECL:
-			i = ast.kids[i].rhs == AST_EMPTY ? ast.kids[i].lhs
-			                                 : ast.kids[i].rhs;
-			break;
-		case ASTRET:
-			if (ast.kids[i].rhs == AST_EMPTY)
-				return i + 1;
-			i = ast.kids[i].rhs;
-			break;
-		case ASTBINADD:
-		case ASTBINSUB:
-		case ASTCDECL:
-		case ASTFN:
-			i = ast.kids[i].rhs;
-			break;
-		case ASTIDENT:
-		case ASTNUMLIT:
-		case ASTTYPE:
-			return i + 1;
-		case ASTFNPROTO:
-			assert("analyzer: Not reachable");
-			__builtin_unreachable();
-		}
-	}
-
-	return i;
-}
-
 const struct type *
 typegrab(struct ast ast, struct lexemes toks, idx_t_ i)
 {
@@ -128,7 +90,7 @@ typechkast(struct evstack evs, struct type *types, struct ast ast,
 	ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf));
 
 	for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) {
-		assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
+		assert(ast.kinds[i] <= _AST_DECLS_END);
 		if (ev.len == ev.cap) {
 			ev.cap *= 2;
 			ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf));
@@ -142,7 +104,7 @@ typechkast(struct evstack evs, struct type *types, struct ast ast,
 
 	struct typechkctx ctx = {0};
 	for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) {
-		assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
+		assert(ast.kinds[i] <= _AST_DECLS_END);
 		typechkdecl(ctx, evs, types, ast, toks, i);
 	}
 
diff --git a/src/codegen.c b/src/codegen.c
index 4253bd4..504291a 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -12,67 +13,120 @@
 #include "parser.h"
 #include "types.h"
 
-static size_t codegenexpr(LLVMBuilderRef, struct type *, struct ast,
+struct cgctx {
+	LLVMModuleRef mod;
+	LLVMBuilderRef bob;
+	struct strview namespace;
+};
+
+static size_t codegendecl(struct cgctx, struct type *, struct ast,
+                          struct lexemes, size_t)
+	__attribute__((nonnull));
+static size_t codegenexpr(struct cgctx, struct type *, struct ast,
                           struct lexemes, size_t, LLVMValueRef *)
 	__attribute__((nonnull));
 
 static LLVMTypeRef type2llvm(struct type);
 
+/* TODO: Don’t do this? */
+#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
+static struct {
+	struct strview key;
+	LLVMValueRef val;
+} constants[1024];
+static size_t constcnt;
+
 void
 codegen(const char *file, struct type *types, struct ast ast,
         struct lexemes toks)
 {
-	LLVMModuleRef mod = LLVMModuleCreateWithName("oryx");
-	LLVMSetSourceFileName(mod, file, strlen(file));
-	LLVMBuilderRef bob = LLVMCreateBuilder();
+	struct cgctx ctx = {0};
+	ctx.mod = LLVMModuleCreateWithName("oryx");
+	ctx.bob = LLVMCreateBuilder();
+	LLVMSetSourceFileName(ctx.mod, file, strlen(file));
 
 	for (size_t i = 0; i < ast.len;) {
 		// LLVMValueRef val;
 		assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
-		// if (ast.kids[i].rhs != AST_EMPTY && types[ast.kids[i].rhs] ==
-		// TYPE_FN) 	codegenfn(builder, types, ast, toks, i, &val); else
-		// 	codegenexpr(builder, types, ast, toks, i, &val);
+		i = codegendecl(ctx, types, ast, toks, i);
 
 		/* TODO: Temporary allocator */
-		struct strview sv = toks.strs[ast.lexemes[i]];
-		char *name = bufalloc(NULL, sv.len + 1, 1);
-		((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0;
-
-		LLVMValueRef globl, init;
-		LLVMTypeRef vartype = type2llvm(types[i]);
+		// struct strview sv = toks.strs[ast.lexemes[i]];
+		// char *name = bufalloc(NULL, sv.len + 1, 1);
+		// ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0;
+		//
+		// LLVMValueRef globl, init;
+		// LLVMTypeRef vartype = type2llvm(types[i]);
+		//
+		// globl = LLVMAddGlobal(mod, vartype, name);
+		// LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL);
+		//
+		// if (ast.kids[i].rhs != AST_EMPTY) {
+		// 	i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init);
+		// 	init = LLVMConstTrunc(init, vartype);
+		// } else {
+		// 	init = LLVMConstNull(vartype);
+		// 	i = fwdnode(ast, i);
+		// }
+		//
+		// LLVMSetInitializer(globl, init);
+		// LLVMSetLinkage(globl, LLVMPrivateLinkage);
+		//
+		// free(name);
+	}
 
-		globl = LLVMAddGlobal(mod, vartype, name);
-		LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL);
+	LLVMDisposeBuilder(ctx.bob);
 
-		if (ast.kids[i].rhs != AST_EMPTY) {
-			i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init);
-			init = LLVMConstTrunc(init, vartype);
-		} else {
-			init = LLVMConstNull(vartype);
-			i += 2;
-		}
+	char *error = NULL;
+	LLVMVerifyModule(ctx.mod, LLVMAbortProcessAction, &error);
+	LLVMDisposeMessage(error);
 
-		LLVMSetInitializer(globl, init);
-		LLVMSetLinkage(globl, LLVMPrivateLinkage);
+	LLVMDumpModule(ctx.mod);
+	LLVMDisposeModule(ctx.mod);
+}
 
-		free(name);
+size_t
+codegendecl(struct cgctx ctx, struct type *types, struct ast ast,
+            struct lexemes toks, size_t i)
+{
+	struct strview ident = toks.strs[ast.lexemes[i]];
+
+	char *name;
+	if (ctx.namespace.len != 0) {
+		size_t namelen = ident.len + ctx.namespace.len + 1;
+		name = bufalloc(NULL, namelen + 1, 1);
+		sprintf(name, "%.*s.%.*s", (int)ctx.namespace.len, ctx.namespace.p,
+				(int)ident.len, ident.p);
+	} else {
+		name = bufalloc(NULL, ident.len + 1, 1);
+		memcpy(name, ident.p, ident.len);
+		name[ident.len] = 0;
 	}
 
-	LLVMDisposeBuilder(bob);
+	LLVMValueRef val;
+	LLVMTypeRef vartype = type2llvm(types[i]);
 
-	char *error = NULL;
-	LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
-	LLVMDisposeMessage(error);
+	if (ast.kids[i].rhs != AST_EMPTY) {
+		i = codegenexpr(ctx, types, ast, toks, ast.kids[i].rhs, &val);
+		val = LLVMConstTrunc(val, vartype);
+	} else {
+		i = fwdnode(ast, i);
+		val = LLVMConstNull(vartype);
+	}
+
+	LLVMValueRef globl = LLVMAddGlobal(ctx.mod, vartype, name);
+	LLVMSetInitializer(globl, val);
+	LLVMSetLinkage(globl, LLVMLinkerPrivateLinkage);
 
-	LLVMDumpModule(mod);
-	LLVMDisposeModule(mod);
+	free(name);
+	return i;
 }
 
 size_t
-codegenexpr(LLVMBuilderRef builder, struct type *types, struct ast ast,
+codegenexpr(struct cgctx ctx, struct type *types, struct ast ast,
             struct lexemes toks, size_t i, LLVMValueRef *v)
 {
-	(void)builder;
+	(void)ctx;
 	switch (ast.kinds[i]) {
 	case ASTNUMLIT: {
 		/* TODO: Arbitrary precision? */
diff --git a/src/main.c b/src/main.c
index 2cfc1d1..a61b860 100644
--- a/src/main.c
+++ b/src/main.c
@@ -30,7 +30,7 @@ main(int argc, char **argv)
 	struct lexemes toks = lexstring(src, srclen);
 	struct ast ast = parsetoks(toks);
 	struct type *types = analyzeast(ast, toks);
-	codegen(argv[1], types, ast, toks);
+	// codegen(argv[1], types, ast, toks);
 
 #if DEBUG
 	free(types);
diff --git a/src/parser.c b/src/parser.c
index d10a82f..44888ae 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -18,22 +18,65 @@
 #endif
 #define SIZE_WDTH (sizeof(size_t) * CHAR_BIT)
 
-typedef idx_t_ parsefn(struct ast *, struct lexemes) __attribute__((nonnull));
-static parsefn parseblk, parsedecl, parseexpr, parseproto, parsestmt, parsetype;
+typedef idx_t_ parsefn(struct ast *, struct lexemes)
+	__attribute__((nonnull));
+static parsefn parseblk, parseexpr, parsefunc, parseproto, parsestmt, parsetype;
+static idx_t_ parsedecl(struct ast *, struct lexemes, bool)
+	__attribute__((nonnull));
 
 static struct ast mkast(void);
-static idx_t_ astalloc(struct ast *) __attribute__((nonnull));
-static void astresz(struct ast *)    __attribute__((nonnull));
+static idx_t_ astalloc(struct ast *)
+	__attribute__((nonnull));
+static void astresz(struct ast *)
+	__attribute__((nonnull));
 
 static size_t toksidx;
 
+idx_t_
+fwdnode(struct ast ast, idx_t_ i)
+{
+	while (likely(i < ast.len)) {
+		switch (ast.kinds[i]) {
+		case ASTBLK:
+			i = ast.kids[i].lhs == AST_EMPTY ? i + 1 : ast.kids[i].rhs;
+			break;
+		case ASTDECL:
+		case ASTPDECL:
+			i = ast.kids[i].rhs == AST_EMPTY ? ast.kids[i].lhs
+			                                 : ast.kids[i].rhs;
+			break;
+		case ASTRET:
+			if (ast.kids[i].rhs == AST_EMPTY)
+				return i + 1;
+			i = ast.kids[i].rhs;
+			break;
+		case ASTBINADD:
+		case ASTBINSUB:
+		case ASTCDECL:
+		case ASTPCDECL:
+		case ASTFN:
+			i = ast.kids[i].rhs;
+			break;
+		case ASTIDENT:
+		case ASTNUMLIT:
+		case ASTTYPE:
+			return i + 1;
+		case ASTFNPROTO:
+			assert("analyzer: Not reachable");
+			__builtin_unreachable();
+		}
+	}
+
+	return i;
+}
+
 struct ast
 parsetoks(struct lexemes toks)
 {
 	struct ast ast = mkast();
 
 	for (;;) {
-		(void)parsedecl(&ast, toks);
+		(void)parsedecl(&ast, toks, true);
 		if (toks.kinds[toksidx] == LEXEOF)
 			break;
 	}
@@ -68,11 +111,20 @@ parseblk(struct ast *ast, struct lexemes toks)
 }
 
 idx_t_
-parsedecl(struct ast *ast, struct lexemes toks)
+parsedecl(struct ast *ast, struct lexemes toks, bool toplvl)
 {
 	idx_t_ i = astalloc(ast);
 	ast->lexemes[i] = toksidx;
 
+	bool pub;
+	if (toplvl && toks.kinds[toksidx] == LEXIDENT && toks.strs[toksidx].len == 3
+	    && memcmp("pub", toks.strs[toksidx].p, 3) == 0)
+	{
+		pub = true;
+		toksidx++;
+	} else
+		pub = false;
+
 	if (toks.kinds[toksidx++] != LEXIDENT)
 		err("parser: Expected identifier");
 	if (toks.kinds[toksidx++] != LEXCOLON)
@@ -86,28 +138,49 @@ parsedecl(struct ast *ast, struct lexemes toks)
 	case LEXSEMI:
 		if (ast->kids[i].lhs == AST_EMPTY)
 			err("parser: No type provided in non-assigning declaration");
-		ast->kinds[i] = ASTDECL;
+		ast->kinds[i] = ASTDECL + pub;
 		ast->kids[i].rhs = AST_EMPTY;
 		return i;
 	case LEXCOLON:
-		ast->kinds[i] = ASTCDECL;
+		ast->kinds[i] = ASTCDECL + pub;
 		break;
 	case LEXEQ:
-		ast->kinds[i] = ASTDECL;
+		ast->kinds[i] = ASTDECL + pub;
 		break;
 	default:
-		err("parser: Expected semicolon or equals");
+		err("parser: Expected colon, equals, or semicolon");
 	}
 
-	idx_t_ rhs = parseexpr(ast, toks);
+	bool func = toks.kinds[toksidx] == LEXLPAR;
+	if (func && ast->kinds[i] - pub == ASTDECL)
+		err("Cannot assign function to mutable variable");
+
+	idx_t_ rhs = (func ? parsefunc : parseexpr)(ast, toks);
 	ast->kids[i].rhs = rhs;
-	if (toks.kinds[toksidx++] != LEXSEMI)
+	if (!func && toks.kinds[toksidx++] != LEXSEMI)
 		err("parser: Expected semicolon");
 
 	return i;
 }
 
 idx_t_
+parsefunc(struct ast *ast, struct lexemes toks)
+{
+	idx_t_ i = astalloc(ast);
+	ast->lexemes[i] = toksidx;
+
+	assert(toks.kinds[toksidx] == LEXLPAR);
+
+	ast->kinds[i] = ASTFN;
+	idx_t_ lhs = parseproto(ast, toks);
+	idx_t_ rhs = parseblk(ast, toks);
+	ast->kids[i].lhs = lhs;
+	ast->kids[i].rhs = rhs;
+
+	return i;
+}
+
+idx_t_
 parseexpr(struct ast *ast, struct lexemes toks)
 {
 	idx_t_ i = astalloc(ast);
@@ -122,13 +195,6 @@ parseexpr(struct ast *ast, struct lexemes toks)
 		toksidx++;
 		ast->kinds[i] = ASTIDENT;
 		break;
-	case LEXLPAR:
-		ast->kinds[i] = ASTFN;
-		idx_t_ lhs = parseproto(ast, toks);
-		idx_t_ rhs = parseblk(ast, toks);
-		ast->kids[i].lhs = lhs;
-		ast->kids[i].rhs = rhs;
-		break;
 	default:
 		err("parser: Expected expression");
 	}
@@ -164,7 +230,7 @@ parsestmt(struct ast *ast, struct lexemes toks)
 		err("parser: Expected identifier");
 
 	struct strview sv = toks.strs[toksidx];
-	if (strncmp("return", sv.p, sv.len) == 0) {
+	if (sv.len == 6 && memcmp(sv.p, "return", 6) == 0) {
 		i = astalloc(ast);
 		ast->lexemes[i] = toksidx++;
 		ast->kinds[i] = ASTRET;
@@ -175,9 +241,9 @@ parsestmt(struct ast *ast, struct lexemes toks)
 		if (toks.kinds[toksidx++] != LEXSEMI)
 			err("parser: Expected semicolon");
 	} else if (toks.kinds[toksidx + 1] == LEXCOLON)
-		i = parsedecl(ast, toks);
+		i = parsedecl(ast, toks, false);
 	else
-		i = parseexpr(ast, toks);
+		err("parser: Invalid statement");
 
 	return i;
 }
diff --git a/src/parser.h b/src/parser.h
index fceedc0..6392c24 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -14,10 +14,20 @@ enum {
 	   ‘x: lhs = rhs’ */
 	ASTDECL,
 
+	/* Public variable declaration, lhs and rhs may be unused
+	   ‘pub x: lhs = rhs’ */
+	ASTPDECL,
+
 	/* Constant declaration, lhs may be unused
 	   ‘x: lhs : rhs’ */
 	ASTCDECL,
 
+	/* Public constant declaration, lhs may be unused
+	   ‘pub x: lhs : rhs’ */
+	ASTPCDECL,
+
+	_AST_DECLS_END = ASTPCDECL,
+
 	/* Function prototype
 	   ‘(a: b, c: d) rhs’; aux[lhs].fnproto */
 	ASTFNPROTO,
@@ -75,4 +85,8 @@ struct ast {
 /* Parse the tokens in TOKS into an abstract syntax tree */
 struct ast parsetoks(struct lexemes toks);
 
+/* Starting from the node at index I in AST, return the index of the next node
+   in AST that is at the same nesting depth as I */
+idx_t_ fwdnode(struct ast ast, idx_t_ i);
+
 #endif /* !ORYX_PARSER_H */
author	Thomas Voss <mail@thomasvoss.com>	2024-06-19 20:16:49 +0200
committer	Thomas Voss <mail@thomasvoss.com>	2024-06-19 20:16:49 +0200
commit	cb358312c86c6fe13631fb4022cefccee8bba91e (patch)
tree	6bec6a2a98a6df93084b39f6a2e51e163764e9fc
parent	4486b6eccd96784cc9423b41e5b6dce7c9a3d740 (diff)