about summary refs log tree commit diff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-06-19 20:16:49 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-06-19 20:16:49 +0200
commit cb358312c86c6fe13631fb4022cefccee8bba91e (patch)
tree 6bec6a2a98a6df93084b39f6a2e51e163764e9fc
parent 4486b6eccd96784cc9423b41e5b6dce7c9a3d740 (diff)
Various parser fixes
-rw-r--r-- src/analyzer.c 42
-rw-r--r-- src/codegen.c 120
-rw-r--r-- src/main.c 2
-rw-r--r-- src/parser.c 110
-rw-r--r-- src/parser.h 14
5 files changed, 192 insertions, 96 deletions
diff --git a/src/analyzer.c b/src/analyzer.c
index ae3e247..1b9e9cb 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -72,44 +72,6 @@ analyzeast(struct ast ast, struct lexemes toks)
return types;
}
-static idx_t_ fwdnode(struct ast, idx_t_);
-
-idx_t_
-fwdnode(struct ast ast, idx_t_ i)
-{
- while (likely(i < ast.len)) {
- switch (ast.kinds[i]) {
- case ASTBLK:
- i = ast.kids[i].lhs == AST_EMPTY ? i + 1 : ast.kids[i].rhs;
- break;
- case ASTDECL:
- i = ast.kids[i].rhs == AST_EMPTY ? ast.kids[i].lhs
- : ast.kids[i].rhs;
- break;
- case ASTRET:
- if (ast.kids[i].rhs == AST_EMPTY)
- return i + 1;
- i = ast.kids[i].rhs;
- break;
- case ASTBINADD:
- case ASTBINSUB:
- case ASTCDECL:
- case ASTFN:
- i = ast.kids[i].rhs;
- break;
- case ASTIDENT:
- case ASTNUMLIT:
- case ASTTYPE:
- return i + 1;
- case ASTFNPROTO:
- assert("analyzer: Not reachable");
- __builtin_unreachable();
- }
- }
-
- return i;
-}
-
const struct type *
typegrab(struct ast ast, struct lexemes toks, idx_t_ i)
{
@@ -128,7 +90,7 @@ typechkast(struct evstack evs, struct type *types, struct ast ast,
ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf));
for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) {
- assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
+ assert(ast.kinds[i] <= _AST_DECLS_END);
if (ev.len == ev.cap) {
ev.cap *= 2;
ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf));
@@ -142,7 +104,7 @@ typechkast(struct evstack evs, struct type *types, struct ast ast,
struct typechkctx ctx = {0};
for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) {
- assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
+ assert(ast.kinds[i] <= _AST_DECLS_END);
typechkdecl(ctx, evs, types, ast, toks, i);
}
diff --git a/src/codegen.c b/src/codegen.c
index 4253bd4..504291a 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -1,4 +1,5 @@
#include <assert.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -12,67 +13,120 @@
#include "parser.h"
#include "types.h"
-static size_t codegenexpr(LLVMBuilderRef, struct type *, struct ast,
+struct cgctx {
+ LLVMModuleRef mod;
+ LLVMBuilderRef bob;
+ struct strview namespace;
+};
+
+static size_t codegendecl(struct cgctx, struct type *, struct ast,
+ struct lexemes, size_t)
+ __attribute__((nonnull));
+static size_t codegenexpr(struct cgctx, struct type *, struct ast,
struct lexemes, size_t, LLVMValueRef *)
__attribute__((nonnull));
static LLVMTypeRef type2llvm(struct type);
+/* TODO: Don’t do this? */
+#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
+static struct {
+ struct strview key;
+ LLVMValueRef val;
+} constants[1024];
+static size_t constcnt;
+
void
codegen(const char *file, struct type *types, struct ast ast,
struct lexemes toks)
{
- LLVMModuleRef mod = LLVMModuleCreateWithName("oryx");
- LLVMSetSourceFileName(mod, file, strlen(file));
- LLVMBuilderRef bob = LLVMCreateBuilder();
+ struct cgctx ctx = {0};
+ ctx.mod = LLVMModuleCreateWithName("oryx");
+ ctx.bob = LLVMCreateBuilder();
+ LLVMSetSourceFileName(ctx.mod, file, strlen(file));
for (size_t i = 0; i < ast.len;) {
// LLVMValueRef val;
assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
- // if (ast.kids[i].rhs != AST_EMPTY && types[ast.kids[i].rhs] ==
- // TYPE_FN) codegenfn(builder, types, ast, toks, i, &val); else
- // codegenexpr(builder, types, ast, toks, i, &val);
+ i = codegendecl(ctx, types, ast, toks, i);
/* TODO: Temporary allocator */
- struct strview sv = toks.strs[ast.lexemes[i]];
- char *name = bufalloc(NULL, sv.len + 1, 1);
- ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0;
-
- LLVMValueRef globl, init;
- LLVMTypeRef vartype = type2llvm(types[i]);
+ // struct strview sv = toks.strs[ast.lexemes[i]];
+ // char *name = bufalloc(NULL, sv.len + 1, 1);
+ // ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0;
+ //
+ // LLVMValueRef globl, init;
+ // LLVMTypeRef vartype = type2llvm(types[i]);
+ //
+ // globl = LLVMAddGlobal(mod, vartype, name);
+ // LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL);
+ //
+ // if (ast.kids[i].rhs != AST_EMPTY) {
+ // i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init);
+ // init = LLVMConstTrunc(init, vartype);
+ // } else {
+ // init = LLVMConstNull(vartype);
+ // i = fwdnode(ast, i);
+ // }
+ //
+ // LLVMSetInitializer(globl, init);
+ // LLVMSetLinkage(globl, LLVMPrivateLinkage);
+ //
+ // free(name);
+ }
- globl = LLVMAddGlobal(mod, vartype, name);
- LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL);
+ LLVMDisposeBuilder(ctx.bob);
- if (ast.kids[i].rhs != AST_EMPTY) {
- i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init);
- init = LLVMConstTrunc(init, vartype);
- } else {
- init = LLVMConstNull(vartype);
- i += 2;
- }
+ char *error = NULL;
+ LLVMVerifyModule(ctx.mod, LLVMAbortProcessAction, &error);
+ LLVMDisposeMessage(error);
- LLVMSetInitializer(globl, init);
- LLVMSetLinkage(globl, LLVMPrivateLinkage);
+ LLVMDumpModule(ctx.mod);
+ LLVMDisposeModule(ctx.mod);
+}
- free(name);
+size_t
+codegendecl(struct cgctx ctx, struct type *types, struct ast ast,
+ struct lexemes toks, size_t i)
+{
+ struct strview ident = toks.strs[ast.lexemes[i]];
+
+ char *name;
+ if (ctx.namespace.len != 0) {
+ size_t namelen = ident.len + ctx.namespace.len + 1;
+ name = bufalloc(NULL, namelen + 1, 1);
+ sprintf(name, "%.*s.%.*s", (int)ctx.namespace.len, ctx.namespace.p,
+ (int)ident.len, ident.p);
+ } else {
+ name = bufalloc(NULL, ident.len + 1, 1);
+ memcpy(name, ident.p, ident.len);
+ name[ident.len] = 0;
}
- LLVMDisposeBuilder(bob);
+ LLVMValueRef val;
+ LLVMTypeRef vartype = type2llvm(types[i]);
- char *error = NULL;
- LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
- LLVMDisposeMessage(error);
+ if (ast.kids[i].rhs != AST_EMPTY) {
+ i = codegenexpr(ctx, types, ast, toks, ast.kids[i].rhs, &val);
+ val = LLVMConstTrunc(val, vartype);
+ } else {
+ i = fwdnode(ast, i);
+ val = LLVMConstNull(vartype);
+ }
+
+ LLVMValueRef globl = LLVMAddGlobal(ctx.mod, vartype, name);
+ LLVMSetInitializer(globl, val);
+ LLVMSetLinkage(globl, LLVMLinkerPrivateLinkage);
- LLVMDumpModule(mod);
- LLVMDisposeModule(mod);
+ free(name);
+ return i;
}
size_t
-codegenexpr(LLVMBuilderRef builder, struct type *types, struct ast ast,
+codegenexpr(struct cgctx ctx, struct type *types, struct ast ast,
struct lexemes toks, size_t i, LLVMValueRef *v)
{
- (void)builder;
+ (void)ctx;
switch (ast.kinds[i]) {
case ASTNUMLIT: {
/* TODO: Arbitrary precision? */
diff --git a/src/main.c b/src/main.c
index 2cfc1d1..a61b860 100644
--- a/src/main.c
+++ b/src/main.c
@@ -30,7 +30,7 @@ main(int argc, char **argv)
struct lexemes toks = lexstring(src, srclen);
struct ast ast = parsetoks(toks);
struct type *types = analyzeast(ast, toks);
- codegen(argv[1], types, ast, toks);
+ // codegen(argv[1], types, ast, toks);
#if DEBUG
free(types);
diff --git a/src/parser.c b/src/parser.c
index d10a82f..44888ae 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -18,22 +18,65 @@
#endif
#define SIZE_WDTH (sizeof(size_t) * CHAR_BIT)
-typedef idx_t_ parsefn(struct ast *, struct lexemes) __attribute__((nonnull));
-static parsefn parseblk, parsedecl, parseexpr, parseproto, parsestmt, parsetype;
+typedef idx_t_ parsefn(struct ast *, struct lexemes)
+ __attribute__((nonnull));
+static parsefn parseblk, parseexpr, parsefunc, parseproto, parsestmt, parsetype;
+static idx_t_ parsedecl(struct ast *, struct lexemes, bool)
+ __attribute__((nonnull));
static struct ast mkast(void);
-static idx_t_ astalloc(struct ast *) __attribute__((nonnull));
-static void astresz(struct ast *) __attribute__((nonnull));
+static idx_t_ astalloc(struct ast *)
+ __attribute__((nonnull));
+static void astresz(struct ast *)
+ __attribute__((nonnull));
static size_t toksidx;
+idx_t_
+fwdnode(struct ast ast, idx_t_ i)
+{
+ while (likely(i < ast.len)) {
+ switch (ast.kinds[i]) {
+ case ASTBLK:
+ i = ast.kids[i].lhs == AST_EMPTY ? i + 1 : ast.kids[i].rhs;
+ break;
+ case ASTDECL:
+ case ASTPDECL:
+ i = ast.kids[i].rhs == AST_EMPTY ? ast.kids[i].lhs
+ : ast.kids[i].rhs;
+ break;
+ case ASTRET:
+ if (ast.kids[i].rhs == AST_EMPTY)
+ return i + 1;
+ i = ast.kids[i].rhs;
+ break;
+ case ASTBINADD:
+ case ASTBINSUB:
+ case ASTCDECL:
+ case ASTPCDECL:
+ case ASTFN:
+ i = ast.kids[i].rhs;
+ break;
+ case ASTIDENT:
+ case ASTNUMLIT:
+ case ASTTYPE:
+ return i + 1;
+ case ASTFNPROTO:
+ assert("analyzer: Not reachable");
+ __builtin_unreachable();
+ }
+ }
+
+ return i;
+}
+
struct ast
parsetoks(struct lexemes toks)
{
struct ast ast = mkast();
for (;;) {
- (void)parsedecl(&ast, toks);
+ (void)parsedecl(&ast, toks, true);
if (toks.kinds[toksidx] == LEXEOF)
break;
}
@@ -68,11 +111,20 @@ parseblk(struct ast *ast, struct lexemes toks)
}
idx_t_
-parsedecl(struct ast *ast, struct lexemes toks)
+parsedecl(struct ast *ast, struct lexemes toks, bool toplvl)
{
idx_t_ i = astalloc(ast);
ast->lexemes[i] = toksidx;
+ bool pub;
+ if (toplvl && toks.kinds[toksidx] == LEXIDENT && toks.strs[toksidx].len == 3
+ && memcmp("pub", toks.strs[toksidx].p, 3) == 0)
+ {
+ pub = true;
+ toksidx++;
+ } else
+ pub = false;
+
if (toks.kinds[toksidx++] != LEXIDENT)
err("parser: Expected identifier");
if (toks.kinds[toksidx++] != LEXCOLON)
@@ -86,28 +138,49 @@ parsedecl(struct ast *ast, struct lexemes toks)
case LEXSEMI:
if (ast->kids[i].lhs == AST_EMPTY)
err("parser: No type provided in non-assigning declaration");
- ast->kinds[i] = ASTDECL;
+ ast->kinds[i] = ASTDECL + pub;
ast->kids[i].rhs = AST_EMPTY;
return i;
case LEXCOLON:
- ast->kinds[i] = ASTCDECL;
+ ast->kinds[i] = ASTCDECL + pub;
break;
case LEXEQ:
- ast->kinds[i] = ASTDECL;
+ ast->kinds[i] = ASTDECL + pub;
break;
default:
- err("parser: Expected semicolon or equals");
+ err("parser: Expected colon, equals, or semicolon");
}
- idx_t_ rhs = parseexpr(ast, toks);
+ bool func = toks.kinds[toksidx] == LEXLPAR;
+ if (func && ast->kinds[i] - pub == ASTDECL)
+ err("Cannot assign function to mutable variable");
+
+ idx_t_ rhs = (func ? parsefunc : parseexpr)(ast, toks);
ast->kids[i].rhs = rhs;
- if (toks.kinds[toksidx++] != LEXSEMI)
+ if (!func && toks.kinds[toksidx++] != LEXSEMI)
err("parser: Expected semicolon");
return i;
}
idx_t_
+parsefunc(struct ast *ast, struct lexemes toks)
+{
+ idx_t_ i = astalloc(ast);
+ ast->lexemes[i] = toksidx;
+
+ assert(toks.kinds[toksidx] == LEXLPAR);
+
+ ast->kinds[i] = ASTFN;
+ idx_t_ lhs = parseproto(ast, toks);
+ idx_t_ rhs = parseblk(ast, toks);
+ ast->kids[i].lhs = lhs;
+ ast->kids[i].rhs = rhs;
+
+ return i;
+}
+
+idx_t_
parseexpr(struct ast *ast, struct lexemes toks)
{
idx_t_ i = astalloc(ast);
@@ -122,13 +195,6 @@ parseexpr(struct ast *ast, struct lexemes toks)
toksidx++;
ast->kinds[i] = ASTIDENT;
break;
- case LEXLPAR:
- ast->kinds[i] = ASTFN;
- idx_t_ lhs = parseproto(ast, toks);
- idx_t_ rhs = parseblk(ast, toks);
- ast->kids[i].lhs = lhs;
- ast->kids[i].rhs = rhs;
- break;
default:
err("parser: Expected expression");
}
@@ -164,7 +230,7 @@ parsestmt(struct ast *ast, struct lexemes toks)
err("parser: Expected identifier");
struct strview sv = toks.strs[toksidx];
- if (strncmp("return", sv.p, sv.len) == 0) {
+ if (sv.len == 6 && memcmp(sv.p, "return", 6) == 0) {
i = astalloc(ast);
ast->lexemes[i] = toksidx++;
ast->kinds[i] = ASTRET;
@@ -175,9 +241,9 @@ parsestmt(struct ast *ast, struct lexemes toks)
if (toks.kinds[toksidx++] != LEXSEMI)
err("parser: Expected semicolon");
} else if (toks.kinds[toksidx + 1] == LEXCOLON)
- i = parsedecl(ast, toks);
+ i = parsedecl(ast, toks, false);
else
- i = parseexpr(ast, toks);
+ err("parser: Invalid statement");
return i;
}
diff --git a/src/parser.h b/src/parser.h
index fceedc0..6392c24 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -14,10 +14,20 @@ enum {
‘x: lhs = rhs’ */
ASTDECL,
+ /* Public variable declaration, lhs and rhs may be unused
+ ‘pub x: lhs = rhs’ */
+ ASTPDECL,
+
/* Constant declaration, lhs may be unused
‘x: lhs : rhs’ */
ASTCDECL,
+ /* Public constant declaration, lhs may be unused
+ ‘pub x: lhs : rhs’ */
+ ASTPCDECL,
+
+ _AST_DECLS_END = ASTPCDECL,
+
/* Function prototype
‘(a: b, c: d) rhs’; aux[lhs].fnproto */
ASTFNPROTO,
@@ -75,4 +85,8 @@ struct ast {
/* Parse the tokens in TOKS into an abstract syntax tree */
struct ast parsetoks(struct lexemes toks);
+/* Starting from the node at index I in AST, return the index of the next node
+ in AST that is of the same nest-depth as I */
+idx_t_ fwdnode(struct ast ast, idx_t_ i);
+
#endif /* !ORYX_PARSER_H */