From cb358312c86c6fe13631fb4022cefccee8bba91e Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Wed, 19 Jun 2024 20:16:49 +0200 Subject: Various parser fixes --- src/analyzer.c | 42 +------------------- src/codegen.c | 120 +++++++++++++++++++++++++++++++++++++++++---------------- src/main.c | 2 +- src/parser.c | 110 +++++++++++++++++++++++++++++++++++++++++----------- src/parser.h | 14 +++++++ 5 files changed, 192 insertions(+), 96 deletions(-) (limited to 'src') diff --git a/src/analyzer.c b/src/analyzer.c index ae3e247..1b9e9cb 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -72,44 +72,6 @@ analyzeast(struct ast ast, struct lexemes toks) return types; } -static idx_t_ fwdnode(struct ast, idx_t_); - -idx_t_ -fwdnode(struct ast ast, idx_t_ i) -{ - while (likely(i < ast.len)) { - switch (ast.kinds[i]) { - case ASTBLK: - i = ast.kids[i].lhs == AST_EMPTY ? i + 1 : ast.kids[i].rhs; - break; - case ASTDECL: - i = ast.kids[i].rhs == AST_EMPTY ? ast.kids[i].lhs - : ast.kids[i].rhs; - break; - case ASTRET: - if (ast.kids[i].rhs == AST_EMPTY) - return i + 1; - i = ast.kids[i].rhs; - break; - case ASTBINADD: - case ASTBINSUB: - case ASTCDECL: - case ASTFN: - i = ast.kids[i].rhs; - break; - case ASTIDENT: - case ASTNUMLIT: - case ASTTYPE: - return i + 1; - case ASTFNPROTO: - assert("analyzer: Not reachable"); - __builtin_unreachable(); - } - } - - return i; -} - const struct type * typegrab(struct ast ast, struct lexemes toks, idx_t_ i) { @@ -128,7 +90,7 @@ typechkast(struct evstack evs, struct type *types, struct ast ast, ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf)); for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { - assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL); + assert(ast.kinds[i] <= _AST_DECLS_END); if (ev.len == ev.cap) { ev.cap *= 2; ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf)); @@ -142,7 +104,7 @@ typechkast(struct evstack evs, struct type *types, struct ast ast, struct typechkctx ctx = {0}; for (idx_t_ i = 0; likely(i < 
ast.len); i = fwdnode(ast, i)) { - assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL); + assert(ast.kinds[i] <= _AST_DECLS_END); typechkdecl(ctx, evs, types, ast, toks, i); } diff --git a/src/codegen.c b/src/codegen.c index 4253bd4..504291a 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -12,67 +13,120 @@ #include "parser.h" #include "types.h" -static size_t codegenexpr(LLVMBuilderRef, struct type *, struct ast, +struct cgctx { + LLVMModuleRef mod; + LLVMBuilderRef bob; + struct strview namespace; +}; + +static size_t codegendecl(struct cgctx, struct type *, struct ast, + struct lexemes, size_t) + __attribute__((nonnull)); +static size_t codegenexpr(struct cgctx, struct type *, struct ast, struct lexemes, size_t, LLVMValueRef *) __attribute__((nonnull)); static LLVMTypeRef type2llvm(struct type); +/* TODO: Don’t do this? */ +#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) +static struct { + struct strview key; + LLVMValueRef val; +} constants[1024]; +static size_t constcnt; + void codegen(const char *file, struct type *types, struct ast ast, struct lexemes toks) { - LLVMModuleRef mod = LLVMModuleCreateWithName("oryx"); - LLVMSetSourceFileName(mod, file, strlen(file)); - LLVMBuilderRef bob = LLVMCreateBuilder(); + struct cgctx ctx = {0}; + ctx.mod = LLVMModuleCreateWithName("oryx"); + ctx.bob = LLVMCreateBuilder(); + LLVMSetSourceFileName(ctx.mod, file, strlen(file)); for (size_t i = 0; i < ast.len;) { // LLVMValueRef val; assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL); - // if (ast.kids[i].rhs != AST_EMPTY && types[ast.kids[i].rhs] == - // TYPE_FN) codegenfn(builder, types, ast, toks, i, &val); else - // codegenexpr(builder, types, ast, toks, i, &val); + i = codegendecl(ctx, types, ast, toks, i); /* TODO: Temporary allocator */ - struct strview sv = toks.strs[ast.lexemes[i]]; - char *name = bufalloc(NULL, sv.len + 1, 1); - ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0; - - 
LLVMValueRef globl, init; - LLVMTypeRef vartype = type2llvm(types[i]); + // struct strview sv = toks.strs[ast.lexemes[i]]; + // char *name = bufalloc(NULL, sv.len + 1, 1); + // ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0; + // + // LLVMValueRef globl, init; + // LLVMTypeRef vartype = type2llvm(types[i]); + // + // globl = LLVMAddGlobal(mod, vartype, name); + // LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL); + // + // if (ast.kids[i].rhs != AST_EMPTY) { + // i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init); + // init = LLVMConstTrunc(init, vartype); + // } else { + // init = LLVMConstNull(vartype); + // i = fwdnode(ast, i); + // } + // + // LLVMSetInitializer(globl, init); + // LLVMSetLinkage(globl, LLVMPrivateLinkage); + // + // free(name); + } - globl = LLVMAddGlobal(mod, vartype, name); - LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL); + LLVMDisposeBuilder(ctx.bob); - if (ast.kids[i].rhs != AST_EMPTY) { - i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init); - init = LLVMConstTrunc(init, vartype); - } else { - init = LLVMConstNull(vartype); - i += 2; - } + char *error = NULL; + LLVMVerifyModule(ctx.mod, LLVMAbortProcessAction, &error); + LLVMDisposeMessage(error); - LLVMSetInitializer(globl, init); - LLVMSetLinkage(globl, LLVMPrivateLinkage); + LLVMDumpModule(ctx.mod); + LLVMDisposeModule(ctx.mod); +} - free(name); +size_t +codegendecl(struct cgctx ctx, struct type *types, struct ast ast, + struct lexemes toks, size_t i) +{ + struct strview ident = toks.strs[ast.lexemes[i]]; + + char *name; + if (ctx.namespace.len != 0) { + size_t namelen = ident.len + ctx.namespace.len + 1; + name = bufalloc(NULL, namelen + 1, 1); + sprintf(name, "%.*s.%.*s", (int)ctx.namespace.len, ctx.namespace.p, + (int)ident.len, ident.p); + } else { + name = bufalloc(NULL, ident.len + 1, 1); + memcpy(name, ident.p, ident.len); + name[ident.len] = 0; } - LLVMDisposeBuilder(bob); + LLVMValueRef val; + LLVMTypeRef vartype = type2llvm(types[i]); - 
char *error = NULL; - LLVMVerifyModule(mod, LLVMAbortProcessAction, &error); - LLVMDisposeMessage(error); + if (ast.kids[i].rhs != AST_EMPTY) { + i = codegenexpr(ctx, types, ast, toks, ast.kids[i].rhs, &val); + val = LLVMConstTrunc(val, vartype); + } else { + i = fwdnode(ast, i); + val = LLVMConstNull(vartype); + } + + LLVMValueRef globl = LLVMAddGlobal(ctx.mod, vartype, name); + LLVMSetInitializer(globl, val); + LLVMSetLinkage(globl, LLVMLinkerPrivateLinkage); - LLVMDumpModule(mod); - LLVMDisposeModule(mod); + free(name); + return i; } size_t -codegenexpr(LLVMBuilderRef builder, struct type *types, struct ast ast, +codegenexpr(struct cgctx ctx, struct type *types, struct ast ast, struct lexemes toks, size_t i, LLVMValueRef *v) { - (void)builder; + (void)ctx; switch (ast.kinds[i]) { case ASTNUMLIT: { /* TODO: Arbitrary precision? */ diff --git a/src/main.c b/src/main.c index 2cfc1d1..a61b860 100644 --- a/src/main.c +++ b/src/main.c @@ -30,7 +30,7 @@ main(int argc, char **argv) struct lexemes toks = lexstring(src, srclen); struct ast ast = parsetoks(toks); struct type *types = analyzeast(ast, toks); - codegen(argv[1], types, ast, toks); + // codegen(argv[1], types, ast, toks); #if DEBUG free(types); diff --git a/src/parser.c b/src/parser.c index d10a82f..44888ae 100644 --- a/src/parser.c +++ b/src/parser.c @@ -18,22 +18,65 @@ #endif #define SIZE_WDTH (sizeof(size_t) * CHAR_BIT) -typedef idx_t_ parsefn(struct ast *, struct lexemes) __attribute__((nonnull)); -static parsefn parseblk, parsedecl, parseexpr, parseproto, parsestmt, parsetype; +typedef idx_t_ parsefn(struct ast *, struct lexemes) + __attribute__((nonnull)); +static parsefn parseblk, parseexpr, parsefunc, parseproto, parsestmt, parsetype; +static idx_t_ parsedecl(struct ast *, struct lexemes, bool) + __attribute__((nonnull)); static struct ast mkast(void); -static idx_t_ astalloc(struct ast *) __attribute__((nonnull)); -static void astresz(struct ast *) __attribute__((nonnull)); +static idx_t_ 
astalloc(struct ast *) + __attribute__((nonnull)); +static void astresz(struct ast *) + __attribute__((nonnull)); static size_t toksidx; +idx_t_ +fwdnode(struct ast ast, idx_t_ i) +{ + while (likely(i < ast.len)) { + switch (ast.kinds[i]) { + case ASTBLK: + i = ast.kids[i].lhs == AST_EMPTY ? i + 1 : ast.kids[i].rhs; + break; + case ASTDECL: + case ASTPDECL: + i = ast.kids[i].rhs == AST_EMPTY ? ast.kids[i].lhs + : ast.kids[i].rhs; + break; + case ASTRET: + if (ast.kids[i].rhs == AST_EMPTY) + return i + 1; + i = ast.kids[i].rhs; + break; + case ASTBINADD: + case ASTBINSUB: + case ASTCDECL: + case ASTPCDECL: + case ASTFN: + i = ast.kids[i].rhs; + break; + case ASTIDENT: + case ASTNUMLIT: + case ASTTYPE: + return i + 1; + case ASTFNPROTO: + assert("analyzer: Not reachable"); + __builtin_unreachable(); + } + } + + return i; +} + struct ast parsetoks(struct lexemes toks) { struct ast ast = mkast(); for (;;) { - (void)parsedecl(&ast, toks); + (void)parsedecl(&ast, toks, true); if (toks.kinds[toksidx] == LEXEOF) break; } @@ -68,11 +111,20 @@ parseblk(struct ast *ast, struct lexemes toks) } idx_t_ -parsedecl(struct ast *ast, struct lexemes toks) +parsedecl(struct ast *ast, struct lexemes toks, bool toplvl) { idx_t_ i = astalloc(ast); ast->lexemes[i] = toksidx; + bool pub; + if (toplvl && toks.kinds[toksidx] == LEXIDENT && toks.strs[toksidx].len == 3 + && memcmp("pub", toks.strs[toksidx].p, 3) == 0) + { + pub = true; + toksidx++; + } else + pub = false; + if (toks.kinds[toksidx++] != LEXIDENT) err("parser: Expected identifier"); if (toks.kinds[toksidx++] != LEXCOLON) @@ -86,27 +138,48 @@ parsedecl(struct ast *ast, struct lexemes toks) case LEXSEMI: if (ast->kids[i].lhs == AST_EMPTY) err("parser: No type provided in non-assigning declaration"); - ast->kinds[i] = ASTDECL; + ast->kinds[i] = ASTDECL + pub; ast->kids[i].rhs = AST_EMPTY; return i; case LEXCOLON: - ast->kinds[i] = ASTCDECL; + ast->kinds[i] = ASTCDECL + pub; break; case LEXEQ: - ast->kinds[i] = ASTDECL; + 
ast->kinds[i] = ASTDECL + pub; break; default: - err("parser: Expected semicolon or equals"); + err("parser: Expected colon, equals, or semicolon"); } - idx_t_ rhs = parseexpr(ast, toks); + bool func = toks.kinds[toksidx] == LEXLPAR; + if (func && ast->kinds[i] - pub == ASTDECL) + err("Cannot assign function to mutable variable"); + + idx_t_ rhs = (func ? parsefunc : parseexpr)(ast, toks); ast->kids[i].rhs = rhs; - if (toks.kinds[toksidx++] != LEXSEMI) + if (!func && toks.kinds[toksidx++] != LEXSEMI) err("parser: Expected semicolon"); return i; } +idx_t_ +parsefunc(struct ast *ast, struct lexemes toks) +{ + idx_t_ i = astalloc(ast); + ast->lexemes[i] = toksidx; + + assert(toks.kinds[toksidx] == LEXLPAR); + + ast->kinds[i] = ASTFN; + idx_t_ lhs = parseproto(ast, toks); + idx_t_ rhs = parseblk(ast, toks); + ast->kids[i].lhs = lhs; + ast->kids[i].rhs = rhs; + + return i; +} + idx_t_ parseexpr(struct ast *ast, struct lexemes toks) { @@ -122,13 +195,6 @@ parseexpr(struct ast *ast, struct lexemes toks) toksidx++; ast->kinds[i] = ASTIDENT; break; - case LEXLPAR: - ast->kinds[i] = ASTFN; - idx_t_ lhs = parseproto(ast, toks); - idx_t_ rhs = parseblk(ast, toks); - ast->kids[i].lhs = lhs; - ast->kids[i].rhs = rhs; - break; default: err("parser: Expected expression"); } @@ -164,7 +230,7 @@ parsestmt(struct ast *ast, struct lexemes toks) err("parser: Expected identifier"); struct strview sv = toks.strs[toksidx]; - if (strncmp("return", sv.p, sv.len) == 0) { + if (sv.len == 6 && memcmp(sv.p, "return", 6) == 0) { i = astalloc(ast); ast->lexemes[i] = toksidx++; ast->kinds[i] = ASTRET; @@ -175,9 +241,9 @@ parsestmt(struct ast *ast, struct lexemes toks) if (toks.kinds[toksidx++] != LEXSEMI) err("parser: Expected semicolon"); } else if (toks.kinds[toksidx + 1] == LEXCOLON) - i = parsedecl(ast, toks); + i = parsedecl(ast, toks, false); else - i = parseexpr(ast, toks); + err("parser: Invalid statement"); return i; } diff --git a/src/parser.h b/src/parser.h index fceedc0..6392c24 100644 
--- a/src/parser.h +++ b/src/parser.h @@ -14,10 +14,20 @@ enum { ‘x: lhs = rhs’ */ ASTDECL, + /* Public variable declaration, lhs and rhs may be unused + ‘pub x: lhs = rhs’ */ + ASTPDECL, + /* Constant declaration, lhs may be unused ‘x: lhs : rhs’ */ ASTCDECL, + /* Public constant declaration, lhs may be unused + ‘pub x: lhs : rhs’ */ + ASTPCDECL, + + _AST_DECLS_END = ASTPCDECL, + /* Function prototype ‘(a: b, c: d) rhs’; aux[lhs].fnproto */ ASTFNPROTO, @@ -75,4 +85,8 @@ struct ast { /* Parse the tokens in TOKS into an abstract syntax tree */ struct ast parsetoks(struct lexemes toks); +/* Starting from the node at index I in AST, return the index of the next node + in AST that is of the same nest-depth as I */ +idx_t_ fwdnode(struct ast ast, idx_t_ i); + #endif /* !ORYX_PARSER_H */ -- cgit v1.2.3