diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-11 07:46:51 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-11 07:46:51 +0200 |
commit | 744ec3c18d6e312d1e2cc281915d251fca49ae55 (patch) | |
tree | 6c16a41a6f8da70da34753c8a7892c55813f5c28 | |
parent | d40fe3a6503586eb2ce93311cc4bf23febfbeb83 (diff) |
Add basic LLVM codegen
-rw-r--r-- | src/alloc.c | 18 | ||||
-rw-r--r-- | src/alloc.h | 11 | ||||
-rw-r--r-- | src/codegen.c | 111 | ||||
-rw-r--r-- | src/lexer.c | 11 | ||||
-rw-r--r-- | src/lexer.h | 1 | ||||
-rw-r--r-- | src/main.c | 5 | ||||
-rw-r--r-- | src/parser.c | 243 | ||||
-rw-r--r-- | src/parser.h | 58 |
8 files changed, 306 insertions, 152 deletions
diff --git a/src/alloc.c b/src/alloc.c new file mode 100644 index 0000000..35b45c0 --- /dev/null +++ b/src/alloc.c @@ -0,0 +1,18 @@ +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> + +#include "alloc.h" +#include "errors.h" + +void * +bufalloc(void *ptr, size_t nmemb, size_t size) +{ + if (size > SIZE_MAX / nmemb) { + errno = EOVERFLOW; + err("%s:", __func__); + } + if ((ptr = realloc(ptr, nmemb * size)) == NULL) + err("%s:", __func__); + return ptr; +} diff --git a/src/alloc.h b/src/alloc.h new file mode 100644 index 0000000..8e8b365 --- /dev/null +++ b/src/alloc.h @@ -0,0 +1,11 @@ +#ifndef ORYX_ALLOC_H +#define ORYX_ALLOC_H + +#include <stddef.h> + +/* Allocate a buffer of NMEMB elements of size SIZE. If PTR is non-null then + reallocate the buffer it points to. Aborts on out-of-memory or overflow. */ +void *bufalloc(void *ptr, size_t nmemb, size_t size) + __attribute__((returns_nonnull)); + +#endif /* !ORYX_ALLOC_H */ diff --git a/src/codegen.c b/src/codegen.c index a9e5006..4f10e61 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1,38 +1,113 @@ +#include <assert.h> +#include <stdlib.h> +#include <string.h> + #include <llvm-c/Analysis.h> #include <llvm-c/Core.h> +#include "alloc.h" #include "codegen.h" +#include "errors.h" + +#define AST_EMPTY ((size_t)-1) + +static size_t codegenstmt(LLVMBuilderRef, struct ast_soa, struct lexemes_soa, + size_t); +static size_t codegenexpr(LLVMBuilderRef, struct ast_soa, struct lexemes_soa, + size_t, LLVMValueRef *) + __attribute__((nonnull)); void codegen(struct ast_soa ast, struct lexemes_soa toks) { - (void)ast; - (void)toks; - - /* Create a new module */ LLVMModuleRef mod = LLVMModuleCreateWithName("oryx"); - /* Declare the function ‘sum :: (int, int) int’ */ - LLVMTypeRef param_types[] = {LLVMInt32Type(), LLVMInt32Type()}; - LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0); - LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type); + for (size_t i = 0; i < ast.len;) { + if (ast.kinds[i] != ASTCDECL) + err("codegen: Expected constant declaration"); + + size_t expr = ast.kids[i].rhs; + if (ast.kinds[expr] != ASTFN) + assert(!"not implemented"); + + size_t proto = ast.kids[expr].lhs, + body = ast.kids[expr].rhs; + + LLVMTypeRef ret; + LLVMTypeRef params[] = {0}; + + if (ast.kids[proto].rhs == AST_EMPTY) + ret = LLVMVoidType(); + else { + size_t type = ast.kids[proto].rhs; + struct strview sv = toks.strs[ast.lexemes[type]]; + + /* TODO: Make int 32bit on 32bit platforms */ + if (strncmp("int", sv.p, sv.len) == 0) + ret = LLVMInt64Type(); + else + err("codegen: Unknown type: %.*s", (int)sv.len, sv.p); + + } + + LLVMTypeRef fnproto = LLVMFunctionType(ret, params, 0, false); - /* Create an entry block, and a builder */ - LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry"); - LLVMBuilderRef builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, entry); + struct strview sv = toks.strs[ast.lexemes[i]]; + char *fnname = bufalloc(NULL, sv.len + 1, 1); + ((char *)memcpy(fnname, sv.p, sv.len))[sv.len] = 0; - /* Generate an ADD instruction */ - LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), - LLVMGetParam(sum, 1), "tmpadd"); - LLVMBuildRet(builder, tmp); + LLVMValueRef fn = LLVMAddFunction(mod, fnname, fnproto); + LLVMBasicBlockRef entry = LLVMAppendBasicBlock(fn, "entry"); + LLVMBuilderRef builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, entry); + + free(fnname); + + for (i = ast.kids[body].lhs; i <= ast.kids[body].rhs;) + i = codegenstmt(builder, ast, toks, i); + + LLVMDisposeBuilder(builder); + } - /* Verify that all went well and if not we abort */ char *error = NULL; LLVMVerifyModule(mod, LLVMAbortProcessAction, &error); LLVMDisposeMessage(error); LLVMDumpModule(mod); +} + +size_t +codegenstmt(LLVMBuilderRef builder, struct ast_soa ast, struct lexemes_soa toks, + size_t i) +{ + switch (ast.kinds[i]) { + case ASTRET: + if (ast.kids[i].rhs == AST_EMPTY) { + LLVMBuildRetVoid(builder); + return i + 1; + } + LLVMValueRef v; + i = codegenexpr(builder, ast, toks, ast.kids[i].rhs, &v); + LLVMBuildRet(builder, v); + return i; + } + + assert(!"unreachable"); +} + +size_t +codegenexpr(LLVMBuilderRef builder, struct ast_soa ast, struct lexemes_soa toks, + size_t i, LLVMValueRef *v) +{ + (void)builder; + switch (ast.kinds[i]) { + case ASTNUMLIT: { + /* TODO: Arbitrary precision? */ + struct strview sv = toks.strs[ast.lexemes[i]]; + *v = LLVMConstIntOfStringAndSize(LLVMInt64Type(), sv.p, sv.len, 10); + return i + 1; + } + } - LLVMDisposeBuilder(builder); + assert(!"unreachable"); } diff --git a/src/lexer.c b/src/lexer.c index 0aa318b..67ec212 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -8,6 +8,7 @@ #include <stdlib.h> #include <string.h> +#include "alloc.h" #include "errors.h" #include "lexer.h" #include "unicode.h" @@ -120,6 +121,9 @@ lexstring(const uchar *code, size_t codesz) lexemes_soa_resz(&data); } + if (unlikely(data.len == data.cap)) + lexemes_soa_resz(&data); + data.kinds[data.len++] = LEXEOF; return data; } @@ -163,9 +167,7 @@ mk_lexemes_soa(void) soa.len = 0; soa.cap = LEXEMES_DFLT_CAP; - - if ((soa.kinds = malloc(soa.cap * LEXEMES_SOA_BLKSZ)) == NULL) - err("malloc:"); + soa.kinds = bufalloc(NULL, soa.cap, LEXEMES_SOA_BLKSZ); soa.strs = (void *)((char *)soa.kinds + soa.cap * sizeof(*soa.kinds)); return soa; @@ -197,8 +199,7 @@ lexemes_soa_resz(struct lexemes_soa *soa) newsz = ncap * LEXEMES_SOA_BLKSZ + pad; - if ((soa->kinds = realloc(soa->kinds, newsz)) == NULL) - err("realloc:"); + soa->kinds = bufalloc(soa->kinds, newsz, 1); soa->strs = (void *)((char *)soa->kinds + ncap * sizeof(*soa->kinds) + pad); memmove(soa->strs, (char *)soa->kinds + off, soa->len * sizeof(*soa->strs)); soa->cap = ncap; diff --git a/src/lexer.h b/src/lexer.h index 33c5078..0deaa73 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -7,6 +7,7 @@ #include "types.h" enum { + LEXEOF, /* End of token stream */ LEXIDENT, /* Identifier */ LEXNUM, /* Numeric constant */ @@ -5,6 +5,7 @@ #include <stdlib.h> #include <unistd.h> +#include "alloc.h" #include "codegen.h" #include "errors.h" #include "lexer.h" @@ -46,9 +47,7 @@ readfile(const char *filename, size_t *n) if (fstat(fd, &sb) == -1) err("fstat: %s", filename); - char *p = malloc(sb.st_size + 4); - if (p == NULL) - err("malloc:"); + char *p = bufalloc(NULL, sb.st_size + 4, 1); ssize_t nr; for (size_t off = 0; (nr = read(fd, p + off, sb.st_blksize)) > 0; off += nr) diff --git a/src/parser.c b/src/parser.c index 4a902b9..3ba9c79 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,25 +6,27 @@ #include <stdlib.h> #include <string.h> +#include "alloc.h" #include "errors.h" #include "parser.h" /* #define AST_DFLT_CAP (2048) */ #define AST_DFLT_CAP (8) #define AST_EMPTY ((size_t)-1) -#define AUX_DFLT_CAP (128) #define SIZE_WDTH (sizeof(size_t) * CHAR_BIT) -static size_t parsedecl(struct ast_soa *, struct lexemes_soa); -static size_t parseexpr(struct ast_soa *, struct lexemes_soa); -static size_t parsetype(struct ast_soa *, struct lexemes_soa); +typedef size_t parsefn(struct ast_soa *, struct lexemes_soa); +static parsefn parseblk, + parsedecl, + parseexpr, + parseproto, + parsestmt, + parsetype; static struct ast_soa mk_ast_soa(void); static void ast_soa_resz(struct ast_soa *); -static size_t aux_push(struct auxilliary *, union extra); -static size_t ast_soa_push(struct ast_soa *, ast_kind, size_t, size_t, size_t, - size_t); +static size_t ast_alloc(struct ast_soa *); static size_t toksidx; @@ -33,85 +35,155 @@ parsetoks(struct lexemes_soa toks) { struct ast_soa ast = mk_ast_soa(); - while (toksidx < toks.len) + for (;;) { parsedecl(&ast, toks); + if (toks.kinds[toksidx] == LEXEOF) + break; + } return ast; } size_t -parsedecl(struct ast_soa *ast, struct lexemes_soa toks) +parseblk(struct ast_soa *ast, struct lexemes_soa toks) { - bool constdecl; - size_t lexeme, lhs, rhs; + size_t i = ast_alloc(ast); + ast->lexemes[i] = toksidx; + ast->kinds[i] = ASTBLK; + ast->kids[i].lhs = ast->kids[i].rhs = AST_EMPTY; + + if (toks.kinds[toksidx++] != LEXLBRACE) + err("parser: Expected left brace"); + + while (toks.kinds[toksidx] != LEXRBRACE) { + ast->kids[i].rhs = parsestmt(ast, toks); + if (ast->kids[i].lhs == AST_EMPTY) + ast->kids[i].lhs = ast->kids[i].rhs; + } + + toksidx++; /* Eat rbrace */ + return i; +} - lexeme = toksidx; +size_t +parsedecl(struct ast_soa *ast, struct lexemes_soa toks) +{ + size_t i = ast_alloc(ast); + ast->lexemes[i] = toksidx; if (toks.kinds[toksidx++] != LEXIDENT) - err("Expected identifier"); + err("parser: Expected identifier"); if (toks.kinds[toksidx++] != LEXCOLON) - err("Expected colon"); - - switch (toks.kinds[toksidx]) { + err("parser: Expected colon"); + + ast->kids[i].lhs = toks.kinds[toksidx] == LEXIDENT + ? parsetype(ast, toks) + : AST_EMPTY; + + switch (toks.kinds[toksidx++]) { + case LEXSEMI: + if (ast->kids[i].lhs == AST_EMPTY) + err("parser: No type provided in non-assigning declaration"); + ast->kinds[i] = ASTDECL; + ast->kids[i].rhs = AST_EMPTY; + return i; case LEXCOLON: + ast->kinds[i] = ASTCDECL; + break; case LEXEQ: - constdecl = toks.kinds[toksidx++] == LEXCOLON; - lhs = AST_EMPTY; - rhs = parseexpr(ast, toks); + ast->kinds[i] = ASTDECL; break; default: - lhs = parsetype(ast, toks); - if (toks.kinds[toksidx] == LEXCOLON || toks.kinds[toksidx] == LEXEQ) { - constdecl = toks.kinds[toksidx++] == LEXEQ; - rhs = parseexpr(ast, toks); - } else { - constdecl = false; - rhs = AST_EMPTY; - } - break; + err("parser: Expected semicolon or equals"); } + ast->kids[i].rhs = parseexpr(ast, toks); if (toks.kinds[toksidx++] != LEXSEMI) - err("Expected semicolon"); + err("parser: Expected semicolon"); - size_t extra = aux_push(&ast->aux, (union extra){.constdecl = constdecl}); - return ast_soa_push(ast, PRSDECL, lexeme, lhs, rhs, extra); + return i; } size_t parseexpr(struct ast_soa *ast, struct lexemes_soa toks) { - ast_kind kind; - size_t lexeme, lhs, rhs, extra; - - lexeme = lhs = rhs = extra = AST_EMPTY; + size_t i = ast_alloc(ast); + ast->lexemes[i] = toksidx; switch (toks.kinds[toksidx]) { case LEXNUM: - kind = PRSNUMERIC; - lexeme = toksidx++; + toksidx++; + ast->kinds[i] = ASTNUMLIT; + break; + case LEXLPAR: + ast->kinds[i] = ASTFN; + ast->kids[i].lhs = parseproto(ast, toks); + ast->kids[i].rhs = parseblk(ast, toks); break; default: - err("Expected expression"); + err("parser: Expected expression"); } - return ast_soa_push(ast, kind, lexeme, lhs, rhs, extra); + return i; +} + +size_t +parseproto(struct ast_soa *ast, struct lexemes_soa toks) +{ + size_t i = ast_alloc(ast); + ast->lexemes[i] = toksidx; + ast->kinds[i] = ASTFNPROTO; + ast->kids[i].lhs = AST_EMPTY; + + if (toks.kinds[toksidx++] != LEXLPAR) + err("parser: Expected left parenthesis"); + if (toks.kinds[toksidx++] != LEXRPAR) + err("parser: Expected right parenthesis"); + + ast->kids[i].rhs = toks.kinds[toksidx] == LEXIDENT + ? parsetype(ast, toks) + : AST_EMPTY; + return i; +} + +size_t +parsestmt(struct ast_soa *ast, struct lexemes_soa toks) +{ + size_t i; + + if (toks.kinds[toksidx] != LEXIDENT) + err("parser: Expected identifier"); + + struct strview sv = toks.strs[toksidx]; + if (strncmp("return", sv.p, sv.len) == 0) { + i = ast_alloc(ast); + ast->lexemes[i] = toksidx++; + ast->kinds[i] = ASTRET; + if (toks.kinds[toksidx] != LEXSEMI) + ast->kids[i].rhs = parseexpr(ast, toks); + else + ast->kids[i].rhs = AST_EMPTY; + if (toks.kinds[toksidx++] != LEXSEMI) + err("parser: Expected semicolon"); + } else if (toks.kinds[toksidx + 1] == LEXCOLON) + i = parsedecl(ast, toks); + else + i = parseexpr(ast, toks); + + return i; } size_t parsetype(struct ast_soa *ast, struct lexemes_soa toks) { - size_t lexeme; + size_t i = ast_alloc(ast); + ast->kinds[i] = ASTTYPE; + ast->lexemes[i] = toksidx; - switch (toks.kinds[toksidx]) { - case LEXIDENT: - lexeme = toksidx++; - break; - default: - err("Expected type"); - } + if (toks.kinds[toksidx++] != LEXIDENT) + err("parser: Expected type"); - return ast_soa_push(ast, PRSTYPE, lexeme, AST_EMPTY, AST_EMPTY, AST_EMPTY); + return i; } struct ast_soa @@ -126,26 +198,13 @@ mk_ast_soa(void) % alignof(*soa.kids) == 0, "Additional padding is required to properly align KIDS"); - static_assert(AST_DFLT_CAP - * (sizeof(*soa.kinds) + sizeof(*soa.lexemes) - + sizeof(*soa.kids)) - % alignof(*soa.extra) - == 0, - "Additional padding is required to properly align EXTRA"); soa.len = 0; soa.cap = AST_DFLT_CAP; - soa.aux.len = 0; - soa.aux.cap = AUX_DFLT_CAP; - if ((soa.kinds = malloc(soa.cap * AST_SOA_BLKSZ)) == NULL) - err("malloc:"); + soa.kinds = bufalloc(NULL, soa.cap, AST_SOA_BLKSZ); soa.lexemes = (void *)((char *)soa.kinds + soa.cap * sizeof(*soa.kinds)); soa.kids = (void *)((char *)soa.lexemes + soa.cap * sizeof(*soa.lexemes)); - soa.extra = (void *)((char *)soa.kids + soa.cap * sizeof(*soa.kids)); - - if ((soa.aux.buf = malloc(soa.aux.cap * sizeof(*soa.aux.buf))) == NULL) - err("malloc:"); return soa; } @@ -153,12 +212,11 @@ mk_ast_soa(void) void ast_soa_resz(struct ast_soa *soa) { - size_t ncap, pad1, pad2, pad3, newsz; - ptrdiff_t lexemes_off, kids_off, extra_off; + size_t ncap, pad1, pad2, newsz; + ptrdiff_t lexemes_off, kids_off; lexemes_off = (char *)soa->lexemes - (char *)soa->kinds; kids_off = (char *)soa->kids - (char *)soa->kinds; - extra_off = (char *)soa->extra - (char *)soa->kinds; /* The capacity is always going to be a power of 2, so checking for overflow becomes pretty trivial */ @@ -181,28 +239,13 @@ ast_soa_resz(struct ast_soa *soa) if (pad2 != alignof(*soa->kids)) pad2 = 0; - /* Ensure that soa->extra is properly aligned */ - pad3 = alignof(*soa->extra) - - (ncap - * (sizeof(*soa->kinds) + sizeof(*soa->lexemes) - + sizeof(*soa->kids)) - + pad1 + pad2) - % alignof(*soa->extra); - if (pad3 != alignof(*soa->extra)) - pad3 = 0; - - newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2 + pad3; - if ((soa->kinds = realloc(soa->kinds, newsz)) == NULL) - err("realloc:"); - + newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2; + soa->kinds = bufalloc(soa->kinds, newsz, 1); soa->lexemes = (void *)((char *)soa->kinds + ncap * sizeof(*soa->kinds) + pad1); soa->kids = (void *)((char *)soa->lexemes + ncap * sizeof(*soa->lexemes) + pad2); - soa->extra = (void *)((char *)soa->kids + ncap * sizeof(*soa->kids) + pad3); - memmove(soa->extra, (char *)soa->kinds + extra_off, - soa->len * sizeof(*soa->extra)); memmove(soa->kids, (char *)soa->kinds + kids_off, soa->len * sizeof(*soa->kids)); memmove(soa->lexemes, (char *)soa->kinds + lexemes_off, @@ -212,29 +255,23 @@ ast_soa_resz(struct ast_soa *soa) } size_t -aux_push(struct auxilliary *aux, union extra e) -{ - if (aux->len == aux->cap) { - size_t ncap = aux->cap * 2; - if ((aux->buf = realloc(aux->buf, ncap)) == NULL) - err("realloc:"); - aux->cap = ncap; - } - aux->buf[aux->len] = e; - return aux->len++; -} - -size_t -ast_soa_push(struct ast_soa *soa, ast_kind kind, size_t lexeme, size_t lhs, - size_t rhs, size_t extra) +ast_alloc(struct ast_soa *soa) { if (soa->len == soa->cap) ast_soa_resz(soa); - - soa->kinds[soa->len] = kind; - soa->lexemes[soa->len] = lexeme; - soa->kids[soa->len].lhs = lhs; - soa->kids[soa->len].rhs = rhs; - soa->extra[soa->len] = extra; return soa->len++; } + +/* size_t */ +/* ast_soa_push(struct ast_soa *soa, ast_kind kind, size_t lexeme, size_t lhs, */ +/* size_t rhs) */ +/* { */ +/* if (soa->len == soa->cap) */ +/* ast_soa_resz(soa); */ +/**/ +/* soa->kinds[soa->len] = kind; */ +/* soa->lexemes[soa->len] = lexeme; */ +/* soa->kids[soa->len].lhs = lhs; */ +/* soa->kids[soa->len].rhs = rhs; */ +/* return soa->len++; */ +/* } */ diff --git a/src/parser.h b/src/parser.h index ba5aa58..62f557b 100644 --- a/src/parser.h +++ b/src/parser.h @@ -8,45 +8,57 @@ #include "lexer.h" enum { - PRSDECL, /* Declaration */ - PRSNUMERIC, /* Numeric constant */ - PRSTYPE, /* Type */ + /* Variable declaration, lhs and rhs may be unused + ‘x: lhs = rhs’ */ + ASTDECL, - PRSBINADD = '+', /* Addition */ + /* Constant declaration, lhs and rhs may be unused + ‘x: lhs : rhs’ */ + ASTCDECL, + + /* Function prototype + ‘(a: b, c: d) rhs’; aux[lhs].fnproto */ + ASTFNPROTO, + + /* Function, lhs is the prototype and rhs is the body block */ + ASTFN, + + /* Braced block, sublist[lhs…rhs] */ + ASTBLK, + + /* Numeric literal */ + ASTNUMLIT, + + /* Typename */ + ASTTYPE, + + /* Return statement, rhs may be unused + ‘return rhs’ */ + ASTRET, + + /* Binary add + ‘lhs + rhs’ */ + ASTBINADD = '+', + + /* Binary sub + ‘lhs - rhs’ */ + ASTBINSUB = '-', }; typedef uint8_t ast_kind; #define AST_SOA_BLKSZ (sizeof(ast_kind) + sizeof(size_t) * 4) -/* - * AST Notes: - * - * Declarations have .lhs set to the type of the symbol being declared and .rhs - * set to the value of the declaration if it is an assignment declaration. They - * also have aux.constdecl set to true if it’s a declaration of a constant. - */ - -struct auxilliary { - union extra { - bool constdecl; - } *buf; - size_t len, cap; -}; - struct ast_soa { ast_kind *kinds; size_t *lexemes; struct { size_t lhs, rhs; } *kids; - size_t *extra; size_t len, cap; - - struct auxilliary aux; }; -#define ast_free(x) do { free((x).kinds); free((x).aux.buf); } while (0) +#define ast_free(x) free((x).kinds) /* Parse the tokens in TOKS into an abstract syntax tree */ struct ast_soa parsetoks(struct lexemes_soa toks); |