diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:36:36 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-21 23:42:26 +0200 |
commit | a89a14ef5da44684a16b204e7a70460cc8c4922a (patch) | |
tree | b23b4c6b155977909ef508fdae2f48d33d802813 /src | |
parent | 1db63fcedab0b288820d66e100b1877b1a5a8851 (diff) |
Basic constant folding implementation
Diffstat (limited to 'src')
-rw-r--r-- | src/analyzer.c | 156 | ||||
-rw-r--r-- | src/analyzer.h | 4 | ||||
-rw-r--r-- | src/codegen.c | 81 | ||||
-rw-r--r-- | src/main.c | 6 |
4 files changed, 223 insertions, 24 deletions
diff --git a/src/analyzer.c b/src/analyzer.c index e842859..eaf0ab2 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -1,10 +1,13 @@ #include <assert.h> +#include <ctype.h> #include <stdbool.h> #include <stddef.h> #include <stdint.h> #include <stdlib.h> #include <string.h> +#include <gmp.h> + #include "alloc.h" #include "analyzer.h" #include "common.h" @@ -39,17 +42,26 @@ struct azctx { definition */ struct strview decl; + /* The index of the current scope in the scopes array */ + idx_t_ si; + /* If we need to check for return statements. Only true for the outer body-block of a function that returns a value. */ bool chkrets; +}; - /* The index of the current scope in the scopes array */ +struct cfctx { + arena *a; + struct strview decl; idx_t_ si; }; static void analyzeast(struct scope *, struct type *, struct ast, struct lexemes, arena *) __attribute__((nonnull)); +static void constfold(mpq_t *, struct scope *, struct type *, struct ast, + struct lexemes, arena *) + __attribute__((nonnull)); /* Perform a pass over the entire AST and return an array of symbol tables, one for each scope in the program */ @@ -87,13 +99,16 @@ const struct type *typelookup(const uchar *, size_t) void analyzeprog(struct ast ast, struct lexemes toks, arena *a, struct type **types, - struct scope **scps) + struct scope **scps, mpq_t **folds) { *types = bufalloc(NULL, ast.len, sizeof(**types)); memset(*types, 0, ast.len * sizeof(**types)); *scps = gensymtabs(ast, toks, a); analyzeast(*scps, *types, ast, toks, a); + + *folds = bufalloc(NULL, ast.len, sizeof(**folds)); + constfold(*folds, *scps, *types, ast, toks, a); } struct scope * @@ -322,6 +337,143 @@ analyzeblk(struct azctx ctx, struct scope *scps, struct type *types, return i; } +static idx_t_ +constfolddecl(struct cfctx ctx, mpq_t *folds, struct scope *scps, + struct type *types, struct ast ast, struct lexemes toks, idx_t_ i); +static idx_t_ +constfoldexpr(struct cfctx ctx, mpq_t *folds, struct scope *scps, + struct type *types, struct ast ast, struct lexemes toks, idx_t_ i); + +idx_t_ +constfoldstmt(struct cfctx ctx, mpq_t *folds, struct scope *scps, + struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +{ + switch (ast.kinds[i]) { + case ASTDECL: + case ASTCDECL: + case ASTPCDECL: + case ASTPDECL: + return constfolddecl(ctx, folds, scps, types, ast, toks, i); + case ASTRET: + return constfoldexpr(ctx, folds, scps, types, ast, toks, + ast.kids[i].rhs); + default: + __builtin_unreachable(); + } +} + +idx_t_ +constfoldblk(struct cfctx ctx, mpq_t *folds, struct scope *scps, + struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +{ + struct pair p = ast.kids[i]; + while (scps[ctx.si].i != p.lhs) + ctx.si++; + for (i = p.lhs; i <= p.rhs; + i = constfoldstmt(ctx, folds, scps, types, ast, toks, i)) + ; + + return i; +} + +idx_t_ +constfoldexpr(struct cfctx ctx, mpq_t *folds, struct scope *scps, + struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +{ + /* Check if this expression has already been constant folded. This + works because when an mpq_t is initialized via mpq_init(), it is + set to 0/1 meaning that the denominator pointer can’t be NULL. */ + if ((*folds[i])._mp_den._mp_d != NULL) + return fwdnode(ast, i); + + switch (ast.kinds[i]) { + case ASTNUMLIT: { + mpq_init(folds[i]); + + /* TODO: Temporary allocator */ + struct strview sv = toks.strs[ast.lexemes[i]]; + char *buf = bufalloc(NULL, sv.len + 1, 1); + size_t len = 0; + + for (size_t i = 0; i < sv.len; i++) { + if (isdigit(sv.p[i])) + buf[len++] = sv.p[i]; + } + buf[len] = 0; + + (void)mpq_set_str(folds[i], buf, 10); + + free(buf); + return fwdnode(ast, i); + } + case ASTIDENT: { + struct strview sv = toks.strs[ast.lexemes[i]]; + + /* Variable shadowing */ + if (strview_eq(sv, ctx.decl) && ctx.si > 0) + ctx.si--; + + for (idx_t_ lvl = ctx.si;;) { + struct scope scp = scps[lvl]; + idx_t_ *ip = symtab_insert(&scp.map, sv, NULL); + + if (ip == NULL) { + assert(lvl != 0); + lvl = scp.up; + } else { + switch (ast.kinds[*ip]) { + case ASTDECL: + case ASTPDECL: + break; + case ASTCDECL: + case ASTPCDECL: { + *folds[i] = *folds[*ip]; + if ((*folds[i])._mp_den._mp_d == NULL) { + ctx.si = lvl; + (void)constfolddecl(ctx, folds, scps, types, ast, toks, + *ip); + *folds[i] = *folds[*ip]; + assert((*folds[i])._mp_den._mp_d != NULL); + } + break; + } + default: + __builtin_unreachable(); + } + + return fwdnode(ast, i); + } + } + } + case ASTFN: + return constfoldblk(ctx, folds, scps, types, ast, toks, + ast.kids[i].rhs); + default: + __builtin_unreachable(); + } +} + +idx_t_ +constfolddecl(struct cfctx ctx, mpq_t *folds, struct scope *scps, + struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +{ + if (ast.kids[i].rhs == AST_EMPTY) + return fwdnode(ast, i); + ctx.decl = toks.strs[ast.lexemes[i]]; + return constfoldexpr(ctx, folds, scps, types, ast, toks, ast.kids[i].rhs); +} + +void +constfold(mpq_t *folds, struct scope *scps, struct type *types, struct ast ast, + struct lexemes toks, arena *a) +{ + struct cfctx ctx = {.a = a}; + for (idx_t_ i = 0; likely(i < ast.len);) { + assert(ast.kinds[i] <= _AST_DECLS_END); + i = constfolddecl(ctx, folds, scps, types, ast, toks, i); + } +} + bool returns(struct ast ast, idx_t_ i) { diff --git a/src/analyzer.h b/src/analyzer.h index a1c55f5..ec6089c 100644 --- a/src/analyzer.h +++ b/src/analyzer.h @@ -3,6 +3,8 @@ #include <stdint.h> +#include <gmp.h> + #include "alloc.h" #include "lexer.h" #include "parser.h" @@ -55,7 +57,7 @@ struct type { }; void analyzeprog(struct ast, struct lexemes, arena *, struct type **, - struct scope **) + struct scope **, mpq_t **) __attribute__((nonnull)); #endif /* !ORYX_ANALYZER_H */ diff --git a/src/codegen.c b/src/codegen.c index f835f5c..48c43ce 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1,4 +1,6 @@ +#include <ctype.h> #include <stdint.h> +#include <stdlib.h> #include <string.h> #include <llvm-c/Analysis.h> @@ -23,12 +25,18 @@ struct cgctx { struct strview namespace; }; -static LLVMTypeRef type2llvm(struct cgctx, struct type); +// static LLVMTypeRef type2llvm(struct cgctx, struct type); +// static void str2val(mpq_t, struct strview); +// static struct val *cvmap_insert(cvmap **, struct strview, arena *) +// __attribute__((nonnull(1))); void codegen(const char *file, struct type *types, struct ast ast, struct lexemes toks) { + (void)types; + (void)ast; + (void)toks; char *triple = LLVMGetDefaultTargetTriple(); struct cgctx ctx; @@ -55,22 +63,55 @@ codegen(const char *file, struct type *types, struct ast ast, LLVMContextDispose(ctx.ctx); } -LLVMTypeRef -type2llvm(struct cgctx ctx, struct type t) -{ - switch (t.kind) { - case TYPE_FN: - err("codegen: %s: Not implemented for function types", __func__); - case TYPE_NUM: - /* TODO: Floats */ - if (t.isfloat) - err("codegen: %s: Not implemented for floats", __func__); - /* TODO: Arbitrary precision */ - if (t.size == 0) - return LLVMInt64TypeInContext(ctx.ctx); - assert((unsigned)t.size * 8 <= UINT8_MAX); - return LLVMIntTypeInContext(ctx.ctx, t.size * 8); - default: - __builtin_unreachable(); - } -} +// LLVMTypeRef +// type2llvm(struct cgctx ctx, struct type t) +// { +// switch (t.kind) { +// case TYPE_FN: +// err("codegen: %s: Not implemented for function types", __func__); +// case TYPE_NUM: +// /* TODO: Floats */ +// if (t.isfloat) +// err("codegen: %s: Not implemented for floats", __func__); +// /* TODO: Arbitrary precision */ +// if (t.size == 0) +// return LLVMInt64TypeInContext(ctx.ctx); +// assert((unsigned)t.size * 8 <= UINT8_MAX); +// return LLVMIntTypeInContext(ctx.ctx, t.size * 8); +// default: +// __builtin_unreachable(); +// } +// } +// +// void +// str2val(mpq_t rop, struct strview sv) +// { +// mpq_init(rop); +// char *clean = bufalloc(NULL, sv.len + 1, 1); +// size_t len = 0; +// +// for (size_t i = 0; i < sv.len; i++) { +// if (isdigit(sv.p[i])) +// clean[len++] = sv.p[i]; +// } +// clean[len] = 0; +// +// mpq_set_str(rop, clean, 10); +// +// free(clean); +// } +// +// struct val * +// cvmap_insert(cvmap **m, struct strview k, arena *a) +// { +// for (uint64_t h = strview_hash(k); *m; h <<= 2) { +// if (strview_eq(k, (*m)->key)) +// return &(*m)->val; +// m = &(*m)->child[h >> 62]; +// } +// if (a == NULL) +// return NULL; +// *m = arena_new(a, cvmap, 1); +// (*m)->key = k; +// return &(*m)->val; +// } @@ -5,6 +5,8 @@ #include <stdlib.h> #include <unistd.h> +#include <gmp.h> + #include "alloc.h" #include "analyzer.h" #include "codegen.h" @@ -28,15 +30,17 @@ main(int argc, char **argv) char *src = readfile(argv[1], &srclen); arena a = NULL; + mpq_t *folds; struct type *types; struct scope *scps; struct lexemes toks = lexstring(src, srclen); struct ast ast = parsetoks(toks); - analyzeprog(ast, toks, &a, &types, &scps); + analyzeprog(ast, toks, &a, &types, &scps, &folds); codegen(argv[1], scps, types, ast, toks); #if DEBUG + free(folds); free(scps); free(src); free(types); |