From 5e384049170dcb9e7e8aae5e98729bbc26661f90 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Fri, 21 Jun 2024 02:04:37 +0200 Subject: Huge changes to static analysis --- make.c | 2 + src/analyzer.c | 261 ++++++++++++++++++++++++++++++------------------- src/analyzer.h | 46 +++++++-- src/codegen.c | 269 ++++++++++++++++++++++++++++++--------------------- src/lexer.c | 1 + src/main.c | 10 +- src/parser.c | 14 +-- src/primitives.gperf | 30 +++--- src/strview.c | 23 +++++ src/strview.h | 39 ++++++++ src/types.h | 6 -- 11 files changed, 453 insertions(+), 248 deletions(-) create mode 100644 src/strview.c create mode 100644 src/strview.h diff --git a/make.c b/make.c index 4a21735..65d84d1 100644 --- a/make.c +++ b/make.c @@ -147,6 +147,8 @@ main(int argc, char **argv) void cc(void *arg) { + if (strcmp(arg, "src/codegen.c") == 0) + return; if (!tagvalid(arg)) return; diff --git a/src/analyzer.c b/src/analyzer.c index d563b84..e842859 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -10,33 +10,64 @@ #include "common.h" #include "errors.h" #include "parser.h" +#include "strview.h" #include "types.h" -struct environ { - struct declaration { - idx_t_ astidx; - } *buf; - size_t len, cap; -}; +/* A hashmap mapping symbol names to their indicies in the AST */ +typedef struct symtab { + struct symtab *child[4]; + struct strview key; + idx_t_ val; +} symtab; -struct evstack { - struct environ *buf; +/* A dynamic array of scopes */ +struct scopes { + struct scope *buf; size_t len, cap; }; +/* Analyzer context; keeps track of the state of static analysis */ struct azctx { + /* An arena allocator */ + arena *a; + + /* The return type of the function being analyzed */ struct type fnret; + + /* The name of the symbol being declared. This is necessary to allow + for ‘X :: X’ to be treated as shadowing and not a circular + definition */ struct strview decl; + + /* If we need to check for return statements. Only true for the + outer body-block of a function that returns a value. */ bool chkrets; + + /* The index of the current scope in the scopes array */ + idx_t_ si; }; -static void analyzeast(struct evstack, struct type *, struct ast, - struct lexemes) +static void analyzeast(struct scope *, struct type *, struct ast, + struct lexemes, arena *) __attribute__((nonnull)); -typedef idx_t_ analyzer(struct azctx, struct evstack, struct type *, struct ast, +/* Perform a pass over the entire AST and return an array of symbol + tables, one for each scope in the program */ +static struct scope *gensymtabs(struct ast, struct lexemes, arena *) + __attribute__((returns_nonnull, nonnull)); + +/* Find all the unordered symbols in the scope delimited by the inclusive + indicies BEG and END in the AST, and accumulate them into a symbol + table appended to the symbol table list. UP is the index of the + previous scopes symbol table in the symbol table list. */ +static void find_unordered_syms(struct scopes *, struct ast, struct lexemes, + idx_t_ up, idx_t_ beg, idx_t_ end, arena *) + __attribute__((nonnull)); + +typedef idx_t_ analyzer(struct azctx, struct scope *, struct type *, struct ast, struct lexemes, idx_t_) __attribute__((nonnull)); + static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt; static const struct type *typegrab(struct ast, struct lexemes, idx_t_) @@ -44,24 +75,70 @@ static const struct type *typegrab(struct ast, struct lexemes, idx_t_) static bool typecompat(struct type, struct type); static bool returns(struct ast, idx_t_); +/* Index the symbol table M with the key SV, returning a pointer to the + value. If no entry exists and A is non-null, a pointer to a newly + allocated (and zeroed) value is returned, NULL otherwise. */ +static idx_t_ *symtab_insert(symtab **m, struct strview sv, arena *a) + __attribute__((nonnull(1))); + /* Defined in primitives.gperf */ const struct type *typelookup(const uchar *, size_t) __attribute__((nonnull)); -struct type * -analyzeprog(struct ast ast, struct lexemes toks) +void +analyzeprog(struct ast ast, struct lexemes toks, arena *a, struct type **types, + struct scope **scps) +{ + *types = bufalloc(NULL, ast.len, sizeof(**types)); + memset(*types, 0, ast.len * sizeof(**types)); + + *scps = gensymtabs(ast, toks, a); + analyzeast(*scps, *types, ast, toks, a); +} + +struct scope * +gensymtabs(struct ast ast, struct lexemes toks, arena *a) { - arena a = NULL; - struct evstack evs = {.cap = 16}; - evs.buf = bufalloc(NULL, evs.cap, sizeof(*evs.buf)); - struct type *types = bufalloc(NULL, ast.len, sizeof(*types)); - memset(types, 0, ast.len * sizeof(*types)); + struct scopes scps = {.cap = 32}; + scps.buf = bufalloc(NULL, scps.cap, sizeof(*scps.buf)); + find_unordered_syms(&scps, ast, toks, 0, 0, ast.len - 1, a); + return scps.buf; +} - analyzeast(evs, types, ast, toks); +void +find_unordered_syms(struct scopes *scps, struct ast ast, struct lexemes toks, + idx_t_ up, idx_t_ beg, idx_t_ end, arena *a) +{ + if (scps->len == scps->cap) { + scps->cap *= 2; + scps->buf = bufalloc(scps->buf, scps->cap, sizeof(*scps->buf)); + } - arena_free(&a); - free(evs.buf); - return types; + struct scope *scp = scps->buf + scps->len++; + *scp = (struct scope){ + .i = beg, + .up = up, + .map = NULL, + }; + + for (idx_t_ i = beg; likely(i <= end); i++) { + bool globl_var = ast.kinds[i] <= _AST_DECLS_END && beg == 0; + bool const_var = (ast.kinds[i] | 1) == ASTPCDECL; + + if (globl_var || const_var) { + struct strview sv = toks.strs[ast.lexemes[i]]; + idx_t_ *p = symtab_insert(&scp->map, sv, a); + if (*p != 0) { + err("analyzer: Symbol ‘%.*s’ declared multiple times", + SV_PRI_ARGS(sv)); + } + *p = i; + } else if (ast.kinds[i] == ASTBLK) { + struct pair p = ast.kids[i]; + find_unordered_syms(scps, ast, toks, beg, p.lhs, p.rhs, a); + i = p.rhs; + } + } } const struct type * @@ -75,39 +152,31 @@ typegrab(struct ast ast, struct lexemes toks, idx_t_ i) } void -analyzeast(struct evstack evs, struct type *types, struct ast ast, - struct lexemes toks) +analyzeast(struct scope *scps, struct type *types, struct ast ast, + struct lexemes toks, arena *a) { - struct environ ev = {.cap = 16}; - ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf)); - - for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { - assert(ast.kinds[i] <= _AST_DECLS_END); - if (ev.len == ev.cap) { - ev.cap *= 2; - ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf)); - } - ev.buf[ev.len++] = (struct declaration){.astidx = i}; - } - - assert(evs.cap > 0); - evs.buf[0] = ev; - evs.len = 1; - - struct azctx ctx = {0}; + struct azctx ctx = {.a = a}; for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { assert(ast.kinds[i] <= _AST_DECLS_END); - analyzedecl(ctx, evs, types, ast, toks, i); + analyzedecl(ctx, scps, types, ast, toks, i); } - - free(ev.buf); } idx_t_ -analyzedecl(struct azctx ctx, struct evstack evs, struct type *types, +analyzedecl(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { - ctx.decl = toks.strs[ast.lexemes[i]]; + struct strview sv = toks.strs[ast.lexemes[i]]; + if (ctx.si > 0 && (ast.kinds[i] | 1) == ASTPDECL) { + idx_t_ *ip = symtab_insert(&scps[ctx.si].map, sv, ctx.a); + if (*ip == 0) + *ip = i; + else { + err("analyzer: Variable ‘%.*s’ declared multiple times", + SV_PRI_ARGS(sv)); + } + } + types[i].kind = TYPE_CHECKING; struct pair p = ast.kids[i]; @@ -121,7 +190,8 @@ analyzedecl(struct azctx ctx, struct evstack evs, struct type *types, if (p.lhs != AST_EMPTY) ltype = *typegrab(ast, toks, p.lhs); if (p.rhs != AST_EMPTY) { - ni = analyzeexpr(ctx, evs, types, ast, toks, p.rhs); + ctx.decl = sv; + ni = analyzeexpr(ctx, scps, types, ast, toks, p.rhs); rtype = types[p.rhs]; } else ni = fwdnode(ast, i); @@ -136,13 +206,13 @@ analyzedecl(struct azctx ctx, struct evstack evs, struct type *types, } idx_t_ -analyzestmt(struct azctx ctx, struct evstack evs, struct type *types, +analyzestmt(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: - return analyzedecl(ctx, evs, types, ast, toks, i); + return analyzedecl(ctx, scps, types, ast, toks, i); case ASTRET: { idx_t_ expr = ast.kids[i].rhs; if (expr == AST_EMPTY) { @@ -152,19 +222,18 @@ analyzestmt(struct azctx ctx, struct evstack evs, struct type *types, } else if (ctx.fnret.kind == TYPE_UNSET) err("analyzer: Function has no return value"); - idx_t_ ni = analyzeexpr(ctx, evs, types, ast, toks, ast.kids[i].rhs); + idx_t_ ni = analyzeexpr(ctx, scps, types, ast, toks, ast.kids[i].rhs); if (!typecompat(ctx.fnret, types[ast.kids[i].rhs])) err("analyzer: Return type mismatch"); return ni; } default: - assert(!"unreachable"); __builtin_unreachable(); } } idx_t_ -analyzeexpr(struct azctx ctx, struct evstack evs, struct type *types, +analyzeexpr(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { switch (ast.kinds[i]) { @@ -176,49 +245,44 @@ analyzeexpr(struct azctx ctx, struct evstack evs, struct type *types, case ASTIDENT: { struct strview sv = toks.strs[ast.lexemes[i]]; - for (size_t lvl = evs.len; lvl-- > 0;) { - struct environ ev = evs.buf[lvl]; - /* TODO: Binary search */ - for (size_t j = 0; j < ev.len; j++) { - struct strview sv2 = toks.strs[ast.lexemes[ev.buf[j].astidx]]; - if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0) - continue; - - /* We need this to allow shadowing in situations like - FOO :FOO */ - if (lvl == evs.len - 1 && sv.len == ctx.decl.len - && memcmp(sv.p, ctx.decl.p, sv.len) == 0) - { - break; - } + /* Variable shadowing */ + if (strview_eq(sv, ctx.decl) && ctx.si > 0) + ctx.si--; - switch (types[j].kind) { + for (idx_t_ lvl = ctx.si;;) { + struct scope scp = scps[lvl]; + idx_t_ *ip = symtab_insert(&scp.map, sv, NULL); + + if (ip == NULL) { + if (lvl == 0) + break; + lvl = scp.up; + } else { + switch (types[*ip].kind) { case TYPE_UNSET: - evs.len = lvl + 1; - analyzedecl(ctx, evs, types, ast, toks, ev.buf[j].astidx); + ctx.si = lvl; + analyzedecl(ctx, scps, types, ast, toks, *ip); break; case TYPE_CHECKING: - err("analyzer: Circular definition of ‘%.*s’", (int)sv.len, - sv.p); + err("analyzer: Circular definition of ‘%.*s’", SV_PRI_ARGS(sv)); } - types[i] = types[j]; + types[i] = types[*ip]; return i + 1; } } - err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p); + err("analyzer: Unknown symbol ‘%.*s’", SV_PRI_ARGS(sv)); } case ASTFN: - return analyzefn(ctx, evs, types, ast, toks, i); + return analyzefn(ctx, scps, types, ast, toks, i); default: - err("analyzer: Unexpected AST kind %u", ast.kinds[i]); __builtin_unreachable(); } } idx_t_ -analyzefn(struct azctx ctx, struct evstack evs, struct type *types, +analyzefn(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { struct type t = {.kind = TYPE_FN}; @@ -232,33 +296,17 @@ analyzefn(struct azctx ctx, struct evstack evs, struct type *types, } else ctx.fnret.kind = TYPE_UNSET; types[i] = t; - return analyzeblk(ctx, evs, types, ast, toks, p.rhs); + return analyzeblk(ctx, scps, types, ast, toks, p.rhs); } idx_t_ -analyzeblk(struct azctx ctx, struct evstack evs, struct type *types, +analyzeblk(struct azctx ctx, struct scope *scps, struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) { - struct environ ev = {.cap = 16}; - ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf)); - struct pair p = ast.kids[i]; - for (idx_t_ i = p.lhs; likely(i < p.rhs); i = fwdnode(ast, i)) { - if (ast.kinds[i] != ASTCDECL) - continue; - if (ev.len == ev.cap) { - ev.cap *= 2; - ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf)); - } - ev.buf[ev.len++] = (struct declaration){.astidx = i}; - } - - if (evs.len == evs.cap) { - evs.cap *= 2; - evs.buf = bufalloc(evs.buf, evs.cap, sizeof(*evs.buf)); - } - evs.buf[evs.len++] = ev; + while (scps[ctx.si].i != p.lhs) + ctx.si++; bool chkrets = ctx.chkrets, hasret = false; ctx.chkrets = false; @@ -266,13 +314,11 @@ analyzeblk(struct azctx ctx, struct evstack evs, struct type *types, for (i = p.lhs; i <= p.rhs;) { if (chkrets && returns(ast, i)) hasret = true; - i = analyzestmt(ctx, evs, types, ast, toks, i); + i = analyzestmt(ctx, scps, types, ast, toks, i); } - if (chkrets && !hasret) err("analyzer: Function doesn’t return on all paths"); - free(ev.buf); return i; } @@ -312,3 +358,18 @@ typecompat(struct type lhs, struct type rhs) return lhs.issigned == rhs.issigned && lhs.isfloat == rhs.isfloat && lhs.size == rhs.size; } + +idx_t_ * +symtab_insert(symtab **m, struct strview k, arena *a) +{ + for (uint64_t h = strview_hash(k); *m; h <<= 2) { + if (strview_eq(k, (*m)->key)) + return &(*m)->val; + m = &(*m)->child[h >> 62]; + } + if (a == NULL) + return NULL; + *m = arena_new(a, symtab, 1); + (*m)->key = k; + return &(*m)->val; +} diff --git a/src/analyzer.h b/src/analyzer.h index 726b4e5..a1c55f5 100644 --- a/src/analyzer.h +++ b/src/analyzer.h @@ -3,14 +3,26 @@ #include +#include "alloc.h" #include "lexer.h" #include "parser.h" +#include "types.h" +/* The different base types */ enum { - TYPE_UNSET = 0, - TYPE_CHECKING = 1, + /* No type exists (or hasn’t yet been typechecked) */ + TYPE_UNSET, + + /* Currently in the process of being typechecked. Useful for + detecting cyclic definitions. */ + TYPE_CHECKING, + + /* A numeric type */ TYPE_NUM, + + /* A function type */ TYPE_FN, + _TYPE_LAST_ENT, }; @@ -18,18 +30,32 @@ typedef uint8_t type_kind_t_; static_assert(_TYPE_LAST_ENT - 1 <= (type_kind_t_)-1, "Too many AST tokens to fix in TYPE_KIND_T_"); +typedef struct symtab symtab; + +struct scope { + idx_t_ up, i; + symtab *map; +}; + +/* A variable type */ struct type { type_kind_t_ kind; - uint8_t size : 6; /* bytes */ - bool issigned : 1; - bool isfloat : 1; - /* For functions */ - const struct type *params, *ret; - idx_t_ paramcnt; + union { + struct { + uint8_t size; + bool issigned; + bool isfloat; + }; + struct { + const struct type *params, *ret; + idx_t_ paramcnt; + }; + }; }; -struct type *analyzeprog(struct ast, struct lexemes) - __attribute__((returns_nonnull)); +void analyzeprog(struct ast, struct lexemes, arena *, struct type **, + struct scope **) + __attribute__((nonnull)); #endif /* !ORYX_ANALYZER_H */ diff --git a/src/codegen.c b/src/codegen.c index 504291a..f04be03 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1,8 +1,10 @@ #include +#include #include #include #include +#include #include #include @@ -13,67 +15,55 @@ #include "parser.h" #include "types.h" +typedef mpq_t constval; + +typedef struct constmap { + struct constmap *child[4]; + struct strview key; + constval val; +} constmap; + +/* A context structure we can pass to all the codegen functions just so they + have easy access to everything */ struct cgctx { + LLVMContextRef ctx; LLVMModuleRef mod; LLVMBuilderRef bob; struct strview namespace; + constmap *consts; + arena a; }; -static size_t codegendecl(struct cgctx, struct type *, struct ast, - struct lexemes, size_t) - __attribute__((nonnull)); -static size_t codegenexpr(struct cgctx, struct type *, struct ast, - struct lexemes, size_t, LLVMValueRef *) +static void codegenprog(struct cgctx, struct type *, struct ast, + struct lexemes) __attribute__((nonnull)); +// static size_t codegenexpr(struct cgctx, struct type *, struct ast, +// struct lexemes, size_t, LLVMValueRef *) +// __attribute__((nonnull)); static LLVMTypeRef type2llvm(struct type); +static bool constmap_equals(struct strview, struct strview); +static uint64_t constmap_hash(struct strview); +static constval *constmap_upsert(constmap **, struct strview, arena *) + __attribute__((nonnull(1))); + /* TODO: Don’t do this? */ #define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) -static struct { - struct strview key; - LLVMValueRef val; -} constants[1024]; -static size_t constcnt; void codegen(const char *file, struct type *types, struct ast ast, struct lexemes toks) { struct cgctx ctx = {0}; - ctx.mod = LLVMModuleCreateWithName("oryx"); - ctx.bob = LLVMCreateBuilder(); + ctx.ctx = LLVMContextCreate(); + ctx.mod = LLVMModuleCreateWithNameInContext("oryx", ctx.ctx); + ctx.bob = LLVMCreateBuilderInContext(ctx.ctx); LLVMSetSourceFileName(ctx.mod, file, strlen(file)); - for (size_t i = 0; i < ast.len;) { - // LLVMValueRef val; - assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL); - i = codegendecl(ctx, types, ast, toks, i); - - /* TODO: Temporary allocator */ - // struct strview sv = toks.strs[ast.lexemes[i]]; - // char *name = bufalloc(NULL, sv.len + 1, 1); - // ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0; - // - // LLVMValueRef globl, init; - // LLVMTypeRef vartype = type2llvm(types[i]); - // - // globl = LLVMAddGlobal(mod, vartype, name); - // LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL); - // - // if (ast.kids[i].rhs != AST_EMPTY) { - // i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init); - // init = LLVMConstTrunc(init, vartype); - // } else { - // init = LLVMConstNull(vartype); - // i = fwdnode(ast, i); - // } - // - // LLVMSetInitializer(globl, init); - // LLVMSetLinkage(globl, LLVMPrivateLinkage); - // - // free(name); - } + codegenprog(ctx, types, ast, toks); + + arena_free(&ctx.a); LLVMDisposeBuilder(ctx.bob); @@ -83,82 +73,117 @@ codegen(const char *file, struct type *types, struct ast ast, LLVMDumpModule(ctx.mod); LLVMDisposeModule(ctx.mod); + LLVMContextDispose(ctx.ctx); } -size_t -codegendecl(struct cgctx ctx, struct type *types, struct ast ast, - struct lexemes toks, size_t i) +void +interp_const_expr(struct cgctx ctx, struct type *types, struct ast ast, + struct lexemes toks, idx_t_ i, constval v) { - struct strview ident = toks.strs[ast.lexemes[i]]; - - char *name; - if (ctx.namespace.len != 0) { - size_t namelen = ident.len + ctx.namespace.len + 1; - name = bufalloc(NULL, namelen + 1, 1); - sprintf(name, "%.*s.%.*s", (int)ctx.namespace.len, ctx.namespace.p, - (int)ident.len, ident.p); - } else { - name = bufalloc(NULL, ident.len + 1, 1); - memcpy(name, ident.p, ident.len); - name[ident.len] = 0; - } + switch (ast.kinds[i]) { + case ASTNUMLIT: { + mpf_t f; + mpq_init(v); - LLVMValueRef val; - LLVMTypeRef vartype = type2llvm(types[i]); + struct strview sv = toks.strs[ast.lexemes[i]]; + char *buf = bufalloc(NULL, sv.len + 1, 1); + memcpy(buf, sv.p, sv.len); + buf[sv.len] = 0; - if (ast.kids[i].rhs != AST_EMPTY) { - i = codegenexpr(ctx, types, ast, toks, ast.kids[i].rhs, &val); - val = LLVMConstTrunc(val, vartype); - } else { - i = fwdnode(ast, i); - val = LLVMConstNull(vartype); + if (mpf_init_set_str(f, buf, 10) == -1) + err("mpf_init_set_str: Invalid input ‘%s’", buf); + free(buf); + + mpq_set_f(v, f); + mpf_clear(f); + break; + } + case ASTIDENT: { + struct strview sv = toks.strs[ast.lexemes[i]]; + constval *w = constmap_upsert(&ctx.consts, sv, NULL); + v = *w; + break; + } + default: + err("codegen: Not implemented"); } +} + +void +register_const(struct cgctx ctx, struct type *types, struct ast ast, + struct lexemes toks, idx_t_ i) +{ + struct pair p = ast.kids[i]; + assert(p.rhs != AST_EMPTY); + + constval v; + interp_const_expr(ctx, types, ast, toks, p.rhs, v); - LLVMValueRef globl = LLVMAddGlobal(ctx.mod, vartype, name); - LLVMSetInitializer(globl, val); - LLVMSetLinkage(globl, LLVMLinkerPrivateLinkage); + struct strview sv = toks.strs[ast.lexemes[i]]; + *constmap_upsert(&ctx.consts, sv, &ctx.a) = v; - free(name); - return i; + /* TODO: Remove */ + printf("%.*s = ", (int)sv.len, sv.p); + mpq_out_str(stdout, 10, v); + putchar('\n'); + mpq_clear(v); } -size_t -codegenexpr(struct cgctx ctx, struct type *types, struct ast ast, - struct lexemes toks, size_t i, LLVMValueRef *v) +void +codegenprog(struct cgctx ctx, struct type *types, struct ast ast, + struct lexemes toks) { - (void)ctx; - switch (ast.kinds[i]) { - case ASTNUMLIT: { - /* TODO: Arbitrary precision? */ - struct strview sv = toks.strs[ast.lexemes[i]]; + for (size_t i = 0; i < ast.len; i = fwdnode(ast, i)) { + if ((ast.kinds[i] | 1) == ASTPCDECL) + register_const(ctx, types, ast, toks, i); - bool has_sep = memchr(sv.p, '\'', sv.len) != NULL; - - /* TODO: Temporary one-time-use allocator? */ - if (has_sep) { - size_t len = 0; - char *p = bufalloc(NULL, sv.len, 1); - for (size_t i = 0; i < sv.len; i++) { - if (sv.p[i] != '\'') - p[len++] = sv.p[i]; - } - - *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), p, len, 10); - free(p); - } else { - *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), sv.p, sv.len, - 10); - } - return i + 1; - } - case ASTIDENT: - err("codegen: %s: Not implemented", __func__); - default: - assert(!"unreachable"); - __builtin_unreachable(); + // mpq_t v = interp_const_expr(ctx, ast, toks, p.rhs); + // constmap_upsert(&ctx.consts, ast.lexemes[p.lhs], &ctx.a); } + + // for (size_t i = 0; i < ast.len;) { + // assert(ast.kinds[i] <= _AST_DECLS_END); + // i = codegenprog(ctx, types, ast, toks, i); + // } } +// size_t +// codegenexpr(struct cgctx ctx, struct type *types, struct ast ast, +// struct lexemes toks, size_t i, LLVMValueRef *v) +// { +// (void)ctx; +// switch (ast.kinds[i]) { +// case ASTNUMLIT: { +// /* TODO: Arbitrary precision? */ +// struct strview sv = toks.strs[ast.lexemes[i]]; +// +// bool has_sep = memchr(sv.p, '\'', sv.len) != NULL; +// +// /* TODO: Temporary one-time-use allocator? */ +// if (has_sep) { +// size_t len = 0; +// char *p = bufalloc(NULL, sv.len, 1); +// for (size_t i = 0; i < sv.len; i++) { +// if (sv.p[i] != '\'') +// p[len++] = sv.p[i]; +// } +// +// *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), p, len, 10); +// free(p); +// } else { +// *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), sv.p, sv.len, +// 10); +// } +// return i + 1; +// } +// case ASTIDENT: +// err("codegen: %s: Not implemented", __func__); +// default: +// assert(!"unreachable"); +// __builtin_unreachable(); +// } +// } + LLVMTypeRef type2llvm(struct type t) { @@ -169,14 +194,8 @@ type2llvm(struct type t) __builtin_unreachable(); case TYPE_FN: err("codegen: %s: Not implemented", __func__); - case TYPE_F16: - return LLVMHalfType(); - case TYPE_F32: - return LLVMFloatType(); - case TYPE_F64: - return LLVMDoubleType(); case TYPE_NUM: - assert(t.issigned); + /* TODO: Floats */ /* TODO: Arbitrary precision */ if (t.size == 0) return LLVMInt64Type(); @@ -185,3 +204,35 @@ type2llvm(struct type t) __builtin_unreachable(); } } + +bool +constmap_equals(struct strview x, struct strview y) +{ + return x.len == y.len && memcmp(x.p, y.p, x.len) == 0; +} + +uint64_t +constmap_hash(struct strview sv) +{ + uint64_t h = 0x100; + for (size_t i = 0; i < sv.len; i++) { + h ^= sv.p[i]; + h *= 1111111111111111111u; + } + return h; +} + +constval * +constmap_upsert(constmap **m, struct strview key, arena *a) +{ + for (uint64_t h = constmap_hash(key); *m; h <<= 2) { + if (constmap_equals(key, (*m)->key)) + return &(*m)->val; + m = &(*m)->child[h >> 62]; + } + if (a == NULL) + return NULL; + *m = arena_new(a, constmap, 1); + (*m)->key = key; + return &(*m)->val; +} diff --git a/src/lexer.c b/src/lexer.c index 25e41c7..a980812 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -12,6 +12,7 @@ #include "common.h" #include "errors.h" #include "lexer.h" +#include "strview.h" #include "unicode.h" #if DEBUG diff --git a/src/main.c b/src/main.c index 6cb715a..05228d7 100644 --- a/src/main.c +++ b/src/main.c @@ -27,16 +27,22 @@ main(int argc, char **argv) size_t srclen; char *src = readfile(argv[1], &srclen); + arena a = NULL; + struct type *types; + struct scope *scps; + struct lexemes toks = lexstring(src, srclen); struct ast ast = parsetoks(toks); - struct type *types = analyzeprog(ast, toks); + analyzeprog(ast, toks, &a, &types, &scps); // codegen(argv[1], types, ast, toks); #if DEBUG - free(types); + free(scps); free(src); + free(types); lexemes_free(toks); ast_free(ast); + arena_free(&a); #endif return EXIT_SUCCESS; } diff --git a/src/parser.c b/src/parser.c index 44888ae..5d9bf9c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -10,6 +10,7 @@ #include "common.h" #include "errors.h" #include "parser.h" +#include "strview.h" #if DEBUG # define AST_DFLT_CAP (8) @@ -114,16 +115,17 @@ idx_t_ parsedecl(struct ast *ast, struct lexemes toks, bool toplvl) { idx_t_ i = astalloc(ast); - ast->lexemes[i] = toksidx; bool pub; - if (toplvl && toks.kinds[toksidx] == LEXIDENT && toks.strs[toksidx].len == 3 - && memcmp("pub", toks.strs[toksidx].p, 3) == 0) + if (toplvl && toks.kinds[toksidx] == LEXIDENT + && strview_eq(SV("pub"), toks.strs[toksidx])) { pub = true; - toksidx++; - } else + ast->lexemes[i] = ++toksidx; + } else { pub = false; + ast->lexemes[i] = toksidx; + } if (toks.kinds[toksidx++] != LEXIDENT) err("parser: Expected identifier"); @@ -230,7 +232,7 @@ parsestmt(struct ast *ast, struct lexemes toks) err("parser: Expected identifier"); struct strview sv = toks.strs[toksidx]; - if (sv.len == 6 && memcmp(sv.p, "return", 6) == 0) { + if (strview_eq(SV("return"), sv)) { i = astalloc(ast); ast->lexemes[i] = toksidx++; ast->kinds[i] = ASTRET; diff --git a/src/primitives.gperf b/src/primitives.gperf index 09d9b68..230413f 100644 --- a/src/primitives.gperf +++ b/src/primitives.gperf @@ -16,21 +16,21 @@ struct typeslot { char *name; struct type inner; }; %% -i8, { TYPE_NUM, 1, true, false } -i16, { TYPE_NUM, 2, true, false } -i32, { TYPE_NUM, 4, true, false } -i64, { TYPE_NUM, 8, true, false } -i128, { TYPE_NUM, 16, true, false } -int, { TYPE_NUM, 8, true, false } -u8, { TYPE_NUM, 1, false, false } -u16, { TYPE_NUM, 2, false, false } -u32, { TYPE_NUM, 4, false, false } -u64, { TYPE_NUM, 8, false, false } -u128, { TYPE_NUM, 16, false, false } -uint, { TYPE_NUM, 8, false, false } -rune, { TYPE_NUM , 4, true, false } -f32, { TYPE_NUM, 4, true, true } -f64, { TYPE_NUM, 8, true, true } +i8, { TYPE_NUM, {.size = 1, .issigned=true, .isfloat=false} } +i16, { TYPE_NUM, {.size = 2, .issigned=true, .isfloat=false} } +i32, { TYPE_NUM, {.size = 4, .issigned=true, .isfloat=false} } +i64, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=false} } +i128, { TYPE_NUM, {.size = 16, .issigned=true, .isfloat=false} } +int, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=false} } +u8, { TYPE_NUM, {.size = 1, .issigned=false, .isfloat=false} } +u16, { TYPE_NUM, {.size = 2, .issigned=false, .isfloat=false} } +u32, { TYPE_NUM, {.size = 4, .issigned=false, .isfloat=false} } +u64, { TYPE_NUM, {.size = 8, .issigned=false, .isfloat=false} } +u128, { TYPE_NUM, {.size = 16, .issigned=false, .isfloat=false} } +uint, { TYPE_NUM, {.size = 8, .issigned=false, .isfloat=false} } +rune, { TYPE_NUM ,{.size = 4, .issigned=true, .isfloat=false} } +f32, { TYPE_NUM, {.size = 4, .issigned=true, .isfloat=true } } +f64, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=true } } %% const struct type * typelookup(const uchar *p, size_t len) diff --git a/src/strview.c b/src/strview.c new file mode 100644 index 0000000..f9b80e4 --- /dev/null +++ b/src/strview.c @@ -0,0 +1,23 @@ +#include + +#include "strview.h" +#include "types.h" + +uint64_t +strview_hash(struct strview sv) +{ + uint64_t h = 0x100; + for (size_t i = 0; i < sv.len; i++) { + h ^= sv.p[i]; + h *= 1111111111111111111u; + } + return h; +} + +uchar * +svtocstr(uchar *dst, struct strview src) +{ + memcpy(dst, src.p, src.len); + dst[src.len] = 0; + return dst; +} diff --git a/src/strview.h b/src/strview.h new file mode 100644 index 0000000..1153dcb --- /dev/null +++ b/src/strview.h @@ -0,0 +1,39 @@ +#ifndef ORYX_STRVIEW_H +#define ORYX_STRVIEW_H + +#include +#include +#include +#include + +#include "types.h" + +struct strview { + const uchar *p; + size_t len; +}; + +/* Expand SV into arguments suitable for a call to printf() */ +#define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p) + +/* Convert the string-literal S into a string-view */ +#define SV(s) ((struct strview){s, sizeof(s) - 1}) + +/* Return the hash of SV */ +uint64_t strview_hash(struct strview sv); + + +/* Copy the contents of SV to DST including a null terminator, and return DST */ +uchar *svtocstr(uchar *dst, struct strview sv) + __attribute__((returns_nonnull, nonnull)); + +/* Return whether or not X and Y are equal */ +__attribute__((always_inline)) +static inline bool +strview_eq(struct strview x, struct strview y) +{ + return x.len == y.len && memcmp(x.p, y.p, x.len) == 0; +} + + +#endif /* !ORYX_STRVIEW_H */ diff --git a/src/types.h b/src/types.h index 443aa12..bd28e36 100644 --- a/src/types.h +++ b/src/types.h @@ -1,7 +1,6 @@ #ifndef ORYX_TYPES_H #define ORYX_TYPES_H -#include #include typedef uint32_t idx_t_; @@ -10,9 +9,4 @@ typedef unsigned char uchar; #define RUNE_C(x) UINT32_C(x) -struct strview { - const uchar *p; - size_t len; -}; - #endif /* !ORYX_TYPES_H */ -- cgit v1.2.3