aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/analyzer.c261
-rw-r--r--src/analyzer.h46
-rw-r--r--src/codegen.c269
-rw-r--r--src/lexer.c1
-rw-r--r--src/main.c10
-rw-r--r--src/parser.c14
-rw-r--r--src/primitives.gperf30
-rw-r--r--src/strview.c23
-rw-r--r--src/strview.h39
-rw-r--r--src/types.h6
10 files changed, 451 insertions, 248 deletions
diff --git a/src/analyzer.c b/src/analyzer.c
index d563b84..e842859 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -10,33 +10,64 @@
#include "common.h"
#include "errors.h"
#include "parser.h"
+#include "strview.h"
#include "types.h"
-struct environ {
- struct declaration {
- idx_t_ astidx;
- } *buf;
- size_t len, cap;
-};
+/* A hashmap mapping symbol names to their indicies in the AST */
+typedef struct symtab {
+ struct symtab *child[4];
+ struct strview key;
+ idx_t_ val;
+} symtab;
-struct evstack {
- struct environ *buf;
+/* A dynamic array of scopes */
+struct scopes {
+ struct scope *buf;
size_t len, cap;
};
+/* Analyzer context; keeps track of the state of static analysis */
struct azctx {
+ /* An arena allocator */
+ arena *a;
+
+ /* The return type of the function being analyzed */
struct type fnret;
+
+ /* The name of the symbol being declared. This is necessary to allow
+ for ‘X :: X’ to be treated as shadowing and not a circular
+ definition */
struct strview decl;
+
+ /* If we need to check for return statements. Only true for the
+ outer body-block of a function that returns a value. */
bool chkrets;
+
+ /* The index of the current scope in the scopes array */
+ idx_t_ si;
};
-static void analyzeast(struct evstack, struct type *, struct ast,
- struct lexemes)
+static void analyzeast(struct scope *, struct type *, struct ast,
+ struct lexemes, arena *)
__attribute__((nonnull));
-typedef idx_t_ analyzer(struct azctx, struct evstack, struct type *, struct ast,
+/* Perform a pass over the entire AST and return an array of symbol
+ tables, one for each scope in the program */
+static struct scope *gensymtabs(struct ast, struct lexemes, arena *)
+ __attribute__((returns_nonnull, nonnull));
+
+/* Find all the unordered symbols in the scope delimited by the inclusive
+ indicies BEG and END in the AST, and accumulate them into a symbol
+ table appended to the symbol table list. UP is the index of the
+ previous scopes symbol table in the symbol table list. */
+static void find_unordered_syms(struct scopes *, struct ast, struct lexemes,
+ idx_t_ up, idx_t_ beg, idx_t_ end, arena *)
+ __attribute__((nonnull));
+
+typedef idx_t_ analyzer(struct azctx, struct scope *, struct type *, struct ast,
struct lexemes, idx_t_)
__attribute__((nonnull));
+
static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt;
static const struct type *typegrab(struct ast, struct lexemes, idx_t_)
@@ -44,24 +75,70 @@ static const struct type *typegrab(struct ast, struct lexemes, idx_t_)
static bool typecompat(struct type, struct type);
static bool returns(struct ast, idx_t_);
+/* Index the symbol table M with the key SV, returning a pointer to the
+ value. If no entry exists and A is non-null, a pointer to a newly
+ allocated (and zeroed) value is returned, NULL otherwise. */
+static idx_t_ *symtab_insert(symtab **m, struct strview sv, arena *a)
+ __attribute__((nonnull(1)));
+
/* Defined in primitives.gperf */
const struct type *typelookup(const uchar *, size_t)
__attribute__((nonnull));
-struct type *
-analyzeprog(struct ast ast, struct lexemes toks)
+void
+analyzeprog(struct ast ast, struct lexemes toks, arena *a, struct type **types,
+ struct scope **scps)
+{
+ *types = bufalloc(NULL, ast.len, sizeof(**types));
+ memset(*types, 0, ast.len * sizeof(**types));
+
+ *scps = gensymtabs(ast, toks, a);
+ analyzeast(*scps, *types, ast, toks, a);
+}
+
+struct scope *
+gensymtabs(struct ast ast, struct lexemes toks, arena *a)
{
- arena a = NULL;
- struct evstack evs = {.cap = 16};
- evs.buf = bufalloc(NULL, evs.cap, sizeof(*evs.buf));
- struct type *types = bufalloc(NULL, ast.len, sizeof(*types));
- memset(types, 0, ast.len * sizeof(*types));
+ struct scopes scps = {.cap = 32};
+ scps.buf = bufalloc(NULL, scps.cap, sizeof(*scps.buf));
+ find_unordered_syms(&scps, ast, toks, 0, 0, ast.len - 1, a);
+ return scps.buf;
+}
- analyzeast(evs, types, ast, toks);
+void
+find_unordered_syms(struct scopes *scps, struct ast ast, struct lexemes toks,
+ idx_t_ up, idx_t_ beg, idx_t_ end, arena *a)
+{
+ if (scps->len == scps->cap) {
+ scps->cap *= 2;
+ scps->buf = bufalloc(scps->buf, scps->cap, sizeof(*scps->buf));
+ }
- arena_free(&a);
- free(evs.buf);
- return types;
+ struct scope *scp = scps->buf + scps->len++;
+ *scp = (struct scope){
+ .i = beg,
+ .up = up,
+ .map = NULL,
+ };
+
+ for (idx_t_ i = beg; likely(i <= end); i++) {
+ bool globl_var = ast.kinds[i] <= _AST_DECLS_END && beg == 0;
+ bool const_var = (ast.kinds[i] | 1) == ASTPCDECL;
+
+ if (globl_var || const_var) {
+ struct strview sv = toks.strs[ast.lexemes[i]];
+ idx_t_ *p = symtab_insert(&scp->map, sv, a);
+ if (*p != 0) {
+ err("analyzer: Symbol ‘%.*s’ declared multiple times",
+ SV_PRI_ARGS(sv));
+ }
+ *p = i;
+ } else if (ast.kinds[i] == ASTBLK) {
+ struct pair p = ast.kids[i];
+ find_unordered_syms(scps, ast, toks, beg, p.lhs, p.rhs, a);
+ i = p.rhs;
+ }
+ }
}
const struct type *
@@ -75,39 +152,31 @@ typegrab(struct ast ast, struct lexemes toks, idx_t_ i)
}
void
-analyzeast(struct evstack evs, struct type *types, struct ast ast,
- struct lexemes toks)
+analyzeast(struct scope *scps, struct type *types, struct ast ast,
+ struct lexemes toks, arena *a)
{
- struct environ ev = {.cap = 16};
- ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf));
-
- for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) {
- assert(ast.kinds[i] <= _AST_DECLS_END);
- if (ev.len == ev.cap) {
- ev.cap *= 2;
- ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf));
- }
- ev.buf[ev.len++] = (struct declaration){.astidx = i};
- }
-
- assert(evs.cap > 0);
- evs.buf[0] = ev;
- evs.len = 1;
-
- struct azctx ctx = {0};
+ struct azctx ctx = {.a = a};
for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) {
assert(ast.kinds[i] <= _AST_DECLS_END);
- analyzedecl(ctx, evs, types, ast, toks, i);
+ analyzedecl(ctx, scps, types, ast, toks, i);
}
-
- free(ev.buf);
}
idx_t_
-analyzedecl(struct azctx ctx, struct evstack evs, struct type *types,
+analyzedecl(struct azctx ctx, struct scope *scps, struct type *types,
struct ast ast, struct lexemes toks, idx_t_ i)
{
- ctx.decl = toks.strs[ast.lexemes[i]];
+ struct strview sv = toks.strs[ast.lexemes[i]];
+ if (ctx.si > 0 && (ast.kinds[i] | 1) == ASTPDECL) {
+ idx_t_ *ip = symtab_insert(&scps[ctx.si].map, sv, ctx.a);
+ if (*ip == 0)
+ *ip = i;
+ else {
+ err("analyzer: Variable ‘%.*s’ declared multiple times",
+ SV_PRI_ARGS(sv));
+ }
+ }
+
types[i].kind = TYPE_CHECKING;
struct pair p = ast.kids[i];
@@ -121,7 +190,8 @@ analyzedecl(struct azctx ctx, struct evstack evs, struct type *types,
if (p.lhs != AST_EMPTY)
ltype = *typegrab(ast, toks, p.lhs);
if (p.rhs != AST_EMPTY) {
- ni = analyzeexpr(ctx, evs, types, ast, toks, p.rhs);
+ ctx.decl = sv;
+ ni = analyzeexpr(ctx, scps, types, ast, toks, p.rhs);
rtype = types[p.rhs];
} else
ni = fwdnode(ast, i);
@@ -136,13 +206,13 @@ analyzedecl(struct azctx ctx, struct evstack evs, struct type *types,
}
idx_t_
-analyzestmt(struct azctx ctx, struct evstack evs, struct type *types,
+analyzestmt(struct azctx ctx, struct scope *scps, struct type *types,
struct ast ast, struct lexemes toks, idx_t_ i)
{
switch (ast.kinds[i]) {
case ASTDECL:
case ASTCDECL:
- return analyzedecl(ctx, evs, types, ast, toks, i);
+ return analyzedecl(ctx, scps, types, ast, toks, i);
case ASTRET: {
idx_t_ expr = ast.kids[i].rhs;
if (expr == AST_EMPTY) {
@@ -152,19 +222,18 @@ analyzestmt(struct azctx ctx, struct evstack evs, struct type *types,
} else if (ctx.fnret.kind == TYPE_UNSET)
err("analyzer: Function has no return value");
- idx_t_ ni = analyzeexpr(ctx, evs, types, ast, toks, ast.kids[i].rhs);
+ idx_t_ ni = analyzeexpr(ctx, scps, types, ast, toks, ast.kids[i].rhs);
if (!typecompat(ctx.fnret, types[ast.kids[i].rhs]))
err("analyzer: Return type mismatch");
return ni;
}
default:
- assert(!"unreachable");
__builtin_unreachable();
}
}
idx_t_
-analyzeexpr(struct azctx ctx, struct evstack evs, struct type *types,
+analyzeexpr(struct azctx ctx, struct scope *scps, struct type *types,
struct ast ast, struct lexemes toks, idx_t_ i)
{
switch (ast.kinds[i]) {
@@ -176,49 +245,44 @@ analyzeexpr(struct azctx ctx, struct evstack evs, struct type *types,
case ASTIDENT: {
struct strview sv = toks.strs[ast.lexemes[i]];
- for (size_t lvl = evs.len; lvl-- > 0;) {
- struct environ ev = evs.buf[lvl];
- /* TODO: Binary search */
- for (size_t j = 0; j < ev.len; j++) {
- struct strview sv2 = toks.strs[ast.lexemes[ev.buf[j].astidx]];
- if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0)
- continue;
-
- /* We need this to allow shadowing in situations like
- FOO :FOO */
- if (lvl == evs.len - 1 && sv.len == ctx.decl.len
- && memcmp(sv.p, ctx.decl.p, sv.len) == 0)
- {
- break;
- }
+ /* Variable shadowing */
+ if (strview_eq(sv, ctx.decl) && ctx.si > 0)
+ ctx.si--;
- switch (types[j].kind) {
+ for (idx_t_ lvl = ctx.si;;) {
+ struct scope scp = scps[lvl];
+ idx_t_ *ip = symtab_insert(&scp.map, sv, NULL);
+
+ if (ip == NULL) {
+ if (lvl == 0)
+ break;
+ lvl = scp.up;
+ } else {
+ switch (types[*ip].kind) {
case TYPE_UNSET:
- evs.len = lvl + 1;
- analyzedecl(ctx, evs, types, ast, toks, ev.buf[j].astidx);
+ ctx.si = lvl;
+ analyzedecl(ctx, scps, types, ast, toks, *ip);
break;
case TYPE_CHECKING:
- err("analyzer: Circular definition of ‘%.*s’", (int)sv.len,
- sv.p);
+ err("analyzer: Circular definition of ‘%.*s’", SV_PRI_ARGS(sv));
}
- types[i] = types[j];
+ types[i] = types[*ip];
return i + 1;
}
}
- err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p);
+ err("analyzer: Unknown symbol ‘%.*s’", SV_PRI_ARGS(sv));
}
case ASTFN:
- return analyzefn(ctx, evs, types, ast, toks, i);
+ return analyzefn(ctx, scps, types, ast, toks, i);
default:
- err("analyzer: Unexpected AST kind %u", ast.kinds[i]);
__builtin_unreachable();
}
}
idx_t_
-analyzefn(struct azctx ctx, struct evstack evs, struct type *types,
+analyzefn(struct azctx ctx, struct scope *scps, struct type *types,
struct ast ast, struct lexemes toks, idx_t_ i)
{
struct type t = {.kind = TYPE_FN};
@@ -232,33 +296,17 @@ analyzefn(struct azctx ctx, struct evstack evs, struct type *types,
} else
ctx.fnret.kind = TYPE_UNSET;
types[i] = t;
- return analyzeblk(ctx, evs, types, ast, toks, p.rhs);
+ return analyzeblk(ctx, scps, types, ast, toks, p.rhs);
}
idx_t_
-analyzeblk(struct azctx ctx, struct evstack evs, struct type *types,
+analyzeblk(struct azctx ctx, struct scope *scps, struct type *types,
struct ast ast, struct lexemes toks, idx_t_ i)
{
- struct environ ev = {.cap = 16};
- ev.buf = bufalloc(NULL, ev.cap, sizeof(*ev.buf));
-
struct pair p = ast.kids[i];
- for (idx_t_ i = p.lhs; likely(i < p.rhs); i = fwdnode(ast, i)) {
- if (ast.kinds[i] != ASTCDECL)
- continue;
- if (ev.len == ev.cap) {
- ev.cap *= 2;
- ev.buf = bufalloc(ev.buf, ev.cap, sizeof(*ev.buf));
- }
- ev.buf[ev.len++] = (struct declaration){.astidx = i};
- }
-
- if (evs.len == evs.cap) {
- evs.cap *= 2;
- evs.buf = bufalloc(evs.buf, evs.cap, sizeof(*evs.buf));
- }
- evs.buf[evs.len++] = ev;
+ while (scps[ctx.si].i != p.lhs)
+ ctx.si++;
bool chkrets = ctx.chkrets, hasret = false;
ctx.chkrets = false;
@@ -266,13 +314,11 @@ analyzeblk(struct azctx ctx, struct evstack evs, struct type *types,
for (i = p.lhs; i <= p.rhs;) {
if (chkrets && returns(ast, i))
hasret = true;
- i = analyzestmt(ctx, evs, types, ast, toks, i);
+ i = analyzestmt(ctx, scps, types, ast, toks, i);
}
-
if (chkrets && !hasret)
err("analyzer: Function doesn’t return on all paths");
- free(ev.buf);
return i;
}
@@ -312,3 +358,18 @@ typecompat(struct type lhs, struct type rhs)
return lhs.issigned == rhs.issigned && lhs.isfloat == rhs.isfloat
&& lhs.size == rhs.size;
}
+
+idx_t_ *
+symtab_insert(symtab **m, struct strview k, arena *a)
+{
+ for (uint64_t h = strview_hash(k); *m; h <<= 2) {
+ if (strview_eq(k, (*m)->key))
+ return &(*m)->val;
+ m = &(*m)->child[h >> 62];
+ }
+ if (a == NULL)
+ return NULL;
+ *m = arena_new(a, symtab, 1);
+ (*m)->key = k;
+ return &(*m)->val;
+}
diff --git a/src/analyzer.h b/src/analyzer.h
index 726b4e5..a1c55f5 100644
--- a/src/analyzer.h
+++ b/src/analyzer.h
@@ -3,14 +3,26 @@
#include <stdint.h>
+#include "alloc.h"
#include "lexer.h"
#include "parser.h"
+#include "types.h"
+/* The different base types */
enum {
- TYPE_UNSET = 0,
- TYPE_CHECKING = 1,
+ /* No type exists (or hasn’t yet been typechecked) */
+ TYPE_UNSET,
+
+ /* Currently in the process of being typechecked. Useful for
+ detecting cyclic definitions. */
+ TYPE_CHECKING,
+
+ /* A numeric type */
TYPE_NUM,
+
+ /* A function type */
TYPE_FN,
+
_TYPE_LAST_ENT,
};
@@ -18,18 +30,32 @@ typedef uint8_t type_kind_t_;
static_assert(_TYPE_LAST_ENT - 1 <= (type_kind_t_)-1,
"Too many AST tokens to fix in TYPE_KIND_T_");
+typedef struct symtab symtab;
+
+struct scope {
+ idx_t_ up, i;
+ symtab *map;
+};
+
+/* A variable type */
struct type {
type_kind_t_ kind;
- uint8_t size : 6; /* bytes */
- bool issigned : 1;
- bool isfloat : 1;
- /* For functions */
- const struct type *params, *ret;
- idx_t_ paramcnt;
+ union {
+ struct {
+ uint8_t size;
+ bool issigned;
+ bool isfloat;
+ };
+ struct {
+ const struct type *params, *ret;
+ idx_t_ paramcnt;
+ };
+ };
};
-struct type *analyzeprog(struct ast, struct lexemes)
- __attribute__((returns_nonnull));
+void analyzeprog(struct ast, struct lexemes, arena *, struct type **,
+ struct scope **)
+ __attribute__((nonnull));
#endif /* !ORYX_ANALYZER_H */
diff --git a/src/codegen.c b/src/codegen.c
index 504291a..f04be03 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -1,8 +1,10 @@
#include <assert.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <gmp.h>
#include <llvm-c/Analysis.h>
#include <llvm-c/Core.h>
@@ -13,67 +15,55 @@
#include "parser.h"
#include "types.h"
+typedef mpq_t constval;
+
+typedef struct constmap {
+ struct constmap *child[4];
+ struct strview key;
+ constval val;
+} constmap;
+
+/* A context structure we can pass to all the codegen functions just so they
+ have easy access to everything */
struct cgctx {
+ LLVMContextRef ctx;
LLVMModuleRef mod;
LLVMBuilderRef bob;
struct strview namespace;
+ constmap *consts;
+ arena a;
};
-static size_t codegendecl(struct cgctx, struct type *, struct ast,
- struct lexemes, size_t)
- __attribute__((nonnull));
-static size_t codegenexpr(struct cgctx, struct type *, struct ast,
- struct lexemes, size_t, LLVMValueRef *)
+static void codegenprog(struct cgctx, struct type *, struct ast,
+ struct lexemes)
__attribute__((nonnull));
+// static size_t codegenexpr(struct cgctx, struct type *, struct ast,
+// struct lexemes, size_t, LLVMValueRef *)
+// __attribute__((nonnull));
static LLVMTypeRef type2llvm(struct type);
+static bool constmap_equals(struct strview, struct strview);
+static uint64_t constmap_hash(struct strview);
+static constval *constmap_upsert(constmap **, struct strview, arena *)
+ __attribute__((nonnull(1)));
+
/* TODO: Don’t do this? */
#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
-static struct {
- struct strview key;
- LLVMValueRef val;
-} constants[1024];
-static size_t constcnt;
void
codegen(const char *file, struct type *types, struct ast ast,
struct lexemes toks)
{
struct cgctx ctx = {0};
- ctx.mod = LLVMModuleCreateWithName("oryx");
- ctx.bob = LLVMCreateBuilder();
+ ctx.ctx = LLVMContextCreate();
+ ctx.mod = LLVMModuleCreateWithNameInContext("oryx", ctx.ctx);
+ ctx.bob = LLVMCreateBuilderInContext(ctx.ctx);
LLVMSetSourceFileName(ctx.mod, file, strlen(file));
- for (size_t i = 0; i < ast.len;) {
- // LLVMValueRef val;
- assert(ast.kinds[i] == ASTDECL || ast.kinds[i] == ASTCDECL);
- i = codegendecl(ctx, types, ast, toks, i);
-
- /* TODO: Temporary allocator */
- // struct strview sv = toks.strs[ast.lexemes[i]];
- // char *name = bufalloc(NULL, sv.len + 1, 1);
- // ((uchar *)memcpy(name, sv.p, sv.len))[sv.len] = 0;
- //
- // LLVMValueRef globl, init;
- // LLVMTypeRef vartype = type2llvm(types[i]);
- //
- // globl = LLVMAddGlobal(mod, vartype, name);
- // LLVMSetGlobalConstant(globl, ast.kinds[i] == ASTCDECL);
- //
- // if (ast.kids[i].rhs != AST_EMPTY) {
- // i = codegenexpr(bob, types, ast, toks, ast.kids[i].rhs, &init);
- // init = LLVMConstTrunc(init, vartype);
- // } else {
- // init = LLVMConstNull(vartype);
- // i = fwdnode(ast, i);
- // }
- //
- // LLVMSetInitializer(globl, init);
- // LLVMSetLinkage(globl, LLVMPrivateLinkage);
- //
- // free(name);
- }
+ codegenprog(ctx, types, ast, toks);
+
+ arena_free(&ctx.a);
LLVMDisposeBuilder(ctx.bob);
@@ -83,82 +73,117 @@ codegen(const char *file, struct type *types, struct ast ast,
LLVMDumpModule(ctx.mod);
LLVMDisposeModule(ctx.mod);
+ LLVMContextDispose(ctx.ctx);
}
-size_t
-codegendecl(struct cgctx ctx, struct type *types, struct ast ast,
- struct lexemes toks, size_t i)
+void
+interp_const_expr(struct cgctx ctx, struct type *types, struct ast ast,
+ struct lexemes toks, idx_t_ i, constval v)
{
- struct strview ident = toks.strs[ast.lexemes[i]];
-
- char *name;
- if (ctx.namespace.len != 0) {
- size_t namelen = ident.len + ctx.namespace.len + 1;
- name = bufalloc(NULL, namelen + 1, 1);
- sprintf(name, "%.*s.%.*s", (int)ctx.namespace.len, ctx.namespace.p,
- (int)ident.len, ident.p);
- } else {
- name = bufalloc(NULL, ident.len + 1, 1);
- memcpy(name, ident.p, ident.len);
- name[ident.len] = 0;
- }
+ switch (ast.kinds[i]) {
+ case ASTNUMLIT: {
+ mpf_t f;
+ mpq_init(v);
- LLVMValueRef val;
- LLVMTypeRef vartype = type2llvm(types[i]);
+ struct strview sv = toks.strs[ast.lexemes[i]];
+ char *buf = bufalloc(NULL, sv.len + 1, 1);
+ memcpy(buf, sv.p, sv.len);
+ buf[sv.len] = 0;
- if (ast.kids[i].rhs != AST_EMPTY) {
- i = codegenexpr(ctx, types, ast, toks, ast.kids[i].rhs, &val);
- val = LLVMConstTrunc(val, vartype);
- } else {
- i = fwdnode(ast, i);
- val = LLVMConstNull(vartype);
+ if (mpf_init_set_str(f, buf, 10) == -1)
+ err("mpf_init_set_str: Invalid input ‘%s’", buf);
+ free(buf);
+
+ mpq_set_f(v, f);
+ mpf_clear(f);
+ break;
+ }
+ case ASTIDENT: {
+ struct strview sv = toks.strs[ast.lexemes[i]];
+ constval *w = constmap_upsert(&ctx.consts, sv, NULL);
+ v = *w;
+ break;
+ }
+ default:
+ err("codegen: Not implemented");
}
+}
+
+void
+register_const(struct cgctx ctx, struct type *types, struct ast ast,
+ struct lexemes toks, idx_t_ i)
+{
+ struct pair p = ast.kids[i];
+ assert(p.rhs != AST_EMPTY);
+
+ constval v;
+ interp_const_expr(ctx, types, ast, toks, p.rhs, v);
- LLVMValueRef globl = LLVMAddGlobal(ctx.mod, vartype, name);
- LLVMSetInitializer(globl, val);
- LLVMSetLinkage(globl, LLVMLinkerPrivateLinkage);
+ struct strview sv = toks.strs[ast.lexemes[i]];
+ *constmap_upsert(&ctx.consts, sv, &ctx.a) = v;
- free(name);
- return i;
+ /* TODO: Remove */
+ printf("%.*s = ", (int)sv.len, sv.p);
+ mpq_out_str(stdout, 10, v);
+ putchar('\n');
+ mpq_clear(v);
}
-size_t
-codegenexpr(struct cgctx ctx, struct type *types, struct ast ast,
- struct lexemes toks, size_t i, LLVMValueRef *v)
+void
+codegenprog(struct cgctx ctx, struct type *types, struct ast ast,
+ struct lexemes toks)
{
- (void)ctx;
- switch (ast.kinds[i]) {
- case ASTNUMLIT: {
- /* TODO: Arbitrary precision? */
- struct strview sv = toks.strs[ast.lexemes[i]];
+ for (size_t i = 0; i < ast.len; i = fwdnode(ast, i)) {
+ if ((ast.kinds[i] | 1) == ASTPCDECL)
+ register_const(ctx, types, ast, toks, i);
- bool has_sep = memchr(sv.p, '\'', sv.len) != NULL;
-
- /* TODO: Temporary one-time-use allocator? */
- if (has_sep) {
- size_t len = 0;
- char *p = bufalloc(NULL, sv.len, 1);
- for (size_t i = 0; i < sv.len; i++) {
- if (sv.p[i] != '\'')
- p[len++] = sv.p[i];
- }
-
- *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), p, len, 10);
- free(p);
- } else {
- *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), sv.p, sv.len,
- 10);
- }
- return i + 1;
- }
- case ASTIDENT:
- err("codegen: %s: Not implemented", __func__);
- default:
- assert(!"unreachable");
- __builtin_unreachable();
+ // mpq_t v = interp_const_expr(ctx, ast, toks, p.rhs);
+ // constmap_upsert(&ctx.consts, ast.lexemes[p.lhs], &ctx.a);
}
+
+ // for (size_t i = 0; i < ast.len;) {
+ // assert(ast.kinds[i] <= _AST_DECLS_END);
+ // i = codegenprog(ctx, types, ast, toks, i);
+ // }
}
+// size_t
+// codegenexpr(struct cgctx ctx, struct type *types, struct ast ast,
+// struct lexemes toks, size_t i, LLVMValueRef *v)
+// {
+// (void)ctx;
+// switch (ast.kinds[i]) {
+// case ASTNUMLIT: {
+// /* TODO: Arbitrary precision? */
+// struct strview sv = toks.strs[ast.lexemes[i]];
+//
+// bool has_sep = memchr(sv.p, '\'', sv.len) != NULL;
+//
+// /* TODO: Temporary one-time-use allocator? */
+// if (has_sep) {
+// size_t len = 0;
+// char *p = bufalloc(NULL, sv.len, 1);
+// for (size_t i = 0; i < sv.len; i++) {
+// if (sv.p[i] != '\'')
+// p[len++] = sv.p[i];
+// }
+//
+// *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), p, len, 10);
+// free(p);
+// } else {
+// *v = LLVMConstIntOfStringAndSize(type2llvm(types[i]), sv.p, sv.len,
+// 10);
+// }
+// return i + 1;
+// }
+// case ASTIDENT:
+// err("codegen: %s: Not implemented", __func__);
+// default:
+// assert(!"unreachable");
+// __builtin_unreachable();
+// }
+// }
+
LLVMTypeRef
type2llvm(struct type t)
{
@@ -169,14 +194,8 @@ type2llvm(struct type t)
__builtin_unreachable();
case TYPE_FN:
err("codegen: %s: Not implemented", __func__);
- case TYPE_F16:
- return LLVMHalfType();
- case TYPE_F32:
- return LLVMFloatType();
- case TYPE_F64:
- return LLVMDoubleType();
case TYPE_NUM:
- assert(t.issigned);
+ /* TODO: Floats */
/* TODO: Arbitrary precision */
if (t.size == 0)
return LLVMInt64Type();
@@ -185,3 +204,35 @@ type2llvm(struct type t)
__builtin_unreachable();
}
}
+
+bool
+constmap_equals(struct strview x, struct strview y)
+{
+ return x.len == y.len && memcmp(x.p, y.p, x.len) == 0;
+}
+
+uint64_t
+constmap_hash(struct strview sv)
+{
+ uint64_t h = 0x100;
+ for (size_t i = 0; i < sv.len; i++) {
+ h ^= sv.p[i];
+ h *= 1111111111111111111u;
+ }
+ return h;
+}
+
+constval *
+constmap_upsert(constmap **m, struct strview key, arena *a)
+{
+ for (uint64_t h = constmap_hash(key); *m; h <<= 2) {
+ if (constmap_equals(key, (*m)->key))
+ return &(*m)->val;
+ m = &(*m)->child[h >> 62];
+ }
+ if (a == NULL)
+ return NULL;
+ *m = arena_new(a, constmap, 1);
+ (*m)->key = key;
+ return &(*m)->val;
+}
diff --git a/src/lexer.c b/src/lexer.c
index 25e41c7..a980812 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -12,6 +12,7 @@
#include "common.h"
#include "errors.h"
#include "lexer.h"
+#include "strview.h"
#include "unicode.h"
#if DEBUG
diff --git a/src/main.c b/src/main.c
index 6cb715a..05228d7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -27,16 +27,22 @@ main(int argc, char **argv)
size_t srclen;
char *src = readfile(argv[1], &srclen);
+ arena a = NULL;
+ struct type *types;
+ struct scope *scps;
+
struct lexemes toks = lexstring(src, srclen);
struct ast ast = parsetoks(toks);
- struct type *types = analyzeprog(ast, toks);
+ analyzeprog(ast, toks, &a, &types, &scps);
// codegen(argv[1], types, ast, toks);
#if DEBUG
- free(types);
+ free(scps);
free(src);
+ free(types);
lexemes_free(toks);
ast_free(ast);
+ arena_free(&a);
#endif
return EXIT_SUCCESS;
}
diff --git a/src/parser.c b/src/parser.c
index 44888ae..5d9bf9c 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -10,6 +10,7 @@
#include "common.h"
#include "errors.h"
#include "parser.h"
+#include "strview.h"
#if DEBUG
# define AST_DFLT_CAP (8)
@@ -114,16 +115,17 @@ idx_t_
parsedecl(struct ast *ast, struct lexemes toks, bool toplvl)
{
idx_t_ i = astalloc(ast);
- ast->lexemes[i] = toksidx;
bool pub;
- if (toplvl && toks.kinds[toksidx] == LEXIDENT && toks.strs[toksidx].len == 3
- && memcmp("pub", toks.strs[toksidx].p, 3) == 0)
+ if (toplvl && toks.kinds[toksidx] == LEXIDENT
+ && strview_eq(SV("pub"), toks.strs[toksidx]))
{
pub = true;
- toksidx++;
- } else
+ ast->lexemes[i] = ++toksidx;
+ } else {
pub = false;
+ ast->lexemes[i] = toksidx;
+ }
if (toks.kinds[toksidx++] != LEXIDENT)
err("parser: Expected identifier");
@@ -230,7 +232,7 @@ parsestmt(struct ast *ast, struct lexemes toks)
err("parser: Expected identifier");
struct strview sv = toks.strs[toksidx];
- if (sv.len == 6 && memcmp(sv.p, "return", 6) == 0) {
+ if (strview_eq(SV("return"), sv)) {
i = astalloc(ast);
ast->lexemes[i] = toksidx++;
ast->kinds[i] = ASTRET;
diff --git a/src/primitives.gperf b/src/primitives.gperf
index 09d9b68..230413f 100644
--- a/src/primitives.gperf
+++ b/src/primitives.gperf
@@ -16,21 +16,21 @@
struct typeslot { char *name; struct type inner; };
%%
-i8, { TYPE_NUM, 1, true, false }
-i16, { TYPE_NUM, 2, true, false }
-i32, { TYPE_NUM, 4, true, false }
-i64, { TYPE_NUM, 8, true, false }
-i128, { TYPE_NUM, 16, true, false }
-int, { TYPE_NUM, 8, true, false }
-u8, { TYPE_NUM, 1, false, false }
-u16, { TYPE_NUM, 2, false, false }
-u32, { TYPE_NUM, 4, false, false }
-u64, { TYPE_NUM, 8, false, false }
-u128, { TYPE_NUM, 16, false, false }
-uint, { TYPE_NUM, 8, false, false }
-rune, { TYPE_NUM , 4, true, false }
-f32, { TYPE_NUM, 4, true, true }
-f64, { TYPE_NUM, 8, true, true }
+i8, { TYPE_NUM, {.size = 1, .issigned=true, .isfloat=false} }
+i16, { TYPE_NUM, {.size = 2, .issigned=true, .isfloat=false} }
+i32, { TYPE_NUM, {.size = 4, .issigned=true, .isfloat=false} }
+i64, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=false} }
+i128, { TYPE_NUM, {.size = 16, .issigned=true, .isfloat=false} }
+int, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=false} }
+u8, { TYPE_NUM, {.size = 1, .issigned=false, .isfloat=false} }
+u16, { TYPE_NUM, {.size = 2, .issigned=false, .isfloat=false} }
+u32, { TYPE_NUM, {.size = 4, .issigned=false, .isfloat=false} }
+u64, { TYPE_NUM, {.size = 8, .issigned=false, .isfloat=false} }
+u128, { TYPE_NUM, {.size = 16, .issigned=false, .isfloat=false} }
+uint, { TYPE_NUM, {.size = 8, .issigned=false, .isfloat=false} }
+rune, { TYPE_NUM ,{.size = 4, .issigned=true, .isfloat=false} }
+f32, { TYPE_NUM, {.size = 4, .issigned=true, .isfloat=true } }
+f64, { TYPE_NUM, {.size = 8, .issigned=true, .isfloat=true } }
%%
const struct type *
typelookup(const uchar *p, size_t len)
diff --git a/src/strview.c b/src/strview.c
new file mode 100644
index 0000000..f9b80e4
--- /dev/null
+++ b/src/strview.c
@@ -0,0 +1,23 @@
+#include <string.h>
+
+#include "strview.h"
+#include "types.h"
+
+uint64_t
+strview_hash(struct strview sv)
+{
+ uint64_t h = 0x100;
+ for (size_t i = 0; i < sv.len; i++) {
+ h ^= sv.p[i];
+ h *= 1111111111111111111u;
+ }
+ return h;
+}
+
+uchar *
+svtocstr(uchar *dst, struct strview src)
+{
+ memcpy(dst, src.p, src.len);
+ dst[src.len] = 0;
+ return dst;
+}
diff --git a/src/strview.h b/src/strview.h
new file mode 100644
index 0000000..1153dcb
--- /dev/null
+++ b/src/strview.h
@@ -0,0 +1,39 @@
+#ifndef ORYX_STRVIEW_H
+#define ORYX_STRVIEW_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "types.h"
+
+struct strview {
+ const uchar *p;
+ size_t len;
+};
+
+/* Expand SV into arguments suitable for a call to printf() */
+#define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p)
+
+/* Convert the string-literal S into a string-view */
+#define SV(s) ((struct strview){s, sizeof(s) - 1})
+
+/* Return the hash of SV */
+uint64_t strview_hash(struct strview sv);
+
+
+/* Copy the contents of SV to DST including a null terminator, and return DST */
+uchar *svtocstr(uchar *dst, struct strview sv)
+ __attribute__((returns_nonnull, nonnull));
+
+/* Return whether or not X and Y are equal */
+__attribute__((always_inline))
+static inline bool
+strview_eq(struct strview x, struct strview y)
+{
+ return x.len == y.len && memcmp(x.p, y.p, x.len) == 0;
+}
+
+
+#endif /* !ORYX_STRVIEW_H */
diff --git a/src/types.h b/src/types.h
index 443aa12..bd28e36 100644
--- a/src/types.h
+++ b/src/types.h
@@ -1,7 +1,6 @@
#ifndef ORYX_TYPES_H
#define ORYX_TYPES_H
-#include <stddef.h>
#include <stdint.h>
typedef uint32_t idx_t_;
@@ -10,9 +9,4 @@ typedef unsigned char uchar;
#define RUNE_C(x) UINT32_C(x)
-struct strview {
- const uchar *p;
- size_t len;
-};
-
#endif /* !ORYX_TYPES_H */