aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-06-17 16:21:07 +0200
committerThomas Voss <mail@thomasvoss.com> 2024-06-17 16:21:07 +0200
commit7e6ade6682c26d7ed40eaa22ae152686d44982d0 (patch)
treef7ba34b0f1c130ea3c87f94cb4b9b3ed91bdcd37 /src
parent49b8874884185606319de5c44bc90c5e78d9f48f (diff)
Begin work on the static analyzer
Diffstat (limited to 'src')
-rw-r--r--src/alloc.h9
-rw-r--r--src/analyzer.c211
-rw-r--r--src/analyzer.h39
-rw-r--r--src/arena.c43
-rw-r--r--src/codegen.c1
-rw-r--r--src/common.h5
-rw-r--r--src/main.c2
-rw-r--r--src/parser.h2
-rw-r--r--src/primitives.gperf36
9 files changed, 344 insertions, 4 deletions
diff --git a/src/alloc.h b/src/alloc.h
index 004e6ec..770adba 100644
--- a/src/alloc.h
+++ b/src/alloc.h
@@ -16,9 +16,13 @@ void *bufalloc(void *ptr, size_t nmemb, size_t size)
/* Allocate a buffer of NMEMB elements of size SIZE with alignment ALIGN using
the arena-allocator A. */
void *arena_alloc(arena *a, size_t nmemb, size_t size, size_t align)
- __attribute__((returns_nonnull, warn_unused_result, malloc,
+ __attribute__((returns_nonnull, nonnull, warn_unused_result, malloc,
alloc_size(2, 3), alloc_align(4)));
+void *_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb,
+ size_t size, size_t align)
+ __attribute__((returns_nonnull, nonnull, warn_unused_result));
+
/* Deallocate all memory associated with the arena A. */
void arena_free(arena *a)
__attribute__((nonnull));
@@ -26,4 +30,7 @@ void arena_free(arena *a)
/* Allocate a buffer of N elements of type T using the arena-allocator A. */
#define arena_new(a, T, n) ((T *)arena_alloc((a), (n), sizeof(T), alignof(T)))
+#define arena_grow(a, p, T, on, nn) \
+ ((T *)_arena_grow((a), (p), (on), (nn), sizeof(T), alignof(T)))
+
#endif /* !ORYX_ALLOC_H */
diff --git a/src/analyzer.c b/src/analyzer.c
new file mode 100644
index 0000000..6687856
--- /dev/null
+++ b/src/analyzer.c
@@ -0,0 +1,211 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc.h"
+#include "analyzer.h"
+#include "common.h"
+#include "errors.h"
+#include "parser.h"
+#include "types.h"
+
+struct environ {
+ idx_t_ up;
+ struct constdecl {
+ struct type type;
+ idx_t_ astidx;
+ } *buf;
+ size_t len, cap;
+};
+
+struct environs {
+ struct environ *buf;
+ size_t len, cap;
+};
+
+static struct environ *create_environments(struct ast, struct lexemes,
+ struct environs *, idx_t_, idx_t_,
+ arena *)
+ __attribute__((nonnull));
+static void typecheck_environment(struct environs, struct ast, struct lexemes,
+ idx_t_);
+
+/* Defined in primitives.gperf */
+const struct type *typelookup(const uchar *, size_t)
+ __attribute__((nonnull));
+
+void
+analyzeast(struct ast ast, struct lexemes toks)
+{
+ arena a = NULL;
+ struct environs evs = {0};
+
+ create_environments(ast, toks, &evs, 0, ast.len - 1, &a);
+ for (size_t i = 0; i < evs.len; i++)
+ typecheck_environment(evs, ast, toks, i);
+
+ arena_free(&a);
+ free(evs.buf);
+}
+
+struct environ *
+create_environments(struct ast ast, struct lexemes toks, struct environs *evs,
+ idx_t_ beg, idx_t_ end, arena *a)
+{
+ assert(evs != NULL);
+
+ if (evs->len == evs->cap) {
+ evs->cap = evs->cap == 0 ? 8 : evs->cap * 2;
+ evs->buf = bufalloc(evs->buf, evs->cap, sizeof(*evs->buf));
+ }
+
+ struct environ *ev = evs->buf + evs->len++;
+ *ev = (struct environ){.cap = 16};
+ ev->buf = arena_new(a, struct constdecl, ev->cap);
+
+ for (idx_t_ i = beg; likely(i <= end); i++) {
+ switch (ast.kinds[i]) {
+ case ASTCDECL: {
+ struct constdecl cd = {.astidx = i};
+ struct strview sv = toks.strs[ast.lexemes[i]];
+
+ /* TODO: Sorted insert and existence check */
+ for (size_t i = 0; i < ev->len; i++) {
+ struct strview sv2 = toks.strs[ast.lexemes[ev->buf[i].astidx]];
+ if (sv.len == sv2.len && memcmp(sv.p, sv2.p, sv.len) == 0) {
+ err("analyzer: Constant ‘%.*s’ declared multiple times",
+ (int)sv.len, sv.p);
+ }
+ }
+
+ if (ev->len == ev->cap) {
+ ev->buf = arena_grow(a, ev->buf, struct constdecl, ev->cap,
+ ev->cap * 2);
+ ev->cap *= 2;
+ }
+ ev->buf[ev->len++] = cd;
+ break;
+ }
+ case ASTBLK: {
+ idx_t_ lhs, rhs, up;
+ lhs = ast.kids[i].lhs;
+ rhs = ast.kids[i].rhs;
+ up = ev - evs->buf;
+ create_environments(ast, toks, evs, lhs, rhs, a)->up = up;
+ i = rhs;
+ break;
+ }
+ }
+ }
+
+ return ev;
+}
+
+static struct type typegrab(struct ast, struct lexemes, idx_t_);
+static struct type typecheck_constdecl(struct environs, struct ast,
+ struct lexemes, idx_t_, idx_t_);
+static struct type typecheck_expr(struct environs, struct ast, struct lexemes,
+ idx_t_, idx_t_);
+static bool typecompat(struct type, struct type);
+
+void
+typecheck_environment(struct environs evs, struct ast ast, struct lexemes toks,
+ idx_t_ i)
+{
+ struct environ ev = evs.buf[i];
+ for (size_t j = 0; j < ev.len; j++)
+ typecheck_constdecl(evs, ast, toks, j, i);
+}
+
+struct type
+typegrab(struct ast ast, struct lexemes toks, idx_t_ i)
+{
+ struct strview sv = toks.strs[ast.lexemes[i]];
+ const struct type *tp = typelookup(sv.p, sv.len);
+ if (tp == NULL)
+ err("analyzer: Unknown type ‘%.*s’", (int)sv.len, sv.p);
+ return *tp;
+}
+
+struct type
+typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks,
+ idx_t_ i, idx_t_ evi)
+{
+ struct environ ev = evs.buf[evi];
+ struct constdecl cd = ev.buf[i];
+ if (cd.type.kind != TYPE_UNSET)
+ return cd.type;
+ ev.buf[i].type.kind = TYPE_CHECKING;
+
+ struct pair p = ast.kids[cd.astidx];
+ struct type ltype, rtype;
+ ltype.kind = TYPE_UNSET;
+
+ assert(p.rhs != AST_EMPTY);
+
+ if (p.lhs != AST_EMPTY)
+ ltype = typegrab(ast, toks, p.lhs);
+ rtype = typecheck_expr(evs, ast, toks, p.rhs, evi);
+
+ if (ltype.kind == TYPE_UNSET)
+ ltype = rtype;
+ else if (!typecompat(ltype, rtype))
+ err("analyzer: Type mismatch");
+
+ return ev.buf[i].type = ltype;
+}
+
+struct type
+typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks,
+ idx_t_ i, idx_t_ evi)
+{
+ switch (ast.kinds[i]) {
+ case ASTNUMLIT:
+ return (struct type){.kind = TYPE_INT_UNTYPED, .issigned = true};
+ case ASTIDENT: {
+ struct environ ev = evs.buf[evi];
+ struct strview sv = toks.strs[ast.lexemes[i]];
+
+ for (;;) {
+ for (size_t i = 0; i < ev.len; i++) {
+ struct strview sv2 = toks.strs[ast.lexemes[ev.buf[i].astidx]];
+ if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0)
+ continue;
+ struct type t = typecheck_constdecl(evs, ast, toks, i, evi);
+ if (t.kind == TYPE_CHECKING) {
+ err("analyzer: Circular dependency for type ‘%.*s’",
+ (int)sv2.len, sv2.p);
+ }
+ return t;
+ }
+ if (evi == 0)
+ err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p);
+ evi = ev.up;
+ }
+ }
+ case ASTFN:
+ assert(!"not implemented");
+ default:
+ err("analyzer: Unexpected AST kind %u", ast.kinds[i]);
+ __builtin_unreachable();
+ }
+}
+
+bool
+typecompat(struct type lhs, struct type rhs)
+{
+ if (lhs.kind == rhs.kind)
+ return true;
+
+ /* TODO: Need to actually parse it! 256 should not coerce to i8. */
+ if (rhs.kind == TYPE_INT_UNTYPED)
+ return true;
+
+ if (lhs.issigned != rhs.issigned)
+ return false;
+
+ return lhs.size >= rhs.size;
+}
diff --git a/src/analyzer.h b/src/analyzer.h
new file mode 100644
index 0000000..c3c9b57
--- /dev/null
+++ b/src/analyzer.h
@@ -0,0 +1,39 @@
+#ifndef ORYX_ANALYZER_H
+#define ORYX_ANALYZER_H
+
+#include <stdint.h>
+
+#include "lexer.h"
+#include "parser.h"
+
+enum {
+ TYPE_UNSET = 0,
+ TYPE_CHECKING = 1,
+
+ TYPE_I8,
+ TYPE_I16,
+ TYPE_I32,
+ TYPE_I64,
+ TYPE_INT,
+ TYPE_INT_UNTYPED,
+
+ TYPE_U8,
+ TYPE_U16,
+ TYPE_U32,
+ TYPE_U64,
+ TYPE_UINT,
+
+ TYPE_RUNE,
+};
+
+typedef uint8_t type_kind_t_;
+
+struct type {
+ type_kind_t_ kind;
+ uint8_t size : 7; /* bytes */
+ bool issigned : 1;
+};
+
+void analyzeast(struct ast, struct lexemes);
+
+#endif /* !ORYX_ANALYZER_H */
diff --git a/src/arena.c b/src/arena.c
index 7ca5626..5b50404 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -5,6 +5,7 @@
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
+#include <string.h>
#include "alloc.h"
#include "common.h"
@@ -69,6 +70,48 @@ arena_alloc(struct _arena **a, size_t nmemb, size_t size, size_t align)
return p->data;
}
+void *
+_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb,
+ size_t size, size_t align)
+{
+ assert(IS_POW_2(align));
+ assert(new_nmemb * size != 0);
+
+ if (size > SIZE_MAX / new_nmemb) {
+ errno = EOVERFLOW;
+ err("%s:", __func__);
+ }
+
+ size *= new_nmemb;
+
+ for (struct _arena *p = *a; p != NULL; p = p->next) {
+ if (ptr < p->data || ptr > p->last)
+ continue;
+
+ /* If we need to grow the given allocation, but it was the last
+ allocation made in a region, then we first see if we can just eat
+ more trailing free space in the region to avoid a memcpy(). */
+ if (ptr == p->last) {
+ size_t rest = p->cap - p->len;
+ size_t need = (new_nmemb - old_nmemb) * size;
+ if (need <= rest) {
+ p->len += need;
+ return ptr;
+ }
+ }
+
+ void *dst = arena_alloc(a, new_nmemb, size, align);
+ return memcpy(dst, ptr, old_nmemb * size);
+ }
+
+#if DEBUG
+ err("%s:%d: tried to resize pointer that wasn’t allocated", __func__,
+ __LINE__);
+#else
+ __builtin_unreachable();
+#endif
+}
+
void
arena_free(struct _arena **a)
{
diff --git a/src/codegen.c b/src/codegen.c
index 7f5d010..dd21ad7 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -20,6 +20,7 @@ static size_t codegenexpr(LLVMBuilderRef, struct ast, struct lexemes,
void
codegen(struct ast ast, struct lexemes toks)
{
+ exit(EXIT_SUCCESS);
LLVMModuleRef mod = LLVMModuleCreateWithName("oryx");
for (size_t i = 0; i < ast.len;) {
diff --git a/src/common.h b/src/common.h
index be88f0b..5756c83 100644
--- a/src/common.h
+++ b/src/common.h
@@ -6,8 +6,9 @@
# define unlikely(x) __builtin_expect(!!(x), 0)
#else
# define __attribute__(x)
-# define likely(x) (x)
-# define unlikely(x) (x)
+# define __builtin_unreachable() (((char *)0)[0] = 0)
+# define likely(x) (x)
+# define unlikely(x) (x)
#endif
#endif /* !ORYX_COMMON_H */
diff --git a/src/main.c b/src/main.c
index f478b6d..b32a723 100644
--- a/src/main.c
+++ b/src/main.c
@@ -6,6 +6,7 @@
#include <unistd.h>
#include "alloc.h"
+#include "analyzer.h"
#include "codegen.h"
#include "common.h"
#include "errors.h"
@@ -28,6 +29,7 @@ main(int argc, char **argv)
struct lexemes toks = lexstring(src, srclen);
struct ast ast = parsetoks(toks);
+ analyzeast(ast, toks);
codegen(ast, toks);
#if DEBUG
diff --git a/src/parser.h b/src/parser.h
index 7883722..5ecf8ef 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -14,7 +14,7 @@ enum {
‘x: lhs = rhs’ */
ASTDECL,
- /* Constant declaration, lhs and rhs may be unused
+ /* Constant declaration, lhs may be unused
‘x: lhs : rhs’ */
ASTCDECL,
diff --git a/src/primitives.gperf b/src/primitives.gperf
new file mode 100644
index 0000000..449a030
--- /dev/null
+++ b/src/primitives.gperf
@@ -0,0 +1,36 @@
+%compare-strncmp
+%includes
+%language=ANSI-C
+%readonly-tables
+%struct-type
+
+%{
+#include <stdbool.h>
+
+#include "analyzer.h"
+#include "types.h"
+
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+%}
+
+struct typeslot { char *name; struct type inner; };
+%%
+i8, { TYPE_I8, 1, true }
+i16, { TYPE_I16, 2, true }
+i32, { TYPE_I32, 4, true }
+i64, { TYPE_I64, 8, true }
+int, { TYPE_INT, 8, true }
+u8, { TYPE_U8, 1, false }
+u16, { TYPE_U16, 2, false }
+u32, { TYPE_U32, 4, false }
+u64, { TYPE_U64, 8, false }
+uint, { TYPE_UINT, 8, false }
+rune, { TYPE_RUNE, 4, true }
+%%
+const struct type *
+typelookup(const uchar *p, size_t len)
+{
+ const struct typeslot *tp = in_word_set(p, len);
+ return tp == NULL ? NULL : &tp->inner;
+}