From 7e6ade6682c26d7ed40eaa22ae152686d44982d0 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Mon, 17 Jun 2024 16:21:07 +0200 Subject: Begin work on the static analyzer --- src/alloc.h | 9 ++- src/analyzer.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/analyzer.h | 39 ++++++++++ src/arena.c | 43 +++++++++++ src/codegen.c | 1 + src/common.h | 5 +- src/main.c | 2 + src/parser.h | 2 +- src/primitives.gperf | 36 +++++++++ 9 files changed, 344 insertions(+), 4 deletions(-) create mode 100644 src/analyzer.c create mode 100644 src/analyzer.h create mode 100644 src/primitives.gperf (limited to 'src') diff --git a/src/alloc.h b/src/alloc.h index 004e6ec..770adba 100644 --- a/src/alloc.h +++ b/src/alloc.h @@ -16,9 +16,13 @@ void *bufalloc(void *ptr, size_t nmemb, size_t size) /* Allocate a buffer of NMEMB elements of size SIZE with alignment ALIGN using the arena-allocator A. */ void *arena_alloc(arena *a, size_t nmemb, size_t size, size_t align) - __attribute__((returns_nonnull, warn_unused_result, malloc, + __attribute__((returns_nonnull, nonnull, warn_unused_result, malloc, alloc_size(2, 3), alloc_align(4))); +void *_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb, + size_t size, size_t align) + __attribute__((returns_nonnull, nonnull, warn_unused_result)); + /* Deallocate all memory associated with the arena A. */ void arena_free(arena *a) __attribute__((nonnull)); @@ -26,4 +30,7 @@ void arena_free(arena *a) /* Allocate a buffer of N elements of type T using the arena-allocator A. */ #define arena_new(a, T, n) ((T *)arena_alloc((a), (n), sizeof(T), alignof(T))) +#define arena_grow(a, p, T, on, nn) \ + ((T *)_arena_grow((a), (p), (on), (nn), sizeof(T), alignof(T))) + #endif /* !ORYX_ALLOC_H */ diff --git a/src/analyzer.c b/src/analyzer.c new file mode 100644 index 0000000..6687856 --- /dev/null +++ b/src/analyzer.c @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include +#include + +#include "alloc.h" +#include "analyzer.h" +#include "common.h" +#include "errors.h" +#include "parser.h" +#include "types.h" + +struct environ { + idx_t_ up; + struct constdecl { + struct type type; + idx_t_ astidx; + } *buf; + size_t len, cap; +}; + +struct environs { + struct environ *buf; + size_t len, cap; +}; + +static struct environ *create_environments(struct ast, struct lexemes, + struct environs *, idx_t_, idx_t_, + arena *) + __attribute__((nonnull)); +static void typecheck_environment(struct environs, struct ast, struct lexemes, + idx_t_); + +/* Defined in primitives.gperf */ +const struct type *typelookup(const uchar *, size_t) + __attribute__((nonnull)); + +void +analyzeast(struct ast ast, struct lexemes toks) +{ + arena a = NULL; + struct environs evs = {0}; + + create_environments(ast, toks, &evs, 0, ast.len - 1, &a); + for (size_t i = 0; i < evs.len; i++) + typecheck_environment(evs, ast, toks, i); + + arena_free(&a); + free(evs.buf); +} + +struct environ * +create_environments(struct ast ast, struct lexemes toks, struct environs *evs, + idx_t_ beg, idx_t_ end, arena *a) +{ + assert(evs != NULL); + + if (evs->len == evs->cap) { + evs->cap = evs->cap == 0 ? 8 : evs->cap * 2; + evs->buf = bufalloc(evs->buf, evs->cap, sizeof(*evs->buf)); + } + + struct environ *ev = evs->buf + evs->len++; + *ev = (struct environ){.cap = 16}; + ev->buf = arena_new(a, struct constdecl, ev->cap); + + for (idx_t_ i = beg; likely(i <= end); i++) { + switch (ast.kinds[i]) { + case ASTCDECL: { + struct constdecl cd = {.astidx = i}; + struct strview sv = toks.strs[ast.lexemes[i]]; + + /* TODO: Sorted insert and existence check */ + for (size_t i = 0; i < ev->len; i++) { + struct strview sv2 = toks.strs[ast.lexemes[ev->buf[i].astidx]]; + if (sv.len == sv2.len && memcmp(sv.p, sv2.p, sv.len) == 0) { + err("analyzer: Constant ‘%.*s’ declared multiple times", + (int)sv.len, sv.p); + } + } + + if (ev->len == ev->cap) { + ev->buf = arena_grow(a, ev->buf, struct constdecl, ev->cap, + ev->cap * 2); + ev->cap *= 2; + } + ev->buf[ev->len++] = cd; + break; + } + case ASTBLK: { + idx_t_ lhs, rhs, up; + lhs = ast.kids[i].lhs; + rhs = ast.kids[i].rhs; + up = ev - evs->buf; + create_environments(ast, toks, evs, lhs, rhs, a)->up = up; + i = rhs; + break; + } + } + } + + return ev; +} + +static struct type typegrab(struct ast, struct lexemes, idx_t_); +static struct type typecheck_constdecl(struct environs, struct ast, + struct lexemes, idx_t_, idx_t_); +static struct type typecheck_expr(struct environs, struct ast, struct lexemes, + idx_t_, idx_t_); +static bool typecompat(struct type, struct type); + +void +typecheck_environment(struct environs evs, struct ast ast, struct lexemes toks, + idx_t_ i) +{ + struct environ ev = evs.buf[i]; + for (size_t j = 0; j < ev.len; j++) + typecheck_constdecl(evs, ast, toks, j, i); +} + +struct type +typegrab(struct ast ast, struct lexemes toks, idx_t_ i) +{ + struct strview sv = toks.strs[ast.lexemes[i]]; + const struct type *tp = typelookup(sv.p, sv.len); + if (tp == NULL) + err("analyzer: Unknown type ‘%.*s’", (int)sv.len, sv.p); + return *tp; +} + +struct type +typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks, + idx_t_ i, idx_t_ evi) +{ + struct environ ev = evs.buf[evi]; + struct constdecl cd = ev.buf[i]; + if (cd.type.kind != TYPE_UNSET) + return cd.type; + ev.buf[i].type.kind = TYPE_CHECKING; + + struct pair p = ast.kids[cd.astidx]; + struct type ltype, rtype; + ltype.kind = TYPE_UNSET; + + assert(p.rhs != AST_EMPTY); + + if (p.lhs != AST_EMPTY) + ltype = typegrab(ast, toks, p.lhs); + rtype = typecheck_expr(evs, ast, toks, p.rhs, evi); + + if (ltype.kind == TYPE_UNSET) + ltype = rtype; + else if (!typecompat(ltype, rtype)) + err("analyzer: Type mismatch"); + + return ev.buf[i].type = ltype; +} + +struct type +typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks, + idx_t_ i, idx_t_ evi) +{ + switch (ast.kinds[i]) { + case ASTNUMLIT: + return (struct type){.kind = TYPE_INT_UNTYPED, .issigned = true}; + case ASTIDENT: { + struct environ ev = evs.buf[evi]; + struct strview sv = toks.strs[ast.lexemes[i]]; + + for (;;) { + for (size_t i = 0; i < ev.len; i++) { + struct strview sv2 = toks.strs[ast.lexemes[ev.buf[i].astidx]]; + if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0) + continue; + struct type t = typecheck_constdecl(evs, ast, toks, i, evi); + if (t.kind == TYPE_CHECKING) { + err("analyzer: Circular dependency for type ‘%.*s’", + (int)sv2.len, sv2.p); + } + return t; + } + if (evi == 0) + err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p); + evi = ev.up; + } + } + case ASTFN: + assert(!"not implemented"); + default: + err("analyzer: Unexpected AST kind %u", ast.kinds[i]); + __builtin_unreachable(); + } +} + +bool +typecompat(struct type lhs, struct type rhs) +{ + if (lhs.kind == rhs.kind) + return true; + + /* TODO: Need to actually parse it! 256 should not coerce to i8. */ + if (rhs.kind == TYPE_INT_UNTYPED) + return true; + + if (lhs.issigned != rhs.issigned) + return false; + + return lhs.size >= rhs.size; +} diff --git a/src/analyzer.h b/src/analyzer.h new file mode 100644 index 0000000..c3c9b57 --- /dev/null +++ b/src/analyzer.h @@ -0,0 +1,39 @@ +#ifndef ORYX_ANALYZER_H +#define ORYX_ANALYZER_H + +#include + +#include "lexer.h" +#include "parser.h" + +enum { + TYPE_UNSET = 0, + TYPE_CHECKING = 1, + + TYPE_I8, + TYPE_I16, + TYPE_I32, + TYPE_I64, + TYPE_INT, + TYPE_INT_UNTYPED, + + TYPE_U8, + TYPE_U16, + TYPE_U32, + TYPE_U64, + TYPE_UINT, + + TYPE_RUNE, +}; + +typedef uint8_t type_kind_t_; + +struct type { + type_kind_t_ kind; + uint8_t size : 7; /* bytes */ + bool issigned : 1; +}; + +void analyzeast(struct ast, struct lexemes); + +#endif /* !ORYX_ANALYZER_H */ diff --git a/src/arena.c b/src/arena.c index 7ca5626..5b50404 100644 --- a/src/arena.c +++ b/src/arena.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "alloc.h" #include "common.h" @@ -69,6 +70,48 @@ arena_alloc(struct _arena **a, size_t nmemb, size_t size, size_t align) return p->data; } +void * +_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb, + size_t size, size_t align) +{ + assert(IS_POW_2(align)); + assert(new_nmemb * size != 0); + + if (size > SIZE_MAX / new_nmemb) { + errno = EOVERFLOW; + err("%s:", __func__); + } + + size *= new_nmemb; + + for (struct _arena *p = *a; p != NULL; p = p->next) { + if (ptr < p->data || ptr > p->last) + continue; + + /* If we need to grow the given allocation, but it was the last + allocation made in a region, then we first see if we can just eat + more trailing free space in the region to avoid a memcpy(). */ + if (ptr == p->last) { + size_t rest = p->cap - p->len; + size_t need = (new_nmemb - old_nmemb) * size; + if (need <= rest) { + p->len += need; + return ptr; + } + } + + void *dst = arena_alloc(a, new_nmemb, size, align); + return memcpy(dst, ptr, old_nmemb * size); + } + +#if DEBUG + err("%s:%d: tried to resize pointer that wasn’t allocated", __func__, + __LINE__); +#else + __builtin_unreachable(); +#endif +} + void arena_free(struct _arena **a) { diff --git a/src/codegen.c b/src/codegen.c index 7f5d010..dd21ad7 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -20,6 +20,7 @@ static size_t codegenexpr(LLVMBuilderRef, struct ast, struct lexemes, void codegen(struct ast ast, struct lexemes toks) { + exit(EXIT_SUCCESS); LLVMModuleRef mod = LLVMModuleCreateWithName("oryx"); for (size_t i = 0; i < ast.len;) { diff --git a/src/common.h b/src/common.h index be88f0b..5756c83 100644 --- a/src/common.h +++ b/src/common.h @@ -6,8 +6,9 @@ # define unlikely(x) __builtin_expect(!!(x), 0) #else # define __attribute__(x) -# define likely(x) (x) -# define unlikely(x) (x) +# define __builtin_unreachable() (((char *)0)[0] = 0) +# define likely(x) (x) +# define unlikely(x) (x) #endif #endif /* !ORYX_COMMON_H */ diff --git a/src/main.c b/src/main.c index f478b6d..b32a723 100644 --- a/src/main.c +++ b/src/main.c @@ -6,6 +6,7 @@ #include #include "alloc.h" +#include "analyzer.h" #include "codegen.h" #include "common.h" #include "errors.h" @@ -28,6 +29,7 @@ main(int argc, char **argv) struct lexemes toks = lexstring(src, srclen); struct ast ast = parsetoks(toks); + analyzeast(ast, toks); codegen(ast, toks); #if DEBUG diff --git a/src/parser.h b/src/parser.h index 7883722..5ecf8ef 100644 --- a/src/parser.h +++ b/src/parser.h @@ -14,7 +14,7 @@ enum { ‘x: lhs = rhs’ */ ASTDECL, - /* Constant declaration, lhs and rhs may be unused + /* Constant declaration, lhs may be unused ‘x: lhs : rhs’ */ ASTCDECL, diff --git a/src/primitives.gperf b/src/primitives.gperf new file mode 100644 index 0000000..449a030 --- /dev/null +++ b/src/primitives.gperf @@ -0,0 +1,36 @@ +%compare-strncmp +%includes +%language=ANSI-C +%readonly-tables +%struct-type + +%{ +#include + +#include "analyzer.h" +#include "types.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wunused-parameter" +%} + +struct typeslot { char *name; struct type inner; }; +%% +i8, { TYPE_I8, 1, true } +i16, { TYPE_I16, 2, true } +i32, { TYPE_I32, 4, true } +i64, { TYPE_I64, 8, true } +int, { TYPE_INT, 8, true } +u8, { TYPE_U8, 1, false } +u16, { TYPE_U16, 2, false } +u32, { TYPE_U32, 4, false } +u64, { TYPE_U64, 8, false } +uint, { TYPE_UINT, 8, false } +rune, { TYPE_RUNE, 4, true } +%% +const struct type * +typelookup(const uchar *p, size_t len) +{ + const struct typeslot *tp = in_word_set(p, len); + return tp == NULL ? NULL : &tp->inner; +} -- cgit v1.2.3