diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | src/alloc.h | 9 | ||||
| -rw-r--r-- | src/analyzer.c | 211 | ||||
| -rw-r--r-- | src/analyzer.h | 39 | ||||
| -rw-r--r-- | src/arena.c | 43 | ||||
| -rw-r--r-- | src/codegen.c | 1 | ||||
| -rw-r--r-- | src/common.h | 5 | ||||
| -rw-r--r-- | src/main.c | 2 | ||||
| -rw-r--r-- | src/parser.h | 2 | ||||
| -rw-r--r-- | src/primitives.gperf | 36 | 
10 files changed, 345 insertions, 4 deletions
| @@ -1,5 +1,6 @@  .cache/  compile_commands.json +*.gen.c  make  *.o  oryx diff --git a/src/alloc.h b/src/alloc.h index 004e6ec..770adba 100644 --- a/src/alloc.h +++ b/src/alloc.h @@ -16,9 +16,13 @@ void *bufalloc(void *ptr, size_t nmemb, size_t size)  /* Allocate a buffer of NMEMB elements of size SIZE with alignment ALIGN using     the arena-allocator A. */  void *arena_alloc(arena *a, size_t nmemb, size_t size, size_t align) -	__attribute__((returns_nonnull, warn_unused_result, malloc, +	__attribute__((returns_nonnull, nonnull, warn_unused_result, malloc,                     alloc_size(2, 3), alloc_align(4))); +void *_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb, +                  size_t size, size_t align) +	__attribute__((returns_nonnull, nonnull, warn_unused_result)); +  /* Deallocate all memory associated with the arena A. */  void arena_free(arena *a)  	__attribute__((nonnull)); @@ -26,4 +30,7 @@ void arena_free(arena *a)  /* Allocate a buffer of N elements of type T using the arena-allocator A. */  #define arena_new(a, T, n) ((T *)arena_alloc((a), (n), sizeof(T), alignof(T))) +#define arena_grow(a, p, T, on, nn)                                            \ +	((T *)_arena_grow((a), (p), (on), (nn), sizeof(T), alignof(T))) +  #endif /* !ORYX_ALLOC_H */ diff --git a/src/analyzer.c b/src/analyzer.c new file mode 100644 index 0000000..6687856 --- /dev/null +++ b/src/analyzer.c @@ -0,0 +1,211 @@ +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc.h" +#include "analyzer.h" +#include "common.h" +#include "errors.h" +#include "parser.h" +#include "types.h" + +struct environ { +	idx_t_ up; +	struct constdecl { +		struct type type; +		idx_t_ astidx; +	} *buf; +	size_t len, cap; +}; + +struct environs { +	struct environ *buf; +	size_t len, cap; +}; + +static struct environ *create_environments(struct ast, struct lexemes, +                                           struct environs *, idx_t_, idx_t_, +                                           arena *) +	__attribute__((nonnull)); +static void typecheck_environment(struct environs, struct ast, struct lexemes, +                                  idx_t_); + +/* Defined in primitives.gperf */ +const struct type *typelookup(const uchar *, size_t) +	__attribute__((nonnull)); + +void +analyzeast(struct ast ast, struct lexemes toks) +{ +	arena a = NULL; +	struct environs evs = {0}; + +	create_environments(ast, toks, &evs, 0, ast.len - 1, &a); +	for (size_t i = 0; i < evs.len; i++) +		typecheck_environment(evs, ast, toks, i); + +	arena_free(&a); +	free(evs.buf); +} + +struct environ * +create_environments(struct ast ast, struct lexemes toks, struct environs *evs, +                    idx_t_ beg, idx_t_ end, arena *a) +{ +	assert(evs != NULL); + +	if (evs->len == evs->cap) { +		evs->cap = evs->cap == 0 ? 8 : evs->cap * 2; +		evs->buf = bufalloc(evs->buf, evs->cap, sizeof(*evs->buf)); +	} + +	struct environ *ev = evs->buf + evs->len++; +	*ev = (struct environ){.cap = 16}; +	ev->buf = arena_new(a, struct constdecl, ev->cap); + +	for (idx_t_ i = beg; likely(i <= end); i++) { +		switch (ast.kinds[i]) { +		case ASTCDECL: { +			struct constdecl cd = {.astidx = i}; +			struct strview sv = toks.strs[ast.lexemes[i]]; + +			/* TODO: Sorted insert and existence check */ +			for (size_t i = 0; i < ev->len; i++) { +				struct strview sv2 = toks.strs[ast.lexemes[ev->buf[i].astidx]]; +				if (sv.len == sv2.len && memcmp(sv.p, sv2.p, sv.len) == 0) { +					err("analyzer: Constant ‘%.*s’ declared multiple times", +					    (int)sv.len, sv.p); +				} +			} + +			if (ev->len == ev->cap) { +				ev->buf = arena_grow(a, ev->buf, struct constdecl, ev->cap, +				                     ev->cap * 2); +				ev->cap *= 2; +			} +			ev->buf[ev->len++] = cd; +			break; +		} +		case ASTBLK: { +			idx_t_ lhs, rhs, up; +			lhs = ast.kids[i].lhs; +			rhs = ast.kids[i].rhs; +			up = ev - evs->buf; +			create_environments(ast, toks, evs, lhs, rhs, a)->up = up; +			i = rhs; +			break; +		} +		} +	} + +	return ev; +} + +static struct type typegrab(struct ast, struct lexemes, idx_t_); +static struct type typecheck_constdecl(struct environs, struct ast, +                                       struct lexemes, idx_t_, idx_t_); +static struct type typecheck_expr(struct environs, struct ast, struct lexemes, +                                  idx_t_, idx_t_); +static bool typecompat(struct type, struct type); + +void +typecheck_environment(struct environs evs, struct ast ast, struct lexemes toks, +                      idx_t_ i) +{ +	struct environ ev = evs.buf[i]; +	for (size_t j = 0; j < ev.len; j++) +		typecheck_constdecl(evs, ast, toks, j, i); +} + +struct type +typegrab(struct ast ast, struct lexemes toks, idx_t_ i) +{ +	struct strview sv = toks.strs[ast.lexemes[i]]; +	const struct type *tp = typelookup(sv.p, sv.len); +	if (tp == NULL) +		err("analyzer: Unknown type ‘%.*s’", (int)sv.len, sv.p); +	return *tp; +} + +struct type +typecheck_constdecl(struct environs evs, struct ast ast, struct lexemes toks, +                    idx_t_ i, idx_t_ evi) +{ +	struct environ ev = evs.buf[evi]; +	struct constdecl cd = ev.buf[i]; +	if (cd.type.kind != TYPE_UNSET) +		return cd.type; +	ev.buf[i].type.kind = TYPE_CHECKING; + +	struct pair p = ast.kids[cd.astidx]; +	struct type ltype, rtype; +	ltype.kind = TYPE_UNSET; + +	assert(p.rhs != AST_EMPTY); + +	if (p.lhs != AST_EMPTY) +		ltype = typegrab(ast, toks, p.lhs); +	rtype = typecheck_expr(evs, ast, toks, p.rhs, evi); + +	if (ltype.kind == TYPE_UNSET) +		ltype = rtype; +	else if (!typecompat(ltype, rtype)) +		err("analyzer: Type mismatch"); + +	return ev.buf[i].type = ltype; +} + +struct type +typecheck_expr(struct environs evs, struct ast ast, struct lexemes toks, +               idx_t_ i, idx_t_ evi) +{ +	switch (ast.kinds[i]) { +	case ASTNUMLIT: +		return (struct type){.kind = TYPE_INT_UNTYPED, .issigned = true}; +	case ASTIDENT: { +		struct environ ev = evs.buf[evi]; +		struct strview sv = toks.strs[ast.lexemes[i]]; + +		for (;;) { +			for (size_t i = 0; i < ev.len; i++) { +				struct strview sv2 = toks.strs[ast.lexemes[ev.buf[i].astidx]]; +				if (sv.len != sv2.len || memcmp(sv.p, sv2.p, sv.len) != 0) +					continue; +				struct type t = typecheck_constdecl(evs, ast, toks, i, evi); +				if (t.kind == TYPE_CHECKING) { +					err("analyzer: Circular dependency for type ‘%.*s’", +					    (int)sv2.len, sv2.p); +				} +				return t; +			} +			if (evi == 0) +				err("analyzer: Unknown constant ‘%.*s’", (int)sv.len, sv.p); +			evi = ev.up; +		} +	} +	case ASTFN: +		assert(!"not implemented"); +	default: +		err("analyzer: Unexpected AST kind %u", ast.kinds[i]); +		__builtin_unreachable(); +	} +} + +bool +typecompat(struct type lhs, struct type rhs) +{ +	if (lhs.kind == rhs.kind) +		return true; + +	/* TODO: Need to actually parse it!  256 should not coerce to i8. */ +	if (rhs.kind == TYPE_INT_UNTYPED) +		return true; + +	if (lhs.issigned != rhs.issigned) +		return false; + +	return lhs.size >= rhs.size; +} diff --git a/src/analyzer.h b/src/analyzer.h new file mode 100644 index 0000000..c3c9b57 --- /dev/null +++ b/src/analyzer.h @@ -0,0 +1,39 @@ +#ifndef ORYX_ANALYZER_H +#define ORYX_ANALYZER_H + +#include <stdint.h> + +#include "lexer.h" +#include "parser.h" + +enum { +	TYPE_UNSET = 0, +	TYPE_CHECKING = 1, + +	TYPE_I8, +	TYPE_I16, +	TYPE_I32, +	TYPE_I64, +	TYPE_INT, +	TYPE_INT_UNTYPED, + +	TYPE_U8, +	TYPE_U16, +	TYPE_U32, +	TYPE_U64, +	TYPE_UINT, + +	TYPE_RUNE, +}; + +typedef uint8_t type_kind_t_; + +struct type { +	type_kind_t_ kind; +	uint8_t size  : 7; /* bytes */ +	bool issigned : 1; +}; + +void analyzeast(struct ast, struct lexemes); + +#endif /* !ORYX_ANALYZER_H */ diff --git a/src/arena.c b/src/arena.c index 7ca5626..5b50404 100644 --- a/src/arena.c +++ b/src/arena.c @@ -5,6 +5,7 @@  #include <stddef.h>  #include <stdint.h>  #include <stdlib.h> +#include <string.h>  #include "alloc.h"  #include "common.h" @@ -69,6 +70,48 @@ arena_alloc(struct _arena **a, size_t nmemb, size_t size, size_t align)  	return p->data;  } +void * +_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb, +            size_t size, size_t align) +{ +	assert(IS_POW_2(align)); +	assert(new_nmemb * size != 0); + +	if (size > SIZE_MAX / new_nmemb) { +		errno = EOVERFLOW; +		err("%s:", __func__); +	} + +	size *= new_nmemb; + +	for (struct _arena *p = *a; p != NULL; p = p->next) { +		if (ptr < p->data || ptr > p->last) +			continue; + +		/* If we need to grow the given allocation, but it was the last +		   allocation made in a region, then we first see if we can just eat +		   more trailing free space in the region to avoid a memcpy(). */ +		if (ptr == p->last) { +			size_t rest = p->cap - p->len; +			size_t need = (new_nmemb - old_nmemb) * size; +			if (need <= rest) { +				p->len += need; +				return ptr; +			} +		} + +		void *dst = arena_alloc(a, new_nmemb, size, align); +		return memcpy(dst, ptr, old_nmemb * size); +	} + +#if DEBUG +	err("%s:%d: tried to resize pointer that wasn’t allocated", __func__, +	    __LINE__); +#else +	__builtin_unreachable(); +#endif +} +  void  arena_free(struct _arena **a)  { diff --git a/src/codegen.c b/src/codegen.c index 7f5d010..dd21ad7 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -20,6 +20,7 @@ static size_t codegenexpr(LLVMBuilderRef, struct ast, struct lexemes,  void  codegen(struct ast ast, struct lexemes toks)  { +	exit(EXIT_SUCCESS);  	LLVMModuleRef mod = LLVMModuleCreateWithName("oryx");  	for (size_t i = 0; i < ast.len;) { diff --git a/src/common.h b/src/common.h index be88f0b..5756c83 100644 --- a/src/common.h +++ b/src/common.h @@ -6,8 +6,9 @@  #	define unlikely(x) __builtin_expect(!!(x), 0)  #else  #	define __attribute__(x) -#	define likely(x)   (x) -#	define unlikely(x) (x) +#	define __builtin_unreachable() (((char *)0)[0] = 0) +#	define likely(x)               (x) +#	define unlikely(x)             (x)  #endif  #endif /* !ORYX_COMMON_H */ @@ -6,6 +6,7 @@  #include <unistd.h>  #include "alloc.h" +#include "analyzer.h"  #include "codegen.h"  #include "common.h"  #include "errors.h" @@ -28,6 +29,7 @@ main(int argc, char **argv)  	struct lexemes toks = lexstring(src, srclen);  	struct ast ast = parsetoks(toks); +	analyzeast(ast, toks);  	codegen(ast, toks);  #if DEBUG diff --git a/src/parser.h b/src/parser.h index 7883722..5ecf8ef 100644 --- a/src/parser.h +++ b/src/parser.h @@ -14,7 +14,7 @@ enum {  	   ‘x: lhs = rhs’ */  	ASTDECL, -	/* Constant declaration, lhs and rhs may be unused +	/* Constant declaration, lhs may be unused  	   ‘x: lhs : rhs’ */  	ASTCDECL, diff --git a/src/primitives.gperf b/src/primitives.gperf new file mode 100644 index 0000000..449a030 --- /dev/null +++ b/src/primitives.gperf @@ -0,0 +1,36 @@ +%compare-strncmp +%includes +%language=ANSI-C +%readonly-tables +%struct-type + +%{ +#include <stdbool.h> + +#include "analyzer.h" +#include "types.h" + +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wunused-parameter" +%} + +struct typeslot { char *name; struct type inner; }; +%% +i8,   { TYPE_I8,   1, true  } +i16,  { TYPE_I16,  2, true  } +i32,  { TYPE_I32,  4, true  } +i64,  { TYPE_I64,  8, true  } +int,  { TYPE_INT,  8, true  } +u8,   { TYPE_U8,   1, false } +u16,  { TYPE_U16,  2, false } +u32,  { TYPE_U32,  4, false } +u64,  { TYPE_U64,  8, false } +uint, { TYPE_UINT, 8, false } +rune, { TYPE_RUNE, 4, true  } +%% +const struct type * +typelookup(const uchar *p, size_t len) +{ +	const struct typeslot *tp = in_word_set(p, len); +	return tp == NULL ? NULL : &tp->inner; +} |