From d3c95ef09fd493241273d6e63aca31d703c2503c Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 6 Jul 2024 03:33:04 +0200 Subject: Implement booleans --- .gitignore | 3 +- grammar.ebnf | 2 + src/analyzer.c | 199 +++++++++++++++++++++++++++++++++++++++++---------------- src/analyzer.h | 14 +++- src/bitset.h | 38 +++++++++++ src/codegen.c | 53 ++++++++++----- src/codegen.h | 4 +- src/lexer.c | 17 +++-- src/lexer.h | 8 ++- src/main.c | 12 ++-- src/parser.c | 18 +++++- src/parser.h | 8 +++ src/symtab.c | 48 -------------- src/symtab.h | 46 ------------- src/tables.c | 48 ++++++++++++++ src/tables.h | 46 +++++++++++++ test.yx | 133 -------------------------------------- 17 files changed, 377 insertions(+), 320 deletions(-) create mode 100644 src/bitset.h delete mode 100644 src/symtab.c delete mode 100644 src/symtab.h create mode 100644 src/tables.c create mode 100644 src/tables.h delete mode 100644 test.yx diff --git a/.gitignore b/.gitignore index 45a74dd..ad32da2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,10 +9,11 @@ oryx *.gen.c *.o -# Test files. Autogenerated with the following: +# Test files. Partially autogenerated with the following: # # ./make; find test -type f -executable # +test.* test/arena # Compilation artifacts of GNU MP. Autogenerated with the following: diff --git a/grammar.ebnf b/grammar.ebnf index a5f5e50..b28e0ac 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -54,12 +54,14 @@ unop binop : '+' + | '!=' | '%' | '&' | '*' | '-' | '/' | '<<' + | '==' | '>>' | '|' | '~' diff --git a/src/analyzer.c b/src/analyzer.c index 6dff9ff..f40d12b 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -12,11 +12,12 @@ #include "alloc.h" #include "analyzer.h" +#include "bitset.h" #include "common.h" #include "errors.h" #include "parser.h" #include "strview.h" -#include "symtab.h" +#include "tables.h" #include "types.h" #define LOG2_10 (3.321928) @@ -41,11 +42,12 @@ struct azctx { ast_t ast; aux_t aux; + bitset_t *cnst; + fold_t *folds; lexemes_t toks; - mpq_t *folds; scopes_t scps; - type_t **types; typetab_t *ttab; + type_t **types; /* The return type of the function being analyzed */ type_t *fnret; @@ -100,6 +102,9 @@ static bool typecompat(type_t *t1, type_t *t2) /* Check if the statement at node I in the AST returns from the function */ static bool returns(ast_t, idx_t i); +static bool memzeroed(void *, size_t) + __attribute__((nonnull)); + enum { PRIM_INT8, PRIM_INT16, @@ -115,6 +120,8 @@ enum { PRIM_UINT128, PRIM_UINT, + PRIM_BOOL, + PRIM_RUNE, PRIM_F16, @@ -143,6 +150,8 @@ static struct { [PRIM_UINT64] = {SVC("u64"), {.kind = TYPE_NUM, .size = 8}}, [PRIM_UINT128] = {SVC("u128"), {.kind = TYPE_NUM, .size = 16}}, + [PRIM_BOOL] = {SVC("bool"), {.kind = TYPE_BOOL, .size = 1}}, + [PRIM_RUNE] = {SVC("rune"), {.kind = TYPE_NUM, .size = 4, .issigned = true}}, [PRIM_F16] = {SVC("f16"), {.kind = TYPE_NUM, .size = 2, .isfloat = true}}, @@ -151,13 +160,14 @@ static struct { [PRIM_F128] = {SVC("f128"), {.kind = TYPE_NUM, .size = 16, .isfloat = true}}, }; -static type_t NOT_CHECKED = {.kind = TYPE_CHECKING}; -static type_t UNTYPED_INT = {.kind = TYPE_NUM, .size = 0}; -static type_t UNTYPED_FLT = {.kind = TYPE_NUM, .size = 0, .isfloat = true}; +static type_t NOT_CHECKED = {.kind = TYPE_CHECKING}; +static type_t UNTYPED_BOOL = {.kind = TYPE_BOOL}; +static type_t UNTYPED_INT = {.kind = TYPE_NUM}; +static type_t UNTYPED_FLT = {.kind = TYPE_NUM, .isfloat = true}; type_t ** analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, scope_t **scps, - mpq_t **folds) + fold_t **folds, bitset_t **cnst) { struct azctx ctx = { .a = a, @@ -174,15 +184,17 @@ analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, scope_t **scps, if ((ctx.types = calloc(ctx.ast.len, sizeof(*ctx.types))) == NULL) err("calloc:"); + ctx.cnst = mkbitset(ctx.a, ctx.ast.len); gensymtabs(&ctx); analyzeast(&ctx); - if ((ctx.folds = calloc(ctx.ast.len, sizeof(**ctx.folds))) == NULL) + if ((ctx.folds = calloc(ctx.ast.len, sizeof(fold_t))) == NULL) err("calloc:"); constfold(&ctx); - *scps = ctx.scps.buf; + *cnst = ctx.cnst; *folds = ctx.folds; + *scps = ctx.scps.buf; return ctx.types; } @@ -247,6 +259,9 @@ analyzedecl(struct azctx *ctx, idx_t i) bool isstatic = ctx->ast.kinds[i] <= _AST_DECLS_END && ctx->aux.buf[ctx->ast.kids[i].lhs].decl.isstatic; + if (isconst) + SETBIT(ctx->cnst, i); + if (isstatic && isundef) err("analyzer: Static variables may not be undefined"); @@ -289,8 +304,16 @@ analyzedecl(struct azctx *ctx, idx_t i) if (ltype == NULL) { ltype = rtype; - if (ctx->ast.kinds[i] == ASTDECL && rtype->size == 0) - ltype = &primitives[rtype == &UNTYPED_INT ? PRIM_INT : PRIM_F64].t; + if (ctx->ast.kinds[i] == ASTDECL && rtype->size == 0) { + if (rtype == &UNTYPED_INT) + ltype = &primitives[PRIM_INT].t; + else if (rtype == &UNTYPED_FLT) + ltype = &primitives[PRIM_F64].t; + else if (rtype == &UNTYPED_BOOL) + ltype = &primitives[PRIM_BOOL].t; + else + __builtin_unreachable(); + } } else if (rtype != NULL && !typecompat(ltype, rtype)) err("analyzer: Type mismatch"); @@ -346,6 +369,7 @@ analyzeexpr(struct azctx *ctx, idx_t i) switch (ctx->ast.kinds[i]) { case ASTNUMLIT: { + SETBIT(ctx->cnst, i); strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; ctx->types[i] = memchr(sv.p, '.', sv.len) != NULL ? &UNTYPED_FLT : &UNTYPED_INT; @@ -375,6 +399,7 @@ analyzeexpr(struct azctx *ctx, idx_t i) SV_PRI_ARGS(sv)); } + SET_DST_IF_SRC(ctx->cnst, i, sym->i); ctx->types[i] = ctx->types[sym->i]; return fwdnode(ctx->ast, i); } @@ -387,6 +412,7 @@ analyzeexpr(struct azctx *ctx, idx_t i) idx_t ni, rhs; rhs = ctx->ast.kids[i].rhs; ni = analyzeexpr(ctx, rhs); + SET_DST_IF_SRC(ctx->cnst, i, rhs); type_t *t = ctx->types[rhs]; if (ctx->ast.kinds[i] == ASTUNNEG && (t->kind != TYPE_NUM || !t->issigned)) @@ -405,9 +431,11 @@ analyzeexpr(struct azctx *ctx, idx_t i) case ASTBINADD: case ASTBINAND: case ASTBINDIV: + case ASTBINEQ: + case ASTBINIOR: case ASTBINMOD: case ASTBINMUL: - case ASTBINIOR: + case ASTBINNEQ: case ASTBINSHL: case ASTBINSHR: case ASTBINSUB: @@ -418,6 +446,9 @@ analyzeexpr(struct azctx *ctx, idx_t i) (void)analyzeexpr(ctx, lhs); idx_t ni = analyzeexpr(ctx, rhs); + if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) + SETBIT(ctx->cnst, i); + bool isshift = ctx->ast.kinds[i] == ASTBINSHL || ctx->ast.kinds[i] == ASTBINSHR; if (!isshift && !typecompat(ctx->types[lhs], ctx->types[rhs])) @@ -428,6 +459,11 @@ analyzeexpr(struct azctx *ctx, idx_t i) [ASTBINSHL] = true, [ASTBINSHR] = true, [ASTBINXOR] = true, }; + static const bool logical[UINT8_MAX + 1] = { + [ASTBINEQ] = true, + [ASTBINNEQ] = true, + }; + if (int_only[ctx->ast.kinds[i]] && (ctx->types[lhs]->kind != TYPE_NUM || ctx->types[lhs]->isfloat || ctx->types[rhs]->kind != TYPE_NUM @@ -448,13 +484,16 @@ analyzeexpr(struct azctx *ctx, idx_t i) There is an exception for the left- and right shift operators. Expressions for these operators always take the type of x, and y can be any integer type. */ - if (isshift) + if (isshift) { ctx->types[i] = ctx->types[lhs]; - else { - ctx->types[i] = ctx->types[lhs]->size != 0 ? ctx->types[lhs] - : ctx->types[rhs]; - ctx->types[i]->isfloat = ctx->types[lhs]->isfloat - || ctx->types[rhs]->isfloat; + } else if (logical[ctx->ast.kinds[i]]) { + ctx->types[i] = TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs) + ? &UNTYPED_BOOL + : &primitives[PRIM_BOOL].t; + } else if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) { + ctx->types[i] = ctx->types[ctx->types[lhs]->isfloat ? lhs : rhs]; + } else { + ctx->types[i] = ctx->types[TESTBIT(ctx->cnst, lhs) ? rhs : lhs]; } return ni; } @@ -549,7 +588,7 @@ constfoldblk(struct azctx *ctx, idx_t i) idx_t constfoldexpr(struct azctx *ctx, type_t *T, idx_t i) { - if (MPQ_IS_INIT(ctx->folds[i])) + if (!memzeroed(ctx->folds[i].data, sizeof(ctx->folds[i].data))) return fwdnode(ctx->ast, i); idx_t ni; @@ -559,7 +598,7 @@ constfoldexpr(struct azctx *ctx, type_t *T, idx_t i) case ASTFN: return constfoldblk(ctx, ctx->ast.kids[i].rhs); case ASTNUMLIT: { - mpq_init(ctx->folds[i]); + mpq_init(ctx->folds[i].q); strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]]; char *buf = tmpalloc(ctx->s, sv.len + 1, 1); @@ -589,13 +628,13 @@ constfoldexpr(struct azctx *ctx, type_t *T, idx_t i) #endif mpf_set_str(x, buf, 10); assert(ret == 0); - mpq_set_f(ctx->folds[i], x); + mpq_set_f(ctx->folds[i].q, x); mpf_clear(x); } else { #if DEBUG int ret = #endif - mpq_set_str(ctx->folds[i], buf, 10); + mpq_set_str(ctx->folds[i].q, buf, 10); assert(ret == 0); } ni = fwdnode(ctx->ast, i); @@ -626,12 +665,14 @@ constfoldexpr(struct azctx *ctx, type_t *T, idx_t i) case ASTCDECL: { idx_t expr = ctx->ast.kids[sym->i].rhs; assert(expr != AST_EMPTY); - if (!MPQ_IS_INIT(ctx->folds[i])) { + if (memzeroed(ctx->folds[i].data, sizeof(ctx->folds[i].data))) { ctx->si = lvl; (void)constfolddecl(ctx, sym->i); } - MPQCPY(ctx->folds[i], ctx->folds[expr]); - assert(MPQ_IS_INIT(ctx->folds[i])); + if (ctx->types[i]->kind == TYPE_NUM) + MPQCPY(ctx->folds[i].q, ctx->folds[expr].q); + else if (ctx->types[i]->kind == TYPE_BOOL) + ctx->folds[i].b = ctx->folds[expr].b; ni = fwdnode(ctx->ast, i); goto out; } @@ -645,17 +686,16 @@ out: } case ASTUNCMPL: { ni = constfoldexpr(ctx, ctx->types[i], ctx->ast.kids[i].rhs); - if (MPQ_IS_INIT(ctx->folds[ctx->ast.kids[i].rhs])) + if (TESTBIT(ctx->cnst, ctx->ast.kids[i].rhs)) err("analyzer: Cannot perform bitwise complement of constant"); break; } case ASTUNNEG: { idx_t rhs = ctx->ast.kids[i].rhs; ni = constfoldexpr(ctx, ctx->types[i], rhs); - mpq_t *x = ctx->folds + rhs; - if (MPQ_IS_INIT(*x)) { - MPQCPY(ctx->folds[i], *x); - mpq_neg(ctx->folds[i], ctx->folds[i]); + if (TESTBIT(ctx->cnst, rhs)) { + MPQCPY(ctx->folds[i].q, ctx->folds[rhs].q); + mpq_neg(ctx->folds[i].q, ctx->folds[i].q); } break; } @@ -673,10 +713,10 @@ out: rhs = ctx->ast.kids[i].rhs; (void)constfoldexpr(ctx, ctx->types[i], lhs); ni = constfoldexpr(ctx, ctx->types[i], rhs); - if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { - mpq_init(ctx->folds[i]); - mpq_fns[ctx->ast.kinds[i]](ctx->folds[i], ctx->folds[lhs], - ctx->folds[rhs]); + if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) { + mpq_init(ctx->folds[i].q); + mpq_fns[ctx->ast.kinds[i]](ctx->folds[i].q, ctx->folds[lhs].q, + ctx->folds[rhs].q); } break; } @@ -688,13 +728,14 @@ out: (void)constfoldexpr(ctx, ctx->types[i], lhs); ni = constfoldexpr(ctx, ctx->types[i], rhs); - if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { - mpq_init(ctx->folds[i]); + if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) { + mpq_init(ctx->folds[i].q); if (ctx->types[i]->isfloat) - mpq_div(ctx->folds[i], ctx->folds[lhs], ctx->folds[rhs]); + mpq_div(ctx->folds[i].q, ctx->folds[lhs].q, ctx->folds[rhs].q); else { - mpz_tdiv_q(mpq_numref(ctx->folds[i]), mpq_numref(ctx->folds[lhs]), - mpq_numref(ctx->folds[rhs])); + mpz_tdiv_q(mpq_numref(ctx->folds[i].q), + mpq_numref(ctx->folds[lhs].q), + mpq_numref(ctx->folds[rhs].q)); } } break; @@ -718,12 +759,13 @@ out: (void)constfoldexpr(ctx, ctx->types[i], lhs); ni = constfoldexpr(ctx, ctx->types[i], rhs); - if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { - assert(MPQ_IS_WHOLE(ctx->folds[lhs])); - assert(MPQ_IS_WHOLE(ctx->folds[rhs])); - mpq_init(ctx->folds[i]); - mpz_fns[ctx->ast.kinds[i]](mpq_numref(ctx->folds[i]), mpq_numref(ctx->folds[lhs]), - mpq_numref(ctx->folds[rhs])); + if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) { + assert(MPQ_IS_WHOLE(ctx->folds[lhs].q)); + assert(MPQ_IS_WHOLE(ctx->folds[rhs].q)); + mpq_init(ctx->folds[i].q); + mpz_fns[ctx->ast.kinds[i]](mpq_numref(ctx->folds[i].q), + mpq_numref(ctx->folds[lhs].q), + mpq_numref(ctx->folds[rhs].q)); } break; } @@ -742,18 +784,19 @@ out: (void)constfoldexpr(ctx, ctx->types[lhs], lhs); ni = constfoldexpr(ctx, ctx->types[rhs], rhs); - if (MPQ_IS_INIT(ctx->folds[rhs])) { - if (mpq_sgn(ctx->folds[rhs]) == -1) + if (TESTBIT(ctx->cnst, rhs)) { + if (mpq_sgn(ctx->folds[rhs].q) == -1) err("analyzer: Cannot shift by negative value"); + /* TODO: Assert that in X<folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) { + if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) { mpz_ptr cur_z, lhs_z, rhs_z; - cur_z = mpq_numref(ctx->folds[i]); - lhs_z = mpq_numref(ctx->folds[lhs]); - rhs_z = mpq_numref(ctx->folds[rhs]); + cur_z = mpq_numref(ctx->folds[i].q); + lhs_z = mpq_numref(ctx->folds[lhs].q); + rhs_z = mpq_numref(ctx->folds[rhs].q); - mpq_init(ctx->folds[i]); + mpq_init(ctx->folds[i].q); if (mpz_cmp_ui(rhs_z, ULONG_MAX) > 0) err("analyzer: Shift oprand too large"); mp_bitcnt_t shftcnt = mpz_get_ui(rhs_z); @@ -761,19 +804,52 @@ out: } break; } + case ASTBINEQ: + case ASTBINNEQ: { + idx_t lhs, rhs; + lhs = ctx->ast.kids[i].lhs; + rhs = ctx->ast.kids[i].rhs; + + (void)constfoldexpr(ctx, ctx->types[i], lhs); + ni = constfoldexpr(ctx, ctx->types[i], rhs); + + bool both_oprs_const = TESTBIT(ctx->cnst, lhs) + && TESTBIT(ctx->cnst, rhs); + bool can_constfold_ints = ctx->types[lhs]->kind == TYPE_NUM + && both_oprs_const; + bool can_constfold_bools = ctx->types[lhs]->kind == TYPE_BOOL + && both_oprs_const; + + bool eq = can_constfold_ints + ? mpq_equal(ctx->folds[lhs].q, ctx->folds[rhs].q) + : can_constfold_bools ? ctx->folds[lhs].b == ctx->folds[rhs].b + : false; + + if (can_constfold_ints || can_constfold_bools) { + ctx->folds[i].set = true; + ctx->folds[i].b = (ctx->ast.kinds[i] == ASTBINEQ && eq) + || (ctx->ast.kinds[i] == ASTBINNEQ && !eq); + } + break; + } default: __builtin_unreachable(); } - if (MPQ_IS_INIT(ctx->folds[i]) && !T->issigned && mpq_sgn(ctx->folds[i]) == -1) + if (ctx->types[i]->kind == TYPE_NUM && TESTBIT(ctx->cnst, i) + && !T->issigned && mpq_sgn(ctx->folds[i].q) == -1) + { err("analyzer: Cannot convert negative value to unsigned type"); + } - if (T->size != 0 && !T->isfloat && MPQ_IS_INIT(ctx->folds[i])) { - if (!MPQ_IS_WHOLE(ctx->folds[i])) + if (ctx->types[i]->kind == TYPE_NUM && TESTBIT(ctx->cnst, i) + && T->size != 0 && !T->isfloat) + { + if (!MPQ_IS_WHOLE(ctx->folds[i].q)) err("analyzer: Invalid integer"); int cmp; - mpz_ptr num = mpq_numref(ctx->folds[i]); + mpz_ptr num = mpq_numref(ctx->folds[i].q); if (T->size < sizeof(unsigned long)) { unsigned long x = 1UL << (T->size * 8 - T->issigned); cmp = mpz_cmp_ui(num, x - 1); @@ -834,6 +910,10 @@ typecompat(type_t *lhs, type_t *rhs) if (lhs == rhs) return true; + /* Boolean types are only compatible with boolean types */ + if (lhs->kind == TYPE_BOOL || rhs->kind == TYPE_BOOL) + return lhs->kind == TYPE_BOOL && rhs->kind == TYPE_BOOL; + /* Function types are compatible if they have the same parameter- and return types */ if (lhs->kind == TYPE_FN && rhs->kind == TYPE_FN) @@ -852,3 +932,10 @@ typecompat(type_t *lhs, type_t *rhs) return lhs->issigned == rhs->issigned && lhs->isfloat == rhs->isfloat && lhs->size == rhs->size; } + +bool +memzeroed(void *p, size_t n) +{ + uchar *m = p; + return (*m == 0) && memcmp(m, m + 1, n - 1) == 0; +} diff --git a/src/analyzer.h b/src/analyzer.h index 4cba2a7..8ca4c38 100644 --- a/src/analyzer.h +++ b/src/analyzer.h @@ -2,14 +2,16 @@ #define ORYX_ANALYZER_H #include +#include #include #include #include "alloc.h" +#include "bitset.h" #include "lexer.h" #include "parser.h" -#include "symtab.h" +#include "tables.h" #include "types.h" /* The different base types */ @@ -18,6 +20,7 @@ enum { detecting cyclic definitions. */ TYPE_CHECKING, + TYPE_BOOL, TYPE_NUM, TYPE_FN, @@ -32,7 +35,14 @@ typedef struct { symtab_t *map; } scope_t; -type_t **analyzeprog(ast_t, aux_t, lexemes_t, arena_t *, scope_t **, mpq_t **) +typedef union { + char data[sizeof(mpq_t)]; + mpq_t q; + struct { bool b, set; }; +} fold_t; + +type_t **analyzeprog(ast_t, aux_t, lexemes_t, arena_t *, scope_t **, fold_t **, + bitset_t **) __attribute__((returns_nonnull, nonnull)); #endif /* !ORYX_ANALYZER_H */ diff --git a/src/bitset.h b/src/bitset.h new file mode 100644 index 0000000..2b46090 --- /dev/null +++ b/src/bitset.h @@ -0,0 +1,38 @@ +#ifndef ORYX_BITSET_H +#define ORYX_BITSET_H + +#include +#include + +#include "alloc.h" +#include "common.h" + +typedef unsigned char bitset_t; + +#define _BITSET_BITS (sizeof(bitset_t) * CHAR_BIT) +#define _BITSLOT(x) ((x) / _BITSET_BITS) +#define _BITMASK(x) (1 << ((x) % _BITSET_BITS)) + +/* Set- or test the bit as position X in BS */ +#define SETBIT(bs, x) ((bs)[_BITSLOT(x)] |= _BITMASK(x)) +#define TESTBIT(bs, x) ((bool)((bs)[_BITSLOT(x)] & _BITMASK(x))) + +/* Set the bit DST in BS if the bit SRC is set */ +#define SET_DST_IF_SRC(bs, dst, src) \ + do { \ + if (TESTBIT(bs, src)) \ + SETBIT(bs, dst); \ + } while (false) + +/* Allocate a new bitset capable of holding N bits */ +static inline bitset_t *mkbitset(arena_t *, size_t n) + __attribute__((always_inline, returns_nonnull, nonnull, malloc)); + +bitset_t * +mkbitset(arena_t *a, size_t n) +{ + size_t bits = CHAR_BIT * sizeof(bitset_t); + return arena_new(a, bitset_t, (n + bits - 1) / bits); +} + +#endif /* !ORYX_BITSET_H */ diff --git a/src/codegen.c b/src/codegen.c index a263db2..252ac29 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -12,6 +12,7 @@ #include "alloc.h" #include "analyzer.h" +#include "bitset.h" #include "common.h" #include "errors.h" #include "parser.h" @@ -34,12 +35,13 @@ struct cgctx { arena_t *a; scratch_t *s; - mpq_t *folds; - scope_t *scps; - type_t **types; ast_t ast; aux_t aux; + bitset_t *cnst; + fold_t *folds; lexemes_t toks; + scope_t *scps; + type_t **types; LLVMBuilderRef bob; LLVMContextRef ctx; @@ -60,8 +62,8 @@ extern bool lflag, sflag; extern const char *oflag; void -codegen(const char *file, mpq_t *folds, scope_t *scps, type_t **types, - ast_t ast, aux_t aux, lexemes_t toks) +codegen(const char *file, bitset_t *cnst, fold_t *folds, scope_t *scps, + type_t **types, ast_t ast, aux_t aux, lexemes_t toks) { LLVM_TARGET_INIT(AArch64); LLVM_TARGET_INIT(X86); @@ -81,12 +83,13 @@ codegen(const char *file, mpq_t *folds, scope_t *scps, type_t **types, .a = &(arena_t){0}, .s = &(scratch_t){0}, - .folds = folds, - .scps = scps, - .types = types, .ast = ast, .aux = aux, + .cnst = cnst, + .folds = folds, + .scps = scps, .toks = toks, + .types = types, .ctx = llctx, .mod = llmod, @@ -143,13 +146,12 @@ static idx_t codegendecl(struct cgctx ctx, idx_t); idx_t codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv) { - /* If true, implies numeric constant */ - if (MPQ_IS_INIT(ctx.folds[i]) && !T->isfloat) { + if (T->kind == TYPE_NUM && TESTBIT(ctx.cnst, i) && !T->isfloat) { char buf[40 /* The max value of a u128 is length 39 */]; - mpz_get_str(buf, 10, mpq_numref(ctx.folds[i])); + mpz_get_str(buf, 10, mpq_numref(ctx.folds[i].q)); *outv = LLVMConstIntOfString(type2llvm(ctx, T), buf, 10); return fwdnode(ctx.ast, i); - } else if (MPQ_IS_INIT(ctx.folds[i]) /* && type.isfloat */) { + } else if (T->kind == TYPE_NUM && TESTBIT(ctx.cnst, i)) { char *s, *buf; size_t len; mpf_t x; @@ -175,7 +177,7 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv) } mpf_init2(x, prec); - mpf_set_q(x, ctx.folds[i]); + mpf_set_q(x, ctx.folds[i].q); s = mpf_get_str(NULL, &e, 10, 0, x); len = strlen(s); @@ -191,6 +193,9 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv) free(s); mpf_clear(x); return fwdnode(ctx.ast, i); + } else if (T->kind == TYPE_BOOL && TESTBIT(ctx.cnst, i)) { + *outv = LLVMConstInt(type2llvm(ctx, ctx.types[i]), ctx.folds[i].b, false); + return fwdnode(ctx.ast, i); } switch (ctx.ast.kinds[i]) { @@ -257,6 +262,24 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv) ctx.bob, vl, vr, bo.name); return ni; } + case ASTBINEQ: + case ASTBINNEQ: { + static const struct binop { + LLVMIntPredicate pred; + const char *name; + } binoptbl[UINT8_MAX + 1] = { + [ASTBINEQ] = {LLVMIntEQ, "ieq"}, + [ASTBINNEQ] = {LLVMIntNE, "ine"}, + }; + + idx_t lhs = ctx.ast.kids[i].lhs, rhs = ctx.ast.kids[i].rhs; + LLVMValueRef vl, vr; + (void)codegentypedexpr(ctx, lhs, ctx.types[i], &vl); + idx_t ni = codegentypedexpr(ctx, rhs, ctx.types[i], &vr); + struct binop bo = binoptbl[ctx.ast.kinds[i]]; + *outv = LLVMBuildICmp(ctx.bob, bo.pred, vl, vr, bo.name); + return ni; + } default: __builtin_unreachable(); } @@ -451,8 +474,8 @@ LLVMTypeRef type2llvm(struct cgctx ctx, type_t *T) { switch (T->kind) { - case TYPE_FN: - err("codegen: %s: Not implemented for function types", __func__); + case TYPE_BOOL: + return LLVMInt1TypeInContext(ctx.ctx); case TYPE_NUM: assert(T->size != 0); assert((unsigned)T->size * 8 <= 128); diff --git a/src/codegen.h b/src/codegen.h index e214067..fe0b08b 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -7,8 +7,8 @@ #include "lexer.h" #include "parser.h" -void codegen(const char *, mpq_t *, scope_t *, type_t **, ast_t, aux_t, - lexemes_t) +void codegen(const char *, bitset_t *, fold_t *, scope_t *, type_t **, ast_t, + aux_t, lexemes_t) __attribute__((nonnull)); #endif /* !ORYX_CODEGEN_H */ diff --git a/src/lexer.c b/src/lexer.c index e35f1cd..0ec82d5 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -76,9 +76,8 @@ lexstring(const uchar *code, size_t codesz) /* Single-byte literals */ case '%': case '&': case '(': case ')': case '*': - case '+': case '-': case ':': case ';': case '=': - case '[': case ']': case '{': case '|': case '}': - case '~': + case '+': case '-': case ':': case ';': case '[': + case ']': case '{': case '|': case '}': case '~': data.kinds[data.len++] = ch; break; @@ -88,7 +87,7 @@ lexstring(const uchar *code, size_t codesz) /* Single- or double-byte literals */ case '/': - if (code < end && code[0] == '*') { + if (likely(code < end) && code[0] == '*') { if (!skpcmnt(&code, end)) err("Unterminated comment at byte %td", code - start); continue; @@ -96,11 +95,17 @@ lexstring(const uchar *code, size_t codesz) data.kinds[data.len++] = ch; break; - case '<': case '>': + case '!': + if (unlikely(code == end || code[0] != '=')) + goto fallback; + code++; + data.kinds[data.len++] = LEXBANGEQ; + break; + case '<': case '=': case '>': data.kinds[data.len++] = ch; /* See the comment in lexer.h for where 193 comes from */ - if (code < end && code[0] == ch) { + if (likely(code < end) && code[0] == ch) { code++; data.kinds[data.len - 1] += 193; } diff --git a/src/lexer.h b/src/lexer.h index b62cc17..1ecafde 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -37,10 +37,12 @@ enum { LEXSTAR = '*', LEXTILDE = '~', - /* We keep these exactly 2 away from each other, because ‘<’ and ‘>’ are 2 - away from each other in ASCII. This gives us a simple mapping from some - token T to the doubled equivalent by doing T += 193. */ + LEXBANGEQ = UINT8_MAX - 3, /* Not equals */ + + /* This gives us a simple mapping from some token T to the doubled + equivalent by doing T += 193. */ LEXLANGL_DBL = UINT8_MAX - 2, /* << */ + LEXEQ_DBL = UINT8_MAX - 1, /* == */ LEXRANGL_DBL = UINT8_MAX - 0, /* >> */ }; diff --git a/src/main.c b/src/main.c index 1555d17..e308c7c 100644 --- a/src/main.c +++ b/src/main.c @@ -12,6 +12,7 @@ #include "alloc.h" #include "analyzer.h" +#include "bitset.h" #include "codegen.h" #include "common.h" #include "errors.h" @@ -73,19 +74,20 @@ usage: char *src = readfile(argv[0], &srclen); aux_t aux; - mpq_t *folds; + fold_t *folds; scope_t *scps; + bitset_t *cnst; arena_t a = NULL; lexemes_t toks = lexstring(src, srclen); ast_t ast = parsetoks(toks, &aux); - type_t **types = analyzeprog(ast, aux, toks, &a, &scps, &folds); - codegen(argv[0], folds, scps, types, ast, aux, toks); + type_t **types = analyzeprog(ast, aux, toks, &a, &scps, &folds, &cnst); + codegen(argv[0], cnst, folds, scps, types, ast, aux, toks); #if DEBUG for (size_t i = 0; i < ast.len; i++) { - if ((*folds[i])._mp_den._mp_d != NULL) - mpq_clear(folds[i]); + if ((*folds[i].q)._mp_den._mp_d != NULL) + mpq_clear(folds[i].q); } free(folds); diff --git a/src/parser.c b/src/parser.c index a6d82a8..49067e2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -70,9 +70,11 @@ fwdnode(ast_t ast, idx_t i) case ASTBINADD: case ASTBINAND: case ASTBINDIV: + case ASTBINEQ: case ASTBINIOR: case ASTBINMOD: case ASTBINMUL: + case ASTBINNEQ: case ASTBINSHL: case ASTBINSHR: case ASTBINSUB: @@ -88,7 +90,7 @@ fwdnode(ast_t ast, idx_t i) case ASTTYPE: return i + 1; case ASTFNPROTO: - assert("analyzer: Not reachable"); + default: __builtin_unreachable(); } } @@ -284,8 +286,18 @@ idx_t parseexprinc(ast_t *ast, lexemes_t toks, idx_t lhs, int minprec) { static const int prectbl[UINT8_MAX + 1] = { - ['+'] = 1, ['-'] = 1, ['|'] = 1, ['~'] = 1, ['%'] = 2, - ['&'] = 2, ['*'] = 2, ['/'] = 2, [LEXLANGL_DBL] = 2, [LEXRANGL_DBL] = 2, + [LEXBANGEQ] = 3, + [LEXEQ_DBL] = 3, + ['+'] = 4, + ['-'] = 4, + ['|'] = 4, + ['~'] = 4, + ['%'] = 5, + ['&'] = 5, + ['*'] = 5, + ['/'] = 5, + [LEXLANGL_DBL] = 5, + [LEXRANGL_DBL] = 5, }; uint8_t op = toks.kinds[toksidx]; diff --git a/src/parser.h b/src/parser.h index e30ec8e..363478c 100644 --- a/src/parser.h +++ b/src/parser.h @@ -97,10 +97,18 @@ enum { ‘lhs & rhs’ */ ASTBINIOR = '|', + /* Binary not equals + ‘lhs != rhs’ */ + ASTBINNEQ = UINT8_MAX - 3, + /* Binary left shift ‘lhs << rhs’ */ ASTBINSHL = UINT8_MAX - 2, + /* Binary equals + ‘lhs == rhs’ */ + ASTBINEQ = UINT8_MAX - 1, + /* Binary right shift ‘lhs >> rhs’ */ ASTBINSHR = UINT8_MAX - 0, diff --git a/src/symtab.c b/src/symtab.c deleted file mode 100644 index 8fcba3c..0000000 --- a/src/symtab.c +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include - -#include "alloc.h" -#include "symtab.h" -#include "strview.h" - -struct symtab { - symtab_t *child[4]; - strview_t key; - symval_t val; -}; - -struct typetab { - typetab_t *child[4]; - strview_t key; - type_t *val; -}; - -symval_t * -symtab_insert(symtab_t **m, strview_t sv, arena_t *a) -{ - for (uint64_t h = strview_hash(sv); *m; h <<= 2) { - if (strview_eq(sv, (*m)->key)) - return &(*m)->val; - m = &(*m)->child[h >> 62]; - } - if (a == NULL) - return NULL; - *m = arena_new(a, symtab_t, 1); - (*m)->key = sv; - return &(*m)->val; -} - -type_t ** -typetab_insert(typetab_t **m, strview_t sv, arena_t *a) -{ - for (uint64_t h = strview_hash(sv); *m; h <<= 2) { - if (strview_eq(sv, (*m)->key)) - return &(*m)->val; - m = &(*m)->child[h >> 62]; - } - if (a == NULL) - return NULL; - *m = arena_new(a, typetab_t, 1); - (*m)->key = sv; - return &(*m)->val; -} diff --git a/src/symtab.h b/src/symtab.h deleted file mode 100644 index 448ffd9..0000000 --- a/src/symtab.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef ORYX_TABLES_H -#define ORYX_TABLES_H - -#include - -#include - -#include "alloc.h" -#include "strview.h" -#include "types.h" - -typedef struct symtab symtab_t; -typedef struct typetab typetab_t; - -typedef struct { - bool exists; - idx_t i; - LLVMValueRef v; -} symval_t; - -typedef struct type { - uint8_t kind; - - union { - struct { - uint8_t size; /* number of bytes */ - bool isfloat; - bool issigned; - }; - struct { - struct type *params, *ret; - idx_t paramcnt; - }; - }; -} type_t; - -/* Index the symbol table M with the key SV, returning a pointer to the - value. If no entry exists and A is non-null, a pointer to a newly - allocated (and zeroed) value is returned, NULL otherwise. */ -symval_t *symtab_insert(symtab_t **m, strview_t sv, arena_t *a) - __attribute__((nonnull(1))); - -type_t **typetab_insert(typetab_t **m, strview_t sv, arena_t *a) - __attribute__((nonnull(1))); - -#endif /* !ORYX_TABLES_H */ diff --git a/src/tables.c b/src/tables.c new file mode 100644 index 0000000..61adff7 --- /dev/null +++ b/src/tables.c @@ -0,0 +1,48 @@ +#include +#include + +#include "alloc.h" +#include "strview.h" +#include "tables.h" + +struct symtab { + symtab_t *child[4]; + strview_t key; + symval_t val; +}; + +struct typetab { + typetab_t *child[4]; + strview_t key; + type_t *val; +}; + +symval_t * +symtab_insert(symtab_t **m, strview_t sv, arena_t *a) +{ + for (uint64_t h = strview_hash(sv); *m; h <<= 2) { + if (strview_eq(sv, (*m)->key)) + return &(*m)->val; + m = &(*m)->child[h >> 62]; + } + if (a == NULL) + return NULL; + *m = arena_new(a, symtab_t, 1); + (*m)->key = sv; + return &(*m)->val; +} + +type_t ** +typetab_insert(typetab_t **m, strview_t sv, arena_t *a) +{ + for (uint64_t h = strview_hash(sv); *m; h <<= 2) { + if (strview_eq(sv, (*m)->key)) + return &(*m)->val; + m = &(*m)->child[h >> 62]; + } + if (a == NULL) + return NULL; + *m = arena_new(a, typetab_t, 1); + (*m)->key = sv; + return &(*m)->val; +} diff --git a/src/tables.h b/src/tables.h new file mode 100644 index 0000000..448ffd9 --- /dev/null +++ b/src/tables.h @@ -0,0 +1,46 @@ +#ifndef ORYX_TABLES_H +#define ORYX_TABLES_H + +#include + +#include + +#include "alloc.h" +#include "strview.h" +#include "types.h" + +typedef struct symtab symtab_t; +typedef struct typetab typetab_t; + +typedef struct { + bool exists; + idx_t i; + LLVMValueRef v; +} symval_t; + +typedef struct type { + uint8_t kind; + + union { + struct { + uint8_t size; /* number of bytes */ + bool isfloat; + bool issigned; + }; + struct { + struct type *params, *ret; + idx_t paramcnt; + }; + }; +} type_t; + +/* Index the symbol table M with the key SV, returning a pointer to the + value. If no entry exists and A is non-null, a pointer to a newly + allocated (and zeroed) value is returned, NULL otherwise. */ +symval_t *symtab_insert(symtab_t **m, strview_t sv, arena_t *a) + __attribute__((nonnull(1))); + +type_t **typetab_insert(typetab_t **m, strview_t sv, arena_t *a) + __attribute__((nonnull(1))); + +#endif /* !ORYX_TABLES_H */ diff --git a/test.yx b/test.yx deleted file mode 100644 index 346f095..0000000 --- a/test.yx +++ /dev/null @@ -1,133 +0,0 @@ -X :: 5; -Y: u8 : X; -Z :: Y; - -ZZ :: 127; - -my_global: i8 = ZZ; -no_init: u128; - -another_global := 123'456'789; - -uninit :: () { - x: int = …; - y: u32 = ...; -} - -pub main :: () { - no_init: u128; - x := no_init; - - no_init_undef: int = …; -} - -pub bar :: () int { - hello_world_this_is_my_var := 126; - hello_world_this_is_my_var′ := 127; - hello_world_this_is_my_var″ := 128; - return hello_world_this_is_my_var′; -} - -foo :: () int { - baz :: () int { - X :: X; - return X; - } - - y := 5; - x := y; - return x; -} - -rune :: () { - ch: rune = 8224; /* U+2020 DAGGER */ -} - -float_test :: () { - π :: 3.14159265358979323846264338327950288419716939937510582097494459; - a: f16 = π; - b: f32 = π; - c: f64 = π; - d: f128 = π; - - f := 3.14; -} - -/* Yes we have comments in this language, - /* You can even nest them! */ */ - -/* There are no line-comments, because why have 2 styles when you can have 1? */ - -neg_test :: () int { - x := 420; - return -x; -} - -/* This method should return 1337, because it’s not an increment but - actually parsed as (+ (+ x)), which is equivalant to ‘x’. */ -some_math :: () int { - x := 1337; - return ++x; -} - -complex_math :: () uint { - x: uint = 42; - y: uint = 123; - z := (x + y) / x; - return x + y / x; -} - -div :: () f64 { - return 5 / 2; -} - -remainder :: () int { - x := 5; - y := 2; - z := 5 % 2; - return x % y; -} - -xor :: () int { - x := 42; - y := 123; - z := ~y; - return x + z ~ y; -} - -shl_kinda_sus :: () u8 { - x: u8 = 1; - y: u16 : 1<<8 - 1; - return x<> 256; -} - -bit_fidling :: () int { - x := 122; - y := x | 1; - return y; -} - -assignment :: () int { - x := 5; - y := 4; - x = x + y; - y = 69; - return x + y; -} - -some_global: int; -mutate_global :: () { - some_global = 42; -} - -float_div :: () f64 { - x := 5.0; - y := 2.0; - return x / y; -} -- cgit v1.2.3