aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-07-06 03:33:04 +0200
committerThomas Voss <mail@thomasvoss.com> 2024-07-06 03:49:03 +0200
commitd3c95ef09fd493241273d6e63aca31d703c2503c (patch)
tree94dc719ed4eb4a3bc67c35b19bca69916b56fc32
parentb0b7d209fa978ff4a6b9c8cc5a8e4a3207cdd63f (diff)
Implement booleans
-rw-r--r--.gitignore3
-rw-r--r--grammar.ebnf2
-rw-r--r--src/analyzer.c199
-rw-r--r--src/analyzer.h14
-rw-r--r--src/bitset.h38
-rw-r--r--src/codegen.c53
-rw-r--r--src/codegen.h4
-rw-r--r--src/lexer.c17
-rw-r--r--src/lexer.h8
-rw-r--r--src/main.c12
-rw-r--r--src/parser.c18
-rw-r--r--src/parser.h8
-rw-r--r--src/tables.c (renamed from src/symtab.c)2
-rw-r--r--src/tables.h (renamed from src/symtab.h)0
-rw-r--r--test.yx133
15 files changed, 284 insertions, 227 deletions
diff --git a/.gitignore b/.gitignore
index 45a74dd..ad32da2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,10 +9,11 @@ oryx
*.gen.c
*.o
-# Test files. Autogenerated with the following:
+# Test files. Partially autogenerated with the following:
#
# ./make; find test -type f -executable
#
+test.*
test/arena
# Compilation artifacts of GNU MP. Autogenerated with the following:
diff --git a/grammar.ebnf b/grammar.ebnf
index a5f5e50..b28e0ac 100644
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -54,12 +54,14 @@ unop
binop
: '+'
+ | '!='
| '%'
| '&'
| '*'
| '-'
| '/'
| '<<'
+ | '=='
| '>>'
| '|'
| '~'
diff --git a/src/analyzer.c b/src/analyzer.c
index 6dff9ff..f40d12b 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -12,11 +12,12 @@
#include "alloc.h"
#include "analyzer.h"
+#include "bitset.h"
#include "common.h"
#include "errors.h"
#include "parser.h"
#include "strview.h"
-#include "symtab.h"
+#include "tables.h"
#include "types.h"
#define LOG2_10 (3.321928)
@@ -41,11 +42,12 @@ struct azctx {
ast_t ast;
aux_t aux;
+ bitset_t *cnst;
+ fold_t *folds;
lexemes_t toks;
- mpq_t *folds;
scopes_t scps;
- type_t **types;
typetab_t *ttab;
+ type_t **types;
/* The return type of the function being analyzed */
type_t *fnret;
@@ -100,6 +102,9 @@ static bool typecompat(type_t *t1, type_t *t2)
/* Check if the statement at node I in the AST returns from the function */
static bool returns(ast_t, idx_t i);
+static bool memzeroed(void *, size_t)
+ __attribute__((nonnull));
+
enum {
PRIM_INT8,
PRIM_INT16,
@@ -115,6 +120,8 @@ enum {
PRIM_UINT128,
PRIM_UINT,
+ PRIM_BOOL,
+
PRIM_RUNE,
PRIM_F16,
@@ -143,6 +150,8 @@ static struct {
[PRIM_UINT64] = {SVC("u64"), {.kind = TYPE_NUM, .size = 8}},
[PRIM_UINT128] = {SVC("u128"), {.kind = TYPE_NUM, .size = 16}},
+ [PRIM_BOOL] = {SVC("bool"), {.kind = TYPE_BOOL, .size = 1}},
+
[PRIM_RUNE] = {SVC("rune"), {.kind = TYPE_NUM, .size = 4, .issigned = true}},
[PRIM_F16] = {SVC("f16"), {.kind = TYPE_NUM, .size = 2, .isfloat = true}},
@@ -151,13 +160,14 @@ static struct {
[PRIM_F128] = {SVC("f128"), {.kind = TYPE_NUM, .size = 16, .isfloat = true}},
};
-static type_t NOT_CHECKED = {.kind = TYPE_CHECKING};
-static type_t UNTYPED_INT = {.kind = TYPE_NUM, .size = 0};
-static type_t UNTYPED_FLT = {.kind = TYPE_NUM, .size = 0, .isfloat = true};
+static type_t NOT_CHECKED = {.kind = TYPE_CHECKING};
+static type_t UNTYPED_BOOL = {.kind = TYPE_BOOL};
+static type_t UNTYPED_INT = {.kind = TYPE_NUM};
+static type_t UNTYPED_FLT = {.kind = TYPE_NUM, .isfloat = true};
type_t **
analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, scope_t **scps,
- mpq_t **folds)
+ fold_t **folds, bitset_t **cnst)
{
struct azctx ctx = {
.a = a,
@@ -174,15 +184,17 @@ analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, scope_t **scps,
if ((ctx.types = calloc(ctx.ast.len, sizeof(*ctx.types))) == NULL)
err("calloc:");
+ ctx.cnst = mkbitset(ctx.a, ctx.ast.len);
gensymtabs(&ctx);
analyzeast(&ctx);
- if ((ctx.folds = calloc(ctx.ast.len, sizeof(**ctx.folds))) == NULL)
+ if ((ctx.folds = calloc(ctx.ast.len, sizeof(fold_t))) == NULL)
err("calloc:");
constfold(&ctx);
- *scps = ctx.scps.buf;
+ *cnst = ctx.cnst;
*folds = ctx.folds;
+ *scps = ctx.scps.buf;
return ctx.types;
}
@@ -247,6 +259,9 @@ analyzedecl(struct azctx *ctx, idx_t i)
bool isstatic = ctx->ast.kinds[i] <= _AST_DECLS_END
&& ctx->aux.buf[ctx->ast.kids[i].lhs].decl.isstatic;
+ if (isconst)
+ SETBIT(ctx->cnst, i);
+
if (isstatic && isundef)
err("analyzer: Static variables may not be undefined");
@@ -289,8 +304,16 @@ analyzedecl(struct azctx *ctx, idx_t i)
if (ltype == NULL) {
ltype = rtype;
- if (ctx->ast.kinds[i] == ASTDECL && rtype->size == 0)
- ltype = &primitives[rtype == &UNTYPED_INT ? PRIM_INT : PRIM_F64].t;
+ if (ctx->ast.kinds[i] == ASTDECL && rtype->size == 0) {
+ if (rtype == &UNTYPED_INT)
+ ltype = &primitives[PRIM_INT].t;
+ else if (rtype == &UNTYPED_FLT)
+ ltype = &primitives[PRIM_F64].t;
+ else if (rtype == &UNTYPED_BOOL)
+ ltype = &primitives[PRIM_BOOL].t;
+ else
+ __builtin_unreachable();
+ }
} else if (rtype != NULL && !typecompat(ltype, rtype))
err("analyzer: Type mismatch");
@@ -346,6 +369,7 @@ analyzeexpr(struct azctx *ctx, idx_t i)
switch (ctx->ast.kinds[i]) {
case ASTNUMLIT: {
+ SETBIT(ctx->cnst, i);
strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]];
ctx->types[i] = memchr(sv.p, '.', sv.len) != NULL ? &UNTYPED_FLT
: &UNTYPED_INT;
@@ -375,6 +399,7 @@ analyzeexpr(struct azctx *ctx, idx_t i)
SV_PRI_ARGS(sv));
}
+ SET_DST_IF_SRC(ctx->cnst, i, sym->i);
ctx->types[i] = ctx->types[sym->i];
return fwdnode(ctx->ast, i);
}
@@ -387,6 +412,7 @@ analyzeexpr(struct azctx *ctx, idx_t i)
idx_t ni, rhs;
rhs = ctx->ast.kids[i].rhs;
ni = analyzeexpr(ctx, rhs);
+ SET_DST_IF_SRC(ctx->cnst, i, rhs);
type_t *t = ctx->types[rhs];
if (ctx->ast.kinds[i] == ASTUNNEG
&& (t->kind != TYPE_NUM || !t->issigned))
@@ -405,9 +431,11 @@ analyzeexpr(struct azctx *ctx, idx_t i)
case ASTBINADD:
case ASTBINAND:
case ASTBINDIV:
+ case ASTBINEQ:
+ case ASTBINIOR:
case ASTBINMOD:
case ASTBINMUL:
- case ASTBINIOR:
+ case ASTBINNEQ:
case ASTBINSHL:
case ASTBINSHR:
case ASTBINSUB:
@@ -418,6 +446,9 @@ analyzeexpr(struct azctx *ctx, idx_t i)
(void)analyzeexpr(ctx, lhs);
idx_t ni = analyzeexpr(ctx, rhs);
+ if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs))
+ SETBIT(ctx->cnst, i);
+
bool isshift = ctx->ast.kinds[i] == ASTBINSHL
|| ctx->ast.kinds[i] == ASTBINSHR;
if (!isshift && !typecompat(ctx->types[lhs], ctx->types[rhs]))
@@ -428,6 +459,11 @@ analyzeexpr(struct azctx *ctx, idx_t i)
[ASTBINSHL] = true, [ASTBINSHR] = true, [ASTBINXOR] = true,
};
+ static const bool logical[UINT8_MAX + 1] = {
+ [ASTBINEQ] = true,
+ [ASTBINNEQ] = true,
+ };
+
if (int_only[ctx->ast.kinds[i]]
&& (ctx->types[lhs]->kind != TYPE_NUM || ctx->types[lhs]->isfloat
|| ctx->types[rhs]->kind != TYPE_NUM
@@ -448,13 +484,16 @@ analyzeexpr(struct azctx *ctx, idx_t i)
There is an exception for the left- and right shift operators.
Expressions for these operators always take the type of x, and
y can be any integer type. */
- if (isshift)
+ if (isshift) {
ctx->types[i] = ctx->types[lhs];
- else {
- ctx->types[i] = ctx->types[lhs]->size != 0 ? ctx->types[lhs]
- : ctx->types[rhs];
- ctx->types[i]->isfloat = ctx->types[lhs]->isfloat
- || ctx->types[rhs]->isfloat;
+ } else if (logical[ctx->ast.kinds[i]]) {
+ ctx->types[i] = TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)
+ ? &UNTYPED_BOOL
+ : &primitives[PRIM_BOOL].t;
+ } else if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) {
+ ctx->types[i] = ctx->types[ctx->types[lhs]->isfloat ? lhs : rhs];
+ } else {
+ ctx->types[i] = ctx->types[TESTBIT(ctx->cnst, lhs) ? rhs : lhs];
}
return ni;
}
@@ -549,7 +588,7 @@ constfoldblk(struct azctx *ctx, idx_t i)
idx_t
constfoldexpr(struct azctx *ctx, type_t *T, idx_t i)
{
- if (MPQ_IS_INIT(ctx->folds[i]))
+ if (!memzeroed(ctx->folds[i].data, sizeof(ctx->folds[i].data)))
return fwdnode(ctx->ast, i);
idx_t ni;
@@ -559,7 +598,7 @@ constfoldexpr(struct azctx *ctx, type_t *T, idx_t i)
case ASTFN:
return constfoldblk(ctx, ctx->ast.kids[i].rhs);
case ASTNUMLIT: {
- mpq_init(ctx->folds[i]);
+ mpq_init(ctx->folds[i].q);
strview_t sv = ctx->toks.strs[ctx->ast.lexemes[i]];
char *buf = tmpalloc(ctx->s, sv.len + 1, 1);
@@ -589,13 +628,13 @@ constfoldexpr(struct azctx *ctx, type_t *T, idx_t i)
#endif
mpf_set_str(x, buf, 10);
assert(ret == 0);
- mpq_set_f(ctx->folds[i], x);
+ mpq_set_f(ctx->folds[i].q, x);
mpf_clear(x);
} else {
#if DEBUG
int ret =
#endif
- mpq_set_str(ctx->folds[i], buf, 10);
+ mpq_set_str(ctx->folds[i].q, buf, 10);
assert(ret == 0);
}
ni = fwdnode(ctx->ast, i);
@@ -626,12 +665,14 @@ constfoldexpr(struct azctx *ctx, type_t *T, idx_t i)
case ASTCDECL: {
idx_t expr = ctx->ast.kids[sym->i].rhs;
assert(expr != AST_EMPTY);
- if (!MPQ_IS_INIT(ctx->folds[i])) {
+ if (memzeroed(ctx->folds[i].data, sizeof(ctx->folds[i].data))) {
ctx->si = lvl;
(void)constfolddecl(ctx, sym->i);
}
- MPQCPY(ctx->folds[i], ctx->folds[expr]);
- assert(MPQ_IS_INIT(ctx->folds[i]));
+ if (ctx->types[i]->kind == TYPE_NUM)
+ MPQCPY(ctx->folds[i].q, ctx->folds[expr].q);
+ else if (ctx->types[i]->kind == TYPE_BOOL)
+ ctx->folds[i].b = ctx->folds[expr].b;
ni = fwdnode(ctx->ast, i);
goto out;
}
@@ -645,17 +686,16 @@ out:
}
case ASTUNCMPL: {
ni = constfoldexpr(ctx, ctx->types[i], ctx->ast.kids[i].rhs);
- if (MPQ_IS_INIT(ctx->folds[ctx->ast.kids[i].rhs]))
+ if (TESTBIT(ctx->cnst, ctx->ast.kids[i].rhs))
err("analyzer: Cannot perform bitwise complement of constant");
break;
}
case ASTUNNEG: {
idx_t rhs = ctx->ast.kids[i].rhs;
ni = constfoldexpr(ctx, ctx->types[i], rhs);
- mpq_t *x = ctx->folds + rhs;
- if (MPQ_IS_INIT(*x)) {
- MPQCPY(ctx->folds[i], *x);
- mpq_neg(ctx->folds[i], ctx->folds[i]);
+ if (TESTBIT(ctx->cnst, rhs)) {
+ MPQCPY(ctx->folds[i].q, ctx->folds[rhs].q);
+ mpq_neg(ctx->folds[i].q, ctx->folds[i].q);
}
break;
}
@@ -673,10 +713,10 @@ out:
rhs = ctx->ast.kids[i].rhs;
(void)constfoldexpr(ctx, ctx->types[i], lhs);
ni = constfoldexpr(ctx, ctx->types[i], rhs);
- if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) {
- mpq_init(ctx->folds[i]);
- mpq_fns[ctx->ast.kinds[i]](ctx->folds[i], ctx->folds[lhs],
- ctx->folds[rhs]);
+ if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) {
+ mpq_init(ctx->folds[i].q);
+ mpq_fns[ctx->ast.kinds[i]](ctx->folds[i].q, ctx->folds[lhs].q,
+ ctx->folds[rhs].q);
}
break;
}
@@ -688,13 +728,14 @@ out:
(void)constfoldexpr(ctx, ctx->types[i], lhs);
ni = constfoldexpr(ctx, ctx->types[i], rhs);
- if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) {
- mpq_init(ctx->folds[i]);
+ if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) {
+ mpq_init(ctx->folds[i].q);
if (ctx->types[i]->isfloat)
- mpq_div(ctx->folds[i], ctx->folds[lhs], ctx->folds[rhs]);
+ mpq_div(ctx->folds[i].q, ctx->folds[lhs].q, ctx->folds[rhs].q);
else {
- mpz_tdiv_q(mpq_numref(ctx->folds[i]), mpq_numref(ctx->folds[lhs]),
- mpq_numref(ctx->folds[rhs]));
+ mpz_tdiv_q(mpq_numref(ctx->folds[i].q),
+ mpq_numref(ctx->folds[lhs].q),
+ mpq_numref(ctx->folds[rhs].q));
}
}
break;
@@ -718,12 +759,13 @@ out:
(void)constfoldexpr(ctx, ctx->types[i], lhs);
ni = constfoldexpr(ctx, ctx->types[i], rhs);
- if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) {
- assert(MPQ_IS_WHOLE(ctx->folds[lhs]));
- assert(MPQ_IS_WHOLE(ctx->folds[rhs]));
- mpq_init(ctx->folds[i]);
- mpz_fns[ctx->ast.kinds[i]](mpq_numref(ctx->folds[i]), mpq_numref(ctx->folds[lhs]),
- mpq_numref(ctx->folds[rhs]));
+ if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) {
+ assert(MPQ_IS_WHOLE(ctx->folds[lhs].q));
+ assert(MPQ_IS_WHOLE(ctx->folds[rhs].q));
+ mpq_init(ctx->folds[i].q);
+ mpz_fns[ctx->ast.kinds[i]](mpq_numref(ctx->folds[i].q),
+ mpq_numref(ctx->folds[lhs].q),
+ mpq_numref(ctx->folds[rhs].q));
}
break;
}
@@ -742,18 +784,19 @@ out:
(void)constfoldexpr(ctx, ctx->types[lhs], lhs);
ni = constfoldexpr(ctx, ctx->types[rhs], rhs);
- if (MPQ_IS_INIT(ctx->folds[rhs])) {
- if (mpq_sgn(ctx->folds[rhs]) == -1)
+ if (TESTBIT(ctx->cnst, rhs)) {
+ if (mpq_sgn(ctx->folds[rhs].q) == -1)
err("analyzer: Cannot shift by negative value");
+ /* TODO: Assert that in X<<Y, Y is ≤ widthof(X) */
}
- if (MPQ_IS_INIT(ctx->folds[lhs]) && MPQ_IS_INIT(ctx->folds[rhs])) {
+ if (TESTBIT(ctx->cnst, lhs) && TESTBIT(ctx->cnst, rhs)) {
mpz_ptr cur_z, lhs_z, rhs_z;
- cur_z = mpq_numref(ctx->folds[i]);
- lhs_z = mpq_numref(ctx->folds[lhs]);
- rhs_z = mpq_numref(ctx->folds[rhs]);
+ cur_z = mpq_numref(ctx->folds[i].q);
+ lhs_z = mpq_numref(ctx->folds[lhs].q);
+ rhs_z = mpq_numref(ctx->folds[rhs].q);
- mpq_init(ctx->folds[i]);
+ mpq_init(ctx->folds[i].q);
if (mpz_cmp_ui(rhs_z, ULONG_MAX) > 0)
err("analyzer: Shift oprand too large");
mp_bitcnt_t shftcnt = mpz_get_ui(rhs_z);
@@ -761,19 +804,52 @@ out:
}
break;
}
+ case ASTBINEQ:
+ case ASTBINNEQ: {
+ idx_t lhs, rhs;
+ lhs = ctx->ast.kids[i].lhs;
+ rhs = ctx->ast.kids[i].rhs;
+
+ (void)constfoldexpr(ctx, ctx->types[i], lhs);
+ ni = constfoldexpr(ctx, ctx->types[i], rhs);
+
+ bool both_oprs_const = TESTBIT(ctx->cnst, lhs)
+ && TESTBIT(ctx->cnst, rhs);
+ bool can_constfold_ints = ctx->types[lhs]->kind == TYPE_NUM
+ && both_oprs_const;
+ bool can_constfold_bools = ctx->types[lhs]->kind == TYPE_BOOL
+ && both_oprs_const;
+
+ bool eq = can_constfold_ints
+ ? mpq_equal(ctx->folds[lhs].q, ctx->folds[rhs].q)
+ : can_constfold_bools ? ctx->folds[lhs].b == ctx->folds[rhs].b
+ : false;
+
+ if (can_constfold_ints || can_constfold_bools) {
+ ctx->folds[i].set = true;
+ ctx->folds[i].b = (ctx->ast.kinds[i] == ASTBINEQ && eq)
+ || (ctx->ast.kinds[i] == ASTBINNEQ && !eq);
+ }
+ break;
+ }
default:
__builtin_unreachable();
}
- if (MPQ_IS_INIT(ctx->folds[i]) && !T->issigned && mpq_sgn(ctx->folds[i]) == -1)
+ if (ctx->types[i]->kind == TYPE_NUM && TESTBIT(ctx->cnst, i)
+ && !T->issigned && mpq_sgn(ctx->folds[i].q) == -1)
+ {
err("analyzer: Cannot convert negative value to unsigned type");
+ }
- if (T->size != 0 && !T->isfloat && MPQ_IS_INIT(ctx->folds[i])) {
- if (!MPQ_IS_WHOLE(ctx->folds[i]))
+ if (ctx->types[i]->kind == TYPE_NUM && TESTBIT(ctx->cnst, i)
+ && T->size != 0 && !T->isfloat)
+ {
+ if (!MPQ_IS_WHOLE(ctx->folds[i].q))
err("analyzer: Invalid integer");
int cmp;
- mpz_ptr num = mpq_numref(ctx->folds[i]);
+ mpz_ptr num = mpq_numref(ctx->folds[i].q);
if (T->size < sizeof(unsigned long)) {
unsigned long x = 1UL << (T->size * 8 - T->issigned);
cmp = mpz_cmp_ui(num, x - 1);
@@ -834,6 +910,10 @@ typecompat(type_t *lhs, type_t *rhs)
if (lhs == rhs)
return true;
+ /* Boolean types are only compatible with boolean types */
+ if (lhs->kind == TYPE_BOOL || rhs->kind == TYPE_BOOL)
+ return lhs->kind == TYPE_BOOL && rhs->kind == TYPE_BOOL;
+
/* Function types are compatible if they have the same parameter- and
return types */
if (lhs->kind == TYPE_FN && rhs->kind == TYPE_FN)
@@ -852,3 +932,10 @@ typecompat(type_t *lhs, type_t *rhs)
return lhs->issigned == rhs->issigned && lhs->isfloat == rhs->isfloat
&& lhs->size == rhs->size;
}
+
+bool
+memzeroed(void *p, size_t n)
+{
+ uchar *m = p;
+ return (*m == 0) && memcmp(m, m + 1, n - 1) == 0;
+}
diff --git a/src/analyzer.h b/src/analyzer.h
index 4cba2a7..8ca4c38 100644
--- a/src/analyzer.h
+++ b/src/analyzer.h
@@ -2,14 +2,16 @@
#define ORYX_ANALYZER_H
#include <assert.h>
+#include <stdbool.h>
#include <stdint.h>
#include <gmp.h>
#include "alloc.h"
+#include "bitset.h"
#include "lexer.h"
#include "parser.h"
-#include "symtab.h"
+#include "tables.h"
#include "types.h"
/* The different base types */
@@ -18,6 +20,7 @@ enum {
detecting cyclic definitions. */
TYPE_CHECKING,
+ TYPE_BOOL,
TYPE_NUM,
TYPE_FN,
@@ -32,7 +35,14 @@ typedef struct {
symtab_t *map;
} scope_t;
-type_t **analyzeprog(ast_t, aux_t, lexemes_t, arena_t *, scope_t **, mpq_t **)
+typedef union {
+ char data[sizeof(mpq_t)];
+ mpq_t q;
+ struct { bool b, set; };
+} fold_t;
+
+type_t **analyzeprog(ast_t, aux_t, lexemes_t, arena_t *, scope_t **, fold_t **,
+ bitset_t **)
__attribute__((returns_nonnull, nonnull));
#endif /* !ORYX_ANALYZER_H */
diff --git a/src/bitset.h b/src/bitset.h
new file mode 100644
index 0000000..2b46090
--- /dev/null
+++ b/src/bitset.h
@@ -0,0 +1,38 @@
+#ifndef ORYX_BITSET_H
+#define ORYX_BITSET_H
+
+#include <limits.h>
+#include <stdbool.h>
+
+#include "alloc.h"
+#include "common.h"
+
+typedef unsigned char bitset_t;
+
+#define _BITSET_BITS (sizeof(bitset_t) * CHAR_BIT)
+#define _BITSLOT(x) ((x) / _BITSET_BITS)
+#define _BITMASK(x) (1 << ((x) % _BITSET_BITS))
+
+/* Set- or test the bit as position X in BS */
+#define SETBIT(bs, x) ((bs)[_BITSLOT(x)] |= _BITMASK(x))
+#define TESTBIT(bs, x) ((bool)((bs)[_BITSLOT(x)] & _BITMASK(x)))
+
+/* Set the bit DST in BS if the bit SRC is set */
+#define SET_DST_IF_SRC(bs, dst, src) \
+ do { \
+ if (TESTBIT(bs, src)) \
+ SETBIT(bs, dst); \
+ } while (false)
+
+/* Allocate a new bitset capable of holding N bits */
+static inline bitset_t *mkbitset(arena_t *, size_t n)
+ __attribute__((always_inline, returns_nonnull, nonnull, malloc));
+
+bitset_t *
+mkbitset(arena_t *a, size_t n)
+{
+ size_t bits = CHAR_BIT * sizeof(bitset_t);
+ return arena_new(a, bitset_t, (n + bits - 1) / bits);
+}
+
+#endif /* !ORYX_BITSET_H */
diff --git a/src/codegen.c b/src/codegen.c
index a263db2..252ac29 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -12,6 +12,7 @@
#include "alloc.h"
#include "analyzer.h"
+#include "bitset.h"
#include "common.h"
#include "errors.h"
#include "parser.h"
@@ -34,12 +35,13 @@ struct cgctx {
arena_t *a;
scratch_t *s;
- mpq_t *folds;
- scope_t *scps;
- type_t **types;
ast_t ast;
aux_t aux;
+ bitset_t *cnst;
+ fold_t *folds;
lexemes_t toks;
+ scope_t *scps;
+ type_t **types;
LLVMBuilderRef bob;
LLVMContextRef ctx;
@@ -60,8 +62,8 @@ extern bool lflag, sflag;
extern const char *oflag;
void
-codegen(const char *file, mpq_t *folds, scope_t *scps, type_t **types,
- ast_t ast, aux_t aux, lexemes_t toks)
+codegen(const char *file, bitset_t *cnst, fold_t *folds, scope_t *scps,
+ type_t **types, ast_t ast, aux_t aux, lexemes_t toks)
{
LLVM_TARGET_INIT(AArch64);
LLVM_TARGET_INIT(X86);
@@ -81,12 +83,13 @@ codegen(const char *file, mpq_t *folds, scope_t *scps, type_t **types,
.a = &(arena_t){0},
.s = &(scratch_t){0},
- .folds = folds,
- .scps = scps,
- .types = types,
.ast = ast,
.aux = aux,
+ .cnst = cnst,
+ .folds = folds,
+ .scps = scps,
.toks = toks,
+ .types = types,
.ctx = llctx,
.mod = llmod,
@@ -143,13 +146,12 @@ static idx_t codegendecl(struct cgctx ctx, idx_t);
idx_t
codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv)
{
- /* If true, implies numeric constant */
- if (MPQ_IS_INIT(ctx.folds[i]) && !T->isfloat) {
+ if (T->kind == TYPE_NUM && TESTBIT(ctx.cnst, i) && !T->isfloat) {
char buf[40 /* The max value of a u128 is length 39 */];
- mpz_get_str(buf, 10, mpq_numref(ctx.folds[i]));
+ mpz_get_str(buf, 10, mpq_numref(ctx.folds[i].q));
*outv = LLVMConstIntOfString(type2llvm(ctx, T), buf, 10);
return fwdnode(ctx.ast, i);
- } else if (MPQ_IS_INIT(ctx.folds[i]) /* && type.isfloat */) {
+ } else if (T->kind == TYPE_NUM && TESTBIT(ctx.cnst, i)) {
char *s, *buf;
size_t len;
mpf_t x;
@@ -175,7 +177,7 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv)
}
mpf_init2(x, prec);
- mpf_set_q(x, ctx.folds[i]);
+ mpf_set_q(x, ctx.folds[i].q);
s = mpf_get_str(NULL, &e, 10, 0, x);
len = strlen(s);
@@ -191,6 +193,9 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv)
free(s);
mpf_clear(x);
return fwdnode(ctx.ast, i);
+ } else if (T->kind == TYPE_BOOL && TESTBIT(ctx.cnst, i)) {
+ *outv = LLVMConstInt(type2llvm(ctx, ctx.types[i]), ctx.folds[i].b, false);
+ return fwdnode(ctx.ast, i);
}
switch (ctx.ast.kinds[i]) {
@@ -257,6 +262,24 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t *T, LLVMValueRef *outv)
ctx.bob, vl, vr, bo.name);
return ni;
}
+ case ASTBINEQ:
+ case ASTBINNEQ: {
+ static const struct binop {
+ LLVMIntPredicate pred;
+ const char *name;
+ } binoptbl[UINT8_MAX + 1] = {
+ [ASTBINEQ] = {LLVMIntEQ, "ieq"},
+ [ASTBINNEQ] = {LLVMIntNE, "ine"},
+ };
+
+ idx_t lhs = ctx.ast.kids[i].lhs, rhs = ctx.ast.kids[i].rhs;
+ LLVMValueRef vl, vr;
+ (void)codegentypedexpr(ctx, lhs, ctx.types[i], &vl);
+ idx_t ni = codegentypedexpr(ctx, rhs, ctx.types[i], &vr);
+ struct binop bo = binoptbl[ctx.ast.kinds[i]];
+ *outv = LLVMBuildICmp(ctx.bob, bo.pred, vl, vr, bo.name);
+ return ni;
+ }
default:
__builtin_unreachable();
}
@@ -451,8 +474,8 @@ LLVMTypeRef
type2llvm(struct cgctx ctx, type_t *T)
{
switch (T->kind) {
- case TYPE_FN:
- err("codegen: %s: Not implemented for function types", __func__);
+ case TYPE_BOOL:
+ return LLVMInt1TypeInContext(ctx.ctx);
case TYPE_NUM:
assert(T->size != 0);
assert((unsigned)T->size * 8 <= 128);
diff --git a/src/codegen.h b/src/codegen.h
index e214067..fe0b08b 100644
--- a/src/codegen.h
+++ b/src/codegen.h
@@ -7,8 +7,8 @@
#include "lexer.h"
#include "parser.h"
-void codegen(const char *, mpq_t *, scope_t *, type_t **, ast_t, aux_t,
- lexemes_t)
+void codegen(const char *, bitset_t *, fold_t *, scope_t *, type_t **, ast_t,
+ aux_t, lexemes_t)
__attribute__((nonnull));
#endif /* !ORYX_CODEGEN_H */
diff --git a/src/lexer.c b/src/lexer.c
index e35f1cd..0ec82d5 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -76,9 +76,8 @@ lexstring(const uchar *code, size_t codesz)
/* Single-byte literals */
case '%': case '&': case '(': case ')': case '*':
- case '+': case '-': case ':': case ';': case '=':
- case '[': case ']': case '{': case '|': case '}':
- case '~':
+ case '+': case '-': case ':': case ';': case '[':
+ case ']': case '{': case '|': case '}': case '~':
data.kinds[data.len++] = ch;
break;
@@ -88,7 +87,7 @@ lexstring(const uchar *code, size_t codesz)
/* Single- or double-byte literals */
case '/':
- if (code < end && code[0] == '*') {
+ if (likely(code < end) && code[0] == '*') {
if (!skpcmnt(&code, end))
err("Unterminated comment at byte %td", code - start);
continue;
@@ -96,11 +95,17 @@ lexstring(const uchar *code, size_t codesz)
data.kinds[data.len++] = ch;
break;
- case '<': case '>':
+ case '!':
+ if (unlikely(code == end || code[0] != '='))
+ goto fallback;
+ code++;
+ data.kinds[data.len++] = LEXBANGEQ;
+ break;
+ case '<': case '=': case '>':
data.kinds[data.len++] = ch;
/* See the comment in lexer.h for where 193 comes from */
- if (code < end && code[0] == ch) {
+ if (likely(code < end) && code[0] == ch) {
code++;
data.kinds[data.len - 1] += 193;
}
diff --git a/src/lexer.h b/src/lexer.h
index b62cc17..1ecafde 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -37,10 +37,12 @@ enum {
LEXSTAR = '*',
LEXTILDE = '~',
- /* We keep these exactly 2 away from each other, because ‘<’ and ‘>’ are 2
- away from each other in ASCII. This gives us a simple mapping from some
- token T to the doubled equivalent by doing T += 193. */
+ LEXBANGEQ = UINT8_MAX - 3, /* Not equals */
+
+ /* This gives us a simple mapping from some token T to the doubled
+ equivalent by doing T += 193. */
LEXLANGL_DBL = UINT8_MAX - 2, /* << */
+ LEXEQ_DBL = UINT8_MAX - 1, /* == */
LEXRANGL_DBL = UINT8_MAX - 0, /* >> */
};
diff --git a/src/main.c b/src/main.c
index 1555d17..e308c7c 100644
--- a/src/main.c
+++ b/src/main.c
@@ -12,6 +12,7 @@
#include "alloc.h"
#include "analyzer.h"
+#include "bitset.h"
#include "codegen.h"
#include "common.h"
#include "errors.h"
@@ -73,19 +74,20 @@ usage:
char *src = readfile(argv[0], &srclen);
aux_t aux;
- mpq_t *folds;
+ fold_t *folds;
scope_t *scps;
+ bitset_t *cnst;
arena_t a = NULL;
lexemes_t toks = lexstring(src, srclen);
ast_t ast = parsetoks(toks, &aux);
- type_t **types = analyzeprog(ast, aux, toks, &a, &scps, &folds);
- codegen(argv[0], folds, scps, types, ast, aux, toks);
+ type_t **types = analyzeprog(ast, aux, toks, &a, &scps, &folds, &cnst);
+ codegen(argv[0], cnst, folds, scps, types, ast, aux, toks);
#if DEBUG
for (size_t i = 0; i < ast.len; i++) {
- if ((*folds[i])._mp_den._mp_d != NULL)
- mpq_clear(folds[i]);
+ if ((*folds[i].q)._mp_den._mp_d != NULL)
+ mpq_clear(folds[i].q);
}
free(folds);
diff --git a/src/parser.c b/src/parser.c
index a6d82a8..49067e2 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -70,9 +70,11 @@ fwdnode(ast_t ast, idx_t i)
case ASTBINADD:
case ASTBINAND:
case ASTBINDIV:
+ case ASTBINEQ:
case ASTBINIOR:
case ASTBINMOD:
case ASTBINMUL:
+ case ASTBINNEQ:
case ASTBINSHL:
case ASTBINSHR:
case ASTBINSUB:
@@ -88,7 +90,7 @@ fwdnode(ast_t ast, idx_t i)
case ASTTYPE:
return i + 1;
case ASTFNPROTO:
- assert("analyzer: Not reachable");
+ default:
__builtin_unreachable();
}
}
@@ -284,8 +286,18 @@ idx_t
parseexprinc(ast_t *ast, lexemes_t toks, idx_t lhs, int minprec)
{
static const int prectbl[UINT8_MAX + 1] = {
- ['+'] = 1, ['-'] = 1, ['|'] = 1, ['~'] = 1, ['%'] = 2,
- ['&'] = 2, ['*'] = 2, ['/'] = 2, [LEXLANGL_DBL] = 2, [LEXRANGL_DBL] = 2,
+ [LEXBANGEQ] = 3,
+ [LEXEQ_DBL] = 3,
+ ['+'] = 4,
+ ['-'] = 4,
+ ['|'] = 4,
+ ['~'] = 4,
+ ['%'] = 5,
+ ['&'] = 5,
+ ['*'] = 5,
+ ['/'] = 5,
+ [LEXLANGL_DBL] = 5,
+ [LEXRANGL_DBL] = 5,
};
uint8_t op = toks.kinds[toksidx];
diff --git a/src/parser.h b/src/parser.h
index e30ec8e..363478c 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -97,10 +97,18 @@ enum {
‘lhs & rhs’ */
ASTBINIOR = '|',
+ /* Binary not equals
+ ‘lhs != rhs’ */
+ ASTBINNEQ = UINT8_MAX - 3,
+
/* Binary left shift
‘lhs << rhs’ */
ASTBINSHL = UINT8_MAX - 2,
+ /* Binary equals
+ ‘lhs == rhs’ */
+ ASTBINEQ = UINT8_MAX - 1,
+
/* Binary right shift
‘lhs >> rhs’ */
ASTBINSHR = UINT8_MAX - 0,
diff --git a/src/symtab.c b/src/tables.c
index 8fcba3c..61adff7 100644
--- a/src/symtab.c
+++ b/src/tables.c
@@ -2,8 +2,8 @@
#include <stdint.h>
#include "alloc.h"
-#include "symtab.h"
#include "strview.h"
+#include "tables.h"
struct symtab {
symtab_t *child[4];
diff --git a/src/symtab.h b/src/tables.h
index 448ffd9..448ffd9 100644
--- a/src/symtab.h
+++ b/src/tables.h
diff --git a/test.yx b/test.yx
deleted file mode 100644
index 346f095..0000000
--- a/test.yx
+++ /dev/null
@@ -1,133 +0,0 @@
-X :: 5;
-Y: u8 : X;
-Z :: Y;
-
-ZZ :: 127;
-
-my_global: i8 = ZZ;
-no_init: u128;
-
-another_global := 123'456'789;
-
-uninit :: () {
- x: int = …;
- y: u32 = ...;
-}
-
-pub main :: () {
- no_init: u128;
- x := no_init;
-
- no_init_undef: int = …;
-}
-
-pub bar :: () int {
- hello_world_this_is_my_var := 126;
- hello_world_this_is_my_var′ := 127;
- hello_world_this_is_my_var″ := 128;
- return hello_world_this_is_my_var′;
-}
-
-foo :: () int {
- baz :: () int {
- X :: X;
- return X;
- }
-
- y := 5;
- x := y;
- return x;
-}
-
-rune :: () {
- ch: rune = 8224; /* U+2020 DAGGER */
-}
-
-float_test :: () {
- π :: 3.14159265358979323846264338327950288419716939937510582097494459;
- a: f16 = π;
- b: f32 = π;
- c: f64 = π;
- d: f128 = π;
-
- f := 3.14;
-}
-
-/* Yes we have comments in this language,
- /* You can even nest them! */ */
-
-/* There are no line-comments, because why have 2 styles when you can have 1? */
-
-neg_test :: () int {
- x := 420;
- return -x;
-}
-
-/* This method should return 1337, because it’s not an increment but
- actually parsed as (+ (+ x)), which is equivalant to ‘x’. */
-some_math :: () int {
- x := 1337;
- return ++x;
-}
-
-complex_math :: () uint {
- x: uint = 42;
- y: uint = 123;
- z := (x + y) / x;
- return x + y / x;
-}
-
-div :: () f64 {
- return 5 / 2;
-}
-
-remainder :: () int {
- x := 5;
- y := 2;
- z := 5 % 2;
- return x % y;
-}
-
-xor :: () int {
- x := 42;
- y := 123;
- z := ~y;
- return x + z ~ y;
-}
-
-shl_kinda_sus :: () u8 {
- x: u8 = 1;
- y: u16 : 1<<8 - 1;
- return x<<y;
-}
-
-shr :: () u8 {
- x: u8 = 255;
- y: u8 = 3;
- return x >> 256;
-}
-
-bit_fidling :: () int {
- x := 122;
- y := x | 1;
- return y;
-}
-
-assignment :: () int {
- x := 5;
- y := 4;
- x = x + y;
- y = 69;
- return x + y;
-}
-
-some_global: int;
-mutate_global :: () {
- some_global = 42;
-}
-
-float_div :: () f64 {
- x := 5.0;
- y := 2.0;
- return x / y;
-}