about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas Voss <mail@thomasvoss.com> 2024-06-28 15:20:32 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-06-28 15:20:32 +0200
commit: 545f8ebebba5f3e78351b0074948c08cbfd9f418 (patch)
tree: 293de851f8f72a70fb583cda091608cc56f8f9f2
parent: b75cc76276216af71d7bc89c894187df726e4a71 (diff)
Do some more work to support modulus and parenthesis
-rw-r--r--  src/analyzer.c  11
-rw-r--r--  src/analyzer.h   1
-rw-r--r--  src/codegen.c   52
-rw-r--r--  src/lexer.c      7
-rw-r--r--  src/lexer.h      7
-rw-r--r--  src/parser.c    84
-rw-r--r--  src/parser.h     8
7 files changed, 112 insertions, 58 deletions
diff --git a/src/analyzer.c b/src/analyzer.c
index de4400c..2686e07 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -322,7 +322,8 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast,
case ASTBINADD:
case ASTBINSUB:
case ASTBINMUL:
- case ASTBINDIV: {
+ case ASTBINDIV:
+ case ASTBINMOD: {
idx_t lhs, rhs;
lhs = ast.kids[i].lhs;
rhs = ast.kids[i].rhs;
@@ -330,6 +331,8 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast,
idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, rhs);
if (!typecompat(types[lhs], types[rhs]))
err("analyzer: Binary oprand type mismatch");
+ if (ast.kinds[i] == ASTBINMOD && types[lhs].isfloat)
+ err("analyzer: Modulus is not defined for floating-point types");
types[i] = types[rhs];
return ni;
}
@@ -513,13 +516,15 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types,
case ASTBINADD:
case ASTBINSUB:
case ASTBINMUL:
- case ASTBINDIV: {
- static void (*const mpq_fns[_AST_LAST_ENT])(mpq_t, const mpq_t, const mpq_t) = {
+ case ASTBINDIV:
+ case ASTBINMOD: {
+ static void (*const mpq_fns[UINT8_MAX])(mpq_t, const mpq_t, const mpq_t) = {
['+'] = mpq_add,
['-'] = mpq_sub,
['*'] = mpq_mul,
['/'] = mpq_div,
};
+ /* TODO: Support modulus */
idx_t lhs, rhs;
lhs = ast.kids[i].lhs;
diff --git a/src/analyzer.h b/src/analyzer.h
index abb4395..12c1b26 100644
--- a/src/analyzer.h
+++ b/src/analyzer.h
@@ -1,6 +1,7 @@
#ifndef ORYX_ANALYZER_H
#define ORYX_ANALYZER_H
+#include <assert.h>
#include <stdint.h>
#include <gmp.h>
diff --git a/src/codegen.c b/src/codegen.c
index eb26036..71a8e1d 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -145,7 +145,8 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
{
/* If true, implies numeric constant */
if (MPQ_IS_INIT(ctx.folds[i]) && !type.isfloat) {
- /* TODO: Move this kind of range checking to the constant folding stage? */
+ /* TODO: Move this kind of range checking to the
+ constant folding stage? */
if (!type.issigned && mpq_sgn(ctx.folds[i]) == -1)
err("Cannot convert negative value to unsigned type");
@@ -189,11 +190,20 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
/* TODO: Is this even correct? */
switch (type.size) {
- case 2: prec = 5; break;
- case 4: prec = 8; break;
- case 8: prec = 11; break;
- case 16: prec = 16; break;
- default: __builtin_unreachable();
+ case 2:
+ prec = 5;
+ break;
+ case 4:
+ prec = 8;
+ break;
+ case 8:
+ prec = 11;
+ break;
+ case 16:
+ prec = 16;
+ break;
+ default:
+ __builtin_unreachable();
}
mpf_init2(x, prec);
@@ -219,7 +229,8 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
case ASTIDENT: {
strview_t sv = ctx.toks.strs[ctx.ast.lexemes[i]];
LLVMTypeRef t = type2llvm(ctx, ctx.types[i]);
- LLVMValueRef ptrval = symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v;
+ LLVMValueRef ptrval =
+ symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v;
*outv = LLVMBuildLoad2(ctx.bob, t, ptrval, "loadtmp");
return fwdnode(ctx.ast, i);
}
@@ -231,21 +242,28 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
}
case ASTBINADD:
case ASTBINSUB:
- case ASTBINMUL: {
- typedef LLVMValueRef llbfn(LLVMBuilderRef, LLVMValueRef, LLVMValueRef, const char *);
+ case ASTBINMUL:
+ case ASTBINDIV: {
+ typedef LLVMValueRef llbfn(LLVMBuilderRef, LLVMValueRef, LLVMValueRef,
+ const char *);
static const struct binop {
- llbfn *fn;
+ llbfn *fn[2];
const char *name;
- } binoptbl[_AST_LAST_ENT] = {
- ['+'] = { LLVMBuildAdd, "addtmp" },
- ['-'] = { LLVMBuildSub, "subtmp" },
- ['*'] = { LLVMBuildMul, "multmp" },
+ } binoptbl[UINT8_MAX] = {
+ ['+'] = {{LLVMBuildAdd, LLVMBuildAdd}, "addtmp"},
+ ['-'] = {{LLVMBuildSub, LLVMBuildSub}, "subtmp"},
+ ['*'] = {{LLVMBuildMul, LLVMBuildMul}, "multmp"},
+ ['/'] = {{LLVMBuildUDiv, LLVMBuildSDiv}, "divtmp"},
+ ['%'] = {{LLVMBuildURem, LLVMBuildSRem}, "remtmp"},
};
+
LLVMValueRef vl, vr;
- (void)codegentypedexpr(ctx, ctx.ast.kids[i].lhs, ctx.types[i], &vl);
- idx_t ni = codegentypedexpr(ctx, ctx.ast.kids[i].rhs, ctx.types[i], &vr);
+ (void)codegentypedexpr(ctx, ctx.ast.kids[i].lhs, ctx.types[i], &vl);
+ idx_t ni = codegentypedexpr(ctx, ctx.ast.kids[i].rhs,
+ ctx.types[i], &vr);
+
struct binop bo = binoptbl[ctx.ast.kinds[i]];
- *outv = bo.fn(ctx.bob, vl, vr, bo.name);
+ *outv = bo.fn[ctx.types[i].issigned](ctx.bob, vl, vr, bo.name);
return ni;
}
default:
diff --git a/src/lexer.c b/src/lexer.c
index 7938b28..e35f1cd 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -75,9 +75,10 @@ lexstring(const uchar *code, size_t codesz)
break;
/* Single-byte literals */
- case '&': case '(': case ')': case '*': case '+':
- case '-': case ':': case ';': case '=': case '[':
- case ']': case '{': case '|': case '}': case '~':
+ case '%': case '&': case '(': case ')': case '*':
+ case '+': case '-': case ':': case ';': case '=':
+ case '[': case ']': case '{': case '|': case '}':
+ case '~':
data.kinds[data.len++] = ch;
break;
diff --git a/src/lexer.h b/src/lexer.h
index fb63fd8..b62cc17 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,7 +1,6 @@
#ifndef ORYX_LEXER_H
#define ORYX_LEXER_H
-#include <assert.h>
#include <stddef.h>
#include <stdint.h>
@@ -26,6 +25,7 @@ enum {
LEXLBRKT = '[',
LEXLPAR = '(',
LEXMINUS = '-',
+ LEXPERC = '%',
LEXPIPE = '|',
LEXPLUS = '+',
LEXRANGL = '>',
@@ -42,13 +42,8 @@ enum {
token T to the doubled equivalent by doing T += 193. */
LEXLANGL_DBL = UINT8_MAX - 2, /* << */
LEXRANGL_DBL = UINT8_MAX - 0, /* >> */
-
- _LEX_LAST_ENT,
};
-static_assert(_LEX_LAST_ENT - 1 <= UINT8_MAX,
- "Too many lexer tokens to fix in uint8_t");
-
#define LEXEMES_BLKSZ (1 + sizeof(strview_t))
typedef struct {
diff --git a/src/parser.c b/src/parser.c
index 69417b2..d71627e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -28,8 +28,13 @@ typedef idx_t parsefn(ast_t *, aux_t *, lexemes_t)
static parsefn parseblk, parsefunc, parseproto, parsestmt, parsetype;
static idx_t parsedecl(ast_t *, aux_t *, lexemes_t, bool)
__attribute__((nonnull));
-static idx_t parseexpr(ast_t *, aux_t *, lexemes_t, int)
+static idx_t parseexpr(ast_t *, lexemes_t, int)
__attribute__((nonnull));
+static idx_t parseexprinc(ast_t *, lexemes_t, idx_t, int)
+ __attribute__((nonnull));
+static idx_t parseexpratom(ast_t *, lexemes_t)
+ __attribute__((nonnull));
+static bool isfunc(lexemes_t);
static ast_t mkast(void);
@@ -45,14 +50,6 @@ static void astresz(ast_t *ast)
/* TODO: Make thread-local? */
static size_t toksidx;
-static int prectbl[_LEX_LAST_ENT] = {
- ['+'] = 1,
- ['-'] = 1,
- ['*'] = 2,
- ['/'] = 2,
- ['%'] = 2,
-};
-
idx_t
fwdnode(ast_t ast, idx_t i)
{
@@ -70,6 +67,9 @@ fwdnode(ast_t ast, idx_t i)
i = ast.kids[i].rhs;
break;
case ASTBINADD:
+ case ASTBINDIV:
+ case ASTBINMOD:
+ case ASTBINMUL:
case ASTBINSUB:
case ASTCDECL:
case ASTFN:
@@ -187,7 +187,8 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl)
switch (toks.kinds[toksidx]) {
case LEXLPAR:
- func = true;
+ if (!(func = isfunc(toks)))
+ goto not_fn;
if (ast->kinds[i] == ASTDECL)
err("Cannot assign function to mutable variable");
rhs = parsefunc(ast, aux, toks);
@@ -200,7 +201,8 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl)
aux->buf[j].decl.isundef = true;
break;
default:
- rhs = parseexpr(ast, aux, toks, 1);
+not_fn:
+ rhs = parseexpr(ast, toks, 0);
}
ast->kids[i].rhs = rhs;
@@ -228,8 +230,18 @@ parsefunc(ast_t *ast, aux_t *aux, lexemes_t toks)
}
idx_t
-parseexprunit(ast_t *ast, lexemes_t toks)
+parseexpratom(ast_t *ast, lexemes_t toks)
{
+ /* We handle parenthesised expressions up here because we don’t want
+ to allocate a new AST node for them */
+ if (toks.kinds[toksidx] == LEXLPAR) {
+ toksidx++;
+ idx_t i = parseexpr(ast, toks, 0);
+ if (toks.kinds[toksidx++] != LEXRPAR)
+ err("parser: Expected closing parenthesis after expression");
+ return i;
+ }
+
idx_t i = astalloc(ast);
/* Unary plus is kind of a fake syntactic construct. We just pretend
@@ -249,7 +261,7 @@ parseexprunit(ast_t *ast, lexemes_t toks)
break;
case LEXMINUS:
ast->kinds[i] = ASTUNNEG;
- ast->kids[i].rhs = parseexprunit(ast, toks);
+ ast->kids[i].rhs = parseexpratom(ast, toks);
break;
default:
err("parser: Invalid expression leaf");
@@ -258,16 +270,23 @@ parseexprunit(ast_t *ast, lexemes_t toks)
}
idx_t
-parseexprinc(ast_t *ast, aux_t *aux, lexemes_t toks, idx_t lhs, int minprec)
+parseexprinc(ast_t *ast, lexemes_t toks, idx_t lhs, int minprec)
{
+ static const int prectbl[UINT8_MAX] = {
+ ['+'] = 1,
+ ['-'] = 1,
+ ['*'] = 2,
+ ['/'] = 2,
+ ['%'] = 2,
+ };
+
uint8_t op = toks.kinds[toksidx];
int nxtprec = prectbl[op];
- if (nxtprec != 0)
- toksidx++;
- if (nxtprec < minprec)
+ if (nxtprec <= minprec)
return lhs;
+ toksidx++;
idx_t i = astalloc(ast);
- idx_t rhs = parseexpr(ast, aux, toks, nxtprec);
+ idx_t rhs = parseexpr(ast, toks, nxtprec);
ast->kinds[i] = op;
ast->lexemes[i] = toksidx - 1;
ast->kids[i].lhs = lhs;
@@ -276,13 +295,13 @@ parseexprinc(ast_t *ast, aux_t *aux, lexemes_t toks, idx_t lhs, int minprec)
}
idx_t
-parseexpr(ast_t *ast, aux_t *aux, lexemes_t toks, int minprec)
+parseexpr(ast_t *ast, lexemes_t toks, int minprec)
{
- idx_t lhs = parseexprunit(ast, toks);
+ idx_t lhs = parseexpratom(ast, toks);
for (;;) {
- idx_t rhs = parseexprinc(ast, aux, toks, lhs, minprec);
- if (rhs == lhs)
+ idx_t rhs = parseexprinc(ast, toks, lhs, minprec);
+ if (lhs == rhs)
break;
lhs = rhs;
}
@@ -323,8 +342,8 @@ parsestmt(ast_t *ast, aux_t *aux, lexemes_t toks)
ast->lexemes[i] = toksidx++;
ast->kinds[i] = ASTRET;
- idx_t rhs = toks.kinds[toksidx] != LEXSEMI ? parseexpr(ast, aux, toks, 1)
- : AST_EMPTY;
+ idx_t rhs = toks.kinds[toksidx] != LEXSEMI ? parseexpr(ast, toks, 0)
+ : AST_EMPTY;
ast->kids[i].rhs = rhs;
if (toks.kinds[toksidx++] != LEXSEMI)
err("parser: Expected semicolon");
@@ -352,6 +371,23 @@ parsetype(ast_t *ast, aux_t *aux, lexemes_t toks)
return i;
}
+bool
+isfunc(lexemes_t toks)
+{
+ assert(toks.kinds[toksidx] == LEXLPAR);
+
+ if (toks.kinds[toksidx + 1] == LEXRPAR)
+ return true;
+ for (size_t i = toksidx + 1;; i++) {
+ switch (toks.kinds[i]) {
+ case LEXRPAR:
+ return false;
+ case LEXCOLON:
+ return true;
+ }
+ }
+}
+
ast_t
mkast(void)
{
diff --git a/src/parser.h b/src/parser.h
index 9cdf0c4..0a25a4c 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -1,7 +1,6 @@
#ifndef ORYX_PARSER_H
#define ORYX_PARSER_H
-#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
@@ -74,12 +73,11 @@ enum {
‘lhs - rhs’ */
ASTBINDIV = '/',
- _AST_LAST_ENT,
+ /* Binary modulus
+ ‘lhs % rhs’ */
+ ASTBINMOD = '%',
};
-static_assert(_AST_LAST_ENT - 1 <= UINT8_MAX,
- "Too many AST tokens to fix in uint8_t");
-
#define AST_EMPTY ((idx_t)-1)
#define AST_SOA_BLKSZ (1 + sizeof(idx_t) + sizeof(pair_t))