From 545f8ebebba5f3e78351b0074948c08cbfd9f418 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Fri, 28 Jun 2024 15:20:32 +0200 Subject: Do some more work to support modulus and parenthesis --- src/analyzer.c | 11 +++++--- src/analyzer.h | 1 + src/codegen.c | 52 ++++++++++++++++++++++++------------ src/lexer.c | 7 ++--- src/lexer.h | 7 +---- src/parser.c | 84 +++++++++++++++++++++++++++++++++++++++++----------------- src/parser.h | 8 +++--- 7 files changed, 112 insertions(+), 58 deletions(-) diff --git a/src/analyzer.c b/src/analyzer.c index de4400c..2686e07 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -322,7 +322,8 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, case ASTBINADD: case ASTBINSUB: case ASTBINMUL: - case ASTBINDIV: { + case ASTBINDIV: + case ASTBINMOD: { idx_t lhs, rhs; lhs = ast.kids[i].lhs; rhs = ast.kids[i].rhs; @@ -330,6 +331,8 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, rhs); if (!typecompat(types[lhs], types[rhs])) err("analyzer: Binary oprand type mismatch"); + if (ast.kinds[i] == ASTBINMOD && types[lhs].isfloat) + err("analyzer: Modulus is not defined for floating-point types"); types[i] = types[rhs]; return ni; } @@ -513,13 +516,15 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, case ASTBINADD: case ASTBINSUB: case ASTBINMUL: - case ASTBINDIV: { - static void (*const mpq_fns[_AST_LAST_ENT])(mpq_t, const mpq_t, const mpq_t) = { + case ASTBINDIV: + case ASTBINMOD: { + static void (*const mpq_fns[UINT8_MAX])(mpq_t, const mpq_t, const mpq_t) = { ['+'] = mpq_add, ['-'] = mpq_sub, ['*'] = mpq_mul, ['/'] = mpq_div, }; + /* TODO: Support modulus */ idx_t lhs, rhs; lhs = ast.kids[i].lhs; diff --git a/src/analyzer.h b/src/analyzer.h index abb4395..12c1b26 100644 --- a/src/analyzer.h +++ b/src/analyzer.h @@ -1,6 +1,7 @@ #ifndef ORYX_ANALYZER_H #define ORYX_ANALYZER_H +#include #include #include diff --git 
a/src/codegen.c b/src/codegen.c index eb26036..71a8e1d 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -145,7 +145,8 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) { /* If true, implies numeric constant */ if (MPQ_IS_INIT(ctx.folds[i]) && !type.isfloat) { - /* TODO: Move this kind of range checking to the constant folding stage? */ + /* TODO: Move this kind of range checking to the + constant folding stage? */ if (!type.issigned && mpq_sgn(ctx.folds[i]) == -1) err("Cannot convert negative value to unsigned type"); @@ -189,11 +190,20 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) /* TODO: Is this even correct? */ switch (type.size) { - case 2: prec = 5; break; - case 4: prec = 8; break; - case 8: prec = 11; break; - case 16: prec = 16; break; - default: __builtin_unreachable(); + case 2: + prec = 5; + break; + case 4: + prec = 8; + break; + case 8: + prec = 11; + break; + case 16: + prec = 16; + break; + default: + __builtin_unreachable(); } mpf_init2(x, prec); @@ -219,7 +229,8 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) case ASTIDENT: { strview_t sv = ctx.toks.strs[ctx.ast.lexemes[i]]; LLVMTypeRef t = type2llvm(ctx, ctx.types[i]); - LLVMValueRef ptrval = symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v; + LLVMValueRef ptrval = + symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v; *outv = LLVMBuildLoad2(ctx.bob, t, ptrval, "loadtmp"); return fwdnode(ctx.ast, i); } @@ -231,21 +242,28 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) } case ASTBINADD: case ASTBINSUB: - case ASTBINMUL: { - typedef LLVMValueRef llbfn(LLVMBuilderRef, LLVMValueRef, LLVMValueRef, const char *); + case ASTBINMUL: + case ASTBINDIV: { + typedef LLVMValueRef llbfn(LLVMBuilderRef, LLVMValueRef, LLVMValueRef, + const char *); static const struct binop { - llbfn *fn; + llbfn *fn[2]; const char *name; - } binoptbl[_AST_LAST_ENT] = { - ['+'] = { LLVMBuildAdd, 
"addtmp" }, - ['-'] = { LLVMBuildSub, "subtmp" }, - ['*'] = { LLVMBuildMul, "multmp" }, + } binoptbl[UINT8_MAX] = { + ['+'] = {{LLVMBuildAdd, LLVMBuildAdd}, "addtmp"}, + ['-'] = {{LLVMBuildSub, LLVMBuildSub}, "subtmp"}, + ['*'] = {{LLVMBuildMul, LLVMBuildMul}, "multmp"}, + ['/'] = {{LLVMBuildUDiv, LLVMBuildSDiv}, "divtmp"}, + ['%'] = {{LLVMBuildURem, LLVMBuildSRem}, "remtmp"}, }; + LLVMValueRef vl, vr; - (void)codegentypedexpr(ctx, ctx.ast.kids[i].lhs, ctx.types[i], &vl); - idx_t ni = codegentypedexpr(ctx, ctx.ast.kids[i].rhs, ctx.types[i], &vr); + (void)codegentypedexpr(ctx, ctx.ast.kids[i].lhs, ctx.types[i], &vl); + idx_t ni = codegentypedexpr(ctx, ctx.ast.kids[i].rhs, + ctx.types[i], &vr); + struct binop bo = binoptbl[ctx.ast.kinds[i]]; - *outv = bo.fn(ctx.bob, vl, vr, bo.name); + *outv = bo.fn[ctx.types[i].issigned](ctx.bob, vl, vr, bo.name); return ni; } default: diff --git a/src/lexer.c b/src/lexer.c index 7938b28..e35f1cd 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -75,9 +75,10 @@ lexstring(const uchar *code, size_t codesz) break; /* Single-byte literals */ - case '&': case '(': case ')': case '*': case '+': - case '-': case ':': case ';': case '=': case '[': - case ']': case '{': case '|': case '}': case '~': + case '%': case '&': case '(': case ')': case '*': + case '+': case '-': case ':': case ';': case '=': + case '[': case ']': case '{': case '|': case '}': + case '~': data.kinds[data.len++] = ch; break; diff --git a/src/lexer.h b/src/lexer.h index fb63fd8..b62cc17 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -1,7 +1,6 @@ #ifndef ORYX_LEXER_H #define ORYX_LEXER_H -#include #include #include @@ -26,6 +25,7 @@ enum { LEXLBRKT = '[', LEXLPAR = '(', LEXMINUS = '-', + LEXPERC = '%', LEXPIPE = '|', LEXPLUS = '+', LEXRANGL = '>', @@ -42,13 +42,8 @@ enum { token T to the doubled equivalent by doing T += 193. 
*/ LEXLANGL_DBL = UINT8_MAX - 2, /* << */ LEXRANGL_DBL = UINT8_MAX - 0, /* >> */ - - _LEX_LAST_ENT, }; -static_assert(_LEX_LAST_ENT - 1 <= UINT8_MAX, - "Too many lexer tokens to fix in uint8_t"); - #define LEXEMES_BLKSZ (1 + sizeof(strview_t)) typedef struct { diff --git a/src/parser.c b/src/parser.c index 69417b2..d71627e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -28,8 +28,13 @@ typedef idx_t parsefn(ast_t *, aux_t *, lexemes_t) static parsefn parseblk, parsefunc, parseproto, parsestmt, parsetype; static idx_t parsedecl(ast_t *, aux_t *, lexemes_t, bool) __attribute__((nonnull)); -static idx_t parseexpr(ast_t *, aux_t *, lexemes_t, int) +static idx_t parseexpr(ast_t *, lexemes_t, int) __attribute__((nonnull)); +static idx_t parseexprinc(ast_t *, lexemes_t, idx_t, int) + __attribute__((nonnull)); +static idx_t parseexpratom(ast_t *, lexemes_t) + __attribute__((nonnull)); +static bool isfunc(lexemes_t); static ast_t mkast(void); @@ -45,14 +50,6 @@ static void astresz(ast_t *ast) /* TODO: Make thread-local? 
*/ static size_t toksidx; -static int prectbl[_LEX_LAST_ENT] = { - ['+'] = 1, - ['-'] = 1, - ['*'] = 2, - ['/'] = 2, - ['%'] = 2, -}; - idx_t fwdnode(ast_t ast, idx_t i) { @@ -70,6 +67,9 @@ fwdnode(ast_t ast, idx_t i) i = ast.kids[i].rhs; break; case ASTBINADD: + case ASTBINDIV: + case ASTBINMOD: + case ASTBINMUL: case ASTBINSUB: case ASTCDECL: case ASTFN: @@ -187,7 +187,8 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl) switch (toks.kinds[toksidx]) { case LEXLPAR: - func = true; + if (!(func = isfunc(toks))) + goto not_fn; if (ast->kinds[i] == ASTDECL) err("Cannot assign function to mutable variable"); rhs = parsefunc(ast, aux, toks); @@ -200,7 +201,8 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl) aux->buf[j].decl.isundef = true; break; default: - rhs = parseexpr(ast, aux, toks, 1); +not_fn: + rhs = parseexpr(ast, toks, 0); } ast->kids[i].rhs = rhs; @@ -228,8 +230,18 @@ parsefunc(ast_t *ast, aux_t *aux, lexemes_t toks) } idx_t -parseexprunit(ast_t *ast, lexemes_t toks) +parseexpratom(ast_t *ast, lexemes_t toks) { + /* We handle parenthesised expressions up here because we don’t want + to allocate a new AST node for them */ + if (toks.kinds[toksidx] == LEXLPAR) { + toksidx++; + idx_t i = parseexpr(ast, toks, 0); + if (toks.kinds[toksidx++] != LEXRPAR) + err("parser: Expected closing parenthesis after expression"); + return i; + } + idx_t i = astalloc(ast); /* Unary plus is kind of a fake syntactic construct. 
We just pretend @@ -249,7 +261,7 @@ parseexprunit(ast_t *ast, lexemes_t toks) break; case LEXMINUS: ast->kinds[i] = ASTUNNEG; - ast->kids[i].rhs = parseexprunit(ast, toks); + ast->kids[i].rhs = parseexpratom(ast, toks); break; default: err("parser: Invalid expression leaf"); @@ -258,16 +270,23 @@ parseexprunit(ast_t *ast, lexemes_t toks) } idx_t -parseexprinc(ast_t *ast, aux_t *aux, lexemes_t toks, idx_t lhs, int minprec) +parseexprinc(ast_t *ast, lexemes_t toks, idx_t lhs, int minprec) { + static const int prectbl[UINT8_MAX + 1] = { /* one slot per possible uint8_t token kind, 0..UINT8_MAX inclusive; 0 means "not a binary operator" */ + ['+'] = 1, + ['-'] = 1, + ['*'] = 2, + ['/'] = 2, + ['%'] = 2, + }; + uint8_t op = toks.kinds[toksidx]; int nxtprec = prectbl[op]; - if (nxtprec != 0) - toksidx++; - if (nxtprec < minprec) + if (nxtprec <= minprec) return lhs; + toksidx++; idx_t i = astalloc(ast); - idx_t rhs = parseexpr(ast, aux, toks, nxtprec); + idx_t rhs = parseexpr(ast, toks, nxtprec); ast->kinds[i] = op; ast->lexemes[i] = toksidx - 1; ast->kids[i].lhs = lhs; @@ -276,13 +295,13 @@ parseexprinc(ast_t *ast, aux_t *aux, lexemes_t toks, idx_t lhs, int minprec) } idx_t -parseexpr(ast_t *ast, aux_t *aux, lexemes_t toks, int minprec) +parseexpr(ast_t *ast, lexemes_t toks, int minprec) { - idx_t lhs = parseexprunit(ast, toks); + idx_t lhs = parseexpratom(ast, toks); for (;;) { - idx_t rhs = parseexprinc(ast, aux, toks, lhs, minprec); - if (rhs == lhs) + idx_t rhs = parseexprinc(ast, toks, lhs, minprec); + if (lhs == rhs) break; lhs = rhs; } @@ -323,8 +342,8 @@ parsestmt(ast_t *ast, aux_t *aux, lexemes_t toks) ast->lexemes[i] = toksidx++; ast->kinds[i] = ASTRET; - idx_t rhs = toks.kinds[toksidx] != LEXSEMI ? parseexpr(ast, aux, toks, 1) - : AST_EMPTY; + idx_t rhs = toks.kinds[toksidx] != LEXSEMI ?
parseexpr(ast, toks, 0) + : AST_EMPTY; ast->kids[i].rhs = rhs; if (toks.kinds[toksidx++] != LEXSEMI) err("parser: Expected semicolon"); @@ -352,6 +371,23 @@ parsetype(ast_t *ast, aux_t *aux, lexemes_t toks) return i; } +bool +isfunc(lexemes_t toks) +{ + assert(toks.kinds[toksidx] == LEXLPAR); + + if (toks.kinds[toksidx + 1] == LEXRPAR) + return true; + for (size_t i = toksidx + 1;; i++) { + switch (toks.kinds[i]) { + case LEXRPAR: + return false; + case LEXCOLON: + return true; + } + } +} + ast_t mkast(void) { diff --git a/src/parser.h b/src/parser.h index 9cdf0c4..0a25a4c 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,7 +1,6 @@ #ifndef ORYX_PARSER_H #define ORYX_PARSER_H -#include #include #include #include @@ -74,12 +73,11 @@ enum { ‘lhs - rhs’ */ ASTBINDIV = '/', - _AST_LAST_ENT, + /* Binary modulus + ‘lhs % rhs’ */ + ASTBINMOD = '%', }; -static_assert(_AST_LAST_ENT - 1 <= UINT8_MAX, - "Too many AST tokens to fix in uint8_t"); - #define AST_EMPTY ((idx_t)-1) #define AST_SOA_BLKSZ (1 + sizeof(idx_t) + sizeof(pair_t)) -- cgit v1.2.3