From 545f8ebebba5f3e78351b0074948c08cbfd9f418 Mon Sep 17 00:00:00 2001
From: Thomas Voss <mail@thomasvoss.com>
Date: Fri, 28 Jun 2024 15:20:32 +0200
Subject: Do some more work to support modulus and parenthesis

---
 src/analyzer.c | 11 +++++---
 src/analyzer.h |  1 +
 src/codegen.c  | 52 ++++++++++++++++++++++++------------
 src/lexer.c    |  7 ++---
 src/lexer.h    |  7 +----
 src/parser.c   | 84 +++++++++++++++++++++++++++++++++++++++++-----------------
 src/parser.h   |  8 +++---
 7 files changed, 112 insertions(+), 58 deletions(-)

diff --git a/src/analyzer.c b/src/analyzer.c
index de4400c..2686e07 100644
--- a/src/analyzer.c
+++ b/src/analyzer.c
@@ -322,7 +322,8 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast,
 	case ASTBINADD:
 	case ASTBINSUB:
 	case ASTBINMUL:
-	case ASTBINDIV: {
+	case ASTBINDIV:
+	case ASTBINMOD: {
 		idx_t lhs, rhs;
 		lhs = ast.kids[i].lhs;
 		rhs = ast.kids[i].rhs;
@@ -330,6 +331,8 @@ analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast,
 		idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, rhs);
 		if (!typecompat(types[lhs], types[rhs]))
 			err("analyzer: Binary oprand type mismatch");
+		if (ast.kinds[i] == ASTBINMOD && types[lhs].isfloat)
+			err("analyzer: Modulus is not defined for floating-point types");
 		types[i] = types[rhs];
 		return ni;
 	}
@@ -513,13 +516,15 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types,
 	case ASTBINADD:
 	case ASTBINSUB:
 	case ASTBINMUL:
-	case ASTBINDIV: {
-		static void (*const mpq_fns[_AST_LAST_ENT])(mpq_t, const mpq_t, const mpq_t) = {
+	case ASTBINDIV:
+	case ASTBINMOD: {
+		static void (*const mpq_fns[UINT8_MAX])(mpq_t, const mpq_t, const mpq_t) = {
 			['+'] = mpq_add,
 			['-'] = mpq_sub,
 			['*'] = mpq_mul,
 			['/'] = mpq_div,
 		};
+		/* TODO: Support modulus */
 
 		idx_t lhs, rhs;
 		lhs = ast.kids[i].lhs;
diff --git a/src/analyzer.h b/src/analyzer.h
index abb4395..12c1b26 100644
--- a/src/analyzer.h
+++ b/src/analyzer.h
@@ -1,6 +1,7 @@
 #ifndef ORYX_ANALYZER_H
 #define ORYX_ANALYZER_H
 
+#include <assert.h>
 #include <stdint.h>
 
 #include <gmp.h>
diff --git a/src/codegen.c b/src/codegen.c
index eb26036..71a8e1d 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -145,7 +145,8 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
 {
 	/* If true, implies numeric constant */
 	if (MPQ_IS_INIT(ctx.folds[i]) && !type.isfloat) {
-		/* TODO: Move this kind of range checking to the constant folding stage? */
+		/* TODO: Move this kind of range checking to the
+		   constant folding stage? */
 		if (!type.issigned && mpq_sgn(ctx.folds[i]) == -1)
 			err("Cannot convert negative value to unsigned type");
 
@@ -189,11 +190,20 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
 
 		/* TODO: Is this even correct? */
 		switch (type.size) {
-		case 2:  prec =  5; break;
-		case 4:  prec =  8; break;
-		case 8:  prec = 11; break;
-		case 16: prec = 16; break;
-		default: __builtin_unreachable();
+		case 2:
+			prec = 5;
+			break;
+		case 4:
+			prec = 8;
+			break;
+		case 8:
+			prec = 11;
+			break;
+		case 16:
+			prec = 16;
+			break;
+		default:
+			__builtin_unreachable();
 		}
 
 		mpf_init2(x, prec);
@@ -219,7 +229,8 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
 	case ASTIDENT: {
 		strview_t sv = ctx.toks.strs[ctx.ast.lexemes[i]];
 		LLVMTypeRef t = type2llvm(ctx, ctx.types[i]);
-		LLVMValueRef ptrval = symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v;
+		LLVMValueRef ptrval =
+			symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v;
 		*outv = LLVMBuildLoad2(ctx.bob, t, ptrval, "loadtmp");
 		return fwdnode(ctx.ast, i);
 	}
@@ -231,21 +242,28 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv)
 	}
 	case ASTBINADD:
 	case ASTBINSUB:
-	case ASTBINMUL: {
-		typedef LLVMValueRef llbfn(LLVMBuilderRef, LLVMValueRef, LLVMValueRef, const char *);
+	case ASTBINMUL:
+	case ASTBINDIV: {
+		typedef LLVMValueRef llbfn(LLVMBuilderRef, LLVMValueRef, LLVMValueRef,
+		                           const char *);
 		static const struct binop {
-			llbfn *fn;
+			llbfn *fn[2];
 			const char *name;
-		} binoptbl[_AST_LAST_ENT] = {
-			['+'] = { LLVMBuildAdd, "addtmp" },
-			['-'] = { LLVMBuildSub, "subtmp" },
-			['*'] = { LLVMBuildMul, "multmp" },
+		} binoptbl[UINT8_MAX] = {
+			['+'] = {{LLVMBuildAdd,  LLVMBuildAdd},  "addtmp"},
+			['-'] = {{LLVMBuildSub,  LLVMBuildSub},  "subtmp"},
+			['*'] = {{LLVMBuildMul,  LLVMBuildMul},  "multmp"},
+			['/'] = {{LLVMBuildUDiv, LLVMBuildSDiv}, "divtmp"},
+			['%'] = {{LLVMBuildURem, LLVMBuildSRem}, "remtmp"},
 		};
+
 		LLVMValueRef vl, vr;
-		     (void)codegentypedexpr(ctx, ctx.ast.kids[i].lhs, ctx.types[i], &vl);
-		idx_t ni = codegentypedexpr(ctx, ctx.ast.kids[i].rhs, ctx.types[i], &vr);
+		(void)codegentypedexpr(ctx, ctx.ast.kids[i].lhs, ctx.types[i], &vl);
+		idx_t ni = codegentypedexpr(ctx, ctx.ast.kids[i].rhs,
+		                            ctx.types[i], &vr);
+
 		struct binop bo = binoptbl[ctx.ast.kinds[i]];
-		*outv = bo.fn(ctx.bob, vl, vr, bo.name);
+		*outv = bo.fn[ctx.types[i].issigned](ctx.bob, vl, vr, bo.name);
 		return ni;
 	}
 	default:
diff --git a/src/lexer.c b/src/lexer.c
index 7938b28..e35f1cd 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -75,9 +75,10 @@ lexstring(const uchar *code, size_t codesz)
 			break;
 
 		/* Single-byte literals */
-		case '&': case '(': case ')': case '*': case '+':
-		case '-': case ':': case ';': case '=': case '[':
-		case ']': case '{': case '|': case '}': case '~':
+		case '%': case '&': case '(': case ')': case '*':
+		case '+': case '-': case ':': case ';': case '=':
+		case '[': case ']': case '{': case '|': case '}':
+		case '~':
 			data.kinds[data.len++] = ch;
 			break;
 
diff --git a/src/lexer.h b/src/lexer.h
index fb63fd8..b62cc17 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,7 +1,6 @@
 #ifndef ORYX_LEXER_H
 #define ORYX_LEXER_H
 
-#include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -26,6 +25,7 @@ enum {
 	LEXLBRKT  = '[',
 	LEXLPAR   = '(',
 	LEXMINUS  = '-',
+	LEXPERC   = '%',
 	LEXPIPE   = '|',
 	LEXPLUS   = '+',
 	LEXRANGL  = '>',
@@ -42,13 +42,8 @@ enum {
 	   token T to the doubled equivalent by doing T += 193. */
 	LEXLANGL_DBL = UINT8_MAX - 2, /* << */
 	LEXRANGL_DBL = UINT8_MAX - 0, /* >> */
-
-	_LEX_LAST_ENT,
 };
 
-static_assert(_LEX_LAST_ENT - 1 <= UINT8_MAX,
-              "Too many lexer tokens to fix in uint8_t");
-
 #define LEXEMES_BLKSZ (1 + sizeof(strview_t))
 
 typedef struct {
diff --git a/src/parser.c b/src/parser.c
index 69417b2..d71627e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -28,8 +28,13 @@ typedef idx_t parsefn(ast_t *, aux_t *, lexemes_t)
 static parsefn parseblk, parsefunc, parseproto, parsestmt, parsetype;
 static idx_t parsedecl(ast_t *, aux_t *, lexemes_t, bool)
 	__attribute__((nonnull));
-static idx_t parseexpr(ast_t *, aux_t *, lexemes_t, int)
+static idx_t parseexpr(ast_t *, lexemes_t, int)
 	__attribute__((nonnull));
+static idx_t parseexprinc(ast_t *, lexemes_t, idx_t, int)
+	__attribute__((nonnull));
+static idx_t parseexpratom(ast_t *, lexemes_t)
+	__attribute__((nonnull));
+static bool isfunc(lexemes_t);
 
 static ast_t mkast(void);
 
@@ -45,14 +50,6 @@ static void astresz(ast_t *ast)
 /* TODO: Make thread-local? */
 static size_t toksidx;
 
-static int prectbl[_LEX_LAST_ENT] = {
-	['+'] = 1,
-	['-'] = 1,
-	['*'] = 2,
-	['/'] = 2,
-	['%'] = 2,
-};
-
 idx_t
 fwdnode(ast_t ast, idx_t i)
 {
@@ -70,6 +67,9 @@ fwdnode(ast_t ast, idx_t i)
 			i = ast.kids[i].rhs;
 			break;
 		case ASTBINADD:
+		case ASTBINDIV:
+		case ASTBINMOD:
+		case ASTBINMUL:
 		case ASTBINSUB:
 		case ASTCDECL:
 		case ASTFN:
@@ -187,7 +187,8 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl)
 
 	switch (toks.kinds[toksidx]) {
 	case LEXLPAR:
-		func = true;
+		if (!(func = isfunc(toks)))
+			goto not_fn;
 		if (ast->kinds[i] == ASTDECL)
 			err("Cannot assign function to mutable variable");
 		rhs = parsefunc(ast, aux, toks);
@@ -200,7 +201,8 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl)
 		aux->buf[j].decl.isundef = true;
 		break;
 	default:
-		rhs = parseexpr(ast, aux, toks, 1);
+not_fn:
+		rhs = parseexpr(ast, toks, 0);
 	}
 
 	ast->kids[i].rhs = rhs;
@@ -228,8 +230,18 @@ parsefunc(ast_t *ast, aux_t *aux, lexemes_t toks)
 }
 
 idx_t
-parseexprunit(ast_t *ast, lexemes_t toks)
+parseexpratom(ast_t *ast, lexemes_t toks)
 {
+	/* We handle parenthesised expressions up here because we don’t want
+	   to allocate a new AST node for them */
+	if (toks.kinds[toksidx] == LEXLPAR) {
+		toksidx++;
+		idx_t i = parseexpr(ast, toks, 0);
+		if (toks.kinds[toksidx++] != LEXRPAR)
+			err("parser: Expected closing parenthesis after expression");
+		return i;
+	}
+
 	idx_t i = astalloc(ast);
 
 	/* Unary plus is kind of a fake syntactic construct.  We just pretend
@@ -249,7 +261,7 @@ parseexprunit(ast_t *ast, lexemes_t toks)
 		break;
 	case LEXMINUS:
 		ast->kinds[i] = ASTUNNEG;
-		ast->kids[i].rhs = parseexprunit(ast, toks);
+		ast->kids[i].rhs = parseexpratom(ast, toks);
 		break;
 	default:
 		err("parser: Invalid expression leaf");
@@ -258,16 +270,23 @@ parseexprunit(ast_t *ast, lexemes_t toks)
 }
 
 idx_t
-parseexprinc(ast_t *ast, aux_t *aux, lexemes_t toks, idx_t lhs, int minprec)
+parseexprinc(ast_t *ast, lexemes_t toks, idx_t lhs, int minprec)
 {
+	/* Binary operator precedence by token kind; 0 marks a non-operator.
+	   One slot per uint8_t value, as op may be LEXRANGL_DBL (UINT8_MAX). */
+	static const int prectbl[UINT8_MAX + 1] = {
+		['+'] = 1, ['-'] = 1,
+		['*'] = 2, ['/'] = 2,
+		['%'] = 2,
+	};
+
 	uint8_t op = toks.kinds[toksidx];
 	int nxtprec = prectbl[op];
-	if (nxtprec != 0)
-		toksidx++;
-	if (nxtprec < minprec)
+	if (nxtprec <= minprec)
 		return lhs;
+	toksidx++;
 	idx_t i = astalloc(ast);
-	idx_t rhs = parseexpr(ast, aux, toks, nxtprec);
+	idx_t rhs = parseexpr(ast, toks, nxtprec);
 	ast->kinds[i] = op;
 	ast->lexemes[i] = toksidx - 1;
 	ast->kids[i].lhs = lhs;
@@ -276,13 +295,13 @@ parseexprinc(ast_t *ast, aux_t *aux, lexemes_t toks, idx_t lhs, int minprec)
 }
 
 idx_t
-parseexpr(ast_t *ast, aux_t *aux, lexemes_t toks, int minprec)
+parseexpr(ast_t *ast, lexemes_t toks, int minprec)
 {
-	idx_t lhs = parseexprunit(ast, toks);
+	idx_t lhs = parseexpratom(ast, toks);
 
 	for (;;) {
-		idx_t rhs = parseexprinc(ast, aux, toks, lhs, minprec);
-		if (rhs == lhs)
+		idx_t rhs = parseexprinc(ast, toks, lhs, minprec);
+		if (lhs == rhs)
 			break;
 		lhs = rhs;
 	}
@@ -323,8 +342,8 @@ parsestmt(ast_t *ast, aux_t *aux, lexemes_t toks)
 		ast->lexemes[i] = toksidx++;
 		ast->kinds[i] = ASTRET;
 
-		idx_t rhs = toks.kinds[toksidx] != LEXSEMI ? parseexpr(ast, aux, toks, 1)
-		                                            : AST_EMPTY;
+		idx_t rhs = toks.kinds[toksidx] != LEXSEMI ? parseexpr(ast, toks, 0)
+		                                           : AST_EMPTY;
 		ast->kids[i].rhs = rhs;
 		if (toks.kinds[toksidx++] != LEXSEMI)
 			err("parser: Expected semicolon");
@@ -352,6 +371,23 @@ parsetype(ast_t *ast, aux_t *aux, lexemes_t toks)
 	return i;
 }
 
+/* Report whether the ‘(’ at toksidx begins a function rather than a
+   parenthesised expression: true for ‘()’ or when a ‘:’ precedes the first
+   ‘)’.  The scan is bounded by toks.len so an unclosed ‘(’ yields false. */
+bool
+isfunc(lexemes_t toks)
+{
+	assert(toks.kinds[toksidx] == LEXLPAR);
+
+	for (size_t i = toksidx + 1; i < toks.len; i++) {
+		if (toks.kinds[i] == LEXCOLON)
+			return true;
+		if (toks.kinds[i] == LEXRPAR)
+			return i == toksidx + 1;
+	}
+	return false;
+}
+
 ast_t
 mkast(void)
 {
diff --git a/src/parser.h b/src/parser.h
index 9cdf0c4..0a25a4c 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -1,7 +1,6 @@
 #ifndef ORYX_PARSER_H
 #define ORYX_PARSER_H
 
-#include <assert.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -74,12 +73,11 @@ enum {
 	   ‘lhs - rhs’ */
 	ASTBINDIV = '/',
 
-	_AST_LAST_ENT,
+	/* Binary modulus
+	   ‘lhs % rhs’ */
+	ASTBINMOD = '%',
 };
 
-static_assert(_AST_LAST_ENT - 1 <= UINT8_MAX,
-              "Too many AST tokens to fix in uint8_t");
-
 #define AST_EMPTY     ((idx_t)-1)
 #define AST_SOA_BLKSZ (1 + sizeof(idx_t) + sizeof(pair_t))
 
-- 
cgit v1.2.3