From df36746da04ad7e1c3dc4db8233b1910a37f2f49 Mon Sep 17 00:00:00 2001
From: Thomas Voss <mail@thomasvoss.com>
Date: Tue, 11 Jun 2024 00:18:57 +0200
Subject: Begin work on a basic parser

---
 src/main.c   |  78 ++++-------------------------------------
 src/parser.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parser.h |  32 +++++++++++++++++
 3 files changed, 151 insertions(+), 71 deletions(-)
 create mode 100644 src/parser.c
 create mode 100644 src/parser.h

diff --git a/src/main.c b/src/main.c
index bef6b0d..b156466 100644
--- a/src/main.c
+++ b/src/main.c
@@ -7,6 +7,7 @@
 
 #include "errors.h"
 #include "lexer.h"
+#include "parser.h"
 
 static char *readfile(const char *, size_t *);
 
@@ -18,81 +19,16 @@ main(int argc, char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	struct {
-		char *p;
-		size_t len;
-	} file;
-	file.p = readfile(argv[1], &file.len);
+	size_t srclen;
+	char *src = readfile(argv[1], &srclen);
 
-	struct lexemes_soa toks = lexstring(file.p, file.len);
-
-	for (size_t i = 0; i < toks.len; i++) {
-		switch (toks.kinds[i]) {
-		case LEXIDENT: {
-			struct strview sv = toks.strs[i];
-			printf("Identifier: ‘%.*s’\n", (int)sv.len, sv.p);
-			break;
-		}
-		case LEXAMP:
-			puts("Ampersand");
-			break;
-		case LEXCOLON:
-			puts("Colon");
-			break;
-		case LEXEQ:
-			puts("Equals");
-			break;
-		case LEXLANGL:
-			puts("Left angle bracket");
-			break;
-		case LEXLBRACE:
-			puts("Left brace");
-			break;
-		case LEXLBRKT:
-			puts("Right bracket");
-			break;
-		case LEXLPAR:
-			puts("Left parenthesis");
-			break;
-		case LEXMINUS:
-			puts("Minus");
-			break;
-		case LEXPIPE:
-			puts("Pipe");
-			break;
-		case LEXPLUS:
-			puts("Plus");
-			break;
-		case LEXRANGL:
-			puts("Right angle bracket");
-			break;
-		case LEXRBRACE:
-			puts("Right brace");
-			break;
-		case LEXRBRKT:
-			puts("Right bracket");
-			break;
-		case LEXRPAR:
-			puts("Right parenthesis");
-			break;
-		case LEXSEMI:
-			puts("Semicolon");
-			break;
-		case LEXSLASH:
-			puts("Slash");
-			break;
-		case LEXSTAR:
-			puts("Asterisk");
-			break;
-		case LEXTILDE:
-			puts("Tilde");
-			break;
-		}
-	}
+	struct lexemes_soa toks = lexstring(src, srclen);
+	struct ast_soa ast = parsetoks(toks);
 
 #if DEBUG
-	free(file.p);
+	free(src);
 	lexemes_free(toks);
+	ast_free(ast);
 #endif
 	return EXIT_SUCCESS;
 }
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..a235746
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,112 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "errors.h"
+#include "parser.h"
+
+/* #define AST_DFLT_CAP (2048) */
+#define AST_DFLT_CAP (8)
+#define SIZE_WDTH    (sizeof(size_t) * CHAR_BIT)
+
+static struct ast_soa mk_ast_soa(void);
+static void ast_soa_resz(struct ast_soa *);
+
+struct ast_soa
+parsetoks(struct lexemes_soa toks)
+{
+	struct ast_soa ast = mk_ast_soa();
+
+	return ast;
+}
+
+struct ast_soa
+mk_ast_soa(void)
+{
+	struct ast_soa soa;
+
+	static_assert(offsetof(struct ast_soa, kinds)
+	                  < offsetof(struct ast_soa, lexemes),
+	              "KINDS is not the first field before LEXEMES");
+	static_assert(offsetof(struct ast_soa, kinds)
+	                  < offsetof(struct ast_soa, kids),
+	              "KINDS is not the first field before KIDS");
+	static_assert(AST_DFLT_CAP * sizeof(*soa.kinds) % alignof(*soa.lexemes)
+	                  == 0,
+	              "Additional padding is required to properly align LEXEMES");
+	static_assert(AST_DFLT_CAP * (sizeof(*soa.kinds) + sizeof(*soa.lexemes))
+	                      % alignof(*soa.kids)
+	                  == 0,
+	              "Additional padding is required to properly align KIDS");
+
+	soa.len = 0;
+	soa.cap = AST_DFLT_CAP;
+
+	if ((soa.kinds = malloc(soa.cap * AST_SOA_BLKSZ)) == NULL)
+		err("malloc:");
+	soa.lexemes = (void *)((char *)soa.kinds + soa.cap * sizeof(*soa.kinds));
+	soa.kids = (void *)((char *)soa.lexemes + soa.cap * sizeof(*soa.lexemes));
+
+	return soa;
+}
+
+void
+ast_soa_resz(struct ast_soa *soa)
+{
+	static_assert(offsetof(struct ast_soa, kinds)
+	                  < offsetof(struct ast_soa, lexemes),
+	              "KINDS is not the first field before LEXEMES");
+	static_assert(offsetof(struct ast_soa, kinds)
+	                  < offsetof(struct ast_soa, kids),
+	              "KINDS is not the first field before KIDS");
+	static_assert(offsetof(struct ast_soa, lexemes)
+	                  < offsetof(struct ast_soa, kids),
+	              "LEXEMES is not the second field before KIDS");
+
+	size_t ncap, pad1, pad2, newsz;
+	ptrdiff_t lexemes_off, kids_off;
+
+	lexemes_off = (char *)soa->lexemes - (char *)soa->kinds;
+	kids_off = (char *)soa->kids - (char *)soa->kinds;
+
+	/* The capacity is always going to be a power of 2, so checking for overflow
+	   becomes pretty trivial */
+	if ((soa->cap >> (SIZE_WDTH - 1)) != 0) {
+		errno = EOVERFLOW;
+		err("ast_soa_resz:");
+	}
+	ncap = soa->cap << 1;
+
+	/* Ensure that soa->lexemes is properly aligned */
+	pad1 = alignof(*soa->lexemes)
+	     - ncap * sizeof(*soa->kinds) % alignof(*soa->lexemes);
+	if (pad1 == alignof(*soa->lexemes))
+		pad1 = 0;
+
+	pad2 = alignof(*soa->kids)
+	     - (ncap * (sizeof(*soa->kinds) + sizeof(*soa->lexemes)) + pad1)
+	           % alignof(*soa->kids);
+	if (pad2 != alignof(*soa->kids))
+		pad2 = 0;
+
+	newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2;
+
+	if ((soa->kinds = realloc(soa->kinds, newsz)) == NULL)
+		err("realloc:");
+
+	soa->lexemes = (void *)((char *)soa->kinds + ncap * sizeof(*soa->kinds)
+	                        + pad1);
+	soa->kids = (void *)((char *)soa->lexemes + ncap * sizeof(*soa->lexemes)
+	                     + pad2);
+
+	memmove(soa->kids, (char *)soa->kinds + kids_off,
+	        soa->len * sizeof(*soa->kids));
+	memmove(soa->lexemes, (char *)soa->kinds + lexemes_off,
+	        soa->len * sizeof(*soa->lexemes));
+
+	soa->cap = ncap;
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..8da762d
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,32 @@
+#ifndef ORYX_PARSER_H
+#define ORYX_PARSER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lexer.h"
+
+enum {
+	PRSBINADD = '+',
+	PRSBINSUB = '-',
+};
+
+typedef uint8_t ast_kind;
+
+#define AST_SOA_BLKSZ (sizeof(ast_kind) + sizeof(size_t) * 3)
+
+struct ast_soa {
+	ast_kind *kinds;
+	size_t *lexemes;
+	struct {
+		size_t lhs, rhs;
+	} *kids;
+	size_t len, cap;
+};
+
+#define ast_free(x) free((x).kinds)
+
+/* Parse the tokens in TOKS into an abstract syntax tree */
+struct ast_soa parsetoks(struct lexemes_soa toks);
+
+#endif /* !ORYX_PARSER_H */
-- 
cgit v1.2.3