aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-06-11 00:18:57 +0200
committerThomas Voss <mail@thomasvoss.com> 2024-06-11 00:19:05 +0200
commitdf36746da04ad7e1c3dc4db8233b1910a37f2f49 (patch)
tree53c742ff09f3c3b3ff22bcfe6c81833f7dc6c2e7 /src
parent39c78e864f6600af0403fc2c0470ef97e3befce5 (diff)
Begin work on a basic parser
Diffstat (limited to 'src')
-rw-r--r--src/main.c78
-rw-r--r--src/parser.c112
-rw-r--r--src/parser.h32
3 files changed, 151 insertions, 71 deletions
diff --git a/src/main.c b/src/main.c
index bef6b0d..b156466 100644
--- a/src/main.c
+++ b/src/main.c
@@ -7,6 +7,7 @@
#include "errors.h"
#include "lexer.h"
+#include "parser.h"
static char *readfile(const char *, size_t *);
@@ -18,81 +19,16 @@ main(int argc, char **argv)
exit(EXIT_FAILURE);
}
- struct {
- char *p;
- size_t len;
- } file;
- file.p = readfile(argv[1], &file.len);
+ size_t srclen;
+ char *src = readfile(argv[1], &srclen);
- struct lexemes_soa toks = lexstring(file.p, file.len);
-
- for (size_t i = 0; i < toks.len; i++) {
- switch (toks.kinds[i]) {
- case LEXIDENT: {
- struct strview sv = toks.strs[i];
- printf("Identifier: ā€˜%.*sā€™\n", (int)sv.len, sv.p);
- break;
- }
- case LEXAMP:
- puts("Ampersand");
- break;
- case LEXCOLON:
- puts("Colon");
- break;
- case LEXEQ:
- puts("Equals");
- break;
- case LEXLANGL:
- puts("Left angle bracket");
- break;
- case LEXLBRACE:
- puts("Left brace");
- break;
- case LEXLBRKT:
- puts("Right bracket");
- break;
- case LEXLPAR:
- puts("Left parenthesis");
- break;
- case LEXMINUS:
- puts("Minus");
- break;
- case LEXPIPE:
- puts("Pipe");
- break;
- case LEXPLUS:
- puts("Plus");
- break;
- case LEXRANGL:
- puts("Right angle bracket");
- break;
- case LEXRBRACE:
- puts("Right brace");
- break;
- case LEXRBRKT:
- puts("Right bracket");
- break;
- case LEXRPAR:
- puts("Right parenthesis");
- break;
- case LEXSEMI:
- puts("Semicolon");
- break;
- case LEXSLASH:
- puts("Slash");
- break;
- case LEXSTAR:
- puts("Asterisk");
- break;
- case LEXTILDE:
- puts("Tilde");
- break;
- }
- }
+ struct lexemes_soa toks = lexstring(src, srclen);
+ struct ast_soa ast = parsetoks(toks);
#if DEBUG
- free(file.p);
+ free(src);
lexemes_free(toks);
+ ast_free(ast);
#endif
return EXIT_SUCCESS;
}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..a235746
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,112 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "errors.h"
+#include "parser.h"
+
+/* #define AST_DFLT_CAP (2048) */
+#define AST_DFLT_CAP (8)
+#define SIZE_WDTH (sizeof(size_t) * CHAR_BIT)
+
+static struct ast_soa mk_ast_soa(void);
+static void ast_soa_resz(struct ast_soa *);
+
+struct ast_soa
+parsetoks(struct lexemes_soa toks)
+{
+ struct ast_soa ast = mk_ast_soa();
+
+ return ast;
+}
+
+struct ast_soa
+mk_ast_soa(void)
+{
+ struct ast_soa soa;
+
+ static_assert(offsetof(struct ast_soa, kinds)
+ < offsetof(struct ast_soa, lexemes),
+ "KINDS is not the first field before LEXEMES");
+ static_assert(offsetof(struct ast_soa, kinds)
+ < offsetof(struct ast_soa, kids),
+ "KINDS is not the first field before KIDS");
+ static_assert(AST_DFLT_CAP * sizeof(*soa.kinds) % alignof(*soa.lexemes)
+ == 0,
+ "Additional padding is required to properly align LEXEMES");
+ static_assert(AST_DFLT_CAP * (sizeof(*soa.kinds) + sizeof(*soa.lexemes))
+ % alignof(*soa.kids)
+ == 0,
+ "Additional padding is required to properly align KIDS");
+
+ soa.len = 0;
+ soa.cap = AST_DFLT_CAP;
+
+ if ((soa.kinds = malloc(soa.cap * AST_SOA_BLKSZ)) == NULL)
+ err("malloc:");
+ soa.lexemes = (void *)((char *)soa.kinds + soa.cap * sizeof(*soa.kinds));
+ soa.kids = (void *)((char *)soa.lexemes + soa.cap * sizeof(*soa.lexemes));
+
+ return soa;
+}
+
+void
+ast_soa_resz(struct ast_soa *soa)
+{
+ static_assert(offsetof(struct ast_soa, kinds)
+ < offsetof(struct ast_soa, lexemes),
+ "KINDS is not the first field before LEXEMES");
+ static_assert(offsetof(struct ast_soa, kinds)
+ < offsetof(struct ast_soa, kids),
+ "KINDS is not the first field before KIDS");
+ static_assert(offsetof(struct ast_soa, lexemes)
+ < offsetof(struct ast_soa, kids),
+ "LEXEMES is not the second field before KIDS");
+
+ size_t ncap, pad1, pad2, newsz;
+ ptrdiff_t lexemes_off, kids_off;
+
+ lexemes_off = (char *)soa->lexemes - (char *)soa->kinds;
+ kids_off = (char *)soa->kids - (char *)soa->kinds;
+
+ /* The capacity is always going to be a power of 2, so checking for overflow
+ becomes pretty trivial */
+ if ((soa->cap >> (SIZE_WDTH - 1)) != 0) {
+ errno = EOVERFLOW;
+ err("ast_soa_resz:");
+ }
+ ncap = soa->cap << 1;
+
+ /* Ensure that soa->lexemes is properly aligned */
+ pad1 = alignof(*soa->lexemes)
+ - ncap * sizeof(*soa->kinds) % alignof(*soa->lexemes);
+ if (pad1 == alignof(*soa->lexemes))
+ pad1 = 0;
+
+ pad2 = alignof(*soa->kids)
+ - (ncap * (sizeof(*soa->kinds) + sizeof(*soa->lexemes)) + pad1)
+ % alignof(*soa->kids);
+ if (pad2 != alignof(*soa->kids))
+ pad2 = 0;
+
+ newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2;
+
+ if ((soa->kinds = realloc(soa->kinds, newsz)) == NULL)
+ err("realloc:");
+
+ soa->lexemes = (void *)((char *)soa->kinds + ncap * sizeof(*soa->kinds)
+ + pad1);
+ soa->kids = (void *)((char *)soa->lexemes + ncap * sizeof(*soa->lexemes)
+ + pad2);
+
+ memmove(soa->kids, (char *)soa->kinds + kids_off,
+ soa->len * sizeof(*soa->kids));
+ memmove(soa->lexemes, (char *)soa->kinds + lexemes_off,
+ soa->len * sizeof(*soa->lexemes));
+
+ soa->cap = ncap;
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..8da762d
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,32 @@
+#ifndef ORYX_PARSER_H
+#define ORYX_PARSER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lexer.h"
+
+enum {
+ PRSBINADD = '+',
+ PRSBINSUB = '-',
+};
+
+typedef uint8_t ast_kind;
+
+#define AST_SOA_BLKSZ (sizeof(ast_kind) + sizeof(size_t) * 3)
+
+struct ast_soa {
+ ast_kind *kinds;
+ size_t *lexemes;
+ struct {
+ size_t lhs, rhs;
+ } *kids;
+ size_t len, cap;
+};
+
+#define ast_free(x) free((x).kinds)
+
+/* Parse the tokens in TOKS into an abstract syntax tree */
+struct ast_soa parsetoks(struct lexemes_soa toks);
+
+#endif /* !ORYX_PARSER_H */