From df36746da04ad7e1c3dc4db8233b1910a37f2f49 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 11 Jun 2024 00:18:57 +0200 Subject: Begin work on a basic parser --- src/main.c | 78 ++++------------------------------------- src/parser.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/parser.h | 32 +++++++++++++++++ 3 files changed, 151 insertions(+), 71 deletions(-) create mode 100644 src/parser.c create mode 100644 src/parser.h diff --git a/src/main.c b/src/main.c index bef6b0d..b156466 100644 --- a/src/main.c +++ b/src/main.c @@ -7,6 +7,7 @@ #include "errors.h" #include "lexer.h" +#include "parser.h" static char *readfile(const char *, size_t *); @@ -18,81 +19,16 @@ main(int argc, char **argv) exit(EXIT_FAILURE); } - struct { - char *p; - size_t len; - } file; - file.p = readfile(argv[1], &file.len); + size_t srclen; + char *src = readfile(argv[1], &srclen); - struct lexemes_soa toks = lexstring(file.p, file.len); - - for (size_t i = 0; i < toks.len; i++) { - switch (toks.kinds[i]) { - case LEXIDENT: { - struct strview sv = toks.strs[i]; - printf("Identifier: ā€˜%.*sā€™\n", (int)sv.len, sv.p); - break; - } - case LEXAMP: - puts("Ampersand"); - break; - case LEXCOLON: - puts("Colon"); - break; - case LEXEQ: - puts("Equals"); - break; - case LEXLANGL: - puts("Left angle bracket"); - break; - case LEXLBRACE: - puts("Left brace"); - break; - case LEXLBRKT: - puts("Right bracket"); - break; - case LEXLPAR: - puts("Left parenthesis"); - break; - case LEXMINUS: - puts("Minus"); - break; - case LEXPIPE: - puts("Pipe"); - break; - case LEXPLUS: - puts("Plus"); - break; - case LEXRANGL: - puts("Right angle bracket"); - break; - case LEXRBRACE: - puts("Right brace"); - break; - case LEXRBRKT: - puts("Right bracket"); - break; - case LEXRPAR: - puts("Right parenthesis"); - break; - case LEXSEMI: - puts("Semicolon"); - break; - case LEXSLASH: - puts("Slash"); - break; - case LEXSTAR: - puts("Asterisk"); - break; - case LEXTILDE: - puts("Tilde"); - break; - } - } + struct lexemes_soa toks = lexstring(src, srclen); + struct ast_soa ast = parsetoks(toks); #if DEBUG - free(file.p); + free(src); lexemes_free(toks); + ast_free(ast); #endif return EXIT_SUCCESS; } diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..a235746 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "errors.h" +#include "parser.h" + +/* #define AST_DFLT_CAP (2048) */ +#define AST_DFLT_CAP (8) +#define SIZE_WDTH (sizeof(size_t) * CHAR_BIT) + +static struct ast_soa mk_ast_soa(void); +static void ast_soa_resz(struct ast_soa *); + +struct ast_soa +parsetoks(struct lexemes_soa toks) +{ + struct ast_soa ast = mk_ast_soa(); + + return ast; +} + +struct ast_soa +mk_ast_soa(void) +{ + struct ast_soa soa; + + static_assert(offsetof(struct ast_soa, kinds) + < offsetof(struct ast_soa, lexemes), + "KINDS is not the first field before LEXEMES"); + static_assert(offsetof(struct ast_soa, kinds) + < offsetof(struct ast_soa, kids), + "KINDS is not the first field before KIDS"); + static_assert(AST_DFLT_CAP * sizeof(*soa.kinds) % alignof(*soa.lexemes) + == 0, + "Additional padding is required to properly align LEXEMES"); + static_assert(AST_DFLT_CAP * (sizeof(*soa.kinds) + sizeof(*soa.lexemes)) + % alignof(*soa.kids) + == 0, + "Additional padding is required to properly align KIDS"); + + soa.len = 0; + soa.cap = AST_DFLT_CAP; + + if ((soa.kinds = malloc(soa.cap * AST_SOA_BLKSZ)) == NULL) + err("malloc:"); + soa.lexemes = (void *)((char *)soa.kinds + soa.cap * sizeof(*soa.kinds)); + soa.kids = (void *)((char *)soa.lexemes + soa.cap * sizeof(*soa.lexemes)); + + return soa; +} + +void +ast_soa_resz(struct ast_soa *soa) +{ + static_assert(offsetof(struct ast_soa, kinds) + < offsetof(struct ast_soa, lexemes), + "KINDS is not the first field before LEXEMES"); + static_assert(offsetof(struct ast_soa, kinds) + < offsetof(struct ast_soa, kids), + "KINDS is not the first field before KIDS"); + static_assert(offsetof(struct ast_soa, lexemes) + < offsetof(struct ast_soa, kids), + "LEXEMES is not the second field before KIDS"); + + size_t ncap, pad1, pad2, newsz; + ptrdiff_t lexemes_off, kids_off; + + lexemes_off = (char *)soa->lexemes - (char *)soa->kinds; + kids_off = (char *)soa->kids - (char *)soa->kinds; + + /* The capacity is always going to be a power of 2, so checking for overflow + becomes pretty trivial */ + if ((soa->cap >> (SIZE_WDTH - 1)) != 0) { + errno = EOVERFLOW; + err("ast_soa_resz:"); + } + ncap = soa->cap << 1; + + /* Ensure that soa->lexemes is properly aligned */ + pad1 = alignof(*soa->lexemes) + - ncap * sizeof(*soa->kinds) % alignof(*soa->lexemes); + if (pad1 == alignof(*soa->lexemes)) + pad1 = 0; + + pad2 = alignof(*soa->kids) + - (ncap * (sizeof(*soa->kinds) + sizeof(*soa->lexemes)) + pad1) + % alignof(*soa->kids); + if (pad2 != alignof(*soa->kids)) + pad2 = 0; + + newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2; + + if ((soa->kinds = realloc(soa->kinds, newsz)) == NULL) + err("realloc:"); + + soa->lexemes = (void *)((char *)soa->kinds + ncap * sizeof(*soa->kinds) + + pad1); + soa->kids = (void *)((char *)soa->lexemes + ncap * sizeof(*soa->lexemes) + + pad2); + + memmove(soa->kids, (char *)soa->kinds + kids_off, + soa->len * sizeof(*soa->kids)); + memmove(soa->lexemes, (char *)soa->kinds + lexemes_off, + soa->len * sizeof(*soa->lexemes)); + + soa->cap = ncap; +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..8da762d --- /dev/null +++ b/src/parser.h @@ -0,0 +1,32 @@ +#ifndef ORYX_PARSER_H +#define ORYX_PARSER_H + +#include +#include + +#include "lexer.h" + +enum { + PRSBINADD = '+', + PRSBINSUB = '-', +}; + +typedef uint8_t ast_kind; + +#define AST_SOA_BLKSZ (sizeof(ast_kind) + sizeof(size_t) * 3) + +struct ast_soa { + ast_kind *kinds; + size_t *lexemes; + struct { + size_t lhs, rhs; + } *kids; + size_t len, cap; +}; + +#define ast_free(x) free((x).kinds) + +/* Parse the tokens in TOKS into an abstract syntax tree */ +struct ast_soa parsetoks(struct lexemes_soa toks); + +#endif /* !ORYX_PARSER_H */ -- cgit v1.2.3