From 5a78c6d8f0e30b7815729d9589e2252e439553e0 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 11 Jun 2024 02:00:25 +0200 Subject: Parse very basic declarations --- src/parser.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- src/parser.h | 30 +++++++++-- 2 files changed, 174 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/parser.c b/src/parser.c index a235746..4a902b9 100644 --- a/src/parser.c +++ b/src/parser.c @@ -11,30 +11,114 @@ /* #define AST_DFLT_CAP (2048) */ #define AST_DFLT_CAP (8) +#define AST_EMPTY ((size_t)-1) +#define AUX_DFLT_CAP (128) #define SIZE_WDTH (sizeof(size_t) * CHAR_BIT) +static size_t parsedecl(struct ast_soa *, struct lexemes_soa); +static size_t parseexpr(struct ast_soa *, struct lexemes_soa); +static size_t parsetype(struct ast_soa *, struct lexemes_soa); + static struct ast_soa mk_ast_soa(void); static void ast_soa_resz(struct ast_soa *); +static size_t aux_push(struct auxilliary *, union extra); +static size_t ast_soa_push(struct ast_soa *, ast_kind, size_t, size_t, size_t, + size_t); + +static size_t toksidx; + struct ast_soa parsetoks(struct lexemes_soa toks) { struct ast_soa ast = mk_ast_soa(); + while (toksidx < toks.len) + parsedecl(&ast, toks); + return ast; } +size_t +parsedecl(struct ast_soa *ast, struct lexemes_soa toks) +{ + bool constdecl; + size_t lexeme, lhs, rhs; + + lexeme = toksidx; + + if (toks.kinds[toksidx++] != LEXIDENT) + err("Expected identifier"); + if (toks.kinds[toksidx++] != LEXCOLON) + err("Expected colon"); + + switch (toks.kinds[toksidx]) { + case LEXCOLON: + case LEXEQ: + constdecl = toks.kinds[toksidx++] == LEXCOLON; + lhs = AST_EMPTY; + rhs = parseexpr(ast, toks); + break; + default: + lhs = parsetype(ast, toks); + if (toks.kinds[toksidx] == LEXCOLON || toks.kinds[toksidx] == LEXEQ) { + constdecl = toks.kinds[toksidx++] == LEXEQ; + rhs = parseexpr(ast, toks); + } else { + constdecl = false; + rhs = AST_EMPTY; + } + break; + } + + if (toks.kinds[toksidx++] != LEXSEMI) + err("Expected semicolon"); + + size_t extra = aux_push(&ast->aux, (union extra){.constdecl = constdecl}); + return ast_soa_push(ast, PRSDECL, lexeme, lhs, rhs, extra); +} + +size_t +parseexpr(struct ast_soa *ast, struct lexemes_soa toks) +{ + ast_kind kind; + size_t lexeme, lhs, rhs, extra; + + lexeme = lhs = rhs = extra = AST_EMPTY; + + switch (toks.kinds[toksidx]) { + case LEXNUM: + kind = PRSNUMERIC; + lexeme = toksidx++; + break; + default: + err("Expected expression"); + } + + return ast_soa_push(ast, kind, lexeme, lhs, rhs, extra); +} + +size_t +parsetype(struct ast_soa *ast, struct lexemes_soa toks) +{ + size_t lexeme; + + switch (toks.kinds[toksidx]) { + case LEXIDENT: + lexeme = toksidx++; + break; + default: + err("Expected type"); + } + + return ast_soa_push(ast, PRSTYPE, lexeme, AST_EMPTY, AST_EMPTY, AST_EMPTY); +} + struct ast_soa mk_ast_soa(void) { struct ast_soa soa; - static_assert(offsetof(struct ast_soa, kinds) - < offsetof(struct ast_soa, lexemes), - "KINDS is not the first field before LEXEMES"); - static_assert(offsetof(struct ast_soa, kinds) - < offsetof(struct ast_soa, kids), - "KINDS is not the first field before KIDS"); static_assert(AST_DFLT_CAP * sizeof(*soa.kinds) % alignof(*soa.lexemes) == 0, "Additional padding is required to properly align LEXEMES"); @@ -42,14 +126,26 @@ mk_ast_soa(void) % alignof(*soa.kids) == 0, "Additional padding is required to properly align KIDS"); + static_assert(AST_DFLT_CAP + * (sizeof(*soa.kinds) + sizeof(*soa.lexemes) + + sizeof(*soa.kids)) + % alignof(*soa.extra) + == 0, + "Additional padding is required to properly align EXTRA"); soa.len = 0; soa.cap = AST_DFLT_CAP; + soa.aux.len = 0; + soa.aux.cap = AUX_DFLT_CAP; if ((soa.kinds = malloc(soa.cap * AST_SOA_BLKSZ)) == NULL) err("malloc:"); soa.lexemes = (void *)((char *)soa.kinds + soa.cap * sizeof(*soa.kinds)); soa.kids = (void *)((char *)soa.lexemes + soa.cap * sizeof(*soa.lexemes)); + soa.extra = (void *)((char *)soa.kids + soa.cap * sizeof(*soa.kids)); + + if ((soa.aux.buf = malloc(soa.aux.cap * sizeof(*soa.aux.buf))) == NULL) + err("malloc:"); return soa; } @@ -57,21 +153,12 @@ mk_ast_soa(void) void ast_soa_resz(struct ast_soa *soa) { - static_assert(offsetof(struct ast_soa, kinds) - < offsetof(struct ast_soa, lexemes), - "KINDS is not the first field before LEXEMES"); - static_assert(offsetof(struct ast_soa, kinds) - < offsetof(struct ast_soa, kids), - "KINDS is not the first field before KIDS"); - static_assert(offsetof(struct ast_soa, lexemes) - < offsetof(struct ast_soa, kids), - "LEXEMES is not the second field before KIDS"); - - size_t ncap, pad1, pad2, newsz; - ptrdiff_t lexemes_off, kids_off; + size_t ncap, pad1, pad2, pad3, newsz; + ptrdiff_t lexemes_off, kids_off, extra_off; lexemes_off = (char *)soa->lexemes - (char *)soa->kinds; kids_off = (char *)soa->kids - (char *)soa->kinds; + extra_off = (char *)soa->extra - (char *)soa->kinds; /* The capacity is always going to be a power of 2, so checking for overflow becomes pretty trivial */ @@ -87,14 +174,24 @@ ast_soa_resz(struct ast_soa *soa) if (pad1 == alignof(*soa->lexemes)) pad1 = 0; + /* Ensure that soa->kids is properly aligned */ pad2 = alignof(*soa->kids) - (ncap * (sizeof(*soa->kinds) + sizeof(*soa->lexemes)) + pad1) % alignof(*soa->kids); if (pad2 != alignof(*soa->kids)) pad2 = 0; - newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2; - + /* Ensure that soa->extra is properly aligned */ + pad3 = alignof(*soa->extra) + - (ncap + * (sizeof(*soa->kinds) + sizeof(*soa->lexemes) + + sizeof(*soa->kids)) + + pad1 + pad2) + % alignof(*soa->extra); + if (pad3 != alignof(*soa->extra)) + pad3 = 0; + + newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2 + pad3; if ((soa->kinds = realloc(soa->kinds, newsz)) == NULL) err("realloc:"); @@ -102,7 +199,10 @@ ast_soa_resz(struct ast_soa *soa) + pad1); soa->kids = (void *)((char *)soa->lexemes + ncap * sizeof(*soa->lexemes) + pad2); + soa->extra = (void *)((char *)soa->kids + ncap * sizeof(*soa->kids) + pad3); + memmove(soa->extra, (char *)soa->kinds + extra_off, + soa->len * sizeof(*soa->extra)); memmove(soa->kids, (char *)soa->kinds + kids_off, soa->len * sizeof(*soa->kids)); memmove(soa->lexemes, (char *)soa->kinds + lexemes_off, @@ -110,3 +210,31 @@ ast_soa_resz(struct ast_soa *soa) soa->cap = ncap; } + +size_t +aux_push(struct auxilliary *aux, union extra e) +{ + if (aux->len == aux->cap) { + size_t ncap = aux->cap * 2; + if ((aux->buf = realloc(aux->buf, ncap)) == NULL) + err("realloc:"); + aux->cap = ncap; + } + aux->buf[aux->len] = e; + return aux->len++; +} + +size_t +ast_soa_push(struct ast_soa *soa, ast_kind kind, size_t lexeme, size_t lhs, + size_t rhs, size_t extra) +{ + if (soa->len == soa->cap) + ast_soa_resz(soa); + + soa->kinds[soa->len] = kind; + soa->lexemes[soa->len] = lexeme; + soa->kids[soa->len].lhs = lhs; + soa->kids[soa->len].rhs = rhs; + soa->extra[soa->len] = extra; + return soa->len++; +} diff --git a/src/parser.h b/src/parser.h index 8da762d..ba5aa58 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,19 +1,38 @@ #ifndef ORYX_PARSER_H #define ORYX_PARSER_H +#include #include #include #include "lexer.h" enum { - PRSBINADD = '+', - PRSBINSUB = '-', + PRSDECL, /* Declaration */ + PRSNUMERIC, /* Numeric constant */ + PRSTYPE, /* Type */ + + PRSBINADD = '+', /* Addition */ }; typedef uint8_t ast_kind; -#define AST_SOA_BLKSZ (sizeof(ast_kind) + sizeof(size_t) * 3) +#define AST_SOA_BLKSZ (sizeof(ast_kind) + sizeof(size_t) * 4) + +/* + * AST Notes: + * + * Declarations have .lhs set to the type of the symbol being declared and .rhs + * set to the value of the declaration if it is an assignment declaration. They + * also have aux.constdecl set to true if it’s a declaration of a constant. + */ + +struct auxilliary { + union extra { + bool constdecl; + } *buf; + size_t len, cap; +}; struct ast_soa { ast_kind *kinds; @@ -21,10 +40,13 @@ struct ast_soa { struct { size_t lhs, rhs; } *kids; + size_t *extra; size_t len, cap; + + struct auxilliary aux; }; -#define ast_free(x) free((x).kinds) +#define ast_free(x) do { free((x).kinds); free((x).aux.buf); } while (0) /* Parse the tokens in TOKS into an abstract syntax tree */ struct ast_soa parsetoks(struct lexemes_soa toks); -- cgit v1.2.3