diff options
-rw-r--r-- | src/alloc.c | 7 | ||||
-rw-r--r-- | src/alloc.h | 8 | ||||
-rw-r--r-- | src/analyzer.c | 227 | ||||
-rw-r--r-- | src/analyzer.h | 23 | ||||
-rw-r--r-- | src/arena.c | 30 | ||||
-rw-r--r-- | src/codegen.c | 50 | ||||
-rw-r--r-- | src/codegen.h | 3 | ||||
-rw-r--r-- | src/lexer.c | 40 | ||||
-rw-r--r-- | src/lexer.h | 18 | ||||
-rw-r--r-- | src/main.c | 23 | ||||
-rw-r--r-- | src/parser.c | 120 | ||||
-rw-r--r-- | src/parser.h | 43 | ||||
-rw-r--r-- | src/strview.c | 10 | ||||
-rw-r--r-- | src/strview.h | 12 | ||||
-rw-r--r-- | src/types.h | 2 | ||||
-rw-r--r-- | src/unicode-avx2.c | 3 | ||||
-rw-r--r-- | src/unicode-neon.c | 3 | ||||
-rw-r--r-- | src/unicode-sse4_1.c | 3 | ||||
-rw-r--r-- | src/unicode.c | 18 |
19 files changed, 332 insertions, 311 deletions
diff --git a/src/alloc.c b/src/alloc.c index 8682656..63095fb 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -4,17 +4,18 @@ #include <stdlib.h> #include "alloc.h" +#include "common.h" #include "errors.h" void * bufalloc(void *ptr, size_t nmemb, size_t size) { assert(nmemb * size != 0); - if (size > SIZE_MAX / nmemb) { - errno = EOVERFLOW; + if (unlikely(size > SIZE_MAX / nmemb)) { + errno = ENOMEM; err("%s:", __func__); } - if ((ptr = realloc(ptr, nmemb * size)) == NULL) + if (unlikely((ptr = realloc(ptr, nmemb * size)) == NULL)) err("%s:", __func__); return ptr; } diff --git a/src/alloc.h b/src/alloc.h index 770adba..2c67fb7 100644 --- a/src/alloc.h +++ b/src/alloc.h @@ -6,7 +6,7 @@ #include "common.h" -typedef struct _arena *arena; +typedef struct _arena *arena_t; /* Allocate a buffer of NMEMB elements of size SIZE. If PTR is non-null then reallocate the buffer it points to. Aborts on out-of-memory or overflow. */ @@ -15,16 +15,16 @@ void *bufalloc(void *ptr, size_t nmemb, size_t size) /* Allocate a buffer of NMEMB elements of size SIZE with alignment ALIGN using the arena-allocator A. */ -void *arena_alloc(arena *a, size_t nmemb, size_t size, size_t align) +void *arena_alloc(arena_t *a, size_t nmemb, size_t size, size_t align) __attribute__((returns_nonnull, nonnull, warn_unused_result, malloc, alloc_size(2, 3), alloc_align(4))); -void *_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb, +void *_arena_grow(arena_t *a, void *ptr, size_t old_nmemb, size_t new_nmemb, size_t size, size_t align) __attribute__((returns_nonnull, nonnull, warn_unused_result)); /* Deallocate all memory associated with the arena A. */ -void arena_free(arena *a) +void arena_free(arena_t *a) __attribute__((nonnull)); /* Allocate a buffer of N elements of type T using the arena-allocator A. */ diff --git a/src/analyzer.c b/src/analyzer.c index e8d4c51..32cdc15 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -16,34 +16,31 @@ #include "strview.h" #include "types.h" -/* A hashmap mapping symbol names to their indicies in the AST */ +/* Mapping of symbol names to their indicies in the AST */ typedef struct symtab { struct symtab *child[4]; - struct strview key; - idx_t_ val; -} symtab; + strview_t key; + idx_t val; +} symtab_t; -/* A dynamic array of scopes */ -struct scopes { - struct scope *buf; +typedef struct { + scope_t *buf; size_t len, cap; -}; +} scopes_t; -/* Analyzer context; keeps track of the state of static analysis */ struct azctx { - /* An arena allocator */ - arena *a; + arena_t *a; /* The return type of the function being analyzed */ - struct type fnret; + type_t fnret; /* The name of the symbol being declared. This is necessary to allow for ‘X :: X’ to be treated as shadowing and not a circular definition */ - struct strview decl; + strview_t decl; /* The index of the current scope in the scopes array */ - idx_t_ si; + idx_t si; /* If we need to check for return statements. Only true for the outer body-block of a function that returns a value. */ @@ -51,60 +48,58 @@ struct azctx { }; struct cfctx { - arena *a; - struct strview decl; - idx_t_ si; + arena_t *a; + strview_t decl; + idx_t si; }; -static void analyzeast(struct scope *, struct type *, struct ast, struct aux, - struct lexemes, arena *) +/* Perform static analysis over the AST */ +static void analyzeast(scope_t *, type_t *, ast_t, aux_t, lexemes_t, arena_t *) __attribute__((nonnull)); -static void constfold(mpq_t *, struct scope *, struct type *, struct ast, - struct lexemes, arena *) + +/* Perform constant folding over the AST */ +static void constfold(mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, arena_t *) __attribute__((nonnull)); /* Perform a pass over the entire AST and return an array of symbol tables, one for each scope in the program */ -static struct scope *gensymtabs(struct ast, struct aux, struct lexemes, arena *) +static scope_t *gensymtabs(ast_t, aux_t, lexemes_t, arena_t *) __attribute__((returns_nonnull, nonnull)); /* Find all the unordered symbols in the scope delimited by the inclusive indicies BEG and END in the AST, and accumulate them into a symbol table appended to the symbol table list. UP is the index of the previous scopes symbol table in the symbol table list. */ -static void find_unordered_syms(struct scopes *, struct ast, struct aux, - struct lexemes, idx_t_ up, idx_t_ beg, - idx_t_ end, arena *) +static void find_unordered_syms(scopes_t *, ast_t, aux_t, lexemes_t, idx_t up, + idx_t beg, idx_t end, arena_t *) __attribute__((nonnull)); -typedef idx_t_ analyzer(struct azctx, struct scope *, struct type *, struct ast, - struct aux, struct lexemes, idx_t_) - __attribute__((nonnull)); -typedef idx_t_ constfolder(struct cfctx, mpq_t *, struct scope *, struct type *, - struct ast, struct lexemes, idx_t_) - __attribute__((nonnull)); +typedef idx_t analyzer(struct azctx, scope_t *, type_t *, ast_t, aux_t, + lexemes_t, idx_t) __attribute__((nonnull)); +typedef idx_t constfolder(struct cfctx, mpq_t *, scope_t *, type_t *, ast_t, + lexemes_t, idx_t) __attribute__((nonnull)); static analyzer analyzeblk, analyzedecl, analyzeexpr, analyzefn, analyzestmt; static constfolder constfoldblk, constfolddecl, constfoldexpr, constfoldstmt; -static const struct type *typegrab(struct ast, struct lexemes, idx_t_) +static const type_t *typegrab(ast_t, lexemes_t, idx_t) __attribute__((returns_nonnull)); -static bool typecompat(struct type, struct type); -static bool returns(struct ast, idx_t_); +static bool typecompat(type_t, type_t); +static bool returns(ast_t, idx_t); /* Index the symbol table M with the key SV, returning a pointer to the value. If no entry exists and A is non-null, a pointer to a newly allocated (and zeroed) value is returned, NULL otherwise. */ -static idx_t_ *symtab_insert(symtab **m, struct strview sv, arena *a) +static idx_t *symtab_insert(symtab **m, strview_t sv, arena_t *a) __attribute__((nonnull(1))); /* Defined in primitives.gperf */ -const struct type *typelookup(const uchar *, size_t) +const type_t *typelookup(const uchar *, size_t) __attribute__((nonnull)); void -analyzeprog(struct ast ast, struct aux aux, struct lexemes toks, arena *a, - struct type **types, struct scope **scps, mpq_t **folds) +analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, type_t **types, + scope_t **scps, mpq_t **folds) { *types = bufalloc(NULL, ast.len, sizeof(**types)); memset(*types, 0, ast.len * sizeof(**types)); @@ -117,100 +112,99 @@ analyzeprog(struct ast ast, struct aux aux, struct lexemes toks, arena *a, constfold(*folds, *scps, *types, ast, toks, a); } -struct scope * -gensymtabs(struct ast ast, struct aux aux, struct lexemes toks, arena *a) +scope_t * +gensymtabs(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a) { - struct scopes scps = {.cap = 32}; + scopes_t scps = {.cap = 32}; scps.buf = bufalloc(NULL, scps.cap, sizeof(*scps.buf)); find_unordered_syms(&scps, ast, aux, toks, 0, 0, ast.len - 1, a); return scps.buf; } void -find_unordered_syms(struct scopes *scps, struct ast ast, struct aux aux, - struct lexemes toks, idx_t_ up, idx_t_ beg, idx_t_ end, - arena *a) +find_unordered_syms(scopes_t *scps, ast_t ast, aux_t aux, lexemes_t toks, + idx_t up, idx_t beg, idx_t end, arena_t *a) { if (scps->len == scps->cap) { scps->cap *= 2; scps->buf = bufalloc(scps->buf, scps->cap, sizeof(*scps->buf)); } - struct scope *scp = scps->buf + scps->len++; - *scp = (struct scope){ + scope_t *scp = scps->buf + scps->len++; + *scp = (scope_t){ .i = beg, .up = up, .map = NULL, }; - for (idx_t_ i = beg; likely(i <= end); i++) { + for (idx_t i = beg; likely(i <= end); i++) { bool isstatic = ast.kinds[i] <= _AST_DECLS_END && aux.buf[ast.kids[i].lhs].decl.isstatic; bool isconst = ast.kinds[i] == ASTCDECL; if (isstatic || isconst) { - struct strview sv = toks.strs[ast.lexemes[i]]; - idx_t_ *p = symtab_insert(&scp->map, sv, a); + strview_t sv = toks.strs[ast.lexemes[i]]; + idx_t *p = symtab_insert(&scp->map, sv, a); if (*p != 0) { err("analyzer: Symbol ‘%.*s’ declared multiple times", SV_PRI_ARGS(sv)); } *p = i; } else if (ast.kinds[i] == ASTBLK) { - struct pair p = ast.kids[i]; + pair_t p = ast.kids[i]; find_unordered_syms(scps, ast, aux, toks, beg, p.lhs, p.rhs, a); i = p.rhs; } } } -const struct type * -typegrab(struct ast ast, struct lexemes toks, idx_t_ i) +const type_t * +typegrab(ast_t ast, lexemes_t toks, idx_t i) { - struct strview sv = toks.strs[ast.lexemes[i]]; - const struct type *tp = typelookup(sv.p, sv.len); + strview_t sv = toks.strs[ast.lexemes[i]]; + const type_t *tp = typelookup(sv.p, sv.len); if (tp == NULL) err("analyzer: Unknown type ‘%.*s’", (int)sv.len, sv.p); return tp; } void -analyzeast(struct scope *scps, struct type *types, struct ast ast, - struct aux aux, struct lexemes toks, arena *a) +analyzeast(scope_t *scps, type_t *types, ast_t ast, aux_t aux, lexemes_t toks, + arena_t *a) { struct azctx ctx = {.a = a}; - for (idx_t_ i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { + for (idx_t i = 0; likely(i < ast.len); i = fwdnode(ast, i)) { assert(ast.kinds[i] <= _AST_DECLS_END); analyzedecl(ctx, scps, types, ast, aux, toks, i); } } -idx_t_ -analyzedecl(struct azctx ctx, struct scope *scps, struct type *types, - struct ast ast, struct aux aux, struct lexemes toks, idx_t_ i) +idx_t +analyzedecl(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, + aux_t aux, lexemes_t toks, idx_t i) { - struct strview sv = toks.strs[ast.lexemes[i]]; + strview_t sv = toks.strs[ast.lexemes[i]]; if (ctx.si > 0 && ast.kinds[i] == ASTDECL) { - idx_t_ *ip = symtab_insert(&scps[ctx.si].map, sv, ctx.a); + idx_t *ip = symtab_insert(&scps[ctx.si].map, sv, ctx.a); if (*ip == 0) *ip = i; else { err("analyzer: Variable ‘%.*s’ declared multiple times", - SV_PRI_ARGS(sv)); + SV_PRI_ARGS(sv)); } } types[i].kind = TYPE_CHECKING; - struct pair p = ast.kids[i]; - struct type ltype, rtype; + pair_t p = ast.kids[i]; + type_t ltype, rtype; ltype.kind = TYPE_UNSET; - idx_t_ typeidx = aux.buf[p.lhs].decl.type; + idx_t typeidx = aux.buf[p.lhs].decl.type; assert(typeidx != AST_EMPTY || p.rhs != AST_EMPTY); - idx_t_ ni; + idx_t ni; if (typeidx != AST_EMPTY) ltype = *typegrab(ast, toks, typeidx); @@ -230,16 +224,16 @@ analyzedecl(struct azctx ctx, struct scope *scps, struct type *types, return ni; } -idx_t_ -analyzestmt(struct azctx ctx, struct scope *scps, struct type *types, - struct ast ast, struct aux aux, struct lexemes toks, idx_t_ i) +idx_t +analyzestmt(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, + aux_t aux, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: case ASTCDECL: return analyzedecl(ctx, scps, types, ast, aux, toks, i); case ASTRET: { - idx_t_ expr = ast.kids[i].rhs; + idx_t expr = ast.kids[i].rhs; if (expr == AST_EMPTY) { if (ctx.fnret.kind != TYPE_UNSET) err("analyzer: Missing return value"); @@ -247,7 +241,7 @@ analyzestmt(struct azctx ctx, struct scope *scps, struct type *types, } else if (ctx.fnret.kind == TYPE_UNSET) err("analyzer: Function has no return value"); - idx_t_ ni = analyzeexpr(ctx, scps, types, ast, aux, toks, + idx_t ni = analyzeexpr(ctx, scps, types, ast, aux, toks, ast.kids[i].rhs); if (!typecompat(ctx.fnret, types[ast.kids[i].rhs])) err("analyzer: Return type mismatch"); @@ -258,9 +252,9 @@ analyzestmt(struct azctx ctx, struct scope *scps, struct type *types, } } -idx_t_ -analyzeexpr(struct azctx ctx, struct scope *scps, struct type *types, - struct ast ast, struct aux aux, struct lexemes toks, idx_t_ i) +idx_t +analyzeexpr(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, + aux_t aux, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTNUMLIT: @@ -269,15 +263,15 @@ analyzeexpr(struct azctx ctx, struct scope *scps, struct type *types, types[i].issigned = true; return i + 1; case ASTIDENT: { - struct strview sv = toks.strs[ast.lexemes[i]]; + strview_t sv = toks.strs[ast.lexemes[i]]; /* Variable shadowing */ if (strview_eq(sv, ctx.decl) && ctx.si > 0) ctx.si--; - for (idx_t_ lvl = ctx.si;;) { - struct scope scp = scps[lvl]; - idx_t_ *ip = symtab_insert(&scp.map, sv, NULL); + for (idx_t lvl = ctx.si;;) { + scope_t scp = scps[lvl]; + idx_t *ip = symtab_insert(&scp.map, sv, NULL); if (ip == NULL) { if (lvl == 0) @@ -290,7 +284,8 @@ analyzeexpr(struct azctx ctx, struct scope *scps, struct type *types, analyzedecl(ctx, scps, types, ast, aux, toks, *ip); break; case TYPE_CHECKING: - err("analyzer: Circular definition of ‘%.*s’", SV_PRI_ARGS(sv)); + err("analyzer: Circular definition of ‘%.*s’", + SV_PRI_ARGS(sv)); } types[i] = types[*ip]; @@ -307,14 +302,14 @@ analyzeexpr(struct azctx ctx, struct scope *scps, struct type *types, } } -idx_t_ -analyzefn(struct azctx ctx, struct scope *scps, struct type *types, - struct ast ast, struct aux aux, struct lexemes toks, idx_t_ i) +idx_t +analyzefn(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, + lexemes_t toks, idx_t i) { - struct type t = {.kind = TYPE_FN}; - struct pair p = ast.kids[i]; + type_t t = {.kind = TYPE_FN}; + pair_t p = ast.kids[i]; - idx_t_ proto = p.lhs; + idx_t proto = p.lhs; if (ast.kids[proto].rhs != AST_EMPTY) { t.ret = typegrab(ast, toks, ast.kids[proto].rhs); ctx.fnret = *t.ret; @@ -325,11 +320,11 @@ analyzefn(struct azctx ctx, struct scope *scps, struct type *types, return analyzeblk(ctx, scps, types, ast, aux, toks, p.rhs); } -idx_t_ -analyzeblk(struct azctx ctx, struct scope *scps, struct type *types, - struct ast ast, struct aux aux, struct lexemes toks, idx_t_ i) +idx_t +analyzeblk(struct azctx ctx, scope_t *scps, type_t *types, ast_t ast, aux_t aux, + lexemes_t toks, idx_t i) { - struct pair p = ast.kids[i]; + pair_t p = ast.kids[i]; while (scps[ctx.si].i != p.lhs) ctx.si++; @@ -348,9 +343,9 @@ analyzeblk(struct azctx ctx, struct scope *scps, struct type *types, return i; } -idx_t_ -constfoldstmt(struct cfctx ctx, mpq_t *folds, struct scope *scps, - struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +idx_t +constfoldstmt(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, + ast_t ast, lexemes_t toks, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: @@ -364,11 +359,11 @@ constfoldstmt(struct cfctx ctx, mpq_t *folds, struct scope *scps, } } -idx_t_ -constfoldblk(struct cfctx ctx, mpq_t *folds, struct scope *scps, - struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +idx_t +constfoldblk(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, + ast_t ast, lexemes_t toks, idx_t i) { - struct pair p = ast.kids[i]; + pair_t p = ast.kids[i]; while (scps[ctx.si].i != p.lhs) ctx.si++; for (i = p.lhs; i <= p.rhs; @@ -378,9 +373,9 @@ constfoldblk(struct cfctx ctx, mpq_t *folds, struct scope *scps, return i; } -idx_t_ -constfoldexpr(struct cfctx ctx, mpq_t *folds, struct scope *scps, - struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +idx_t +constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, + ast_t ast, lexemes_t toks, idx_t i) { /* Check if this expression has already been constant folded. This works because when an mpq_t is initialized via mpq_init(), it is @@ -393,7 +388,7 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, struct scope *scps, mpq_init(folds[i]); /* TODO: Temporary allocator */ - struct strview sv = toks.strs[ast.lexemes[i]]; + strview_t sv = toks.strs[ast.lexemes[i]]; char *buf = bufalloc(NULL, sv.len + 1, 1); size_t len = 0; @@ -410,15 +405,15 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, struct scope *scps, return fwdnode(ast, i); } case ASTIDENT: { - struct strview sv = toks.strs[ast.lexemes[i]]; + strview_t sv = toks.strs[ast.lexemes[i]]; /* Variable shadowing */ if (strview_eq(sv, ctx.decl) && ctx.si > 0) ctx.si--; - for (idx_t_ lvl = ctx.si;;) { - struct scope scp = scps[lvl]; - idx_t_ *ip = symtab_insert(&scp.map, sv, NULL); + for (idx_t lvl = ctx.si;;) { + scope_t scp = scps[lvl]; + idx_t *ip = symtab_insert(&scp.map, sv, NULL); if (ip == NULL) { assert(lvl != 0); @@ -428,7 +423,7 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, struct scope *scps, case ASTDECL: break; case ASTCDECL: { - idx_t_ expr = ast.kids[*ip].rhs; + idx_t expr = ast.kids[*ip].rhs; assert(expr != AST_EMPTY); #if DEBUG mpq_init(folds[i]); @@ -466,9 +461,9 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, struct scope *scps, } } -idx_t_ -constfolddecl(struct cfctx ctx, mpq_t *folds, struct scope *scps, - struct type *types, struct ast ast, struct lexemes toks, idx_t_ i) +idx_t +constfolddecl(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, + ast_t ast, lexemes_t toks, idx_t i) { if (ast.kids[i].rhs == AST_EMPTY) return fwdnode(ast, i); @@ -477,18 +472,18 @@ constfolddecl(struct cfctx ctx, mpq_t *folds, struct scope *scps, } void -constfold(mpq_t *folds, struct scope *scps, struct type *types, struct ast ast, - struct lexemes toks, arena *a) +constfold(mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, + arena_t *a) { struct cfctx ctx = {.a = a}; - for (idx_t_ i = 0; likely(i < ast.len);) { + for (idx_t i = 0; likely(i < ast.len);) { assert(ast.kinds[i] <= _AST_DECLS_END); i = constfolddecl(ctx, folds, scps, types, ast, toks, i); } } bool -returns(struct ast ast, idx_t_ i) +returns(ast_t ast, idx_t i) { switch (ast.kinds[i]) { case ASTDECL: @@ -501,7 +496,7 @@ returns(struct ast ast, idx_t_ i) } bool -typecompat(struct type lhs, struct type rhs) +typecompat(type_t lhs, type_t rhs) { /* Function types are compatible if they have the same parameter- and return types */ @@ -522,8 +517,8 @@ typecompat(struct type lhs, struct type rhs) && lhs.size == rhs.size; } -idx_t_ * -symtab_insert(symtab **m, struct strview k, arena *a) +idx_t * +symtab_insert(symtab **m, strview_t k, arena_t *a) { for (uint64_t h = strview_hash(k); *m; h <<= 2) { if (strview_eq(k, (*m)->key)) diff --git a/src/analyzer.h b/src/analyzer.h index ceb4899..ca32eea 100644 --- a/src/analyzer.h +++ b/src/analyzer.h @@ -28,20 +28,19 @@ enum { _TYPE_LAST_ENT, }; -typedef uint8_t type_kind_t_; -static_assert(_TYPE_LAST_ENT - 1 <= (type_kind_t_)-1, - "Too many AST tokens to fix in TYPE_KIND_T_"); +static_assert(_TYPE_LAST_ENT - 1 <= UINT8_MAX, + "Too many AST tokens to fix in uint8_t"); typedef struct symtab symtab; -struct scope { - idx_t_ up, i; +typedef struct { + idx_t up, i; symtab *map; -}; +} scope_t; /* A variable type */ -struct type { - type_kind_t_ kind; +typedef struct type { + uint8_t kind; union { struct { @@ -51,13 +50,13 @@ struct type { }; struct { const struct type *params, *ret; - idx_t_ paramcnt; + idx_t paramcnt; }; }; -}; +} type_t; -void analyzeprog(struct ast, struct aux, struct lexemes, arena *, - struct type **, struct scope **, mpq_t **) +void analyzeprog(ast_t, aux_t, lexemes_t, arena_t *, type_t **, scope_t **, + mpq_t **) __attribute__((nonnull)); #endif /* !ORYX_ANALYZER_H */ diff --git a/src/arena.c b/src/arena.c index 5b50404..e7091fb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -26,14 +26,20 @@ #define IS_POW_2(n) ((n) != 0 && ((n) & ((n)-1)) == 0) struct _arena { + /* DATA points to the start of the block’s memory while FREE points + to the beginning of the unused data in the block */ + void *data, *free; size_t len, cap; - void *data, *last; struct _arena *next; }; -static struct _arena *mkblk(size_t) +/* Return a new arena block of size SZ */ +static struct _arena *mkblk(size_t sz) __attribute__((returns_nonnull)); -static inline size_t pad(size_t, size_t) + +/* Return the padding required to properly align an allocation with + alignment ALIGN at offset OFF */ +static inline size_t pad(size_t off, size_t align) __attribute__((const, always_inline)); void * @@ -42,8 +48,8 @@ arena_alloc(struct _arena **a, size_t nmemb, size_t size, size_t align) assert(IS_POW_2(align)); assert(nmemb * size != 0); - if (size > SIZE_MAX / nmemb) { - errno = EOVERFLOW; + if (unlikely(size > SIZE_MAX / nmemb)) { + errno = ENOMEM; err("%s:", __func__); } @@ -57,7 +63,7 @@ arena_alloc(struct _arena **a, size_t nmemb, size_t size, size_t align) if (nlen <= p->cap) { void *ret = (char *)p->data + off; p->len = nlen; - p->last = ret; + p->free = ret; return ret; } } @@ -71,27 +77,27 @@ arena_alloc(struct _arena **a, size_t nmemb, size_t size, size_t align) } void * -_arena_grow(arena *a, void *ptr, size_t old_nmemb, size_t new_nmemb, +_arena_grow(arena_t *a, void *ptr, size_t old_nmemb, size_t new_nmemb, size_t size, size_t align) { assert(IS_POW_2(align)); assert(new_nmemb * size != 0); - if (size > SIZE_MAX / new_nmemb) { - errno = EOVERFLOW; + if (unlikely(size > SIZE_MAX / new_nmemb)) { + errno = ENOMEM; err("%s:", __func__); } size *= new_nmemb; for (struct _arena *p = *a; p != NULL; p = p->next) { - if (ptr < p->data || ptr > p->last) + if (ptr < p->data || ptr > p->free) continue; /* If we need to grow the given allocation, but it was the last allocation made in a region, then we first see if we can just eat more trailing free space in the region to avoid a memcpy(). */ - if (ptr == p->last) { + if (ptr == p->free) { size_t rest = p->cap - p->len; size_t need = (new_nmemb - old_nmemb) * size; if (need <= rest) { @@ -134,7 +140,7 @@ mkblk(size_t cap) a->data = mmap(NULL, cap, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (a->data == MAP_FAILED) err("mmap:"); - a->last = a->data; + a->free = a->data; return a; } diff --git a/src/codegen.c b/src/codegen.c index bbe7d5f..cbeeb32 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -22,25 +22,25 @@ /* A context structure we can pass to all the codegen functions just so they have easy access to everything */ struct cgctx { - arena a; + arena_t a; LLVMContextRef ctx; LLVMModuleRef mod; LLVMBuilderRef bob; - struct strview namespace; + strview_t namespace; }; -static LLVMTypeRef type2llvm(struct cgctx, struct type); -// static void str2val(mpq_t, struct strview); -// static struct val *cvmap_insert(cvmap **, struct strview, arena *) +static LLVMTypeRef type2llvm(struct cgctx, type_t); +// static void str2val(mpq_t, strview_t); +// static struct val *cvmap_insert(cvmap **, strview_t, arena_t *) // __attribute__((nonnull(1))); -static void codegenast(struct cgctx, mpq_t *, struct type *, struct ast, - struct lexemes) +static void codegenast(struct cgctx, mpq_t *, type_t *, ast_t, + lexemes_t) __attribute__((nonnull)); void -codegen(const char *file, mpq_t *folds, struct scope *scps, struct type *types, - struct ast ast, struct lexemes toks) +codegen(const char *file, mpq_t *folds, scope_t *scps, type_t *types, + ast_t ast, lexemes_t toks) { (void)scps; char *triple = LLVMGetDefaultTargetTriple(); @@ -71,33 +71,33 @@ codegen(const char *file, mpq_t *folds, struct scope *scps, struct type *types, LLVMContextDispose(ctx.ctx); } -idx_t_ -codegenfunc(struct cgctx ctx, mpq_t *folds, struct type *types, struct ast ast, - struct lexemes toks, idx_t_ i, const char *name) +idx_t +codegenfunc(struct cgctx ctx, mpq_t *folds, type_t *types, ast_t ast, + lexemes_t toks, idx_t i, const char *name) { LLVMTypeRef ret = type2llvm(ctx, types[ast.kids[i].rhs]); LLVMTypeRef ft = LLVMFunctionType(ret, NULL, 0, false); LLVMValueRef fn = LLVMAddFunction(ctx.mod, name, ft); LLVMBasicBlockRef entry = LLVMAppendBasicBlock(fn, "entry"); - struct pair p = ast.kids[i]; + pair_t p = ast.kids[i]; // for (i = p.lhs; i <= p.rhs; i = codegenstmt(ctx, folds, types, ast, toks, i)) // ; return fwdnode(ast, p.rhs); } -idx_t_ -codegendecl(struct cgctx ctx, mpq_t *folds, struct type *types, struct ast ast, - struct lexemes toks, idx_t_ i) +idx_t +codegendecl(struct cgctx ctx, mpq_t *folds, type_t *types, ast_t ast, + lexemes_t toks, idx_t i) { /* Constants are purely a compiler concept; they aren’t generated into anything */ if (ast.kinds[i] == ASTCDECL) return fwdnode(ast, i); - struct pair p = ast.kids[i]; + pair_t p = ast.kids[i]; if (ast.kinds[p.rhs] == ASTFN) { - struct strview sv = toks.strs[ast.lexemes[i]]; + strview_t sv = toks.strs[ast.lexemes[i]]; /* TODO: Namespace the name */ /* TODO: Temporary allocator */ char *name = bufalloc(NULL, sv.len + 1, 1); @@ -106,7 +106,7 @@ codegendecl(struct cgctx ctx, mpq_t *folds, struct type *types, struct ast ast, free(name); return i; } else if (!types[i].isfloat) { - struct strview sv = toks.strs[ast.lexemes[i]]; + strview_t sv = toks.strs[ast.lexemes[i]]; /* TODO: Namespace the name */ /* TODO: Temporary allocator */ char *name = bufalloc(NULL, sv.len + 1, 1); @@ -135,16 +135,16 @@ codegendecl(struct cgctx ctx, mpq_t *folds, struct type *types, struct ast ast, } void -codegenast(struct cgctx ctx, mpq_t *folds, struct type *types, struct ast ast, - struct lexemes toks) +codegenast(struct cgctx ctx, mpq_t *folds, type_t *types, ast_t ast, + lexemes_t toks) { - for (idx_t_ i = 0; i < ast.len; + for (idx_t i = 0; i < ast.len; i = codegendecl(ctx, folds, types, ast, toks, i)) ; } LLVMTypeRef -type2llvm(struct cgctx ctx, struct type t) +type2llvm(struct cgctx ctx, type_t t) { switch (t.kind) { case TYPE_FN: @@ -171,7 +171,7 @@ type2llvm(struct cgctx ctx, struct type t) } // void -// str2val(mpq_t rop, struct strview sv) +// str2val(mpq_t rop, strview_t sv) // { // mpq_init(rop); // char *clean = bufalloc(NULL, sv.len + 1, 1); @@ -189,7 +189,7 @@ type2llvm(struct cgctx ctx, struct type t) // } // // struct val * -// cvmap_insert(cvmap **m, struct strview k, arena *a) +// cvmap_insert(cvmap **m, strview_t k, arena_t *a) // { // for (uint64_t h = strview_hash(k); *m; h <<= 2) { // if (strview_eq(k, (*m)->key)) diff --git a/src/codegen.h b/src/codegen.h index 6f4af7a..ece93aa 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -7,8 +7,7 @@ #include "lexer.h" #include "parser.h" -void codegen(const char *, mpq_t *, struct scope *, struct type *, struct ast, - struct lexemes) +void codegen(const char *, mpq_t *, scope_t *, type_t *, ast_t, lexemes_t) __attribute__((nonnull)); #endif /* !ORYX_CODEGEN_H */ diff --git a/src/lexer.c b/src/lexer.c index a980812..30686d8 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -22,10 +22,17 @@ #endif #define SIZE_WDTH (sizeof(size_t) * CHAR_BIT) -static bool skip_comment(const uchar **, const uchar *); +static lexemes_t mklexemes(void); -static struct lexemes mklexemes(void); -static void lexemesresz(struct lexemes *); +/* Resize TOKS to the next power-of-2 capacity */ +static void lexemesresz(lexemes_t *toks) + __attribute__((nonnull)); + +/* Advance PTR (which points to the start of a comment) to the end of the + comment, or END. Returns true if the comment was well-formed and + false if the comment was unterminated. Handles nested comments. */ +static bool skip_comment(const uchar **ptr, const uchar *end) + __attribute__((nonnull)); static const bool is_numeric_lookup[UCHAR_MAX + 1] = { ['0'] = true, ['1'] = true, ['2'] = true, ['3'] = true, @@ -33,7 +40,7 @@ static const bool is_numeric_lookup[UCHAR_MAX + 1] = { ['8'] = true, ['9'] = true, ['\''] = true, }; -struct lexemes +lexemes_t lexstring(const uchar *code, size_t codesz) { #if ORYX_SIMD @@ -48,7 +55,7 @@ lexstring(const uchar *code, size_t codesz) } #endif - struct lexemes data = mklexemes(); + lexemes_t data = mklexemes(); const uchar *start = code, *end = start + codesz; while (likely(code < end)) { @@ -74,8 +81,7 @@ lexstring(const uchar *code, size_t codesz) data.kinds[data.len++] = ch; break; - case '<': - case '>': + case '<': case '>': data.kinds[data.len++] = ch; /* See the comment in lexer.h for where 193 comes from */ @@ -159,13 +165,12 @@ out: return true; } -struct lexemes +lexemes_t mklexemes(void) { - struct lexemes soa; + lexemes_t soa; - static_assert(offsetof(struct lexemes, kinds) - < offsetof(struct lexemes, strs), + static_assert(offsetof(lexemes_t, kinds) < offsetof(lexemes_t, strs), "KINDS is not the first field before STRS"); static_assert(LEXEMES_DFLT_CAP * sizeof(*soa.kinds) % alignof(*soa.strs) == 0, @@ -180,19 +185,18 @@ mklexemes(void) } void -lexemesresz(struct lexemes *soa) +lexemesresz(lexemes_t *soa) { - static_assert(offsetof(struct lexemes, kinds) - < offsetof(struct lexemes, strs), + static_assert(offsetof(lexemes_t, kinds) < offsetof(lexemes_t, strs), "KINDS is not the first field before STRS"); size_t ncap, pad, newsz; ptrdiff_t off = (char *)soa->strs - (char *)soa->kinds; - /* The capacity is always going to be a power of 2, so checking for overflow - becomes pretty trivial */ - if ((soa->cap >> (SIZE_WDTH - 1)) != 0) { - errno = EOVERFLOW; + /* The capacity is always going to be a power of 2, so checking for + overflow becomes pretty trivial */ + if (unlikely((soa->cap >> (SIZE_WDTH - 1)) != 0)) { + errno = ENOMEM; err("%s:", __func__); } ncap = soa->cap << 1; diff --git a/src/lexer.h b/src/lexer.h index 67adc93..5436030 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -6,6 +6,7 @@ #include <stdint.h> #include "common.h" +#include "strview.h" #include "types.h" enum { @@ -41,21 +42,20 @@ enum { _LEX_LAST_ENT, }; -typedef uint8_t lexeme_kind_t_; -static_assert(_LEX_LAST_ENT - 1 <= (lexeme_kind_t_)-1, - "Too many lexer tokens to fix in LEXEME_KIND_T_"); +static_assert(_LEX_LAST_ENT - 1 <= UINT8_MAX, + "Too many lexer tokens to fix in uint8_t"); -#define LEXEMES_BLKSZ (sizeof(lexeme_kind_t_) + sizeof(struct strview)) +#define LEXEMES_BLKSZ (1 + sizeof(strview_t)) -struct lexemes { - lexeme_kind_t_ *kinds; - struct strview *strs; +typedef struct { + uint8_t *kinds; + strview_t *strs; size_t len, cap; -}; +} lexemes_t; #define lexemes_free(x) free((x).kinds) -struct lexemes lexstring(const uchar *, size_t) +lexemes_t lexstring(const uchar *, size_t) __attribute__((nonnull)); #endif /* !ORYX_LEXER_H */ @@ -15,7 +15,9 @@ #include "lexer.h" #include "parser.h" -static char *readfile(const char *, size_t *) +/* Read the contents of FILE into a dynamically allocated buffer and + return it, storing the buffer size in BUFSZ. */ +static char *readfile(const char *file, size_t *bufsz) __attribute__((returns_nonnull, nonnull)); int @@ -29,14 +31,14 @@ main(int argc, char **argv) size_t srclen; char *src = readfile(argv[1], &srclen); - arena a = NULL; + aux_t aux; mpq_t *folds; - struct aux aux; - struct type *types; - struct scope *scps; + type_t *types; + scope_t *scps; + arena_t a = NULL; - struct lexemes toks = lexstring(src, srclen); - struct ast ast = parsetoks(toks, &aux); + lexemes_t toks = lexstring(src, srclen); + ast_t ast = parsetoks(toks, &aux); analyzeprog(ast, aux, toks, &a, &types, &scps, &folds); codegen(argv[1], folds, scps, types, ast, toks); @@ -76,8 +78,11 @@ readfile(const char *filename, size_t *n) ; if (nr == -1) err("read: %s", filename); - for (int i = 0; i < 4; i++) - p[sb.st_size + i] = 0; + + p[sb.st_size + 0] = + p[sb.st_size + 1] = + p[sb.st_size + 2] = + p[sb.st_size + 3] = 0; *n = sb.st_size; close(fd); diff --git a/src/parser.c b/src/parser.c index 3f8195c..ab0684a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -21,22 +21,28 @@ #endif #define SIZE_WDTH (sizeof(size_t) * CHAR_BIT) -typedef idx_t_ parsefn(struct ast *, struct aux *, struct lexemes) +typedef idx_t parsefn(ast_t *, aux_t *, lexemes_t) __attribute__((nonnull)); static parsefn parseblk, parseexpr, parsefunc, parseproto, parsestmt, parsetype; -static idx_t_ parsedecl(struct ast *, struct aux *, struct lexemes, bool) +static idx_t parsedecl(ast_t *, aux_t *, lexemes_t, bool) __attribute__((nonnull)); -static struct ast mkast(void); -static idx_t_ astalloc(struct ast *) +static ast_t mkast(void); + +/* Return a new index in AST where a node can be stored. This function + automatically resizes AST if it runs out of capacity. */ +static idx_t astalloc(ast_t *ast) __attribute__((nonnull)); -static void astresz(struct ast *) + +/* Resize AST to the next power-of-2 capacity */ +static void astresz(ast_t *ast) __attribute__((nonnull)); +/* TODO: Make thread-local? */ static size_t toksidx; -idx_t_ -fwdnode(struct ast ast, idx_t_ i) +idx_t +fwdnode(ast_t ast, idx_t i) { while (likely(i < ast.len)) { switch (ast.kinds[i]) { @@ -71,10 +77,10 @@ fwdnode(struct ast ast, idx_t_ i) return i; } -struct ast -parsetoks(struct lexemes toks, struct aux *aux) +ast_t +parsetoks(lexemes_t toks, aux_t *aux) { - struct ast ast = mkast(); + ast_t ast = mkast(); aux->buf = bufalloc(NULL, aux->cap = AUX_DFLT_CAP, sizeof(*aux->buf)); for (;;) { @@ -86,10 +92,10 @@ parsetoks(struct lexemes toks, struct aux *aux) return ast; } -idx_t_ -parseblk(struct ast *ast, struct aux *aux, struct lexemes toks) +idx_t +parseblk(ast_t *ast, aux_t *aux, lexemes_t toks) { - idx_t_ i = astalloc(ast); + idx_t i = astalloc(ast); ast->lexemes[i] = toksidx; ast->kinds[i] = ASTBLK; ast->kids[i].lhs = AST_EMPTY; @@ -99,12 +105,12 @@ parseblk(struct ast *ast, struct aux *aux, struct lexemes toks) err("parser: Expected left brace"); if (toks.kinds[toksidx] != LEXRBRACE) { - idx_t_ stmt = parsestmt(ast, aux, toks); + idx_t stmt = parsestmt(ast, aux, toks); ast->kids[i].lhs = ast->kids[i].rhs = stmt; } while (toks.kinds[toksidx] != LEXRBRACE) { - idx_t_ stmt = parsestmt(ast, aux, toks); + idx_t stmt = parsestmt(ast, aux, toks); ast->kids[i].rhs = stmt; } @@ -112,10 +118,10 @@ parseblk(struct ast *ast, struct aux *aux, struct lexemes toks) return i; } -idx_t_ -parsedecl(struct ast *ast, struct aux *aux, struct lexemes toks, bool toplvl) +idx_t +parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl) { - idx_t_ i = astalloc(ast), j = aux->len++; + idx_t i = astalloc(ast), j = aux->len++; if (aux->len > aux->cap) { aux->cap *= 2; @@ -164,7 +170,7 @@ parsedecl(struct ast *ast, struct aux *aux, struct lexemes toks, bool toplvl) if (func && ast->kinds[i] == ASTDECL) err("Cannot assign function to mutable variable"); - idx_t_ rhs = (func ? parsefunc : parseexpr)(ast, aux, toks); + idx_t rhs = (func ? parsefunc : parseexpr)(ast, aux, toks); ast->kids[i].rhs = rhs; if (!func && toks.kinds[toksidx++] != LEXSEMI) err("parser: Expected semicolon"); @@ -172,28 +178,28 @@ parsedecl(struct ast *ast, struct aux *aux, struct lexemes toks, bool toplvl) return i; } -idx_t_ -parsefunc(struct ast *ast, struct aux *aux, struct lexemes toks) +idx_t +parsefunc(ast_t *ast, aux_t *aux, lexemes_t toks) { - idx_t_ i = astalloc(ast); + idx_t i = astalloc(ast); ast->lexemes[i] = toksidx; assert(toks.kinds[toksidx] == LEXLPAR); ast->kinds[i] = ASTFN; - idx_t_ lhs = parseproto(ast, aux, toks); - idx_t_ rhs = parseblk(ast, aux, toks); + idx_t lhs = parseproto(ast, aux, toks); + idx_t rhs = parseblk(ast, aux, toks); ast->kids[i].lhs = lhs; ast->kids[i].rhs = rhs; return i; } -idx_t_ -parseexpr(struct ast *ast, struct aux *aux, struct lexemes toks) +idx_t +parseexpr(ast_t *ast, aux_t *aux, lexemes_t toks) { (void)aux; - idx_t_ i = astalloc(ast); + idx_t i = astalloc(ast); ast->lexemes[i] = toksidx; switch (toks.kinds[toksidx]) { @@ -212,10 +218,10 @@ parseexpr(struct ast *ast, struct aux *aux, struct lexemes toks) return i; } -idx_t_ -parseproto(struct ast *ast, struct aux *aux, struct lexemes toks) +idx_t +parseproto(ast_t *ast, aux_t *aux, lexemes_t toks) { - idx_t_ i = astalloc(ast); + idx_t i = astalloc(ast); ast->lexemes[i] = toksidx; ast->kinds[i] = ASTFNPROTO; ast->kids[i].lhs = AST_EMPTY; @@ -225,27 +231,27 @@ parseproto(struct ast *ast, struct aux *aux, struct lexemes toks) if (toks.kinds[toksidx++] != LEXRPAR) err("parser: Expected right parenthesis"); - idx_t_ rhs = toks.kinds[toksidx] == LEXIDENT ? parsetype(ast, aux, toks) + idx_t rhs = toks.kinds[toksidx] == LEXIDENT ? parsetype(ast, aux, toks) : AST_EMPTY; ast->kids[i].rhs = rhs; return i; } -idx_t_ -parsestmt(struct ast *ast, struct aux *aux, struct lexemes toks) +idx_t +parsestmt(ast_t *ast, aux_t *aux, lexemes_t toks) { - idx_t_ i; + idx_t i; if (toks.kinds[toksidx] != LEXIDENT) err("parser: Expected identifier"); - struct strview sv = toks.strs[toksidx]; + strview_t sv = toks.strs[toksidx]; if (strview_eq(SV("return"), sv)) { i = astalloc(ast); ast->lexemes[i] = toksidx++; ast->kinds[i] = ASTRET; - idx_t_ rhs = toks.kinds[toksidx] != LEXSEMI ? parseexpr(ast, aux, toks) + idx_t rhs = toks.kinds[toksidx] != LEXSEMI ? parseexpr(ast, aux, toks) : AST_EMPTY; ast->kids[i].rhs = rhs; if (toks.kinds[toksidx++] != LEXSEMI) @@ -259,11 +265,12 @@ parsestmt(struct ast *ast, struct aux *aux, struct lexemes toks) return i; } -idx_t_ -parsetype(struct ast *ast, struct aux *aux, struct lexemes toks) +idx_t +parsetype(ast_t *ast, aux_t *aux, lexemes_t toks) { (void)aux; - idx_t_ i = astalloc(ast); + + idx_t i = astalloc(ast); ast->kinds[i] = ASTTYPE; ast->lexemes[i] = toksidx; @@ -273,15 +280,15 @@ parsetype(struct ast *ast, struct aux *aux, struct lexemes toks) return i; } -struct ast +ast_t mkast(void) { - struct ast soa; + ast_t soa; - static_assert(AST_DFLT_CAP * sizeof(*soa.kinds) % alignof(idx_t_) == 0, + static_assert(AST_DFLT_CAP * sizeof(*soa.kinds) % alignof(idx_t) == 0, "Additional padding is required to properly align LEXEMES"); static_assert(AST_DFLT_CAP * (sizeof(*soa.kinds) + sizeof(*soa.lexemes)) - % alignof(struct pair) + % alignof(pair_t) == 0, "Additional padding is required to properly align KIDS"); @@ -296,7 +303,7 @@ mkast(void) } void -astresz(struct ast *soa) +astresz(ast_t *soa) { size_t ncap, pad1, pad2, newsz; ptrdiff_t lexemes_off, kids_off; @@ -304,24 +311,23 @@ astresz(struct ast *soa) lexemes_off = (char *)soa->lexemes - (char *)soa->kinds; kids_off = (char *)soa->kids - (char *)soa->kinds; - /* The capacity is always going to be a power of 2, so checking for overflow - becomes pretty trivial */ - if ((soa->cap >> (SIZE_WDTH - 1)) != 0) { - errno = EOVERFLOW; + /* The capacity is always going to be a power of 2, so checking for + overflow becomes pretty trivial */ + if (unlikely((soa->cap >> (SIZE_WDTH - 1)) != 0)) { + errno = ENOMEM; err("%s:", __func__); } ncap = soa->cap << 1; /* Ensure that soa->lexemes is properly aligned */ - pad1 = alignof(idx_t_) - ncap * sizeof(ast_kind_t_) % alignof(idx_t_); - if (pad1 == alignof(idx_t_)) + pad1 = alignof(idx_t) - ncap % alignof(idx_t); + if (pad1 == alignof(idx_t)) pad1 = 0; /* Ensure that soa->kids is properly aligned */ - pad2 = alignof(struct pair) - - (ncap * (sizeof(ast_kind_t_) + sizeof(idx_t_)) + pad1) - % alignof(struct pair); - if (pad2 == alignof(struct pair)) + pad2 = alignof(pair_t) + - (ncap * (1 + sizeof(idx_t)) + pad1) % alignof(pair_t); + if (pad2 == alignof(pair_t)) pad2 = 0; newsz = ncap * AST_SOA_BLKSZ + pad1 + pad2; @@ -339,10 +345,10 @@ astresz(struct ast *soa) soa->cap = ncap; } -idx_t_ -astalloc(struct ast *soa) +idx_t +astalloc(ast_t *soa) { - if (soa->len == soa->cap) + if (unlikely(soa->len == soa->cap)) astresz(soa); return soa->len++; } diff --git a/src/parser.h b/src/parser.h index b4ce8c4..9f7bf5e 100644 --- a/src/parser.h +++ b/src/parser.h @@ -10,10 +10,10 @@ #include "types.h" enum { - /* The first four AST tokens are declarations. A declaration is any token T - for which ‘T <= _AST_DECLS_END’ holds. Declarations can also be made - public by using the ‘pub’ keyword, and you can tell if a declaration is - public by if the LSB is set. */ + /* The first four AST tokens are declarations. A declaration is any + token T for which ‘T <= _AST_DECLS_END’ holds. Declarations can + also be made public by using the ‘pub’ keyword, and you can tell + if a declaration is public by if the LSB is set. */ /* Variable declaration, lhs and rhs may be unused ‘x := rhs’; aux[lhs].decl */ @@ -61,43 +61,44 @@ enum { _AST_LAST_ENT, }; -typedef uint8_t ast_kind_t_; -static_assert(_AST_LAST_ENT - 1 <= (ast_kind_t_)-1, - "Too many AST tokens to fix in AST_KIND_T_"); +static_assert(_AST_LAST_ENT - 1 <= UINT8_MAX, + "Too many AST tokens to fix in uint8_t"); -#define AST_EMPTY ((idx_t_)-1) -#define AST_SOA_BLKSZ (sizeof(ast_kind_t_) + sizeof(idx_t_) * 3) +#define AST_EMPTY ((idx_t)-1) +#define AST_SOA_BLKSZ (1 + sizeof(idx_t) + sizeof(pair_t)) -struct aux { +typedef struct { union { struct { - idx_t_ type; + idx_t type; bool ispub; bool isstatic; } decl; } *buf; size_t len, cap; -}; +} aux_t; + +typedef struct { + idx_t lhs, rhs; +} pair_t; -struct ast { - ast_kind_t_ *kinds; - idx_t_ *lexemes; - struct pair { - idx_t_ lhs, rhs; - } *kids; +typedef struct { + uint8_t *kinds; + idx_t *lexemes; + pair_t *kids; size_t len, cap; -}; +} ast_t; #define ast_free(x) free((x).kinds) #define aux_free(x) free((x).buf) /* Parse the tokens in TOKS into an abstract syntax tree, and store auxilliary information in AUX */ -struct ast parsetoks(struct lexemes toks, struct aux *aux) +ast_t parsetoks(lexemes_t toks, aux_t *aux) __attribute__((nonnull)); /* Starting from the node at indent I in AST, return the index of the next node in AST that is of the same nest-depth as I */ -idx_t_ fwdnode(struct ast ast, idx_t_ i); +idx_t fwdnode(ast_t ast, idx_t i); #endif /* !ORYX_PARSER_H */ diff --git a/src/strview.c b/src/strview.c index f9b80e4..70e78f0 100644 --- a/src/strview.c +++ b/src/strview.c @@ -1,13 +1,14 @@ #include <string.h> +#include "common.h" #include "strview.h" #include "types.h" uint64_t -strview_hash(struct strview sv) +strview_hash(strview_t sv) { uint64_t h = 0x100; - for (size_t i = 0; i < sv.len; i++) { + for (size_t i = 0; likely(i < sv.len); i++) { h ^= sv.p[i]; h *= 1111111111111111111u; } @@ -15,9 +16,8 @@ strview_hash(struct strview sv) } uchar * -svtocstr(uchar *dst, struct strview src) +svtocstr(uchar *dst, strview_t src) { - memcpy(dst, src.p, src.len); - dst[src.len] = 0; + ((uchar *)memcpy(dst, src.p, src.len))[src.len] = 0; return dst; } diff --git a/src/strview.h b/src/strview.h index 1153dcb..d6629ee 100644 --- a/src/strview.h +++ b/src/strview.h @@ -8,29 +8,29 @@ #include "types.h" -struct strview { +typedef struct { const uchar *p; size_t len; -}; +} strview_t; /* Expand SV into arguments suitable for a call to printf() */ #define SV_PRI_ARGS(sv) ((int)(sv).len), ((sv).p) /* Convert the string-literal S into a string-view */ -#define SV(s) ((struct strview){s, sizeof(s) - 1}) +#define SV(s) ((strview_t){s, sizeof(s) - 1}) /* Return the hash of SV */ -uint64_t strview_hash(struct strview sv); +uint64_t strview_hash(strview_t sv); /* Copy the contents of SV to DST including a null terminator, and return DST */ -uchar *svtocstr(uchar *dst, struct strview sv) +uchar *svtocstr(uchar *dst, strview_t sv) __attribute__((returns_nonnull, nonnull)); /* Return whether or not X and Y are equal */ __attribute__((always_inline)) static inline bool -strview_eq(struct strview x, struct strview y) +strview_eq(strview_t x, strview_t y) { return x.len == y.len && memcmp(x.p, y.p, x.len) == 0; } diff --git a/src/types.h b/src/types.h index bd28e36..5727e62 100644 --- a/src/types.h +++ b/src/types.h @@ -3,7 +3,7 @@ #include <stdint.h> -typedef uint32_t idx_t_; +typedef uint32_t idx_t; typedef uint32_t rune; typedef unsigned char uchar; diff --git a/src/unicode-avx2.c b/src/unicode-avx2.c index 9367b1e..bb67b76 100644 --- a/src/unicode-avx2.c +++ b/src/unicode-avx2.c @@ -27,6 +27,7 @@ #include <stdint.h> #include <x86intrin.h> +#include "types.h" #include "unicode.h" #pragma GCC diagnostic ignored "-Woverflow" @@ -81,7 +82,7 @@ push_last_3bytes_of_a_to_b(__m256i a, __m256i b) } bool -utf8_validate_simd(const unsigned char *s, size_t len) +utf8_validate_simd(const uchar *s, size_t len) { if (len >= 32) { __m256i prev_input = _mm256_set1_epi8(0); diff --git a/src/unicode-neon.c b/src/unicode-neon.c index c072aee..bbabd35 100644 --- a/src/unicode-neon.c +++ b/src/unicode-neon.c @@ -27,6 +27,7 @@ #include <arm_neon.h> #include <stdint.h> +#include "types.h" #include "unicode.h" #pragma GCC diagnostic ignored "-Woverflow" @@ -54,7 +55,7 @@ static const uint8_t _range_adjust_tbl[] = { }; bool -utf8_validate_simd(const unsigned char *s, size_t len) +utf8_validate_simd(const uchar *s, size_t len) { if (len >= 32) { uint8x16_t prev_input = vdupq_n_u8(0); diff --git a/src/unicode-sse4_1.c b/src/unicode-sse4_1.c index 0f5f4e5..a62cd01 100644 --- a/src/unicode-sse4_1.c +++ b/src/unicode-sse4_1.c @@ -28,6 +28,7 @@ #include <stdint.h> #include <x86intrin.h> +#include "types.h" #include "unicode.h" #pragma GCC diagnostic ignored "-Woverflow" @@ -58,7 +59,7 @@ static const int8_t _ef_fe_tbl[] = { /* Return 0 on success, -1 on error */ bool -utf8_validate_simd(const unsigned char *s, size_t len) +utf8_validate_simd(const uchar *s, size_t len) { if (len >= 32) { __m128i prev_input = _mm_set1_epi8(0); diff --git a/src/unicode.c b/src/unicode.c index 2f9e3e2..04b2007 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -1,5 +1,7 @@ -#include "unicode.h" +#include "common.h" +#include "types.h" #include "unicode-data.h" +#include "unicode.h" #define RUNE_IS_GEN(fn, stg1, stg2, blksz) \ bool fn(rune ch) \ @@ -49,9 +51,9 @@ static const int shiftc[] = {0, 18, 12, 6, 0}; static const int shifte[] = {0, 6, 4, 2, 0}; rune -utf8_decode(const unsigned char **buf) +utf8_decode(const uchar **buf) { - const unsigned char *s = *buf; + const uchar *s = *buf; int len = lengths[s[0] >> 3]; *buf = s + len + !len; @@ -63,13 +65,13 @@ utf8_decode(const unsigned char **buf) } size_t -utf8_validate_off(const unsigned char *s, size_t len) +utf8_validate_off(const uchar *s, size_t len) { - const unsigned char *start = s, *end = start + len; - while (s < end) { + const uchar *start = s, *end = start + len; + while (likely(s < end)) { int len = lengths[s[0] >> 3]; - const unsigned char *next = s + len + !len; + const uchar *next = s + len + !len; rune c = (rune)(s[0] & masks[len]) << 18; c |= (rune)(s[1] & 0x3f) << 12; @@ -85,7 +87,7 @@ utf8_validate_off(const unsigned char *s, size_t len) e |= (s[3]) >> 6; e ^= 0x2A; e >>= shifte[len]; - if (e != 0) + if (unlikely(e != 0)) return s - start + 1; s = next; } |