From e1ca4f3603010dd85c6da0d3a887c9fdcfa9c43e Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Mon, 24 Jun 2024 07:14:04 +0200 Subject: Add basic support for floating point numbers --- README | 9 ++++++++- src/analyzer.c | 37 +++++++++++++++++++++++++++---------- src/codegen.c | 42 +++++++++++++++++++++++++----------------- src/lexer.c | 20 ++++++++++++++++---- 4 files changed, 76 insertions(+), 32 deletions(-) diff --git a/README b/README index 514046b..a770ac8 100644 --- a/README +++ b/README @@ -55,10 +55,17 @@ The build script also accepts some subcommands. They are as follows: ────────────────────────────── 1. The following datatypes are supported. The unsized integer types - default to the systems word size (typically 64 bits). + default to the system’s word size (typically 64 bits). The rune type + is an alias for the i32 type and serves a purely semantic purpose. + In the future it will be a distinct type. + /* Integer types */ i8, i16, i32, i64, i128, int u8, u16, u32, u64, u128, uint + rune + + /* Floating-point types */ + f16, f32, f64, f128 2. C-style block comments. Line comments are intentionally not included. 
diff --git a/src/analyzer.c b/src/analyzer.c index c5a6200..a449653 100644 --- a/src/analyzer.c +++ b/src/analyzer.c @@ -51,6 +51,7 @@ struct azctx { struct cfctx { arena_t *a; + scratch_t *s; strview_t decl; idx_t si; }; @@ -60,7 +61,8 @@ static void analyzeast(scope_t *, type_t *, ast_t, aux_t, lexemes_t, arena_t *) __attribute__((nonnull)); /* Perform constant folding over the AST */ -static void constfold(mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, arena_t *) +static void constfold(mpq_t *, scope_t *, type_t *, ast_t, lexemes_t, arena_t *, + scratch_t *) __attribute__((nonnull)); /* Perform a pass over the entire AST and return an array of symbol @@ -103,9 +105,10 @@ analyzeprog(ast_t ast, aux_t aux, lexemes_t toks, arena_t *a, type_t **types, *scps = gensymtabs(ast, aux, toks, a); analyzeast(*scps, *types, ast, aux, toks, a); + scratch_t s = {0}; *folds = bufalloc(NULL, ast.len, sizeof(**folds)); memset(*folds, 0, ast.len * sizeof(**folds)); - constfold(*folds, *scps, *types, ast, toks, a); + constfold(*folds, *scps, *types, ast, toks, a, &s); } scope_t * @@ -383,21 +386,35 @@ constfoldexpr(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, case ASTNUMLIT: { mpq_init(folds[i]); - /* TODO: Temporary allocator */ strview_t sv = toks.strs[ast.lexemes[i]]; - char *buf = bufalloc(NULL, sv.len + 1, 1); + char *buf = tmpalloc(ctx.s, sv.len + 1, 1); size_t len = 0; + bool isfloat = false; + for (size_t i = 0; i < sv.len; i++) { + if (sv.p[i] == '.') { + isfloat = true; + buf[len++] = sv.p[i]; + } if (isdigit(sv.p[i])) buf[len++] = sv.p[i]; } buf[len] = 0; - int ret = mpq_set_str(folds[i], buf, 10); - assert(ret == 0); - - free(buf); + if (isfloat) { + mpf_t x; + /* FIXME: This uses global precision, not thread safe! */ + /* FIXME: This doesn’t try to figure out what the correct + precision is. 
*/ + int ret = mpf_init_set_str(x, buf, 10); + assert(ret == 0); + mpq_set_f(folds[i], x); + mpf_clear(x); + } else { + int ret = mpq_set_str(folds[i], buf, 10); + assert(ret == 0); + } return fwdnode(ast, i); } case ASTIDENT: { @@ -459,9 +476,9 @@ constfolddecl(struct cfctx ctx, mpq_t *folds, scope_t *scps, type_t *types, void constfold(mpq_t *folds, scope_t *scps, type_t *types, ast_t ast, lexemes_t toks, - arena_t *a) + arena_t *a, scratch_t *s) { - struct cfctx ctx = {.a = a}; + struct cfctx ctx = {.a = a, .s = s}; for (idx_t i = 0; likely(i < ast.len);) { assert(ast.kinds[i] <= _AST_DECLS_END); i = constfolddecl(ctx, folds, scps, types, ast, toks, i); diff --git a/src/codegen.c b/src/codegen.c index d432478..80e42cc 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -121,7 +121,7 @@ idx_t codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) { /* If true, implies numeric constant */ - if (MPQ_IS_INIT(ctx.folds[i])) { + if (MPQ_IS_INIT(ctx.folds[i]) && !type.isfloat) { mpz_ptr num, den; num = mpq_numref(ctx.folds[i]); den = mpq_denref(ctx.folds[i]); @@ -154,6 +154,16 @@ codegentypedexpr(struct cgctx ctx, idx_t i, type_t type, LLVMValueRef *outv) mpz_get_str(buf, 10, num); *outv = LLVMConstIntOfString(type2llvm(ctx, type), buf, 10); return fwdnode(ctx.ast, i); + } else if (MPQ_IS_INIT(ctx.folds[i]) /* && type.isfloat */) { + /* FIXME: This is bad and broken */ + mpf_t x; + mpf_init(x); + mpf_set_q(x, ctx.folds[i]); + char buf[256]; + gmp_snprintf(buf, sizeof(buf), "%Ff", x); + *outv = LLVMConstRealOfString(type2llvm(ctx, type), buf); + mpf_clear(x); + return fwdnode(ctx.ast, i); } assert(ctx.ast.kinds[i] == ASTIDENT); @@ -298,7 +308,7 @@ codegendecl(struct cgctx ctx, idx_t i) if (ctx.aux.buf[p.lhs].decl.isundef) return fwdnode(ctx.ast, i); - if (!ctx.types[i].isfloat && ctx.aux.buf[p.lhs].decl.isstatic) { + if (ctx.aux.buf[p.lhs].decl.isstatic) { strview_t sv = ctx.toks.strs[ctx.ast.lexemes[i]]; /* TODO: Namespace the name */ char *name = 
tmpalloc(ctx.s, sv.len + 1, 1); @@ -315,22 +325,20 @@ codegendecl(struct cgctx ctx, idx_t i) LLVMSetLinkage(globl, LLVMInternalLinkage); return i; } - if (!ctx.types[i].isfloat /* && !aux.buf[p.lhs].decl.isstatic */) { - LLVMValueRef var, val; - /* TODO: Namespace the name */ - strview_t sv = ctx.toks.strs[ctx.ast.lexemes[i]]; - var = symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v; - if (p.rhs == AST_EMPTY) { - val = LLVMConstNull(type2llvm(ctx, ctx.types[i])); - i = fwdnode(ctx.ast, i); - } else - i = codegentypedexpr(ctx, p.rhs, ctx.types[i], &val); - LLVMBuildStore(ctx.bob, val, var); - return i; - } - /* types[i].isfloat */ - err("%s():%d: TODO", __func__, __LINE__); + /* Non-static, non-undef, mutable */ + + LLVMValueRef var, val; + /* TODO: Namespace the name */ + strview_t sv = ctx.toks.strs[ctx.ast.lexemes[i]]; + var = symtab_insert(&ctx.scps[ctx.scpi].map, sv, NULL)->v; + if (p.rhs == AST_EMPTY) { + val = LLVMConstNull(type2llvm(ctx, ctx.types[i])); + i = fwdnode(ctx.ast, i); + } else + i = codegentypedexpr(ctx, p.rhs, ctx.types[i], &val); + LLVMBuildStore(ctx.bob, val, var); + return i; } void diff --git a/src/lexer.c b/src/lexer.c index 09a1a31..ed2414a 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -37,7 +38,7 @@ static bool skip_comment(const uchar **ptr, const uchar *end) static const bool is_numeric_lookup[UCHAR_MAX + 1] = { ['0'] = true, ['1'] = true, ['2'] = true, ['3'] = true, ['4'] = true, ['5'] = true, ['6'] = true, ['7'] = true, - ['8'] = true, ['9'] = true, ['\''] = true, + ['8'] = true, ['9'] = true, ['\''] = true, ['.'] = true, }; lexemes_t @@ -105,18 +106,28 @@ lexstring(const uchar *code, size_t codesz) break; case '.': - if (unlikely(end - code < 2) || code[0] != '.' || code[1] != '.') + if (unlikely(end - code < 2) || code[0] != '.' 
|| code[1] != '.') { + if (likely(end - code) >= 1 && isdigit(code[0])) + goto number; goto fallback; + } code += 2; data.kinds[data.len++] = LEXELIP; break; case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '5': case '6': case '7': case '8': case '9': { +number: + bool saw_dot = false; data.kinds[data.len] = LEXNUM; data.strs[data.len].p = spnbeg; while (likely(code < end) && is_numeric_lookup[code[0]]) { + if (unlikely(code[0] == '.')) { + if (saw_dot) + err("lexer: Decimal separator given multiple times in numeric literal"); + saw_dot = true; + } if (unlikely(code[0] == '\'' && code[-1] == '\'')) { err("Adjacent numeric separators at byte %td", code - start); @@ -130,10 +141,11 @@ lexstring(const uchar *code, size_t codesz) data.strs[data.len++].len = code - spnbeg; break; + } default: fallback: - if (!rune_is_xids(ch)) + if (unlikely(!rune_is_xids(ch))) err("lexer: Unexpected rune U+%04" PRIXRUNE, ch); data.kinds[data.len] = LEXIDENT; -- cgit v1.2.3