diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-06-24 04:18:59 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-06-24 04:20:14 +0200 |
commit | fbaa65a2319745c8a236a5c9c66e3406f42447c3 (patch) | |
tree | d3200a0713af19432aa8aaa0fa7a3c52ae86e89c | |
parent | 02ce35872c86d4ff056b8121121253fec40bafc0 (diff) |
Support ‘…’ and ‘...’ in initializers
-rw-r--r-- | .exrc | 1 | ||||
-rw-r--r-- | README | 58 | ||||
-rw-r--r-- | src/codegen.c | 4 | ||||
-rw-r--r-- | src/lexer.c | 13 | ||||
-rw-r--r-- | src/lexer.h | 4 | ||||
-rw-r--r-- | src/parser.c | 25 | ||||
-rw-r--r-- | src/parser.h | 5 |
7 files changed, 102 insertions, 8 deletions
@@ -1 +1,2 @@ set makeprg=./make +autocmd BufRead README setlocal et tw=73 @@ -1 +1,57 @@ -Oryx — Programming Made Better + Oryx — Programming Made Better + + ┌────────────────────────────┐ + │ Existing Language Features │ + └────────────────────────────┘ + +1. The following datatypes are supported. The unsigned integer types + default to the system’s word size (typically 64 bits). + + i8, i16, i32, i64, i128, int + u8, u16, u32, u64, u128, uint + +2. C-style block comments. Line comments are intentionally not + included. + +3. Declaration of mutable variables with optional type-inference. The + syntax is simple and consistent regardless of whether type-inference is + used or not. Variables are also zero-initialized unless ‘…’ + (U+2026 HORIZONTAL ELLIPSIS) or ‘...’ is given as a value. + + x: int; /* Declare a zero-initialized integer */ + x: int = 69; /* Declare an integer and set it to 69 */ + x: = 69; /* Same as above but infer the type */ + x := 69; /* Recommended style when inferring types */ + x: int = …; /* Declare an uninitialized integer (preferred) */ + x: int = ...; /* Same as above when Unicode is not possible */ + +4. Declaration of constant variables with optional type-inference + including constants of arbitrary precision. The syntax is + intentionally designed to be consistent with mutable variable + declaration. + + Constants are unordered, meaning that a constant may refer to another + constant that is declared later in the source file. + + FOO: u8 : BAR; + BAR: u8 : 69; + + REALLY_BIG :: 123'456'789'876'543'210; + + pub MyFunc :: () int { + return BAR; + } + +5. Constants of arbitrary precision (overflow is not possible), with ‘'’ + (U+0027 APOSTROPHE) as an optional digit separator. + + REALLY_BIG :: 123'456'789'876'543'210; + +6. No implicit type conversions between types. This includes between + different integer types which may have the same size (i.e. 
int and + i64) + + pub MyFunc :: () { + x: int = 69; + y: i64 = x; /* Compile-time error */ + } diff --git a/src/codegen.c b/src/codegen.c index 6fc7848..665f071 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -240,6 +240,10 @@ codegendecl(struct cgctx ctx, idx_t i) assert(ctx.ast.kinds[i] == ASTDECL); + /* Don’t assign a default value to ‘x: int = …’ */ + if (ctx.aux.buf[p.lhs].decl.isundef) + return fwdnode(ctx.ast, i); + if (!ctx.types[i].isfloat && ctx.aux.buf[p.lhs].decl.isstatic) { strview_t sv = ctx.toks.strs[ctx.ast.lexemes[i]]; /* TODO: Namespace the name */ diff --git a/src/lexer.c b/src/lexer.c index 30686d8..a613585 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -70,6 +70,10 @@ lexstring(const uchar *code, size_t codesz) data.kinds[data.len++] = ch; break; + case RUNE_C(0x2026): /* U+2026 HORIZONTAL ELLIPSIS */ + data.kinds[data.len++] = LEXELIP; + break; + /* Single- or double-byte literals */ case '/': if (code < end && code[0] == '*') { @@ -80,7 +84,6 @@ lexstring(const uchar *code, size_t codesz) data.kinds[data.len++] = ch; break; - case '<': case '>': data.kinds[data.len++] = ch; @@ -91,6 +94,13 @@ lexstring(const uchar *code, size_t codesz) } break; + case '.': + if (unlikely(end - code < 2) || code[0] != '.' || code[1] != '.') + goto fallback; + code += 2; + data.kinds[data.len++] = LEXELIP; + break; + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': data.kinds[data.len] = LEXNUM; @@ -112,6 +122,7 @@ lexstring(const uchar *code, size_t codesz) break; default: +fallback: if (!rune_is_xids(ch)) continue; diff --git a/src/lexer.h b/src/lexer.h index 5436030..fb63fd8 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -13,6 +13,10 @@ enum { LEXEOF, /* End of token stream */ LEXIDENT, /* Identifier */ LEXNUM, /* Numeric constant */ + LEXELIP, /* Ellipsis */ + + /* NOTE: Make sure that the enumerations above this comment don’t + conflict with the following explicitly assigned enumerations! 
*/ LEXAMP = '&', LEXCOLON = ':', diff --git a/src/parser.c b/src/parser.c index 5b12e21..46a1fb6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -128,6 +128,7 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl) } aux->buf[j].decl.isstatic = toplvl; + aux->buf[j].decl.isundef = false; if (toplvl && toks.kinds[toksidx] == LEXIDENT && strview_eq(SV("pub"), toks.strs[toksidx])) { @@ -165,11 +166,27 @@ parsedecl(ast_t *ast, aux_t *aux, lexemes_t toks, bool toplvl) err("parser: Expected colon, equals, or semicolon"); } - bool func = toks.kinds[toksidx] == LEXLPAR; - if (func && ast->kinds[i] == ASTDECL) - err("Cannot assign function to mutable variable"); + idx_t rhs; + bool func = false; + + switch (toks.kinds[toksidx]) { + case LEXLPAR: + func = true; + if (ast->kinds[i] == ASTDECL) + err("Cannot assign function to mutable variable"); + rhs = parsefunc(ast, aux, toks); + break; + case LEXELIP: + toksidx++; + if (ast->kinds[i] == ASTCDECL) + err("parser: Cannot assign to ‘…’ in constant declaration"); + rhs = AST_EMPTY; + aux->buf[j].decl.isundef = true; + break; + default: + rhs = parseexpr(ast, aux, toks); + } - idx_t rhs = (func ? parsefunc : parseexpr)(ast, aux, toks); ast->kids[i].rhs = rhs; if (!func && toks.kinds[toksidx++] != LEXSEMI) err("parser: Expected semicolon"); diff --git a/src/parser.h b/src/parser.h index 9f7bf5e..d8991c0 100644 --- a/src/parser.h +++ b/src/parser.h @@ -71,8 +71,9 @@ typedef struct { union { struct { idx_t type; - bool ispub; - bool isstatic; + bool ispub : 1; + bool isstatic : 1; + bool isundef : 1; } decl; } *buf; size_t len, cap; |