From 39c78e864f6600af0403fc2c0470ef97e3befce5 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 11 Jun 2024 00:18:49 +0200 Subject: Lex number literals --- src/lexer.c | 26 ++++++++++++++++++++++++++ src/lexer.h | 1 + 2 files changed, 27 insertions(+) diff --git a/src/lexer.c b/src/lexer.c index 8b121b3..0aa318b 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -20,6 +20,12 @@ static bool skip_comment(const uchar **, const uchar *); static struct lexemes_soa mk_lexemes_soa(void); static void lexemes_soa_resz(struct lexemes_soa *); +static const bool is_numeric_lookup[UCHAR_MAX + 1] = { + ['0'] = true, ['1'] = true, ['2'] = true, ['3'] = true, + ['4'] = true, ['5'] = true, ['6'] = true, ['7'] = true, + ['8'] = true, ['9'] = true, ['\''] = true, +}; + struct lexemes_soa lexstring(const uchar *code, size_t codesz) { @@ -72,6 +78,26 @@ lexstring(const uchar *code, size_t codesz) } break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + data.kinds[data.len] = LEXNUM; + data.strs[data.len].p = spnbeg; + + while (likely(code < end) && is_numeric_lookup[code[0]]) { + if (unlikely(code[0] == '\'' && code[-1] == '\'')) { + err("Adjacent numeric separators at byte %td", + code - start); + } + code++; + } + if (unlikely(code < end && code[-1] == '\'')) { + err("Numeric literal ends with numeric separator at byte %td", + code - start); + } + + data.strs[data.len++].len = code - spnbeg; + break; + default: if (!rune_is_xids(ch)) continue; diff --git a/src/lexer.h b/src/lexer.h index 7fd8533..33c5078 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -8,6 +8,7 @@ enum { LEXIDENT, /* Identifier */ + LEXNUM, /* Numeric constant */ LEXAMP = '&', LEXCOLON = ':', -- cgit v1.2.3