summaryrefslogtreecommitdiff
path: root/oryxc
diff options
context:
space:
mode:
Diffstat (limited to 'oryxc')
-rw-r--r-- oryxc/src/errors.rs 11
-rw-r--r-- oryxc/src/lexer.rs 143
-rw-r--r-- oryxc/src/parser.rs 597
3 files changed, 466 insertions, 285 deletions
diff --git a/oryxc/src/errors.rs b/oryxc/src/errors.rs
index a8350dd..3c00220 100644
--- a/oryxc/src/errors.rs
+++ b/oryxc/src/errors.rs
@@ -11,13 +11,11 @@ use std::fmt::{
Formatter,
};
use std::io::Write;
-use std::ops::Deref;
use std::path::Path;
use std::sync::OnceLock;
use std::{
env,
io,
- process,
};
use unicode_width::UnicodeWidthStr;
@@ -81,15 +79,6 @@ macro_rules! err {
}};
}
-pub fn err_at_position<T, S>(filename: T, s: S) -> !
-where
- T: Deref<Target = OsStr>,
- S: Display,
-{
- eprintln!("{}: \x1b[31;1mError:\x1b[0m {}", filename.display(), s);
- process::exit(1);
-}
-
#[derive(Debug)]
pub struct OryxError {
pub span: (usize, usize),
diff --git a/oryxc/src/lexer.rs b/oryxc/src/lexer.rs
index a76c643..c82cd2c 100644
--- a/oryxc/src/lexer.rs
+++ b/oryxc/src/lexer.rs
@@ -18,38 +18,47 @@ use crate::unicode;
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenType {
- Eof = 0,
- Ampersand = '&' as u8,
- AngleL = '<' as u8,
- AngleR = '>' as u8,
- Asterisk = '*' as u8,
- Bar = '|' as u8,
- BraceL = '{' as u8,
- BraceR = '}' as u8,
- BracketL = '[' as u8,
- BracketR = ']' as u8,
- Caret = '^' as u8,
- Comma = ',' as u8,
- Equals = '=' as u8,
- Exclamation = '!' as u8,
- Minus = '-' as u8,
- ParenL = '(' as u8,
- ParenR = ')' as u8,
- Plus = '+' as u8,
- Semicolon = ';' as u8,
- Slash = '/' as u8,
- Tilde = '~' as u8,
+ Eof = 0,
+ Ampersand = '&' as u8,
+ AngleL = '<' as u8,
+ AngleR = '>' as u8,
+ Asterisk = '*' as u8,
+ Bang = '!' as u8,
+ Bar = '|' as u8,
+ BraceL = '{' as u8,
+ BraceR = '}' as u8,
+ BracketL = '[' as u8,
+ BracketR = ']' as u8,
+ Caret = '^' as u8,
+ Comma = ',' as u8,
+ Equals = '=' as u8,
+ Minus = '-' as u8,
+ ParenL = '(' as u8,
+ ParenR = ')' as u8,
+ Percent = '%' as u8,
+ Plus = '+' as u8,
+ Semicolon = ';' as u8,
+ Slash = '/' as u8,
+ Tilde = '~' as u8,
+ _AsciiEnd = 0x7F as u8,
+ Ampersand2,
AmpersandTilde,
AngleL2,
AngleL3,
+ AngleLEquals,
AngleR2,
AngleR3,
+ AngleREquals,
+ BangEquals,
+ Bar2,
Ellipsis,
+ Equals2,
Identifier,
KeywordDef,
KeywordFunc,
KeywordReturn,
Number,
+ Percent2,
String,
}
@@ -58,8 +67,8 @@ impl TokenType {
pub fn exprp(&self) -> bool {
return match self {
Self::Ampersand
+ | Self::Bang
| Self::Caret
- | Self::Exclamation
| Self::Identifier
| Self::KeywordFunc
| Self::Minus
@@ -154,6 +163,55 @@ pub fn tokenize(s: &str) -> Result<Soa<Token>, OryxError> {
view: (i, ctx.pos_a),
})
},
+ '<' if ctx.peek().is_some_and(|c| c == '=') => {
+ ctx.next(); /* Consume ‘=’ */
+ Some(Token {
+ kind: TokenType::AngleLEquals,
+ view: (i, ctx.pos_a),
+ })
+ },
+ '>' if ctx.peek().is_some_and(|c| c == '=') => {
+ ctx.next(); /* Consume ‘=’ */
+ Some(Token {
+ kind: TokenType::AngleREquals,
+ view: (i, ctx.pos_a),
+ })
+ },
+ '|' if ctx.peek().is_some_and(|c| c == '|') => {
+ ctx.next(); /* Consume ‘|’ */
+ Some(Token {
+ kind: TokenType::Bar2,
+ view: (i, ctx.pos_a),
+ })
+ },
+ '&' if ctx.peek().is_some_and(|c| c == '&') => {
+ ctx.next(); /* Consume ‘&’ */
+ Some(Token {
+ kind: TokenType::Ampersand2,
+ view: (i, ctx.pos_a),
+ })
+ },
+ '%' if ctx.peek().is_some_and(|c| c == '%') => {
+ ctx.next(); /* Consume ‘%’ */
+ Some(Token {
+ kind: TokenType::Percent2,
+ view: (i, ctx.pos_a),
+ })
+ },
+ '=' if ctx.peek().is_some_and(|c| c == '=') => {
+ ctx.next(); /* Consume ‘=’ */
+ Some(Token {
+ kind: TokenType::Equals2,
+ view: (i, ctx.pos_a),
+ })
+ },
+ '!' if ctx.peek().is_some_and(|c| c == '=') => {
+ ctx.next(); /* Consume ‘=’ */
+ Some(Token {
+ kind: TokenType::BangEquals,
+ view: (i, ctx.pos_a),
+ })
+ },
'&' if ctx.peek().is_some_and(|c| c == '~') => {
ctx.next(); /* Consume ‘~’ */
Some(Token {
@@ -162,12 +220,11 @@ pub fn tokenize(s: &str) -> Result<Soa<Token>, OryxError> {
})
},
'!' | '&' | '(' | ')' | '*' | '+' | ',' | '-' | '/' | ';' | '<'
- | '=' | '>' | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '…' => {
- Some(Token {
- kind: unsafe { mem::transmute(c as u8) },
- view: (i, j),
- })
- },
+ | '=' | '>' | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '…'
+ | '%' => Some(Token {
+ kind: unsafe { mem::transmute(c as u8) },
+ view: (i, j),
+ }),
'#' => Some(tokenize_number_based(&mut ctx)?),
'0'..='9' => Some(tokenize_number(&mut ctx, "0123456789")?),
'"' => Some(tokenize_string(&mut ctx)?),
@@ -176,7 +233,7 @@ pub fn tokenize(s: &str) -> Result<Soa<Token>, OryxError> {
c => {
return Err(OryxError::new(
(i, j),
- format!("Invalid character ‘{c}’"),
+ format!("invalid character ‘{c}’"),
));
},
} {
@@ -211,7 +268,7 @@ fn skip_comment<'a>(ctx: &mut LexerContext<'a>) -> Result<(), OryxError> {
_ => {},
};
}
- return Err(OryxError::new((beg, ctx.pos_a), "Unterminated comment"));
+ return Err(OryxError::new((beg, ctx.pos_a), "unterminated comment"));
}
fn tokenize_number_based<'a>(
@@ -227,7 +284,7 @@ fn tokenize_number_based<'a>(
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
format!(
- "Invalid number base specifier ‘{c}’, did you mean ‘{}’?",
+ "invalid number base specifier ‘{c}’, did you mean ‘{}’?",
c.to_ascii_lowercase()
),
));
@@ -235,13 +292,13 @@ fn tokenize_number_based<'a>(
Some(c) if c.is_alphanumeric() => {
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
- format!("Invalid number base specifier ‘{c}’"),
+ format!("invalid number base specifier ‘{c}’"),
));
},
_ => {
return Err(OryxError::new(
(i, i + 1),
- "Expected number base specifier after ‘#’",
+ "expected number base specifier after ‘#’",
));
},
};
@@ -252,21 +309,21 @@ fn tokenize_number_based<'a>(
Some(c) if alphabet.len() == 16 && c.is_ascii_hexdigit() => {
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
- format!("Hexadecimal digits must be uppercase"),
+ format!("hexadecimal digits must be uppercase"),
));
},
Some(c) if c.is_alphanumeric() => {
let base = base2str(alphabet.len());
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
- format!("Invalid {base} digit ‘{c}’"),
+ format!("invalid {base} digit ‘{c}’"),
));
},
Some('\'') => {
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
format!(
- "Numeric literals may not begin with a digit separator"
+ "numeric literals may not begin with a digit separator"
),
));
},
@@ -274,7 +331,7 @@ fn tokenize_number_based<'a>(
let base = base2str(alphabet.len());
return Err(OryxError::new(
(beg, end),
- format!("Expected {base} digit after base specifier"),
+ format!("expected {base} digit after base specifier"),
));
},
};
@@ -323,21 +380,21 @@ fn span_raw_number<'a>(
Some(c) if alphabet.len() == 16 && c.is_ascii_hexdigit() => {
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
- format!("Hexadecimal digits must be uppercase"),
+ format!("hexadecimal digits must be uppercase"),
));
},
Some(c) if c.is_alphanumeric() => {
let base = base2str(alphabet.len());
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
- format!("Invalid {base} digit ‘{c}’"),
+ format!("invalid {base} digit ‘{c}’"),
));
},
_ => {
let base = base2str(alphabet.len());
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a),
- format!("Expected {base} digit"),
+ format!("expected {base} digit"),
));
},
};
@@ -350,7 +407,7 @@ fn span_raw_number<'a>(
'\'' if last_was_apos_p => {
return Err(OryxError::new(
(ctx.pos_b, ctx.pos_a + 1),
- "Numeric literals may not have adjecent digit separators",
+ "numeric literals may not have adjecent digit separators",
));
},
'\'' => {
@@ -369,7 +426,7 @@ fn span_raw_number<'a>(
if last_was_apos_p {
return Err(OryxError::new(
(beg, end),
- "Numeric literals may not end with a digit separator",
+ "numeric literals may not end with a digit separator",
));
}
@@ -386,7 +443,7 @@ fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Result<Token, OryxError> {
None => {
return Err(OryxError::new(
(i, ctx.pos_a),
- "Unterminated string literal",
+ "unterminated string literal",
));
},
}
diff --git a/oryxc/src/parser.rs b/oryxc/src/parser.rs
index 6e2e4b0..3d5d78c 100644
--- a/oryxc/src/parser.rs
+++ b/oryxc/src/parser.rs
@@ -3,8 +3,7 @@ use std::fmt::{
Debug,
Formatter,
};
-use std::mem::ManuallyDrop;
-use std::process;
+use std::ops::FnOnce;
use std::vec::Vec;
use soa_rs::{
@@ -19,24 +18,24 @@ use crate::lexer::{
};
use crate::size;
-const MIN_PREC: i64 = 0;
const MAX_PREC: i64 = 6;
+/* Remember to edit the cases in Parser.node_span_1() when editing this list! */
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AstType {
- Assign, /* (ident-token, expression) */
- Block, /* (extra-data, _) */
+ Assign, /* (extra-data-lhs, extra-data-rhs) */
+ Block, /* (extra-data, extra-data-len) */
Dereference, /* (lhs, _) */
Empty, /* (_, _) */
FunCall, /* (expression, extra-data) */
- FunProto, /* (extra-data, _) */
+ FunProto, /* (extra-data-args, extra-data-rets) */
Function, /* (prototype, body) */
Identifier, /* (_, _) */
- MultiDefBind, /* (extra-data, _) */
+ MultiDefBind, /* (extra-data-decls, extra-data-exprs) */
Number, /* (_, _) */
Pointer, /* (rhs, _) */
- Return, /* (extra-data, _) */
+ Return, /* (extra-data, extra-data-len) */
String, /* (_, _) */
UnaryOperator, /* (rhs, _) */
BinaryOperator, /* (lhs, rhs) */
@@ -70,39 +69,9 @@ pub struct AstNode {
pub sub: SubNodes,
}
-pub struct DeclData {
- lhs: Vec<(u32, u32)>, /* (ident, type) tuple */
- rhs: Vec<u32>,
-}
-
-pub struct FunCallData {
- args: Vec<u32>,
-}
-
-pub struct FunProtoData {
- args: Vec<(u32, u32)>, /* (ident, type) tuple */
- ret: Vec<u32>,
-}
-
-pub struct BlockData {
- stmts: Vec<u32>,
-}
-
-pub struct ReturnData {
- exprs: Vec<u32>,
-}
-
-pub union ExtraData {
- block: ManuallyDrop<BlockData>,
- decl: ManuallyDrop<DeclData>,
- funcall: ManuallyDrop<FunCallData>,
- funproto: ManuallyDrop<FunProtoData>,
- r#return: ManuallyDrop<ReturnData>,
-}
-
struct Parser<'a> {
ast: Soa<AstNode>,
- extra_data: Vec<ExtraData>,
+ extra_data: Vec<u32>,
cursor: u32,
scratch: Vec<u32>,
tokens: &'a Soa<Token>,
@@ -110,13 +79,6 @@ struct Parser<'a> {
}
impl<'a> Parser<'a> {
- fn err_at_position(&self, i: u32, msg: &str) -> ! {
- for e in &self.errors {
- eprintln!("{e}");
- }
- process::exit(69);
- }
-
fn new(tokens: &'a Soa<Token>) -> Self {
return Self {
ast: Soa::with_capacity(size::kibibytes(10)),
@@ -130,9 +92,12 @@ impl<'a> Parser<'a> {
#[inline(always)]
fn get(&self) -> TokenType {
- return unsafe {
- *self.tokens.kind().get_unchecked(self.cursor as usize)
- };
+ return self.get_at(self.cursor);
+ }
+
+ #[inline(always)]
+ fn get_at(&self, pos: u32) -> TokenType {
+ return unsafe { *self.tokens.kind().get_unchecked(pos as usize) };
}
#[inline(always)]
@@ -165,12 +130,6 @@ impl<'a> Parser<'a> {
}
#[inline(always)]
- fn new_extra_data(&mut self, d: ExtraData) -> u32 {
- self.extra_data.push(d);
- return (self.extra_data.len() - 1) as u32;
- }
-
- #[inline(always)]
fn new_error(&mut self, e: OryxError) {
self.errors.push(e);
}
@@ -180,6 +139,16 @@ impl<'a> Parser<'a> {
while !toks.contains(&self.next()) {}
}
+ fn scratch_guard<F, R>(&mut self, f: F) -> R
+ where
+ F: FnOnce(&mut Self) -> R,
+ {
+ let n = self.scratch.len();
+ let res = f(self);
+ self.scratch.truncate(n);
+ return res;
+ }
+
fn node_span(&self, node: u32) -> (usize, usize) {
let toks = self.ast.tok();
let views = self.tokens.view();
@@ -191,45 +160,51 @@ impl<'a> Parser<'a> {
fn node_span_1(&self, node: u32) -> (u32, u32) {
let SubNodes(_0, _1) = self.ast.sub()[node as usize];
return match self.ast.kind()[node as usize] {
- AstType::Assign => (self.node_span_1(_0).0, self.node_span_1(_1).1),
- AstType::Block => {
- todo!()
+ AstType::Assign => {
+ let lhs =
+ self.node_span_1(self.extra_data[(_0 + 1) as usize]).0;
+ let rhs = self
+ .node_span_1(
+ self.extra_data
+ [(_1 + self.extra_data[_1 as usize]) as usize],
+ )
+ .1;
+ (lhs, rhs)
},
- /* (extra-data, _) */
AstType::Dereference => (self.node_span_1(_0).0, node),
- AstType::Empty => (node, node),
- AstType::FunCall => {
- todo!()
- },
- /* (expression, extra-data) */
AstType::FunProto => {
- todo!()
+ let nargs = self.extra_data[_0 as usize];
+ let nrets = self.extra_data[_1 as usize];
+ let rhs = match (nargs, nrets) {
+ (0, 0) => node,
+ (_, 0) => self.extra_data[(_0 + nargs) as usize],
+ (_, _) => self.extra_data[(_1 + nrets) as usize],
+ };
+ (node, self.node_span_1(rhs).1)
},
- /* (extra-data, _) */
- AstType::Function => {
- todo!()
- },
- /* (prototype, body) */
AstType::Identifier => (node, node),
AstType::MultiDefBind => {
- todo!()
+ let expr = self.extra_data
+ [(_1 + self.extra_data[_1 as usize]) as usize];
+ (node, self.node_span_1(expr).1)
},
- /* (extra-data, _) */
- AstType::Number => (node, node),
AstType::Pointer => (node, self.node_span_1(_0).1),
- AstType::Return => {
- let exprs =
- unsafe { &self.extra_data[_0 as usize].r#return.exprs };
- if exprs.len() == 0 {
+ AstType::Block | AstType::FunCall | AstType::Return => {
+ if _1 == 0 {
(node, node)
} else {
- let last = *exprs.last().unwrap();
- (node, self.node_span_1(last).1)
+ (
+ node,
+ self.node_span_1(
+ self.extra_data[(_0 + _1 - 1) as usize],
+ )
+ .1,
+ )
}
},
- AstType::String => (node, node),
+ AstType::Empty | AstType::Number | AstType::String => (node, node),
AstType::UnaryOperator => (node, self.node_span_1(_0).1),
- AstType::BinaryOperator => {
+ AstType::BinaryOperator | AstType::Function => {
(self.node_span_1(_0).0, self.node_span_1(_1).1)
},
};
@@ -250,7 +225,7 @@ impl<'a> Parser<'a> {
self.new_error(OryxError::new(
self.get_view(),
format!(
- "Expected top-level statement but got {:?}",
+ "expected top-level statement but got {:?}",
self.get(),
),
));
@@ -285,42 +260,131 @@ impl<'a> Parser<'a> {
TokenType::KeywordReturn => 'label: {
let main_tok = self.cursor;
self.next(); /* Consume ‘return’ */
- let exprs = self.parse_expr_list();
+ let exprbeg = match self.parse_expr_list() {
+ Ok(i) => i,
+ Err(e) => {
+ self.new_error(e);
+ syncp = true;
+ break 'label u32::MAX;
+ },
+ };
if self.get_n_move() != TokenType::Semicolon {
self.new_error(OryxError::new(
(
self.get_view_at(main_tok).0,
self.get_view_at(self.cursor - 1).1,
),
- "Expected semicolon after return statement",
+ "expected semicolon after return statement",
));
syncp = true;
break 'label u32::MAX;
}
- let i = self.new_extra_data(ExtraData {
- r#return: ManuallyDrop::new(ReturnData { exprs }),
- });
+
+ let nexprs = self.scratch.len() - exprbeg;
+ let extra_data_beg = self.extra_data.len();
+ for x in self.scratch.drain(exprbeg..) {
+ self.extra_data.push(x);
+ }
self.new_node(AstNode {
kind: AstType::Return,
tok: main_tok,
- sub: SubNodes(i, u32::MAX),
+ sub: SubNodes(extra_data_beg as u32, nexprs as u32),
})
},
t if t.exprp() => {
- let k = self.parse_expr(MIN_PREC);
- if self.get_n_move() != TokenType::Semicolon {
- self.new_error(OryxError::new(
- self.node_span(k),
- "Expected semicolon after expression",
- ));
- syncp = true;
+ /* Here we can have either a single expression (e.g. ‘foo()’)
+ * or an expression list that is used in an assignment
+ * (e.g. ‘x, y = 69, 420’) */
+
+ match self.scratch_guard(|p| {
+ let lhs = p.parse_expr_list()?;
+ let nlexprs = p.scratch.len() - lhs;
+
+ if nlexprs == 1 && p.get() != TokenType::Equals {
+ if p.get_at(p.cursor - 1) == TokenType::Comma {
+ return Err(OryxError::new(
+ p.get_view_at(p.cursor - 1),
+ "unexpected comma after expression",
+ ));
+ }
+ if p.get_n_move() != TokenType::Semicolon {
+ let k = p.scratch[lhs];
+ return Err(OryxError::new(
+ p.node_span(k),
+ "expected semicolon after expression",
+ ));
+ }
+ return Ok(p.scratch[lhs]);
+ }
+
+ if p.get_at(p.cursor - 1) == TokenType::Comma {
+ /* Do not return the error, because the recovery is
+ * implicit */
+ p.new_error(OryxError::new(
+ p.get_view_at(p.cursor - 1),
+ "assignment expression lists do not accept trailing commas",
+ ));
+ }
+
+ let main_tok = p.cursor;
+ if p.get_n_move() != TokenType::Equals {
+ let lexpr = p.scratch[lhs];
+ let rexpr = p.scratch[lhs + nlexprs - 1];
+ return Err(OryxError::new(
+ (p.node_span(lexpr).0, p.node_span(rexpr).1),
+ "expected ‘=’ operator after expression list",
+ ));
+ }
+
+ let rhs = p.parse_expr_list()?;
+ let nrexprs = p.scratch.len() - rhs;
+
+ if nrexprs == 0 {
+ return Err(OryxError::new(
+ p.get_view_at(main_tok),
+ "expected expression(s) on the right-hand side of assignment",
+ ));
+ }
+ if p.get_at(p.cursor - 1) == TokenType::Comma {
+ /* Do not return the error, because the recovery is
+ * implicit */
+ p.new_error(OryxError::new(
+ p.get_view_at(p.cursor - 1),
+ "assignment expression lists do not accept trailing commas",
+ ));
+ }
+
+ let extra_data_beg = p.extra_data.len();
+ p.extra_data.push(nlexprs as u32);
+ for x in &p.scratch[lhs..rhs] {
+ p.extra_data.push(*x);
+ }
+ p.extra_data.push(nrexprs as u32);
+ for x in &p.scratch[rhs..] {
+ p.extra_data.push(*x);
+ }
+
+ return Ok(p.new_node(AstNode {
+ kind: AstType::Assign,
+ tok: main_tok,
+ sub: SubNodes(
+ extra_data_beg as u32,
+ (extra_data_beg + nlexprs + 1) as u32,
+ ),
+ }));
+ }) {
+ Ok(e) => e,
+ Err(e) => {
+ self.new_error(e);
+ syncp = true;
+ u32::MAX
+ },
}
- k
},
_ => {
self.new_error(OryxError::new(
self.get_view(),
- format!("Expected statement but got {:?}", self.get()),
+ format!("expected statement but got {:?}", self.get()),
));
syncp = true;
u32::MAX
@@ -345,140 +409,172 @@ impl<'a> Parser<'a> {
if self.get_n_move() != TokenType::KeywordDef {
return Err(OryxError::new(
self.get_view_at(self.cursor - 1),
- "Expected ‘def’",
- ));
- }
- let lhs = self.parse_decl_list();
- if lhs.len() == 0 {
- return Err(OryxError::new(
- self.get_view_at(main_tok),
- "Expected an identifier",
+ "expected ‘def’",
));
}
+ return self.scratch_guard(|p| {
+ let lhs = p.parse_decl_list()?;
+ if p.scratch.len() - lhs == 0 {
+ return Err(OryxError::new(
+ p.get_view_at(main_tok),
+ "expected an identifier",
+ ));
+ }
- let t = self.get_n_move();
- if t != TokenType::Equals {
- return Err(if t == TokenType::Semicolon {
- OryxError::new(
- (self.get_view_at(main_tok).0, self.get_view().1),
- "Symbols defined with ‘def’ must be initialized",
- )
- } else {
- OryxError::new(
- self.get_view_at(self.cursor - 1),
- "Expected ‘=’",
- )
- });
- }
+ let t = p.get_n_move();
+ if t != TokenType::Equals {
+ return Err(if t == TokenType::Semicolon {
+ OryxError::new(
+ (p.get_view_at(main_tok).0, p.get_view().1),
+ "symbols defined with ‘def’ must be initialized",
+ )
+ } else {
+ OryxError::new(p.get_view_at(p.cursor - 1), "expected ‘=’")
+ });
+ }
- let rhs = self.parse_expr_list();
- if rhs.len() == 0 {
- return Err(OryxError::new(
- self.get_view_at(self.cursor - 1),
- "Expected expression after ‘=’",
- ));
- }
- if self.get_n_move() != TokenType::Semicolon {
- return Err(OryxError::new(
- self.get_view_at(self.cursor - 1),
- "Expected semicolon",
- ));
- }
+ let rhs = p.parse_expr_list()?;
+ if p.scratch.len() - rhs == 0 {
+ return Err(OryxError::new(
+ p.get_view_at(p.cursor - 1),
+ "expected expression after ‘=’",
+ ));
+ }
+ if p.get_n_move() != TokenType::Semicolon {
+ return Err(OryxError::new(
+ p.get_view_at(p.cursor - 1),
+ "expected semicolon",
+ ));
+ }
+
+ let ndecls = (rhs - lhs) / 2;
+ let nexprs = p.scratch.len() - rhs;
+ let declbeg = p.extra_data.len();
+ let exprbeg = declbeg + ndecls * 2 + 1;
- let i = self.new_extra_data(ExtraData {
- decl: ManuallyDrop::new(DeclData { lhs, rhs }),
+ p.extra_data.push(ndecls as u32);
+ for x in &p.scratch[lhs..rhs] {
+ p.extra_data.push(*x);
+ }
+ p.extra_data.push(nexprs as u32);
+ for x in &p.scratch[rhs..] {
+ p.extra_data.push(*x);
+ }
+
+ return Ok(p.new_node(AstNode {
+ kind: AstType::MultiDefBind,
+ tok: main_tok,
+ sub: SubNodes(declbeg as u32, exprbeg as u32),
+ }));
});
- return Ok(self.new_node(AstNode {
- kind: AstType::MultiDefBind,
- tok: main_tok,
- sub: SubNodes(i as u32, u32::MAX),
- }));
}
- fn parse_func_proto(&mut self) -> u32 {
+ fn parse_func_proto(&mut self) -> Result<u32, OryxError> {
let main_tok = self.cursor;
- /* No params or return */
if self.next() != TokenType::ParenL {
- return self.new_node(AstNode {
- kind: AstType::FunProto,
- tok: main_tok,
- sub: SubNodes::default(),
- });
+ return Err(OryxError::new(
+ self.get_view_at(main_tok),
+ "expected an argument list after the ‘func’ keyword",
+ ));
}
+ let parenl = self.cursor;
self.next(); /* Consume ‘(’ */
- let args = self.parse_decl_list();
+ return self.scratch_guard(|p| {
+ let lhs = p.parse_decl_list()?;
+
+ if p.get_n_move() != TokenType::ParenR {
+ /* TODO: Highlight the entire argument list */
+ return Err(OryxError::new(
+ p.get_view_at(parenl), /* TODO: Is this the right token? */
+ "parameter list missing closing parenthesis",
+ ));
+ }
- if self.get_n_move() != TokenType::ParenR {
- self.err_at_position(
- self.cursor - 1,
- "Expected closing parenthesis",
- );
- }
+ let t = p.get();
+ let rhs = match t {
+ TokenType::ParenL => {
+ let parenl = p.cursor;
+ p.next(); /* Consume ‘(’ */
+ let i = p.parse_expr_list()?;
+ if p.get_n_move() != TokenType::ParenR {
+ /* TODO: Highlight the entire return list */
+ return Err(OryxError::new(
+ p.get_view_at(parenl),
+ "return list missing closing parenthesis",
+ ));
+ }
+ i
+ },
+ _ if t.exprp() => {
+ let k = p.parse_expr(0)?;
+ p.scratch.push(k);
+ p.scratch.len() - 1
+ },
+ _ => p.scratch.len(),
+ };
- let t = self.get();
- let ret = match t {
- TokenType::ParenL => {
- self.next(); /* Consume ‘(’ */
- let xs = self.parse_expr_list();
- if self.get_n_move() != TokenType::ParenR {
- self.err_at_position(
- self.cursor - 1,
- "Expected closing parenthesis",
- );
- }
- xs
- },
- _ if t.exprp() => {
- // TODO: This is really bad. We should probably optimize
- // for the small cases (or use an arena?)
- vec![self.parse_expr(MIN_PREC)]
- },
- _ => Vec::new(), /* Doesn’t allocate */
- };
+ let nargs = rhs - lhs;
+ let nrets = p.scratch.len() - rhs;
+ let argbeg = p.extra_data.len();
+ let retbeg = argbeg + nargs * 2 + 1;
- let i = self.new_extra_data(ExtraData {
- funproto: ManuallyDrop::new(FunProtoData { args, ret }),
- });
- return self.new_node(AstNode {
- kind: AstType::FunProto,
- tok: main_tok,
- sub: SubNodes(i, u32::MAX),
+ p.extra_data.push(nargs as u32);
+ for x in &p.scratch[lhs..rhs] {
+ p.extra_data.push(*x);
+ }
+ p.extra_data.push(nrets as u32);
+ for x in &p.scratch[rhs..] {
+ p.extra_data.push(*x);
+ }
+
+ return Ok(p.new_node(AstNode {
+ kind: AstType::FunProto,
+ tok: main_tok,
+ sub: SubNodes(argbeg as u32, retbeg as u32),
+ }));
});
}
- fn parse_block(&mut self) -> u32 {
+ fn parse_block(&mut self) -> Result<u32, OryxError> {
let main_tok = self.cursor;
if self.get_n_move() != TokenType::BraceL {
- self.err_at_position(self.cursor - 1, "Expected opening brace");
+ return Err(OryxError::new(
+ self.get_view_at(self.cursor - 1),
+ "expected opening brace",
+ ));
}
- let mut stmts = Vec::<u32>::with_capacity(64);
+ let scratch_beg = self.scratch.len();
while self.get() != TokenType::BraceR {
- stmts.push(self.parse_stmt());
+ let k = self.parse_stmt();
+ self.scratch.push(k);
}
self.next(); /* Consume ‘}’ */
- let i = self.new_extra_data(ExtraData {
- block: ManuallyDrop::new(BlockData { stmts }),
- });
- return self.new_node(AstNode {
+
+ let extra_data_beg = self.extra_data.len();
+ let nstmts = (self.scratch.len() - scratch_beg) as u32;
+
+ for x in self.scratch.drain(scratch_beg..) {
+ self.extra_data.push(x);
+ }
+ return Ok(self.new_node(AstNode {
kind: AstType::Block,
tok: main_tok,
- sub: SubNodes(i, u32::MAX),
- });
+ sub: SubNodes(extra_data_beg as u32, nstmts),
+ }));
}
- fn parse_decl_list(&mut self) -> Vec<(u32, u32)> {
+ fn parse_decl_list(&mut self) -> Result<usize, OryxError> {
let scratch_beg = self.scratch.len();
- let (mut nidents, mut nuntyped) = (0, 0);
+ let mut nuntyped = 0;
loop {
if self.get() != TokenType::Identifier {
break;
}
self.scratch.push(self.cursor);
self.scratch.push(u32::MAX);
- nidents += 1;
nuntyped += 1;
match self.next() {
@@ -486,7 +582,7 @@ impl<'a> Parser<'a> {
self.next();
},
t if t.exprp() => {
- let k = self.parse_expr(MIN_PREC);
+ let k = self.parse_expr(0)?;
let len = self.scratch.len();
for i in 0..nuntyped {
self.scratch[len - 1 - 2 * i] = k;
@@ -497,19 +593,20 @@ impl<'a> Parser<'a> {
};
}
- let mut iter = self.scratch.drain(scratch_beg..);
- let mut pairs = Vec::with_capacity(nidents);
- while let (Some(a), Some(b)) = (iter.next(), iter.next()) {
- pairs.push((a, b));
- }
- return pairs;
+ return Ok(scratch_beg);
}
- fn parse_expr_list(&mut self) -> Vec<u32> {
+ fn parse_expr_list(&mut self) -> Result<usize, OryxError> {
let scratch_beg = self.scratch.len();
while self.get().exprp() {
- let k = self.parse_expr(MIN_PREC);
+ let k = match self.parse_expr(0) {
+ Ok(e) => e,
+ Err(e) => {
+ self.scratch.truncate(scratch_beg);
+ return Err(e);
+ },
+ };
self.scratch.push(k);
if self.get() == TokenType::Comma {
self.next();
@@ -518,10 +615,10 @@ impl<'a> Parser<'a> {
}
}
- return self.scratch.drain(scratch_beg..).collect();
+ return Ok(scratch_beg);
}
- fn parse_expr(&mut self, minprec: i64) -> u32 {
+ fn parse_expr(&mut self, minprec: i64) -> Result<u32, OryxError> {
fn getprec(t: TokenType) -> i64 {
match t {
TokenType::ParenL => 6,
@@ -532,13 +629,22 @@ impl<'a> Parser<'a> {
| TokenType::AngleR2
| TokenType::AngleR3
| TokenType::Asterisk
+ | TokenType::Percent
+ | TokenType::Percent2
| TokenType::Slash => 5,
TokenType::Bar
| TokenType::Minus
| TokenType::Plus
| TokenType::Tilde => 4,
- TokenType::AngleL | TokenType::AngleR => 3,
- _ => -1,
+ TokenType::AngleL
+ | TokenType::AngleLEquals
+ | TokenType::AngleR
+ | TokenType::AngleREquals
+ | TokenType::BangEquals
+ | TokenType::Equals2 => 3,
+ TokenType::Ampersand2 => 2,
+ TokenType::Bar2 => 1,
+ _ => 0,
}
}
@@ -568,13 +674,13 @@ impl<'a> Parser<'a> {
})
},
TokenType::Ampersand
- | TokenType::Exclamation
+ | TokenType::Bang
| TokenType::Minus
| TokenType::Plus
| TokenType::Tilde => {
let i = self.cursor;
self.next();
- let rhs = self.parse_expr(MAX_PREC);
+ let rhs = self.parse_expr(MAX_PREC)?;
self.new_node(AstNode {
kind: AstType::UnaryOperator,
tok: i,
@@ -582,13 +688,14 @@ impl<'a> Parser<'a> {
})
},
TokenType::ParenL => {
+ let parenl = self.cursor;
self.next();
- let k = self.parse_expr(MIN_PREC);
+ let k = self.parse_expr(0)?;
if self.get() != TokenType::ParenR {
- self.err_at_position(
- self.cursor,
- "Expected closing parenthesis",
- );
+ return Err(OryxError::new(
+ self.get_view_at(parenl),
+ "expression missing closing parenthesis",
+ ));
}
self.next(); /* Consume ‘)’ */
k
@@ -596,7 +703,7 @@ impl<'a> Parser<'a> {
TokenType::Caret => {
let tok = self.cursor;
self.next();
- let k = self.parse_expr(MAX_PREC);
+ let k = self.parse_expr(MAX_PREC)?;
self.new_node(AstNode {
kind: AstType::Pointer,
tok,
@@ -605,9 +712,9 @@ impl<'a> Parser<'a> {
},
TokenType::KeywordFunc => {
let tok = self.cursor;
- let proto = self.parse_func_proto();
+ let proto = self.parse_func_proto()?;
if self.get() == TokenType::BraceL {
- let body = self.parse_block();
+ let body = self.parse_block()?;
self.new_node(AstNode {
kind: AstType::Function,
tok,
@@ -617,7 +724,12 @@ impl<'a> Parser<'a> {
proto
}
},
- _ => self.err_at_position(self.cursor, "Expected expression"),
+ _ => {
+ return Err(OryxError::new(
+ self.get_view(),
+ "expected expression",
+ ));
+ },
};
loop {
@@ -630,22 +742,30 @@ impl<'a> Parser<'a> {
lhs = match tok {
/* Binop */
TokenType::Ampersand
+ | TokenType::Ampersand2
| TokenType::AmpersandTilde
+ | TokenType::AngleL
| TokenType::AngleL2
| TokenType::AngleL3
+ | TokenType::AngleLEquals
+ | TokenType::AngleR
| TokenType::AngleR2
| TokenType::AngleR3
+ | TokenType::AngleREquals
| TokenType::Asterisk
- | TokenType::Slash
+ | TokenType::BangEquals
| TokenType::Bar
+ | TokenType::Bar2
+ | TokenType::Equals2
| TokenType::Minus
+ | TokenType::Percent
+ | TokenType::Percent2
| TokenType::Plus
- | TokenType::Tilde
- | TokenType::AngleL
- | TokenType::AngleR => {
+ | TokenType::Slash
+ | TokenType::Tilde => {
let i = self.cursor;
self.next();
- let rhs = self.parse_expr(prec);
+ let rhs = self.parse_expr(prec)?;
self.new_node(AstNode {
kind: AstType::BinaryOperator,
tok: i,
@@ -667,17 +787,32 @@ impl<'a> Parser<'a> {
TokenType::ParenL => {
let tok = self.cursor;
self.next();
- let args = self.parse_expr_list();
- if self.get_n_move() != TokenType::ParenR {
- self.err_at_position(self.cursor - 1, "Expected ‘)’");
+ let exprbeg = self.parse_expr_list()?;
+ match self.get_n_move() {
+ TokenType::ParenR => {},
+ TokenType::Comma => {
+ return Err(OryxError::new(
+ self.get_view_at(self.cursor - 1),
+ "empty function parameter",
+ ));
+ },
+ _ => {
+ return Err(OryxError::new(
+ /* TODO: Highlight the entire argument list */
+ self.get_view_at(tok),
+ "function call missing closing parenthesis",
+ ));
+ },
+ };
+ let nexprs = self.scratch.len() - exprbeg;
+ let extra_data_beg = self.extra_data.len();
+ for x in self.scratch.drain(exprbeg..) {
+ self.extra_data.push(x);
}
- let i = self.new_extra_data(ExtraData {
- funcall: ManuallyDrop::new(FunCallData { args }),
- });
self.new_node(AstNode {
kind: AstType::FunCall,
tok,
- sub: SubNodes(lhs, i),
+ sub: SubNodes(extra_data_beg as u32, nexprs as u32),
})
},
@@ -685,13 +820,13 @@ impl<'a> Parser<'a> {
}
}
- return lhs;
+ return Ok(lhs);
}
}
pub fn parse(
tokens: &Soa<Token>,
-) -> Result<(Soa<AstNode>, Vec<ExtraData>), Vec<OryxError>> {
+) -> Result<(Soa<AstNode>, Vec<u32>), Vec<OryxError>> {
let mut p = Parser::new(tokens);
while p.get() != TokenType::Eof {
p.parse_toplevel();