| author | Thomas Voss <mail@thomasvoss.com> | 2026-02-27 11:14:09 +0100 |
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2026-02-27 11:14:09 +0100 |
| commit | df512a08f3c14f8496b53dd15f30f772df208202 | |
| tree | 4a4e510b41b2de43a667118a3e13e496f13c49bc /oryxc | |
| parent | 427e7ac724dfe55f2077cd10eef325df7dea5124 | |
Big work on the compiler job system
Diffstat (limited to 'oryxc')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | oryxc/src/compiler.rs | 59 |
| -rw-r--r-- | oryxc/src/lexer.rs | 79 |
| -rw-r--r-- | oryxc/src/main.rs | 1 |
| -rw-r--r-- | oryxc/src/parser.rs | 32 |
4 files changed, 111 insertions, 60 deletions
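Most of the compiler.rs changes hang off a work-stealing scheduler: `compile` seeds the global queue with one `LexAndParse` job per input file, and each `worker_loop` pulls jobs through a `find_task` helper that is not part of this diff. For orientation, here is a minimal sketch of what such a helper conventionally looks like with `crossbeam_deque` (modelled on the example in the crate's documentation, so the real `oryxc` routine may differ in its details):

```rust
use std::iter;

use crossbeam_deque::{Injector, Stealer, Worker};

/// Sketch of a work-stealing `find_task`: prefer the local queue, then
/// batch-steal from the global injector, then steal from sibling workers,
/// retrying for as long as any source reports `Steal::Retry`.
fn find_task<T>(
    local: &Worker<T>,
    global: &Injector<T>,
    stealers: &[Stealer<T>],
) -> Option<T> {
    // Fast path: a job already sitting in our own deque.
    local.pop().or_else(|| {
        iter::repeat_with(|| {
            global
                .steal_batch_and_pop(local)
                .or_else(|| stealers.iter().map(|s| s.steal()).collect())
        })
        .find(|s| !s.is_retry())
        .and_then(|s| s.success())
    })
}
```

In the diff below, the `Injector` corresponds to `state.globalq`, each worker owns its own `Worker<Job>` deque, and the `stealers` slice holds handles to every sibling's deque.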
```diff
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index 05e275f..14919ba 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -1,5 +1,6 @@
 use std::ffi::OsString;
 use std::iter::IntoIterator;
+use std::mem::MaybeUninit;
 use std::sync::Arc;
 use std::sync::atomic::{
     AtomicUsize,
@@ -7,6 +8,8 @@ use std::sync::atomic::{
 };
 use std::vec::Vec;
 use std::{
+    fs,
+    io,
     panic,
     thread,
 };
@@ -18,14 +21,37 @@ use crossbeam_deque::{
     Worker,
 };
 use dashmap::DashMap;
-
-use crate::Flags;
+use soa_rs::Soa;
+
+use crate::lexer::Token;
+use crate::parser::AstNode;
+use crate::{
+    Flags,
+    err,
+    lexer,
+    parser,
+};
 
 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
-pub struct FileId(u32);
+pub struct FileId(usize);
 
 pub struct FileData {
-    name: OsString,
+    name: Arc<OsString>,
+    buffer: Arc<String>,
+    tokens: Arc<MaybeUninit<Soa<Token>>>,
+    ast: Arc<MaybeUninit<Soa<AstNode>>>,
+}
+
+impl FileData {
+    fn new(name: OsString) -> Result<Self, io::Error> {
+        let buffer = fs::read_to_string(&name)?;
+        return Ok(Self {
+            name: name.into(),
+            buffer: buffer.into(),
+            tokens: Arc::new_uninit(),
+            ast: Arc::new_uninit(),
+        });
+    }
 }
 
 pub enum Job {
@@ -51,8 +77,12 @@ where
         flags,
     });
     for (i, path) in paths.into_iter().enumerate() {
-        let id = FileId(i as u32);
-        state.files.insert(id, FileData { name: path.clone() });
+        let id = FileId(i);
+        let data = match FileData::new(path.clone().into()) {
+            Ok(x) => x,
+            Err(e) => err!(e, "{}", path.display()),
+        };
+        state.files.insert(id, data);
         state.njobs.fetch_add(1, Ordering::SeqCst);
         state.globalq.push(Job::LexAndParse { file: id });
     }
@@ -95,7 +125,22 @@ fn worker_loop(
     let job = find_task(&queue, &state.globalq, &stealers);
     if let Some(job) = job {
         match job {
-            LexAndParse { file } => {},
+            Job::LexAndParse { file } => {
+                let (name, buffer) = {
+                    let fdata = state.files.get(&file).unwrap();
+                    (fdata.name.clone(), fdata.buffer.clone())
+                };
+                let (name, buffer) = (name.as_ref(), buffer.as_ref());
+                let tokens = match lexer::tokenize(name, buffer) {
+                    Ok(xs) => xs,
+                    Err(errs) => todo!(),
+                };
+                let (ast, _extra_data) = parser::parse(name, &tokens);
+                let mut fdata = state.files.get_mut(&file).unwrap();
+                fdata.tokens = Arc::from(MaybeUninit::new(tokens));
+                fdata.ast = Arc::from(MaybeUninit::new(ast));
+            },
+            _ => todo!(),
         }
 
         state.njobs.fetch_sub(1, Ordering::SeqCst);
diff --git a/oryxc/src/lexer.rs b/oryxc/src/lexer.rs
index 531593d..6457cfd 100644
--- a/oryxc/src/lexer.rs
+++ b/oryxc/src/lexer.rs
@@ -1,5 +1,7 @@
+use std::borrow::Cow;
 use std::ffi::OsStr;
 use std::fmt::Display;
+use std::vec::Vec;
 use std::{
     iter,
     mem,
@@ -8,16 +10,16 @@
 use phf;
 use soa_rs::{
-    self,
+    Soa,
     Soars,
 };
 
 use crate::{
     errors,
-    size,
     unicode,
 };
 
+#[allow(dead_code)]
 #[repr(u8)]
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum TokenType {
@@ -87,17 +89,31 @@ impl TokenType {
     }
 }
 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct Span(usize, usize);
+
 #[derive(Soars)]
 #[soa_derive(Debug)]
-pub struct Token<'a> {
+pub struct Token {
     pub kind: TokenType,
-    pub view: &'a str,
+    pub view: Span,
 }
 
-pub struct TokenizedBuffer<'a> {
-    pub tokens: soa_rs::Soa<Token<'a>>,
-    pub buffer: &'a str,
-    pub filename: Option<&'a OsStr>,
+pub struct Error {
+    pub pos: usize,
+    pub msg: Cow<'static, str>,
+}
+
+impl Error {
+    fn new<T>(pos: usize, msg: T) -> Self
+    where
+        T: Into<Cow<'static, str>>,
+    {
+        return Self {
+            pos,
+            msg: msg.into(),
+        };
+    }
 }
 
 struct LexerContext<'a> {
     pos_a: usize,
     pos_b: usize, /* Pos [b]efore char */
     chars: iter::Peekable<str::Chars<'a>>,
     string: &'a str,
-    filename: Option<&'a OsStr>,
+    filename: &'a OsStr,
     expect_punct_p: bool,
 }
 
 impl<'a> LexerContext<'a> {
-    fn new(filename: Option<&'a OsStr>, string: &'a str) -> Self {
+    fn new(filename: &'a OsStr, string: &'a str) -> Self {
         return Self {
             pos_a: 0,
             pos_b: 0,
@@ -138,7 +154,7 @@ impl<'a> LexerContext<'a> {
     where
         S: Display,
     {
-        errors::err_at_position(self.filename.unwrap_or(OsStr::new("-")), s);
+        errors::err_at_position(self.filename, s);
     }
 
     #[inline(always)]
@@ -157,11 +173,8 @@ static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
     "return" => TokenType::KeywordReturn,
 };
 
-pub fn tokenize<'a>(
-    filename: Option<&'a OsStr>,
-    s: &'a str,
-) -> TokenizedBuffer<'a> {
-    let mut toks = soa_rs::Soa::<Token>::with_capacity(size::kibibytes(10));
+pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
+    let mut toks = Soa::<Token>::with_capacity(s.len() / 2);
     let mut ctx = LexerContext::new(filename, s);
 
     while let Some(c) = ctx.next() {
@@ -182,7 +195,7 @@
             };
             Some(Token {
                 kind,
-                view: &s[i..ctx.pos_a],
+                view: Span(i, ctx.pos_a),
             })
         },
         '>' if ctx.peek().is_some_and(|c| c == '>') => {
@@ -195,21 +208,21 @@
             };
             Some(Token {
                 kind,
-                view: &s[i..ctx.pos_a],
+                view: Span(i, ctx.pos_a),
             })
         },
         '&' if ctx.peek().is_some_and(|c| c == '~') => {
             ctx.next(); /* Consume ‘~’ */
             Some(Token {
                 kind: TokenType::AmpersandTilde,
-                view: &s[i..j + 1],
+                view: Span(i, j + 1),
             })
         },
         '!' | '&' | '(' | ')' | '*' | '+' | ',' | '-' | '/' | ';' | '<'
         | '=' | '>' | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '…' => {
             Some(Token {
                 kind: unsafe { mem::transmute(c as u8) },
-                view: &s[i..j],
+                view: Span(i, j),
             })
         },
         '#' => {
@@ -246,13 +259,9 @@
 
     toks.push(Token {
         kind: TokenType::Eof,
-        view: &s[s.len() - 1..],
+        view: Span(s.len() - 1, s.len()),
    });
-    return TokenizedBuffer {
-        tokens: toks,
-        buffer: s,
-        filename,
-    };
+    return Ok(toks);
 }
 
 fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
@@ -277,7 +286,7 @@
     ctx.err_at_position("Unterminated comment");
 }
 
-fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token {
     let i = ctx.pos_b;
     let alphabet = match ctx.next() {
         Some('b') => "01",
@@ -305,14 +314,14 @@
         },
         None => ctx.err_at_position("Expected number after base specifier"),
     };
-    tok.view = &ctx.string[i..ctx.pos_a];
+    tok.view = Span(i, ctx.pos_a);
     return tok;
 }
 
 fn tokenize_number<'a>(
     ctx: &mut LexerContext<'a>,
     alphabet: &'static str,
-) -> Token<'a> {
+) -> Token {
     let i = ctx.pos_b;
 
     span_raw_number(ctx, alphabet, true);
@@ -332,7 +341,7 @@
 
     return Token {
         kind: TokenType::Number,
-        view: &ctx.string[i..ctx.pos_a],
+        view: Span(i, ctx.pos_a),
     };
 }
 
@@ -396,7 +405,7 @@
     }
 }
 
-fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token {
     let i = ctx.pos_b;
     loop {
         if let Some(c) = ctx.next() {
@@ -409,17 +418,17 @@
         }
     }
     return Token {
         kind: TokenType::String,
-        view: &ctx.string[i..ctx.pos_a],
+        view: Span(i, ctx.pos_a),
     };
 }
 
-fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token {
     let i = ctx.pos_b;
     while ctx.peek().is_some_and(unicode::xid_continue_p) {
         ctx.next();
     }
-    let view = &ctx.string[i..ctx.pos_a];
-    let kind = match KEYWORDS.get(view) {
+    let view = Span(i, ctx.pos_a);
+    let kind = match KEYWORDS.get(&ctx.string[view.0..view.1]) {
         Some(kind) => kind.clone(),
         None => TokenType::Identifier,
     };
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
index 298093d..3f4be0f 100644
--- a/oryxc/src/main.rs
+++ b/oryxc/src/main.rs
@@ -10,7 +10,6 @@ mod unicode;
 use std::ffi::OsString;
 use std::{
     env,
-    fs,
     process,
     thread,
 };
diff --git a/oryxc/src/parser.rs b/oryxc/src/parser.rs
index 212d0db..d726f8d 100644
--- a/oryxc/src/parser.rs
+++ b/oryxc/src/parser.rs
@@ -9,8 +9,8 @@ use soa_rs::{
 };
 
 use crate::lexer::{
+    Token,
     TokenType,
-    TokenizedBuffer,
 };
 use crate::{
     errors,
@@ -87,33 +87,31 @@
     r#return: ManuallyDrop<ReturnData>,
 }
 
-struct Parser<'a, 'b> {
+struct Parser<'a> {
     ast: Soa<AstNode>,
     extra_data: Vec<ExtraData>,
-    tokbuf: &'a TokenizedBuffer<'b>,
     cursor: u32,
     scratch: Vec<u32>,
+    tokens: &'a Soa<Token>,
+    filename: &'a OsStr,
 }
 
-impl<'a, 'b> Parser<'a, 'b> {
-    fn new(tokbuf: &'a TokenizedBuffer<'b>) -> Self {
+impl<'a> Parser<'a> {
+    fn new(filename: &'a OsStr, tokens: &'a Soa<Token>) -> Self {
         return Self {
             ast: Soa::with_capacity(size::kibibytes(10)),
             extra_data: Vec::with_capacity(size::kibibytes(1)),
-            tokbuf,
             cursor: 0,
             scratch: Vec::with_capacity(64),
+            tokens,
+            filename,
         };
     }
 
     #[inline(always)]
     fn get(&self) -> TokenType {
         return unsafe {
-            *self
-                .tokbuf
-                .tokens
-                .kind()
-                .get_unchecked(self.cursor as usize)
+            *self.tokens.kind().get_unchecked(self.cursor as usize)
         };
     }
 
@@ -146,10 +144,7 @@
     where
         T: Display,
     {
-        errors::err_at_position(
-            self.tokbuf.filename.unwrap_or(OsStr::new("-")),
-            s,
-        );
+        errors::err_at_position(self.filename, s);
     }
 
     fn parse_toplevel(&mut self) {
@@ -535,8 +530,11 @@
     }
 }
 
-pub fn parse(tokbuf: &TokenizedBuffer) -> (Soa<AstNode>, Vec<ExtraData>) {
-    let mut p = Parser::new(tokbuf);
+pub fn parse(
+    filename: &OsStr,
+    tokens: &Soa<Token>,
+) -> (Soa<AstNode>, Vec<ExtraData>) {
+    let mut p = Parser::new(filename, tokens);
     while p.get() != TokenType::Eof {
         p.parse_toplevel();
     }
```
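Two design points in the diff deserve a note. First, replacing the borrowed `view: &'a str` with `Span` byte offsets removes the lifetime parameter from `Token`, which is what allows a `Soa<Token>` to be stored in `FileData` and shared between worker threads; token text is recovered on demand by reindexing the file buffer. Second, `tokens` and `ast` start life as `Arc::new_uninit()` placeholders that the `LexAndParse` job later replaces wholesale, so any later pass must only read them once that job is known to have completed. Below is a minimal sketch of both patterns, assuming a hypothetical `Span::text` helper in `lexer.rs` and assuming the `Soars` derive generates a `view()` slice accessor analogous to the `kind()` accessor the diff already uses:

```rust
use soa_rs::Soa;

// Hypothetical addition to lexer.rs (not in the diff): resolve a Span's
// byte offsets back to source text against the buffer it indexes into.
impl Span {
    pub fn text<'a>(&self, buffer: &'a str) -> &'a str {
        &buffer[self.0..self.1]
    }
}

// Hypothetical follow-up job body in compiler.rs that reads the data
// published by LexAndParse.
fn dump_tokens(fdata: &FileData) {
    // SAFETY: sound only if the scheduler guarantees that this file's
    // LexAndParse job completed (and its write to `fdata.tokens` is
    // visible to this thread) before this job runs; otherwise the Arc
    // still holds the uninitialized placeholder from FileData::new.
    let tokens: &Soa<Token> = unsafe { fdata.tokens.assume_init_ref() };
    for (kind, view) in tokens.kind().iter().zip(tokens.view()) {
        println!("{:?} {:?}", kind, view.text(&fdata.buffer));
    }
}
```

Nothing in the type system enforces that ordering; it has to come from the job graph (the `njobs` counter and queue structure), which is presumably why the slots are `MaybeUninit` rather than `Option`: the happy path avoids a discriminant check on every access.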