author     Thomas Voss <mail@thomasvoss.com>  2026-02-27 11:14:09 +0100
committer  Thomas Voss <mail@thomasvoss.com>  2026-02-27 11:14:09 +0100
commit     df512a08f3c14f8496b53dd15f30f772df208202 (patch)
tree       4a4e510b41b2de43a667118a3e13e496f13c49bc /oryxc/src
parent     427e7ac724dfe55f2077cd10eef325df7dea5124 (diff)
Big work on the compiler job system
Diffstat (limited to 'oryxc/src')
-rw-r--r--  oryxc/src/compiler.rs  59
-rw-r--r--  oryxc/src/lexer.rs     79
-rw-r--r--  oryxc/src/main.rs       1
-rw-r--r--  oryxc/src/parser.rs    32
4 files changed, 111 insertions, 60 deletions
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index 05e275f..14919ba 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -1,5 +1,6 @@
use std::ffi::OsString;
use std::iter::IntoIterator;
+use std::mem::MaybeUninit;
use std::sync::Arc;
use std::sync::atomic::{
AtomicUsize,
@@ -7,6 +8,8 @@ use std::sync::atomic::{
};
use std::vec::Vec;
use std::{
+ fs,
+ io,
panic,
thread,
};
@@ -18,14 +21,37 @@ use crossbeam_deque::{
Worker,
};
use dashmap::DashMap;
-
-use crate::Flags;
+use soa_rs::Soa;
+
+use crate::lexer::Token;
+use crate::parser::AstNode;
+use crate::{
+ Flags,
+ err,
+ lexer,
+ parser,
+};
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
-pub struct FileId(u32);
+pub struct FileId(usize);
pub struct FileData {
- name: OsString,
+ name: Arc<OsString>,
+ buffer: Arc<String>,
+ tokens: Arc<MaybeUninit<Soa<Token>>>,
+ ast: Arc<MaybeUninit<Soa<AstNode>>>,
+}
+
+impl FileData {
+ fn new(name: OsString) -> Result<Self, io::Error> {
+ let buffer = fs::read_to_string(&name)?;
+ return Ok(Self {
+ name: name.into(),
+ buffer: buffer.into(),
+ tokens: Arc::new_uninit(),
+ ast: Arc::new_uninit(),
+ });
+ }
}
pub enum Job {
@@ -51,8 +77,12 @@ where
flags,
});
for (i, path) in paths.into_iter().enumerate() {
- let id = FileId(i as u32);
- state.files.insert(id, FileData { name: path.clone() });
+ let id = FileId(i);
+ let data = match FileData::new(path.clone().into()) {
+ Ok(x) => x,
+ Err(e) => err!(e, "{}", path.display()),
+ };
+ state.files.insert(id, data);
state.njobs.fetch_add(1, Ordering::SeqCst);
state.globalq.push(Job::LexAndParse { file: id });
}
@@ -95,7 +125,22 @@ fn worker_loop(
let job = find_task(&queue, &state.globalq, &stealers);
if let Some(job) = job {
match job {
- LexAndParse { file } => {},
+ Job::LexAndParse { file } => {
+ let (name, buffer) = {
+ let fdata = state.files.get(&file).unwrap();
+ (fdata.name.clone(), fdata.buffer.clone())
+ };
+ let (name, buffer) = (name.as_ref(), buffer.as_ref());
+ let tokens = match lexer::tokenize(name, buffer) {
+ Ok(xs) => xs,
+ Err(errs) => todo!(),
+ };
+ let (ast, _extra_data) = parser::parse(name, &tokens);
+ let mut fdata = state.files.get_mut(&file).unwrap();
+ fdata.tokens = Arc::from(MaybeUninit::new(tokens));
+ fdata.ast = Arc::from(MaybeUninit::new(ast));
+ },
+ _ => todo!(),
}
state.njobs.fetch_sub(1, Ordering::SeqCst);
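
The worker loop above leans on a find_task helper that is outside this diff. A minimal sketch, assuming it follows the canonical work-stealing lookup from the crossbeam_deque documentation (local queue first, then the global injector, then stealing from peer workers):

use std::iter;

use crossbeam_deque::{
	Injector,
	Stealer,
	Worker,
};

fn find_task<T>(
	local: &Worker<T>,
	global: &Injector<T>,
	stealers: &[Stealer<T>],
) -> Option<T> {
	/* Pop a task from the local queue, if not empty. */
	local.pop().or_else(|| {
		/* Otherwise look for a task elsewhere. */
		iter::repeat_with(|| {
			/* Try stealing a batch of tasks from the global queue,
			   or a single task from one of the other workers. */
			global
				.steal_batch_and_pop(local)
				.or_else(|| stealers.iter().map(|s| s.steal()).collect())
		})
		/* Loop while the steal operations report Retry. */
		.find(|s| !s.is_retry())
		/* Extract the stolen task, if there is one. */
		.and_then(|s| s.success())
	})
}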
diff --git a/oryxc/src/lexer.rs b/oryxc/src/lexer.rs
index 531593d..6457cfd 100644
--- a/oryxc/src/lexer.rs
+++ b/oryxc/src/lexer.rs
@@ -1,5 +1,7 @@
+use std::borrow::Cow;
use std::ffi::OsStr;
use std::fmt::Display;
+use std::vec::Vec;
use std::{
iter,
mem,
@@ -8,16 +10,16 @@ use std::{
use phf;
use soa_rs::{
- self,
+ Soa,
Soars,
};
use crate::{
errors,
- size,
unicode,
};
+#[allow(dead_code)]
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenType {
@@ -87,17 +89,31 @@ impl TokenType {
}
}
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct Span(usize, usize);
+
#[derive(Soars)]
#[soa_derive(Debug)]
-pub struct Token<'a> {
+pub struct Token {
pub kind: TokenType,
- pub view: &'a str,
+ pub view: Span,
}
-pub struct TokenizedBuffer<'a> {
- pub tokens: soa_rs::Soa<Token<'a>>,
- pub buffer: &'a str,
- pub filename: Option<&'a OsStr>,
+pub struct Error {
+ pub pos: usize,
+ pub msg: Cow<'static, str>,
+}
+
+impl Error {
+ fn new<T>(pos: usize, msg: T) -> Self
+ where
+ T: Into<Cow<'static, str>>,
+ {
+ return Self {
+ pos,
+ msg: msg.into(),
+ };
+ }
}
struct LexerContext<'a> {
@@ -105,12 +121,12 @@ struct LexerContext<'a> {
pos_b: usize, /* Pos [b]efore char */
chars: iter::Peekable<str::Chars<'a>>,
string: &'a str,
- filename: Option<&'a OsStr>,
+ filename: &'a OsStr,
expect_punct_p: bool,
}
impl<'a> LexerContext<'a> {
- fn new(filename: Option<&'a OsStr>, string: &'a str) -> Self {
+ fn new(filename: &'a OsStr, string: &'a str) -> Self {
return Self {
pos_a: 0,
pos_b: 0,
@@ -138,7 +154,7 @@ impl<'a> LexerContext<'a> {
where
S: Display,
{
- errors::err_at_position(self.filename.unwrap_or(OsStr::new("-")), s);
+ errors::err_at_position(self.filename, s);
}
#[inline(always)]
@@ -157,11 +173,8 @@ static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
"return" => TokenType::KeywordReturn,
};
-pub fn tokenize<'a>(
- filename: Option<&'a OsStr>,
- s: &'a str,
-) -> TokenizedBuffer<'a> {
- let mut toks = soa_rs::Soa::<Token>::with_capacity(size::kibibytes(10));
+pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
+ let mut toks = Soa::<Token>::with_capacity(s.len() / 2);
let mut ctx = LexerContext::new(filename, s);
while let Some(c) = ctx.next() {
@@ -182,7 +195,7 @@ pub fn tokenize<'a>(
};
Some(Token {
kind,
- view: &s[i..ctx.pos_a],
+ view: Span(i, ctx.pos_a),
})
},
'>' if ctx.peek().is_some_and(|c| c == '>') => {
@@ -195,21 +208,21 @@ pub fn tokenize<'a>(
};
Some(Token {
kind,
- view: &s[i..ctx.pos_a],
+ view: Span(i, ctx.pos_a),
})
},
'&' if ctx.peek().is_some_and(|c| c == '~') => {
ctx.next(); /* Consume ‘~’ */
Some(Token {
kind: TokenType::AmpersandTilde,
- view: &s[i..j + 1],
+ view: Span(i, j + 1),
})
},
'!' | '&' | '(' | ')' | '*' | '+' | ',' | '-' | '/' | ';' | '<'
| '=' | '>' | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '…' => {
Some(Token {
kind: unsafe { mem::transmute(c as u8) },
- view: &s[i..j],
+ view: Span(i, j),
})
},
'#' => {
@@ -246,13 +259,9 @@ pub fn tokenize<'a>(
toks.push(Token {
kind: TokenType::Eof,
- view: &s[s.len() - 1..],
+ view: Span(s.len() - 1, s.len()),
});
- return TokenizedBuffer {
- tokens: toks,
- buffer: s,
- filename,
- };
+ return Ok(toks);
}
fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
@@ -277,7 +286,7 @@ fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
ctx.err_at_position("Unterminated comment");
}
-fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token {
let i = ctx.pos_b;
let alphabet = match ctx.next() {
Some('b') => "01",
@@ -305,14 +314,14 @@ fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
},
None => ctx.err_at_position("Expected number after base specifier"),
};
- tok.view = &ctx.string[i..ctx.pos_a];
+ tok.view = Span(i, ctx.pos_a);
return tok;
}
fn tokenize_number<'a>(
ctx: &mut LexerContext<'a>,
alphabet: &'static str,
-) -> Token<'a> {
+) -> Token {
let i = ctx.pos_b;
span_raw_number(ctx, alphabet, true);
@@ -332,7 +341,7 @@ fn tokenize_number<'a>(
return Token {
kind: TokenType::Number,
- view: &ctx.string[i..ctx.pos_a],
+ view: Span(i, ctx.pos_a),
};
}
@@ -396,7 +405,7 @@ fn span_raw_number<'a>(
}
}
-fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token {
let i = ctx.pos_b;
loop {
if let Some(c) = ctx.next() {
@@ -409,17 +418,17 @@ fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
}
return Token {
kind: TokenType::String,
- view: &ctx.string[i..ctx.pos_a],
+ view: Span(i, ctx.pos_a),
};
}
-fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token {
let i = ctx.pos_b;
while ctx.peek().is_some_and(unicode::xid_continue_p) {
ctx.next();
}
- let view = &ctx.string[i..ctx.pos_a];
- let kind = match KEYWORDS.get(view) {
+ let view = Span(i, ctx.pos_a);
+ let kind = match KEYWORDS.get(&ctx.string[view.0..view.1]) {
Some(kind) => kind.clone(),
None => TokenType::Identifier,
};
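
Since Token now stores a Span of byte offsets instead of borrowing from the source, consumers have to pair a span with the buffer to recover the text. A hypothetical helper (not part of this commit) would make the half-open convention explicit:

impl Span {
	/* Resolve the half-open byte range [start, end) back into a
	   slice of the buffer the token was lexed from. */
	pub fn as_str<'a>(&self, buffer: &'a str) -> &'a str {
		return &buffer[self.0..self.1];
	}
}

With such a helper, the keyword lookup at the end of tokenize_identifier could read KEYWORDS.get(view.as_str(ctx.string)) instead of indexing with view.0 and view.1.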
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
index 298093d..3f4be0f 100644
--- a/oryxc/src/main.rs
+++ b/oryxc/src/main.rs
@@ -10,7 +10,6 @@ mod unicode;
use std::ffi::OsString;
use std::{
env,
- fs,
process,
thread,
};
diff --git a/oryxc/src/parser.rs b/oryxc/src/parser.rs
index 212d0db..d726f8d 100644
--- a/oryxc/src/parser.rs
+++ b/oryxc/src/parser.rs
@@ -9,8 +9,8 @@ use soa_rs::{
};
use crate::lexer::{
+ Token,
TokenType,
- TokenizedBuffer,
};
use crate::{
errors,
@@ -87,33 +87,31 @@ pub union ExtraData {
r#return: ManuallyDrop<ReturnData>,
}
-struct Parser<'a, 'b> {
+struct Parser<'a> {
ast: Soa<AstNode>,
extra_data: Vec<ExtraData>,
- tokbuf: &'a TokenizedBuffer<'b>,
cursor: u32,
scratch: Vec<u32>,
+ tokens: &'a Soa<Token>,
+ filename: &'a OsStr,
}
-impl<'a, 'b> Parser<'a, 'b> {
- fn new(tokbuf: &'a TokenizedBuffer<'b>) -> Self {
+impl<'a> Parser<'a> {
+ fn new(filename: &'a OsStr, tokens: &'a Soa<Token>) -> Self {
return Self {
ast: Soa::with_capacity(size::kibibytes(10)),
extra_data: Vec::with_capacity(size::kibibytes(1)),
- tokbuf,
cursor: 0,
scratch: Vec::with_capacity(64),
+ tokens,
+ filename,
};
}
#[inline(always)]
fn get(&self) -> TokenType {
return unsafe {
- *self
- .tokbuf
- .tokens
- .kind()
- .get_unchecked(self.cursor as usize)
+ *self.tokens.kind().get_unchecked(self.cursor as usize)
};
}
@@ -146,10 +144,7 @@ impl<'a, 'b> Parser<'a, 'b> {
where
T: Display,
{
- errors::err_at_position(
- self.tokbuf.filename.unwrap_or(OsStr::new("-")),
- s,
- );
+ errors::err_at_position(self.filename, s);
}
fn parse_toplevel(&mut self) {
@@ -535,8 +530,11 @@ impl<'a, 'b> Parser<'a, 'b> {
}
}
-pub fn parse(tokbuf: &TokenizedBuffer) -> (Soa<AstNode>, Vec<ExtraData>) {
- let mut p = Parser::new(tokbuf);
+pub fn parse(
+ filename: &OsStr,
+ tokens: &Soa<Token>,
+) -> (Soa<AstNode>, Vec<ExtraData>) {
+ let mut p = Parser::new(filename, tokens);
while p.get() != TokenType::Eof {
p.parse_toplevel();
}
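
Taken together, the new tokenize and parse signatures compose the way the Job::LexAndParse arm in compiler.rs uses them. A minimal single-file sketch under the same assumptions (lexer error reporting still hits the todo!() path):

use std::ffi::OsStr;

use crate::{
	lexer,
	parser,
};

fn lex_and_parse(name: &OsStr, buffer: &str) {
	/* tokenize now returns Result<Soa<Token>, Vec<Error>> rather
	   than a TokenizedBuffer that borrows the source. */
	let tokens = lexer::tokenize(name, buffer).unwrap();
	/* parse takes the filename and tokens directly; the worker
	   then stores both back into the shared FileData. */
	let (_ast, _extra_data) = parser::parse(name, &tokens);
}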