diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2026-03-04 20:31:24 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-03-04 20:31:24 +0100 |
| commit | 2a18c3b5841a8bd7ff1776c9801fd2d50e35ba10 (patch) | |
| tree | 485d5c22216869e18215d01caab0d5dd09f459bb /oryxc | |
| parent | ed3258836d0f4e806352ce60bec65a1ea26c8987 (diff) | |
| parent | ff258d9ce16fb99ef2fe3cfbae65634b98f782d2 (diff) | |
Merge pull request #1 from romirk/romirk/misc
vec supremacy
Diffstat (limited to 'oryxc')
| -rw-r--r-- | oryxc/Cargo.toml | 3 | ||||
| -rw-r--r-- | oryxc/src/compiler.rs | 292 | ||||
| -rw-r--r-- | oryxc/src/errors.rs | 14 | ||||
| -rw-r--r-- | oryxc/src/main.rs | 124 |
4 files changed, 216 insertions, 217 deletions
diff --git a/oryxc/Cargo.toml b/oryxc/Cargo.toml index 1b418f3..267e889 100644 --- a/oryxc/Cargo.toml +++ b/oryxc/Cargo.toml @@ -5,9 +5,8 @@ edition = "2024" [dependencies] crossbeam-deque = "0.8.6" -dashmap = "6.1.0" # icu = { version = "2.1.1", features = ["compiled_data"] } -lexopt = "0.3.2" +clap = { version = "4", features = ["derive"] } # num-rational = "0.4.2" phf = { version = "0.13.1", features = ["macros"] } soa-rs = "0.9.1" diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs index 8fdd53f..0d1c65d 100644 --- a/oryxc/src/compiler.rs +++ b/oryxc/src/compiler.rs @@ -1,22 +1,20 @@ use std::ffi::OsString; use std::io::{ self, + Read, Write, }; -use std::iter::{ - self, - IntoIterator, -}; -use std::mem::MaybeUninit; -use std::sync::Arc; +use std::iter::once; use std::sync::atomic::{ AtomicUsize, Ordering, }; -use std::vec::Vec; +use std::sync::{ + Arc, + OnceLock, +}; use std::{ fs, - panic, process, thread, }; @@ -27,7 +25,6 @@ use crossbeam_deque::{ Stealer, Worker, }; -use dashmap::DashMap; use soa_rs::Soa; use crate::errors::OryxError; @@ -44,64 +41,101 @@ use crate::{ pub struct FileId(usize); pub struct FileData { - name: Arc<OsString>, - buffer: Arc<String>, - tokens: Arc<MaybeUninit<Soa<Token>>>, - ast: Arc<MaybeUninit<Soa<AstNode>>>, - extra_data: Arc<MaybeUninit<Vec<u32>>>, + pub name: OsString, + pub buffer: String, + pub tokens: OnceLock<Soa<Token>>, + pub ast: OnceLock<Soa<AstNode>>, + pub extra_data: OnceLock<Vec<u32>>, } impl FileData { + /// Read a source file from disk and create a new [`FileData`]. fn new(name: OsString) -> Result<Self, io::Error> { const PAD: [u8; 64] = [0; 64]; /* 512 bits */ + // Pre-allocate to avoid reallocation when appending padding. // Append extra data to the end so that we can safely read past - // instead of branching on length - let mut buffer = fs::read_to_string(&name)?; - buffer.push_str(unsafe { str::from_utf8_unchecked(&PAD) }); - - return Ok(Self { - name: name.into(), - buffer: buffer.into(), - tokens: Arc::new_uninit(), - ast: Arc::new_uninit(), - extra_data: Arc::new_uninit(), - }); + // instead of branching on length. + let size = fs::metadata(&name)?.len() as usize; + let mut buffer = String::with_capacity(size + PAD.len()); + fs::File::open(&name)?.read_to_string(&mut buffer)?; + buffer.push_str(unsafe { std::str::from_utf8_unchecked(&PAD) }); + + Ok(Self { + name, + buffer, + tokens: OnceLock::new(), + ast: OnceLock::new(), + extra_data: OnceLock::new(), + }) } } +#[allow(dead_code)] pub enum Job { - Lex { file: FileId }, - Parse { file: FileId }, - ResolveSymbols { file: FileId }, + Lex { file: FileId, fdata: Arc<FileData> }, + Parse { file: FileId, fdata: Arc<FileData> }, + ResolveSymbols { file: FileId, fdata: Arc<FileData> }, } pub struct CompilerState { - pub files: DashMap<FileId, FileData>, - pub globalq: Injector<Job>, - pub njobs: AtomicUsize, - pub flags: Flags, + #[allow(dead_code)] + pub files: Vec<Arc<FileData>>, + pub globalq: Injector<Job>, + pub njobs: AtomicUsize, + pub flags: Flags, + pub worker_threads: OnceLock<Box<[thread::Thread]>>, +} + +impl CompilerState { + /// Unpark all worker threads. + fn wake_all(&self) { + if let Some(threads) = self.worker_threads.get() { + for t in threads.iter() { + t.unpark(); + } + } + } + + /// Push a job onto a worker's local queue and wake all threads. + fn push_job(&self, queue: &Worker<Job>, job: Job) { + queue.push(job); + self.wake_all(); + } } +/// Initialize compiler state and drive all source files through the pipeline. pub fn start<T>(paths: T, flags: Flags) where T: IntoIterator<Item = OsString>, { + let mut files = Vec::new(); + let mut initial_jobs = Vec::new(); + + for (i, path) in paths.into_iter().enumerate() { + let id = FileId(i); + + // take ownership of the OsString so we can store it in FileData without + // cloning + let display = path.to_string_lossy().into_owned(); + let fdata = Arc::new( + FileData::new(path).unwrap_or_else(|e| err!(e, "{}", display)), + ); + files.push(Arc::clone(&fdata)); + initial_jobs.push(Job::Lex { file: id, fdata }); + } + + let njobs = initial_jobs.len(); let state = Arc::new(CompilerState { - files: DashMap::new(), + files, globalq: Injector::new(), - njobs: AtomicUsize::new(0), + njobs: AtomicUsize::new(njobs), flags, + worker_threads: OnceLock::new(), }); - for (i, path) in paths.into_iter().enumerate() { - let id = FileId(i); - let data = match FileData::new(path.clone().into()) { - Ok(x) => x, - Err(e) => err!(e, "{}", path.display()), - }; - state.files.insert(id, data); - state.njobs.fetch_add(1, Ordering::Relaxed); - state.globalq.push(Job::Lex { file: id }); + + for job in initial_jobs { + state.globalq.push(job); } let mut workers = Vec::with_capacity(flags.threads); @@ -112,123 +146,99 @@ where workers.push(w); } - let mut threads = Vec::with_capacity(flags.threads); let stealer_view: Arc<[_]> = Arc::from(stealers); + let handles: Vec<_> = workers + .into_iter() + .enumerate() + .map(|(id, w)| { + let stealer_view = Arc::clone(&stealer_view); + let state = Arc::clone(&state); + thread::spawn(move || worker_loop(id, state, w, stealer_view)) + }) + .collect(); - for (id, w) in workers.into_iter().enumerate() { - let stealer_view = Arc::clone(&stealer_view); - let state = Arc::clone(&state); - threads.push(thread::spawn(move || { - worker_loop(id, state, w, stealer_view); - })); - } + let worker_threads: Box<[thread::Thread]> = + handles.iter().map(|h| h.thread().clone()).collect(); + let _ = state.worker_threads.set(worker_threads); - for t in threads { - t.join().unwrap_or_else(|e| panic::resume_unwind(e)); - } -} - -macro_rules! fdata_read { - ($state:expr, $file:expr, $($field:ident),+ $(,)?) => { - #[allow(unused_parens)] - let ($($field),+) = { - let fdata = $state.files.get(&$file).unwrap(); - ($(fdata.$field.clone()),+) - }; - }; -} - -macro_rules! fdata_write { - ($state:expr, $file:expr, $($field:ident),+ $(,)?) => { - { - let mut fdata = $state.files.get_mut(&$file).unwrap(); - $( - fdata.$field = Arc::from(MaybeUninit::new($field)); - )+ + for h in handles { + if let Err(e) = h.join() { + std::panic::resume_unwind(e) } - }; -} - -fn emit_errors<T>(state: Arc<CompilerState>, file: FileId, errors: T) -where - T: IntoIterator<Item = OryxError>, -{ - fdata_read!(state, file, name, buffer); - for e in errors.into_iter() { - e.report(name.as_ref(), buffer.as_ref()); } } +/// Steal and execute jobs until all work is complete. fn worker_loop( - id: usize, + _id: usize, state: Arc<CompilerState>, queue: Worker<Job>, stealers: Arc<[Stealer<Job>]>, ) { loop { - if state.njobs.load(Ordering::Relaxed) == 0 { + if state.njobs.load(Ordering::Acquire) == 0 { break; } - let job = find_task(&queue, &state.globalq, &stealers); - if let Some(job) = job { - match job { - Job::Lex { file } => { - fdata_read!(state, file, buffer); - let tokens = match lexer::tokenize(buffer.as_ref()) { - Ok(xs) => xs, - Err(e) => { - emit_errors(state.clone(), file, iter::once(e)); - process::exit(1); - }, - }; - - if state.flags.debug_lexer { - let mut handle = io::stderr().lock(); - for t in tokens.iter() { - let _ = write!(handle, "{t:?}\n"); - } + let Some(job) = find_task(&queue, &state.globalq, &stealers) else { + thread::park(); + continue; + }; + + match job { + Job::Lex { file, fdata } => { + let tokens = + lexer::tokenize(&fdata.buffer).unwrap_or_else(|e| { + emit_errors(&fdata, once(e)); + process::exit(1) + }); + + if state.flags.debug_lexer { + let mut handle = io::stderr().lock(); + for t in tokens.iter() { + let _ = write!(handle, "{t:?}\n"); } + } + + fdata.tokens.set(tokens).unwrap(); + state.njobs.fetch_add(1, Ordering::Relaxed); + state.push_job(&queue, Job::Parse { file, fdata }); + }, + Job::Parse { file, fdata } => { + let (ast, extra_data) = parser::parse( + fdata.tokens.get().unwrap(), + ) + .unwrap_or_else(|errs| { + emit_errors(&fdata, errs); + process::exit(1) + }); - fdata_write!(state, file, tokens); - state.njobs.fetch_add(1, Ordering::Relaxed); - queue.push(Job::Parse { file }); - }, - Job::Parse { file } => { - fdata_read!(state, file, tokens); - let (ast, extra_data) = match parser::parse( - unsafe { tokens.assume_init() }.as_ref(), - ) { - Ok(xs) => xs, - Err(errs) => { - emit_errors(state.clone(), file, errs); - process::exit(1); - }, - }; - - if state.flags.debug_parser { - let mut handle = io::stderr().lock(); - for n in ast.iter() { - let _ = write!(handle, "{n:?}\n"); - } + if state.flags.debug_parser { + let mut handle = io::stderr().lock(); + for n in ast.iter() { + let _ = write!(handle, "{n:?}\n"); } + } - fdata_write!(state, file, ast, extra_data); - state.njobs.fetch_add(1, Ordering::Relaxed); - queue.push(Job::ResolveSymbols { file }); - }, - Job::ResolveSymbols { file } => { - err!("not implemented"); - }, - } + fdata.ast.set(ast).unwrap(); + fdata.extra_data.set(extra_data).unwrap(); + state.njobs.fetch_add(1, Ordering::Relaxed); + state.push_job(&queue, Job::ResolveSymbols { file, fdata }); + }, + Job::ResolveSymbols { file: _, fdata: _ } => { + err!("not implemented"); + }, + } - state.njobs.fetch_sub(1, Ordering::Relaxed); - } else { - thread::yield_now(); + if state.njobs.fetch_sub(1, Ordering::Release) == 1 { + // njobs is 0; wake all threads so they can observe the termination + // condition and exit. + state.wake_all(); } } } +/// Get next available job or steal from the global queue or peers if local queue is empty. fn find_task( localq: &Worker<Job>, globalq: &Injector<Job>, @@ -256,5 +266,15 @@ fn find_task( } } - return None; + None +} + +/// Print all errors to stderr using the file's name and source buffer. +fn emit_errors<T>(fdata: &FileData, errors: T) +where + T: IntoIterator<Item = OryxError>, +{ + for e in errors { + e.report(&fdata.name, &fdata.buffer); + } } diff --git a/oryxc/src/errors.rs b/oryxc/src/errors.rs index 290abc2..4ad88d6 100644 --- a/oryxc/src/errors.rs +++ b/oryxc/src/errors.rs @@ -12,7 +12,10 @@ use std::fmt::{ }; use std::io::Write; use std::path::Path; -use std::sync::OnceLock; +use std::sync::{ + LazyLock, + OnceLock, +}; use std::{ env, io, @@ -25,8 +28,9 @@ use crate::unicode; const TAB_AS_SPACES: &'static str = " "; const TABSIZE: usize = TAB_AS_SPACES.len(); -#[derive(Clone, Copy, Default, Eq, PartialEq)] +#[derive(Clone, Copy, Default, Eq, PartialEq, clap::ValueEnum)] pub enum ErrorStyle { + #[value(name = "oneline")] OneLine, #[default] Standard, @@ -35,12 +39,12 @@ pub enum ErrorStyle { pub static ERROR_STYLE: OnceLock<ErrorStyle> = OnceLock::new(); pub fn progname() -> &'static OsString { - static ARGV0: OnceLock<OsString> = OnceLock::new(); - return ARGV0.get_or_init(|| { + static ARGV0: LazyLock<OsString> = LazyLock::new(|| { let default = OsStr::new("oryxc"); let s = env::args_os().next().unwrap_or(default.into()); - return Path::new(&s).file_name().unwrap_or(default).to_os_string(); + Path::new(&s).file_name().unwrap_or(default).to_os_string() }); + &ARGV0 } #[macro_export] diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs index 7a0b0a8..7320c19 100644 --- a/oryxc/src/main.rs +++ b/oryxc/src/main.rs @@ -7,101 +7,77 @@ mod parser; mod size; mod unicode; -use std::borrow::Cow; use std::ffi::OsString; -use std::{ - env, - process, - thread, -}; +use std::thread; -use lexopt; +use clap::{ + CommandFactory, + FromArgMatches, + Parser, +}; #[derive(Clone, Copy, Default)] pub struct Flags { pub debug_lexer: bool, pub debug_parser: bool, - pub help: bool, pub threads: usize, pub error_style: errors::ErrorStyle, } -impl Flags { - fn parse() -> Result<(Flags, Vec<OsString>), lexopt::Error> { - use lexopt::prelude::*; +#[derive(Parser)] +struct Args { + #[arg(short = 'l', long)] + debug_lexer: bool, + + #[arg(short = 'p', long)] + debug_parser: bool, + + #[arg(short = 's', long, default_value = "standard")] + error_style: errors::ErrorStyle, + + #[arg(short = 't', long)] + threads: Option<usize>, - let mut rest = Vec::with_capacity(env::args().len()); - let mut flags = Flags::default(); - let mut parser = lexopt::Parser::from_env(); - parser.set_short_equals(false); + files: Vec<OsString>, +} - while let Some(arg) = parser.next()? { - match arg { - Short('h') | Long("help") => flags.help = true, - Short('l') | Long("debug-lexer") => flags.debug_lexer = true, - Short('p') | Long("debug-parser") => flags.debug_parser = true, - Short('s') | Long("error-style") => { - flags.error_style = match parser.value()?.to_string_lossy() - { - Cow::Borrowed("oneline") => errors::ErrorStyle::OneLine, - Cow::Borrowed("standard") => { - errors::ErrorStyle::Standard - }, - s => Err(format!( - "{s}: invalid value for -s/--error-style" - ))?, - }; - }, - Short('t') | Long("threads") => { - flags.threads = parser.value()?.parse()?; - if flags.threads == 0 { - err!("thread count must be greater than 0"); - } - }, - Value(v) => rest.push(v), - _ => return Err(arg.unexpected()), - } - } +fn main() { + let args = Args::from_arg_matches( + &Args::command().override_usage(usage()).get_matches(), + ) + .unwrap_or_else(|e| e.exit()); - if flags.threads == 0 { - flags.threads = thread::available_parallelism().map_or_else( - |e| { - warn!(e, "failed to get thread count"); - 1 - }, - |x| x.get(), - ); - } + let threads = args.threads.unwrap_or_else(|| { + thread::available_parallelism().map_or_else( + |e| { + warn!(e, "failed to get thread count"); + 1 + }, + |x| x.get(), + ) + }); - return Ok((flags, rest)); + if threads == 0 { + err!("thread count must be greater than 0"); } + + let flags = Flags { + debug_lexer: args.debug_lexer, + debug_parser: args.debug_parser, + threads, + error_style: args.error_style, + }; + + let _ = errors::ERROR_STYLE.set(flags.error_style); + compiler::start(args.files, flags); } -fn usage() { - eprintln!( +fn usage() -> String { + format!( concat!( "Usage: {0} [-lp] [-s oneline|standard] [-t threads]\n", " {0} -h", ), errors::progname().display() - ); -} - -fn main() { - let (flags, rest) = match Flags::parse() { - Ok(v) => v, - Err(e) => { - warn!(e); - usage(); - process::exit(1); - }, - }; - - if flags.help { - usage(); - process::exit(0); - } - - let _ = errors::ERROR_STYLE.set(flags.error_style); - compiler::start(rest, flags); + ) } |