summary refs log tree commit diff
path: root/oryxc
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2026-03-04 20:31:24 +0100
committerGitHub <noreply@github.com> 2026-03-04 20:31:24 +0100
commit2a18c3b5841a8bd7ff1776c9801fd2d50e35ba10 (patch)
tree485d5c22216869e18215d01caab0d5dd09f459bb /oryxc
parented3258836d0f4e806352ce60bec65a1ea26c8987 (diff)
parentff258d9ce16fb99ef2fe3cfbae65634b98f782d2 (diff)
Merge pull request #1 from romirk/romirk/misc
vec supremacy
Diffstat (limited to 'oryxc')
-rw-r--r--oryxc/Cargo.toml3
-rw-r--r--oryxc/src/compiler.rs292
-rw-r--r--oryxc/src/errors.rs14
-rw-r--r--oryxc/src/main.rs124
4 files changed, 216 insertions, 217 deletions
diff --git a/oryxc/Cargo.toml b/oryxc/Cargo.toml
index 1b418f3..267e889 100644
--- a/oryxc/Cargo.toml
+++ b/oryxc/Cargo.toml
@@ -5,9 +5,8 @@ edition = "2024"
[dependencies]
crossbeam-deque = "0.8.6"
-dashmap = "6.1.0"
# icu = { version = "2.1.1", features = ["compiled_data"] }
-lexopt = "0.3.2"
+clap = { version = "4", features = ["derive"] }
# num-rational = "0.4.2"
phf = { version = "0.13.1", features = ["macros"] }
soa-rs = "0.9.1"
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index 8fdd53f..0d1c65d 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -1,22 +1,20 @@
use std::ffi::OsString;
use std::io::{
self,
+ Read,
Write,
};
-use std::iter::{
- self,
- IntoIterator,
-};
-use std::mem::MaybeUninit;
-use std::sync::Arc;
+use std::iter::once;
use std::sync::atomic::{
AtomicUsize,
Ordering,
};
-use std::vec::Vec;
+use std::sync::{
+ Arc,
+ OnceLock,
+};
use std::{
fs,
- panic,
process,
thread,
};
@@ -27,7 +25,6 @@ use crossbeam_deque::{
Stealer,
Worker,
};
-use dashmap::DashMap;
use soa_rs::Soa;
use crate::errors::OryxError;
@@ -44,64 +41,101 @@ use crate::{
pub struct FileId(usize);
pub struct FileData {
- name: Arc<OsString>,
- buffer: Arc<String>,
- tokens: Arc<MaybeUninit<Soa<Token>>>,
- ast: Arc<MaybeUninit<Soa<AstNode>>>,
- extra_data: Arc<MaybeUninit<Vec<u32>>>,
+ pub name: OsString,
+ pub buffer: String,
+ pub tokens: OnceLock<Soa<Token>>,
+ pub ast: OnceLock<Soa<AstNode>>,
+ pub extra_data: OnceLock<Vec<u32>>,
}
impl FileData {
+ /// Read a source file from disk and create a new [`FileData`].
fn new(name: OsString) -> Result<Self, io::Error> {
const PAD: [u8; 64] = [0; 64]; /* 512 bits */
+ // Pre-allocate to avoid reallocation when appending padding.
// Append extra data to the end so that we can safely read past
- // instead of branching on length
- let mut buffer = fs::read_to_string(&name)?;
- buffer.push_str(unsafe { str::from_utf8_unchecked(&PAD) });
-
- return Ok(Self {
- name: name.into(),
- buffer: buffer.into(),
- tokens: Arc::new_uninit(),
- ast: Arc::new_uninit(),
- extra_data: Arc::new_uninit(),
- });
+ // instead of branching on length.
+ let size = fs::metadata(&name)?.len() as usize;
+ let mut buffer = String::with_capacity(size + PAD.len());
+ fs::File::open(&name)?.read_to_string(&mut buffer)?;
+ buffer.push_str(unsafe { std::str::from_utf8_unchecked(&PAD) });
+
+ Ok(Self {
+ name,
+ buffer,
+ tokens: OnceLock::new(),
+ ast: OnceLock::new(),
+ extra_data: OnceLock::new(),
+ })
}
}
+#[allow(dead_code)]
pub enum Job {
- Lex { file: FileId },
- Parse { file: FileId },
- ResolveSymbols { file: FileId },
+ Lex { file: FileId, fdata: Arc<FileData> },
+ Parse { file: FileId, fdata: Arc<FileData> },
+ ResolveSymbols { file: FileId, fdata: Arc<FileData> },
}
pub struct CompilerState {
- pub files: DashMap<FileId, FileData>,
- pub globalq: Injector<Job>,
- pub njobs: AtomicUsize,
- pub flags: Flags,
+ #[allow(dead_code)]
+ pub files: Vec<Arc<FileData>>,
+ pub globalq: Injector<Job>,
+ pub njobs: AtomicUsize,
+ pub flags: Flags,
+ pub worker_threads: OnceLock<Box<[thread::Thread]>>,
+}
+
+impl CompilerState {
+ /// Unpark all worker threads.
+ fn wake_all(&self) {
+ if let Some(threads) = self.worker_threads.get() {
+ for t in threads.iter() {
+ t.unpark();
+ }
+ }
+ }
+
+ /// Push a job onto a worker's local queue and wake all threads.
+ fn push_job(&self, queue: &Worker<Job>, job: Job) {
+ queue.push(job);
+ self.wake_all();
+ }
}
+/// Initialize compiler state and drive all source files through the pipeline.
pub fn start<T>(paths: T, flags: Flags)
where
T: IntoIterator<Item = OsString>,
{
+ let mut files = Vec::new();
+ let mut initial_jobs = Vec::new();
+
+ for (i, path) in paths.into_iter().enumerate() {
+ let id = FileId(i);
+
+ // take ownership of the OsString so we can store it in FileData without
+ // cloning
+ let display = path.to_string_lossy().into_owned();
+ let fdata = Arc::new(
+ FileData::new(path).unwrap_or_else(|e| err!(e, "{}", display)),
+ );
+ files.push(Arc::clone(&fdata));
+ initial_jobs.push(Job::Lex { file: id, fdata });
+ }
+
+ let njobs = initial_jobs.len();
let state = Arc::new(CompilerState {
- files: DashMap::new(),
+ files,
globalq: Injector::new(),
- njobs: AtomicUsize::new(0),
+ njobs: AtomicUsize::new(njobs),
flags,
+ worker_threads: OnceLock::new(),
});
- for (i, path) in paths.into_iter().enumerate() {
- let id = FileId(i);
- let data = match FileData::new(path.clone().into()) {
- Ok(x) => x,
- Err(e) => err!(e, "{}", path.display()),
- };
- state.files.insert(id, data);
- state.njobs.fetch_add(1, Ordering::Relaxed);
- state.globalq.push(Job::Lex { file: id });
+
+ for job in initial_jobs {
+ state.globalq.push(job);
}
let mut workers = Vec::with_capacity(flags.threads);
@@ -112,123 +146,99 @@ where
workers.push(w);
}
- let mut threads = Vec::with_capacity(flags.threads);
let stealer_view: Arc<[_]> = Arc::from(stealers);
+ let handles: Vec<_> = workers
+ .into_iter()
+ .enumerate()
+ .map(|(id, w)| {
+ let stealer_view = Arc::clone(&stealer_view);
+ let state = Arc::clone(&state);
+ thread::spawn(move || worker_loop(id, state, w, stealer_view))
+ })
+ .collect();
- for (id, w) in workers.into_iter().enumerate() {
- let stealer_view = Arc::clone(&stealer_view);
- let state = Arc::clone(&state);
- threads.push(thread::spawn(move || {
- worker_loop(id, state, w, stealer_view);
- }));
- }
+ let worker_threads: Box<[thread::Thread]> =
+ handles.iter().map(|h| h.thread().clone()).collect();
+ let _ = state.worker_threads.set(worker_threads);
- for t in threads {
- t.join().unwrap_or_else(|e| panic::resume_unwind(e));
- }
-}
-
-macro_rules! fdata_read {
- ($state:expr, $file:expr, $($field:ident),+ $(,)?) => {
- #[allow(unused_parens)]
- let ($($field),+) = {
- let fdata = $state.files.get(&$file).unwrap();
- ($(fdata.$field.clone()),+)
- };
- };
-}
-
-macro_rules! fdata_write {
- ($state:expr, $file:expr, $($field:ident),+ $(,)?) => {
- {
- let mut fdata = $state.files.get_mut(&$file).unwrap();
- $(
- fdata.$field = Arc::from(MaybeUninit::new($field));
- )+
+ for h in handles {
+ if let Err(e) = h.join() {
+ std::panic::resume_unwind(e)
}
- };
-}
-
-fn emit_errors<T>(state: Arc<CompilerState>, file: FileId, errors: T)
-where
- T: IntoIterator<Item = OryxError>,
-{
- fdata_read!(state, file, name, buffer);
- for e in errors.into_iter() {
- e.report(name.as_ref(), buffer.as_ref());
}
}
+/// Steal and execute jobs until all work is complete.
fn worker_loop(
- id: usize,
+ _id: usize,
state: Arc<CompilerState>,
queue: Worker<Job>,
stealers: Arc<[Stealer<Job>]>,
) {
loop {
- if state.njobs.load(Ordering::Relaxed) == 0 {
+ if state.njobs.load(Ordering::Acquire) == 0 {
break;
}
- let job = find_task(&queue, &state.globalq, &stealers);
- if let Some(job) = job {
- match job {
- Job::Lex { file } => {
- fdata_read!(state, file, buffer);
- let tokens = match lexer::tokenize(buffer.as_ref()) {
- Ok(xs) => xs,
- Err(e) => {
- emit_errors(state.clone(), file, iter::once(e));
- process::exit(1);
- },
- };
-
- if state.flags.debug_lexer {
- let mut handle = io::stderr().lock();
- for t in tokens.iter() {
- let _ = write!(handle, "{t:?}\n");
- }
+ let Some(job) = find_task(&queue, &state.globalq, &stealers) else {
+ thread::park();
+ continue;
+ };
+
+ match job {
+ Job::Lex { file, fdata } => {
+ let tokens =
+ lexer::tokenize(&fdata.buffer).unwrap_or_else(|e| {
+ emit_errors(&fdata, once(e));
+ process::exit(1)
+ });
+
+ if state.flags.debug_lexer {
+ let mut handle = io::stderr().lock();
+ for t in tokens.iter() {
+ let _ = write!(handle, "{t:?}\n");
}
+ }
+
+ fdata.tokens.set(tokens).unwrap();
+ state.njobs.fetch_add(1, Ordering::Relaxed);
+ state.push_job(&queue, Job::Parse { file, fdata });
+ },
+ Job::Parse { file, fdata } => {
+ let (ast, extra_data) = parser::parse(
+ fdata.tokens.get().unwrap(),
+ )
+ .unwrap_or_else(|errs| {
+ emit_errors(&fdata, errs);
+ process::exit(1)
+ });
- fdata_write!(state, file, tokens);
- state.njobs.fetch_add(1, Ordering::Relaxed);
- queue.push(Job::Parse { file });
- },
- Job::Parse { file } => {
- fdata_read!(state, file, tokens);
- let (ast, extra_data) = match parser::parse(
- unsafe { tokens.assume_init() }.as_ref(),
- ) {
- Ok(xs) => xs,
- Err(errs) => {
- emit_errors(state.clone(), file, errs);
- process::exit(1);
- },
- };
-
- if state.flags.debug_parser {
- let mut handle = io::stderr().lock();
- for n in ast.iter() {
- let _ = write!(handle, "{n:?}\n");
- }
+ if state.flags.debug_parser {
+ let mut handle = io::stderr().lock();
+ for n in ast.iter() {
+ let _ = write!(handle, "{n:?}\n");
}
+ }
- fdata_write!(state, file, ast, extra_data);
- state.njobs.fetch_add(1, Ordering::Relaxed);
- queue.push(Job::ResolveSymbols { file });
- },
- Job::ResolveSymbols { file } => {
- err!("not implemented");
- },
- }
+ fdata.ast.set(ast).unwrap();
+ fdata.extra_data.set(extra_data).unwrap();
+ state.njobs.fetch_add(1, Ordering::Relaxed);
+ state.push_job(&queue, Job::ResolveSymbols { file, fdata });
+ },
+ Job::ResolveSymbols { file: _, fdata: _ } => {
+ err!("not implemented");
+ },
+ }
- state.njobs.fetch_sub(1, Ordering::Relaxed);
- } else {
- thread::yield_now();
+ if state.njobs.fetch_sub(1, Ordering::Release) == 1 {
+ // njobs is 0; wake all threads so they can observe the termination
+ // condition and exit.
+ state.wake_all();
}
}
}
+/// Get next available job or steal from the global queue or peers if local queue is empty.
fn find_task(
localq: &Worker<Job>,
globalq: &Injector<Job>,
@@ -256,5 +266,15 @@ fn find_task(
}
}
- return None;
+ None
+}
+
+/// Print all errors to stderr using the file's name and source buffer.
+fn emit_errors<T>(fdata: &FileData, errors: T)
+where
+ T: IntoIterator<Item = OryxError>,
+{
+ for e in errors {
+ e.report(&fdata.name, &fdata.buffer);
+ }
}
diff --git a/oryxc/src/errors.rs b/oryxc/src/errors.rs
index 290abc2..4ad88d6 100644
--- a/oryxc/src/errors.rs
+++ b/oryxc/src/errors.rs
@@ -12,7 +12,10 @@ use std::fmt::{
};
use std::io::Write;
use std::path::Path;
-use std::sync::OnceLock;
+use std::sync::{
+ LazyLock,
+ OnceLock,
+};
use std::{
env,
io,
@@ -25,8 +28,9 @@ use crate::unicode;
const TAB_AS_SPACES: &'static str = " ";
const TABSIZE: usize = TAB_AS_SPACES.len();
-#[derive(Clone, Copy, Default, Eq, PartialEq)]
+#[derive(Clone, Copy, Default, Eq, PartialEq, clap::ValueEnum)]
pub enum ErrorStyle {
+ #[value(name = "oneline")]
OneLine,
#[default]
Standard,
@@ -35,12 +39,12 @@ pub enum ErrorStyle {
pub static ERROR_STYLE: OnceLock<ErrorStyle> = OnceLock::new();
pub fn progname() -> &'static OsString {
- static ARGV0: OnceLock<OsString> = OnceLock::new();
- return ARGV0.get_or_init(|| {
+ static ARGV0: LazyLock<OsString> = LazyLock::new(|| {
let default = OsStr::new("oryxc");
let s = env::args_os().next().unwrap_or(default.into());
- return Path::new(&s).file_name().unwrap_or(default).to_os_string();
+ Path::new(&s).file_name().unwrap_or(default).to_os_string()
});
+ &ARGV0
}
#[macro_export]
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
index 7a0b0a8..7320c19 100644
--- a/oryxc/src/main.rs
+++ b/oryxc/src/main.rs
@@ -7,101 +7,77 @@ mod parser;
mod size;
mod unicode;
-use std::borrow::Cow;
use std::ffi::OsString;
-use std::{
- env,
- process,
- thread,
-};
+use std::thread;
-use lexopt;
+use clap::{
+ CommandFactory,
+ FromArgMatches,
+ Parser,
+};
#[derive(Clone, Copy, Default)]
pub struct Flags {
pub debug_lexer: bool,
pub debug_parser: bool,
- pub help: bool,
pub threads: usize,
pub error_style: errors::ErrorStyle,
}
-impl Flags {
- fn parse() -> Result<(Flags, Vec<OsString>), lexopt::Error> {
- use lexopt::prelude::*;
+#[derive(Parser)]
+struct Args {
+ #[arg(short = 'l', long)]
+ debug_lexer: bool,
+
+ #[arg(short = 'p', long)]
+ debug_parser: bool,
+
+ #[arg(short = 's', long, default_value = "standard")]
+ error_style: errors::ErrorStyle,
+
+ #[arg(short = 't', long)]
+ threads: Option<usize>,
- let mut rest = Vec::with_capacity(env::args().len());
- let mut flags = Flags::default();
- let mut parser = lexopt::Parser::from_env();
- parser.set_short_equals(false);
+ files: Vec<OsString>,
+}
- while let Some(arg) = parser.next()? {
- match arg {
- Short('h') | Long("help") => flags.help = true,
- Short('l') | Long("debug-lexer") => flags.debug_lexer = true,
- Short('p') | Long("debug-parser") => flags.debug_parser = true,
- Short('s') | Long("error-style") => {
- flags.error_style = match parser.value()?.to_string_lossy()
- {
- Cow::Borrowed("oneline") => errors::ErrorStyle::OneLine,
- Cow::Borrowed("standard") => {
- errors::ErrorStyle::Standard
- },
- s => Err(format!(
- "{s}: invalid value for -s/--error-style"
- ))?,
- };
- },
- Short('t') | Long("threads") => {
- flags.threads = parser.value()?.parse()?;
- if flags.threads == 0 {
- err!("thread count must be greater than 0");
- }
- },
- Value(v) => rest.push(v),
- _ => return Err(arg.unexpected()),
- }
- }
+fn main() {
+ let args = Args::from_arg_matches(
+ &Args::command().override_usage(usage()).get_matches(),
+ )
+ .unwrap_or_else(|e| e.exit());
- if flags.threads == 0 {
- flags.threads = thread::available_parallelism().map_or_else(
- |e| {
- warn!(e, "failed to get thread count");
- 1
- },
- |x| x.get(),
- );
- }
+ let threads = args.threads.unwrap_or_else(|| {
+ thread::available_parallelism().map_or_else(
+ |e| {
+ warn!(e, "failed to get thread count");
+ 1
+ },
+ |x| x.get(),
+ )
+ });
- return Ok((flags, rest));
+ if threads == 0 {
+ err!("thread count must be greater than 0");
}
+
+ let flags = Flags {
+ debug_lexer: args.debug_lexer,
+ debug_parser: args.debug_parser,
+ threads,
+ error_style: args.error_style,
+ };
+
+ let _ = errors::ERROR_STYLE.set(flags.error_style);
+ compiler::start(args.files, flags);
}
-fn usage() {
- eprintln!(
+fn usage() -> String {
+ format!(
concat!(
"Usage: {0} [-lp] [-s oneline|standard] [-t threads]\n",
" {0} -h",
),
errors::progname().display()
- );
-}
-
-fn main() {
- let (flags, rest) = match Flags::parse() {
- Ok(v) => v,
- Err(e) => {
- warn!(e);
- usage();
- process::exit(1);
- },
- };
-
- if flags.help {
- usage();
- process::exit(0);
- }
-
- let _ = errors::ERROR_STYLE.set(flags.error_style);
- compiler::start(rest, flags);
+ )
}