use std::ffi::OsString; use std::io::{ self, Read, Write, }; use std::sync::atomic::{ AtomicUsize, Ordering, }; use std::sync::{ Arc, OnceLock, }; use std::{ fs, iter, process, thread, }; use boxcar; use crossbeam_deque::{ Injector, Steal, Stealer, Worker, }; use dashmap::DashMap; use soa_rs::Soa; use crate::arena::{ GlobalArena, LocalArena, }; use crate::errors::OryxError; use crate::hashtrie::HTrie; use crate::intern::Interner; use crate::lexer::Token; use crate::parser::{ Ast, AstType, }; use crate::prelude::*; use crate::{ Flags, err, lexer, parser, size, }; #[allow(dead_code)] pub struct FileData { pub name: OsString, pub buffer: String, pub tokens: OnceLock>, pub ast: OnceLock, pub scopes: OnceLock>, } impl FileData { /// Read a source file from disk and create a new [`FileData`]. fn new(name: OsString) -> Result { const PAD: [u8; 64] = [0; 64]; /* 512 bits */ // Pre-allocate to avoid reallocation when appending padding. // Append extra data to the end so that we can safely read past // instead of branching on length. let size = fs::metadata(&name)?.len() as usize; let mut buffer = String::with_capacity(size + PAD.len()); fs::File::open(&name)?.read_to_string(&mut buffer)?; buffer.push_str(unsafe { std::str::from_utf8_unchecked(&PAD) }); return Ok(Self { name, buffer, tokens: OnceLock::new(), ast: OnceLock::new(), scopes: OnceLock::new(), }); } } #[allow(dead_code)] pub enum JobType { Lex { file: FileId, fdata: Arc, }, Parse { file: FileId, fdata: Arc, }, IndexScopeConstants { fdata: Arc, block: u32, parent: ScopeId, }, ResolveDefBind { fdata: Arc, node: u32, scope: ScopeId, }, } pub struct Job { id: usize, kind: JobType, } struct CompilerState<'a> { #[allow(dead_code)] global_arena: GlobalArena, globalq: Injector, njobs: AtomicUsize, flags: Flags, worker_threads: OnceLock>, /* Files needs to be after interner, so that the files get dropped * after the interner. This is because the interner holds references * to substrings of file buffers, so we want to control the drop * order to avoid any potential undefined behaviour. */ interner: Interner<'a>, files: Vec>, deps: DashMap>, next_id: AtomicUsize, types: boxcar::Vec, } impl<'a> CompilerState<'a> { /// Unpark all worker threads. fn wake_all(&self) { if let Some(threads) = self.worker_threads.get() { for t in threads.iter() { t.unpark(); } } } /// Generate a new ID for a job, scope, etc. #[inline(always)] fn genid(&self) -> usize { return self.next_id.fetch_add(1, Ordering::Relaxed); } /// Build a new job of type KIND. #[inline(always)] fn job_new(&self, kind: JobType) -> Job { let id = self.genid(); return Job { id, kind }; } /// Push a job onto a worker’s local queue and wake all threads. #[inline(always)] fn job_push(&self, queue: &Worker, job: Job) { self.njobs.fetch_add(1, Ordering::Relaxed); queue.push(job); self.wake_all(); } /// Signal a job completion by decrementing the job count. /// /// Returns the number of remaining jobs #[inline(always)] fn job_complete(&self) -> usize { return self.njobs.fetch_sub(1, Ordering::Release) - 1; } } /// Initialize compiler state and drive all source files through the /// pipeline. pub fn start(paths: T, flags: Flags) where T: IntoIterator, { let mut files = Vec::new(); let mut initial_jobs = Vec::new(); for (i, path) in paths.into_iter().enumerate() { let id = FileId(i); // Take ownership of the OsString so we can store it in FileData // without cloning let display = path.to_string_lossy().into_owned(); let fdata = Arc::new( FileData::new(path).unwrap_or_else(|e| err!(e, "{}", display)), ); files.push(Arc::clone(&fdata)); initial_jobs.push(Job { id: i, kind: JobType::Lex { file: id, fdata }, }); } let njobs = initial_jobs.len(); let state = Arc::new(CompilerState { files, global_arena: GlobalArena::new(size::kibibytes(64)), globalq: Injector::new(), njobs: AtomicUsize::new(njobs), flags, worker_threads: OnceLock::new(), interner: Interner::new(), deps: DashMap::new(), next_id: AtomicUsize::new(njobs), /* Temporary solution */ types: boxcar::vec![ OryxType::Integer /* int */ { bits: 64, signed: true }, OryxType::Integer /* i8 */ { bits: 8, signed: true }, OryxType::Integer /* i16 */ { bits: 16, signed: true }, OryxType::Integer /* i32 */ { bits: 32, signed: true }, OryxType::Integer /* i64 */ { bits: 64, signed: true }, OryxType::Integer /* i128 */ { bits: 128, signed: true }, OryxType::Integer /* u8 */ { bits: 8, signed: false }, OryxType::Integer /* u16 */ { bits: 16, signed: false }, OryxType::Integer /* u32 */ { bits: 32, signed: false }, OryxType::Integer /* u64 */ { bits: 64, signed: false }, OryxType::Integer /* u128 */ { bits: 128, signed: false }, ], }); for job in initial_jobs { state.globalq.push(job); } let mut workers = Box::new_uninit_slice(flags.threads); let mut stealers = Box::new_uninit_slice(flags.threads); for i in 0..flags.threads { let w = Worker::new_fifo(); stealers[i].write(w.stealer()); workers[i].write(w); } let workers = unsafe { workers.assume_init() }; let stealers = Arc::from(unsafe { stealers.assume_init() }); let mut ok = true; thread::scope(|s| { let mut handles = Box::new_uninit_slice(workers.len()); let mut worker_threads = Box::new_uninit_slice(handles.len()); for (i, w) in workers.into_iter().enumerate() { let stealers = Arc::clone(&stealers); let state = Arc::clone(&state); let arena = LocalArena::new(&state.global_arena); let handle = s.spawn(move || worker_loop(i, state, w, stealers)); worker_threads[i].write(handle.thread().clone()); handles[i].write(handle); } let _ = state .worker_threads .set(unsafe { worker_threads.assume_init() }); state.wake_all(); for h in handles { match unsafe { h.assume_init() }.join() { Ok(thrd_ok) => { if !thrd_ok { ok = false; } }, Err(_) => ok = false, } } }); if !ok { process::exit(1); } } /// Steal and execute jobs until all work is complete. fn worker_loop( _id: usize, c_state: Arc, queue: Worker, stealers: Arc<[Stealer]>, ) -> bool { let mut ok = true; let arena = LocalArena::new(&c_state.global_arena); loop { let Some(job) = find_task(&queue, &c_state.globalq, &stealers) else { /* No work available; check termination condition before * parking to avoid missed wakeups */ let n = c_state.njobs.load(Ordering::Acquire); if n == 0 { c_state.wake_all(); return ok; } thread::park(); continue; }; let result = match job.kind { JobType::Lex { file, fdata } => 'blk: { let tokens = match lexer::tokenize(&fdata.buffer) { Ok(xs) => xs, Err(e) => { emit_errors(&fdata, iter::once(e)); break 'blk false; }, }; if c_state.flags.debug_lexer { let mut handle = io::stderr().lock(); for t in tokens.iter() { let _ = write!(handle, "{t:?}\n"); } } fdata.tokens.set(tokens).unwrap(); c_state.job_push( &queue, c_state.job_new(JobType::Parse { file, fdata }), ); true }, JobType::Parse { file, fdata } => 'blk: { let ast = match parser::parse(fdata.tokens.get().unwrap()) { Ok(ast) => ast, Err(errs) => { emit_errors(&fdata, errs); break 'blk false; }, }; if c_state.flags.debug_parser { let mut handle = io::stderr().lock(); for n in ast.nodes.iter() { let _ = write!(handle, "{n:?}\n"); } } let root = (ast.nodes.len() - 1) as u32; fdata.ast.set(ast).unwrap(); fdata.scopes.set(HTrie::new()).unwrap(); c_state.job_push( &queue, c_state.job_new(JobType::IndexScopeConstants { fdata, block: root, parent: ScopeId::INVALID, }), ); true }, JobType::IndexScopeConstants { fdata, block, parent, } => { let tokens = fdata.tokens.get().unwrap(); let ast = fdata.ast.get().unwrap(); let SubNodes(beg, nstmts) = ast.nodes.sub()[block as usize]; let mut errors = Vec::new(); let scope = Scope::new(parent); /* First pass inserts all the symbols in this scope into the * symbol table */ for i in beg..beg + nstmts { let node = ast.extra[i as usize]; if ast.nodes.kind()[node as usize] != AstType::MultiDefBind { continue; } let identsbeg = ast.nodes.sub()[node as usize].0; let nidents = ast.extra[identsbeg as usize]; for j in 0..nidents { let ident = ast.extra[(identsbeg + 1 + j * 2) as usize]; let span = tokens.view()[ident as usize]; /* Make string slice lifetime 'static */ let view = unsafe { &*(&fdata.buffer[span.0..span.1] as *const str) }; let symid = c_state.interner.intern(view); let sym = Symbol::default(); if let Some(mut sym) = scope.symtab.insert(symid, sym, &arena) { sym.state = ResolutionState::Poisoned; scope.symtab.insert(symid, sym, &arena); errors.push(OryxError::new( span, format!( "symbol ‘{view}’ defined multiple times" ), )); } } } let scopeid = if parent == ScopeId::INVALID { ScopeId::GLOBAL } else { ScopeId(c_state.genid()) }; fdata.scopes.get().unwrap().insert(scopeid, scope, &arena); /* Second pass emits jobs to resolve types */ for i in beg..beg + nstmts { let node = ast.extra[i as usize]; if ast.nodes.kind()[node as usize] != AstType::MultiDefBind { continue; } c_state.job_push(&queue, c_state.job_new(JobType::ResolveDefBind { fdata: fdata.clone(), node, scope: scopeid, })); } let ok = errors.is_empty(); emit_errors(&fdata, errors); ok }, JobType::ResolveDefBind { fdata, node, scope } => { todo!("resolving is yet to be implemented"); true }, }; if !result { ok = false; } if let Some((_, deps)) = c_state.deps.remove(&job.id) { for j in deps { c_state.job_push(&queue, j); } } if c_state.job_complete() == 0 { c_state.wake_all(); return ok; } } } /// Get next available job or steal from the global queue or peers if /// local queue is empty. fn find_task( localq: &Worker, globalq: &Injector, stealers: &Arc<[Stealer]>, ) -> Option { if let Some(job) = localq.pop() { return Some(job); } loop { match globalq.steal_batch_and_pop(localq) { Steal::Success(job) => return Some(job), Steal::Empty => break, Steal::Retry => continue, } } for s in stealers.iter() { loop { match s.steal_batch_and_pop(localq) { Steal::Success(job) => return Some(job), Steal::Empty => break, Steal::Retry => continue, } } } None } /// Print all errors to stderr using the file’s name and source buffer. fn emit_errors(fdata: &FileData, errors: T) where T: IntoIterator, { for e in errors { e.report(&fdata.name, &fdata.buffer); } }