diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2026-03-29 23:09:46 +0200 |
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2026-03-29 23:09:46 +0200 |
| commit | da65ee39162d0323321340b2a9cef9a013ad36ef (patch) | |
| tree | 127f6afd6bb418c5df3216e1ad83239aa693ef77 /oryxc/src/compiler.rs | |
| parent | db11ea02d777a33fedb6af4ee056e85f52fbb008 (diff) | |
Beginning sema work
Diffstat (limited to 'oryxc/src/compiler.rs')
| -rw-r--r-- | oryxc/src/compiler.rs | 210 |
1 file changed, 141 insertions(+), 69 deletions(-)
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs index 6119915..7e61237 100644 --- a/oryxc/src/compiler.rs +++ b/oryxc/src/compiler.rs @@ -20,6 +20,7 @@ use std::{ thread, }; +use boxcar; use crossbeam_deque::{ Injector, Steal, @@ -31,7 +32,10 @@ use soa_rs::Soa; use crate::errors::OryxError; use crate::intern::Interner; use crate::lexer::Token; -use crate::parser::Ast; +use crate::parser::{ + Ast, + AstType, +}; use crate::prelude::*; use crate::unistr::UniStr; use crate::{ @@ -39,6 +43,7 @@ use crate::{ err, lexer, parser, + sema, }; #[allow(dead_code)] @@ -47,7 +52,7 @@ pub struct FileData { pub buffer: String, pub tokens: OnceLock<Soa<Token>>, pub ast: OnceLock<Ast>, - pub scopes: OnceLock<Vec<Scope>>, + pub scopes: OnceLock<boxcar::Vec<Scope>>, } impl FileData { @@ -76,8 +81,20 @@ impl FileData { #[allow(dead_code)] #[derive(Clone)] pub enum JobType { - Lex { file: FileId, fdata: Arc<FileData> }, - Parse { file: FileId, fdata: Arc<FileData> }, + Lex { + file: FileId, + fdata: Arc<FileData>, + }, + Parse { + file: FileId, + fdata: Arc<FileData>, + }, + TypecheckConstant { + file: FileId, + fdata: Arc<FileData>, + scope: ScopeId, + node: u32, + }, } mkidtype!(JobId); @@ -88,17 +105,18 @@ pub struct Job { kind: JobType, } -struct CompilerState<'a> { +pub struct CompilerState<'a> { #[allow(dead_code)] globalq: Injector<Job>, njobs: AtomicUsize, flags: Flags, worker_threads: OnceLock<Box<[thread::Thread]>>, - /* Files needs to be after interner, so that the files get dropped - * after the interner. This is because the interner holds references - * to substrings of file buffers, so we want to control the drop - * order to avoid any potential undefined behaviour. */ - interner: Interner<UniStr<'a>, SymbolId>, + /* Files needs to be after the identifier interner, so that the files + * get dropped after the interner. 
This is because the interner holds + * references to substrings of file buffers, so we want to control the + * drop order to avoid any potential undefined behaviour. */ + pub ident_intr: Interner<UniStr<'a>, SymbolId>, + pub type_intr: Interner<OryxType, TypeId>, files: Vec<Arc<FileData>>, next_id: AtomicU32, } @@ -152,6 +170,116 @@ impl<'a> CompilerState<'a> { fn job_complete(&self) -> usize { return self.njobs.fetch_sub(1, Ordering::Release) - 1; } + + fn job_dispatch(&self, queue: &Worker<Job>, job: Job) -> bool { + match job.kind { + JobType::Lex { file, fdata } => 'blk: { + let tokens = match lexer::tokenize(&fdata.buffer) { + Ok(xs) => xs, + Err(e) => { + emit_errors(&fdata, iter::once(e)); + break 'blk false; + }, + }; + + if self.flags.debug_lexer { + let mut handle = io::stderr().lock(); + for t in tokens.iter() { + let _ = write!(handle, "{t:?}\n"); + } + } + + fdata.tokens.set(tokens).unwrap(); + self.job_push( + &queue, + self.job_new(JobType::Parse { file, fdata }), + ); + true + }, + + JobType::Parse { file, fdata } => 'blk: { + let tokens = fdata.tokens.get().unwrap(); + let ast = match parser::parse(tokens) { + Ok(ast) => ast, + Err(errs) => { + emit_errors(&fdata, errs); + break 'blk false; + }, + }; + + if self.flags.debug_parser { + let mut handle = io::stderr().lock(); + for n in ast.nodes.iter() { + let _ = write!(handle, "{n:?}\n"); + } + } + + /* Rust autism */ + fdata.ast.set(ast).unwrap(); + let ast = fdata.ast.get().unwrap(); + + let scopes = boxcar::vec![Scope::new(ScopeId::INVALID)]; + fdata.scopes.set(scopes).unwrap(); + + let root = ast.nodes.len() - 1; + let SubNodes(beg, len) = ast.nodes.sub()[root]; + let gscope = &fdata.scopes.get().unwrap()[0]; + + for i in 0..len { + let stmt = ast.extra[(beg + i) as usize]; + if ast.nodes.kind()[stmt as usize] != AstType::MultiDefBind + { + continue; + } + + let beg = ast.nodes.sub()[stmt as usize].0 as usize; + let nidents = ast.extra[beg] as usize; + for i in 0..nidents { + let tok = 
ast.extra[beg + 1 + i * 2] as usize; + let view = tokens.view()[tok]; + + /* Pointer fuckery to bypass the borrow checker */ + let s = UniStr(unsafe { + &*(&fdata.buffer[view.0..view.1] as *const str) + }); + let sid = self.ident_intr.intern(s); + let symbol = Symbol::new(SymbolType::Constant); + gscope.symtab.insert(sid, symbol); + } + + self.job_push( + &queue, + self.job_new(JobType::TypecheckConstant { + file, + fdata: fdata.clone(), + scope: ScopeId::GLOBAL, + node: stmt, + }), + ); + } + + true + }, + + JobType::TypecheckConstant { + file, + fdata, + scope, + node, + } => { + let ast = fdata.ast.get().unwrap(); + let tokens = fdata.tokens.get().unwrap(); + match sema::typecheck_multi_def_bind(&self, &fdata, scope, node) + { + Ok(()) => true, + Err(e) => { + emit_errors(&fdata, iter::once(e)); + false + }, + } + }, + } + } } /// Initialize compiler state and drive all source files through the @@ -187,7 +315,8 @@ where njobs: AtomicUsize::new(njobs), flags, worker_threads: OnceLock::new(), - interner: Interner::new(), + ident_intr: Interner::new(), + type_intr: Interner::new(), next_id: AtomicU32::new(njobs as u32), }); @@ -259,66 +388,9 @@ fn worker_loop( thread::park(); continue; }; - - let result = match job.kind { - JobType::Lex { file, fdata } => 'blk: { - let tokens = match lexer::tokenize(&fdata.buffer) { - Ok(xs) => xs, - Err(e) => { - emit_errors(&fdata, iter::once(e)); - break 'blk false; - }, - }; - - if c_state.flags.debug_lexer { - let mut handle = io::stderr().lock(); - for t in tokens.iter() { - let _ = write!(handle, "{t:?}\n"); - } - } - - fdata.tokens.set(tokens).unwrap(); - c_state.job_push( - &queue, - c_state.job_new(JobType::Parse { file, fdata }), - ); - true - }, - - JobType::Parse { file, fdata } => 'blk: { - let ast = match parser::parse(fdata.tokens.get().unwrap()) { - Ok(ast) => ast, - Err(errs) => { - emit_errors(&fdata, errs); - break 'blk false; - }, - }; - - if c_state.flags.debug_parser { - let mut handle = io::stderr().lock(); 
- for n in ast.nodes.iter() { - let _ = write!(handle, "{n:?}\n"); - } - } - - fdata.ast.set(ast).unwrap(); - fdata.scopes.set(Vec::new()).unwrap(); - - // c_state.job_push( - // &queue, - // c_state.job_new(JobType::IndexScopeConstants { - // fdata, - // block: root, - // parent: ScopeId::INVALID, - // }), - // ); - true - }, - }; - if !result { + if !c_state.job_dispatch(&queue, job) { ok = false; } - if c_state.job_complete() == 0 { c_state.wake_all(); return ok; |