diff options
| -rw-r--r-- | oryxc/src/compiler.rs | 180 | ||||
| -rw-r--r-- | oryxc/src/intern.rs | 13 | ||||
| -rw-r--r-- | oryxc/src/main.rs | 3 | ||||
| -rw-r--r-- | oryxc/src/prelude.rs | 28 | ||||
| -rw-r--r-- | oryxc/src/symtab.rs | 42 | ||||
| -rw-r--r-- | test.xy | 2 |
6 files changed, 186 insertions, 82 deletions
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs index f1f25fc..a9119bc 100644 --- a/oryxc/src/compiler.rs +++ b/oryxc/src/compiler.rs @@ -19,19 +19,24 @@ use std::{ thread, }; +use boxcar; use crossbeam_deque::{ Injector, Steal, Stealer, Worker, }; +use dashmap::DashMap; use soa_rs::Soa; use crate::errors::OryxError; +use crate::intern::Interner; use crate::lexer::Token; -use crate::parser::AstNode; +use crate::parser::{ + AstNode, + AstType, +}; use crate::prelude::*; -use crate::symtab::*; use crate::{ Flags, err, @@ -46,7 +51,7 @@ pub struct FileData { pub tokens: OnceLock<Soa<Token>>, pub ast: OnceLock<Soa<AstNode>>, pub extra_data: OnceLock<Vec<u32>>, - pub symtab: SymbolTable, + pub symtab: DashMap<(ScopeId, SymbolId), Symbol>, } impl FileData { @@ -68,13 +73,13 @@ impl FileData { tokens: OnceLock::new(), ast: OnceLock::new(), extra_data: OnceLock::new(), - symtab: SymbolTable::new(), + symtab: DashMap::new(), }) } } #[allow(dead_code)] -pub enum Job { +pub enum JobType { Lex { file: FileId, fdata: Arc<FileData>, @@ -83,24 +88,43 @@ pub enum Job { file: FileId, fdata: Arc<FileData>, }, - ResolveDef { - file: FileId, + FindSymbolsInScope { + fdata: Arc<FileData>, + scope: ScopeId, + block: u32, + }, + ResolveDefBind { fdata: Arc<FileData>, scope: ScopeId, node: u32, }, } -pub struct CompilerState { +pub struct Job { + id: usize, + kind: JobType, +} + +pub struct CompilerState<'a> { #[allow(dead_code)] - pub files: Vec<Arc<FileData>>, pub globalq: Injector<Job>, pub njobs: AtomicUsize, pub flags: Flags, pub worker_threads: OnceLock<Box<[thread::Thread]>>, + /* Files needs to be after interner, so that the files get dropped + * after the interner. This is because the interner holds references + * to substrings of file buffers, so we want to control the drop + * order to avoid any potential undefined behaviour. */ + + interner: Interner<'a>, + files: Vec<Arc<FileData>>, + + deps: DashMap<usize, boxcar::Vec<Job>>, + next_id: AtomicUsize, + types: boxcar::Vec<OryxType>, } -impl CompilerState { +impl<'a> CompilerState<'a> { /// Unpark all worker threads. fn wake_all(&self) { if let Some(threads) = self.worker_threads.get() { @@ -110,8 +134,13 @@ impl CompilerState { } } + fn job_new(&self, kind: JobType) -> Job { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + return Job { id, kind }; + } + /// Push a job onto a worker's local queue and wake all threads. - fn push_job(&self, queue: &Worker<Job>, job: Job) { + fn job_push(&self, queue: &Worker<Job>, job: Job) { self.njobs.fetch_add(1, Ordering::Relaxed); queue.push(job); self.wake_all(); @@ -137,7 +166,10 @@ where FileData::new(path).unwrap_or_else(|e| err!(e, "{}", display)), ); files.push(Arc::clone(&fdata)); - initial_jobs.push(Job::Lex { file: id, fdata }); + initial_jobs.push(Job { + id: i, + kind: JobType::Lex { file: id, fdata }, + }); } let njobs = initial_jobs.len(); @@ -147,6 +179,23 @@ where njobs: AtomicUsize::new(njobs), flags, worker_threads: OnceLock::new(), + interner: Interner::new(), + deps: DashMap::new(), + next_id: AtomicUsize::new(njobs), + /* Temporary solution */ + types: boxcar::vec![ + OryxType::Integer /* int */ { bits: 64, signed: true }, + OryxType::Integer /* i8 */ { bits: 8, signed: true }, + OryxType::Integer /* i16 */ { bits: 16, signed: true }, + OryxType::Integer /* i32 */ { bits: 32, signed: true }, + OryxType::Integer /* i64 */ { bits: 64, signed: true }, + OryxType::Integer /* i128 */ { bits: 128, signed: true }, + OryxType::Integer /* u8 */ { bits: 8, signed: false }, + OryxType::Integer /* u16 */ { bits: 16, signed: false }, + OryxType::Integer /* u32 */ { bits: 32, signed: false }, + OryxType::Integer /* u64 */ { bits: 64, signed: false }, + OryxType::Integer /* u128 */ { bits: 128, signed: false }, + ], }); for job in initial_jobs { @@ -209,8 +258,8 @@ fn worker_loop( continue; }; - match job { - Job::Lex { file, fdata } => { + match job.kind { + JobType::Lex { file, fdata } => { let tokens = lexer::tokenize(&fdata.buffer).unwrap_or_else(|e| { emit_errors(&fdata, once(e)); @@ -225,9 +274,13 @@ fn worker_loop( } fdata.tokens.set(tokens).unwrap(); - state.push_job(&queue, Job::Parse { file, fdata }); + state.job_push( + &queue, + state.job_new(JobType::Parse { file, fdata }), + ); }, - Job::Parse { file, fdata } => { + + JobType::Parse { file, fdata } => { let (ast, extra_data) = parser::parse( fdata.tokens.get().unwrap(), ) @@ -243,35 +296,92 @@ fn worker_loop( } } + let root = (ast.len() - 1) as u32; fdata.ast.set(ast).unwrap(); fdata.extra_data.set(extra_data).unwrap(); + state.job_push( + &queue, + state.job_new(JobType::FindSymbolsInScope { + fdata, + scope: ScopeId::GLOBAL, + block: root, + }), + ); + }, + + JobType::FindSymbolsInScope { + fdata, + scope, + block, + } => { + let tokens = fdata.tokens.get().unwrap(); let ast = fdata.ast.get().unwrap(); let extra_data = fdata.extra_data.get().unwrap(); - let SubNodes(i, nstmts) = ast.sub()[ast.len() - 1]; + let SubNodes(beg, nstmts) = ast.sub()[block as usize]; + + let mut errors = Vec::new(); + + for i in beg..beg + nstmts { + let multi_def_bind = extra_data[i as usize]; + + if ast.kind()[multi_def_bind as usize] + != AstType::MultiDefBind + { + continue; + } + + let def_idents = ast.sub()[multi_def_bind as usize].0; + let nidents = extra_data[def_idents as usize]; + + for j in 0..nidents { + let ident = + extra_data[(def_idents + 1 + j * 2) as usize]; + let span = tokens.view()[ident as usize]; + + /* Make string slice lifetime 'static */ + let view = unsafe { + &*(&fdata.buffer[span.0..span.1] as *const str) + }; + + let symid = state.interner.intern(view); + let sym = Symbol::default(); + + if let Some(mut sym) = + fdata.symtab.insert((scope, symid), sym) + { + sym.state = ResolutionState::Poisoned; + fdata.symtab.insert((scope, symid), sym); + + errors.push(OryxError::new( + span, + format!( + "symbol ‘{view}’ defined multiple times" + ), + )); + } + } - for j in 0..nstmts { - let node = extra_data[(i + j) as usize]; - let fdata = fdata.clone(); - state.push_job( + state.job_push( &queue, - Job::ResolveDef { - file, - fdata, - node, - scope: ScopeId::GLOBAL, - }, + state.job_new(JobType::ResolveDefBind { + fdata: fdata.clone(), + scope, + node: multi_def_bind, + }), ); } + + emit_errors(&fdata, errors); }, - Job::ResolveDef { - file: _, - fdata: _, - scope: _, - node: _, - } => { - todo!(); - }, + + JobType::ResolveDefBind { fdata, scope, node } => {}, + } + + if let Some((_, deps)) = state.deps.remove(&job.id) { + for j in deps { + state.job_push(&queue, j); + } } if state.njobs.fetch_sub(1, Ordering::Release) == 1 { diff --git a/oryxc/src/intern.rs b/oryxc/src/intern.rs index 96b5fa9..ea131a2 100644 --- a/oryxc/src/intern.rs +++ b/oryxc/src/intern.rs @@ -42,6 +42,11 @@ impl PartialEq for UniStr<'_> { /* Most code is ASCII, and normalization is obviously a lot * slower than not normalizing, so we try to only normalize when * we have to */ + + if self.0.is_ascii() && other.0.is_ascii() { + return self.0 == other.0; + } + return match ( unicode_normalization::is_nfkd_quick(self.0.chars()) == IsNormalized::Yes, @@ -50,10 +55,10 @@ impl PartialEq for UniStr<'_> { ) { (true, true) => self.0 == other.0, (true, false) => { - self.0.bytes().map(|b| b as char).eq(other.0.nfkd()) + self.0.chars().map(|b| b as char).eq(other.0.nfkd()) }, (false, true) => { - self.0.nfkd().eq(other.0.bytes().map(|b| b as char)) + self.0.nfkd().eq(other.0.chars().map(|b| b as char)) }, (false, false) => self.0.nfkd().eq(other.0.nfkd()), }; @@ -72,7 +77,7 @@ impl<'a> Interner<'a> { return self.store[key.0 as usize]; } - pub fn intern(&mut self, value: &'a str) -> SymbolId { + pub fn intern(&self, value: &'a str) -> SymbolId { if let Some(key) = self.map.get(&UniStr(value)) { return *key; } @@ -88,6 +93,7 @@ fn test_unistr_eq() { assert_eq!(UniStr("fishi"), UniStr("fishi")); assert_eq!(UniStr("fishi"), UniStr("fishᵢ")); assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ")); + assert_eq!(UniStr("corné"), UniStr("corné")); } #[test] @@ -98,6 +104,7 @@ fn test_unistr_hash() { (UniStr("fishi"), UniStr("fishi")), (UniStr("fishi"), UniStr("fishᵢ")), (UniStr("fishᵢ"), UniStr("fishᵢ")), + (UniStr("corné"), UniStr("corné")), ] { let mut hashl = DefaultHasher::new(); let mut hashr = DefaultHasher::new(); diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs index c118ac5..109aed3 100644 --- a/oryxc/src/main.rs +++ b/oryxc/src/main.rs @@ -2,12 +2,11 @@ mod compiler; mod errors; -// mod intern; +mod intern; mod lexer; mod parser; mod prelude; mod size; -mod symtab; mod unicode; use std::ffi::OsString; diff --git a/oryxc/src/prelude.rs b/oryxc/src/prelude.rs index 9c91116..4fe1413 100644 --- a/oryxc/src/prelude.rs +++ b/oryxc/src/prelude.rs @@ -11,6 +11,34 @@ pub struct FileId(pub usize); #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct SymbolId(pub u32); +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ScopeId(pub usize); + +impl ScopeId { + pub const GLOBAL: Self = Self(0); +} + +#[derive(Default)] +pub enum ResolutionState { + #[default] + Unresolved, + Resolving, + Resolved, + Poisoned, +} + +#[derive(Default)] +pub struct Symbol { + pub state: ResolutionState, + pub r#type: u32, +} + +pub enum OryxType { + Integer { bits: usize, signed: bool }, + Pointer { base: u32 }, + Function { args: Vec<u32>, rets: Vec<u32> }, +} + #[derive(Clone, Copy)] pub struct SubNodes(pub u32, pub u32); diff --git a/oryxc/src/symtab.rs b/oryxc/src/symtab.rs deleted file mode 100644 index b65eee0..0000000 --- a/oryxc/src/symtab.rs +++ /dev/null @@ -1,42 +0,0 @@ -#![allow(dead_code)] - -use boxcar; -use dashmap::DashMap; - -use crate::prelude::*; - -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub struct ScopeId(pub u32); - -impl ScopeId { - pub const GLOBAL: Self = Self(0); - pub const INVAL: Self = Self(u32::MAX); -} - -#[derive(Clone, Copy, Debug)] -pub struct Scope { - pub parent: ScopeId, -} - -pub struct SymbolVal {} - -pub struct SymbolTable { - scopes: boxcar::Vec<Scope>, - symbols: DashMap<(ScopeId, SymbolId), SymbolVal>, -} - -impl SymbolTable { - pub fn new() -> Self { - return Self { - /* Initialize with the global scope */ - scopes: boxcar::vec![Scope { - parent: ScopeId::INVAL, - }], - symbols: DashMap::new(), - }; - } - - pub fn insert(&self, scope: ScopeId, symbol: SymbolId, value: SymbolVal) { - self.symbols.insert((scope, symbol), value); - } -} @@ -27,6 +27,8 @@ def main′ = func() { }); }; +def r, g, b u8 = 1, 2, 3; + def some_func = func(n u32) u32 { return n * 2; }; /* def MY_FLOAT = union { f f64; n u64; } { n = 0x482DEF }.f */ |