summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas Voss <mail@thomasvoss.com> 2026-03-05 21:46:09 +0100
committer: Thomas Voss <mail@thomasvoss.com> 2026-03-05 21:46:09 +0100
commit: 4f723801d751d520263ce0f14b2cf409f60ac77e (patch)
tree: 2139dea317cbf9662ebf15d60fe54d88781b1b86
parent: 723fb5031a1f59f8df5d0a0dbf5dc0a54420e15f (diff)
Continue work on types and static analysis
-rw-r--r-- oryxc/src/compiler.rs 180
-rw-r--r-- oryxc/src/intern.rs 13
-rw-r--r-- oryxc/src/main.rs 3
-rw-r--r-- oryxc/src/prelude.rs 28
-rw-r--r-- oryxc/src/symtab.rs 42
-rw-r--r-- test.xy 2
6 files changed, 186 insertions, 82 deletions
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index f1f25fc..a9119bc 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -19,19 +19,24 @@ use std::{
thread,
};
+use boxcar;
use crossbeam_deque::{
Injector,
Steal,
Stealer,
Worker,
};
+use dashmap::DashMap;
use soa_rs::Soa;
use crate::errors::OryxError;
+use crate::intern::Interner;
use crate::lexer::Token;
-use crate::parser::AstNode;
+use crate::parser::{
+ AstNode,
+ AstType,
+};
use crate::prelude::*;
-use crate::symtab::*;
use crate::{
Flags,
err,
@@ -46,7 +51,7 @@ pub struct FileData {
pub tokens: OnceLock<Soa<Token>>,
pub ast: OnceLock<Soa<AstNode>>,
pub extra_data: OnceLock<Vec<u32>>,
- pub symtab: SymbolTable,
+ pub symtab: DashMap<(ScopeId, SymbolId), Symbol>,
}
impl FileData {
@@ -68,13 +73,13 @@ impl FileData {
tokens: OnceLock::new(),
ast: OnceLock::new(),
extra_data: OnceLock::new(),
- symtab: SymbolTable::new(),
+ symtab: DashMap::new(),
})
}
}
#[allow(dead_code)]
-pub enum Job {
+pub enum JobType {
Lex {
file: FileId,
fdata: Arc<FileData>,
@@ -83,24 +88,43 @@ pub enum Job {
file: FileId,
fdata: Arc<FileData>,
},
- ResolveDef {
- file: FileId,
+ FindSymbolsInScope {
+ fdata: Arc<FileData>,
+ scope: ScopeId,
+ block: u32,
+ },
+ ResolveDefBind {
fdata: Arc<FileData>,
scope: ScopeId,
node: u32,
},
}
-pub struct CompilerState {
+pub struct Job {
+ id: usize,
+ kind: JobType,
+}
+
+pub struct CompilerState<'a> {
#[allow(dead_code)]
- pub files: Vec<Arc<FileData>>,
pub globalq: Injector<Job>,
pub njobs: AtomicUsize,
pub flags: Flags,
pub worker_threads: OnceLock<Box<[thread::Thread]>>,
+ /* `files` must be declared after `interner` so that the files are
+ * dropped after the interner (fields drop in declaration order). The
+ * interner holds references to substrings of the file buffers, so
+ * this drop order avoids any potential undefined behaviour. */
+
+ interner: Interner<'a>,
+ files: Vec<Arc<FileData>>,
+
+ deps: DashMap<usize, boxcar::Vec<Job>>,
+ next_id: AtomicUsize,
+ types: boxcar::Vec<OryxType>,
}
-impl CompilerState {
+impl<'a> CompilerState<'a> {
/// Unpark all worker threads.
fn wake_all(&self) {
if let Some(threads) = self.worker_threads.get() {
@@ -110,8 +134,13 @@ impl CompilerState {
}
}
+ fn job_new(&self, kind: JobType) -> Job {
+ let id = self.next_id.fetch_add(1, Ordering::Relaxed);
+ return Job { id, kind };
+ }
+
/// Push a job onto a worker's local queue and wake all threads.
- fn push_job(&self, queue: &Worker<Job>, job: Job) {
+ fn job_push(&self, queue: &Worker<Job>, job: Job) {
self.njobs.fetch_add(1, Ordering::Relaxed);
queue.push(job);
self.wake_all();
@@ -137,7 +166,10 @@ where
FileData::new(path).unwrap_or_else(|e| err!(e, "{}", display)),
);
files.push(Arc::clone(&fdata));
- initial_jobs.push(Job::Lex { file: id, fdata });
+ initial_jobs.push(Job {
+ id: i,
+ kind: JobType::Lex { file: id, fdata },
+ });
}
let njobs = initial_jobs.len();
@@ -147,6 +179,23 @@ where
njobs: AtomicUsize::new(njobs),
flags,
worker_threads: OnceLock::new(),
+ interner: Interner::new(),
+ deps: DashMap::new(),
+ next_id: AtomicUsize::new(njobs),
+ /* Temporary solution */
+ types: boxcar::vec![
+ OryxType::Integer /* int */ { bits: 64, signed: true },
+ OryxType::Integer /* i8 */ { bits: 8, signed: true },
+ OryxType::Integer /* i16 */ { bits: 16, signed: true },
+ OryxType::Integer /* i32 */ { bits: 32, signed: true },
+ OryxType::Integer /* i64 */ { bits: 64, signed: true },
+ OryxType::Integer /* i128 */ { bits: 128, signed: true },
+ OryxType::Integer /* u8 */ { bits: 8, signed: false },
+ OryxType::Integer /* u16 */ { bits: 16, signed: false },
+ OryxType::Integer /* u32 */ { bits: 32, signed: false },
+ OryxType::Integer /* u64 */ { bits: 64, signed: false },
+ OryxType::Integer /* u128 */ { bits: 128, signed: false },
+ ],
});
for job in initial_jobs {
@@ -209,8 +258,8 @@ fn worker_loop(
continue;
};
- match job {
- Job::Lex { file, fdata } => {
+ match job.kind {
+ JobType::Lex { file, fdata } => {
let tokens =
lexer::tokenize(&fdata.buffer).unwrap_or_else(|e| {
emit_errors(&fdata, once(e));
@@ -225,9 +274,13 @@ fn worker_loop(
}
fdata.tokens.set(tokens).unwrap();
- state.push_job(&queue, Job::Parse { file, fdata });
+ state.job_push(
+ &queue,
+ state.job_new(JobType::Parse { file, fdata }),
+ );
},
- Job::Parse { file, fdata } => {
+
+ JobType::Parse { file, fdata } => {
let (ast, extra_data) = parser::parse(
fdata.tokens.get().unwrap(),
)
@@ -243,35 +296,92 @@ fn worker_loop(
}
}
+ let root = (ast.len() - 1) as u32;
fdata.ast.set(ast).unwrap();
fdata.extra_data.set(extra_data).unwrap();
+ state.job_push(
+ &queue,
+ state.job_new(JobType::FindSymbolsInScope {
+ fdata,
+ scope: ScopeId::GLOBAL,
+ block: root,
+ }),
+ );
+ },
+
+ JobType::FindSymbolsInScope {
+ fdata,
+ scope,
+ block,
+ } => {
+ let tokens = fdata.tokens.get().unwrap();
let ast = fdata.ast.get().unwrap();
let extra_data = fdata.extra_data.get().unwrap();
- let SubNodes(i, nstmts) = ast.sub()[ast.len() - 1];
+ let SubNodes(beg, nstmts) = ast.sub()[block as usize];
+
+ let mut errors = Vec::new();
+
+ for i in beg..beg + nstmts {
+ let multi_def_bind = extra_data[i as usize];
+
+ if ast.kind()[multi_def_bind as usize]
+ != AstType::MultiDefBind
+ {
+ continue;
+ }
+
+ let def_idents = ast.sub()[multi_def_bind as usize].0;
+ let nidents = extra_data[def_idents as usize];
+
+ for j in 0..nidents {
+ let ident =
+ extra_data[(def_idents + 1 + j * 2) as usize];
+ let span = tokens.view()[ident as usize];
+
+ /* Make string slice lifetime 'static */
+ let view = unsafe {
+ &*(&fdata.buffer[span.0..span.1] as *const str)
+ };
+
+ let symid = state.interner.intern(view);
+ let sym = Symbol::default();
+
+ if let Some(mut sym) =
+ fdata.symtab.insert((scope, symid), sym)
+ {
+ sym.state = ResolutionState::Poisoned;
+ fdata.symtab.insert((scope, symid), sym);
+
+ errors.push(OryxError::new(
+ span,
+ format!(
+ "symbol ‘{view}’ defined multiple times"
+ ),
+ ));
+ }
+ }
- for j in 0..nstmts {
- let node = extra_data[(i + j) as usize];
- let fdata = fdata.clone();
- state.push_job(
+ state.job_push(
&queue,
- Job::ResolveDef {
- file,
- fdata,
- node,
- scope: ScopeId::GLOBAL,
- },
+ state.job_new(JobType::ResolveDefBind {
+ fdata: fdata.clone(),
+ scope,
+ node: multi_def_bind,
+ }),
);
}
+
+ emit_errors(&fdata, errors);
},
- Job::ResolveDef {
- file: _,
- fdata: _,
- scope: _,
- node: _,
- } => {
- todo!();
- },
+
+ JobType::ResolveDefBind { fdata, scope, node } => {},
+ }
+
+ if let Some((_, deps)) = state.deps.remove(&job.id) {
+ for j in deps {
+ state.job_push(&queue, j);
+ }
}
if state.njobs.fetch_sub(1, Ordering::Release) == 1 {
diff --git a/oryxc/src/intern.rs b/oryxc/src/intern.rs
index 96b5fa9..ea131a2 100644
--- a/oryxc/src/intern.rs
+++ b/oryxc/src/intern.rs
@@ -42,6 +42,11 @@ impl PartialEq for UniStr<'_> {
/* Most code is ASCII, and normalization is obviously a lot
* slower than not normalizing, so we try to only normalize when
* we have to */
+
+ if self.0.is_ascii() && other.0.is_ascii() {
+ return self.0 == other.0;
+ }
+
return match (
unicode_normalization::is_nfkd_quick(self.0.chars())
== IsNormalized::Yes,
@@ -50,10 +55,10 @@ impl PartialEq for UniStr<'_> {
) {
(true, true) => self.0 == other.0,
(true, false) => {
- self.0.bytes().map(|b| b as char).eq(other.0.nfkd())
+ self.0.chars().map(|b| b as char).eq(other.0.nfkd())
},
(false, true) => {
- self.0.nfkd().eq(other.0.bytes().map(|b| b as char))
+ self.0.nfkd().eq(other.0.chars().map(|b| b as char))
},
(false, false) => self.0.nfkd().eq(other.0.nfkd()),
};
@@ -72,7 +77,7 @@ impl<'a> Interner<'a> {
return self.store[key.0 as usize];
}
- pub fn intern(&mut self, value: &'a str) -> SymbolId {
+ pub fn intern(&self, value: &'a str) -> SymbolId {
if let Some(key) = self.map.get(&UniStr(value)) {
return *key;
}
@@ -88,6 +93,7 @@ fn test_unistr_eq() {
assert_eq!(UniStr("fishi"), UniStr("fishi"));
assert_eq!(UniStr("fishi"), UniStr("fishᵢ"));
assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ"));
+ assert_eq!(UniStr("corné"), UniStr("corné"));
}
#[test]
@@ -98,6 +104,7 @@ fn test_unistr_hash() {
(UniStr("fishi"), UniStr("fishi")),
(UniStr("fishi"), UniStr("fishᵢ")),
(UniStr("fishᵢ"), UniStr("fishᵢ")),
+ (UniStr("corné"), UniStr("corné")),
] {
let mut hashl = DefaultHasher::new();
let mut hashr = DefaultHasher::new();
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
index c118ac5..109aed3 100644
--- a/oryxc/src/main.rs
+++ b/oryxc/src/main.rs
@@ -2,12 +2,11 @@
mod compiler;
mod errors;
-// mod intern;
+mod intern;
mod lexer;
mod parser;
mod prelude;
mod size;
-mod symtab;
mod unicode;
use std::ffi::OsString;
diff --git a/oryxc/src/prelude.rs b/oryxc/src/prelude.rs
index 9c91116..4fe1413 100644
--- a/oryxc/src/prelude.rs
+++ b/oryxc/src/prelude.rs
@@ -11,6 +11,34 @@ pub struct FileId(pub usize);
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct SymbolId(pub u32);
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct ScopeId(pub usize);
+
+impl ScopeId {
+ pub const GLOBAL: Self = Self(0);
+}
+
+#[derive(Default)]
+pub enum ResolutionState {
+ #[default]
+ Unresolved,
+ Resolving,
+ Resolved,
+ Poisoned,
+}
+
+#[derive(Default)]
+pub struct Symbol {
+ pub state: ResolutionState,
+ pub r#type: u32,
+}
+
+pub enum OryxType {
+ Integer { bits: usize, signed: bool },
+ Pointer { base: u32 },
+ Function { args: Vec<u32>, rets: Vec<u32> },
+}
+
#[derive(Clone, Copy)]
pub struct SubNodes(pub u32, pub u32);
diff --git a/oryxc/src/symtab.rs b/oryxc/src/symtab.rs
deleted file mode 100644
index b65eee0..0000000
--- a/oryxc/src/symtab.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-#![allow(dead_code)]
-
-use boxcar;
-use dashmap::DashMap;
-
-use crate::prelude::*;
-
-#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
-pub struct ScopeId(pub u32);
-
-impl ScopeId {
- pub const GLOBAL: Self = Self(0);
- pub const INVAL: Self = Self(u32::MAX);
-}
-
-#[derive(Clone, Copy, Debug)]
-pub struct Scope {
- pub parent: ScopeId,
-}
-
-pub struct SymbolVal {}
-
-pub struct SymbolTable {
- scopes: boxcar::Vec<Scope>,
- symbols: DashMap<(ScopeId, SymbolId), SymbolVal>,
-}
-
-impl SymbolTable {
- pub fn new() -> Self {
- return Self {
- /* Initialize with the global scope */
- scopes: boxcar::vec![Scope {
- parent: ScopeId::INVAL,
- }],
- symbols: DashMap::new(),
- };
- }
-
- pub fn insert(&self, scope: ScopeId, symbol: SymbolId, value: SymbolVal) {
- self.symbols.insert((scope, symbol), value);
- }
-}
diff --git a/test.xy b/test.xy
index c49d138..a908c0e 100644
--- a/test.xy
+++ b/test.xy
@@ -27,6 +27,8 @@ def main′ = func() {
});
};
+def r, g, b u8 = 1, 2, 3;
+
def some_func = func(n u32) u32 { return n * 2; };
/* def MY_FLOAT = union { f f64; n u64; } { n = 0x482DEF }.f */