Begin working on symbol resolution

author: Thomas Voss <mail@thomasvoss.com> 2026-03-04 23:21:07 +0100
committer: Thomas Voss <mail@thomasvoss.com> 2026-03-04 23:21:07 +0100
commit: 82c14f030b36938cb10c1c8f8e880d0e0acaadc2 (patch)
tree: 50d399aebb9d374c867d86482da129fce1b1c4e7 /oryxc/src
parent: f1a862a334efb1aa1f1cc2c3f30dcbffeaa9b4e3 (diff)
4 files changed, 128 insertions, 33 deletions
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index f630166..d8bdfa2 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -25,6 +25,7 @@ use crossbeam_deque::{
 	Stealer,
 	Worker,
 };
+use dashmap::DashMap;
 use soa_rs::Soa;
 
 use crate::errors::OryxError;
@@ -44,6 +45,7 @@ pub struct FileData {
 	pub tokens:     OnceLock<Soa<Token>>,
 	pub ast:        OnceLock<Soa<AstNode>>,
 	pub extra_data: OnceLock<Vec<u32>>,
+	pub scopes:     Vec<DashMap<SymbolId, SymbolVal>>,
 }
 
 impl FileData {
@@ -65,6 +67,7 @@ impl FileData {
 			tokens: OnceLock::new(),
 			ast: OnceLock::new(),
 			extra_data: OnceLock::new(),
+			scopes: Vec::new(),
 		})
 	}
 }
@@ -82,7 +85,8 @@ pub enum Job {
 	ResolveDef {
 		file:  FileId,
 		fdata: Arc<FileData>,
-		node:  NodeId,
+		scope: ScopeId,
+		node:  u32,
 	},
 }
 
@@ -171,8 +175,8 @@ where
 		handles.iter().map(|h| h.thread().clone()).collect();
 	let _ = state.worker_threads.set(worker_threads);
 
-	// if work completes before we get here, wake them so they can observe the
-	// termination condition and exit.
+	// if work completes before we get here, wake them so they can observe
+	// the termination condition and exit.
 	state.wake_all();
 
 	for h in handles {
@@ -195,7 +199,8 @@ fn worker_loop(
 		}
 
 		let Some(job) = find_task(&queue, &state.globalq, &stealers) else {
-			// no work available; check termination condition before parking to avoid missed wakeups
+			// no work available; check termination condition before parking to
+			// avoid missed wakeups
 			if state.njobs.load(Ordering::Acquire) == 0 {
 				break;
 			}
@@ -245,15 +250,25 @@ fn worker_loop(
 				let SubNodes(i, nstmts) = ast.sub()[ast.len() - 1];
 
 				for j in 0..nstmts {
-					let node = NodeId(extra_data[(i + j) as usize]);
+					let node = extra_data[(i + j) as usize];
 					let fdata = fdata.clone();
 					state.push_job(
 						&queue,
-						Job::ResolveDef { file, fdata, node },
+						Job::ResolveDef {
+							file,
+							fdata,
+							node,
+							scope: ScopeId::GLOBAL,
+						},
 					);
 				}
 			},
-			Job::ResolveDef { file, fdata, node } => {
+			Job::ResolveDef {
+				file,
+				fdata,
+				scope,
+				node,
+			} => {
 				eprintln!("Resolving def at node index {node:?}");
 			},
 		}
@@ -263,7 +278,8 @@ fn worker_loop(
 			// condition and exit.
 			state.wake_all();
 
-			// break here to avoid unnecessary steal attempts after work is done.
+			// break here to avoid unnecessary steal attempts after work is
+			// done.
 			break;
 		}
 	}
diff --git a/oryxc/src/intern.rs b/oryxc/src/intern.rs
index 3ab91cf..b0d1a00 100644
--- a/oryxc/src/intern.rs
+++ b/oryxc/src/intern.rs
@@ -1,45 +1,61 @@
-use std::hash;
+use std::hash::{
+	Hash,
+	Hasher,
+};
 
-use dashmap;
-use icu::normalizer;
+use dashmap::DashMap;
+use unicode_normalization::{
+	self,
+	IsNormalized,
+	UnicodeNormalization,
+};
 
-#[repr(transparent)]
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-pub struct Key(u32);
+// use icu::normalizer::DecomposingNormalizer;
+use crate::prelude::*;
 
 pub struct Interner<'a> {
-	map:   dashmap::DashMap<UniStr<'a>, Key>,
+	map:   DashMap<UniStr<'a>, SymbolId>,
 	store: Vec<&'a str>,
 }
 
-#[derive(Eq)]
+#[derive(Debug, Eq)]
 pub struct UniStr<'a>(pub &'a str);
 
-impl hash::Hash for UniStr<'_> {
-	fn hash<H: hash::Hasher>(&self, state: &mut H) {
+impl Hash for UniStr<'_> {
+	fn hash<H: Hasher>(&self, state: &mut H) {
+		/* In the ASCII common case we use .bytes() to avoid decoding
+		 * every codepoint (a no-op in ASCII) */
 		if self.0.is_ascii() {
-            self.0.chars().for_each(|c| c.hash(state));
+			self.0.bytes().for_each(|c| (c as char).hash(state));
+		} else if unicode_normalization::is_nfkd_quick(self.0.chars())
+			== IsNormalized::Yes
+		{
+			self.0.chars().for_each(|c| c.hash(state));
 		} else {
-			let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
-			nfkd.normalize_iter(self.0.chars()).for_each(|c| c.hash(state));
+			self.0.nfkd().for_each(|c| c.hash(state));
 		}
 	}
 }
 
 impl PartialEq for UniStr<'_> {
 	fn eq(&self, other: &Self) -> bool {
-		let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
-		return match (self.0.is_ascii(), other.0.is_ascii()) {
+		/* Normalization is a lot slower than not normalizing, so only
+		 * normalize a side that fails the NFKD quick check.  A side
+		 * that passes may still be non-ASCII, so compare by chars */
+		return match (
+			unicode_normalization::is_nfkd_quick(self.0.chars())
+				== IsNormalized::Yes,
+			unicode_normalization::is_nfkd_quick(other.0.chars())
+				== IsNormalized::Yes,
+		) {
 			(true, true) => self.0 == other.0,
 			(true, false) => {
-				self.0.chars().eq(nfkd.normalize_iter(other.0.chars()))
+				self.0.chars().eq(other.0.nfkd())
 			},
 			(false, true) => {
-				other.0.chars().eq(nfkd.normalize_iter(self.0.chars()))
+				self.0.nfkd().eq(other.0.chars())
 			},
-			(false, false) => nfkd
-				.normalize_iter(self.0.chars())
-				.eq(nfkd.normalize_iter(other.0.chars())),
+			(false, false) => self.0.nfkd().eq(other.0.nfkd()),
 		};
 	}
 }
@@ -47,22 +63,74 @@ impl PartialEq for UniStr<'_> {
 impl<'a> Interner<'a> {
 	pub fn new() -> Self {
 		return Interner {
-			map:   dashmap::DashMap::new(),
+			map:   DashMap::new(),
 			store: Vec::new(),
 		};
 	}
 
-	pub fn get(&self, key: Key) -> &str {
+	pub fn get(&self, key: SymbolId) -> &str {
 		return self.store[key.0 as usize];
 	}
 
-	pub fn intern(&mut self, value: &'a str) -> Key {
+	pub fn intern(&mut self, value: &'a str) -> SymbolId {
 		if let Some(key) = self.map.get(&UniStr(value)) {
 			return *key;
 		}
-		let key = Key(self.store.len() as u32);
+		let key = SymbolId(self.store.len() as u32);
 		self.map.insert(UniStr(value), key);
 		self.store.push(value);
 		return key;
 	}
 }
+
+#[test]
+fn test_unistr_eq() {
+	assert_eq!(UniStr("fishi"), UniStr("ﬁshᵢ"));
+	assert_eq!(UniStr("fishi"), UniStr("fishi"));
+	assert_eq!(UniStr("ﬁshi"), UniStr("fishᵢ"));
+	assert_eq!(UniStr("ﬁshᵢ"), UniStr("ﬁshᵢ"));
+}
+
+#[test]
+fn test_unistr_hash() {
+	use std::hash::DefaultHasher;
+	for (lhs, rhs) in &[
+		(UniStr("fishi"), UniStr("ﬁshᵢ")),
+		(UniStr("fishi"), UniStr("fishi")),
+		(UniStr("ﬁshi"), UniStr("fishᵢ")),
+		(UniStr("ﬁshᵢ"), UniStr("ﬁshᵢ")),
+	] {
+		let mut hashl = DefaultHasher::new();
+		let mut hashr = DefaultHasher::new();
+		lhs.hash(&mut hashl);
+		rhs.hash(&mut hashr);
+		assert_eq!(hashl.finish(), hashr.finish());
+	}
+}
+
+#[test]
+fn test_interner_intern() {
+	let xs = ["ﬁshi", "fishi", "ﬁshᵢ"];
+	let y = "andy";
+
+	let mut interner = Interner::new();
+	for i in 0..xs.len() {
+		for j in i..xs.len() {
+			assert_eq!(interner.intern(xs[i]), interner.intern(xs[j]));
+		}
+	}
+	for i in 0..xs.len() {
+		assert_ne!(interner.intern(y), interner.intern(xs[i]));
+	}
+}
+
+#[test]
+fn test_interner_gets_first_inserted() {
+	let mut interner = Interner::new();
+	let xs = ["ﬁshi", "fishi", "ﬁshᵢ"];
+	let ys = xs.iter().map(|x| interner.intern(x)).collect::<Vec<_>>();
+
+	for i in 0..ys.len() {
+		assert_eq!(interner.get(ys[i]), xs[0]);
+	}
+}
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
index e8c552f..109aed3 100644
--- a/oryxc/src/main.rs
+++ b/oryxc/src/main.rs
@@ -2,6 +2,7 @@
 
 mod compiler;
 mod errors;
+mod intern;
 mod lexer;
 mod parser;
 mod prelude;
diff --git a/oryxc/src/prelude.rs b/oryxc/src/prelude.rs
index 78e7597..b7e80c2 100644
--- a/oryxc/src/prelude.rs
+++ b/oryxc/src/prelude.rs
@@ -8,7 +8,17 @@ use std::fmt::{
 pub struct FileId(pub usize);
 
 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
-pub struct NodeId(pub u32);
+pub struct ScopeId(pub usize);
+
+impl ScopeId {
+	pub const GLOBAL: Self = Self(0);
+}
+
+#[repr(transparent)]
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct SymbolId(pub u32);
+
+pub struct SymbolVal {}
 
 #[derive(Clone, Copy)]
 pub struct SubNodes(pub u32, pub u32);
author	Thomas Voss <mail@thomasvoss.com>	2026-03-04 23:21:07 +0100
committer	Thomas Voss <mail@thomasvoss.com>	2026-03-04 23:21:07 +0100
commit	82c14f030b36938cb10c1c8f8e880d0e0acaadc2 (patch)
tree	50d399aebb9d374c867d86482da129fce1b1c4e7 /oryxc/src
parent	f1a862a334efb1aa1f1cc2c3f30dcbffeaa9b4e3 (diff)