summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock115
-rw-r--r--oryxc/Cargo.toml5
-rw-r--r--oryxc/src/compiler.rs32
-rw-r--r--oryxc/src/intern.rs116
-rw-r--r--oryxc/src/main.rs1
-rw-r--r--oryxc/src/prelude.rs12
6 files changed, 245 insertions, 36 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8bb0332..ee11eb5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -53,6 +53,18 @@ dependencies = [
]
[[package]]
+name = "bitflags"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
name = "clap"
version = "4.5.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -124,12 +136,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
+[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+
+[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -148,23 +180,59 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "803ec87c9cfb29b9d2633f20cba1f488db3fd53f2158b1024cbefb47ba05d413"
[[package]]
+name = "libc"
+version = "0.2.182"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+
+[[package]]
name = "once_cell_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
-name = "oryx"
+name = "oryxc"
version = "0.1.0"
dependencies = [
"clap",
"crossbeam-deque",
+ "dashmap",
"phf",
"soa-rs",
+ "unicode-normalization",
"unicode-width",
]
[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
name = "phf"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -226,6 +294,21 @@ dependencies = [
]
[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -261,6 +344,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
name = "soa-rs"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -298,12 +387,36 @@ dependencies = [
]
[[package]]
+name = "tinyvec"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
+name = "unicode-normalization"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/oryxc/Cargo.toml b/oryxc/Cargo.toml
index 3cd08fb..cd5dedf 100644
--- a/oryxc/Cargo.toml
+++ b/oryxc/Cargo.toml
@@ -4,10 +4,11 @@ version = "0.1.0"
edition = "2024"
[dependencies]
-crossbeam-deque = "0.8.6"
-# icu = { version = "2.1.1", features = ["compiled_data"] }
clap = { version = "4", features = ["derive"] }
+crossbeam-deque = "0.8.6"
+dashmap = "6.1.0"
# num-rational = "0.4.2"
phf = { version = "0.13.1", features = ["macros"] }
soa-rs = "0.9.1"
+unicode-normalization = "0.1.25"
unicode-width = "0.2.2"
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index f630166..d8bdfa2 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -25,6 +25,7 @@ use crossbeam_deque::{
Stealer,
Worker,
};
+use dashmap::DashMap;
use soa_rs::Soa;
use crate::errors::OryxError;
@@ -44,6 +45,7 @@ pub struct FileData {
pub tokens: OnceLock<Soa<Token>>,
pub ast: OnceLock<Soa<AstNode>>,
pub extra_data: OnceLock<Vec<u32>>,
+ pub scopes: Vec<DashMap<SymbolId, SymbolVal>>,
}
impl FileData {
@@ -65,6 +67,7 @@ impl FileData {
tokens: OnceLock::new(),
ast: OnceLock::new(),
extra_data: OnceLock::new(),
+ scopes: Vec::new(),
})
}
}
@@ -82,7 +85,8 @@ pub enum Job {
ResolveDef {
file: FileId,
fdata: Arc<FileData>,
- node: NodeId,
+ scope: ScopeId,
+ node: u32,
},
}
@@ -171,8 +175,8 @@ where
handles.iter().map(|h| h.thread().clone()).collect();
let _ = state.worker_threads.set(worker_threads);
- // if work completes before we get here, wake them so they can observe the
- // termination condition and exit.
+ // if work completes before we get here, wake them so they can observe
+ // the termination condition and exit.
state.wake_all();
for h in handles {
@@ -195,7 +199,8 @@ fn worker_loop(
}
let Some(job) = find_task(&queue, &state.globalq, &stealers) else {
- // no work available; check termination condition before parking to avoid missed wakeups
+ // no work available; check termination condition before parking to
+ // avoid missed wakeups
if state.njobs.load(Ordering::Acquire) == 0 {
break;
}
@@ -245,15 +250,25 @@ fn worker_loop(
let SubNodes(i, nstmts) = ast.sub()[ast.len() - 1];
for j in 0..nstmts {
- let node = NodeId(extra_data[(i + j) as usize]);
+ let node = extra_data[(i + j) as usize];
let fdata = fdata.clone();
state.push_job(
&queue,
- Job::ResolveDef { file, fdata, node },
+ Job::ResolveDef {
+ file,
+ fdata,
+ node,
+ scope: ScopeId::GLOBAL,
+ },
);
}
},
- Job::ResolveDef { file, fdata, node } => {
+ Job::ResolveDef {
+ file,
+ fdata,
+ scope,
+ node,
+ } => {
eprintln!("Resolving def at node index {node:?}");
},
}
@@ -263,7 +278,8 @@ fn worker_loop(
// condition and exit.
state.wake_all();
- // break here to avoid unnecessary steal attempts after work is done.
+ // break here to avoid unnecessary steal attempts after work is
+ // done.
break;
}
}
diff --git a/oryxc/src/intern.rs b/oryxc/src/intern.rs
index 3ab91cf..b0d1a00 100644
--- a/oryxc/src/intern.rs
+++ b/oryxc/src/intern.rs
@@ -1,45 +1,61 @@
-use std::hash;
+use std::hash::{
+ Hash,
+ Hasher,
+};
-use dashmap;
-use icu::normalizer;
+use dashmap::DashMap;
+use unicode_normalization::{
+ self,
+ IsNormalized,
+ UnicodeNormalization,
+};
-#[repr(transparent)]
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-pub struct Key(u32);
+// use icu::normalizer::DecomposingNormalizer;
+use crate::prelude::*;
pub struct Interner<'a> {
- map: dashmap::DashMap<UniStr<'a>, Key>,
+ map: DashMap<UniStr<'a>, SymbolId>,
store: Vec<&'a str>,
}
-#[derive(Eq)]
+#[derive(Debug, Eq)]
pub struct UniStr<'a>(pub &'a str);
-impl hash::Hash for UniStr<'_> {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
+impl Hash for UniStr<'_> {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ /* In the ASCII common case we use .bytes() to avoid decoding
+ * every codepoint (a no-op in ASCII) */
if self.0.is_ascii() {
- self.0.chars().for_each(|c| c.hash(state));
+ self.0.bytes().for_each(|c| (c as char).hash(state));
+ } else if unicode_normalization::is_nfkd_quick(self.0.chars())
+ == IsNormalized::Yes
+ {
+ self.0.chars().for_each(|c| c.hash(state));
} else {
- let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
- nfkd.normalize_iter(self.0.chars()).for_each(|c| c.hash(state));
+ self.0.nfkd().for_each(|c| c.hash(state));
}
}
}
impl PartialEq for UniStr<'_> {
fn eq(&self, other: &Self) -> bool {
- let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
- return match (self.0.is_ascii(), other.0.is_ascii()) {
+ /* Most code is ASCII, and normalization is obviously a lot
+ * slower than not normalizing, so we try to only normalize when
+ * we have to */
+ return match (
+ unicode_normalization::is_nfkd_quick(self.0.chars())
+ == IsNormalized::Yes,
+ unicode_normalization::is_nfkd_quick(other.0.chars())
+ == IsNormalized::Yes,
+ ) {
(true, true) => self.0 == other.0,
(true, false) => {
- self.0.chars().eq(nfkd.normalize_iter(other.0.chars()))
+ self.0.bytes().map(|b| b as char).eq(other.0.nfkd())
},
(false, true) => {
- other.0.chars().eq(nfkd.normalize_iter(self.0.chars()))
+ self.0.nfkd().eq(other.0.bytes().map(|b| b as char))
},
- (false, false) => nfkd
- .normalize_iter(self.0.chars())
- .eq(nfkd.normalize_iter(other.0.chars())),
+ (false, false) => self.0.nfkd().eq(other.0.nfkd()),
};
}
}
@@ -47,22 +63,74 @@ impl PartialEq for UniStr<'_> {
impl<'a> Interner<'a> {
pub fn new() -> Self {
return Interner {
- map: dashmap::DashMap::new(),
+ map: DashMap::new(),
store: Vec::new(),
};
}
- pub fn get(&self, key: Key) -> &str {
+ pub fn get(&self, key: SymbolId) -> &str {
return self.store[key.0 as usize];
}
- pub fn intern(&mut self, value: &'a str) -> Key {
+ pub fn intern(&mut self, value: &'a str) -> SymbolId {
if let Some(key) = self.map.get(&UniStr(value)) {
return *key;
}
- let key = Key(self.store.len() as u32);
+ let key = SymbolId(self.store.len() as u32);
self.map.insert(UniStr(value), key);
self.store.push(value);
return key;
}
}
+
+#[test]
+fn test_unistr_eq() {
+ assert_eq!(UniStr("fishi"), UniStr("fishᵢ"));
+ assert_eq!(UniStr("fishi"), UniStr("fishi"));
+ assert_eq!(UniStr("fishi"), UniStr("fishᵢ"));
+ assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ"));
+}
+
+#[test]
+fn test_unistr_hash() {
+ use std::hash::DefaultHasher;
+ for (lhs, rhs) in &[
+ (UniStr("fishi"), UniStr("fishᵢ")),
+ (UniStr("fishi"), UniStr("fishi")),
+ (UniStr("fishi"), UniStr("fishᵢ")),
+ (UniStr("fishᵢ"), UniStr("fishᵢ")),
+ ] {
+ let mut hashl = DefaultHasher::new();
+ let mut hashr = DefaultHasher::new();
+ lhs.hash(&mut hashl);
+ rhs.hash(&mut hashr);
+ assert_eq!(hashl.finish(), hashr.finish());
+ }
+}
+
+#[test]
+fn test_interner_intern() {
+ let xs = ["fishi", "fishi", "fishᵢ"];
+ let y = "andy";
+
+ let mut interner = Interner::new();
+ for i in 0..xs.len() {
+ for j in i..xs.len() {
+ assert_eq!(interner.intern(xs[i]), interner.intern(xs[j]));
+ }
+ }
+ for i in 0..xs.len() {
+ assert_ne!(interner.intern(y), interner.intern(xs[i]));
+ }
+}
+
+#[test]
+fn test_interner_gets_first_inserted() {
+ let mut interner = Interner::new();
+ let xs = ["fishi", "fishi", "fishᵢ"];
+ let ys = xs.iter().map(|x| interner.intern(x)).collect::<Vec<_>>();
+
+ for i in 0..ys.len() {
+ assert_eq!(interner.get(ys[i]), xs[0]);
+ }
+}
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
index e8c552f..109aed3 100644
--- a/oryxc/src/main.rs
+++ b/oryxc/src/main.rs
@@ -2,6 +2,7 @@
mod compiler;
mod errors;
+mod intern;
mod lexer;
mod parser;
mod prelude;
diff --git a/oryxc/src/prelude.rs b/oryxc/src/prelude.rs
index 78e7597..b7e80c2 100644
--- a/oryxc/src/prelude.rs
+++ b/oryxc/src/prelude.rs
@@ -8,7 +8,17 @@ use std::fmt::{
pub struct FileId(pub usize);
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
-pub struct NodeId(pub u32);
+pub struct ScopeId(pub usize);
+
+impl ScopeId {
+ pub const GLOBAL: Self = Self(0);
+}
+
+#[repr(transparent)]
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct SymbolId(pub u32);
+
+pub struct SymbolVal {}
#[derive(Clone, Copy)]
pub struct SubNodes(pub u32, pub u32);