summary | refs | log | tree | commit | diff
path: root/oryxc/src
diff options
context:
space:
mode:
Diffstat (limited to 'oryxc/src')
-rw-r--r-- oryxc/src/compiler.rs 32
-rw-r--r-- oryxc/src/intern.rs 116
-rw-r--r-- oryxc/src/main.rs 1
-rw-r--r-- oryxc/src/prelude.rs 12
4 files changed, 128 insertions, 33 deletions
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index f630166..d8bdfa2 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -25,6 +25,7 @@ use crossbeam_deque::{
Stealer,
Worker,
};
+use dashmap::DashMap;
use soa_rs::Soa;
use crate::errors::OryxError;
@@ -44,6 +45,7 @@ pub struct FileData {
pub tokens: OnceLock<Soa<Token>>,
pub ast: OnceLock<Soa<AstNode>>,
pub extra_data: OnceLock<Vec<u32>>,
+ pub scopes: Vec<DashMap<SymbolId, SymbolVal>>,
}
impl FileData {
@@ -65,6 +67,7 @@ impl FileData {
tokens: OnceLock::new(),
ast: OnceLock::new(),
extra_data: OnceLock::new(),
+ scopes: Vec::new(),
})
}
}
@@ -82,7 +85,8 @@ pub enum Job {
ResolveDef {
file: FileId,
fdata: Arc<FileData>,
- node: NodeId,
+ scope: ScopeId,
+ node: u32,
},
}
@@ -171,8 +175,8 @@ where
handles.iter().map(|h| h.thread().clone()).collect();
let _ = state.worker_threads.set(worker_threads);
- // if work completes before we get here, wake them so they can observe the
- // termination condition and exit.
+ // if work completes before we get here, wake them so they can observe
+ // the termination condition and exit.
state.wake_all();
for h in handles {
@@ -195,7 +199,8 @@ fn worker_loop(
}
let Some(job) = find_task(&queue, &state.globalq, &stealers) else {
- // no work available; check termination condition before parking to avoid missed wakeups
+ // no work available; check termination condition before parking to
+ // avoid missed wakeups
if state.njobs.load(Ordering::Acquire) == 0 {
break;
}
@@ -245,15 +250,25 @@ fn worker_loop(
let SubNodes(i, nstmts) = ast.sub()[ast.len() - 1];
for j in 0..nstmts {
- let node = NodeId(extra_data[(i + j) as usize]);
+ let node = extra_data[(i + j) as usize];
let fdata = fdata.clone();
state.push_job(
&queue,
- Job::ResolveDef { file, fdata, node },
+ Job::ResolveDef {
+ file,
+ fdata,
+ node,
+ scope: ScopeId::GLOBAL,
+ },
);
}
},
- Job::ResolveDef { file, fdata, node } => {
+ Job::ResolveDef {
+ file,
+ fdata,
+ scope,
+ node,
+ } => {
eprintln!("Resolving def at node index {node:?}");
},
}
@@ -263,7 +278,8 @@ fn worker_loop(
// condition and exit.
state.wake_all();
- // break here to avoid unnecessary steal attempts after work is done.
+ // break here to avoid unnecessary steal attempts after work is
+ // done.
break;
}
}
diff --git a/oryxc/src/intern.rs b/oryxc/src/intern.rs
index 3ab91cf..b0d1a00 100644
--- a/oryxc/src/intern.rs
+++ b/oryxc/src/intern.rs
@@ -1,45 +1,61 @@
-use std::hash;
+use std::hash::{
+ Hash,
+ Hasher,
+};
-use dashmap;
-use icu::normalizer;
+use dashmap::DashMap;
+use unicode_normalization::{
+ self,
+ IsNormalized,
+ UnicodeNormalization,
+};
-#[repr(transparent)]
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-pub struct Key(u32);
+// use icu::normalizer::DecomposingNormalizer;
+use crate::prelude::*;
+/// String interner handing out one `SymbolId` per distinct identifier.
+///
+/// Lookups go through `UniStr`, so two spellings that `UniStr` considers
+/// equal share a single id.
pub struct Interner<'a> {
- map: dashmap::DashMap<UniStr<'a>, Key>,
+ /// Maps each interned string to the id it was assigned.
+ map: DashMap<UniStr<'a>, SymbolId>,
+ /// Interned strings indexed by id; `intern` pushes the first spelling
+ /// it sees for each id.
store: Vec<&'a str>,
}
+/// Borrowed string wrapper with hand-written `Hash` and `PartialEq` impls
+/// that normalize to NFKD where needed instead of comparing raw bytes.
-#[derive(Eq)]
+#[derive(Debug, Eq)]
pub struct UniStr<'a>(pub &'a str);
+/// Hashes the NFKD form of the string as a sequence of `char`s, so spellings
+/// with the same NFKD form hash alike.
-impl hash::Hash for UniStr<'_> {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
+impl Hash for UniStr<'_> {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ /* In the ASCII common case we use .bytes() to avoid decoding
+ * every codepoint (a no-op in ASCII) */
if self.0.is_ascii() {
- self.0.chars().for_each(|c| c.hash(state));
+ /* Each byte is widened to a char so the hasher is fed the same
+ * values the .chars() branches feed it */
+ self.0.bytes().for_each(|c| (c as char).hash(state));
+ } else if unicode_normalization::is_nfkd_quick(self.0.chars())
+ == IsNormalized::Yes
+ {
+ /* The quick check confirmed NFKD: hash the chars as they are */
+ self.0.chars().for_each(|c| c.hash(state));
} else {
- let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
- nfkd.normalize_iter(self.0.chars()).for_each(|c| c.hash(state));
+ /* Any other quick-check answer: decompose while hashing */
+ self.0.nfkd().for_each(|c| c.hash(state));
}
}
}
+/// Two `UniStr`s are equal when their NFKD forms are the same sequence of
+/// `char`s.
impl PartialEq for UniStr<'_> {
fn eq(&self, other: &Self) -> bool {
- let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
- return match (self.0.is_ascii(), other.0.is_ascii()) {
+ /* Normalization is obviously a lot slower than not normalizing,
+ * so we only normalize a side when the quick check cannot
+ * confirm it is already NFKD */
+ return match (
+ unicode_normalization::is_nfkd_quick(self.0.chars())
+ == IsNormalized::Yes,
+ unicode_normalization::is_nfkd_quick(other.0.chars())
+ == IsNormalized::Yes,
+ ) {
(true, true) => self.0 == other.0,
(true, false) => {
- self.0.chars().eq(nfkd.normalize_iter(other.0.chars()))
+ /* An already-normalized side may still be non-ASCII, so it
+ * must be decoded with .chars(); widening its UTF-8 bytes
+ * to chars would compare the wrong values */
+ self.0.chars().eq(other.0.nfkd())
},
(false, true) => {
- other.0.chars().eq(nfkd.normalize_iter(self.0.chars()))
+ self.0.nfkd().eq(other.0.chars())
},
- (false, false) => nfkd
- .normalize_iter(self.0.chars())
- .eq(nfkd.normalize_iter(other.0.chars())),
+ (false, false) => self.0.nfkd().eq(other.0.nfkd()),
};
}
}
@@ -47,22 +63,74 @@ impl PartialEq for UniStr<'_> {
impl<'a> Interner<'a> {
+ /// Creates an interner with an empty map and an empty store.
pub fn new() -> Self {
return Interner {
- map: dashmap::DashMap::new(),
+ map: DashMap::new(),
store: Vec::new(),
};
}
+ /// Returns the string stored for `key`.
+ ///
+ /// Indexes `store` directly, so a key that is out of range for this
+ /// interner panics.
- pub fn get(&self, key: Key) -> &str {
+ pub fn get(&self, key: SymbolId) -> &str {
return self.store[key.0 as usize];
}
+ /// Returns the id of `value`, assigning the next free id when no equal
+ /// `UniStr` is in the map yet.
+ ///
+ /// Only the first spelling interned for an id is pushed to `store`;
+ /// later equal spellings return early with the existing id.
- pub fn intern(&mut self, value: &'a str) -> Key {
+ pub fn intern(&mut self, value: &'a str) -> SymbolId {
if let Some(key) = self.map.get(&UniStr(value)) {
return *key;
}
- let key = Key(self.store.len() as u32);
+ /* The new id is the index `value` is about to occupy in `store`.
+ * NOTE(review): `as u32` truncates silently once the store holds
+ * more than u32::MAX entries */
+ let key = SymbolId(self.store.len() as u32);
self.map.insert(UniStr(value), key);
self.store.push(value);
return key;
}
}
+
+/// Spellings that differ only by a compatibility character compare equal.
+#[test]
+fn test_unistr_eq() {
+    /* NOTE(review): the first and third pairs are textually identical as
+     * shown here; confirm whether one was meant to use another spelling */
+    let pairs = [
+        (UniStr("fishi"), UniStr("fishᵢ")),
+        (UniStr("fishi"), UniStr("fishi")),
+        (UniStr("fishi"), UniStr("fishᵢ")),
+        (UniStr("fishᵢ"), UniStr("fishᵢ")),
+    ];
+    for (lhs, rhs) in &pairs {
+        assert_eq!(lhs, rhs);
+    }
+}
+
+/// Each pair of `UniStr`s below must produce the same hash.
+#[test]
+fn test_unistr_hash() {
+    use std::hash::DefaultHasher;
+
+    /* Run one value through a fresh hasher and return the digest */
+    let digest = |s: &UniStr<'_>| -> u64 {
+        let mut hasher = DefaultHasher::new();
+        s.hash(&mut hasher);
+        hasher.finish()
+    };
+
+    let pairs = [
+        (UniStr("fishi"), UniStr("fishᵢ")),
+        (UniStr("fishi"), UniStr("fishi")),
+        (UniStr("fishi"), UniStr("fishᵢ")),
+        (UniStr("fishᵢ"), UniStr("fishᵢ")),
+    ];
+    for (lhs, rhs) in pairs.iter() {
+        assert_eq!(digest(lhs), digest(rhs));
+    }
+}
+
+/// Every spelling in `xs` interns to one shared id, and an unrelated string
+/// gets a different one.
+#[test]
+fn test_interner_intern() {
+    let xs = ["fishi", "fishi", "fishᵢ"];
+    let y = "andy";
+
+    let mut interner = Interner::new();
+    /* Compare each spelling with itself and with every later spelling */
+    for (i, &lhs) in xs.iter().enumerate() {
+        for &rhs in &xs[i..] {
+            assert_eq!(interner.intern(lhs), interner.intern(rhs));
+        }
+    }
+    for &x in &xs {
+        assert_ne!(interner.intern(y), interner.intern(x));
+    }
+}
+
+/// Looking up any of the equal spellings returns the one interned first.
+#[test]
+fn test_interner_gets_first_inserted() {
+    let xs = ["fishi", "fishi", "fishᵢ"];
+    let mut interner = Interner::new();
+
+    let mut ids = Vec::with_capacity(xs.len());
+    for &x in &xs {
+        ids.push(interner.intern(x));
+    }
+
+    for id in ids {
+        assert_eq!(interner.get(id), xs[0]);
+    }
+}
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
index e8c552f..109aed3 100644
--- a/oryxc/src/main.rs
+++ b/oryxc/src/main.rs
@@ -2,6 +2,7 @@
mod compiler;
mod errors;
+mod intern;
mod lexer;
mod parser;
mod prelude;
diff --git a/oryxc/src/prelude.rs b/oryxc/src/prelude.rs
index 78e7597..b7e80c2 100644
--- a/oryxc/src/prelude.rs
+++ b/oryxc/src/prelude.rs
@@ -8,7 +8,17 @@ use std::fmt::{
pub struct FileId(pub usize);
+/// Identifier of a scope, wrapping a `usize`.
+///
+/// NOTE(review): presumably an index into `FileData::scopes` — confirm.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
-pub struct NodeId(pub u32);
+pub struct ScopeId(pub usize);
+
+impl ScopeId {
+ /// Scope 0, the one `worker_loop` passes when it queues `ResolveDef`
+ /// jobs.
+ pub const GLOBAL: Self = Self(0);
+}
+
+/// Id of an interned string, as handed out by `Interner::intern`; also the
+/// key type of the maps in `FileData::scopes`.
+#[repr(transparent)]
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct SymbolId(pub u32);
+
+/// Value type of the maps in `FileData::scopes`; currently has no fields.
+pub struct SymbolVal {}
+/// Pair of `u32`s; `worker_loop` destructures it as (start index into
+/// `extra_data`, number of entries).
#[derive(Clone, Copy)]
pub struct SubNodes(pub u32, pub u32);