diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2026-03-15 23:04:29 +0100 |
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2026-03-15 23:04:29 +0100 |
| commit | 5d570a58f9cb64478d2a2704c740e05f15a71235 (patch) | |
| tree | 5eeef8612e3cfa61a57e77c1cca0e088527a196e /oryxc/src/intern.rs | |
| parent | 58c294d0a5fedca250eb591ade22ab8b1a768cb5 (diff) | |
Make UniStr its own thing
Diffstat (limited to 'oryxc/src/intern.rs')
| -rw-r--r-- | oryxc/src/intern.rs | 166 |
1 files changed, 49 insertions, 117 deletions
diff --git a/oryxc/src/intern.rs b/oryxc/src/intern.rs index ea131a2..684a693 100644 --- a/oryxc/src/intern.rs +++ b/oryxc/src/intern.rs @@ -1,71 +1,24 @@ use std::hash::{ Hash, - Hasher, }; use boxcar; use dashmap::DashMap; -use unicode_normalization::{ - self, - IsNormalized, - UnicodeNormalization, -}; - -use crate::prelude::*; - -pub struct Interner<'a> { - map: DashMap<UniStr<'a>, SymbolId>, - store: boxcar::Vec<&'a str>, -} - -#[derive(Debug, Eq)] -pub struct UniStr<'a>(pub &'a str); - -impl Hash for UniStr<'_> { - fn hash<H: Hasher>(&self, state: &mut H) { - /* In the ASCII common case we use .bytes() to avoid decoding - * every codepoint (a no-op in ASCII) */ - if self.0.is_ascii() { - self.0.bytes().for_each(|c| (c as char).hash(state)); - } else if unicode_normalization::is_nfkd_quick(self.0.chars()) - == IsNormalized::Yes - { - self.0.chars().for_each(|c| c.hash(state)); - } else { - self.0.nfkd().for_each(|c| c.hash(state)); - } - } -} -impl PartialEq for UniStr<'_> { - fn eq(&self, other: &Self) -> bool { - /* Most code is ASCII, and normalization is obviously a lot - * slower than not normalizing, so we try to only normalize when - * we have to */ - - if self.0.is_ascii() && other.0.is_ascii() { - return self.0 == other.0; - } - - return match ( - unicode_normalization::is_nfkd_quick(self.0.chars()) - == IsNormalized::Yes, - unicode_normalization::is_nfkd_quick(other.0.chars()) - == IsNormalized::Yes, - ) { - (true, true) => self.0 == other.0, - (true, false) => { - self.0.chars().map(|b| b as char).eq(other.0.nfkd()) - }, - (false, true) => { - self.0.nfkd().eq(other.0.chars().map(|b| b as char)) - }, - (false, false) => self.0.nfkd().eq(other.0.nfkd()), - }; - } +pub struct Interner<V, I> +where + V: Copy + Eq + Hash, + I: Copy + Into<usize>, +{ + map: DashMap<V, I>, + store: boxcar::Vec<V>, } -impl<'a> Interner<'a> { +impl<V, I> Interner<V, I> +where + V: Copy + Eq + Hash, + I: Copy + From<usize> + Into<usize>, +{ pub fn new() -> Self { return 
Interner { map: DashMap::new(), @@ -73,70 +26,49 @@ impl<'a> Interner<'a> { }; } - pub fn get(&self, key: SymbolId) -> &str { - return self.store[key.0 as usize]; + pub fn get(&self, key: I) -> V { + return self.store[key.into()]; } - pub fn intern(&self, value: &'a str) -> SymbolId { - if let Some(key) = self.map.get(&UniStr(value)) { + pub fn intern(&self, value: V) -> I { + if let Some(key) = self.map.get(&value) { return *key; } - let key = SymbolId(self.store.push(value) as u32); - self.map.insert(UniStr(value), key); + let key = self.store.push(value).into(); + self.map.insert(value, key); return key; } } -#[test] -fn test_unistr_eq() { - assert_eq!(UniStr("fishi"), UniStr("fishᵢ")); - assert_eq!(UniStr("fishi"), UniStr("fishi")); - assert_eq!(UniStr("fishi"), UniStr("fishᵢ")); - assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ")); - assert_eq!(UniStr("corné"), UniStr("corné")); -} - -#[test] -fn test_unistr_hash() { - use std::hash::DefaultHasher; - for (lhs, rhs) in &[ - (UniStr("fishi"), UniStr("fishᵢ")), - (UniStr("fishi"), UniStr("fishi")), - (UniStr("fishi"), UniStr("fishᵢ")), - (UniStr("fishᵢ"), UniStr("fishᵢ")), - (UniStr("corné"), UniStr("corné")), - ] { - let mut hashl = DefaultHasher::new(); - let mut hashr = DefaultHasher::new(); - lhs.hash(&mut hashl); - rhs.hash(&mut hashr); - assert_eq!(hashl.finish(), hashr.finish()); - } -} - -#[test] -fn test_interner_intern() { - let xs = ["fishi", "fishi", "fishᵢ"]; - let y = "andy"; - - let mut interner = Interner::new(); - for i in 0..xs.len() { - for j in i..xs.len() { - assert_eq!(interner.intern(xs[i]), interner.intern(xs[j])); - } - } - for i in 0..xs.len() { - assert_ne!(interner.intern(y), interner.intern(xs[i])); - } -} - -#[test] -fn test_interner_gets_first_inserted() { - let mut interner = Interner::new(); - let xs = ["fishi", "fishi", "fishᵢ"]; - let ys = xs.iter().map(|x| interner.intern(x)).collect::<Vec<_>>(); - - for i in 0..ys.len() { - assert_eq!(interner.get(ys[i]), xs[0]); - } +#[cfg(test)] 
+mod tests { + use crate::unistr::UniStr; + use super::Interner; + + #[test] + fn test_interner_intern() { + let xs = [UniStr("fishi"), UniStr("fishi"), UniStr("fishᵢ")]; + let y = UniStr("andy"); + + let mut interner = Interner::<_, usize>::new(); + for i in 0..xs.len() { + for j in i..xs.len() { + assert_eq!(interner.intern(xs[i]), interner.intern(xs[j])); + } + } + for i in 0..xs.len() { + assert_ne!(interner.intern(y), interner.intern(xs[i])); + } + } + + #[test] + fn test_interner_gets_first_inserted() { + let mut interner = Interner::<_, usize>::new(); + let xs = [UniStr("fishi"), UniStr("fishi"), UniStr("fishᵢ")]; + let ys = xs.iter().map(|&x| interner.intern(x)).collect::<Vec<_>>(); + + for i in 0..ys.len() { + assert_eq!(interner.get(ys[i]), xs[0]); + } + } } |