diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2026-03-15 23:04:29 +0100 |
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2026-03-15 23:04:29 +0100 |
| commit | 5d570a58f9cb64478d2a2704c740e05f15a71235 (patch) | |
| tree | 5eeef8612e3cfa61a57e77c1cca0e088527a196e /oryxc/src/unistr.rs | |
| parent | 58c294d0a5fedca250eb591ade22ab8b1a768cb5 (diff) | |
Make UniStr its own thing
Diffstat (limited to 'oryxc/src/unistr.rs')
| -rw-r--r-- | oryxc/src/unistr.rs | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/oryxc/src/unistr.rs b/oryxc/src/unistr.rs new file mode 100644 index 0000000..5dc8160 --- /dev/null +++ b/oryxc/src/unistr.rs @@ -0,0 +1,84 @@ +use std::hash::{Hash, Hasher}; + +use unicode_normalization::{self, IsNormalized, UnicodeNormalization}; + +#[repr(transparent)] +#[derive(Copy, Clone, Debug, Eq)] +pub struct UniStr<'a>(pub &'a str); + +impl Hash for UniStr<'_> { + fn hash<H: Hasher>(&self, state: &mut H) { + /* In the ASCII common case we use .bytes() to avoid decoding + * every codepoint (a no-op in ASCII) */ + if self.0.is_ascii() { + self.0.bytes().for_each(|c| (c as char).hash(state)); + } else if unicode_normalization::is_nfkd_quick(self.0.chars()) + == IsNormalized::Yes + { + self.0.chars().for_each(|c| c.hash(state)); + } else { + self.0.nfkd().for_each(|c| c.hash(state)); + } + } +} + +impl PartialEq for UniStr<'_> { + fn eq(&self, other: &Self) -> bool { + /* Most code is ASCII, and normalization is obviously a lot + * slower than not normalizing, so we try to only normalize when + * we have to */ + + if self.0.is_ascii() && other.0.is_ascii() { + return self.0 == other.0; + } + + return match ( + unicode_normalization::is_nfkd_quick(self.0.chars()) + == IsNormalized::Yes, + unicode_normalization::is_nfkd_quick(other.0.chars()) + == IsNormalized::Yes, + ) { + (true, true) => self.0 == other.0, + (true, false) => { + self.0.chars().map(|b| b as char).eq(other.0.nfkd()) + }, + (false, true) => { + self.0.nfkd().eq(other.0.chars().map(|b| b as char)) + }, + (false, false) => self.0.nfkd().eq(other.0.nfkd()), + }; + } +} + +#[cfg(test)] +mod tests { + use std::hash::{DefaultHasher, Hash, Hasher}; + + use super::UniStr; + + #[test] + fn test_unistr_eq() { + assert_eq!(UniStr("fishi"), UniStr("fishᵢ")); + assert_eq!(UniStr("fishi"), UniStr("fishi")); + assert_eq!(UniStr("fishi"), UniStr("fishᵢ")); + assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ")); + assert_eq!(UniStr("corné"), UniStr("corné")); + } + + #[test] + fn test_unistr_hash() { + for 
(lhs, rhs) in &[ + (UniStr("fishi"), UniStr("fishᵢ")), + (UniStr("fishi"), UniStr("fishi")), + (UniStr("fishi"), UniStr("fishᵢ")), + (UniStr("fishᵢ"), UniStr("fishᵢ")), + (UniStr("corné"), UniStr("corné")), + ] { + let mut hashl = DefaultHasher::new(); + let mut hashr = DefaultHasher::new(); + lhs.hash(&mut hashl); + rhs.hash(&mut hashr); + assert_eq!(hashl.finish(), hashr.finish()); + } + } +} |