use std::hash::{Hash, Hasher}; use unicode_normalization::{self, IsNormalized, UnicodeNormalization}; #[repr(transparent)] #[derive(Copy, Clone, Debug, Eq)] pub struct UniStr<'a>(pub &'a str); impl Hash for UniStr<'_> { fn hash(&self, state: &mut H) { /* In the ASCII common case we use .bytes() to avoid decoding * every codepoint (a no-op in ASCII) */ if self.0.is_ascii() { self.0.bytes().for_each(|c| (c as char).hash(state)); } else if unicode_normalization::is_nfkd_quick(self.0.chars()) == IsNormalized::Yes { self.0.chars().for_each(|c| c.hash(state)); } else { self.0.nfkd().for_each(|c| c.hash(state)); } } } impl PartialEq for UniStr<'_> { fn eq(&self, other: &Self) -> bool { /* Most code is ASCII, and normalization is obviously a lot * slower than not normalizing, so we try to only normalize when * we have to */ if self.0.is_ascii() && other.0.is_ascii() { return self.0 == other.0; } return match ( unicode_normalization::is_nfkd_quick(self.0.chars()) == IsNormalized::Yes, unicode_normalization::is_nfkd_quick(other.0.chars()) == IsNormalized::Yes, ) { (true, true) => self.0 == other.0, (true, false) => { self.0.chars().map(|b| b as char).eq(other.0.nfkd()) }, (false, true) => { self.0.nfkd().eq(other.0.chars().map(|b| b as char)) }, (false, false) => self.0.nfkd().eq(other.0.nfkd()), }; } } #[cfg(test)] mod tests { use std::hash::{DefaultHasher, Hash, Hasher}; use super::UniStr; #[test] fn test_unistr_eq() { assert_eq!(UniStr("fishi"), UniStr("fishᵢ")); assert_eq!(UniStr("fishi"), UniStr("fishi")); assert_eq!(UniStr("fishi"), UniStr("fishᵢ")); assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ")); assert_eq!(UniStr("corné"), UniStr("corné")); } #[test] fn test_unistr_hash() { for (lhs, rhs) in &[ (UniStr("fishi"), UniStr("fishᵢ")), (UniStr("fishi"), UniStr("fishi")), (UniStr("fishi"), UniStr("fishᵢ")), (UniStr("fishᵢ"), UniStr("fishᵢ")), (UniStr("corné"), UniStr("corné")), ] { let mut hashl = DefaultHasher::new(); let mut hashr = DefaultHasher::new(); lhs.hash(&mut hashl); rhs.hash(&mut hashr); assert_eq!(hashl.finish(), hashr.finish()); } } }