summaryrefslogtreecommitdiff
path: root/oryxc/src/unistr.rs
blob: 158fed1d9f2dafba7f42e6b0dbd80f95b0a00b90 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use std::hash::{
	Hash,
	Hasher,
};

use unicode_normalization::{
	self,
	IsNormalized,
	UnicodeNormalization,
};

/// Thin wrapper around a borrowed string slice.
///
/// The `Hash` and `PartialEq` implementations in this file operate on
/// the NFKD normalization of the wrapped string instead of its raw
/// bytes, so two differently-encoded spellings with the same NFKD form
/// hash and compare as equal.
#[repr(transparent)]
#[derive(Copy, Clone, Debug, Eq)]
pub struct UniStr<'a>(pub &'a str);

impl Hash for UniStr<'_> {
	/* Feeds the string into `state` one codepoint at a time, using the
	 * NFKD-decomposed codepoints whenever the string is not already
	 * reported as normalized */
	fn hash<H: Hasher>(&self, state: &mut H) {
		let text = self.0;

		/* Pure ASCII is the common case; walking the raw bytes spares
		 * us from decoding every codepoint and from the quick-check */
		if text.is_ascii() {
			for byte in text.bytes() {
				(byte as char).hash(state);
			}
			return;
		}

		match unicode_normalization::is_nfkd_quick(text.chars()) {
			/* Already normalized: the codepoints can be hashed as-is */
			IsNormalized::Yes => {
				for ch in text.chars() {
					ch.hash(state);
				}
			},
			/* Anything but a definite ‘yes’ goes through the
			 * decomposition iterator */
			IsNormalized::No | IsNormalized::Maybe => {
				for ch in text.nfkd() {
					ch.hash(state);
				}
			},
		}
	}
}

impl PartialEq for UniStr<'_> {
	/* Two strings compare equal when their NFKD normalizations yield
	 * the same sequence of codepoints */
	fn eq(&self, other: &Self) -> bool {
		/* Most code is ASCII, and normalization is obviously a lot
		 * slower than not normalizing, so we try to only normalize when
		 * we have to */
		if self.0.is_ascii() && other.0.is_ascii() {
			return self.0 == other.0;
		}

		let lhs_is_nfkd = unicode_normalization::is_nfkd_quick(self.0.chars())
			== IsNormalized::Yes;
		let rhs_is_nfkd = unicode_normalization::is_nfkd_quick(other.0.chars())
			== IsNormalized::Yes;

		/* Iterators do not implement PartialEq, so the codepoint
		 * streams are compared element-wise with Iterator::eq() instead
		 * of the == operator.  Only the side(s) not already in NFKD are
		 * run through the decomposition iterator. */
		match (lhs_is_nfkd, rhs_is_nfkd) {
			(true, true) => self.0 == other.0,
			(true, false) => self.0.chars().eq(other.0.nfkd()),
			(false, true) => self.0.nfkd().eq(other.0.chars()),
			(false, false) => self.0.nfkd().eq(other.0.nfkd()),
		}
	}
}

/* Unit tests for the equality and hashing behaviour of UniStr.
 *
 * NOTE(review): several of the string pairs below look identical when
 * rendered; they may differ only in their underlying codepoints or
 * normalization form — confirm at the byte level before deduplicating
 * or retyping any of the literals. */
#[cfg(test)]
mod tests {
	use std::hash::{
		DefaultHasher,
		Hash,
		Hasher,
	};

	use super::UniStr;

	/* Every pair is expected to compare equal, in both operand
	 * orders */
	#[test]
	fn test_unistr_eq() {
		assert_eq!(UniStr("fishi"), UniStr("fishᵢ"));
		assert_eq!(UniStr("fishi"), UniStr("fishi"));
		assert_eq!(UniStr("fishi"), UniStr("fishᵢ"));
		assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ"));
		assert_eq!(UniStr("corné"), UniStr("corné"));
		assert_eq!(UniStr("fishᵢ"), UniStr("fishi"));
		assert_eq!(UniStr("fishi"), UniStr("fishi"));
		assert_eq!(UniStr("fishᵢ"), UniStr("fishi"));
		assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ"));
		assert_eq!(UniStr("corné"), UniStr("corné"));
	}

	/* The same pairs as in test_unistr_eq must also produce identical
	 * hashes, as values that compare equal have to hash equally */
	#[test]
	fn test_unistr_hash() {
		for (lhs, rhs) in &[
			(UniStr("fishi"), UniStr("fishᵢ")),
			(UniStr("fishi"), UniStr("fishi")),
			(UniStr("fishi"), UniStr("fishᵢ")),
			(UniStr("fishᵢ"), UniStr("fishᵢ")),
			(UniStr("corné"), UniStr("corné")),
			(UniStr("fishᵢ"), UniStr("fishi")),
			(UniStr("fishi"), UniStr("fishi")),
			(UniStr("fishᵢ"), UniStr("fishi")),
			(UniStr("fishᵢ"), UniStr("fishᵢ")),
			(UniStr("corné"), UniStr("corné")),
		] {
			/* Each side gets a fresh hasher so the two digests are
			 * independent of one another */
			let mut hashl = DefaultHasher::new();
			let mut hashr = DefaultHasher::new();
			lhs.hash(&mut hashl);
			rhs.hash(&mut hashr);
			assert_eq!(hashl.finish(), hashr.finish());
		}
	}
}