summaryrefslogtreecommitdiff
path: root/oryxc/src/unistr.rs
diff options
context:
space:
mode:
Diffstat (limited to 'oryxc/src/unistr.rs')
-rw-r--r--oryxc/src/unistr.rs84
1 files changed, 84 insertions, 0 deletions
diff --git a/oryxc/src/unistr.rs b/oryxc/src/unistr.rs
new file mode 100644
index 0000000..5dc8160
--- /dev/null
+++ b/oryxc/src/unistr.rs
@@ -0,0 +1,84 @@
+use std::hash::{Hash, Hasher};
+
+use unicode_normalization::{self, IsNormalized, UnicodeNormalization};
+
/// Borrowed string slice whose equality and hashing are defined by
/// Unicode content rather than raw bytes: the `PartialEq`/`Hash` impls
/// below treat two strings as equal when their NFKD normalizations
/// match (e.g. a subscript letter equals its plain compatibility form).
///
/// `#[repr(transparent)]` keeps the layout identical to `&str`.
#[repr(transparent)]
#[derive(Copy, Clone, Debug, Eq)]
pub struct UniStr<'a>(pub &'a str);
+
+impl Hash for UniStr<'_> {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ /* In the ASCII common case we use .bytes() to avoid decoding
+ * every codepoint (a no-op in ASCII) */
+ if self.0.is_ascii() {
+ self.0.bytes().for_each(|c| (c as char).hash(state));
+ } else if unicode_normalization::is_nfkd_quick(self.0.chars())
+ == IsNormalized::Yes
+ {
+ self.0.chars().for_each(|c| c.hash(state));
+ } else {
+ self.0.nfkd().for_each(|c| c.hash(state));
+ }
+ }
+}
+
+impl PartialEq for UniStr<'_> {
+ fn eq(&self, other: &Self) -> bool {
+ /* Most code is ASCII, and normalization is obviously a lot
+ * slower than not normalizing, so we try to only normalize when
+ * we have to */
+
+ if self.0.is_ascii() && other.0.is_ascii() {
+ return self.0 == other.0;
+ }
+
+ return match (
+ unicode_normalization::is_nfkd_quick(self.0.chars())
+ == IsNormalized::Yes,
+ unicode_normalization::is_nfkd_quick(other.0.chars())
+ == IsNormalized::Yes,
+ ) {
+ (true, true) => self.0 == other.0,
+ (true, false) => {
+ self.0.chars().map(|b| b as char).eq(other.0.nfkd())
+ },
+ (false, true) => {
+ self.0.nfkd().eq(other.0.chars().map(|b| b as char))
+ },
+ (false, false) => self.0.nfkd().eq(other.0.nfkd()),
+ };
+ }
+}
+
#[cfg(test)]
mod tests {
    use std::hash::{DefaultHasher, Hash, Hasher};

    use super::UniStr;

    /// Equality must fold Unicode compatibility forms: several literal
    /// pairs below render identically but presumably differ in actual
    /// codepoints (subscript "i" vs ASCII "i", precomposed "é" vs "e" +
    /// combining acute) — NOTE(review): confirm the exact byte sequences
    /// of these literals with a hex dump; they are invisible in source.
    #[test]
    fn test_unistr_eq() {
        assert_eq!(UniStr("fishi"), UniStr("fishᵢ"));
        assert_eq!(UniStr("fishi"), UniStr("fishi"));
        assert_eq!(UniStr("fishi"), UniStr("fishᵢ"));
        assert_eq!(UniStr("fishᵢ"), UniStr("fishᵢ"));
        assert_eq!(UniStr("corné"), UniStr("corné"));
    }

    /// The Hash/Eq contract requires equal values to hash equally; reuse
    /// the same pairs as the equality test and compare hasher outputs.
    #[test]
    fn test_unistr_hash() {
        for (lhs, rhs) in &[
            (UniStr("fishi"), UniStr("fishᵢ")),
            (UniStr("fishi"), UniStr("fishi")),
            (UniStr("fishi"), UniStr("fishᵢ")),
            (UniStr("fishᵢ"), UniStr("fishᵢ")),
            (UniStr("corné"), UniStr("corné")),
        ] {
            // Fresh hasher per side: DefaultHasher accumulates state, so
            // a shared hasher would not compare single-value hashes.
            let mut hashl = DefaultHasher::new();
            let mut hashr = DefaultHasher::new();
            lhs.hash(&mut hashl);
            rhs.hash(&mut hashr);
            assert_eq!(hashl.finish(), hashr.finish());
        }
    }
}