From e64fa5b6154b0049d1bdba174427bd022d93d838 Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Sun, 5 Jun 2022 09:25:15 +0200 Subject: [PATCH 1/9] Implement `HashBag::difference(&self, other: &HashBag)` This corresponds to [`HashSet::difference()`](https://doc.rust-lang.org/std/collections/hash_set/struct.HashSet.html#method.difference), and has the same semantics, and is implemented in a similar manner. This has the following impact on the public API of `hashbag`: ``` % cargo install cargo-public-api % cargo public-api --diff-git-checkouts origin/master implement-difference Removed items from the public API ================================= (none) Changed items in the public API =============================== (none) Added items to the public API ============================= +pub fn hashbag::Difference::fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result +pub fn hashbag::Difference::next(&mut self) -> Option<&'a T> +pub fn hashbag::Difference::size_hint(&self) -> (usize, Option<usize>) +pub fn hashbag::HashBag::difference<'a>(&'a self, other: &'a HashBag<T, S>) -> Difference<'a, T, S> +pub struct hashbag::Difference<'a, T, S> +pub type hashbag::Difference::Item = &'a T ``` --- src/lib.rs | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 69982b1..2ffd73a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -702,6 +702,29 @@ where } } + /// Returns an iterator that visits all values present in `self` that is not + /// present in `other`. Takes the number of occurrences into account in both + /// bags. 
+ /// + /// # Examples + /// + /// ``` + /// use hashbag::HashBag; + /// + /// let a: HashBag<_> = [1, 2, 3, 3].iter().cloned().collect(); + /// let b: HashBag<_> = [2, 3].iter().cloned().collect(); + /// let expected: HashBag<_> = [1, 3].iter().cloned().collect(); + /// let actual: HashBag<_> = a.difference(&b).cloned().collect(); + /// assert_eq!(expected, actual); + /// ``` + pub fn difference<'a>(&'a self, other: &'a HashBag) -> Difference<'a, T, S> { + Difference { + base_iter: self.iter(), + other, + removed_from_other: HashMap::new(), + } + } + /// Removes a value that is equal to the given one, and returns it if it was the last. /// /// If the matching value is not the last, a reference to the remainder is given, along with @@ -1110,6 +1133,61 @@ impl<'a, T> Iterator for Drain<'a, T> { } } +/// This `struct` is created by [`HashBag::difference`]. +/// See its documentation for more. +pub struct Difference<'a, T, S = RandomState> { + /// An iterator over "self" + base_iter: Iter<'a, T>, + + /// The bag with entries we DO NOT want to return + other: &'a HashBag, + + /// Keeps track of many times we have conceptually "consumed" an entry from + /// `other`. + removed_from_other: HashMap<&'a T, usize>, +} + +impl<'a, T: fmt::Debug> fmt::Debug for Difference<'a, T> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Difference") + .field("base_iter", &self.base_iter) + .field("other", &self.other) + .finish() + } +} + +impl<'a, T, S> Iterator for Difference<'a, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + + #[inline] + fn next(&mut self) -> Option<&'a T> { + loop { + let next = self.base_iter.next()?; + let removal_count = self.removed_from_other.entry(next).or_insert(0); + + // Keep track of how many times we have removed the current entry. + // We don't actually remove anything, we just pretend we do. 
+ *removal_count += 1; + + // If we removed MORE entries from `other`, THEN we may start + // returning entries from the base iterator. + if *removal_count > self.other.contains(next) { + return Some(next); + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (_, upper_bound) = self.base_iter.size_hint(); + (0, upper_bound) + } +} + #[cfg(test)] mod tests { use super::*; @@ -1142,4 +1220,66 @@ mod tests { assert_eq!(di.size_hint(), (2, Some(2))); assert_eq!(di.count(), 2); } + + #[test] + fn test_difference_from_empty() { + do_test_difference(&[], &[], &[]); + do_test_difference(&[], &[1], &[]); + do_test_difference(&[], &[1, 1], &[]); + do_test_difference(&[], &[1, 1, 2], &[]); + } + + #[test] + fn test_difference_from_one() { + do_test_difference(&[1], &[], &[1]); + do_test_difference(&[1], &[1], &[]); + do_test_difference(&[1], &[1, 1], &[]); + do_test_difference(&[1], &[2], &[1]); + do_test_difference(&[1], &[1, 2], &[]); + do_test_difference(&[1], &[2, 2], &[1]); + } + + #[test] + fn test_difference_from_duplicate_ones() { + do_test_difference(&[1, 1], &[], &[1, 1]); + do_test_difference(&[1, 1], &[1], &[1]); + do_test_difference(&[1, 1], &[1, 1], &[]); + do_test_difference(&[1, 1], &[2], &[1, 1]); + do_test_difference(&[1, 1], &[1, 2], &[1]); + do_test_difference(&[1, 1], &[2, 2], &[1, 1]); + } + + #[test] + fn test_difference_from_one_one_two() { + do_test_difference(&[1, 1, 2], &[], &[1, 1, 2]); + do_test_difference(&[1, 1, 2], &[1], &[1, 2]); + do_test_difference(&[1, 1, 2], &[1, 1], &[2]); + do_test_difference(&[1, 1, 2], &[2], &[1, 1]); + do_test_difference(&[1, 1, 2], &[1, 2], &[1]); + do_test_difference(&[1, 1, 2], &[2, 2], &[1, 1]); + } + + #[test] + fn test_difference_from_larger_bags() { + do_test_difference(&[1, 2, 2, 3], &[3], &[1, 2, 2]); + do_test_difference(&[1, 2, 2, 3], &[4], &[1, 2, 2, 3]); + do_test_difference(&[2, 2, 2, 2], &[2, 2], &[2, 2]); + do_test_difference(&[2, 2, 2, 2], &[], &[2, 2, 2, 2]); + } + + fn 
do_test_difference( + self_entries: &[isize], + other_entries: &[isize], + expected_entries: &[isize], + ) { + let this = self_entries.iter().collect::>(); + let other = other_entries.iter().collect::>(); + let expected = expected_entries.iter().collect::>(); + assert_eq!( + this.difference(&other) + .copied() + .collect::>(), + expected + ); + } } From e70be47b8b837619446d5337313665cb8b0b2c7d Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Sun, 5 Jun 2022 10:11:32 +0200 Subject: [PATCH 2/9] Boost `Difference` code coverage --- src/lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 2ffd73a..242dcdd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1221,6 +1221,15 @@ mod tests { assert_eq!(di.count(), 2); } + #[test] + fn test_difference_debug_and_size_hint() { + let vikings: HashBag<&'static str> = ["Einar", "Olaf", "Harald"].iter().cloned().collect(); + let killed_vikings: HashBag<&'static str> = ["Einar"].iter().cloned().collect(); + let alive_vikings = vikings.difference(&killed_vikings); + println!("{:?}", alive_vikings); + assert_eq!(alive_vikings.size_hint(), (0, Some(3))); + } + #[test] fn test_difference_from_empty() { do_test_difference(&[], &[], &[]); From 4b9fe778dbc58fffd38a093f0b9fec0a781706c9 Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Sun, 12 Jun 2022 20:41:08 +0200 Subject: [PATCH 3/9] Make `HashBag::difference()` iterate over `(&'a T, usize)` --- src/lib.rs | 55 ++++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 242dcdd..849edcf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -710,18 +710,20 @@ where /// /// ``` /// use hashbag::HashBag; + /// use std::collections::HashSet; + /// use std::iter::FromIterator; /// /// let a: HashBag<_> = [1, 2, 3, 3].iter().cloned().collect(); /// let b: HashBag<_> = [2, 3].iter().cloned().collect(); - /// let expected: HashBag<_> = [1, 3].iter().cloned().collect(); - 
/// let actual: HashBag<_> = a.difference(&b).cloned().collect(); + /// let expected: HashSet<_> = HashSet::from_iter([(&1, 1), (&3, 1)]); + /// let actual: HashSet<_> = a.difference(&b).collect(); /// assert_eq!(expected, actual); /// ``` pub fn difference<'a>(&'a self, other: &'a HashBag) -> Difference<'a, T, S> { Difference { - base_iter: self.iter(), + items: self.items.iter(), other, - removed_from_other: HashMap::new(), + upper_bound: self.count, } } @@ -1136,21 +1138,20 @@ impl<'a, T> Iterator for Drain<'a, T> { /// This `struct` is created by [`HashBag::difference`]. /// See its documentation for more. pub struct Difference<'a, T, S = RandomState> { - /// An iterator over "self" - base_iter: Iter<'a, T>, + /// An iterator over `self` items + items: IterInner<'a, T>, /// The bag with entries we DO NOT want to return other: &'a HashBag, - /// Keeps track of many times we have conceptually "consumed" an entry from - /// `other`. - removed_from_other: HashMap<&'a T, usize>, + /// For `size_hint()` + upper_bound: usize, } impl<'a, T: fmt::Debug> fmt::Debug for Difference<'a, T> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("Difference") - .field("base_iter", &self.base_iter) + .field("items", &self.items) .field("other", &self.other) .finish() } @@ -1161,30 +1162,22 @@ where T: Eq + Hash, S: BuildHasher, { - type Item = &'a T; + type Item = (&'a T, usize); #[inline] - fn next(&mut self) -> Option<&'a T> { + fn next(&mut self) -> Option { loop { - let next = self.base_iter.next()?; - let removal_count = self.removed_from_other.entry(next).or_insert(0); - - // Keep track of how many times we have removed the current entry. - // We don't actually remove anything, we just pretend we do. - *removal_count += 1; - - // If we removed MORE entries from `other`, THEN we may start - // returning entries from the base iterator. 
- if *removal_count > self.other.contains(next) { - return Some(next); + let (t, n) = self.items.next()?; + let other_n = self.other.contains(t); + if other_n < *n { + return Some((t, *n - other_n)); } } } #[inline] fn size_hint(&self) -> (usize, Option) { - let (_, upper_bound) = self.base_iter.size_hint(); - (0, upper_bound) + (0, Some(self.upper_bound)) } } @@ -1284,11 +1277,11 @@ mod tests { let this = self_entries.iter().collect::>(); let other = other_entries.iter().collect::>(); let expected = expected_entries.iter().collect::>(); - assert_eq!( - this.difference(&other) - .copied() - .collect::>(), - expected - ); + let mut actual = HashBag::new(); + for (t, n) in this.difference(&other) { + actual.insert_many(*t, n); + } + + assert_eq!(actual, expected); } } From 5734de6029aac30e8ffbc26611decd8c3aac2eae Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Sun, 19 Jun 2022 10:34:39 +0200 Subject: [PATCH 4/9] Rename `difference()` to `subtract()` and make `size_hint()` dynamic --- src/lib.rs | 105 ++++++++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 849edcf..77e2783 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -716,14 +716,13 @@ where /// let a: HashBag<_> = [1, 2, 3, 3].iter().cloned().collect(); /// let b: HashBag<_> = [2, 3].iter().cloned().collect(); /// let expected: HashSet<_> = HashSet::from_iter([(&1, 1), (&3, 1)]); - /// let actual: HashSet<_> = a.difference(&b).collect(); + /// let actual: HashSet<_> = a.subtract(&b).collect(); /// assert_eq!(expected, actual); /// ``` - pub fn difference<'a>(&'a self, other: &'a HashBag) -> Difference<'a, T, S> { - Difference { + pub fn subtract<'a>(&'a self, other: &'a HashBag) -> Subtract<'a, T, S> { + Subtract { items: self.items.iter(), other, - upper_bound: self.count, } } @@ -1135,29 +1134,26 @@ impl<'a, T> Iterator for Drain<'a, T> { } } -/// This `struct` is created by [`HashBag::difference`]. 
+/// This `struct` is created by [`HashBag::subtract`]. /// See its documentation for more. -pub struct Difference<'a, T, S = RandomState> { +pub struct Subtract<'a, T, S = RandomState> { /// An iterator over `self` items items: IterInner<'a, T>, /// The bag with entries we DO NOT want to return other: &'a HashBag, - - /// For `size_hint()` - upper_bound: usize, } -impl<'a, T: fmt::Debug> fmt::Debug for Difference<'a, T> { +impl<'a, T: fmt::Debug> fmt::Debug for Subtract<'a, T> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt.debug_struct("Difference") + fmt.debug_struct("Subtract") .field("items", &self.items) .field("other", &self.other) .finish() } } -impl<'a, T, S> Iterator for Difference<'a, T, S> +impl<'a, T, S> Iterator for Subtract<'a, T, S> where T: Eq + Hash, S: BuildHasher, @@ -1177,7 +1173,7 @@ where #[inline] fn size_hint(&self) -> (usize, Option) { - (0, Some(self.upper_bound)) + self.items.size_hint() } } @@ -1215,61 +1211,72 @@ mod tests { } #[test] - fn test_difference_debug_and_size_hint() { + fn test_subtract_debug_and_size_hint() { let vikings: HashBag<&'static str> = ["Einar", "Olaf", "Harald"].iter().cloned().collect(); let killed_vikings: HashBag<&'static str> = ["Einar"].iter().cloned().collect(); - let alive_vikings = vikings.difference(&killed_vikings); + let mut alive_vikings = vikings.subtract(&killed_vikings); println!("{:?}", alive_vikings); - assert_eq!(alive_vikings.size_hint(), (0, Some(3))); + + assert_eq!(alive_vikings.size_hint(), (3, Some(3))); + + // Note that we can't assume in what order the vikings will come, only + // that there shall be Some(_) viking two times + alive_vikings.next().unwrap(); + alive_vikings.next().unwrap(); + assert_eq!(alive_vikings.next(), None); + + // At this point we know that the size hint shall be able to say that + // there are no more items + assert_eq!(alive_vikings.size_hint(), (0, Some(0))); } #[test] - fn test_difference_from_empty() { - do_test_difference(&[], &[], &[]); 
- do_test_difference(&[], &[1], &[]); - do_test_difference(&[], &[1, 1], &[]); - do_test_difference(&[], &[1, 1, 2], &[]); + fn test_subtract_from_empty() { + do_test_subtract(&[], &[], &[]); + do_test_subtract(&[], &[1], &[]); + do_test_subtract(&[], &[1, 1], &[]); + do_test_subtract(&[], &[1, 1, 2], &[]); } #[test] - fn test_difference_from_one() { - do_test_difference(&[1], &[], &[1]); - do_test_difference(&[1], &[1], &[]); - do_test_difference(&[1], &[1, 1], &[]); - do_test_difference(&[1], &[2], &[1]); - do_test_difference(&[1], &[1, 2], &[]); - do_test_difference(&[1], &[2, 2], &[1]); + fn test_subtract_from_one() { + do_test_subtract(&[1], &[], &[1]); + do_test_subtract(&[1], &[1], &[]); + do_test_subtract(&[1], &[1, 1], &[]); + do_test_subtract(&[1], &[2], &[1]); + do_test_subtract(&[1], &[1, 2], &[]); + do_test_subtract(&[1], &[2, 2], &[1]); } #[test] - fn test_difference_from_duplicate_ones() { - do_test_difference(&[1, 1], &[], &[1, 1]); - do_test_difference(&[1, 1], &[1], &[1]); - do_test_difference(&[1, 1], &[1, 1], &[]); - do_test_difference(&[1, 1], &[2], &[1, 1]); - do_test_difference(&[1, 1], &[1, 2], &[1]); - do_test_difference(&[1, 1], &[2, 2], &[1, 1]); + fn test_subtract_from_duplicate_ones() { + do_test_subtract(&[1, 1], &[], &[1, 1]); + do_test_subtract(&[1, 1], &[1], &[1]); + do_test_subtract(&[1, 1], &[1, 1], &[]); + do_test_subtract(&[1, 1], &[2], &[1, 1]); + do_test_subtract(&[1, 1], &[1, 2], &[1]); + do_test_subtract(&[1, 1], &[2, 2], &[1, 1]); } #[test] - fn test_difference_from_one_one_two() { - do_test_difference(&[1, 1, 2], &[], &[1, 1, 2]); - do_test_difference(&[1, 1, 2], &[1], &[1, 2]); - do_test_difference(&[1, 1, 2], &[1, 1], &[2]); - do_test_difference(&[1, 1, 2], &[2], &[1, 1]); - do_test_difference(&[1, 1, 2], &[1, 2], &[1]); - do_test_difference(&[1, 1, 2], &[2, 2], &[1, 1]); + fn test_subtract_from_one_one_two() { + do_test_subtract(&[1, 1, 2], &[], &[1, 1, 2]); + do_test_subtract(&[1, 1, 2], &[1], &[1, 2]); + 
do_test_subtract(&[1, 1, 2], &[1, 1], &[2]); + do_test_subtract(&[1, 1, 2], &[2], &[1, 1]); + do_test_subtract(&[1, 1, 2], &[1, 2], &[1]); + do_test_subtract(&[1, 1, 2], &[2, 2], &[1, 1]); } #[test] - fn test_difference_from_larger_bags() { - do_test_difference(&[1, 2, 2, 3], &[3], &[1, 2, 2]); - do_test_difference(&[1, 2, 2, 3], &[4], &[1, 2, 2, 3]); - do_test_difference(&[2, 2, 2, 2], &[2, 2], &[2, 2]); - do_test_difference(&[2, 2, 2, 2], &[], &[2, 2, 2, 2]); + fn test_subtract_from_larger_bags() { + do_test_subtract(&[1, 2, 2, 3], &[3], &[1, 2, 2]); + do_test_subtract(&[1, 2, 2, 3], &[4], &[1, 2, 2, 3]); + do_test_subtract(&[2, 2, 2, 2], &[2, 2], &[2, 2]); + do_test_subtract(&[2, 2, 2, 2], &[], &[2, 2, 2, 2]); } - fn do_test_difference( + fn do_test_subtract( self_entries: &[isize], other_entries: &[isize], expected_entries: &[isize], @@ -1278,7 +1285,7 @@ mod tests { let other = other_entries.iter().collect::>(); let expected = expected_entries.iter().collect::>(); let mut actual = HashBag::new(); - for (t, n) in this.difference(&other) { + for (t, n) in this.subtract(&other) { actual.insert_many(*t, n); } From e7797c4c6dff2ee9454a820c6a5a5fe15eb231fd Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Sun, 19 Jun 2022 16:12:01 +0200 Subject: [PATCH 5/9] Lower bound on `Subtract::size_hint()` must be 0 --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 77e2783..7d9d868 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1173,7 +1173,7 @@ where #[inline] fn size_hint(&self) -> (usize, Option) { - self.items.size_hint() + (0, self.items.size_hint().1) } } @@ -1217,7 +1217,7 @@ mod tests { let mut alive_vikings = vikings.subtract(&killed_vikings); println!("{:?}", alive_vikings); - assert_eq!(alive_vikings.size_hint(), (3, Some(3))); + assert_eq!(alive_vikings.size_hint(), (0, Some(3))); // Note that we can't assume in what order the vikings will come, only // that there shall be Some(_) viking two times 
From bdba80843769a4e7b7271deeccbf4ea02b5f956e Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Mon, 20 Jun 2022 07:19:40 +0200 Subject: [PATCH 6/9] Rename `subtract()` back to `difference()` again --- src/lib.rs | 86 +++++++++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7d9d868..f04608d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -716,11 +716,11 @@ where /// let a: HashBag<_> = [1, 2, 3, 3].iter().cloned().collect(); /// let b: HashBag<_> = [2, 3].iter().cloned().collect(); /// let expected: HashSet<_> = HashSet::from_iter([(&1, 1), (&3, 1)]); - /// let actual: HashSet<_> = a.subtract(&b).collect(); + /// let actual: HashSet<_> = a.difference(&b).collect(); /// assert_eq!(expected, actual); /// ``` - pub fn subtract<'a>(&'a self, other: &'a HashBag) -> Subtract<'a, T, S> { - Subtract { + pub fn difference<'a>(&'a self, other: &'a HashBag) -> Difference<'a, T, S> { + Difference { items: self.items.iter(), other, } @@ -1134,9 +1134,9 @@ impl<'a, T> Iterator for Drain<'a, T> { } } -/// This `struct` is created by [`HashBag::subtract`]. +/// This `struct` is created by [`HashBag::difference`]. /// See its documentation for more. 
-pub struct Subtract<'a, T, S = RandomState> { +pub struct Difference<'a, T, S = RandomState> { /// An iterator over `self` items items: IterInner<'a, T>, @@ -1144,16 +1144,16 @@ pub struct Subtract<'a, T, S = RandomState> { other: &'a HashBag, } -impl<'a, T: fmt::Debug> fmt::Debug for Subtract<'a, T> { +impl<'a, T: fmt::Debug> fmt::Debug for Difference<'a, T> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt.debug_struct("Subtract") + fmt.debug_struct("Difference") .field("items", &self.items) .field("other", &self.other) .finish() } } -impl<'a, T, S> Iterator for Subtract<'a, T, S> +impl<'a, T, S> Iterator for Difference<'a, T, S> where T: Eq + Hash, S: BuildHasher, @@ -1211,10 +1211,10 @@ mod tests { } #[test] - fn test_subtract_debug_and_size_hint() { + fn test_difference_debug_and_size_hint() { let vikings: HashBag<&'static str> = ["Einar", "Olaf", "Harald"].iter().cloned().collect(); let killed_vikings: HashBag<&'static str> = ["Einar"].iter().cloned().collect(); - let mut alive_vikings = vikings.subtract(&killed_vikings); + let mut alive_vikings = vikings.difference(&killed_vikings); println!("{:?}", alive_vikings); assert_eq!(alive_vikings.size_hint(), (0, Some(3))); @@ -1231,52 +1231,52 @@ mod tests { } #[test] - fn test_subtract_from_empty() { - do_test_subtract(&[], &[], &[]); - do_test_subtract(&[], &[1], &[]); - do_test_subtract(&[], &[1, 1], &[]); - do_test_subtract(&[], &[1, 1, 2], &[]); + fn test_difference_from_empty() { + do_test_difference(&[], &[], &[]); + do_test_difference(&[], &[1], &[]); + do_test_difference(&[], &[1, 1], &[]); + do_test_difference(&[], &[1, 1, 2], &[]); } #[test] - fn test_subtract_from_one() { - do_test_subtract(&[1], &[], &[1]); - do_test_subtract(&[1], &[1], &[]); - do_test_subtract(&[1], &[1, 1], &[]); - do_test_subtract(&[1], &[2], &[1]); - do_test_subtract(&[1], &[1, 2], &[]); - do_test_subtract(&[1], &[2, 2], &[1]); + fn test_difference_from_one() { + do_test_difference(&[1], &[], &[1]); + 
do_test_difference(&[1], &[1], &[]); + do_test_difference(&[1], &[1, 1], &[]); + do_test_difference(&[1], &[2], &[1]); + do_test_difference(&[1], &[1, 2], &[]); + do_test_difference(&[1], &[2, 2], &[1]); } #[test] - fn test_subtract_from_duplicate_ones() { - do_test_subtract(&[1, 1], &[], &[1, 1]); - do_test_subtract(&[1, 1], &[1], &[1]); - do_test_subtract(&[1, 1], &[1, 1], &[]); - do_test_subtract(&[1, 1], &[2], &[1, 1]); - do_test_subtract(&[1, 1], &[1, 2], &[1]); - do_test_subtract(&[1, 1], &[2, 2], &[1, 1]); + fn test_difference_from_duplicate_ones() { + do_test_difference(&[1, 1], &[], &[1, 1]); + do_test_difference(&[1, 1], &[1], &[1]); + do_test_difference(&[1, 1], &[1, 1], &[]); + do_test_difference(&[1, 1], &[2], &[1, 1]); + do_test_difference(&[1, 1], &[1, 2], &[1]); + do_test_difference(&[1, 1], &[2, 2], &[1, 1]); } #[test] - fn test_subtract_from_one_one_two() { - do_test_subtract(&[1, 1, 2], &[], &[1, 1, 2]); - do_test_subtract(&[1, 1, 2], &[1], &[1, 2]); - do_test_subtract(&[1, 1, 2], &[1, 1], &[2]); - do_test_subtract(&[1, 1, 2], &[2], &[1, 1]); - do_test_subtract(&[1, 1, 2], &[1, 2], &[1]); - do_test_subtract(&[1, 1, 2], &[2, 2], &[1, 1]); + fn test_difference_from_one_one_two() { + do_test_difference(&[1, 1, 2], &[], &[1, 1, 2]); + do_test_difference(&[1, 1, 2], &[1], &[1, 2]); + do_test_difference(&[1, 1, 2], &[1, 1], &[2]); + do_test_difference(&[1, 1, 2], &[2], &[1, 1]); + do_test_difference(&[1, 1, 2], &[1, 2], &[1]); + do_test_difference(&[1, 1, 2], &[2, 2], &[1, 1]); } #[test] - fn test_subtract_from_larger_bags() { - do_test_subtract(&[1, 2, 2, 3], &[3], &[1, 2, 2]); - do_test_subtract(&[1, 2, 2, 3], &[4], &[1, 2, 2, 3]); - do_test_subtract(&[2, 2, 2, 2], &[2, 2], &[2, 2]); - do_test_subtract(&[2, 2, 2, 2], &[], &[2, 2, 2, 2]); + fn test_difference_from_larger_bags() { + do_test_difference(&[1, 2, 2, 3], &[3], &[1, 2, 2]); + do_test_difference(&[1, 2, 2, 3], &[4], &[1, 2, 2, 3]); + do_test_difference(&[2, 2, 2, 2], &[2, 2], &[2, 2]); + 
do_test_difference(&[2, 2, 2, 2], &[], &[2, 2, 2, 2]); } - fn do_test_subtract( + fn do_test_difference( self_entries: &[isize], other_entries: &[isize], expected_entries: &[isize], @@ -1285,7 +1285,7 @@ mod tests { let other = other_entries.iter().collect::>(); let expected = expected_entries.iter().collect::>(); let mut actual = HashBag::new(); - for (t, n) in this.subtract(&other) { + for (t, n) in this.difference(&other) { actual.insert_many(*t, n); } From 217ef6fcfdfcafe8e6ef5d5c66eab9deedfe7c04 Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Mon, 20 Jun 2022 07:29:12 +0200 Subject: [PATCH 7/9] Improve test of `Difference::size_hint()` --- src/lib.rs | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f04608d..b0e412d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1211,23 +1211,31 @@ mod tests { } #[test] - fn test_difference_debug_and_size_hint() { + fn test_difference_debug() { let vikings: HashBag<&'static str> = ["Einar", "Olaf", "Harald"].iter().cloned().collect(); let killed_vikings: HashBag<&'static str> = ["Einar"].iter().cloned().collect(); - let mut alive_vikings = vikings.difference(&killed_vikings); + let alive_vikings = vikings.difference(&killed_vikings); println!("{:?}", alive_vikings); + } - assert_eq!(alive_vikings.size_hint(), (0, Some(3))); - - // Note that we can't assume in what order the vikings will come, only - // that there shall be Some(_) viking two times - alive_vikings.next().unwrap(); - alive_vikings.next().unwrap(); - assert_eq!(alive_vikings.next(), None); - - // At this point we know that the size hint shall be able to say that - // there are no more items - assert_eq!(alive_vikings.size_hint(), (0, Some(0))); + #[test] + fn test_difference_size_hint() { + let bag: HashBag<_> = [3, 2, 1].iter().cloned().collect(); + let empty_bag = HashBag::new(); + let mut difference = bag.difference(&empty_bag); + + // Since the difference has the same number 
of entries as the bag, we + // can predict how the size_hint() will behave, because the iteration + // order does not matter + assert_eq!(difference.size_hint(), (0, Some(3))); + difference.next().unwrap(); + assert_eq!(difference.size_hint(), (0, Some(2))); + difference.next().unwrap(); + assert_eq!(difference.size_hint(), (0, Some(1))); + difference.next().unwrap(); + assert_eq!(difference.size_hint(), (0, Some(0))); + assert_eq!(difference.next(), None); + assert_eq!(difference.size_hint(), (0, Some(0))); } #[test] From 03221adb92f9739d25cabbdefbad71f92361da55 Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Mon, 20 Jun 2022 07:40:03 +0200 Subject: [PATCH 8/9] Improve `HashBag::difference()` doc wording --- src/lib.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b0e412d..9cbdd76 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -702,9 +702,10 @@ where } } - /// Returns an iterator that visits all values present in `self` that is not - /// present in `other`. Takes the number of occurrences into account in both - /// bags. + /// Returns an iterator over all the elements that are in `self` with a + /// higher occurrence count than in `other`. The count in the returned + /// iterator represents how many more of a given element are in `self` than + /// `other`. /// /// # Examples /// From 70ff9c1d344cfd526633135fbc9fba055864c21b Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Mon, 20 Jun 2022 11:04:50 +0200 Subject: [PATCH 9/9] Implement `HashBag::difference()` with an anonymous `impl Iterator` Instead of a struct. 
Co-authored-by: Federico Stra --- src/lib.rs | 68 ++++++++++-------------------------------------------- 1 file changed, 12 insertions(+), 56 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9cbdd76..216b324 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -720,11 +720,18 @@ where /// let actual: HashSet<_> = a.difference(&b).collect(); /// assert_eq!(expected, actual); /// ``` - pub fn difference<'a>(&'a self, other: &'a HashBag) -> Difference<'a, T, S> { - Difference { - items: self.items.iter(), - other, - } + pub fn difference<'a>( + &'a self, + other: &'a HashBag<T, S>, + ) -> impl Iterator<Item = (&'a T, usize)> { + self.items.iter().filter_map(move |(x, &self_count)| { + let other_count = other.contains(x); + if self_count > other_count { + Some((x, self_count - other_count)) + } else { + None + } + }) } /// Removes a value that is equal to the given one, and returns it if it was the last. @@ -1135,49 +1142,6 @@ impl<'a, T> Iterator for Drain<'a, T> { } } -/// This `struct` is created by [`HashBag::difference`]. -/// See its documentation for more. 
-pub struct Difference<'a, T, S = RandomState> { - /// An iterator over `self` items - items: IterInner<'a, T>, - - /// The bag with entries we DO NOT want to return - other: &'a HashBag, -} - -impl<'a, T: fmt::Debug> fmt::Debug for Difference<'a, T> { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt.debug_struct("Difference") - .field("items", &self.items) - .field("other", &self.other) - .finish() - } -} - -impl<'a, T, S> Iterator for Difference<'a, T, S> -where - T: Eq + Hash, - S: BuildHasher, -{ - type Item = (&'a T, usize); - - #[inline] - fn next(&mut self) -> Option { - loop { - let (t, n) = self.items.next()?; - let other_n = self.other.contains(t); - if other_n < *n { - return Some((t, *n - other_n)); - } - } - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (0, self.items.size_hint().1) - } -} - #[cfg(test)] mod tests { use super::*; @@ -1211,14 +1175,6 @@ mod tests { assert_eq!(di.count(), 2); } - #[test] - fn test_difference_debug() { - let vikings: HashBag<&'static str> = ["Einar", "Olaf", "Harald"].iter().cloned().collect(); - let killed_vikings: HashBag<&'static str> = ["Einar"].iter().cloned().collect(); - let alive_vikings = vikings.difference(&killed_vikings); - println!("{:?}", alive_vikings); - } - #[test] fn test_difference_size_hint() { let bag: HashBag<_> = [3, 2, 1].iter().cloned().collect();