Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a8d0547

Browse files
committed Feb 3, 2024 ·
Add map and set extract_if
1 parent 2044a2e commit a8d0547

File tree

7 files changed

+317
-5
lines changed

7 files changed

+317
-5
lines changed
 

‎src/map.rs

+41-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ mod tests;
1515
pub use self::core::raw_entry_v1::{self, RawEntryApiV1};
1616
pub use self::core::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
1717
pub use self::iter::{
18-
Drain, IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Splice, Values, ValuesMut,
18+
Drain, ExtractIf, IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Splice, Values,
19+
ValuesMut,
1920
};
2021
pub use self::slice::Slice;
2122
pub use crate::mutable_keys::MutableKeys;
@@ -33,7 +34,7 @@ use alloc::vec::Vec;
3334
#[cfg(feature = "std")]
3435
use std::collections::hash_map::RandomState;
3536

36-
use self::core::IndexMapCore;
37+
pub(crate) use self::core::{ExtractCore, IndexMapCore};
3738
use crate::util::{third, try_simplify_range};
3839
use crate::{Bucket, Entries, Equivalent, HashValue, TryReserveError};
3940

@@ -301,6 +302,44 @@ impl<K, V, S> IndexMap<K, V, S> {
301302
Drain::new(self.core.drain(range))
302303
}
303304

305+
/// Creates an iterator which uses a closure to determine if an element should be removed.
306+
///
307+
/// If the closure returns true, the element is removed from the map and yielded.
308+
/// If the closure returns false, or panics, the element remains in the map and will not be
309+
/// yielded.
310+
///
311+
/// Note that `extract_if` lets you mutate every value in the filter closure, regardless of
312+
/// whether you choose to keep or remove it.
313+
///
314+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
315+
/// or the iteration short-circuits, then the remaining elements will be retained.
316+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
317+
///
318+
/// [`retain`]: IndexMap::retain
319+
///
320+
/// # Examples
321+
///
322+
/// Splitting a map into even and odd keys, reusing the original map:
323+
///
324+
/// ```
325+
/// use indexmap::IndexMap;
326+
///
327+
/// let mut map: IndexMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
328+
/// let extracted: IndexMap<i32, i32> = map.extract_if(|k, _v| k % 2 == 0).collect();
329+
///
330+
/// let evens = extracted.keys().copied().collect::<Vec<_>>();
331+
/// let odds = map.keys().copied().collect::<Vec<_>>();
332+
///
333+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
334+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
335+
/// ```
336+
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, K, V, F>
337+
where
338+
F: FnMut(&K, &mut V) -> bool,
339+
{
340+
ExtractIf::new(&mut self.core, pred)
341+
}
342+
304343
/// Splits the collection into two at the given index.
305344
///
306345
/// Returns a newly allocated map containing the elements in the range

‎src/map/core.rs

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use crate::util::simplify_range;
2424
use crate::{Bucket, Entries, Equivalent, HashValue};
2525

2626
pub use entry::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
27+
pub(crate) use raw::ExtractCore;
2728

2829
/// Core of the map that does not depend on S
2930
pub(crate) struct IndexMapCore<K, V> {
@@ -145,6 +146,7 @@ impl<K, V> IndexMapCore<K, V> {
145146

146147
#[inline]
147148
pub(crate) fn len(&self) -> usize {
149+
debug_assert_eq!(self.entries.len(), self.indices.len());
148150
self.indices.len()
149151
}
150152

‎src/map/core/raw.rs

+91
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,20 @@ impl<K, V> IndexMapCore<K, V> {
9696
// only the item references that are appropriately bound to `&mut self`.
9797
unsafe { self.indices.iter().map(|bucket| bucket.as_mut()) }
9898
}
99+
100+
pub(crate) fn extract(&mut self) -> ExtractCore<'_, K, V> {
101+
// SAFETY: We must have consistent lengths to start, so that's a hard assertion.
102+
// Then the worst `set_len(0)` can do is leak items if `ExtractCore` doesn't drop.
103+
assert_eq!(self.entries.len(), self.indices.len());
104+
unsafe {
105+
self.entries.set_len(0);
106+
}
107+
ExtractCore {
108+
map: self,
109+
current: 0,
110+
new_len: 0,
111+
}
112+
}
99113
}
100114

101115
/// A view into an occupied raw entry in an `IndexMap`.
@@ -143,3 +157,80 @@ impl<'a, K, V> RawTableEntry<'a, K, V> {
143157
(self.map, index)
144158
}
145159
}
160+
161+
pub(crate) struct ExtractCore<'a, K, V> {
162+
map: &'a mut IndexMapCore<K, V>,
163+
current: usize,
164+
new_len: usize,
165+
}
166+
167+
impl<K, V> Drop for ExtractCore<'_, K, V> {
168+
fn drop(&mut self) {
169+
let old_len = self.map.indices.len();
170+
let mut new_len = self.new_len;
171+
debug_assert!(new_len <= self.current);
172+
debug_assert!(self.current <= old_len);
173+
debug_assert!(old_len <= self.map.entries.capacity());
174+
175+
// SAFETY: We assume `new_len` and `current` were correctly maintained by the iterator.
176+
// So `entries[new_len..current]` were moved out (extracted, or already shifted down), but the rest before and after are valid.
177+
unsafe {
178+
if new_len == self.current {
179+
// Nothing was extracted, so any remaining items can be left in place.
180+
new_len = old_len;
181+
} else if self.current < old_len {
182+
// Need to shift the remaining items down.
183+
let tail_len = old_len - self.current;
184+
let base = self.map.entries.as_mut_ptr();
185+
let src = base.add(self.current);
186+
let dest = base.add(new_len);
187+
src.copy_to(dest, tail_len);
188+
new_len += tail_len;
189+
}
190+
self.map.entries.set_len(new_len);
191+
}
192+
193+
if new_len != old_len {
194+
// We don't keep track of *which* items were extracted, so reindex everything.
195+
self.map.rebuild_hash_table();
196+
}
197+
}
198+
}
199+
200+
impl<K, V> ExtractCore<'_, K, V> {
201+
pub(crate) fn extract_if<F>(&mut self, mut pred: F) -> Option<Bucket<K, V>>
202+
where
203+
F: FnMut(&mut Bucket<K, V>) -> bool,
204+
{
205+
let old_len = self.map.indices.len();
206+
debug_assert!(old_len <= self.map.entries.capacity());
207+
208+
let base = self.map.entries.as_mut_ptr();
209+
while self.current < old_len {
210+
// SAFETY: We're maintaining both indices within bounds of the original entries, so
211+
// 0..new_len and current..old_len are always valid items for our Drop to keep.
212+
unsafe {
213+
let item = base.add(self.current);
214+
if pred(&mut *item) {
215+
// Extract it!
216+
self.current += 1;
217+
return Some(item.read());
218+
} else {
219+
// Keep it, shifting it down if needed.
220+
if self.new_len != self.current {
221+
debug_assert!(self.new_len < self.current);
222+
let dest = base.add(self.new_len);
223+
item.copy_to_nonoverlapping(dest, 1);
224+
}
225+
self.current += 1;
226+
self.new_len += 1;
227+
}
228+
}
229+
}
230+
None
231+
}
232+
233+
pub(crate) fn remaining(&self) -> usize {
234+
self.map.indices.len() - self.current
235+
}
236+
}

‎src/map/iter.rs

+54-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use super::core::IndexMapCore;
2-
use super::{Bucket, Entries, IndexMap, Slice};
1+
use super::{Bucket, Entries, ExtractCore, IndexMap, IndexMapCore, Slice};
32

43
use alloc::vec::{self, Vec};
54
use core::fmt;
@@ -711,3 +710,56 @@ where
711710
.finish()
712711
}
713712
}
713+
714+
/// An extracting iterator for `IndexMap`.
715+
///
716+
/// This `struct` is created by [`IndexMap::extract_if()`].
717+
/// See its documentation for more.
718+
pub struct ExtractIf<'a, K, V, F>
719+
where
720+
F: FnMut(&K, &mut V) -> bool,
721+
{
722+
inner: ExtractCore<'a, K, V>,
723+
pred: F,
724+
}
725+
726+
impl<K, V, F> ExtractIf<'_, K, V, F>
727+
where
728+
F: FnMut(&K, &mut V) -> bool,
729+
{
730+
pub(super) fn new(core: &mut IndexMapCore<K, V>, pred: F) -> ExtractIf<'_, K, V, F> {
731+
ExtractIf {
732+
inner: core.extract(),
733+
pred,
734+
}
735+
}
736+
}
737+
738+
impl<K, V, F> Iterator for ExtractIf<'_, K, V, F>
739+
where
740+
F: FnMut(&K, &mut V) -> bool,
741+
{
742+
type Item = (K, V);
743+
744+
fn next(&mut self) -> Option<Self::Item> {
745+
self.inner
746+
.extract_if(|bucket| {
747+
let (key, value) = bucket.ref_mut();
748+
(self.pred)(key, value)
749+
})
750+
.map(Bucket::key_value)
751+
}
752+
753+
fn size_hint(&self) -> (usize, Option<usize>) {
754+
(0, Some(self.inner.remaining()))
755+
}
756+
}
757+
758+
impl<'a, K, V, F> fmt::Debug for ExtractIf<'a, K, V, F>
759+
where
760+
F: FnMut(&K, &mut V) -> bool,
761+
{
762+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
763+
f.debug_struct("ExtractIf").finish_non_exhaustive()
764+
}
765+
}

‎src/set.rs

+36-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ mod slice;
77
mod tests;
88

99
pub use self::iter::{
10-
Difference, Drain, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
10+
Difference, Drain, ExtractIf, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
1111
};
1212
pub use self::slice::Slice;
1313

@@ -253,6 +253,41 @@ impl<T, S> IndexSet<T, S> {
253253
Drain::new(self.map.core.drain(range))
254254
}
255255

256+
/// Creates an iterator which uses a closure to determine if a value should be removed.
257+
///
258+
/// If the closure returns true, then the value is removed and yielded.
259+
/// If the closure returns false, the value will remain in the set and will not be yielded
260+
/// by the iterator.
261+
///
262+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
263+
/// or the iteration short-circuits, then the remaining elements will be retained.
264+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
265+
///
266+
/// [`retain`]: IndexSet::retain
267+
///
268+
/// # Examples
269+
///
270+
/// Splitting a set into even and odd values, reusing the original set:
271+
///
272+
/// ```
273+
/// use indexmap::IndexSet;
274+
///
275+
/// let mut set: IndexSet<i32> = (0..8).collect();
276+
/// let extracted: IndexSet<i32> = set.extract_if(|v| v % 2 == 0).collect();
277+
///
278+
/// let evens = extracted.into_iter().collect::<Vec<_>>();
279+
/// let odds = set.into_iter().collect::<Vec<_>>();
280+
///
281+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
282+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
283+
/// ```
284+
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, T, F>
285+
where
286+
F: FnMut(&T) -> bool,
287+
{
288+
ExtractIf::new(&mut self.map.core, pred)
289+
}
290+
256291
/// Splits the collection into two at the given index.
257292
///
258293
/// Returns a newly allocated set containing the elements in the range

‎src/set/iter.rs

+52
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use crate::map::{ExtractCore, IndexMapCore};
2+
13
use super::{Bucket, Entries, IndexSet, Slice};
24

35
use alloc::vec::{self, Vec};
@@ -624,3 +626,53 @@ impl<I: fmt::Debug> fmt::Debug for UnitValue<I> {
624626
fmt::Debug::fmt(&self.0, f)
625627
}
626628
}
629+
630+
/// An extracting iterator for `IndexSet`.
631+
///
632+
/// This `struct` is created by [`IndexSet::extract_if()`].
633+
/// See its documentation for more.
634+
pub struct ExtractIf<'a, T, F>
635+
where
636+
F: FnMut(&T) -> bool,
637+
{
638+
inner: ExtractCore<'a, T, ()>,
639+
pred: F,
640+
}
641+
642+
impl<T, F> ExtractIf<'_, T, F>
643+
where
644+
F: FnMut(&T) -> bool,
645+
{
646+
pub(super) fn new(core: &mut IndexMapCore<T, ()>, pred: F) -> ExtractIf<'_, T, F> {
647+
ExtractIf {
648+
inner: core.extract(),
649+
pred,
650+
}
651+
}
652+
}
653+
654+
impl<T, F> Iterator for ExtractIf<'_, T, F>
655+
where
656+
F: FnMut(&T) -> bool,
657+
{
658+
type Item = T;
659+
660+
fn next(&mut self) -> Option<Self::Item> {
661+
self.inner
662+
.extract_if(|bucket| (self.pred)(bucket.key_ref()))
663+
.map(Bucket::key)
664+
}
665+
666+
fn size_hint(&self) -> (usize, Option<usize>) {
667+
(0, Some(self.inner.remaining()))
668+
}
669+
}
670+
671+
impl<'a, T, F> fmt::Debug for ExtractIf<'a, T, F>
672+
where
673+
F: FnMut(&T) -> bool,
674+
{
675+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
676+
f.debug_struct("ExtractIf").finish_non_exhaustive()
677+
}
678+
}

0 commit comments

Comments
 (0)
Please sign in to comment.