Skip to content

Commit bb58488

Browse files
committedJul 30, 2024
Rewrite binary search implementation
This restores the original binary search implementation from rust-lang#45333 which has the nice property of having a loop count that only depends on the size of the slice. This, along with explicit conditional moves from rust-lang#128250, means that the entire binary search loop can be perfectly predicted by the branch predictor. Additionally, LLVM is able to unroll the loop when the slice length is known at compile-time. This results in a very compact code sequence of 3-4 instructions per binary search step and zero branches. Fixes rust-lang#53823
1 parent 595316b commit bb58488

File tree

2 files changed

+50
-37
lines changed

2 files changed

+50
-37
lines changed
 

‎library/core/src/slice/mod.rs

+44-31
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#![stable(feature = "rust1", since = "1.0.0")]
88

99
use crate::cmp::Ordering::{self, Equal, Greater, Less};
10-
use crate::intrinsics::{exact_div, unchecked_sub};
10+
use crate::intrinsics::{exact_div, select_unpredictable, unchecked_sub};
1111
use crate::mem::{self, SizedTypeProperties};
1212
use crate::num::NonZero;
1313
use crate::ops::{Bound, OneSidedRange, Range, RangeBounds};
@@ -2770,41 +2770,54 @@ impl<T> [T] {
27702770
where
27712771
F: FnMut(&'a T) -> Ordering,
27722772
{
2773-
// INVARIANTS:
2774-
// - 0 <= left <= left + size = right <= self.len()
2775-
// - f returns Less for everything in self[..left]
2776-
// - f returns Greater for everything in self[right..]
27772773
let mut size = self.len();
2778-
let mut left = 0;
2779-
let mut right = size;
2780-
while left < right {
2781-
let mid = left + size / 2;
2782-
2783-
// SAFETY: the while condition means `size` is strictly positive, so
2784-
// `size/2 < size`. Thus `left + size/2 < left + size`, which
2785-
// coupled with the `left + size <= self.len()` invariant means
2786-
// we have `left + size/2 < self.len()`, and this is in-bounds.
2774+
if size == 0 {
2775+
return Err(0);
2776+
}
2777+
let mut base = 0usize;
2778+
2779+
// This loop intentionally doesn't have an early exit if the comparison
2780+
// returns Equal. We want the number of loop iterations to depend *only*
2781+
// on the size of the input slice so that the CPU can reliably predict
2782+
// the loop count.
2783+
while size > 1 {
2784+
let half = size / 2;
2785+
let mid = base + half;
2786+
2787+
// SAFETY: the call is made safe by the following inconstants:
2788+
// - `mid >= 0`: by definition
2789+
// - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...`
27872790
let cmp = f(unsafe { self.get_unchecked(mid) });
27882791

2789-
// This control flow produces conditional moves, which results in
2790-
// fewer branches and instructions than if/else or matching on
2791-
// cmp::Ordering.
2792-
// This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx.
2793-
left = if cmp == Less { mid + 1 } else { left };
2794-
right = if cmp == Greater { mid } else { right };
2795-
if cmp == Equal {
2796-
// SAFETY: same as the `get_unchecked` above
2797-
unsafe { hint::assert_unchecked(mid < self.len()) };
2798-
return Ok(mid);
2799-
}
2800-
2801-
size = right - left;
2792+
// Binary search interacts poorly with branch prediction, so force
2793+
// the compiler to use conditional moves if supported by the target
2794+
// architecture.
2795+
base = select_unpredictable(cmp == Greater, base, mid);
2796+
2797+
// This is imprecise in the case where `size` is odd and the
2798+
// comparison returns Greater: the mid element still gets included
2799+
// by `size` even though it's known to be larger than the element
2800+
// being searched for.
2801+
//
2802+
// This is fine though: we gain more performance by keeping the
2803+
// loop iteration count invariant (and thus predictable) than we
2804+
// lose from considering one additional element.
2805+
size -= half;
28022806
}
28032807

2804-
// SAFETY: directly true from the overall invariant.
2805-
// Note that this is `<=`, unlike the assume in the `Ok` path.
2806-
unsafe { hint::assert_unchecked(left <= self.len()) };
2807-
Err(left)
2808+
// SAFETY: base is always in [0, size) because base <= mid.
2809+
let cmp = f(unsafe { self.get_unchecked(base) });
2810+
if cmp == Equal {
2811+
// SAFETY: same as the `get_unchecked` above.
2812+
unsafe { hint::assert_unchecked(base < self.len()) };
2813+
Ok(base)
2814+
} else {
2815+
let result = base + (cmp == Less) as usize;
2816+
// SAFETY: same as the `get_unchecked` above.
2817+
// Note that this is `<=`, unlike the assume in the `Ok` path.
2818+
unsafe { hint::assert_unchecked(result <= self.len()) };
2819+
Err(result)
2820+
}
28082821
}
28092822

28102823
/// Binary searches this slice with a key extraction function.

‎library/core/tests/slice.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,13 @@ fn test_binary_search() {
6969
assert_eq!(b.binary_search(&8), Err(5));
7070

7171
let b = [(); usize::MAX];
72-
assert_eq!(b.binary_search(&()), Ok(usize::MAX / 2));
72+
assert_eq!(b.binary_search(&()), Ok(usize::MAX - 1));
7373
}
7474

7575
#[test]
7676
fn test_binary_search_by_overflow() {
7777
let b = [(); usize::MAX];
78-
assert_eq!(b.binary_search_by(|_| Ordering::Equal), Ok(usize::MAX / 2));
78+
assert_eq!(b.binary_search_by(|_| Ordering::Equal), Ok(usize::MAX - 1));
7979
assert_eq!(b.binary_search_by(|_| Ordering::Greater), Err(0));
8080
assert_eq!(b.binary_search_by(|_| Ordering::Less), Err(usize::MAX));
8181
}
@@ -87,13 +87,13 @@ fn test_binary_search_implementation_details() {
8787
let b = [1, 1, 2, 2, 3, 3, 3];
8888
assert_eq!(b.binary_search(&1), Ok(1));
8989
assert_eq!(b.binary_search(&2), Ok(3));
90-
assert_eq!(b.binary_search(&3), Ok(5));
90+
assert_eq!(b.binary_search(&3), Ok(6));
9191
let b = [1, 1, 1, 1, 1, 3, 3, 3, 3];
9292
assert_eq!(b.binary_search(&1), Ok(4));
93-
assert_eq!(b.binary_search(&3), Ok(7));
93+
assert_eq!(b.binary_search(&3), Ok(8));
9494
let b = [1, 1, 1, 1, 3, 3, 3, 3, 3];
95-
assert_eq!(b.binary_search(&1), Ok(2));
96-
assert_eq!(b.binary_search(&3), Ok(4));
95+
assert_eq!(b.binary_search(&1), Ok(3));
96+
assert_eq!(b.binary_search(&3), Ok(8));
9797
}
9898

9999
#[test]

0 commit comments

Comments
 (0)