Skip to content

Commit ad9a84a

Browse files
committed Feb 24, 2025
Eliminate the use of public_test_dep! for a third time
Replace `public_test_dep!` by placing optionally public items into new modules, then controlling what is exported with the `public-test-deps` feature. This is nicer for automatic formatting and diagnostics. This is a reland of 2e2a925 ("Eliminate the use of `public_test_dep!`"), which was reverted in 47e50fd ('Revert "Eliminate the use of..."') due to a bug exposed at [1], reapplied in d4abaf4 because the issue should have been fixed in [2], then reverted again in f6eef07 because [2] did not actually fix the issue. [3] has landed in rust-lang/rust since then, which should resolve the last problem remaining after [2]. So, apply this change for what is hopefully the final time. [1]: rust-lang/rust#128691 [2]: rust-lang/rust#135278 [3]: rust-lang/rust#135501
1 parent f322090 commit ad9a84a

File tree

8 files changed

+763
-772
lines changed

8 files changed

+763
-772
lines changed
 

‎src/float/mod.rs

+5-190
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
use core::ops;
2-
3-
use crate::int::{DInt, Int, MinInt};
4-
51
pub mod add;
62
pub mod cmp;
73
pub mod conv;
@@ -10,192 +6,11 @@ pub mod extend;
106
pub mod mul;
117
pub mod pow;
128
pub mod sub;
9+
pub(crate) mod traits;
1310
pub mod trunc;
1411

15-
/// Wrapper to extract the integer type half of the float's size
16-
pub(crate) type HalfRep<F> = <<F as Float>::Int as DInt>::H;
17-
18-
public_test_dep! {
19-
/// Trait for some basic operations on floats
20-
#[allow(dead_code)]
21-
pub(crate) trait Float:
22-
Copy
23-
+ core::fmt::Debug
24-
+ PartialEq
25-
+ PartialOrd
26-
+ ops::AddAssign
27-
+ ops::MulAssign
28-
+ ops::Add<Output = Self>
29-
+ ops::Sub<Output = Self>
30-
+ ops::Div<Output = Self>
31-
+ ops::Rem<Output = Self>
32-
{
33-
/// A uint of the same width as the float
34-
type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
35-
36-
/// A int of the same width as the float
37-
type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
38-
39-
/// An int capable of containing the exponent bits plus a sign bit. This is signed.
40-
type ExpInt: Int;
41-
42-
const ZERO: Self;
43-
const ONE: Self;
44-
45-
/// The bitwidth of the float type.
46-
const BITS: u32;
47-
48-
/// The bitwidth of the significand.
49-
const SIG_BITS: u32;
50-
51-
/// The bitwidth of the exponent.
52-
const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
53-
54-
/// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
55-
/// representation.
56-
///
57-
/// This is in the rightmost position, use `EXP_MASK` for the shifted value.
58-
const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
59-
60-
/// The exponent bias value.
61-
const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
62-
63-
/// A mask for the sign bit.
64-
const SIGN_MASK: Self::Int;
65-
66-
/// A mask for the significand.
67-
const SIG_MASK: Self::Int;
68-
69-
/// The implicit bit of the float format.
70-
const IMPLICIT_BIT: Self::Int;
71-
72-
/// A mask for the exponent.
73-
const EXP_MASK: Self::Int;
74-
75-
/// Returns `self` transmuted to `Self::Int`
76-
fn to_bits(self) -> Self::Int;
77-
78-
/// Returns `self` transmuted to `Self::SignedInt`
79-
fn to_bits_signed(self) -> Self::SignedInt;
80-
81-
/// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
82-
/// represented in multiple different ways. This method returns `true` if two NaNs are
83-
/// compared.
84-
fn eq_repr(self, rhs: Self) -> bool;
85-
86-
/// Returns true if the sign is negative
87-
fn is_sign_negative(self) -> bool;
88-
89-
/// Returns the exponent, not adjusting for bias.
90-
fn exp(self) -> Self::ExpInt;
91-
92-
/// Returns the significand with no implicit bit (or the "fractional" part)
93-
fn frac(self) -> Self::Int;
94-
95-
/// Returns the significand with implicit bit
96-
fn imp_frac(self) -> Self::Int;
97-
98-
/// Returns a `Self::Int` transmuted back to `Self`
99-
fn from_bits(a: Self::Int) -> Self;
100-
101-
/// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
102-
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self;
103-
104-
fn abs(self) -> Self {
105-
let abs_mask = !Self::SIGN_MASK ;
106-
Self::from_bits(self.to_bits() & abs_mask)
107-
}
108-
109-
/// Returns (normalized exponent, normalized significand)
110-
fn normalize(significand: Self::Int) -> (i32, Self::Int);
111-
112-
/// Returns if `self` is subnormal
113-
fn is_subnormal(self) -> bool;
114-
}
115-
}
116-
117-
macro_rules! float_impl {
118-
($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
119-
impl Float for $ty {
120-
type Int = $ity;
121-
type SignedInt = $sity;
122-
type ExpInt = $expty;
123-
124-
const ZERO: Self = 0.0;
125-
const ONE: Self = 1.0;
126-
127-
const BITS: u32 = $bits;
128-
const SIG_BITS: u32 = $significand_bits;
129-
130-
const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
131-
const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
132-
const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
133-
const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
134-
135-
fn to_bits(self) -> Self::Int {
136-
self.to_bits()
137-
}
138-
fn to_bits_signed(self) -> Self::SignedInt {
139-
self.to_bits() as Self::SignedInt
140-
}
141-
fn eq_repr(self, rhs: Self) -> bool {
142-
#[cfg(feature = "mangled-names")]
143-
fn is_nan(x: $ty) -> bool {
144-
// When using mangled-names, the "real" compiler-builtins might not have the
145-
// necessary builtin (__unordtf2) to test whether `f128` is NaN.
146-
// FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
147-
// x is NaN if all the bits of the exponent are set and the significand is non-0
148-
x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
149-
}
150-
#[cfg(not(feature = "mangled-names"))]
151-
fn is_nan(x: $ty) -> bool {
152-
x.is_nan()
153-
}
154-
if is_nan(self) && is_nan(rhs) {
155-
true
156-
} else {
157-
self.to_bits() == rhs.to_bits()
158-
}
159-
}
160-
fn is_sign_negative(self) -> bool {
161-
self.is_sign_negative()
162-
}
163-
fn exp(self) -> Self::ExpInt {
164-
((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
165-
}
166-
fn frac(self) -> Self::Int {
167-
self.to_bits() & Self::SIG_MASK
168-
}
169-
fn imp_frac(self) -> Self::Int {
170-
self.frac() | Self::IMPLICIT_BIT
171-
}
172-
fn from_bits(a: Self::Int) -> Self {
173-
Self::from_bits(a)
174-
}
175-
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
176-
Self::from_bits(
177-
((negative as Self::Int) << (Self::BITS - 1))
178-
| ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
179-
| (significand & Self::SIG_MASK),
180-
)
181-
}
182-
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
183-
let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
184-
(
185-
1i32.wrapping_sub(shift as i32),
186-
significand << shift as Self::Int,
187-
)
188-
}
189-
fn is_subnormal(self) -> bool {
190-
(self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
191-
}
192-
}
193-
};
194-
}
12+
#[cfg(not(feature = "public-test-deps"))]
13+
pub(crate) use traits::{Float, HalfRep};
19514

196-
#[cfg(f16_enabled)]
197-
float_impl!(f16, u16, i16, i8, 16, 10);
198-
float_impl!(f32, u32, i32, i16, 32, 23);
199-
float_impl!(f64, u64, i64, i16, 64, 52);
200-
#[cfg(f128_enabled)]
201-
float_impl!(f128, u128, i128, i16, 128, 112);
15+
#[cfg(feature = "public-test-deps")]
16+
pub use traits::{Float, HalfRep};

‎src/float/traits.rs

+189
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
use core::ops;
2+
3+
use crate::int::{DInt, Int, MinInt};
4+
5+
/// Wrapper to extract the integer type half of the float's size
6+
pub type HalfRep<F> = <<F as Float>::Int as DInt>::H;
7+
8+
/// Trait for some basic operations on floats
9+
#[allow(dead_code)]
10+
pub trait Float:
11+
Copy
12+
+ core::fmt::Debug
13+
+ PartialEq
14+
+ PartialOrd
15+
+ ops::AddAssign
16+
+ ops::MulAssign
17+
+ ops::Add<Output = Self>
18+
+ ops::Sub<Output = Self>
19+
+ ops::Div<Output = Self>
20+
+ ops::Rem<Output = Self>
21+
{
22+
/// A uint of the same width as the float
23+
type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
24+
25+
/// A int of the same width as the float
26+
type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
27+
28+
/// An int capable of containing the exponent bits plus a sign bit. This is signed.
29+
type ExpInt: Int;
30+
31+
const ZERO: Self;
32+
const ONE: Self;
33+
34+
/// The bitwidth of the float type.
35+
const BITS: u32;
36+
37+
/// The bitwidth of the significand.
38+
const SIG_BITS: u32;
39+
40+
/// The bitwidth of the exponent.
41+
const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
42+
43+
/// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
44+
/// representation.
45+
///
46+
/// This is in the rightmost position, use `EXP_MASK` for the shifted value.
47+
const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
48+
49+
/// The exponent bias value.
50+
const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
51+
52+
/// A mask for the sign bit.
53+
const SIGN_MASK: Self::Int;
54+
55+
/// A mask for the significand.
56+
const SIG_MASK: Self::Int;
57+
58+
/// The implicit bit of the float format.
59+
const IMPLICIT_BIT: Self::Int;
60+
61+
/// A mask for the exponent.
62+
const EXP_MASK: Self::Int;
63+
64+
/// Returns `self` transmuted to `Self::Int`
65+
fn to_bits(self) -> Self::Int;
66+
67+
/// Returns `self` transmuted to `Self::SignedInt`
68+
fn to_bits_signed(self) -> Self::SignedInt;
69+
70+
/// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
71+
/// represented in multiple different ways. This method returns `true` if two NaNs are
72+
/// compared.
73+
fn eq_repr(self, rhs: Self) -> bool;
74+
75+
/// Returns true if the sign is negative
76+
fn is_sign_negative(self) -> bool;
77+
78+
/// Returns the exponent, not adjusting for bias.
79+
fn exp(self) -> Self::ExpInt;
80+
81+
/// Returns the significand with no implicit bit (or the "fractional" part)
82+
fn frac(self) -> Self::Int;
83+
84+
/// Returns the significand with implicit bit
85+
fn imp_frac(self) -> Self::Int;
86+
87+
/// Returns a `Self::Int` transmuted back to `Self`
88+
fn from_bits(a: Self::Int) -> Self;
89+
90+
/// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
91+
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self;
92+
93+
fn abs(self) -> Self {
94+
let abs_mask = !Self::SIGN_MASK;
95+
Self::from_bits(self.to_bits() & abs_mask)
96+
}
97+
98+
/// Returns (normalized exponent, normalized significand)
99+
fn normalize(significand: Self::Int) -> (i32, Self::Int);
100+
101+
/// Returns if `self` is subnormal
102+
fn is_subnormal(self) -> bool;
103+
}
104+
105+
macro_rules! float_impl {
106+
($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
107+
impl Float for $ty {
108+
type Int = $ity;
109+
type SignedInt = $sity;
110+
type ExpInt = $expty;
111+
112+
const ZERO: Self = 0.0;
113+
const ONE: Self = 1.0;
114+
115+
const BITS: u32 = $bits;
116+
const SIG_BITS: u32 = $significand_bits;
117+
118+
const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
119+
const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
120+
const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
121+
const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
122+
123+
fn to_bits(self) -> Self::Int {
124+
self.to_bits()
125+
}
126+
fn to_bits_signed(self) -> Self::SignedInt {
127+
self.to_bits() as Self::SignedInt
128+
}
129+
fn eq_repr(self, rhs: Self) -> bool {
130+
#[cfg(feature = "mangled-names")]
131+
fn is_nan(x: $ty) -> bool {
132+
// When using mangled-names, the "real" compiler-builtins might not have the
133+
// necessary builtin (__unordtf2) to test whether `f128` is NaN.
134+
// FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
135+
// x is NaN if all the bits of the exponent are set and the significand is non-0
136+
x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
137+
}
138+
#[cfg(not(feature = "mangled-names"))]
139+
fn is_nan(x: $ty) -> bool {
140+
x.is_nan()
141+
}
142+
if is_nan(self) && is_nan(rhs) {
143+
true
144+
} else {
145+
self.to_bits() == rhs.to_bits()
146+
}
147+
}
148+
fn is_sign_negative(self) -> bool {
149+
self.is_sign_negative()
150+
}
151+
fn exp(self) -> Self::ExpInt {
152+
((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
153+
}
154+
fn frac(self) -> Self::Int {
155+
self.to_bits() & Self::SIG_MASK
156+
}
157+
fn imp_frac(self) -> Self::Int {
158+
self.frac() | Self::IMPLICIT_BIT
159+
}
160+
fn from_bits(a: Self::Int) -> Self {
161+
Self::from_bits(a)
162+
}
163+
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
164+
Self::from_bits(
165+
((negative as Self::Int) << (Self::BITS - 1))
166+
| ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
167+
| (significand & Self::SIG_MASK),
168+
)
169+
}
170+
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
171+
let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
172+
(
173+
1i32.wrapping_sub(shift as i32),
174+
significand << shift as Self::Int,
175+
)
176+
}
177+
fn is_subnormal(self) -> bool {
178+
(self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
179+
}
180+
}
181+
};
182+
}
183+
184+
#[cfg(f16_enabled)]
185+
float_impl!(f16, u16, i16, i8, 16, 10);
186+
float_impl!(f32, u32, i32, i16, 32, 23);
187+
float_impl!(f64, u64, i64, i16, 64, 52);
188+
#[cfg(f128_enabled)]
189+
float_impl!(f128, u128, i128, i16, 128, 112);

‎src/int/leading_zeros.rs

+115-112
Original file line numberDiff line numberDiff line change
@@ -3,135 +3,138 @@
33
// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`.
44
// Compilers will insert the check for zero in cases where it is needed.
55

6-
use crate::int::{CastInto, Int};
6+
#[cfg(feature = "public-test-deps")]
7+
pub use implementation::{leading_zeros_default, leading_zeros_riscv};
8+
#[cfg(not(feature = "public-test-deps"))]
9+
pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv};
710

8-
public_test_dep! {
9-
/// Returns the number of leading binary zeros in `x`.
10-
#[allow(dead_code)]
11-
pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
12-
// The basic idea is to test if the higher bits of `x` are zero and bisect the number
13-
// of leading zeros. It is possible for all branches of the bisection to use the same
14-
// code path by conditionally shifting the higher parts down to let the next bisection
15-
// step work on the higher or lower parts of `x`. Instead of starting with `z == 0`
16-
// and adding to the number of zeros, it is slightly faster to start with
17-
// `z == usize::MAX.count_ones()` and subtract from the potential number of zeros,
18-
// because it simplifies the final bisection step.
19-
let mut x = x;
20-
// the number of potential leading zeros
21-
let mut z = T::BITS as usize;
22-
// a temporary
23-
let mut t: T;
11+
mod implementation {
12+
use crate::int::{CastInto, Int};
2413

25-
const { assert!(T::BITS <= 64) };
26-
if T::BITS >= 64 {
27-
t = x >> 32;
14+
/// Returns the number of leading binary zeros in `x`.
15+
#[allow(dead_code)]
16+
pub fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
17+
// The basic idea is to test if the higher bits of `x` are zero and bisect the number
18+
// of leading zeros. It is possible for all branches of the bisection to use the same
19+
// code path by conditionally shifting the higher parts down to let the next bisection
20+
// step work on the higher or lower parts of `x`. Instead of starting with `z == 0`
21+
// and adding to the number of zeros, it is slightly faster to start with
22+
// `z == usize::MAX.count_ones()` and subtract from the potential number of zeros,
23+
// because it simplifies the final bisection step.
24+
let mut x = x;
25+
// the number of potential leading zeros
26+
let mut z = T::BITS as usize;
27+
// a temporary
28+
let mut t: T;
29+
30+
const { assert!(T::BITS <= 64) };
31+
if T::BITS >= 64 {
32+
t = x >> 32;
33+
if t != T::ZERO {
34+
z -= 32;
35+
x = t;
36+
}
37+
}
38+
if T::BITS >= 32 {
39+
t = x >> 16;
40+
if t != T::ZERO {
41+
z -= 16;
42+
x = t;
43+
}
44+
}
45+
const { assert!(T::BITS >= 16) };
46+
t = x >> 8;
2847
if t != T::ZERO {
29-
z -= 32;
48+
z -= 8;
3049
x = t;
3150
}
32-
}
33-
if T::BITS >= 32 {
34-
t = x >> 16;
51+
t = x >> 4;
3552
if t != T::ZERO {
36-
z -= 16;
53+
z -= 4;
3754
x = t;
3855
}
39-
}
40-
const { assert!(T::BITS >= 16) };
41-
t = x >> 8;
42-
if t != T::ZERO {
43-
z -= 8;
44-
x = t;
45-
}
46-
t = x >> 4;
47-
if t != T::ZERO {
48-
z -= 4;
49-
x = t;
50-
}
51-
t = x >> 2;
52-
if t != T::ZERO {
53-
z -= 2;
54-
x = t;
55-
}
56-
// the last two bisections are combined into one conditional
57-
t = x >> 1;
58-
if t != T::ZERO {
59-
z - 2
60-
} else {
61-
z - x.cast()
62-
}
56+
t = x >> 2;
57+
if t != T::ZERO {
58+
z -= 2;
59+
x = t;
60+
}
61+
// the last two bisections are combined into one conditional
62+
t = x >> 1;
63+
if t != T::ZERO {
64+
z - 2
65+
} else {
66+
z - x.cast()
67+
}
6368

64-
// We could potentially save a few cycles by using the LUT trick from
65-
// "https://embeddedgurus.com/state-space/2014/09/
66-
// fast-deterministic-and-portable-counting-leading-zeros/".
67-
// However, 256 bytes for a LUT is too large for embedded use cases. We could remove
68-
// the last 3 bisections and use this 16 byte LUT for the rest of the work:
69-
//const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4];
70-
//z -= LUT[x] as usize;
71-
//z
72-
// However, it ends up generating about the same number of instructions. When benchmarked
73-
// on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO
74-
// execution effects. Changing to using a LUT and branching is risky for smaller cores.
75-
}
76-
}
69+
// We could potentially save a few cycles by using the LUT trick from
70+
// "https://embeddedgurus.com/state-space/2014/09/
71+
// fast-deterministic-and-portable-counting-leading-zeros/".
72+
// However, 256 bytes for a LUT is too large for embedded use cases. We could remove
73+
// the last 3 bisections and use this 16 byte LUT for the rest of the work:
74+
//const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4];
75+
//z -= LUT[x] as usize;
76+
//z
77+
// However, it ends up generating about the same number of instructions. When benchmarked
78+
// on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO
79+
// execution effects. Changing to using a LUT and branching is risky for smaller cores.
80+
}
7781

78-
// The above method does not compile well on RISC-V (because of the lack of predicated
79-
// instructions), producing code with many branches or using an excessively long
80-
// branchless solution. This method takes advantage of the set-if-less-than instruction on
81-
// RISC-V that allows `(x >= power-of-two) as usize` to be branchless.
82+
// The above method does not compile well on RISC-V (because of the lack of predicated
83+
// instructions), producing code with many branches or using an excessively long
84+
// branchless solution. This method takes advantage of the set-if-less-than instruction on
85+
// RISC-V that allows `(x >= power-of-two) as usize` to be branchless.
8286

83-
public_test_dep! {
84-
/// Returns the number of leading binary zeros in `x`.
85-
#[allow(dead_code)]
86-
pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
87-
let mut x = x;
88-
// the number of potential leading zeros
89-
let mut z = T::BITS;
90-
// a temporary
91-
let mut t: u32;
87+
/// Returns the number of leading binary zeros in `x`.
88+
#[allow(dead_code)]
89+
pub fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
90+
let mut x = x;
91+
// the number of potential leading zeros
92+
let mut z = T::BITS;
93+
// a temporary
94+
let mut t: u32;
9295

93-
// RISC-V does not have a set-if-greater-than-or-equal instruction and
94-
// `(x >= power-of-two) as usize` will get compiled into two instructions, but this is
95-
// still the most optimal method. A conditional set can only be turned into a single
96-
// immediate instruction if `x` is compared with an immediate `imm` (that can fit into
97-
// 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the
98-
// right). If we try to save an instruction by using `x < imm` for each bisection, we
99-
// have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
100-
// but the immediate will never fit into 12 bits and never save an instruction.
101-
const { assert!(T::BITS <= 64) };
102-
if T::BITS >= 64 {
103-
// If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
104-
// `t` is set to 0.
105-
t = ((x >= (T::ONE << 32)) as u32) << 5;
106-
// If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
107-
// next step to process.
96+
// RISC-V does not have a set-if-greater-than-or-equal instruction and
97+
// `(x >= power-of-two) as usize` will get compiled into two instructions, but this is
98+
// still the most optimal method. A conditional set can only be turned into a single
99+
// immediate instruction if `x` is compared with an immediate `imm` (that can fit into
100+
// 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the
101+
// right). If we try to save an instruction by using `x < imm` for each bisection, we
102+
// have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
103+
// but the immediate will never fit into 12 bits and never save an instruction.
104+
const { assert!(T::BITS <= 64) };
105+
if T::BITS >= 64 {
106+
// If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
107+
// `t` is set to 0.
108+
t = ((x >= (T::ONE << 32)) as u32) << 5;
109+
// If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
110+
// next step to process.
111+
x >>= t;
112+
// If `t` was set to `1 << 5`, then we subtract 32 from the number of potential
113+
// leading zeros
114+
z -= t;
115+
}
116+
if T::BITS >= 32 {
117+
t = ((x >= (T::ONE << 16)) as u32) << 4;
118+
x >>= t;
119+
z -= t;
120+
}
121+
const { assert!(T::BITS >= 16) };
122+
t = ((x >= (T::ONE << 8)) as u32) << 3;
108123
x >>= t;
109-
// If `t` was set to `1 << 5`, then we subtract 32 from the number of potential
110-
// leading zeros
111124
z -= t;
112-
}
113-
if T::BITS >= 32 {
114-
t = ((x >= (T::ONE << 16)) as u32) << 4;
125+
t = ((x >= (T::ONE << 4)) as u32) << 2;
126+
x >>= t;
127+
z -= t;
128+
t = ((x >= (T::ONE << 2)) as u32) << 1;
115129
x >>= t;
116130
z -= t;
131+
t = (x >= (T::ONE << 1)) as u32;
132+
x >>= t;
133+
z -= t;
134+
// All bits except the LSB are guaranteed to be zero for this final bisection step.
135+
// If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
136+
z as usize - x.cast()
117137
}
118-
const { assert!(T::BITS >= 16) };
119-
t = ((x >= (T::ONE << 8)) as u32) << 3;
120-
x >>= t;
121-
z -= t;
122-
t = ((x >= (T::ONE << 4)) as u32) << 2;
123-
x >>= t;
124-
z -= t;
125-
t = ((x >= (T::ONE << 2)) as u32) << 1;
126-
x >>= t;
127-
z -= t;
128-
t = (x >= (T::ONE << 1)) as u32;
129-
x >>= t;
130-
z -= t;
131-
// All bits except the LSB are guaranteed to be zero for this final bisection step.
132-
// If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
133-
z as usize - x.cast()
134-
}
135138
}
136139

137140
intrinsics! {

‎src/int/mod.rs

+5-419
Large diffs are not rendered by default.

‎src/int/specialized_div_rem/delegate.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ macro_rules! impl_delegate {
185185
};
186186
}
187187

188-
public_test_dep! {
189188
/// Returns `n / d` and sets `*rem = n % d`.
190189
///
191190
/// This specialization exists because:
@@ -195,7 +194,7 @@ public_test_dep! {
195194
/// delegate algorithm strategy the only reasonably fast way to perform `u128` division.
196195
// used on SPARC
197196
#[allow(dead_code)]
198-
pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
197+
pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
199198
use super::*;
200199
let duo_lo = duo as u64;
201200
let duo_hi = (duo >> 64) as u64;
@@ -316,4 +315,3 @@ pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
316315
}
317316
}
318317
}
319-
}

‎src/int/trailing_zeros.rs

+37-32
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,49 @@
1-
use crate::int::{CastInto, Int};
1+
#[cfg(feature = "public-test-deps")]
2+
pub use implementation::trailing_zeros;
3+
#[cfg(not(feature = "public-test-deps"))]
4+
pub(crate) use implementation::trailing_zeros;
25

3-
public_test_dep! {
4-
/// Returns number of trailing binary zeros in `x`.
5-
#[allow(dead_code)]
6-
pub(crate) fn trailing_zeros<T: Int + CastInto<u32> + CastInto<u16> + CastInto<u8>>(x: T) -> usize {
7-
let mut x = x;
8-
let mut r: u32 = 0;
9-
let mut t: u32;
6+
mod implementation {
7+
use crate::int::{CastInto, Int};
108

11-
const { assert!(T::BITS <= 64) };
12-
if T::BITS >= 64 {
13-
r += ((CastInto::<u32>::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
14-
x >>= r; // remove 32 zero bits
15-
}
9+
/// Returns number of trailing binary zeros in `x`.
10+
#[allow(dead_code)]
11+
pub fn trailing_zeros<T: Int + CastInto<u32> + CastInto<u16> + CastInto<u8>>(x: T) -> usize {
12+
let mut x = x;
13+
let mut r: u32 = 0;
14+
let mut t: u32;
1615

17-
if T::BITS >= 32 {
18-
t = ((CastInto::<u16>::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
19-
r += t;
20-
x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
21-
}
16+
const { assert!(T::BITS <= 64) };
17+
if T::BITS >= 64 {
18+
r += ((CastInto::<u32>::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
19+
x >>= r; // remove 32 zero bits
20+
}
2221

23-
const { assert!(T::BITS >= 16) };
24-
t = ((CastInto::<u8>::cast(x) == 0) as u32) << 3;
25-
x >>= t; // x = [0 - 0xFF] + higher garbage bits
26-
r += t;
22+
if T::BITS >= 32 {
23+
t = ((CastInto::<u16>::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
24+
r += t;
25+
x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
26+
}
2727

28-
let mut x: u8 = x.cast();
28+
const { assert!(T::BITS >= 16) };
29+
t = ((CastInto::<u8>::cast(x) == 0) as u32) << 3;
30+
x >>= t; // x = [0 - 0xFF] + higher garbage bits
31+
r += t;
2932

30-
t = (((x & 0x0F) == 0) as u32) << 2;
31-
x >>= t; // x = [0 - 0xF] + higher garbage bits
32-
r += t;
33+
let mut x: u8 = x.cast();
3334

34-
t = (((x & 0x3) == 0) as u32) << 1;
35-
x >>= t; // x = [0 - 0x3] + higher garbage bits
36-
r += t;
35+
t = (((x & 0x0F) == 0) as u32) << 2;
36+
x >>= t; // x = [0 - 0xF] + higher garbage bits
37+
r += t;
3738

38-
x &= 3;
39+
t = (((x & 0x3) == 0) as u32) << 1;
40+
x >>= t; // x = [0 - 0x3] + higher garbage bits
41+
r += t;
3942

40-
r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg())
41-
}
43+
x &= 3;
44+
45+
r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg())
46+
}
4247
}
4348

4449
intrinsics! {

‎src/int/traits.rs

+411
Large diffs are not rendered by default.

‎src/macros.rs

-16
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,5 @@
11
//! Macros shared throughout the compiler-builtins implementation
22
3-
/// Changes the visibility to `pub` if feature "public-test-deps" is set
4-
#[cfg(not(feature = "public-test-deps"))]
5-
macro_rules! public_test_dep {
6-
($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => {
7-
$(#[$($meta)*])* pub(crate) $ident $($tokens)*
8-
};
9-
}
10-
11-
/// Changes the visibility to `pub` if feature "public-test-deps" is set
12-
#[cfg(feature = "public-test-deps")]
13-
macro_rules! public_test_dep {
14-
{$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => {
15-
$(#[$($meta)*])* pub $ident $($tokens)*
16-
};
17-
}
18-
193
/// The "main macro" used for defining intrinsics.
204
///
215
/// The compiler-builtins library is super platform-specific with tons of crazy

0 commit comments

Comments (0)
Please sign in to comment.