rust-lang · tgross35 · Aug 25, 2024 · Mar 8, 2025 · Aug 25, 2024 · Mar 8, 2025
diff --git a/library/core/src/fmt/float.rs b/library/core/src/fmt/float.rs
@@ -20,6 +20,7 @@ macro_rules! impl_general_format {
     }
 }
 
+impl_general_format! { f16 }
 impl_general_format! { f32 f64 }
 
 // Don't inline this so callers don't use the stack space this function
@@ -229,15 +230,7 @@ macro_rules! floating {
     };
 }
 
-floating! { f32 f64 }
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl Debug for f16 {
-    #[inline]
-    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
-        write!(f, "{:#06x}", self.to_bits())
-    }
-}
+floating! { f16 f32 f64 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl Debug for f128 {

diff --git a/library/core/src/num/dec2flt/float.rs b/library/core/src/num/dec2flt/float.rs
@@ -45,7 +45,7 @@ macro_rules! int {
     }
 }
 
-int!(u32, u64);
+int!(u16, u32, u64);
 
 /// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
 ///
@@ -189,9 +189,14 @@ pub trait RawFloat:
 
     /// Returns the mantissa, exponent and sign as integers.
     ///
-    /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
-    /// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the
-    /// minimum subnormal power.
+    /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
+    /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
+    /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
+    ///
+    /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
+    /// with the explicit bit set but otherwise unshifted.
+    ///
+    /// `s` is only ever +/-1.
     fn integer_decode(self) -> (u64, i16, i8) {
         let bits = self.to_bits();
         let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
@@ -213,6 +218,49 @@ const fn pow2_to_pow10(a: i64) -> i64 {
     res as i64
 }
 
+impl RawFloat for f16 {
+    type Int = u16;
+
+    const INFINITY: Self = Self::INFINITY;
+    const NEG_INFINITY: Self = Self::NEG_INFINITY;
+    const NAN: Self = Self::NAN;
+    const NEG_NAN: Self = -Self::NAN;
+
+    const BITS: u32 = 16;
+    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
+    const EXP_MASK: Self::Int = Self::EXP_MASK;
+    const SIG_MASK: Self::Int = Self::MAN_MASK;
+
+    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
+    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
+    const SMALLEST_POWER_OF_TEN: i32 = -27;
+
+    #[inline]
+    fn from_u64(v: u64) -> Self {
+        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
+        v as _
+    }
+
+    #[inline]
+    fn from_u64_bits(v: u64) -> Self {
+        Self::from_bits((v & 0xFFFF) as u16)
+    }
+
+    fn pow10_fast_path(exponent: usize) -> Self {
+        #[allow(clippy::use_self)]
+        const TABLE: [f16; 8] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.];
+        TABLE[exponent & 7]
+    }
+
+    fn to_bits(self) -> Self::Int {
+        self.to_bits()
+    }
+
+    fn classify(self) -> FpCategory {
+        self.classify()
+    }
+}
+
 impl RawFloat for f32 {
     type Int = u32;
 

diff --git a/library/core/src/num/dec2flt/mod.rs b/library/core/src/num/dec2flt/mod.rs
@@ -171,6 +171,8 @@ macro_rules! from_str_float_impl {
         }
     };
 }
+
+from_str_float_impl!(f16);
 from_str_float_impl!(f32);
 from_str_float_impl!(f64);
 

diff --git a/library/core/src/num/flt2dec/decoder.rs b/library/core/src/num/flt2dec/decoder.rs
@@ -45,6 +45,12 @@ pub trait DecodableFloat: RawFloat + Copy {
     fn min_pos_norm_value() -> Self;
 }
 
+impl DecodableFloat for f16 {
+    fn min_pos_norm_value() -> Self {
+        f16::MIN_POSITIVE
+    }
+}
+
 impl DecodableFloat for f32 {
     fn min_pos_norm_value() -> Self {
         f32::MIN_POSITIVE

diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs
@@ -30,6 +30,7 @@
 #![feature(exact_size_is_empty)]
 #![feature(extend_one)]
 #![feature(extern_types)]
+#![feature(f16)]
 #![feature(float_minimum_maximum)]
 #![feature(flt2dec)]
 #![feature(fmt_internals)]

diff --git a/library/coretests/tests/num/dec2flt/decimal.rs b/library/coretests/tests/num/dec2flt/decimal.rs
@@ -7,6 +7,20 @@ const FPATHS_F32: &[FPath<f32>] =
 const FPATHS_F64: &[FPath<f64>] =
     &[((0, 0, false, false), Some(0.0)), ((0, 0, false, false), Some(0.0))];
 
+// FIXME(f16_f128): enable on all targets once possible.
+#[test]
+#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
+fn check_fast_path_f16() {
+    const FPATHS_F16: &[FPath<f16>] =
+        &[((0, 0, false, false), Some(0.0)), ((0, 0, false, false), Some(0.0))];
+    for ((exponent, mantissa, negative, many_digits), expected) in FPATHS_F16.iter().copied() {
+        let dec = Decimal { exponent, mantissa, negative, many_digits };
+        let actual = dec.try_fast_path::<f16>();
+
+        assert_eq!(actual, expected);
+    }
+}
+
 #[test]
 fn check_fast_path_f32() {
     for ((exponent, mantissa, negative, many_digits), expected) in FPATHS_F32.iter().copied() {

diff --git a/library/coretests/tests/num/dec2flt/float.rs b/library/coretests/tests/num/dec2flt/float.rs
@@ -1,5 +1,23 @@
 use core::num::dec2flt::float::RawFloat;
 
+// FIXME(f16_f128): enable on all targets once possible.
+#[test]
+#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
+fn test_f16_integer_decode() {
+    assert_eq!(3.14159265359f16.integer_decode(), (1608, -9, 1));
+    assert_eq!((-8573.5918555f16).integer_decode(), (1072, 3, -1));
+    assert_eq!(2f16.powf(14.0).integer_decode(), (1 << 10, 4, 1));
+    assert_eq!(0f16.integer_decode(), (0, -25, 1));
+    assert_eq!((-0f16).integer_decode(), (0, -25, -1));
+    assert_eq!(f16::INFINITY.integer_decode(), (1 << 10, 6, 1));
+    assert_eq!(f16::NEG_INFINITY.integer_decode(), (1 << 10, 6, -1));
+
+    // Ignore the sign of NAN (the quiet bit lives in the mantissa and is still checked).
+    // The sign can vary between runtime operations and LLVM folding.
+    let (nan_m, nan_p, _nan_s) = f16::NAN.integer_decode();
+    assert_eq!((nan_m, nan_p), (1536, 6));
+}
+
 #[test]
 fn test_f32_integer_decode() {
     assert_eq!(3.14159265359f32.integer_decode(), (13176795, -22, 1));
@@ -34,6 +52,27 @@ fn test_f64_integer_decode() {
 
 /* Sanity checks of computed magic numbers */
 
+// FIXME(f16_f128): enable on all targets once possible.
+#[test]
+#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
+fn test_f16_consts() {
+    assert_eq!(<f16 as RawFloat>::INFINITY, f16::INFINITY);
+    assert_eq!(<f16 as RawFloat>::NEG_INFINITY, -f16::INFINITY);
+    assert_eq!(<f16 as RawFloat>::NAN.to_bits(), f16::NAN.to_bits());
+    assert_eq!(<f16 as RawFloat>::NEG_NAN.to_bits(), (-f16::NAN).to_bits());
+    assert_eq!(<f16 as RawFloat>::SIG_BITS, 10);
+    assert_eq!(<f16 as RawFloat>::MIN_EXPONENT_ROUND_TO_EVEN, -22);
+    assert_eq!(<f16 as RawFloat>::MAX_EXPONENT_ROUND_TO_EVEN, 5);
+    assert_eq!(<f16 as RawFloat>::MIN_EXPONENT_FAST_PATH, -4);
+    assert_eq!(<f16 as RawFloat>::MAX_EXPONENT_FAST_PATH, 4);
+    assert_eq!(<f16 as RawFloat>::MAX_EXPONENT_DISGUISED_FAST_PATH, 7);
+    assert_eq!(<f16 as RawFloat>::EXP_MIN, -14);
+    assert_eq!(<f16 as RawFloat>::EXP_SAT, 0x1f);
+    assert_eq!(<f16 as RawFloat>::SMALLEST_POWER_OF_TEN, -27);
+    assert_eq!(<f16 as RawFloat>::LARGEST_POWER_OF_TEN, 4);
+    assert_eq!(<f16 as RawFloat>::MAX_MANTISSA_FAST_PATH, 2048);
+}
+
 #[test]
 fn test_f32_consts() {
     assert_eq!(<f32 as RawFloat>::INFINITY, f32::INFINITY);

diff --git a/library/coretests/tests/num/dec2flt/lemire.rs b/library/coretests/tests/num/dec2flt/lemire.rs
@@ -1,6 +1,12 @@
 use core::num::dec2flt::float::RawFloat;
 use core::num::dec2flt::lemire::compute_float;
 
+#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
+fn compute_float16(q: i64, w: u64) -> (i32, u64) {
+    let fp = compute_float::<f16>(q, w);
+    (fp.p_biased, fp.m)
+}
+
 fn compute_float32(q: i64, w: u64) -> (i32, u64) {
     let fp = compute_float::<f32>(q, w);
     (fp.p_biased, fp.m)
@@ -11,23 +17,73 @@ fn compute_float64(q: i64, w: u64) -> (i32, u64) {
     (fp.p_biased, fp.m)
 }
 
+// FIXME(f16_f128): enable on all targets once possible.
+#[test]
+#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
+fn compute_float_f16_rounding() {
+    // The maximum integer that can be converted to a `f16` without losing precision.
+    let val = 1 << 11;
+    let scale = 10_u64.pow(10);
+
+    // These test near-halfway cases for half-precision floats.
+    assert_eq!(compute_float16(0, val), (26, 0));
+    assert_eq!(compute_float16(0, val + 1), (26, 0));
+    assert_eq!(compute_float16(0, val + 2), (26, 1));
+    assert_eq!(compute_float16(0, val + 3), (26, 2));
+    assert_eq!(compute_float16(0, val + 4), (26, 2));
+
+    // For the next power up, the two nearest representable numbers are twice as far apart.
+    let val2 = 1 << 12;
+    assert_eq!(compute_float16(0, val2), (27, 0));
+    assert_eq!(compute_float16(0, val2 + 2), (27, 0));
+    assert_eq!(compute_float16(0, val2 + 4), (27, 1));
+    assert_eq!(compute_float16(0, val2 + 6), (27, 2));
+    assert_eq!(compute_float16(0, val2 + 8), (27, 2));
+
+    // These are examples of the above tests, with digits from the exponent shifted
+    // to the mantissa.
+    assert_eq!(compute_float16(-10, val * scale), (26, 0));
+    assert_eq!(compute_float16(-10, (val + 1) * scale), (26, 0));
+    assert_eq!(compute_float16(-10, (val + 2) * scale), (26, 1));
+    // Let's check the lines to see if anything is different in table...
+    assert_eq!(compute_float16(-10, (val + 3) * scale), (26, 2));
+    assert_eq!(compute_float16(-10, (val + 4) * scale), (26, 2));
+
+    // Check the rounding point between infinity and the next representable number down
+    assert_eq!(compute_float16(4, 6), (f16::INFINITE_POWER - 1, 851));
+    assert_eq!(compute_float16(4, 7), (f16::INFINITE_POWER, 0)); // infinity
+    assert_eq!(compute_float16(2, 655), (f16::INFINITE_POWER - 1, 1023));
+}
+
 #[test]
 fn compute_float_f32_rounding() {
+    // The maximum integer that can be converted to a `f32` without losing precision.
+    let val = 1 << 24;
+    let scale = 10_u64.pow(10);
+
     // These test near-halfway cases for single-precision floats.
-    assert_eq!(compute_float32(0, 16777216), (151, 0));
-    assert_eq!(compute_float32(0, 16777217), (151, 0));
-    assert_eq!(compute_float32(0, 16777218), (151, 1));
-    assert_eq!(compute_float32(0, 16777219), (151, 2));
-    assert_eq!(compute_float32(0, 16777220), (151, 2));
-
-    // These are examples of the above tests, with
-    // digits from the exponent shifted to the mantissa.
-    assert_eq!(compute_float32(-10, 167772160000000000), (151, 0));
-    assert_eq!(compute_float32(-10, 167772170000000000), (151, 0));
-    assert_eq!(compute_float32(-10, 167772180000000000), (151, 1));
+    assert_eq!(compute_float32(0, val), (151, 0));
+    assert_eq!(compute_float32(0, val + 1), (151, 0));
+    assert_eq!(compute_float32(0, val + 2), (151, 1));
+    assert_eq!(compute_float32(0, val + 3), (151, 2));
+    assert_eq!(compute_float32(0, val + 4), (151, 2));
+
+    // For the next power up, the two nearest representable numbers are twice as far apart.
+    let val2 = 1 << 25;
+    assert_eq!(compute_float32(0, val2), (152, 0));
+    assert_eq!(compute_float32(0, val2 + 2), (152, 0));
+    assert_eq!(compute_float32(0, val2 + 4), (152, 1));
+    assert_eq!(compute_float32(0, val2 + 6), (152, 2));
+    assert_eq!(compute_float32(0, val2 + 8), (152, 2));
+
+    // These are examples of the above tests, with digits from the exponent shifted
+    // to the mantissa.
+    assert_eq!(compute_float32(-10, val * scale), (151, 0));
+    assert_eq!(compute_float32(-10, (val + 1) * scale), (151, 0));
+    assert_eq!(compute_float32(-10, (val + 2) * scale), (151, 1));
     // Let's check the lines to see if anything is different in table...
-    assert_eq!(compute_float32(-10, 167772190000000000), (151, 2));
-    assert_eq!(compute_float32(-10, 167772200000000000), (151, 2));
+    assert_eq!(compute_float32(-10, (val + 3) * scale), (151, 2));
+    assert_eq!(compute_float32(-10, (val + 4) * scale), (151, 2));
 
     // Check the rounding point between infinity and the next representable number down
     assert_eq!(compute_float32(38, 3), (f32::INFINITE_POWER - 1, 6402534));
@@ -37,23 +93,38 @@ fn compute_float_f32_rounding() {
 
 #[test]
 fn compute_float_f64_rounding() {
+    // The maximum integer that can be converted to a `f64` without losing precision.
+    let val = 1 << 53;
+    let scale = 1000;
+
     // These test near-halfway cases for double-precision floats.
-    assert_eq!(compute_float64(0, 9007199254740992), (1076, 0));
-    assert_eq!(compute_float64(0, 9007199254740993), (1076, 0));
-    assert_eq!(compute_float64(0, 9007199254740994), (1076, 1));
-    assert_eq!(compute_float64(0, 9007199254740995), (1076, 2));
-    assert_eq!(compute_float64(0, 9007199254740996), (1076, 2));
-    assert_eq!(compute_float64(0, 18014398509481984), (1077, 0));
-    assert_eq!(compute_float64(0, 18014398509481986), (1077, 0));
-    assert_eq!(compute_float64(0, 18014398509481988), (1077, 1));
-    assert_eq!(compute_float64(0, 18014398509481990), (1077, 2));
-    assert_eq!(compute_float64(0, 18014398509481992), (1077, 2));
-
-    // These are examples of the above tests, with
-    // digits from the exponent shifted to the mantissa.
-    assert_eq!(compute_float64(-3, 9007199254740992000), (1076, 0));
-    assert_eq!(compute_float64(-3, 9007199254740993000), (1076, 0));
-    assert_eq!(compute_float64(-3, 9007199254740994000), (1076, 1));
-    assert_eq!(compute_float64(-3, 9007199254740995000), (1076, 2));
-    assert_eq!(compute_float64(-3, 9007199254740996000), (1076, 2));
+    assert_eq!(compute_float64(0, val), (1076, 0));
+    assert_eq!(compute_float64(0, val + 1), (1076, 0));
+    assert_eq!(compute_float64(0, val + 2), (1076, 1));
+    assert_eq!(compute_float64(0, val + 3), (1076, 2));
+    assert_eq!(compute_float64(0, val + 4), (1076, 2));
+
+    // For the next power up, the two nearest representable numbers are twice as far apart.
+    let val2 = 1 << 54;
+    assert_eq!(compute_float64(0, val2), (1077, 0));
+    assert_eq!(compute_float64(0, val2 + 2), (1077, 0));
+    assert_eq!(compute_float64(0, val2 + 4), (1077, 1));
+    assert_eq!(compute_float64(0, val2 + 6), (1077, 2));
+    assert_eq!(compute_float64(0, val2 + 8), (1077, 2));
+
+    // These are examples of the above tests, with digits from the exponent shifted
+    // to the mantissa.
+    assert_eq!(compute_float64(-3, val * scale), (1076, 0));
+    assert_eq!(compute_float64(-3, (val + 1) * scale), (1076, 0));
+    assert_eq!(compute_float64(-3, (val + 2) * scale), (1076, 1));
+    assert_eq!(compute_float64(-3, (val + 3) * scale), (1076, 2));
+    assert_eq!(compute_float64(-3, (val + 4) * scale), (1076, 2));
+
+    // Check the rounding point between infinity and the next representable number down
+    assert_eq!(compute_float64(308, 1), (f64::INFINITE_POWER - 1, 506821272651936));
+    assert_eq!(compute_float64(308, 2), (f64::INFINITE_POWER, 0)); // infinity
+    assert_eq!(
+        compute_float64(292, 17976931348623157),
+        (f64::INFINITE_POWER - 1, 4503599627370495)
+    );
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -171,6 +171,8 @@ macro_rules! from_str_float_impl { @@
             }
         };
     }
+    from_str_float_impl!(f16);
     from_str_float_impl!(f32);
     from_str_float_impl!(f64);
@@ Expand Down @@