Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add f16 formatting and parsing #127013

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions library/core/src/fmt/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ macro_rules! impl_general_format {
}
}

impl_general_format! { f16 }
impl_general_format! { f32 f64 }

// Don't inline this so callers don't use the stack space this function
Expand Down Expand Up @@ -229,15 +230,7 @@ macro_rules! floating {
};
}

floating! { f32 f64 }

// `Debug` for `f16` that prints the value's raw bit pattern in hexadecimal
// instead of a decimal rendering.
#[stable(feature = "rust1", since = "1.0.0")]
impl Debug for f16 {
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
// `{:#06x}`: lowercase hex with a `0x` prefix, zero-padded to a total width
// of six characters (the prefix counts towards the width).
write!(f, "{:#06x}", self.to_bits())
}
}
floating! { f16 f32 f64 }

#[stable(feature = "rust1", since = "1.0.0")]
impl Debug for f128 {
Expand Down
56 changes: 52 additions & 4 deletions library/core/src/num/dec2flt/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ macro_rules! int {
}
}

int!(u32, u64);
int!(u16, u32, u64);

/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
///
Expand Down Expand Up @@ -189,9 +189,14 @@ pub trait RawFloat:

/// Returns the mantissa, exponent and sign as integers.
///
/// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
/// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the
/// minimum subnormal power.
/// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
/// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
/// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
///
/// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
/// unshifted with the explicit bit set.
///
/// `s` is only ever +/-1.
fn integer_decode(self) -> (u64, i16, i8) {
let bits = self.to_bits();
let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
Expand All @@ -213,6 +218,49 @@ const fn pow2_to_pow10(a: i64) -> i64 {
res as i64
}

// `RawFloat` implementation for `f16`: supplies the type-specific constants and
// conversions required by the trait.
impl RawFloat for f16 {
// Unsigned integer with the same width as `f16` (see `BITS` below).
type Int = u16;

// Special values. Inside this impl, `Self::X` on the right-hand side names the
// inherent `f16` constant, since inherent items take precedence over trait items.
const INFINITY: Self = Self::INFINITY;
const NEG_INFINITY: Self = Self::NEG_INFINITY;
const NAN: Self = Self::NAN;
const NEG_NAN: Self = -Self::NAN;

// Bit-layout parameters, forwarded from the inherent `f16` constants.
const BITS: u32 = 16;
const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
const EXP_MASK: Self::Int = Self::EXP_MASK;
const SIG_MASK: Self::Int = Self::MAN_MASK;

// Precomputed limits for the parsing algorithm.
// NOTE(review): the derivation of these three values is not visible in this
// block — confirm against the trait's documentation for each constant.
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
const SMALLEST_POWER_OF_TEN: i32 = -27;

// Converts an integer to `f16` with a plain `as` cast. Debug builds assert that
// `v` does not exceed `MAX_MANTISSA_FAST_PATH`.
#[inline]
fn from_u64(v: u64) -> Self {
debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
v as _
}

// Reinterprets the low 16 bits of `v` as an `f16` bit pattern; any higher bits
// are masked off.
#[inline]
fn from_u64_bits(v: u64) -> Self {
Self::from_bits((v & 0xFFFF) as u16)
}

// Looks up `10^exponent` in a small table. Only 10^0 through 10^4 are populated;
// the remaining three entries are zero filler that brings the length to 8, so
// that `exponent & 7` is always a valid index.
fn pow10_fast_path(exponent: usize) -> Self {
#[allow(clippy::use_self)]
const TABLE: [f16; 8] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.];
TABLE[exponent & 7]
}

// Forwards to the inherent `f16::to_bits` (inherent methods win method resolution).
fn to_bits(self) -> Self::Int {
self.to_bits()
}

// Forwards to the inherent `f16::classify`.
fn classify(self) -> FpCategory {
self.classify()
}
}

impl RawFloat for f32 {
type Int = u32;

Expand Down
2 changes: 2 additions & 0 deletions library/core/src/num/dec2flt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ macro_rules! from_str_float_impl {
}
};
}

from_str_float_impl!(f16);
from_str_float_impl!(f32);
from_str_float_impl!(f64);

Expand Down
6 changes: 6 additions & 0 deletions library/core/src/num/flt2dec/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ pub trait DecodableFloat: RawFloat + Copy {
fn min_pos_norm_value() -> Self;
}

// Makes `f16` usable with the `flt2dec` decoder.
impl DecodableFloat for f16 {
    // The smallest positive normal `f16`, taken from the inherent `MIN_POSITIVE` constant.
    fn min_pos_norm_value() -> Self {
        Self::MIN_POSITIVE
    }
}

impl DecodableFloat for f32 {
fn min_pos_norm_value() -> Self {
f32::MIN_POSITIVE
Expand Down
1 change: 1 addition & 0 deletions library/coretests/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#![feature(exact_size_is_empty)]
#![feature(extend_one)]
#![feature(extern_types)]
#![feature(f16)]
#![feature(float_minimum_maximum)]
#![feature(flt2dec)]
#![feature(fmt_internals)]
Expand Down
14 changes: 14 additions & 0 deletions library/coretests/tests/num/dec2flt/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ const FPATHS_F32: &[FPath<f32>] =
const FPATHS_F64: &[FPath<f64>] =
&[((0, 0, false, false), Some(0.0)), ((0, 0, false, false), Some(0.0))];

// FIXME(f16_f128): enable on all targets once possible.
//
// Runs every `FPATHS_F16` entry through `Decimal::try_fast_path::<f16>` and checks
// the result against the recorded expectation.
#[test]
#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
fn check_fast_path_f16() {
    const FPATHS_F16: &[FPath<f16>] =
        &[((0, 0, false, false), Some(0.0)), ((0, 0, false, false), Some(0.0))];

    for &(fields, expected) in FPATHS_F16 {
        // Each entry stores the `Decimal` fields in declaration order.
        let (exponent, mantissa, negative, many_digits) = fields;
        let decimal = Decimal { exponent, mantissa, negative, many_digits };

        assert_eq!(decimal.try_fast_path::<f16>(), expected);
    }
}

#[test]
fn check_fast_path_f32() {
for ((exponent, mantissa, negative, many_digits), expected) in FPATHS_F32.iter().copied() {
Expand Down
39 changes: 39 additions & 0 deletions library/coretests/tests/num/dec2flt/float.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
use core::num::dec2flt::float::RawFloat;

// FIXME(f16_f128): enable on all targets once possible.
//
// Checks `RawFloat::integer_decode` for `f16`. Each expected tuple is
// `(mantissa, exponent, sign)` such that `sign * mantissa * 2^exponent` is the
// decoded value.
#[test]
#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
fn test_f16_integer_decode() {
// 1608 * 2^-9 = 3.140625
assert_eq!(3.14159265359f16.integer_decode(), (1608, -9, 1));
// -(1072 * 2^3) = -8576
assert_eq!((-8573.5918555f16).integer_decode(), (1072, 3, -1));
// 2^14 = (1 << 10) * 2^4: only the explicit leading bit is set.
assert_eq!(2f16.powf(14.0).integer_decode(), (1 << 10, 4, 1));
// Both zeros decode to mantissa 0 with exponent -25; only the sign differs.
assert_eq!(0f16.integer_decode(), (0, -25, 1));
assert_eq!((-0f16).integer_decode(), (0, -25, -1));
// Infinities decode to the explicit bit alone with exponent 6.
assert_eq!(f16::INFINITY.integer_decode(), (1 << 10, 6, 1));
assert_eq!(f16::NEG_INFINITY.integer_decode(), (1 << 10, 6, -1));

// Ignore the "sign" (quiet / signalling flag) of NAN.
// It can vary between runtime operations and LLVM folding.
let (nan_m, nan_p, _nan_s) = f16::NAN.integer_decode();
// 1536 = (1 << 10) | (1 << 9); the exponent matches the infinity case.
assert_eq!((nan_m, nan_p), (1536, 6));
}

#[test]
fn test_f32_integer_decode() {
assert_eq!(3.14159265359f32.integer_decode(), (13176795, -22, 1));
Expand Down Expand Up @@ -34,6 +52,27 @@ fn test_f64_integer_decode() {

/* Sanity checks of computed magic numbers */

// FIXME(f16_f128): enable on all targets once possible.
//
// Pins the `RawFloat` constants for `f16` to their expected values so that any
// change to how they are declared or computed is caught.
#[test]
#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
fn test_f16_consts() {
// Special values must agree with the inherent `f16` constants. NaNs are compared
// by bit pattern because NaN never compares equal to itself.
assert_eq!(<f16 as RawFloat>::INFINITY, f16::INFINITY);
assert_eq!(<f16 as RawFloat>::NEG_INFINITY, -f16::INFINITY);
assert_eq!(<f16 as RawFloat>::NAN.to_bits(), f16::NAN.to_bits());
assert_eq!(<f16 as RawFloat>::NEG_NAN.to_bits(), (-f16::NAN).to_bits());
// Layout and algorithm parameters.
assert_eq!(<f16 as RawFloat>::SIG_BITS, 10);
assert_eq!(<f16 as RawFloat>::MIN_EXPONENT_ROUND_TO_EVEN, -22);
assert_eq!(<f16 as RawFloat>::MAX_EXPONENT_ROUND_TO_EVEN, 5);
assert_eq!(<f16 as RawFloat>::MIN_EXPONENT_FAST_PATH, -4);
assert_eq!(<f16 as RawFloat>::MAX_EXPONENT_FAST_PATH, 4);
assert_eq!(<f16 as RawFloat>::MAX_EXPONENT_DISGUISED_FAST_PATH, 7);
assert_eq!(<f16 as RawFloat>::EXP_MIN, -14);
assert_eq!(<f16 as RawFloat>::EXP_SAT, 0x1f);
assert_eq!(<f16 as RawFloat>::SMALLEST_POWER_OF_TEN, -27);
assert_eq!(<f16 as RawFloat>::LARGEST_POWER_OF_TEN, 4);
// 2048 = 1 << 11
assert_eq!(<f16 as RawFloat>::MAX_MANTISSA_FAST_PATH, 2048);
}

#[test]
fn test_f32_consts() {
assert_eq!(<f32 as RawFloat>::INFINITY, f32::INFINITY);
Expand Down
133 changes: 102 additions & 31 deletions library/coretests/tests/num/dec2flt/lemire.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
use core::num::dec2flt::float::RawFloat;
use core::num::dec2flt::lemire::compute_float;

// Runs `lemire::compute_float` for `f16` and returns the result as a plain
// `(p_biased, m)` pair so tests can compare it against tuple literals.
#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
fn compute_float16(q: i64, w: u64) -> (i32, u64) {
    let biased = compute_float::<f16>(q, w);
    let exponent = biased.p_biased;
    let mantissa = biased.m;
    (exponent, mantissa)
}

fn compute_float32(q: i64, w: u64) -> (i32, u64) {
let fp = compute_float::<f32>(q, w);
(fp.p_biased, fp.m)
Expand All @@ -11,23 +17,73 @@ fn compute_float64(q: i64, w: u64) -> (i32, u64) {
(fp.p_biased, fp.m)
}

// FIXME(f16_f128): enable on all targets once possible.
//
// Checks how `compute_float::<f16>` rounds inputs that sit on or next to a tie
// between two representable values, and where it overflows to infinity. Results
// are `(p_biased, m)` pairs as returned by `compute_float16`.
#[test]
#[cfg(any(target_arch = "x86", all(target_arch = "aarch64", target_feature = "neon")))]
fn compute_float_f16_rounding() {
// The maximum integer that can be converted to an `f16` without loss of precision.
let val = 1 << 11;
let scale = 10_u64.pow(10);

// These test near-halfway cases for half-precision floats.
// The expected mantissas step once per two input units, so `val + 1` and
// `val + 3` are ties; they resolve to the even mantissas 0 and 2.
assert_eq!(compute_float16(0, val), (26, 0));
assert_eq!(compute_float16(0, val + 1), (26, 0));
assert_eq!(compute_float16(0, val + 2), (26, 1));
assert_eq!(compute_float16(0, val + 3), (26, 2));
assert_eq!(compute_float16(0, val + 4), (26, 2));

// For the next power up, the two nearest representable numbers are twice as far apart.
let val2 = 1 << 12;
assert_eq!(compute_float16(0, val2), (27, 0));
assert_eq!(compute_float16(0, val2 + 2), (27, 0));
assert_eq!(compute_float16(0, val2 + 4), (27, 1));
assert_eq!(compute_float16(0, val2 + 6), (27, 2));
assert_eq!(compute_float16(0, val2 + 8), (27, 2));

// These are examples of the above tests, with digits from the exponent shifted
// to the mantissa.
// `w` is multiplied by 10^10 while `q` drops to -10, and the expected results
// are the same as for the unscaled inputs.
assert_eq!(compute_float16(-10, val * scale), (26, 0));
assert_eq!(compute_float16(-10, (val + 1) * scale), (26, 0));
assert_eq!(compute_float16(-10, (val + 2) * scale), (26, 1));
// Let's check the lines to see if anything is different in table...
assert_eq!(compute_float16(-10, (val + 3) * scale), (26, 2));
assert_eq!(compute_float16(-10, (val + 4) * scale), (26, 2));

// Check the rounding point between infinity and the next representable number down
// The inputs are 6e4, 7e4 and 655e2: the first and last stay at the largest
// finite exponent, the middle one overflows.
assert_eq!(compute_float16(4, 6), (f16::INFINITE_POWER - 1, 851));
assert_eq!(compute_float16(4, 7), (f16::INFINITE_POWER, 0)); // infinity
assert_eq!(compute_float16(2, 655), (f16::INFINITE_POWER - 1, 1023));
}

#[test]
fn compute_float_f32_rounding() {
// The maximum integer that can be converted to an `f32` without loss of precision.
let val = 1 << 24;
let scale = 10_u64.pow(10);

// These test near-halfway cases for single-precision floats.
assert_eq!(compute_float32(0, 16777216), (151, 0));
assert_eq!(compute_float32(0, 16777217), (151, 0));
assert_eq!(compute_float32(0, 16777218), (151, 1));
assert_eq!(compute_float32(0, 16777219), (151, 2));
assert_eq!(compute_float32(0, 16777220), (151, 2));

// These are examples of the above tests, with
// digits from the exponent shifted to the mantissa.
assert_eq!(compute_float32(-10, 167772160000000000), (151, 0));
assert_eq!(compute_float32(-10, 167772170000000000), (151, 0));
assert_eq!(compute_float32(-10, 167772180000000000), (151, 1));
assert_eq!(compute_float32(0, val), (151, 0));
assert_eq!(compute_float32(0, val + 1), (151, 0));
assert_eq!(compute_float32(0, val + 2), (151, 1));
assert_eq!(compute_float32(0, val + 3), (151, 2));
assert_eq!(compute_float32(0, val + 4), (151, 2));

// For the next power up, the two nearest representable numbers are twice as far apart.
let val2 = 1 << 25;
assert_eq!(compute_float32(0, val2), (152, 0));
assert_eq!(compute_float32(0, val2 + 2), (152, 0));
assert_eq!(compute_float32(0, val2 + 4), (152, 1));
assert_eq!(compute_float32(0, val2 + 6), (152, 2));
assert_eq!(compute_float32(0, val2 + 8), (152, 2));

// These are examples of the above tests, with digits from the exponent shifted
// to the mantissa.
assert_eq!(compute_float32(-10, val * scale), (151, 0));
assert_eq!(compute_float32(-10, (val + 1) * scale), (151, 0));
assert_eq!(compute_float32(-10, (val + 2) * scale), (151, 1));
// Let's check the lines to see if anything is different in table...
assert_eq!(compute_float32(-10, 167772190000000000), (151, 2));
assert_eq!(compute_float32(-10, 167772200000000000), (151, 2));
assert_eq!(compute_float32(-10, (val + 3) * scale), (151, 2));
assert_eq!(compute_float32(-10, (val + 4) * scale), (151, 2));

// Check the rounding point between infinity and the next representable number down
assert_eq!(compute_float32(38, 3), (f32::INFINITE_POWER - 1, 6402534));
Expand All @@ -37,23 +93,38 @@ fn compute_float_f32_rounding() {

#[test]
fn compute_float_f64_rounding() {
// The maximum integer that can be converted to an `f64` without loss of precision.
let val = 1 << 53;
let scale = 1000;

// These test near-halfway cases for double-precision floats.
assert_eq!(compute_float64(0, 9007199254740992), (1076, 0));
assert_eq!(compute_float64(0, 9007199254740993), (1076, 0));
assert_eq!(compute_float64(0, 9007199254740994), (1076, 1));
assert_eq!(compute_float64(0, 9007199254740995), (1076, 2));
assert_eq!(compute_float64(0, 9007199254740996), (1076, 2));
assert_eq!(compute_float64(0, 18014398509481984), (1077, 0));
assert_eq!(compute_float64(0, 18014398509481986), (1077, 0));
assert_eq!(compute_float64(0, 18014398509481988), (1077, 1));
assert_eq!(compute_float64(0, 18014398509481990), (1077, 2));
assert_eq!(compute_float64(0, 18014398509481992), (1077, 2));

// These are examples of the above tests, with
// digits from the exponent shifted to the mantissa.
assert_eq!(compute_float64(-3, 9007199254740992000), (1076, 0));
assert_eq!(compute_float64(-3, 9007199254740993000), (1076, 0));
assert_eq!(compute_float64(-3, 9007199254740994000), (1076, 1));
assert_eq!(compute_float64(-3, 9007199254740995000), (1076, 2));
assert_eq!(compute_float64(-3, 9007199254740996000), (1076, 2));
assert_eq!(compute_float64(0, val), (1076, 0));
assert_eq!(compute_float64(0, val + 1), (1076, 0));
assert_eq!(compute_float64(0, val + 2), (1076, 1));
assert_eq!(compute_float64(0, val + 3), (1076, 2));
assert_eq!(compute_float64(0, val + 4), (1076, 2));

// For the next power up, the two nearest representable numbers are twice as far apart.
let val2 = 1 << 54;
assert_eq!(compute_float64(0, val2), (1077, 0));
assert_eq!(compute_float64(0, val2 + 2), (1077, 0));
assert_eq!(compute_float64(0, val2 + 4), (1077, 1));
assert_eq!(compute_float64(0, val2 + 6), (1077, 2));
assert_eq!(compute_float64(0, val2 + 8), (1077, 2));

// These are examples of the above tests, with digits from the exponent shifted
// to the mantissa.
assert_eq!(compute_float64(-3, val * scale), (1076, 0));
assert_eq!(compute_float64(-3, (val + 1) * scale), (1076, 0));
assert_eq!(compute_float64(-3, (val + 2) * scale), (1076, 1));
assert_eq!(compute_float64(-3, (val + 3) * scale), (1076, 2));
assert_eq!(compute_float64(-3, (val + 4) * scale), (1076, 2));

// Check the rounding point between infinity and the next representable number down
assert_eq!(compute_float64(308, 1), (f64::INFINITE_POWER - 1, 506821272651936));
assert_eq!(compute_float64(308, 2), (f64::INFINITE_POWER, 0)); // infinity
assert_eq!(
compute_float64(292, 17976931348623157),
(f64::INFINITE_POWER - 1, 4503599627370495)
);
}
Loading
Loading