Skip to content

Commit f1bcfdd

Browse files
committed Jan 13, 2016
Auto merge of rust-lang#30639 - rkruppe:dec2flt-fastpath-tables, r=alexcrichton
Add tables of small powers of ten used in the fast path.

The tables are redundant: we could also use the big, more accurate table and round the value to the correct type (in fact, we did just that before this commit). However, the rounding is extra work and slows down the fast path. Because only very small exponents enter the fast path, the table, and thus the space overhead, is negligible.

Speed-wise, this is a clear win on a [benchmark] comparing the fast path to a naive, hand-optimized, inaccurate algorithm. Specifically, this change narrows the gap from a roughly 5x difference to a roughly 3.4x difference.

[benchmark]: https://gist.github.com/Veedrac/dbb0c07994bc7882098e
2 parents 49c3827 + dad1df6 commit f1bcfdd

File tree

4 files changed

+88
-15
lines changed

4 files changed

+88
-15
lines changed
 

‎src/etc/dec2flt_table.py

+29-6
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"""
2626
from __future__ import print_function
2727
import sys
28+
from math import ceil, log
2829
from fractions import Fraction
2930
from collections import namedtuple
3031

@@ -33,7 +34,6 @@
3334
MIN_SIG = 2 ** (N - 1)
3435
MAX_SIG = (2 ** N) - 1
3536

36-
3737
# Hand-rolled fp representation without arithmetic or any other operations.
3838
# The significand is normalized and always N bit, but the exponent is
3939
# unrestricted in range.
@@ -92,7 +92,7 @@ def error(f, e, z):
9292
ulp_err = abs_err / Fraction(2) ** z.exp
9393
return float(ulp_err)
9494

95-
LICENSE = """
95+
HEADER = """
9696
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
9797
// file at the top-level directory of this distribution and at
9898
// http://rust-lang.org/COPYRIGHT.
@@ -102,9 +102,23 @@ def error(f, e, z):
102102
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
103103
// option. This file may not be copied, modified, or distributed
104104
// except according to those terms.
105+
106+
//! Tables of approximations of powers of ten.
107+
//! DO NOT MODIFY: Generated by `src/etc/dec2flt_table.py`
105108
"""
106109

110+
107111
def main():
112+
print(HEADER.strip())
113+
print()
114+
print_proper_powers()
115+
print()
116+
print_short_powers(32, 24)
117+
print()
118+
print_short_powers(64, 53)
119+
120+
121+
def print_proper_powers():
108122
MIN_E = -305
109123
MAX_E = 305
110124
e_range = range(MIN_E, MAX_E+1)
@@ -114,13 +128,10 @@ def main():
114128
err = error(1, e, z)
115129
assert err < 0.5
116130
powers.append(z)
117-
typ = "([u64; {0}], [i16; {0}])".format(len(e_range))
118-
print(LICENSE.strip())
119-
print("// Table of approximations of powers of ten.")
120-
print("// DO NOT MODIFY: Generated by a src/etc/dec2flt_table.py")
121131
print("pub const MIN_E: i16 = {};".format(MIN_E))
122132
print("pub const MAX_E: i16 = {};".format(MAX_E))
123133
print()
134+
typ = "([u64; {0}], [i16; {0}])".format(len(powers))
124135
print("pub const POWERS: ", typ, " = ([", sep='')
125136
for z in powers:
126137
print(" 0x{:x},".format(z.sig))
@@ -130,5 +141,17 @@ def main():
130141
print("]);")
131142

132143

144+
def print_short_powers(num_bits, significand_size):
145+
max_sig = 2**significand_size - 1
146+
# The fast path bails out for exponents >= ceil(log5(max_sig))
147+
max_e = int(ceil(log(max_sig, 5)))
148+
e_range = range(max_e)
149+
typ = "[f{}; {}]".format(num_bits, len(e_range))
150+
print("pub const F", num_bits, "_SHORT_POWERS: ", typ, " = [", sep='')
151+
for e in e_range:
152+
print(" 1e{},".format(e))
153+
print("];")
154+
155+
133156
if __name__ == '__main__':
134157
main()

‎src/libcore/num/dec2flt/algorithm.rs

+3-7
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,13 @@ pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Opt
6060
if f > T::max_sig() {
6161
return None;
6262
}
63-
let e = e as i16; // Can't overflow because e.abs() <= LOG5_OF_EXP_N
6463
// The case e < 0 cannot be folded into the other branch. Negative powers result in
6564
// a repeating fractional part in binary, which is rounded, which causes real
6665
// (and occasionally quite significant!) errors in the final result.
67-
// The case `e == 0`, however, is unnecessary for correctness. It's just measurably faster.
68-
if e == 0 {
69-
Some(T::from_int(f))
70-
} else if e > 0 {
71-
Some(T::from_int(f) * fp_to_float(power_of_ten(e)))
66+
if e >= 0 {
67+
Some(T::from_int(f) * T::short_fast_pow10(e as usize))
7268
} else {
73-
Some(T::from_int(f) / fp_to_float(power_of_ten(-e)))
69+
Some(T::from_int(f) / T::short_fast_pow10(e.abs() as usize))
7470
}
7571
}
7672

‎src/libcore/num/dec2flt/rawfp.rs

+12
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use num::diy_float::Fp;
3737
use num::FpCategory::{Infinite, Zero, Subnormal, Normal, Nan};
3838
use num::Float;
3939
use num::dec2flt::num::{self, Big};
40+
use num::dec2flt::table;
4041

4142
#[derive(Copy, Clone, Debug)]
4243
pub struct Unpacked {
@@ -73,6 +74,9 @@ pub trait RawFloat : Float + Copy + Debug + LowerExp
7374
/// represented, the other code in this module makes sure to never let that happen.
7475
fn from_int(x: u64) -> Self;
7576

77+
/// Get the value 10^e from a pre-computed table. Panics for e >= ceil_log5_of_max_sig().
78+
fn short_fast_pow10(e: usize) -> Self;
79+
7680
// FIXME Everything that follows should be associated constants, but taking the value of an
7781
// associated constant from a type parameter does not work (yet?)
7882
// A possible workaround is having a `FloatInfo` struct for all the constants, but so far
@@ -175,6 +179,10 @@ impl RawFloat for f32 {
175179
x as f32
176180
}
177181

182+
fn short_fast_pow10(e: usize) -> Self {
183+
table::F32_SHORT_POWERS[e]
184+
}
185+
178186
fn max_normal_digits() -> usize {
179187
35
180188
}
@@ -222,6 +230,10 @@ impl RawFloat for f64 {
222230
x as f64
223231
}
224232

233+
fn short_fast_pow10(e: usize) -> Self {
234+
table::F64_SHORT_POWERS[e]
235+
}
236+
225237
fn max_normal_digits() -> usize {
226238
305
227239
}

‎src/libcore/num/dec2flt/table.rs

+44-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
10-
// Table of approximations of powers of ten.
11-
// DO NOT MODIFY: Generated by a src/etc/dec2flt_table.py
10+
11+
//! Tables of approximations of powers of ten.
12+
//! DO NOT MODIFY: Generated by `src/etc/dec2flt_table.py`
13+
1214
pub const MIN_E: i16 = -305;
1315
pub const MAX_E: i16 = 305;
1416

@@ -1237,3 +1239,43 @@ pub const POWERS: ([u64; 611], [i16; 611]) = ([
12371239
946,
12381240
950,
12391241
]);
1242+
1243+
pub const F32_SHORT_POWERS: [f32; 11] = [
1244+
1e0,
1245+
1e1,
1246+
1e2,
1247+
1e3,
1248+
1e4,
1249+
1e5,
1250+
1e6,
1251+
1e7,
1252+
1e8,
1253+
1e9,
1254+
1e10,
1255+
];
1256+
1257+
pub const F64_SHORT_POWERS: [f64; 23] = [
1258+
1e0,
1259+
1e1,
1260+
1e2,
1261+
1e3,
1262+
1e4,
1263+
1e5,
1264+
1e6,
1265+
1e7,
1266+
1e8,
1267+
1e9,
1268+
1e10,
1269+
1e11,
1270+
1e12,
1271+
1e13,
1272+
1e14,
1273+
1e15,
1274+
1e16,
1275+
1e17,
1276+
1e18,
1277+
1e19,
1278+
1e20,
1279+
1e21,
1280+
1e22,
1281+
];

0 commit comments

Comments
 (0)
Please sign in to comment.