Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit ae3b59e

Browse files
committedMay 22, 2023
[libc] Use MPFR for strtofloat fuzzing
The previous string to float tests didn't check correctness, but due to the atof differential test proving unreliable the strtofloat fuzz test has been changed to use MPFR for correctness checking. Some minor bugs have been found and fixed as well. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D150905
1 parent d25fb4e commit ae3b59e

File tree

11 files changed

+131
-77
lines changed

11 files changed

+131
-77
lines changed
 

‎libc/cmake/modules/LLVMLibCTestRules.cmake

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ endfunction(add_libc_long_running_testsuite)
323323
function(add_libc_fuzzer target_name)
324324
cmake_parse_arguments(
325325
"LIBC_FUZZER"
326-
"" # No optional arguments
326+
"NEED_MPFR" # Optional arguments
327327
"" # Single value arguments
328328
"SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi-value arguments
329329
${ARGN}
@@ -337,6 +337,16 @@ function(add_libc_fuzzer target_name)
337337
"'add_entrypoint_object' targets.")
338338
endif()
339339

340+
list(APPEND LIBC_FUZZER_LINK_LIBRARIES "")
341+
if(LIBC_FUZZER_NEED_MPFR)
342+
if(NOT LIBC_TESTS_CAN_USE_MPFR)
343+
message(VERBOSE "Fuzz test ${name} will be skipped as MPFR library is not available.")
344+
return()
345+
endif()
346+
list(APPEND LIBC_FUZZER_LINK_LIBRARIES mpfr gmp)
347+
endif()
348+
349+
340350
get_fq_target_name(${target_name} fq_target_name)
341351
get_fq_deps_list(fq_deps_list ${LIBC_FUZZER_DEPENDS})
342352
get_object_files_for_test(
@@ -372,7 +382,10 @@ function(add_libc_fuzzer target_name)
372382
${LIBC_BUILD_DIR}/include
373383
)
374384

375-
target_link_libraries(${fq_target_name} PRIVATE ${link_object_files})
385+
target_link_libraries(${fq_target_name} PRIVATE
386+
${link_object_files}
387+
${LIBC_FUZZER_LINK_LIBRARIES}
388+
)
376389

377390
set_target_properties(${fq_target_name}
378391
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

‎libc/fuzzing/stdlib/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@ add_libc_fuzzer(
1414
StringParserOutputDiff.h
1515
DEPENDS
1616
libc.src.stdlib.atof
17-
COMPILE_OPTIONS
18-
-DLLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR
1917
)
2018

2119
add_libc_fuzzer(
2220
strtofloat_fuzz
21+
NEED_MPFR
2322
SRCS
2423
strtofloat_fuzz.cpp
2524
DEPENDS

‎libc/fuzzing/stdlib/atof_differential_fuzz.cpp

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -16,40 +16,6 @@
1616

1717
#include "fuzzing/stdlib/StringParserOutputDiff.h"
1818

19-
// TODO: Remove this once glibc fixes hex subnormal rounding. See
20-
// https://sourceware.org/bugzilla/show_bug.cgi?id=30220
21-
#ifdef LLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR
22-
#include <ctype.h>
23-
constexpr double MIN_NORMAL = 0x1p-1022;
24-
25-
bool has_hex_prefix(const uint8_t *str) {
26-
size_t index = 0;
27-
28-
// Skip over leading whitespace
29-
while (isspace(str[index])) {
30-
++index;
31-
}
32-
// Skip over sign
33-
if (str[index] == '-' || str[index] == '+') {
34-
++index;
35-
}
36-
return str[index] == '0' && (tolower(str[index + 1])) == 'x';
37-
}
38-
39-
bool should_be_skipped(const uint8_t *str) {
40-
double init_result = ::atof(reinterpret_cast<const char *>(str));
41-
if (init_result < 0) {
42-
init_result = -init_result;
43-
}
44-
if (init_result < MIN_NORMAL && init_result != 0) {
45-
return has_hex_prefix(str);
46-
}
47-
return false;
48-
}
49-
#else
50-
bool should_be_skipped(const uint8_t *) { return false; }
51-
#endif // LLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR
52-
5319
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
5420
uint8_t *container = new uint8_t[size + 1];
5521
if (!container)
@@ -60,10 +26,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
6026
container[i] = data[i];
6127
container[size] = '\0'; // Add null terminator to container.
6228

63-
if (!should_be_skipped(container)) {
64-
StringParserOutputDiff<double>(&__llvm_libc::atof, &::atof, container,
65-
size);
66-
}
29+
StringParserOutputDiff<double>(&__llvm_libc::atof, &::atof, container, size);
6730
delete[] container;
6831
return 0;
6932
}

‎libc/fuzzing/stdlib/strtofloat_fuzz.cpp

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,50 +13,87 @@
1313
#include "src/stdlib/strtod.h"
1414
#include "src/stdlib/strtof.h"
1515
#include "src/stdlib/strtold.h"
16+
1617
#include <math.h>
1718
#include <stddef.h>
1819
#include <stdint.h>
1920

21+
#include "utils/MPFRWrapper/mpfr_inc.h"
22+
2023
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
2124
uint8_t *container = new uint8_t[size + 1];
2225
if (!container)
2326
__builtin_trap();
2427
size_t i;
2528

26-
for (i = 0; i < size; ++i)
27-
container[i] = data[i];
29+
for (i = 0; i < size; ++i) {
30+
// MPFR's strtofr uses "@" as a base-independent exponent symbol
31+
if (data[i] != '@')
32+
container[i] = data[i];
33+
else {
34+
container[i] = '#';
35+
}
36+
}
2837
container[size] = '\0'; // Add null terminator to container.
2938

3039
const char *str_ptr = reinterpret_cast<const char *>(container);
3140

3241
char *out_ptr = nullptr;
3342

34-
// This fuzzer only checks that the algorithms didn't read beyond the end of
35-
// the string in container. Combined with sanitizers, this will check that the
36-
// code is not reading memory beyond what's expected. This test does not
37-
// effectively check the correctness of the result.
43+
mpfr_t result;
44+
mpfr_init2(result, 256);
45+
mpfr_t bin_result;
46+
mpfr_init2(bin_result, 256);
47+
mpfr_strtofr(result, str_ptr, &out_ptr, 0 /* base */, MPFR_RNDN);
48+
ptrdiff_t result_strlen = out_ptr - str_ptr;
49+
mpfr_strtofr(bin_result, str_ptr, &out_ptr, 2 /* base */, MPFR_RNDN);
50+
ptrdiff_t bin_result_strlen = out_ptr - str_ptr;
51+
52+
long double bin_result_ld = mpfr_get_ld(bin_result, MPFR_RNDN);
53+
long double result_ld = mpfr_get_ld(result, MPFR_RNDN);
54+
55+
// This detects if mpfr's strtofr selected a base of 2, which libc does not
56+
// support. If a base 2 decoding is detected, it is replaced by a base 10
57+
// decoding.
58+
if ((bin_result_ld != 0.0 || bin_result_strlen == result_strlen) &&
59+
bin_result_ld == result_ld) {
60+
mpfr_strtofr(result, str_ptr, &out_ptr, 10 /* base */, MPFR_RNDN);
61+
result_strlen = out_ptr - str_ptr;
62+
}
63+
3864
auto volatile atof_output = __llvm_libc::atof(str_ptr);
3965
auto volatile strtof_output = __llvm_libc::strtof(str_ptr, &out_ptr);
40-
if (str_ptr + size < out_ptr)
66+
ptrdiff_t strtof_strlen = out_ptr - str_ptr;
67+
if (result_strlen != strtof_strlen)
4168
__builtin_trap();
4269
auto volatile strtod_output = __llvm_libc::strtod(str_ptr, &out_ptr);
43-
if (str_ptr + size < out_ptr)
70+
ptrdiff_t strtod_strlen = out_ptr - str_ptr;
71+
if (result_strlen != strtod_strlen)
4472
__builtin_trap();
4573
auto volatile strtold_output = __llvm_libc::strtold(str_ptr, &out_ptr);
46-
if (str_ptr + size < out_ptr)
74+
ptrdiff_t strtold_strlen = out_ptr - str_ptr;
75+
if (result_strlen != strtold_strlen)
4776
__builtin_trap();
4877

49-
// If any of the outputs are NaN
50-
if (isnan(atof_output) || isnan(strtof_output) || isnan(strtod_output) ||
51-
isnan(strtold_output)) {
52-
// Then all the outputs should be NaN.
53-
// This is a trivial check meant to silence the "unused variable" warnings.
54-
if (!isnan(atof_output) || !isnan(strtof_output) || !isnan(strtod_output) ||
55-
!isnan(strtold_output)) {
78+
// If any result is NaN, all of them should be NaN. We can't use the usual
79+
// comparisons because NaN != NaN.
80+
if (isnan(result_ld)) {
81+
if (!(isnan(atof_output) && isnan(strtof_output) && isnan(strtod_output) &&
82+
isnan(strtold_output)))
83+
__builtin_trap();
84+
} else {
85+
if (mpfr_get_d(result, MPFR_RNDN) != atof_output)
86+
__builtin_trap();
87+
if (mpfr_get_flt(result, MPFR_RNDN) != strtof_output)
88+
__builtin_trap();
89+
if (mpfr_get_d(result, MPFR_RNDN) != strtod_output)
90+
__builtin_trap();
91+
if (mpfr_get_ld(result, MPFR_RNDN) != strtold_output)
5692
__builtin_trap();
57-
}
5893
}
5994

95+
mpfr_clear(result);
96+
mpfr_clear(bin_result);
6097
delete[] container;
6198
return 0;
6299
}

‎libc/src/__support/str_to_float.h

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,15 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
265265
UInt128 approx_upper = static_cast<UInt128>(high64(mantissa)) *
266266
static_cast<UInt128>(power_of_ten[1]);
267267

268-
UInt128 approx_middle = static_cast<UInt128>(high64(mantissa)) *
269-
static_cast<UInt128>(power_of_ten[0]) +
270-
static_cast<UInt128>(low64(mantissa)) *
271-
static_cast<UInt128>(power_of_ten[1]);
268+
UInt128 approx_middle_a = static_cast<UInt128>(high64(mantissa)) *
269+
static_cast<UInt128>(power_of_ten[0]);
270+
UInt128 approx_middle_b = static_cast<UInt128>(low64(mantissa)) *
271+
static_cast<UInt128>(power_of_ten[1]);
272+
273+
UInt128 approx_middle = approx_middle_a + approx_middle_b;
274+
275+
// Handle overflow in the middle
276+
approx_upper += (approx_middle < approx_middle_a) ? UInt128(1) << 64 : 0;
272277

273278
UInt128 approx_lower = static_cast<UInt128>(low64(mantissa)) *
274279
static_cast<UInt128>(power_of_ten[0]);
@@ -928,8 +933,11 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
928933
return output;
929934

930935
if (tolower(src[index]) == EXPONENT_MARKER) {
931-
if (src[index + 1] == '+' || src[index + 1] == '-' ||
932-
isdigit(src[index + 1])) {
936+
bool has_sign = false;
937+
if (src[index + 1] == '+' || src[index + 1] == '-') {
938+
has_sign = true;
939+
}
940+
if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
933941
++index;
934942
auto result = strtointeger<int32_t>(src + index, 10);
935943
if (result.has_error())
@@ -1036,8 +1044,11 @@ hexadecimal_string_to_float(const char *__restrict src,
10361044
exponent *= 4;
10371045

10381046
if (tolower(src[index]) == EXPONENT_MARKER) {
1039-
if (src[index + 1] == '+' || src[index + 1] == '-' ||
1040-
isdigit(src[index + 1])) {
1047+
bool has_sign = false;
1048+
if (src[index + 1] == '+' || src[index + 1] == '-') {
1049+
has_sign = true;
1050+
}
1051+
if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
10411052
++index;
10421053
auto result = strtointeger<int32_t>(src + index, 10);
10431054
if (result.has_error())

‎libc/test/src/stdlib/strtod_test.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,11 @@ TEST_F(LlvmLibcStrToDTest, FuzzFailures) {
223223
// Same as above but for hex.
224224
run_test("0x0164810157p2047", 17, uint64_t(0x7ff0000000000000), ERANGE);
225225

226+
// This test ensures that only the correct number of characters is accepted.
227+
// An exponent symbol followed by a sign isn't a valid exponent.
228+
run_test("2e+", 1, uint64_t(0x4000000000000000));
229+
run_test("0x2p+", 3, uint64_t(0x4000000000000000));
230+
226231
// This bug was in the handling of very large exponents in the exponent
227232
// marker. Previously anything greater than 10,000 would be set to 10,000.
228233
// This caused incorrect behavior if there were more than 10,000 '0's in the

‎libc/test/src/stdlib/strtold_test.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,16 @@ TEST_F(LlvmLibcStrToLDTest, Float64SpecificFailures) {
147147
UInt128(0x8803000000000000)));
148148
}
149149

150+
TEST_F(LlvmLibcStrToLDTest, Float80SpecificFailures) {
151+
run_test("7777777777777777777777777777777777777777777777777777777777777777777"
152+
"777777777777777777777777777777777",
153+
100,
154+
SELECT_CONST(uint64_t(0x54ac729b8fcaf734),
155+
(UInt128(0x414ae394dc) << 40) + UInt128(0x7e57b9a0c2),
156+
(UInt128(0x414ac729b8fcaf73) << 64) +
157+
UInt128(0x4184a3d793224129)));
158+
}
159+
150160
TEST_F(LlvmLibcStrToLDTest, MaxSizeNumbers) {
151161
run_test("1.1897314953572317650e4932", 26,
152162
SELECT_CONST(uint64_t(0x7FF0000000000000),

‎libc/utils/MPFRWrapper/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ if(LIBC_TESTS_CAN_USE_MPFR)
22
add_library(libcMPFRWrapper
33
MPFRUtils.cpp
44
MPFRUtils.h
5+
mpfr_inc.h
56
)
67
add_compile_options(
78
-O3

‎libc/utils/MPFRWrapper/MPFRUtils.cpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,7 @@
1919
#include <memory>
2020
#include <stdint.h>
2121

22-
#ifdef CUSTOM_MPFR_INCLUDER
23-
// Some downstream repos are monoliths carrying MPFR sources in their third
24-
// party directory. In such repos, including the MPFR header as
25-
// `#include <mpfr.h>` is either disallowed or not possible. If that is the
26-
// case, a file named `CustomMPFRIncluder.h` should be added through which the
27-
// MPFR header can be included in manner allowed in that repo.
28-
#include "CustomMPFRIncluder.h"
29-
#else
30-
#include <mpfr.h>
31-
#endif
22+
#include "mpfr_inc.h"
3223

3324
template <typename T> using FPBits = __llvm_libc::fputil::FPBits<T>;
3425

‎libc/utils/MPFRWrapper/mpfr_inc.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===-- MPFRUtils.h ---------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H
10+
#define LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H
11+
12+
#ifdef CUSTOM_MPFR_INCLUDER
13+
// Some downstream repos are monoliths carrying MPFR sources in their third
14+
// party directory. In such repos, including the MPFR header as
15+
// `#include <mpfr.h>` is either disallowed or not possible. If that is the
16+
// case, a file named `CustomMPFRIncluder.h` should be added through which the
17+
// MPFR header can be included in manner allowed in that repo.
18+
#include "CustomMPFRIncluder.h"
19+
#else
20+
#include <mpfr.h>
21+
#endif
22+
23+
#endif // LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H

‎utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ licenses(["notice"])
1010

1111
cc_library(
1212
name = "mpfr_impl",
13+
hdrs = ["mpfr_inc.h"],
1314
target_compatible_with = select({
1415
"//conditions:default": [],
1516
"//libc:mpfr_disable": ["@platforms//:incompatible"],

0 commit comments

Comments
 (0)
Please sign in to comment.