[cfg_match] Library edition
c410-f3r committed Jan 29, 2025
1 parent 7d9fe91 commit 836b137
Showing 8 changed files with 247 additions and 0 deletions.
117 changes: 117 additions & 0 deletions library/core/src/ffi/mod.rs
@@ -90,6 +90,7 @@ pub type c_ptrdiff_t = isize;
pub type c_ssize_t = isize;

mod c_char_definition {
#[cfg(bootstrap)]
cfg_if! {
// These are the targets on which c_char is unsigned. Usually the
// signedness is the same for all target_os values on a given architecture
@@ -178,9 +179,101 @@ mod c_char_definition {
pub type c_char = i8;
}
}
#[cfg(not(bootstrap))]
crate::cfg_match! {
// These are the targets on which c_char is unsigned. Usually the
// signedness is the same for all target_os values on a given architecture
// but there are some exceptions (see isSignedCharDefault() in clang).
//
// aarch64:
// Section 10 "Arm C and C++ language mappings" in Procedure Call Standard for the Arm®
// 64-bit Architecture (AArch64) says C/C++ char is unsigned byte.
// https://github.com/ARM-software/abi-aa/blob/2024Q3/aapcs64/aapcs64.rst#arm-c-and-c-language-mappings
// arm:
// Section 8 "Arm C and C++ Language Mappings" in Procedure Call Standard for the Arm®
// Architecture says C/C++ char is unsigned byte.
// https://github.com/ARM-software/abi-aa/blob/2024Q3/aapcs32/aapcs32.rst#arm-c-and-c-language-mappings
// csky:
// Section 2.1.2 "Primary Data Type" in C-SKY V2 CPU Applications Binary Interface
// Standards Manual says ANSI C char is unsigned byte.
// https://github.com/c-sky/csky-doc/blob/9f7121f7d40970ba5cc0f15716da033db2bb9d07/C-SKY_V2_CPU_Applications_Binary_Interface_Standards_Manual.pdf
// Note: this doesn't seem to match Clang's default (https://github.com/rust-lang/rust/issues/129945).
// hexagon:
// Section 3.1 "Basic data type" in Qualcomm Hexagon™ Application
// Binary Interface User Guide says "By default, the `char` data type is unsigned."
// https://docs.qualcomm.com/bundle/publicresource/80-N2040-23_REV_K_Qualcomm_Hexagon_Application_Binary_Interface_User_Guide.pdf
// msp430:
// Section 2.1 "Basic Types" in MSP430 Embedded Application Binary
// Interface says "The char type is unsigned by default".
// https://www.ti.com/lit/an/slaa534a/slaa534a.pdf
// Note: this doesn't seem to match Clang's default (https://github.com/rust-lang/rust/issues/129945).
// powerpc/powerpc64:
// - PPC32 SysV: "Table 3-1 Scalar Types" in System V Application Binary Interface PowerPC
// Processor Supplement says ANSI C char is unsigned byte
// https://refspecs.linuxfoundation.org/elf/elfspec_ppc.pdf
// - PPC64 ELFv1: Section 3.1.4 "Fundamental Types" in 64-bit PowerPC ELF Application
// Binary Interface Supplement 1.9 says ANSI C char is unsigned byte
// https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUND-TYPE
// - PPC64 ELFv2: Section 2.1.2.2 "Fundamental Types" in 64-Bit ELF V2 ABI Specification
// says char is unsigned byte
// https://openpowerfoundation.org/specifications/64bitelfabi/
// - AIX: XL C for AIX Language Reference says "By default, char behaves like an unsigned char."
// https://www.ibm.com/docs/en/xl-c-aix/13.1.3?topic=specifiers-character-types
// riscv32/riscv64:
// C/C++ type representations section in RISC-V Calling Conventions
// page in RISC-V ELF psABI Document says "char is unsigned."
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-cc.adoc#cc-type-representations
// s390x:
// - ELF: "Table 1.1.: Scalar types" in ELF Application Binary Interface s390x Supplement
// Version 1.6.1 categorizes ISO C char as an unsigned integer
// https://github.com/IBM/s390x-abi/releases/tag/v1.6.1
// - z/OS: XL C/C++ Language Reference says: "By default, char behaves like an unsigned char."
// https://www.ibm.com/docs/en/zos/3.1.0?topic=specifiers-character-types
// Xtensa:
// - "The char type is unsigned by default for Xtensa processors."
//
// On the following operating systems, c_char is signed by default, regardless of architecture.
// Darwin (macOS, iOS, etc.):
// Apple targets' c_char is signed by default even on arm
// https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Handle-data-types-and-data-alignment-properly
// Windows:
// Windows MSVC C++ Language Reference says "Microsoft-specific: Variables of type char
// are promoted to int as if from type signed char by default, unless the /J compilation
// option is used."
// https://learn.microsoft.com/en-us/cpp/cpp/fundamental-types-cpp?view=msvc-170#character-types)
// L4RE:
// The kernel builds with -funsigned-char on all targets (but userspace follows the
// architecture defaults). As we only have a target for userspace apps, there are no
// special cases for L4RE below.
all(
not(windows),
not(target_vendor = "apple"),
any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "csky",
target_arch = "hexagon",
target_arch = "msp430",
target_arch = "powerpc",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "riscv32",
target_arch = "s390x",
target_arch = "xtensa",
)
) => {
pub type c_char = u8;
}
_ => {
// On every other target, c_char is signed.
pub type c_char = i8;
}
}
}


mod c_int_definition {
#[cfg(bootstrap)]
cfg_if! {
if #[cfg(any(target_arch = "avr", target_arch = "msp430"))] {
pub type c_int = i16;
@@ -190,9 +283,21 @@ mod c_int_definition {
pub type c_uint = u32;
}
}
#[cfg(not(bootstrap))]
crate::cfg_match! {
any(target_arch = "avr", target_arch = "msp430") => {
pub type c_int = i16;
pub type c_uint = u16;
}
_ => {
pub type c_int = i32;
pub type c_uint = u32;
}
}
}

mod c_long_definition {
#[cfg(bootstrap)]
cfg_if! {
if #[cfg(all(target_pointer_width = "64", not(windows)))] {
pub type c_long = i64;
@@ -203,6 +308,18 @@
pub type c_ulong = u32;
}
}
#[cfg(not(bootstrap))]
crate::cfg_match! {
all(target_pointer_width = "64", not(windows)) => {
pub type c_long = i64;
pub type c_ulong = u64;
}
_ => {
// The minimal size of `long` in the C standard is 32 bits
pub type c_long = i32;
pub type c_ulong = u32;
}
}
}

// N.B., for LLVM to recognize the void pointer type and by extension
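The hunks above swap the library-internal `cfg_if!` chains for the unstable `cfg_match!` macro, whose arms are plain cfg predicates followed by the items to compile, with `_` as the fallback. A minimal, hedged sketch of that shape (nightly only, gated on the `cfg_match` feature enabled in `lib.rs` below; the function names and the exact surface syntax of the unstable macro are illustrative assumptions):

```rust
// Minimal sketch of the arm syntax used in this commit: bare cfg predicates,
// items in braces, `_` as the catch-all arm (nightly only).
#![feature(cfg_match)]

use core::cfg_match;

cfg_match! {
    all(unix, target_pointer_width = "64") => {
        fn describe() -> &'static str { "64-bit unix" }
    }
    windows => {
        fn describe() -> &'static str { "windows" }
    }
    _ => {
        fn describe() -> &'static str { "something else" }
    }
}

fn main() {
    // Exactly one arm's items are compiled in, like the `if #[cfg(...)]`/`else`
    // chains of `cfg_if!` that this commit replaces.
    println!("{}", describe());
}
```

Compared with `cfg_if!`, each arm simply drops the `if #[cfg(...)]` wrapper, which is the whole mechanical change applied to the modules above.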
1 change: 1 addition & 0 deletions library/core/src/internal_macros.rs
@@ -146,6 +146,7 @@ macro_rules! impl_fn_for_zst {
/// ```
// This is a copy of `cfg_if!` from the `cfg_if` crate.
// The recursive invocations should use $crate if this is ever exported.
#[cfg(bootstrap)]
macro_rules! cfg_if {
// match if/else chains with a final `else`
(
1 change: 1 addition & 0 deletions library/core/src/lib.rs
@@ -110,6 +110,7 @@
#![feature(array_ptr_get)]
#![feature(asm_experimental_arch)]
#![feature(bigint_helper_methods)]
#![feature(cfg_match)]
#![feature(const_carrying_mul_add)]
#![feature(const_eval_select)]
#![feature(core_intrinsics)]
40 changes: 40 additions & 0 deletions library/core/src/num/f32.rs
@@ -991,6 +991,7 @@ impl f32 {
#[stable(feature = "num_midpoint", since = "1.85.0")]
#[rustc_const_stable(feature = "num_midpoint", since = "1.85.0")]
pub const fn midpoint(self, other: f32) -> f32 {
#[cfg(bootstrap)]
cfg_if! {
// Allow a faster implementation on targets that have known-good 64-bit float
// implementations. Falling back to the branchy code on targets that don't
@@ -1013,6 +1014,45 @@
let abs_a = a.abs();
let abs_b = b.abs();

if abs_a <= HI && abs_b <= HI {
// Overflow is impossible
(a + b) / 2.
} else if abs_a < LO {
// Not safe to halve `a` (would underflow)
a + (b / 2.)
} else if abs_b < LO {
// Not safe to halve `b` (would underflow)
(a / 2.) + b
} else {
// Safe to halve `a` and `b`
(a / 2.) + (b / 2.)
}
}
}
#[cfg(not(bootstrap))]
crate::cfg_match! {
// Allow a faster implementation on targets that have known-good 64-bit float
// implementations. Falling back to the branchy code on targets that don't
// have 64-bit hardware floats or have buggy implementations.
// https://github.com/rust-lang/rust/pull/121062#issuecomment-2123408114
any(
target_arch = "x86_64",
target_arch = "aarch64",
all(any(target_arch = "riscv32", target_arch = "riscv64"), target_feature = "d"),
all(target_arch = "arm", target_feature = "vfp2"),
target_arch = "wasm32",
target_arch = "wasm64",
) => {
((self as f64 + other as f64) / 2.0) as f32
}
_ => {
const LO: f32 = f32::MIN_POSITIVE * 2.;
const HI: f32 = f32::MAX / 2.;

let (a, b) = (self, other);
let abs_a = a.abs();
let abs_b = b.abs();

if abs_a <= HI && abs_b <= HI {
// Overflow is impossible
(a + b) / 2.
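The `_` arm above spells out why the fallback is branchy: adding the operands directly is only safe when both magnitudes are at most `f32::MAX / 2`, and halving is only safe above `f32::MIN_POSITIVE * 2`. A standalone, hedged sketch of that logic (the free-function name is illustrative, not the library's API):

```rust
// Average two f32s without intermediate overflow or underflow, mirroring the
// branchy fallback shown in the diff above.
fn midpoint_branchy(a: f32, b: f32) -> f32 {
    const LO: f32 = f32::MIN_POSITIVE * 2.;
    const HI: f32 = f32::MAX / 2.;

    let (abs_a, abs_b) = (a.abs(), b.abs());

    if abs_a <= HI && abs_b <= HI {
        // Both operands are small enough that `a + b` cannot overflow.
        (a + b) / 2.
    } else if abs_a < LO {
        // Halving `a` would underflow, so halve only `b`.
        a + (b / 2.)
    } else if abs_b < LO {
        // Halving `b` would underflow, so halve only `a`.
        (a / 2.) + b
    } else {
        // Both are large; halve each before adding to avoid overflow.
        (a / 2.) + (b / 2.)
    }
}

fn main() {
    assert_eq!(midpoint_branchy(1.0, 3.0), 2.0);
    // A naive `(a + b) / 2.0` would overflow to infinity here.
    assert_eq!(midpoint_branchy(f32::MAX, f32::MAX), f32::MAX);
}
```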
10 changes: 10 additions & 0 deletions library/core/src/slice/sort/select.rs
@@ -41,13 +41,23 @@
let min_idx = min_index(v, &mut is_less).unwrap();
v.swap(min_idx, index);
} else {
#[cfg(bootstrap)]
cfg_if! {
if #[cfg(feature = "optimize_for_size")] {
median_of_medians(v, &mut is_less, index);
} else {
partition_at_index_loop(v, index, None, &mut is_less);
}
}
#[cfg(not(bootstrap))]
crate::cfg_match! {
feature = "optimize_for_size" => {
median_of_medians(v, &mut is_less, index);
}
_ => {
partition_at_index_loop(v, index, None, &mut is_less);
}
}
}

let (left, right) = v.split_at_mut(index);
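Both arms above feed the selection code behind the stable `select_nth_unstable` slice methods; `optimize_for_size` only swaps the fast partition loop for the smaller median-of-medians routine. A short usage example of that public entry point:

```rust
// The internal selection routine above backs the stable slice API
// `select_nth_unstable`; this is the user-facing behaviour it implements.
fn main() {
    let mut v = [9, 2, 7, 4, 5, 1, 8];

    // After the call, index 3 holds the element that would sit there in a
    // fully sorted slice; everything to its left is <= it, everything to its
    // right is >= it.
    let (_left, median, _right) = v.select_nth_unstable(3);
    assert_eq!(*median, 5);
}
```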
45 changes: 45 additions & 0 deletions library/core/src/slice/sort/stable/mod.rs
@@ -39,6 +39,7 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less
return;
}

#[cfg(bootstrap)]
cfg_if! {
if #[cfg(any(feature = "optimize_for_size", target_pointer_width = "16"))] {
let alloc_len = len / 2;
@@ -79,6 +80,50 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less
driftsort_main::<T, F, BufT>(v, is_less);
}
}

#[cfg(not(bootstrap))]
crate::cfg_match! {
any(feature = "optimize_for_size", target_pointer_width = "16") => {
let alloc_len = len / 2;

crate::cfg_match! {
target_pointer_width = "16" => {
let mut heap_buf = BufT::with_capacity(alloc_len);
let scratch = heap_buf.as_uninit_slice_mut();
}
_ => {
// For small inputs 4KiB of stack storage suffices, which allows us to avoid
// calling the (de-)allocator. Benchmarks showed this was quite beneficial.
let mut stack_buf = AlignedStorage::<T, 4096>::new();
let stack_scratch = stack_buf.as_uninit_slice_mut();
let mut heap_buf;
let scratch = if stack_scratch.len() >= alloc_len {
stack_scratch
} else {
heap_buf = BufT::with_capacity(alloc_len);
heap_buf.as_uninit_slice_mut()
};
}
}

tiny::mergesort(v, scratch, is_less);
}
_ => {
// More advanced sorting methods than insertion sort are faster if called in
// a hot loop for small inputs, but for general-purpose code the small
// binary size of insertion sort is more important. The instruction cache in
// modern processors is very valuable, and for a single sort call in general
// purpose code any gains from an advanced method are cancelled by i-cache
// misses during the sort, and thrashing the i-cache for surrounding code.
const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
insertion_sort_shift_left(v, 1, is_less);
return;
}

driftsort_main::<T, F, BufT>(v, is_less);
}
}
}

/// See [`sort`]
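Inside the size-optimized arm above, targets whose pointer width is not 16 bits first try a fixed 4 KiB stack buffer for merge scratch space and fall back to a heap allocation only when the input needs more room. A simplified, hedged sketch of that stack-first pattern (`with_scratch`, `STACK_LEN`, and the buffer size are illustrative assumptions, not the library's helpers):

```rust
use std::mem::MaybeUninit;

// Use a fixed stack buffer for scratch space when it is large enough,
// otherwise fall back to a heap allocation.
fn with_scratch<T, F: FnOnce(&mut [MaybeUninit<T>])>(needed: usize, use_scratch: F) {
    const STACK_LEN: usize = 64;

    let mut stack_buf: [MaybeUninit<T>; STACK_LEN] = [const { MaybeUninit::uninit() }; STACK_LEN];
    let mut heap_buf: Vec<MaybeUninit<T>> = Vec::new();

    let scratch = if needed <= STACK_LEN {
        // Small request: no call into the allocator at all.
        &mut stack_buf[..needed]
    } else {
        // Large request: grow a heap buffer to the required length.
        heap_buf.resize_with(needed, MaybeUninit::uninit);
        &mut heap_buf[..]
    };

    use_scratch(scratch);
}

fn main() {
    // Served entirely from the stack buffer.
    with_scratch::<u32, _>(16, |s| assert_eq!(s.len(), 16));
    // Falls back to a heap allocation.
    with_scratch::<u32, _>(1000, |s| assert_eq!(s.len(), 1000));
}
```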
23 changes: 23 additions & 0 deletions library/core/src/slice/sort/unstable/mod.rs
@@ -30,6 +30,7 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
return;
}

#[cfg(bootstrap)]
cfg_if! {
if #[cfg(any(feature = "optimize_for_size", target_pointer_width = "16"))] {
heapsort::heapsort(v, is_less);
@@ -49,6 +50,28 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
ipnsort(v, is_less);
}
}

#[cfg(not(bootstrap))]
crate::cfg_match! {
any(feature = "optimize_for_size", target_pointer_width = "16") => {
heapsort::heapsort(v, is_less);
}
_ => {
// More advanced sorting methods than insertion sort are faster if called in
// a hot loop for small inputs, but for general-purpose code the small
// binary size of insertion sort is more important. The instruction cache in
// modern processors is very valuable, and for a single sort call in general
// purpose code any gains from an advanced method are cancelled by i-cache
// misses during the sort, and thrashing the i-cache for surrounding code.
const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
insertion_sort_shift_left(v, 1, is_less);
return;
}

ipnsort(v, is_less);
}
}
}

/// See [`sort`]
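The fallback arm above routes inputs of at most 20 elements to plain insertion sort and everything longer to `ipnsort`, trading peak speed in hot loops for binary size and i-cache friendliness. A hedged sketch of that dispatch, with the standard `sort_unstable` standing in for `ipnsort` and a deliberately simple insertion sort (the helper names are illustrative; the cutoff is the one from the comment):

```rust
// Inputs at or below the cutoff get a plain insertion sort; larger inputs go
// to the "real" sort (here the standard library sort as a stand-in).
const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;

fn insertion_sort<T: Ord>(v: &mut [T]) {
    for i in 1..v.len() {
        // Shift v[i] left until the prefix v[..=i] is sorted.
        let mut j = i;
        while j > 0 && v[j - 1] > v[j] {
            v.swap(j - 1, j);
            j -= 1;
        }
    }
}

fn sort_dispatch<T: Ord>(v: &mut [T]) {
    if v.len() <= MAX_LEN_ALWAYS_INSERTION_SORT {
        insertion_sort(v);
        return;
    }
    v.sort_unstable(); // stand-in for the advanced sort
}

fn main() {
    let mut small = [3, 1, 2];
    sort_dispatch(&mut small);
    assert_eq!(small, [1, 2, 3]);
}
```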
10 changes: 10 additions & 0 deletions library/core/src/slice/sort/unstable/quicksort.rs
@@ -140,13 +140,23 @@ const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut
if mem::size_of::<T>() <= MAX_BRANCHLESS_PARTITION_SIZE {
// Specialize for types that are relatively cheap to copy, where branchless optimizations
// have large leverage e.g. `u64` and `String`.
#[cfg(bootstrap)]
cfg_if! {
if #[cfg(feature = "optimize_for_size")] {
partition_lomuto_branchless_simple::<T, F>
} else {
partition_lomuto_branchless_cyclic::<T, F>
}
}
#[cfg(not(bootstrap))]
crate::cfg_match! {
feature = "optimize_for_size" => {
partition_lomuto_branchless_simple::<T, F>
}
_ => {
partition_lomuto_branchless_cyclic::<T, F>
}
}
} else {
partition_hoare_branchy_cyclic::<T, F>
}
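For cheap-to-copy element types, `inst_partition` above selects one of two Lomuto-style partition functions depending on `optimize_for_size`. As orientation, a hedged sketch of the plain branchy Lomuto scheme those variants refine, not the library's branchless cyclic implementation:

```rust
// Plain Lomuto partition: move everything less than the pivot to the front of
// `v` and return how many such elements there are.
fn partition_lomuto<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize {
    let mut lt_count = 0;
    for i in 0..v.len() {
        if is_less(&v[i], pivot) {
            v.swap(i, lt_count);
            lt_count += 1;
        }
    }
    lt_count
}

fn main() {
    let mut v = [5, 1, 8, 3, 9, 2];
    let pivot = 5;
    let mid = partition_lomuto(&mut v, &pivot, &mut |a, b| a < b);
    assert_eq!(mid, 3);
    assert!(v[..mid].iter().all(|&x| x < pivot));
    assert!(v[mid..].iter().all(|&x| x >= pivot));
}
```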
