Skip to content

Commit

Permalink
add a hack to work around cranelift ABI oddities
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfJung committed Jan 12, 2025
1 parent d7b63a3 commit 2789b51
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 15 deletions.
9 changes: 6 additions & 3 deletions compiler/rustc_target/src/callconv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -776,17 +776,18 @@ impl<'a, Ty> FnAbi<'a, Ty> {

if arg_idx.is_none()
&& arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
&& arg.layout.is_aggregate()
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
{
// Return aggregate values larger than 2 registers using a return area
// Return values larger than 2 registers using a return area
// pointer. LLVM and Cranelift disagree about how to return
// values that don't fit in the registers designated for return
// values. LLVM will force the entire return value to be passed
// by return area pointer, while Cranelift will look at each IR level
// return value independently and decide to pass it in a
// register or not, which would result in the return value
// being passed partially in registers and partially through a
// return area pointer.
// return area pointer. For large IR-level values such as `i128`,
// cranelift will even split up the value into smaller chunks.
//
// While Cranelift may need to be fixed as the LLVM behavior is
// generally more correct with respect to the surface language,
Expand Down Expand Up @@ -816,6 +817,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
// rustc_target already ensures any return value which doesn't
// fit in the available amount of return registers is passed in
// the right way for the current target.
// The adjustment is also not necessary nor desired for types with
// a vector representation; those are handled below.
arg.make_indirect();
continue;
}
Expand Down
7 changes: 5 additions & 2 deletions compiler/rustc_target/src/callconv/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,17 @@ where
BackendRepr::ScalarPair(s1, s2) => {
matches!(s1.primitive(), Float(_)) || matches!(s2.primitive(), Float(_))
}
_ => false, // anyway not passed via registers on x86
_ => false, // anyway not returned via registers on x86
};
if has_float {
if cx.target_spec().rust_abi == Some(RustAbi::X86Sse2)
&& fn_abi.ret.layout.backend_repr.is_scalar()
&& fn_abi.ret.layout.size.bits() <= 128
&& fn_abi.ret.layout.size.bits() <= 64
{
// This is a single scalar that fits into an SSE register.
// FIXME: We cannot return 128-bit-floats this way since scalars larger than
// 64bit must be returned indirectly to make cranelift happy. See the comment
// in `adjust_for_rust_abi`.
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
} else if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
// Same size or smaller than pointer, return in an integer register.
Expand Down
19 changes: 9 additions & 10 deletions tests/assembly/x86-return-float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,16 +323,15 @@ pub fn return_f16(x: f16) -> f16 {
#[no_mangle]
pub fn return_f128(x: f128) -> f128 {
// CHECK: pushl %ebp
// sse: movaps [[#%d,OFFSET:]](%ebp), %xmm0
// nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
// nosse-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
// nosse-NEXT: movl %[[VAL4]], 12(%[[PTR]])
// nosse-NEXT: movl %[[VAL3]], 8(%[[PTR]])
// nosse-NEXT: movl %[[VAL2]], 4(%[[PTR]])
// nosse-NEXT: movl %[[VAL1]], (%[[PTR]])
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
// CHECK-NEXT: movl %[[VAL4]], 12(%[[PTR]])
// CHECK-NEXT: movl %[[VAL3]], 8(%[[PTR]])
// CHECK-NEXT: movl %[[VAL2]], 4(%[[PTR]])
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
// CHECK: popl %ebp
// CHECK: retl
x
Expand Down

0 comments on commit 2789b51

Please sign in to comment.