[SelectionDAG][RISCV] Treat zext nneg as sext in PromoteIntOp_ZERO_EXTEND if the promoted input is sign extended. #145120
Conversation
If the zext has the nneg flag and we can prove the promoted input is sign extended, we can avoid generating an AND that we might not be able to remove. RISC-V emits a lot of sext_inreg operations during i32->i64 promotion, which makes this likely.

I've restricted this to the case where the promoted type is the same as the result type, so we don't need to create an additional extend.

I've also restricted it to cases where the target has stated a preference for sext, like i32->i64 on RV64. This is largely to avoid wasting time in computeNumSignBits until we have a test case that benefits.
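As a minimal illustration (a hypothetical example, not one of the tests added by this patch), consider an i32 value produced by arithmetic that gets widened during i32->i64 promotion on RV64. The promoted value is known to be sign extended, so when the zext carries the nneg flag, sign and zero extension agree and the promoted value can be returned directly instead of being masked:

; Hypothetical IR sketch. During i32->i64 promotion on RV64, the add is
; widened and its result is known to be sign extended (RISC-V inserts
; sext_inreg here). Because the zext is marked nneg, the fold in this
; patch can reuse the promoted value without clearing the upper bits.
define i64 @zext_nneg_demo(i32 signext %b) {
  %c = add i32 %b, 1
  %z = zext nneg i32 %c to i64
  ret i64 %z
}

Without the fold, PromoteIntOp_ZERO_EXTEND falls through to getZeroExtendInReg, which emits an AND with 0xffffffff that later combines may not be able to remove.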
@llvm/pr-subscribers-llvm-selectiondag

Author: Craig Topper (topperc)

Full diff: https://github.com/llvm/llvm-project/pull/145120.diff

2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index dd64676222055..dd0412460f4e1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2605,9 +2605,22 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
SDLoc dl(N);
- SDValue Op = GetPromotedInteger(N->getOperand(0));
- Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
- return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+ SDValue Src = N->getOperand(0);
+ SDValue Op = GetPromotedInteger(Src);
+ EVT VT = N->getValueType(0);
+
+ // If this zext has the nneg flag and the target prefers sext, see if the
+ // promoted input is already sign extended.
+ // TODO: Should we have some way to set nneg on ISD::AND instead?
+ if (N->getFlags().hasNonNeg() && Op.getValueType() == VT &&
+ TLI.isSExtCheaperThanZExt(Src.getValueType(), VT)) {
+ unsigned OpEffectiveBits = DAG.ComputeMaxSignificantBits(Op);
+ if (OpEffectiveBits <= Src.getScalarValueSizeInBits())
+ return Op;
+ }
+
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op);
+ return DAG.getZeroExtendInReg(Op, dl, Src.getValueType());
}
SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 32a037918a5a7..7ca1ee1cba2f8 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -779,3 +779,78 @@ define i128 @shl128_shamt32(i128 %a, i32 signext %b) nounwind {
%1 = shl i128 %a, %zext
ret i128 %1
}
+
+; Do some arithmetic on the i32 shift amount before the zext nneg. This
+; arithmetic will be promoted using a W instruction on RV64. Make sure we can
+; use this to avoid an unnecessary zext of the shift amount.
+define i128 @shl128_shamt32_arith(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl128_shamt32_arith:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: addi a2, a2, 1
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
+; RV32I-NEXT: addi a6, sp, 16
+; RV32I-NEXT: srli a7, a2, 3
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: andi a7, a7, 12
+; RV32I-NEXT: sub a6, a6, a7
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a1, 28(sp)
+; RV32I-NEXT: lw a1, 0(a6)
+; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: lw a4, 8(a6)
+; RV32I-NEXT: lw a5, 12(a6)
+; RV32I-NEXT: xori a6, t0, 31
+; RV32I-NEXT: sll a7, a3, a2
+; RV32I-NEXT: srli t0, a1, 1
+; RV32I-NEXT: sll a5, a5, a2
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: sll a2, a4, a2
+; RV32I-NEXT: srli a3, a3, 1
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: srl t0, t0, a6
+; RV32I-NEXT: srl a3, a3, a6
+; RV32I-NEXT: srl a4, a4, a6
+; RV32I-NEXT: or a6, a7, t0
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a6, 4(a0)
+; RV32I-NEXT: sw a2, 8(a0)
+; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: shl128_shamt32_arith:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addiw a4, a2, 1
+; RV64I-NEXT: addi a3, a4, -64
+; RV64I-NEXT: sll a2, a0, a4
+; RV64I-NEXT: bltz a3, .LBB17_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a2
+; RV64I-NEXT: j .LBB17_3
+; RV64I-NEXT: .LBB17_2:
+; RV64I-NEXT: sll a1, a1, a4
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: not a4, a4
+; RV64I-NEXT: srl a0, a0, a4
+; RV64I-NEXT: or a1, a1, a0
+; RV64I-NEXT: .LBB17_3:
+; RV64I-NEXT: srai a0, a3, 63
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: ret
+ %c = add i32 %b, 1
+ %zext = zext nneg i32 %c to i128
+ %1 = shl i128 %a, %zext
+ ret i128 %1
+}
LGTM