diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index 830eed5d60ee4..7d896c44f4679 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -297,13 +297,6 @@ class MemoryLocation { return MemoryLocation(Ptr, LocationSize::beforeOrAfterPointer(), AATags); } - // Return the exact size if the exact size is known at compiletime, - // otherwise return LocationSize::beforeOrAfterPointer(). - static LocationSize getSizeOrUnknown(const TypeSize &T) { - return T.isScalable() ? LocationSize::beforeOrAfterPointer() - : LocationSize::precise(T.getFixedValue()); - } - MemoryLocation() : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()) {} explicit MemoryLocation(const Value *Ptr, LocationSize Size, diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index dfbf7a1e7aae5..c2bff27944939 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -1058,8 +1058,9 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { int64_t Offset, LocationSize Size) { return getMachineMemOperand( MMO, Offset, - !Size.hasValue() || Size.isScalable() - ? LLT() + !Size.hasValue() ? LLT() + : Size.isScalable() + ? LLT::scalable_vector(1, 8 * Size.getValue().getKnownMinValue()) : LLT::scalar(8 * Size.getValue().getKnownMinValue())); } MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index 9fc8ecd60b03f..fb9656c09ca39 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -128,14 +128,14 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1, // vector objects on the stack. // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the // following situations arise: - if (PtrDiff >= 0 && Size1.hasValue()) { + if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) { // [----BasePtr0----] // [---BasePtr1--] // ========PtrDiff========> IsAlias = !((int64_t)Size1.getValue() <= PtrDiff); return true; } - if (PtrDiff < 0 && Size2.hasValue()) { + if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) { // [----BasePtr0----] // [---BasePtr1--] // =====(-PtrDiff)====> @@ -248,10 +248,20 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, return false; } + // If NumBytes is scalable and offset is not 0, conservatively return may + // alias + if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) || + (MUC1.NumBytes.isScalable() && MUC1.Offset != 0)) + return true; + + const bool BothNotScalable = + !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable(); + // Try to prove that there is aliasing, or that there is no aliasing. Either // way, we can return now. If nothing can be proved, proceed with more tests. bool IsAlias; - if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) + if (BothNotScalable && + GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI)) return IsAlias; // The following all rely on MMO0 and MMO1 being valid. @@ -267,12 +277,18 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI, Size1.hasValue()) { // Use alias analysis information. 
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset; - int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset; - if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0, - MUC0.MMO->getAAInfo()), - MemoryLocation(MUC1.MMO->getValue(), Overlap1, - MUC1.MMO->getAAInfo()))) + int64_t Overlap0 = + Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset; + int64_t Overlap1 = + Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset; + LocationSize Loc0 = + Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0); + LocationSize Loc1 = + Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1); + + if (AA->isNoAlias( + MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()), + MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo()))) return false; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index fe2f9ccd33a33..8102bb971ba66 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1306,6 +1306,7 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, LocationSize WidthB = MMOb->getSize(); bool KnownWidthA = WidthA.hasValue(); bool KnownWidthB = WidthB.hasValue(); + bool BothMMONonScalable = !WidthA.isScalable() && !WidthB.isScalable(); const Value *ValA = MMOa->getValue(); const Value *ValB = MMOb->getValue(); @@ -1321,12 +1322,14 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, SameVal = true; } - if (SameVal) { + if (SameVal && BothMMONonScalable) { if (!KnownWidthA || !KnownWidthB) return true; int64_t MaxOffset = std::max(OffsetA, OffsetB); - LocationSize LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; - return (MinOffset + (int)LowWidth.getValue() > MaxOffset); + int64_t LowWidth = (MinOffset == OffsetA) + ? WidthA.getValue().getKnownMinValue() + : WidthB.getValue().getKnownMinValue(); + return (MinOffset + LowWidth > MaxOffset); } if (!AA) @@ -1338,15 +1341,29 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA, assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); - int64_t OverlapA = KnownWidthA ? WidthA.getValue() + OffsetA - MinOffset - : MemoryLocation::UnknownSize; - int64_t OverlapB = KnownWidthB ? WidthB.getValue() + OffsetB - MinOffset - : MemoryLocation::UnknownSize; + // If Scalable Location Size has non-zero offset, Width + Offset does not work + // at the moment + if ((WidthA.isScalable() && OffsetA > 0) || + (WidthB.isScalable() && OffsetB > 0)) + return true; + + int64_t OverlapA = + KnownWidthA ? WidthA.getValue().getKnownMinValue() + OffsetA - MinOffset + : MemoryLocation::UnknownSize; + int64_t OverlapB = + KnownWidthB ? WidthB.getValue().getKnownMinValue() + OffsetB - MinOffset + : MemoryLocation::UnknownSize; + + LocationSize LocA = (WidthA.isScalable() || !KnownWidthA) + ? WidthA + : LocationSize::precise(OverlapA); + LocationSize LocB = (WidthB.isScalable() || !KnownWidthB) + ? WidthB + : LocationSize::precise(OverlapB); return !AA->isNoAlias( - MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(ValB, OverlapB, - UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + MemoryLocation(ValA, LocA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, LocB, UseTBAA ? 
MMOb->getAAInfo() : AAMDNodes())); } bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 937ca539513af..ace05902d5df7 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1107,12 +1107,13 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags F, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) - : MachineMemOperand(ptrinfo, F, - !TS.hasValue() || TS.isScalable() - ? LLT() - : LLT::scalar(8 * TS.getValue().getKnownMinValue()), - BaseAlignment, AAInfo, Ranges, SSID, Ordering, - FailureOrdering) {} + : MachineMemOperand( + ptrinfo, F, + !TS.hasValue() ? LLT() + : TS.isScalable() + ? LLT::scalable_vector(1, 8 * TS.getValue().getKnownMinValue()) + : LLT::scalar(8 * TS.getValue().getKnownMinValue()), + BaseAlignment, AAInfo, Ranges, SSID, Ordering, FailureOrdering) {} void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { // The Value and Offset may differ due to CSE. But the flags and size diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d6d0bf02c6397..d17040d66b811 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24181,7 +24181,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // TODO: Use "BaseIndexOffset" to make this more effective. SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL); - LocationSize StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize()); + LocationSize StoreSize = LocationSize::precise(VT.getStoreSize()); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO; if (Offset.isScalable()) { @@ -27826,14 +27826,10 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - LocationSize Size = - MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); - return {LSN->isVolatile(), - LSN->isAtomic(), - LSN->getBasePtr(), - Offset /*base offset*/, - Size, - LSN->getMemOperand()}; + TypeSize Size = LSN->getMemoryVT().getStoreSize(); + return {LSN->isVolatile(), LSN->isAtomic(), + LSN->getBasePtr(), Offset /*base offset*/, + LocationSize::precise(Size), LSN->getMemOperand()}; } if (const auto *LN = cast(N)) return {false /*isVolatile*/, @@ -27875,6 +27871,13 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { return false; } + // If NumBytes is scalable and offset is not 0, conservatively return may + // alias + if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() && + MUC0.Offset != 0) || + (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() && + MUC1.Offset != 0)) + return true; // Try to prove that there is aliasing, or that there is no aliasing. Either // way, we can return now. If nothing can be proved, proceed with more tests. 
bool IsAlias; @@ -27905,18 +27908,22 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { Align OrigAlignment1 = MUC1.MMO->getBaseAlign(); LocationSize Size0 = MUC0.NumBytes; LocationSize Size1 = MUC1.NumBytes; + if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - Size0.hasValue() && Size1.hasValue() && Size0 == Size1 && - OrigAlignment0 > Size0.getValue() && - SrcValOffset0 % Size0.getValue() == 0 && - SrcValOffset1 % Size1.getValue() == 0) { + Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() && + !Size1.isScalable() && Size0 == Size1 && + OrigAlignment0 > Size0.getValue().getKnownMinValue() && + SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 && + SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. - if ((OffAlign0 + (int64_t)Size0.getValue()) <= OffAlign1 || - (OffAlign1 + (int64_t)Size1.getValue()) <= OffAlign0) + if ((OffAlign0 + static_cast( + Size0.getValue().getKnownMinValue())) <= OffAlign1 || + (OffAlign1 + static_cast( + Size1.getValue().getKnownMinValue())) <= OffAlign0) return false; } @@ -27933,12 +27940,18 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { Size0.hasValue() && Size1.hasValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset; - int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset; + int64_t Overlap0 = + Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset; + int64_t Overlap1 = + Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset; + LocationSize Loc0 = + Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0); + LocationSize Loc1 = + Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1); if (AA->isNoAlias( - MemoryLocation(MUC0.MMO->getValue(), Overlap0, + MemoryLocation(MUC0.MMO->getValue(), Loc0, UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), - MemoryLocation(MUC1.MMO->getValue(), Overlap1, + MemoryLocation(MUC1.MMO->getValue(), Loc1, UseTBAA ? 
MUC1.MMO->getAAInfo() : AAMDNodes()))) return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6f6ed4bd45027..f1edc7976cf4d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8404,9 +8404,7 @@ SDValue SelectionDAG::getMemIntrinsicNode( EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags, LocationSize Size, const AAMDNodes &AAInfo) { - if (Size.hasValue() && MemVT.isScalableVector()) - Size = LocationSize::beforeOrAfterPointer(); - else if (Size.hasValue() && !Size.getValue()) + if (Size.hasValue() && !Size.getValue()) Size = LocationSize::precise(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); @@ -8569,7 +8567,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); - LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); + LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges); @@ -8690,8 +8688,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); - LocationSize Size = - MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); + LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize()); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); @@ -8744,8 +8741,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), - Alignment, AAInfo); + PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, + AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -8839,7 +8836,7 @@ SDValue SelectionDAG::getLoadVP( if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); - LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); + LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges); @@ -8992,8 +8989,8 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), - Alignment, AAInfo); + PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, + AAInfo); return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, IsCompressing); } @@ -11728,10 +11725,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. - // TODO: Make MachineMemOperands aware of scalable vectors. 
assert( (!MMO->getType().isValid() || - memvt.getStoreSize().getKnownMinValue() <= MMO->getSize().getValue()) && + TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) && "Size mismatch!"); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 9670c3ac8430e..f2ab88851b780 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -106,8 +106,6 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, int64_t PtrDiff; if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { // If the size of memory access is unknown, do not use it to analysis. - // One example of unknown size memory access is to load/store scalable - // vector objects on the stack. // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the // following situations arise: if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8780f4b931484..f575ce37e8326 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4960,7 +4960,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), Ranges); + LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), + Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 02943b8a4ab15..d0c5e6b99e9ee 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2687,10 +2687,7 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( return false; // The maximum vscale is 16 under AArch64, return the maximal extent for the // vector. - Width = WidthN.isScalable() - ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector / - AArch64::SVEBitsPerBlock - : WidthN.getKnownMinValue(); + Width = LocationSize::precise(WidthN); BaseOps.push_back(BaseOp); return true; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 105587f0810b8..ed988566143e4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10340,9 +10340,15 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { + MachineMemOperand *MMO = Load->getMemOperand(); + MachineFunction &MF = DAG.getMachineFunction(); + MMO = MF.getMachineMemOperand( + MMO, MMO->getPointerInfo(), + MMO->getMemoryType().isValid() + ? 
LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits()) + : MMO->getMemoryType()); SDValue NewLoad = - DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), - Load->getMemOperand()); + DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO); SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); } @@ -10400,9 +10406,17 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, const auto [MinVLMAX, MaxVLMAX] = RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && - getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) + getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { + MachineMemOperand *MMO = Store->getMemOperand(); + MachineFunction &MF = DAG.getMachineFunction(); + MMO = MF.getMachineMemOperand( + MMO, MMO->getPointerInfo(), + MMO->getMemoryType().isValid() + ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits()) + : MMO->getMemoryType()); return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), - Store->getMemOperand()); + MMO); + } SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget); diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll index 58299696e78fc..d4d803a91cfa1 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll @@ -3,7 +3,7 @@ define void @UphPNR(target("aarch64.svcount") %predcnt) { entry: ; CHECK: %0:ppr = COPY $p0 -; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store () into %ir.predcnt.addr) ; CHECK: %1:pnr_p8to15 = COPY %0 ; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_p8to15 */, %1 ; CHECK: RET_ReallyLR @@ -17,7 +17,7 @@ entry: define void @UpaPNR(target("aarch64.svcount") %predcnt) { entry: ; CHECK: %0:ppr = COPY $p0 -; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store () into %ir.predcnt.addr) ; CHECK: %1:pnr = COPY %0 ; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR */, %1 ; CHECK: RET_ReallyLR @@ -31,7 +31,7 @@ entry: define void @UplPNR(target("aarch64.svcount") %predcnt) { entry: ; CHECK: %0:ppr = COPY $p0 -; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store () into %ir.predcnt.addr) ; CHECK: %1:pnr_3b = COPY %0 ; CHECK: INLINEASM &"fadd z0.h, $0/m, z0.h, #0.5", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_3b */, %1 ; CHECK: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll index 9a4e01a29ecb6..7244ac949ab88 100644 --- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll +++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll @@ -14,12 +14,12 @@ define void @array_1D(ptr %addr) #0 { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl] -; 
CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] -; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [sp] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -81,18 +81,18 @@ define void @array_2D(ptr %addr) #0 { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #5, mul vl] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #4, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl] -; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [sp, #5, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #4, mul vl] -; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] -; CHECK-NEXT: st1d { z5.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [sp] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #5, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl] +; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #5, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #6 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll index 7292d52aaf476..f03a6f018d34d 100644 --- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll +++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll @@ -13,12 +13,12 @@ define void @test(ptr %addr) #0 { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #2, mul vl] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] -; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [sp] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll 
b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll index 1d025a2f776f8..1fe91c721f4dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll @@ -18,15 +18,15 @@ define void @test(ptr %addr) { ; CHECK-NEXT: add a2, a0, a1 ; CHECK-NEXT: vl1re64.v v8, (a2) ; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: add a3, a0, a2 -; CHECK-NEXT: vl1re64.v v9, (a3) +; CHECK-NEXT: vl1re64.v v9, (a0) +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: vl1re64.v v10, (a0) ; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v9, (a0) ; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs1r.v v9, (a2) -; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vs1r.v v8, (a1) -; CHECK-NEXT: vs1r.v v10, (a0) +; CHECK-NEXT: vs1r.v v10, (a2) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: csrrs a0, vlenb, zero ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll index 64031f8a93598..a9a680d54d589 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll @@ -16,13 +16,13 @@ define @test(ptr %addr, i64 %vl) { ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; CHECK-NEXT: csrrs a2, vlenb, zero -; CHECK-NEXT: add a3, a0, a2 -; CHECK-NEXT: vl1re64.v v8, (a3) +; CHECK-NEXT: vl1re64.v v8, (a0) +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: vl1re64.v v9, (a0) ; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs1r.v v8, (a2) -; CHECK-NEXT: vs1r.v v9, (a0) +; CHECK-NEXT: vs1r.v v9, (a2) ; CHECK-NEXT: vl1re64.v v8, (a2) ; CHECK-NEXT: vl1re64.v v9, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll index 0544204cce792..52bd15742ef4b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll @@ -16,8 +16,8 @@ define void @vpmerge_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer @@ -37,8 +37,8 @@ define void @vpselect_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer
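
What follows are a few illustrative sketches for reviewers; none of this code is part of the diff. The first change above drops MemoryLocation::getSizeOrUnknown, whose callers now use LocationSize::precise(TypeSize) directly. The practical difference is that a scalable store size is no longer degraded to "unknown size": it stays a precise, scalable LocationSize, which is why the SME MIR checks change from "store unknown-size" to a typed scalable store and why the AArch64/RISC-V stack load/store sequences can be scheduled more freely. A minimal illustration, assuming only LLVM's MemoryLocation and TypeSize headers:

```cpp
#include "llvm/Analysis/MemoryLocation.h" // LocationSize
#include "llvm/Support/TypeSize.h"        // TypeSize
#include <cassert>

using namespace llvm;

int main() {
  // The old helper collapsed any scalable size to "before or after pointer".
  // Callers now keep the scalable quantity precise instead.
  LocationSize S = LocationSize::precise(TypeSize::getScalable(16));
  assert(S.hasValue() && S.isScalable());
  assert(S.getValue().getKnownMinValue() == 16);

  LocationSize F = LocationSize::precise(TypeSize::getFixed(16));
  assert(F.hasValue() && !F.isScalable());
  return 0;
}
```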
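
The MachineFunction::getMachineMemOperand and MachineMemOperand constructor changes both encode the same rule: an unknown size becomes an invalid LLT, a scalable size becomes a scalable vector type carrying the known-minimum bit width, and a fixed size stays a plain scalar. A minimal sketch of that mapping, written against LLVM's LocationSize and LLT; header paths vary by LLVM version and memTypeForSize is a made-up helper, not an LLVM API:

```cpp
#include "llvm/Analysis/MemoryLocation.h"   // LocationSize
#include "llvm/CodeGenTypes/LowLevelType.h" // LLT (llvm/CodeGen/LowLevelType.h in older trees)

using namespace llvm;

// Mirrors the ternary used in MachineFunction.h and MachineOperand.cpp
// after this patch.
static LLT memTypeForSize(LocationSize TS) {
  if (!TS.hasValue())
    return LLT(); // unknown size -> invalid memory type
  uint64_t MinBits = 8 * TS.getValue().getKnownMinValue();
  if (TS.isScalable())
    // <vscale x 1 x iN>: the size scales with vscale, as in the patch.
    return LLT::scalable_vector(/*MinNumElements=*/1, /*ScalarSizeInBits=*/MinBits);
  return LLT::scalar(MinBits); // fixed size -> scalar of that width
}
```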
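
LoadStoreOpt's instMayAlias, MachineInstr's MemOperandsHaveAlias, and DAGCombiner::mayAlias all gain the same guard: once an access has a scalable size, a non-zero offset can no longer be folded into a fixed overlap window, so the code conservatively answers "may alias"; fixed-size pairs keep the usual byte-range overlap check, and everything else is handed to alias analysis with the scalable LocationSize preserved. A simplified sketch of that decision, using a made-up helper that takes only offsets and sizes (the real code also consults MMO values, AA metadata, and frame info):

```cpp
#include "llvm/Analysis/MemoryLocation.h" // LocationSize
#include <cstdint>

using namespace llvm;

// conservativeMayAlias is illustration only, not an LLVM API.
static bool conservativeMayAlias(int64_t Off0, LocationSize Size0,
                                 int64_t Off1, LocationSize Size1) {
  // Unknown sizes: nothing can be proven, assume aliasing.
  if (!Size0.hasValue() || !Size1.hasValue())
    return true;
  // Scalable size with a non-zero offset: "known-min size + offset" is not a
  // valid bound for every vscale, so be conservative.
  if ((Size0.isScalable() && Off0 != 0) || (Size1.isScalable() && Off1 != 0))
    return true;
  // Both fixed: the classic overlap check on byte ranges still applies.
  if (!Size0.isScalable() && !Size1.isScalable()) {
    int64_t End0 = Off0 + int64_t(Size0.getValue().getKnownMinValue());
    int64_t End1 = Off1 + int64_t(Size1.getValue().getKnownMinValue());
    return !(End0 <= Off1 || End1 <= Off0);
  }
  // Scalable at offset 0, or mixed: in the patch this is deferred to AA via
  // MemoryLocation(..., Loc0/Loc1, ...) with the scalable size kept intact;
  // without AA, treat it as may-alias.
  return true;
}
```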
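
The MemSDNode assert now uses TypeSize::isKnownLE instead of comparing a known-minimum value against a fixed MMO size, because MMO sizes can themselves be scalable after this change. isKnownLE only claims an ordering when it holds for every vscale: a fixed size is known to fit in a scalable size with an equal or larger minimum, but a scalable size is never known to fit in a fixed one. A small standalone illustration with assumed values:

```cpp
#include "llvm/Support/TypeSize.h"
#include <cassert>

using namespace llvm;

int main() {
  TypeSize Fixed16 = TypeSize::getFixed(16);    // 16 bytes
  TypeSize Scal16  = TypeSize::getScalable(16); // vscale x 16 bytes

  assert(TypeSize::isKnownLE(Fixed16, Fixed16));  // 16 <= 16
  assert(TypeSize::isKnownLE(Fixed16, Scal16));   // 16 <= vscale*16 for all vscale >= 1
  assert(!TypeSize::isKnownLE(Scal16, Fixed16));  // not provable: vscale*16 may exceed 16
  return 0;
}
```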
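
The RISC-V hunks rebuild the memory operand before reusing it on the new whole-register access: the node now has a scalable container type, and under the tightened MemSDNode assert a scalable store size is never known to fit in a fixed-size MMO. A sketch of that pattern, assuming an LLVM MachineFunction context; rebuildAsScalable is a made-up name:

```cpp
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h" // llvm/CodeGen/LowLevelType.h in older trees

using namespace llvm;

// Mirror of the MMO rewrite in lowerFixedLengthVectorLoadToRVV /
// lowerFixedLengthVectorStoreToRVV: keep pointer info, flags and AA info,
// but describe the access with a scalable type of the same minimum width.
static MachineMemOperand *rebuildAsScalable(MachineFunction &MF,
                                            MachineMemOperand *MMO) {
  LLT OldTy = MMO->getMemoryType();
  LLT NewTy = OldTy.isValid()
                  ? LLT::scalable_vector(1, OldTy.getSizeInBits())
                  : OldTy; // an invalid (unknown-size) type stays as-is
  return MF.getMachineMemOperand(MMO, MMO->getPointerInfo(), NewTy);
}
```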