Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 22c77f2

Browse files
authoredAug 10, 2024··
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses that expect to be called only on instructions and basic blocks of the function they were original created for. When Polly outlined a parallel loop body into a separate function, it reused the same analyses seemed to work until new checks to be added in #101198. This patch creates new analyses for the subfunctions. GenDT, GenLI, and GenSE now refer to the analyses of the current region of code. Outside of an outlined function, they refer to the same analysis as used for the SCoP, but are substituted within an outlined function. Additionally to the cross-function queries of DT/LI/SE, we must not create SCEVs that refer to a mix of expressions for old and generated values. Currently, SCEVs themselves do not "remember" which ScalarEvolution analysis they were created for, but mixing them is just as unexpected as using DT/LI across function boundaries. Hence `SCEVLoopAddRecRewriter` was combined into `ScopExpander`. `SCEVLoopAddRecRewriter` only replaced induction variables but left SCEVUnknowns to reference the old function. `SCEVParameterRewriter` would have done so but its job was effectively superseded by `ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues persist put marked with a FIXME in the code. Changing them would possibly cause this patch to be not NFC anymore.
1 parent 3b57f6b commit 22c77f2

13 files changed

+351
-221
lines changed
 

‎polly/include/polly/CodeGen/BlockGenerators.h

+11-2
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,14 @@ class BlockGenerator {
162162
/// The dominator tree of this function.
163163
DominatorTree &DT;
164164

165-
/// The entry block of the current function.
166-
BasicBlock *EntryBB;
165+
/// Relates to the region where the code is emitted into.
166+
/// @{
167+
DominatorTree *GenDT;
168+
LoopInfo *GenLI;
169+
ScalarEvolution *GenSE;
170+
/// @}
167171

172+
public:
168173
/// Map to resolve scalar dependences for PHI operands and scalars.
169174
///
170175
/// When translating code that contains scalar dependences as they result from
@@ -298,6 +303,10 @@ class BlockGenerator {
298303
/// Split @p BB to create a new one we can use to clone @p BB in.
299304
BasicBlock *splitBB(BasicBlock *BB);
300305

306+
/// Change the function that code is emitted into.
307+
void switchGeneratedFunc(Function *GenFn, DominatorTree *GenDT,
308+
LoopInfo *GenLI, ScalarEvolution *GenSE);
309+
301310
/// Copy the given basic block.
302311
///
303312
/// @param Stmt The statement to code generate.

‎polly/include/polly/CodeGen/IslExprBuilder.h

+11-2
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ class IslExprBuilder final {
124124
llvm::ScalarEvolution &SE, llvm::DominatorTree &DT,
125125
llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock);
126126

127+
/// Change the function that code is emitted into.
128+
void switchGeneratedFunc(llvm::Function *GenFn, llvm::DominatorTree *GenDT,
129+
llvm::LoopInfo *GenLI, llvm::ScalarEvolution *GenSE);
130+
127131
/// Create LLVM-IR for an isl_ast_expr[ession].
128132
///
129133
/// @param Expr The ast expression for which we generate LLVM-IR.
@@ -205,10 +209,15 @@ class IslExprBuilder final {
205209

206210
const llvm::DataLayout &DL;
207211
llvm::ScalarEvolution &SE;
208-
llvm::DominatorTree &DT;
209-
llvm::LoopInfo &LI;
210212
llvm::BasicBlock *StartBlock;
211213

214+
/// Relates to the region where the code is emitted into.
215+
/// @{
216+
llvm::DominatorTree *GenDT;
217+
llvm::LoopInfo *GenLI;
218+
llvm::ScalarEvolution *GenSE;
219+
/// @}
220+
212221
llvm::Value *createOp(__isl_take isl_ast_expr *Expr);
213222
llvm::Value *createOpUnary(__isl_take isl_ast_expr *Expr);
214223
llvm::Value *createOpAccess(__isl_take isl_ast_expr *Expr);

‎polly/include/polly/CodeGen/IslNodeBuilder.h

+8-13
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class IslNodeBuilder {
7272
BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap,
7373
&ExprBuilder, StartBlock),
7474
RegionGen(BlockGen), DL(DL), LI(LI), SE(SE), DT(DT),
75-
StartBlock(StartBlock) {}
75+
StartBlock(StartBlock), GenDT(&DT), GenLI(&LI), GenSE(&SE) {}
7676

7777
virtual ~IslNodeBuilder() = default;
7878

@@ -147,6 +147,13 @@ class IslNodeBuilder {
147147
DominatorTree &DT;
148148
BasicBlock *StartBlock;
149149

150+
/// Relates to the region where the code is emitted into.
151+
/// @{
152+
DominatorTree *GenDT;
153+
LoopInfo *GenLI;
154+
ScalarEvolution *GenSE;
155+
/// @}
156+
150157
/// The current iteration of out-of-scop loops
151158
///
152159
/// This map provides for a given loop a llvm::Value that contains the current
@@ -246,18 +253,6 @@ class IslNodeBuilder {
246253
SetVector<Value *> &Values,
247254
SetVector<const Loop *> &Loops);
248255

249-
/// Change the llvm::Value(s) used for code generation.
250-
///
251-
/// When generating code certain values (e.g., references to induction
252-
/// variables or array base pointers) in the original code may be replaced by
253-
/// new values. This function allows to (partially) update the set of values
254-
/// used. A typical use case for this function is the case when we continue
255-
/// code generation in a subfunction/kernel function and need to explicitly
256-
/// pass down certain values.
257-
///
258-
/// @param NewValues A map that maps certain llvm::Values to new llvm::Values.
259-
void updateValues(ValueMapT &NewValues);
260-
261256
/// Return the most up-to-date version of the llvm::Value for code generation.
262257
/// @param Original The Value to check for an up to date version.
263258
/// @returns A remapped `Value` from ValueMap, or `Original` if no mapping

‎polly/include/polly/CodeGen/LoopGenerators.h

+14-10
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ extern int PollyChunkSize;
5555
/// @param Builder The builder used to create the loop.
5656
/// @param P A pointer to the pass that uses this function.
5757
/// It is used to update analysis information.
58-
/// @param LI The loop info for the current function
58+
/// @param LI The loop info we need to update
5959
/// @param DT The dominator tree we need to update
6060
/// @param ExitBlock The block the loop will exit to.
6161
/// @param Predicate The predicate used to generate the upper loop
@@ -128,11 +128,9 @@ llvm::DebugLoc createDebugLocForGeneratedCode(Function *F);
128128
class ParallelLoopGenerator {
129129
public:
130130
/// Create a parallel loop generator for the current function.
131-
ParallelLoopGenerator(PollyIRBuilder &Builder, LoopInfo &LI,
132-
DominatorTree &DT, const DataLayout &DL)
133-
: Builder(Builder), LI(LI), DT(DT),
134-
LongType(
135-
Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
131+
ParallelLoopGenerator(PollyIRBuilder &Builder, const DataLayout &DL)
132+
: Builder(Builder), LongType(Type::getIntNTy(Builder.getContext(),
133+
DL.getPointerSizeInBits())),
136134
M(Builder.GetInsertBlock()->getParent()->getParent()),
137135
DLGenerated(createDebugLocForGeneratedCode(
138136
Builder.GetInsertBlock()->getParent())) {}
@@ -164,11 +162,11 @@ class ParallelLoopGenerator {
164162
/// The IR builder we use to create instructions.
165163
PollyIRBuilder &Builder;
166164

167-
/// The loop info of the current function we need to update.
168-
LoopInfo &LI;
165+
/// The loop info for the generated subfunction.
166+
std::unique_ptr<LoopInfo> SubFnLI;
169167

170-
/// The dominance tree of the current function we need to update.
171-
DominatorTree &DT;
168+
/// The dominance tree for the generated subfunction.
169+
std::unique_ptr<DominatorTree> SubFnDT;
172170

173171
/// The type of a "long" on this hardware used for backend calls.
174172
Type *LongType;
@@ -184,6 +182,12 @@ class ParallelLoopGenerator {
184182
llvm::DebugLoc DLGenerated;
185183

186184
public:
185+
/// Returns the DominatorTree for the generated subfunction.
186+
DominatorTree *getCalleeDominatorTree() const { return SubFnDT.get(); }
187+
188+
/// Returns the LoopInfo for the generated subfunction.
189+
LoopInfo *getCalleeLoopInfo() const { return SubFnLI.get(); }
190+
187191
/// Create a struct for all @p Values and store them in there.
188192
///
189193
/// @param Values The values which should be stored in the struct.

‎polly/include/polly/CodeGen/LoopGeneratorsGOMP.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@ namespace polly {
2525
class ParallelLoopGeneratorGOMP final : public ParallelLoopGenerator {
2626
public:
2727
/// Create a parallel loop generator for the current function.
28-
ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, LoopInfo &LI,
29-
DominatorTree &DT, const DataLayout &DL)
30-
: ParallelLoopGenerator(Builder, LI, DT, DL) {}
28+
ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, const DataLayout &DL)
29+
: ParallelLoopGenerator(Builder, DL) {}
3130

3231
// The functions below may be used if one does not want to generate a
3332
// specific OpenMP parallel loop, but generate individual parts of it

‎polly/include/polly/CodeGen/LoopGeneratorsKMP.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,8 @@ using llvm::GlobalVariable;
2727
class ParallelLoopGeneratorKMP final : public ParallelLoopGenerator {
2828
public:
2929
/// Create a parallel loop generator for the current function.
30-
ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
31-
DominatorTree &DT, const DataLayout &DL)
32-
: ParallelLoopGenerator(Builder, LI, DT, DL) {
30+
ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, const DataLayout &DL)
31+
: ParallelLoopGenerator(Builder, DL) {
3332
SourceLocationInfo = createSourceLocation();
3433
}
3534

‎polly/include/polly/Support/ScopHelper.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ namespace polly {
3636
class Scop;
3737
class ScopStmt;
3838

39+
/// Same as llvm/Analysis/ScalarEvolutionExpressions.h
40+
using LoopToScevMapT = llvm::DenseMap<const llvm::Loop *, const llvm::SCEV *>;
41+
3942
/// Enumeration of assumptions Polly can take.
4043
enum AssumptionKind {
4144
ALIASING,
@@ -383,20 +386,24 @@ void splitEntryBlockForAlloca(llvm::BasicBlock *EntryBlock,
383386
/// as the call to SCEVExpander::expandCodeFor:
384387
///
385388
/// @param S The current Scop.
386-
/// @param SE The Scalar Evolution pass.
389+
/// @param SE The Scalar Evolution pass used by @p S.
390+
/// @param GenFn The function to generate code in. Can be the same as @p SE.
391+
/// @param GenSE The Scalar Evolution pass for @p GenFn.
387392
/// @param DL The module data layout.
388393
/// @param Name The suffix added to the new instruction names.
389394
/// @param E The expression for which code is actually generated.
390395
/// @param Ty The type of the resulting code.
391396
/// @param IP The insertion point for the new code.
392397
/// @param VMap A remapping of values used in @p E.
398+
/// @param LoopMap A remapping of loops used in @p E.
393399
/// @param RTCBB The last block of the RTC. Used to insert loop-invariant
394400
/// instructions in rare cases.
395401
llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE,
402+
llvm::Function *GenFn, llvm::ScalarEvolution &GenSE,
396403
const llvm::DataLayout &DL, const char *Name,
397404
const llvm::SCEV *E, llvm::Type *Ty,
398405
llvm::Instruction *IP, ValueMapT *VMap,
399-
llvm::BasicBlock *RTCBB);
406+
LoopToScevMapT *LoopMap, llvm::BasicBlock *RTCBB);
400407

401408
/// Return the condition for the terminator @p TI.
402409
///

‎polly/lib/CodeGen/BlockGenerators.cpp

+22-16
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ BlockGenerator::BlockGenerator(
5757
PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
5858
AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
5959
ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
60-
: Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
61-
EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
60+
: Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT), GenDT(&DT),
61+
GenLI(&LI), GenSE(&SE), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
6262
GlobalMap(GlobalMap), StartBlock(StartBlock) {}
6363

6464
Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
@@ -75,7 +75,6 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
7575
if (isa<SCEVCouldNotCompute>(Scev))
7676
return nullptr;
7777

78-
const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
7978
ValueMapT VTV;
8079
VTV.insert(BBMap.begin(), BBMap.end());
8180
VTV.insert(GlobalMap.begin(), GlobalMap.end());
@@ -86,9 +85,9 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
8685

8786
assert(IP != Builder.GetInsertBlock()->end() &&
8887
"Only instructions can be insert points for SCEVExpander");
89-
Value *Expanded =
90-
expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
91-
StartBlock->getSinglePredecessor());
88+
Value *Expanded = expandCodeFor(
89+
S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL, "polly", Scev,
90+
Old->getType(), &*IP, &VTV, &LTS, StartBlock->getSinglePredecessor());
9291

9392
BBMap[Old] = Expanded;
9493
return Expanded;
@@ -233,6 +232,8 @@ void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
233232
return;
234233
}
235234

235+
// FIXME: We will encounter "NewOperand" again if used twice. getNewValue()
236+
// is meant to be called on old values only.
236237
NewInst->replaceUsesOfWith(OldOperand, NewOperand);
237238
}
238239

@@ -410,7 +411,7 @@ void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
410411

411412
BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
412413
BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
413-
&*Builder.GetInsertPoint(), &DT, &LI);
414+
&*Builder.GetInsertPoint(), GenDT, GenLI);
414415
CopyBB->setName("polly.stmt." + BB->getName());
415416
return CopyBB;
416417
}
@@ -431,11 +432,20 @@ BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
431432
return CopyBB;
432433
}
433434

435+
void BlockGenerator::switchGeneratedFunc(Function *GenFn, DominatorTree *GenDT,
436+
LoopInfo *GenLI,
437+
ScalarEvolution *GenSE) {
438+
assert(GenFn == GenDT->getRoot()->getParent());
439+
assert(GenLI->getTopLevelLoops().empty() ||
440+
GenFn == GenLI->getTopLevelLoops().front()->getHeader()->getParent());
441+
this->GenDT = GenDT;
442+
this->GenLI = GenLI;
443+
this->GenSE = GenSE;
444+
}
445+
434446
void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
435447
ValueMapT &BBMap, LoopToScevMapT &LTS,
436448
isl_id_to_ast_expr *NewAccesses) {
437-
EntryBB = &CopyBB->getParent()->getEntryBlock();
438-
439449
// Block statements and the entry blocks of region statement are code
440450
// generated from instruction lists. This allow us to optimize the
441451
// instructions that belong to a certain scop statement. As the code
@@ -497,7 +507,7 @@ Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
497507
Addr =
498508
new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
499509
DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
500-
EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
510+
BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
501511
Addr->insertBefore(&*EntryBB->getFirstInsertionPt());
502512

503513
return Addr;
@@ -554,10 +564,6 @@ void BlockGenerator::generateScalarLoads(
554564

555565
auto *Address =
556566
getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
557-
assert((!isa<Instruction>(Address) ||
558-
DT.dominates(cast<Instruction>(Address)->getParent(),
559-
Builder.GetInsertBlock())) &&
560-
"Domination violation");
561567
BBMap[MA->getAccessValue()] = Builder.CreateLoad(
562568
MA->getElementType(), Address, Address->getName() + ".reload");
563569
}
@@ -615,9 +621,9 @@ void BlockGenerator::generateConditionalExecution(
615621
StringRef BlockName = HeadBlock->getName();
616622

617623
// Generate the conditional block.
618-
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
624+
DomTreeUpdater DTU(GenDT, DomTreeUpdater::UpdateStrategy::Eager);
619625
SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
620-
&DTU, &LI);
626+
&DTU, GenLI);
621627
BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
622628
BasicBlock *ThenBlock = Branch->getSuccessor(0);
623629
BasicBlock *TailBlock = Branch->getSuccessor(1);

‎polly/lib/CodeGen/IslExprBuilder.cpp

+23-12
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,23 @@ IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder,
4242
DominatorTree &DT, LoopInfo &LI,
4343
BasicBlock *StartBlock)
4444
: S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap),
45-
DL(DL), SE(SE), DT(DT), LI(LI), StartBlock(StartBlock) {
45+
DL(DL), SE(SE), StartBlock(StartBlock), GenDT(&DT), GenLI(&LI),
46+
GenSE(&SE) {
4647
OverflowState = (OTMode == OT_ALWAYS) ? Builder.getFalse() : nullptr;
4748
}
4849

50+
void IslExprBuilder::switchGeneratedFunc(llvm::Function *GenFn,
51+
llvm::DominatorTree *GenDT,
52+
llvm::LoopInfo *GenLI,
53+
llvm::ScalarEvolution *GenSE) {
54+
assert(GenFn == GenDT->getRoot()->getParent());
55+
assert(GenLI->getTopLevelLoops().empty() ||
56+
GenFn == GenLI->getTopLevelLoops().front()->getHeader()->getParent());
57+
this->GenDT = GenDT;
58+
this->GenLI = GenLI;
59+
this->GenSE = GenSE;
60+
}
61+
4962
void IslExprBuilder::setTrackOverflow(bool Enable) {
5063
// If potential overflows are tracked always or never we ignore requests
5164
// to change the behavior.
@@ -307,14 +320,12 @@ IslExprBuilder::createAccessAddress(__isl_take isl_ast_expr *Expr) {
307320

308321
const SCEV *DimSCEV = SAI->getDimensionSize(u);
309322

310-
llvm::ValueToSCEVMapTy Map;
311-
for (auto &KV : GlobalMap)
312-
Map[KV.first] = SE.getSCEV(KV.second);
313-
DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map);
314-
Value *DimSize =
315-
expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(),
316-
&*Builder.GetInsertPoint(), nullptr,
317-
StartBlock->getSinglePredecessor());
323+
// DimSize should be invariant to the SCoP, so no BBMap nor LoopToScev
324+
// needed. But GlobalMap may contain SCoP-invariant vars.
325+
Value *DimSize = expandCodeFor(
326+
S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL, "polly",
327+
DimSCEV, DimSCEV->getType(), &*Builder.GetInsertPoint(), &GlobalMap,
328+
/*LoopMap*/ nullptr, StartBlock->getSinglePredecessor());
318329

319330
Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType());
320331

@@ -602,10 +613,10 @@ IslExprBuilder::createOpBooleanConditional(__isl_take isl_ast_expr *Expr) {
602613

603614
auto InsertBB = Builder.GetInsertBlock();
604615
auto InsertPoint = Builder.GetInsertPoint();
605-
auto NextBB = SplitBlock(InsertBB, &*InsertPoint, &DT, &LI);
616+
auto NextBB = SplitBlock(InsertBB, &*InsertPoint, GenDT, GenLI);
606617
BasicBlock *CondBB = BasicBlock::Create(Context, "polly.cond", F);
607-
LI.changeLoopFor(CondBB, LI.getLoopFor(InsertBB));
608-
DT.addNewBlock(CondBB, InsertBB);
618+
GenLI->changeLoopFor(CondBB, GenLI->getLoopFor(InsertBB));
619+
GenDT->addNewBlock(CondBB, InsertBB);
609620

610621
InsertBB->getTerminator()->eraseFromParent();
611622
Builder.SetInsertPoint(InsertBB);

0 commit comments

Comments
 (0)
Please sign in to comment.