Skip to content

Commit e319817

Browse files
committed
[TailDuplicator] Only duplicate the blocks containing computed gotos
1 parent bca647a commit e319817

File tree

3 files changed

+49
-45
lines changed

3 files changed

+49
-45
lines changed

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -986,8 +986,12 @@ class MachineInstr
986986

987987
/// Return true if this is an indirect branch, such as a
988988
/// branch through a register.
989-
bool isIndirectBranch(QueryType Type = AnyInBundle) const {
990-
return hasProperty(MCID::IndirectBranch, Type);
989+
bool isIndirectBranch(QueryType Type = AnyInBundle,
990+
bool IncludeJumpTable = true) const {
991+
return hasProperty(MCID::IndirectBranch, Type) &&
992+
(IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
993+
return Op.isJTI();
994+
}));
991995
}
992996

993997
/// Return true if this is a branch which may fall

llvm/lib/CodeGen/TailDuplicator.cpp

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -603,17 +603,19 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
603603
TailBB.canFallThrough())
604604
return false;
605605

606-
// If the target has hardware branch prediction that can handle indirect
607-
// branches, duplicating them can often make them predictable when there
608-
// are common paths through the code. The limit needs to be high enough
609-
// to allow undoing the effects of tail merging and other optimizations
610-
// that rearrange the predecessors of the indirect branch.
611-
612-
bool HasIndirectbr = false;
606+
// Only duplicate the blocks containing computed gotos. This basically
607+
// unfactors computed gotos that were factored early on in the compilation
608+
// process to speed up edge based data flow. If we do not unfactor them again,
609+
// it can seriously pessimize code with many computed jumps in the source
610+
// code, such as interpreters.
611+
bool HasComputedGoto = false;
613612
if (!TailBB.empty())
614-
HasIndirectbr = TailBB.back().isIndirectBranch();
613+
HasComputedGoto = TailBB.back().isIndirectBranch(
614+
/*Type=*/MachineInstr::AnyInBundle,
615+
// Jump tables are not considered computed gotos.
616+
/*IncludeJumpTable=*/false);
615617

616-
if (HasIndirectbr && PreRegAlloc)
618+
if (HasComputedGoto && PreRegAlloc)
617619
MaxDuplicateCount = TailDupIndirectBranchSize;
618620

619621
// Check the instructions in the block to determine whether tail-duplication
@@ -685,7 +687,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
685687
}
686688
}
687689

688-
if (HasIndirectbr && PreRegAlloc)
690+
if (HasComputedGoto && PreRegAlloc)
689691
return true;
690692

691693
if (IsSimple)

llvm/test/CodeGen/X86/tail-dup-computed-goto.mir

Lines changed: 31 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -179,76 +179,74 @@ jumpTable:
179179
body: |
180180
; CHECK-LABEL: name: jump_table
181181
; CHECK: bb.0:
182-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
182+
; CHECK-NEXT: successors: %bb.1(0x80000000)
183183
; CHECK-NEXT: {{ $}}
184184
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
185185
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
186186
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
187187
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
188188
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY [[COPY]]
189-
; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64_nosp = DEC64r [[COPY1]], implicit-def dead $eflags
190-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY [[COPY1]]
189+
; CHECK-NEXT: {{ $}}
190+
; CHECK-NEXT: bb.1:
191+
; CHECK-NEXT: successors: %bb.2(0x80000000)
192+
; CHECK-NEXT: {{ $}}
193+
; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY1]], %bb.0, %6, %bb.7, %5, %bb.6, %4, %bb.5, %3, %bb.4, %2, %bb.3
194+
; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64_nosp = DEC64r [[PHI]], implicit-def dead $eflags
195+
; CHECK-NEXT: {{ $}}
196+
; CHECK-NEXT: bb.2:
197+
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
198+
; CHECK-NEXT: {{ $}}
191199
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r]], %jump-table.0, $noreg :: (load (s64) from jump-table)
192200
; CHECK-NEXT: {{ $}}
193201
; CHECK-NEXT: bb.3:
194-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
202+
; CHECK-NEXT: successors: %bb.1(0x80000000)
195203
; CHECK-NEXT: {{ $}}
196204
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
197205
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
198206
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
199-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax
200-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY [[COPY3]]
201-
; CHECK-NEXT: [[DEC64r1:%[0-9]+]]:gr64_nosp = DEC64r [[COPY4]], implicit-def dead $eflags
202-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
203-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r1]], %jump-table.0, $noreg :: (load (s64) from jump-table)
207+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rax
208+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY2]]
209+
; CHECK-NEXT: JMP_1 %bb.1
204210
; CHECK-NEXT: {{ $}}
205211
; CHECK-NEXT: bb.4:
206-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
212+
; CHECK-NEXT: successors: %bb.1(0x80000000)
207213
; CHECK-NEXT: {{ $}}
208214
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
209215
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
210216
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
211-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
212-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
213-
; CHECK-NEXT: [[DEC64r2:%[0-9]+]]:gr64_nosp = DEC64r [[COPY7]], implicit-def dead $eflags
214-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY [[COPY7]]
215-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r2]], %jump-table.0, $noreg :: (load (s64) from jump-table)
217+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
218+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
219+
; CHECK-NEXT: JMP_1 %bb.1
216220
; CHECK-NEXT: {{ $}}
217221
; CHECK-NEXT: bb.5:
218-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
222+
; CHECK-NEXT: successors: %bb.1(0x80000000)
219223
; CHECK-NEXT: {{ $}}
220224
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
221225
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
222226
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
223-
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY $rax
224-
; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64 = COPY [[COPY9]]
225-
; CHECK-NEXT: [[DEC64r3:%[0-9]+]]:gr64_nosp = DEC64r [[COPY10]], implicit-def dead $eflags
226-
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64 = COPY [[COPY10]]
227-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r3]], %jump-table.0, $noreg :: (load (s64) from jump-table)
227+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
228+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
229+
; CHECK-NEXT: JMP_1 %bb.1
228230
; CHECK-NEXT: {{ $}}
229231
; CHECK-NEXT: bb.6:
230-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
232+
; CHECK-NEXT: successors: %bb.1(0x80000000)
231233
; CHECK-NEXT: {{ $}}
232234
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
233235
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
234236
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
235-
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gr64 = COPY $rax
236-
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gr64 = COPY [[COPY12]]
237-
; CHECK-NEXT: [[DEC64r4:%[0-9]+]]:gr64_nosp = DEC64r [[COPY13]], implicit-def dead $eflags
238-
; CHECK-NEXT: [[COPY14:%[0-9]+]]:gr64 = COPY [[COPY13]]
239-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r4]], %jump-table.0, $noreg :: (load (s64) from jump-table)
237+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY $rax
238+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY [[COPY8]]
239+
; CHECK-NEXT: JMP_1 %bb.1
240240
; CHECK-NEXT: {{ $}}
241241
; CHECK-NEXT: bb.7:
242-
; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
242+
; CHECK-NEXT: successors: %bb.1(0x80000000)
243243
; CHECK-NEXT: {{ $}}
244244
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
245245
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f5, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
246246
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
247-
; CHECK-NEXT: [[COPY15:%[0-9]+]]:gr64 = COPY $rax
248-
; CHECK-NEXT: [[COPY16:%[0-9]+]]:gr64 = COPY [[COPY15]]
249-
; CHECK-NEXT: [[DEC64r5:%[0-9]+]]:gr64_nosp = DEC64r [[COPY16]], implicit-def dead $eflags
250-
; CHECK-NEXT: [[COPY17:%[0-9]+]]:gr64 = COPY [[COPY16]]
251-
; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r5]], %jump-table.0, $noreg :: (load (s64) from jump-table)
247+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64 = COPY $rax
248+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64 = COPY [[COPY10]]
249+
; CHECK-NEXT: JMP_1 %bb.1
252250
; CHECK-NEXT: {{ $}}
253251
; CHECK-NEXT: bb.8:
254252
bb.0:

0 commit comments

Comments
 (0)