Add -reorder-basic-blocks option and fix nop insertion
1. Basic block reordering can be enabled with -reorder-basic-blocks option enabled.
Blocks will be sorted according to the Reversed Post Traversal Order, but the next
node to visit among all candidate children nodes is selected 'randomly'.
Example:
A
/ \
B C
\ /
D
This CFG can generate two possible layouts:
A-B-C-D or A-C-B-D
2. Fix nop insetion
Add checks to avoiding insertions in empty basic blocks(dead blocks) and bundle locked instructions.
BUG=
R=jpp@chromium.org, jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1255303004.
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index ef49c15..c19d01a 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -371,6 +371,51 @@
assert(Nodes.size() == OldSize);
}
+namespace {
+void getRandomPostOrder(CfgNode *Node, llvm::BitVector &ToVisit,
+ Ice::NodeList &PostOrder,
+ Ice::RandomNumberGenerator *RNG) {
+ assert(ToVisit[Node->getIndex()]);
+ ToVisit[Node->getIndex()] = false;
+ NodeList Outs = Node->getOutEdges();
+ Ice::RandomShuffle(Outs.begin(), Outs.end(),
+ [RNG](int N) { return RNG->next(N); });
+ for (CfgNode *Next : Outs) {
+ if (ToVisit[Next->getIndex()])
+ getRandomPostOrder(Next, ToVisit, PostOrder, RNG);
+ }
+ PostOrder.push_back(Node);
+}
+} // end of anonymous namespace
+
+void Cfg::shuffleNodes() {
+ if (!Ctx->getFlags().shouldReorderBasicBlocks())
+ return;
+
+ NodeList ReversedReachable;
+ NodeList Unreachable;
+ llvm::BitVector ToVisit(Nodes.size(), true);
+ // Traverse from entry node.
+ getRandomPostOrder(getEntryNode(), ToVisit, ReversedReachable,
+ &Ctx->getRNG());
+ // Collect the unreachable nodes.
+ for (CfgNode *Node : Nodes)
+ if (ToVisit[Node->getIndex()])
+ Unreachable.push_back(Node);
+ // Copy the layout list to the Nodes.
+ SizeT OldSize = Nodes.size();
+ (void)OldSize;
+ Nodes.clear();
+ for (CfgNode *Node : reverse_range(ReversedReachable))
+ Nodes.emplace_back(Node);
+ for (CfgNode *Node : Unreachable) {
+ Nodes.emplace_back(Node);
+ }
+ assert(Nodes.size() == OldSize);
+
+ dump("After basic block shuffling");
+}
+
void Cfg::doArgLowering() {
TimerMarker T(TimerStack::TT_doArgLowering, this);
getTarget()->lowerArguments();
@@ -383,6 +428,8 @@
}
void Cfg::doNopInsertion() {
+ if (!Ctx->getFlags().shouldDoNopInsertion())
+ return;
TimerMarker T(TimerStack::TT_doNopInsertion, this);
for (CfgNode *Node : Nodes)
Node->doNopInsertion();
diff --git a/src/IceCfg.h b/src/IceCfg.h
index f2b5295..499e226 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -180,6 +180,7 @@
void deletePhis();
void advancedPhiLowering();
void reorderNodes();
+ void shuffleNodes();
void doAddressOpt();
void doArgLowering();
void doNopInsertion();
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 78063e3..cae5d37 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -500,18 +500,22 @@
TargetLowering *Target = Func->getTarget();
LoweringContext &Context = Target->getContext();
Context.init(this);
+ Context.setInsertPoint(Context.getCur());
+ // Do not insert nop in bundle locked instructions.
+ bool PauseNopInsertion = false;
while (!Context.atEnd()) {
- Target->doNopInsertion();
+ if (llvm::isa<InstBundleLock>(Context.getCur())) {
+ PauseNopInsertion = true;
+ } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
+ PauseNopInsertion = false;
+ }
+ if (!PauseNopInsertion)
+ Target->doNopInsertion();
// Ensure Cur=Next, so that the nops are inserted before the current
// instruction rather than after.
- Context.advanceNext();
Context.advanceCur();
+ Context.advanceNext();
}
- // Insert before all instructions.
- Context.setInsertPoint(getInsts().begin());
- Context.advanceNext();
- Context.advanceCur();
- Target->doNopInsertion();
}
// Drives the target lowering. Passes the current instruction and the
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 5a4abe6..5066a4b 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -284,6 +284,12 @@
cl::desc("The threshold for immediates randomization and pooling"),
cl::init(0xffff));
+// Command line option for turning on basic block shuffling.
+cl::opt<bool> ReorderBasicBlocks(
+ "reorder-basic-blocks",
+ cl::desc("Shuffle the layout of basic blocks in each functions"),
+ cl::init(false));
+
// Command line option for turning on function layout reordering.
cl::opt<bool> ReorderFunctions(
"reorder-functions",
@@ -341,6 +347,7 @@
OutFlags.PhiEdgeSplit = false;
OutFlags.RandomNopInsertion = false;
OutFlags.RandomRegAlloc = false;
+ OutFlags.ReorderBasicBlocks = false;
OutFlags.ReorderFunctions = false;
OutFlags.ReorderGlobalVariables = false;
OutFlags.ReorderPooledConstants = false;
@@ -398,8 +405,17 @@
OutFlags.setOptLevel(::OLevel);
OutFlags.setPhiEdgeSplit(::EnablePhiEdgeSplit);
OutFlags.setRandomSeed(::RandomSeed);
+ OutFlags.setRandomizeAndPoolImmediatesOption(
+ ::RandomizeAndPoolImmediatesOption);
+ OutFlags.setRandomizeAndPoolImmediatesThreshold(
+ ::RandomizeAndPoolImmediatesThreshold);
+ OutFlags.setReorderFunctionsWindowSize(::ReorderFunctionsWindowSize);
+ OutFlags.setShouldReorderBasicBlocks(::ReorderBasicBlocks);
OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion);
OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation);
+ OutFlags.setShouldReorderFunctions(::ReorderFunctions);
+ OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables);
+ OutFlags.setShouldReorderPooledConstants(::ReorderPooledConstants);
OutFlags.setSkipUnimplemented(::SkipUnimplemented);
OutFlags.setSubzeroTimingEnabled(::SubzeroTimingEnabled);
OutFlags.setTargetArch(::TargetArch);
@@ -414,22 +430,6 @@
OutFlags.setMaxNopsPerInstruction(::MaxNopsPerInstruction);
OutFlags.setNopProbabilityAsPercentage(::NopProbabilityAsPercentage);
OutFlags.setVerbose(VMask);
-
- // Set for immediates randomization or pooling option.
- OutFlags.setRandomizeAndPoolImmediatesOption(
- ::RandomizeAndPoolImmediatesOption);
- OutFlags.setRandomizeAndPoolImmediatesThreshold(
- ::RandomizeAndPoolImmediatesThreshold);
-
- // Set for function reordering options.
- OutFlags.setShouldReorderFunctions(::ReorderFunctions);
- OutFlags.setReorderFunctionsWindowSize(::ReorderFunctionsWindowSize);
-
- // Set for global variable reordering option.
- OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables);
-
- // Set for pooled constant reordering option.
- OutFlags.setShouldReorderPooledConstants(::ReorderPooledConstants);
}
void ClFlags::getParsedClFlagsExtra(ClFlagsExtra &OutFlagsExtra) {
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 549bf16..244790b 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -153,6 +153,11 @@
return RandomizeAndPoolImmediatesThreshold;
}
+ bool shouldReorderBasicBlocks() const { return ReorderBasicBlocks; }
+ void setShouldReorderBasicBlocks(bool NewValue) {
+ ReorderBasicBlocks = NewValue;
+ }
+
void setShouldReorderFunctions(bool Option) { ReorderFunctions = Option; }
bool shouldReorderFunctions() const { return ReorderFunctions; }
@@ -229,6 +234,7 @@
bool PhiEdgeSplit;
bool RandomNopInsertion;
bool RandomRegAlloc;
+ bool ReorderBasicBlocks;
bool ReorderFunctions;
bool ReorderGlobalVariables;
bool ReorderPooledConstants;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index b4c4ea3..3c7ad5e 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -400,6 +400,9 @@
Func->contractEmptyNodes();
Func->reorderNodes();
+ // Shuffle basic block order if -reorder-basic-blocks is enabled.
+ Func->shuffleNodes();
+
// Branch optimization. This needs to be done just before code emission. In
// particular, no transformations that insert or reorder CfgNodes should be
// done after branch optimization. We go ahead and do it before nop insertion
@@ -407,9 +410,8 @@
Func->doBranchOpt();
Func->dump("After branch optimization");
- // Nop insertion
- if (Ctx->getFlags().shouldDoNopInsertion())
- Func->doNopInsertion();
+ // Nop insertion if -nop-insertion is enabled.
+ Func->doNopInsertion();
// Mark nodes that require sandbox alignment
if (Ctx->getFlags().getUseSandboxing())
@@ -446,10 +448,11 @@
return;
Func->dump("After stack frame mapping");
- // Nop insertion
- if (Ctx->getFlags().shouldDoNopInsertion()) {
- Func->doNopInsertion();
- }
+ // Shuffle basic block order if -reorder-basic-blocks is enabled.
+ Func->shuffleNodes();
+
+ // Nop insertion if -nop-insertion is enabled.
+ Func->doNopInsertion();
// Mark nodes that require sandbox alignment
if (Ctx->getFlags().getUseSandboxing())
diff --git a/tests_lit/llvm2ice_tests/nop-insertion.ll b/tests_lit/llvm2ice_tests/nop-insertion.ll
index 197ddd3..160d933 100644
--- a/tests_lit/llvm2ice_tests/nop-insertion.ll
+++ b/tests_lit/llvm2ice_tests/nop-insertion.ll
@@ -4,6 +4,7 @@
; Use filetype=asm because this currently depends on the # variant
; assembler comment.
+
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
; RUN: | FileCheck %s --check-prefix=PROB50
@@ -13,88 +14,121 @@
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=2 \
; RUN: | FileCheck %s --check-prefix=MAXNOPS2
+; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion -sandbox\
+; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
+; RUN: | FileCheck %s --check-prefix=SANDBOX50
+
define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
entry:
%res = mul <4 x i32> %a, %b
ret <4 x i32> %res
+
; PROB50-LABEL: mul_v4i32
-; PROB50: nop # variant = 3
+; PROB50: nop # variant = 4
; PROB50: subl $60, %esp
-; PROB50: nop # variant = 4
; PROB50: movups %xmm0, 32(%esp)
-; PROB50: movups %xmm1, 16(%esp)
; PROB50: nop # variant = 0
-; PROB50: movups 32(%esp), %xmm0
+; PROB50: movups %xmm1, 16(%esp)
; PROB50: nop # variant = 4
+; PROB50: movups 32(%esp), %xmm0
; PROB50: pshufd $49, 32(%esp), %xmm1
; PROB50: pshufd $49, 16(%esp), %xmm2
; PROB50: pmuludq 16(%esp), %xmm0
-; PROB50: pmuludq %xmm2, %xmm1
; PROB50: nop # variant = 0
+; PROB50: pmuludq %xmm2, %xmm1
; PROB50: shufps $136, %xmm1, %xmm0
-; PROB50: pshufd $216, %xmm0, %xmm0
; PROB50: nop # variant = 2
+; PROB50: pshufd $216, %xmm0, %xmm0
; PROB50: movups %xmm0, (%esp)
; PROB50: movups (%esp), %xmm0
-; PROB50: addl $60, %esp
; PROB50: nop # variant = 0
+; PROB50: addl $60, %esp
+; PROB50: nop # variant = 3
; PROB50: ret
; PROB90-LABEL: mul_v4i32
-; PROB90: nop # variant = 3
+; PROB90: nop # variant = 4
; PROB90: subl $60, %esp
-; PROB90: nop # variant = 4
+; PROB90: nop # variant = 3
; PROB90: movups %xmm0, 32(%esp)
-; PROB90: nop # variant = 3
+; PROB90: nop # variant = 2
; PROB90: movups %xmm1, 16(%esp)
-; PROB90: nop # variant = 2
+; PROB90: nop # variant = 3
; PROB90: movups 32(%esp), %xmm0
-; PROB90: nop # variant = 3
+; PROB90: nop # variant = 4
; PROB90: pshufd $49, 32(%esp), %xmm1
-; PROB90: nop # variant = 4
-; PROB90: pshufd $49, 16(%esp), %xmm2
; PROB90: nop # variant = 0
+; PROB90: pshufd $49, 16(%esp), %xmm2
+; PROB90: nop # variant = 2
; PROB90: pmuludq 16(%esp), %xmm0
-; PROB90: nop # variant = 2
-; PROB90: pmuludq %xmm2, %xmm1
; PROB90: nop # variant = 3
+; PROB90: pmuludq %xmm2, %xmm1
+; PROB90: nop # variant = 4
; PROB90: shufps $136, %xmm1, %xmm0
-; PROB90: nop # variant = 4
+; PROB90: nop # variant = 2
; PROB90: pshufd $216, %xmm0, %xmm0
-; PROB90: nop # variant = 2
-; PROB90: movups %xmm0, (%esp)
; PROB90: nop # variant = 4
-; PROB90: movups (%esp), %xmm0
+; PROB90: movups %xmm0, (%esp)
; PROB90: nop # variant = 2
+; PROB90: movups (%esp), %xmm0
+; PROB90: nop # variant = 3
; PROB90: addl $60, %esp
; PROB90: nop # variant = 3
; PROB90: ret
; MAXNOPS2-LABEL: mul_v4i32
+; MAXNOPS2: nop # variant = 4
; MAXNOPS2: subl $60, %esp
+; MAXNOPS2: nop # variant = 0
; MAXNOPS2: nop # variant = 4
; MAXNOPS2: movups %xmm0, 32(%esp)
-; MAXNOPS2: nop # variant = 0
-; MAXNOPS2: nop # variant = 4
; MAXNOPS2: movups %xmm1, 16(%esp)
-; MAXNOPS2: movups 32(%esp), %xmm0
; MAXNOPS2: nop # variant = 0
-; MAXNOPS2: pshufd $49, 32(%esp), %xmm1
+; MAXNOPS2: movups 32(%esp), %xmm0
; MAXNOPS2: nop # variant = 2
+; MAXNOPS2: pshufd $49, 32(%esp), %xmm1
; MAXNOPS2: pshufd $49, 16(%esp), %xmm2
-; MAXNOPS2: pmuludq 16(%esp), %xmm0
; MAXNOPS2: nop # variant = 0
; MAXNOPS2: nop # variant = 3
+; MAXNOPS2: pmuludq 16(%esp), %xmm0
; MAXNOPS2: pmuludq %xmm2, %xmm1
; MAXNOPS2: shufps $136, %xmm1, %xmm0
-; MAXNOPS2: pshufd $216, %xmm0, %xmm0
; MAXNOPS2: nop # variant = 3
-; MAXNOPS2: movups %xmm0, (%esp)
+; MAXNOPS2: pshufd $216, %xmm0, %xmm0
; MAXNOPS2: nop # variant = 0
-; MAXNOPS2: movups (%esp), %xmm0
+; MAXNOPS2: movups %xmm0, (%esp)
; MAXNOPS2: nop # variant = 2
-; MAXNOPS2: addl $60, %esp
+; MAXNOPS2: movups (%esp), %xmm0
; MAXNOPS2: nop # variant = 4
+; MAXNOPS2: addl $60, %esp
; MAXNOPS2: ret
+
+; SANDBOX50-LABEL: mul_v4i32
+; SANDBOX50: nop # variant = 4
+; SANDBOX50: subl $60, %esp
+; SANDBOX50: movups %xmm0, 32(%esp)
+; SANDBOX50: nop # variant = 0
+; SANDBOX50: movups %xmm1, 16(%esp)
+; SANDBOX50: nop # variant = 4
+; SANDBOX50: movups 32(%esp), %xmm0
+; SANDBOX50: pshufd $49, 32(%esp), %xmm1
+; SANDBOX50: pshufd $49, 16(%esp), %xmm2
+; SANDBOX50: pmuludq 16(%esp), %xmm0
+; SANDBOX50: nop # variant = 0
+; SANDBOX50: pmuludq %xmm2, %xmm1
+; SANDBOX50: shufps $136, %xmm1, %xmm0
+; SANDBOX50: nop # variant = 2
+; SANDBOX50: pshufd $216, %xmm0, %xmm0
+; SANDBOX50: movups %xmm0, (%esp)
+; SANDBOX50: movups (%esp), %xmm0
+; SANDBOX50: nop # variant = 0
+; SANDBOX50: addl $60, %esp
+; SANDBOX50: nop # variant = 3
+; SANDBOX50: pop %ecx
+; SANDBOX50: .bundle_lock
+; SANDBOX50: andl $-32, %ecx
+; SANDBOX50: jmp *%ecx
+; SANDBOX50: .bundle_unlock
+
}
diff --git a/tests_lit/llvm2ice_tests/reorder-basic-blocks.ll b/tests_lit/llvm2ice_tests/reorder-basic-blocks.ll
new file mode 100644
index 0000000..9df88eb
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/reorder-basic-blocks.ll
@@ -0,0 +1,41 @@
+; Trivial smoke test of basic block reordering. Different random seeds should
+; generate different basic block layout.
+; REQUIRES allow_dump
+
+; RUN: %p2i -i %s --filetype=asm --args -O2 -sz-seed=1 \
+; RUN: -reorder-basic-blocks -threads=0 \
+; RUN: | FileCheck %s --check-prefix=SEED1
+; RUN: %p2i -i %s --filetype=asm --args -O2 -sz-seed=2 \
+; RUN: -reorder-basic-blocks -threads=0 \
+; RUN: | FileCheck %s --check-prefix=SEED2
+
+define void @basic_block_reordering(i32 %foo, i32 %bar) {
+entry:
+ %r1 = icmp eq i32 %foo, %bar
+ br i1 %r1, label %BB1, label %BB2
+BB1:
+ %r2 = icmp sgt i32 %foo, %bar
+ br i1 %r2, label %BB3, label %BB4
+BB2:
+ %r3 = icmp slt i32 %foo, %bar
+ br i1 %r3, label %BB3, label %BB4
+BB3:
+ ret void
+BB4:
+ ret void
+
+
+; SEED1-LABEL: basic_block_reordering:
+; SEED1: .Lbasic_block_reordering$entry:
+; SEED1: .Lbasic_block_reordering$BB1:
+; SEED1: .Lbasic_block_reordering$BB2:
+; SEED1: .Lbasic_block_reordering$BB3:
+; SEED1: .Lbasic_block_reordering$BB4:
+
+; SEED2-LABEL: basic_block_reordering:
+; SEED2: .Lbasic_block_reordering$entry:
+; SEED2: .Lbasic_block_reordering$BB2:
+; SEED2: .Lbasic_block_reordering$BB1:
+; SEED2: .Lbasic_block_reordering$BB4:
+; SEED2: .Lbasic_block_reordering$BB3
+}