Subzero: Randomly insert nops.
Adds command line options -nop-insertion, -nop-insertion-probability=X, and -max-nops-per-instruction=X.
BUG=none
R=jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/463563006
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 49e9806..faaabe9 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -121,6 +121,12 @@
}
}
+void Cfg::doNopInsertion() {
+ for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ (*I)->doNopInsertion();
+ }
+}
+
void Cfg::genCode() {
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->genCode();
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 856b145..e12bc9d 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -88,6 +88,7 @@
void deletePhis();
void doAddressOpt();
void doArgLowering();
+ void doNopInsertion();
void genCode();
void genFrame();
void livenessLightweight();
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 22915b2..696a2f0 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -191,6 +191,24 @@
}
}
+void CfgNode::doNopInsertion() {
+ TargetLowering *Target = Func->getTarget();
+ LoweringContext &Context = Target->getContext();
+ Context.init(this);
+ while (!Context.atEnd()) {
+ Target->doNopInsertion();
+ // Ensure Cur=Next, so that the nops are inserted before the current
+ // instruction rather than after.
+ Context.advanceNext();
+ Context.advanceCur();
+ }
+ // Insert before all instructions.
+ Context.setInsertPoint(getInsts().begin());
+ Context.advanceNext();
+ Context.advanceCur();
+ Target->doNopInsertion();
+}
+
// Drives the target lowering. Passes the current instruction and the
// next non-deleted instruction for target lowering.
void CfgNode::genCode() {
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index ea50dca..bf11844 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -55,6 +55,7 @@
void placePhiStores();
void deletePhis();
void doAddressOpt();
+ void doNopInsertion();
void genCode();
void livenessLightweight();
bool liveness(Liveness *Liveness);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 0b99c80..a1a4d68 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -241,6 +241,9 @@
addSource(Source);
}
+InstX8632Nop::InstX8632Nop(Cfg *Func, InstX8632Nop::NopVariant Variant)
+ : InstX8632(Func, InstX8632::Nop, 0, NULL), Variant(Variant) {}
+
InstX8632Fld::InstX8632Fld(Cfg *Func, Operand *Src)
: InstX8632(Func, InstX8632::Fld, 1, NULL) {
addSource(Src);
@@ -1058,6 +1061,17 @@
dumpSources(Func);
}
+void InstX8632Nop::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ // TODO: Emit the right code for each variant.
+ Str << "\tnop\t# variant = " << Variant << "\n";
+}
+
+void InstX8632Nop::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "nop (variant = " << Variant << ")";
+}
+
void InstX8632Fld::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 64ce4a0..834e061 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -175,6 +175,7 @@
Mulps,
Mulss,
Neg,
+ Nop,
Or,
Padd,
Pand,
@@ -1060,6 +1061,28 @@
virtual ~InstX8632Movzx() {}
};
+// Nop instructions of varying length
+class InstX8632Nop : public InstX8632 {
+public:
+ // TODO: Replace with enum.
+ typedef unsigned NopVariant;
+
+ static InstX8632Nop *create(Cfg *Func, NopVariant Variant) {
+ return new (Func->allocate<InstX8632Nop>()) InstX8632Nop(Func, Variant);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Nop); }
+
+private:
+ InstX8632Nop(Cfg *Func, SizeT Length);
+ InstX8632Nop(const InstX8632Nop &) LLVM_DELETED_FUNCTION;
+ InstX8632Nop &operator=(const InstX8632Nop &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Nop() {}
+
+ NopVariant Variant;
+};
+
// Fld - load a value onto the x87 FP stack.
class InstX8632Fld : public InstX8632 {
public:
diff --git a/src/IceRNG.cpp b/src/IceRNG.cpp
index 6a9f515..b874d83 100644
--- a/src/IceRNG.cpp
+++ b/src/IceRNG.cpp
@@ -26,6 +26,8 @@
RandomSeed("rng-seed", cl::desc("Seed the random number generator"),
cl::init(time(0)));
+const unsigned MAX = 2147483647;
+
} // end of anonymous namespace
// TODO(wala,stichnot): Switch to RNG implementation from LLVM or C++11.
@@ -39,8 +41,12 @@
uint64_t RandomNumberGenerator::next(uint64_t Max) {
// Lewis, Goodman, and Miller (1969)
- State = (16807 * State) % 2147483647;
+ State = (16807 * State) % MAX;
return State % Max;
}
+bool RandomNumberGeneratorWrapper::getTrueWithProbability(float Probability) {
+ return RNG.next(MAX) < Probability * MAX;
+}
+
} // end of namespace Ice
diff --git a/src/IceRNG.h b/src/IceRNG.h
index 423aee0..4d6c397 100644
--- a/src/IceRNG.h
+++ b/src/IceRNG.h
@@ -16,6 +16,7 @@
#include <stdint.h>
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
namespace Ice {
@@ -25,9 +26,31 @@
uint64_t next(uint64_t Max);
private:
+ RandomNumberGenerator(const RandomNumberGenerator &) LLVM_DELETED_FUNCTION;
+ RandomNumberGenerator &
+ operator=(const RandomNumberGenerator &) LLVM_DELETED_FUNCTION;
+
uint64_t State;
};
+// This class adds additional random number generator utilities. The
+// reason for the wrapper class is that we want to keep the
+// RandomNumberGenerator interface identical to LLVM's.
+class RandomNumberGeneratorWrapper {
+public:
+ uint64_t next(uint64_t Max) { return RNG.next(Max); }
+ bool getTrueWithProbability(float Probability);
+ RandomNumberGeneratorWrapper(RandomNumberGenerator &RNG) : RNG(RNG) {}
+
+private:
+ RandomNumberGeneratorWrapper(const RandomNumberGeneratorWrapper &)
+ LLVM_DELETED_FUNCTION;
+ RandomNumberGeneratorWrapper &
+ operator=(const RandomNumberGeneratorWrapper &) LLVM_DELETED_FUNCTION;
+
+ RandomNumberGenerator &RNG;
+};
+
} // end of namespace Ice
#endif // SUBZERO_SRC_ICERNG_H
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 0034de5..057a3ea 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -22,8 +22,25 @@
#include "IceTargetLowering.h"
#include "IceTargetLoweringX8632.h"
+#include "llvm/Support/CommandLine.h"
+
namespace Ice {
+namespace {
+
+namespace cl = llvm::cl;
+cl::opt<bool> DoNopInsertion("nop-insertion", cl::desc("Randomly insert NOPs"),
+ cl::init(false));
+
+cl::opt<int> MaxNopsPerInstruction(
+ "max-nops-per-instruction",
+ cl::desc("Max number of nops to insert per instruction"), cl::init(1));
+
+cl::opt<int> NopProbabilityAsPercentage(
+ "nop-insertion-percentage",
+ cl::desc("Nop insertion probability as percentage"), cl::init(10));
+} // end of anonymous namespace
+
void LoweringContext::init(CfgNode *N) {
Node = N;
Begin = getNode()->getInsts().begin();
@@ -90,6 +107,20 @@
Context.advanceNext();
}
+bool TargetLowering::shouldDoNopInsertion() const { return DoNopInsertion; }
+
+void TargetLowering::doNopInsertion() {
+ Inst *I = *Context.getCur();
+ bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
+ llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
+ I->isDeleted();
+ if (!ShouldSkip) {
+ for (int I = 0; I < MaxNopsPerInstruction; ++I) {
+ randomlyInsertNop(NopProbabilityAsPercentage / 100.0);
+ }
+ }
+}
+
// Lowers a single instruction according to the information in
// Context, by checking the Context.Cur instruction kind and calling
// the appropriate lowering method. The lowering method should insert
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 39e08de..6bf1424 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -121,6 +121,8 @@
// Tries to do address mode optimization on a single instruction.
void doAddressOpt();
+ // Randomly insert NOPs.
+ void doNopInsertion();
// Lowers a single instruction.
void lower();
@@ -136,6 +138,7 @@
virtual SizeT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
bool hasComputedFrame() const { return HasComputedFrame; }
+ bool shouldDoNopInsertion() const;
int32_t getStackAdjustment() const { return StackAdjustment; }
void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; }
void resetStackAdjustment() { StackAdjustment = 0; }
@@ -193,6 +196,7 @@
virtual void doAddressOptLoad() {}
virtual void doAddressOptStore() {}
+ virtual void randomlyInsertNop(float Probability) = 0;
// This gives the target an opportunity to post-process the lowered
// expansion before returning. The primary intention is to do some
// Register Manager activity as necessary, specifically to eagerly
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 35792aa..d36ad16 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -134,6 +134,8 @@
const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
// The base 2 logarithm of the width in bytes of the largest stack slot
const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
+// The number of different NOP instructions
+const uint32_t X86_NUM_NOP_VARIANTS = 5;
// Value and Alignment are in bytes. Return Value adjusted to the next
// highest multiple of Alignment.
@@ -391,6 +393,11 @@
return;
T_genFrame.printElapsedUs(Context, "genFrame()");
Func->dump("After stack frame mapping");
+
+ // Nop insertion
+ if (shouldDoNopInsertion()) {
+ Func->doNopInsertion();
+ }
}
void TargetX8632::translateOm1() {
@@ -429,6 +436,11 @@
return;
T_genFrame.printElapsedUs(Context, "genFrame()");
Func->dump("After stack frame mapping");
+
+ // Nop insertion
+ if (shouldDoNopInsertion()) {
+ Func->doNopInsertion();
+ }
}
IceString TargetX8632::RegNames[] = {
@@ -3670,6 +3682,13 @@
}
}
+void TargetX8632::randomlyInsertNop(float Probability) {
+ RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
+ if (RNG.getTrueWithProbability(Probability)) {
+ _nop(RNG.next(X86_NUM_NOP_VARIANTS));
+ }
+}
+
void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
Func->setError("Phi found in regular instruction list");
}
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index ebe6ba9..499790e 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -104,6 +104,7 @@
virtual void lowerUnreachable(const InstUnreachable *Inst);
virtual void doAddressOptLoad();
virtual void doAddressOptStore();
+ virtual void randomlyInsertNop(float Probability);
// Naive lowering of cmpxchg.
void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
@@ -327,6 +328,9 @@
void _neg(Variable *SrcDest) {
Context.insert(InstX8632Neg::create(Func, SrcDest));
}
+ void _nop(SizeT Variant) {
+ Context.insert(InstX8632Nop::create(Func, Variant));
+ }
void _or(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Or::create(Func, Dest, Src0));
}
diff --git a/tests_lit/llvm2ice_tests/nop-insertion.ll b/tests_lit/llvm2ice_tests/nop-insertion.ll
new file mode 100644
index 0000000..8c0d242
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/nop-insertion.ll
@@ -0,0 +1,93 @@
+; This is a smoke test of nop insertion.
+
+; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
+; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB50
+; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=90 \
+; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB90
+; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
+; RUN: -max-nops-per-instruction=2 %s | FileCheck %s --check-prefix=MAXNOPS2
+
+define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
+entry:
+ %res = mul <4 x i32> %a, %b
+ ret <4 x i32> %res
+; PROB50-LABEL: mul_v4i32:
+; PROB50: nop # variant = 3
+; PROB50: sub esp, 60
+; PROB50: nop # variant = 4
+; PROB50: movups xmmword ptr [esp+32], xmm0
+; PROB50: movups xmmword ptr [esp+16], xmm1
+; PROB50: nop # variant = 0
+; PROB50: movups xmm0, xmmword ptr [esp+32]
+; PROB50: nop # variant = 4
+; PROB50: pshufd xmm1, xmmword ptr [esp+32], 49
+; PROB50: pshufd xmm2, xmmword ptr [esp+16], 49
+; PROB50: pmuludq xmm0, xmmword ptr [esp+16]
+; PROB50: pmuludq xmm1, xmm2
+; PROB50: nop # variant = 0
+; PROB50: shufps xmm0, xmm1, 136
+; PROB50: pshufd xmm3, xmm0, 216
+; PROB50: nop # variant = 2
+; PROB50: movups xmmword ptr [esp], xmm3
+; PROB50: movups xmm0, xmmword ptr [esp]
+; PROB50: add esp, 60
+; PROB50: nop # variant = 0
+; PROB50: ret
+
+; PROB90-LABEL: mul_v4i32:
+; PROB90: nop # variant = 3
+; PROB90: sub esp, 60
+; PROB90: nop # variant = 4
+; PROB90: movups xmmword ptr [esp+32], xmm0
+; PROB90: nop # variant = 3
+; PROB90: movups xmmword ptr [esp+16], xmm1
+; PROB90: nop # variant = 2
+; PROB90: movups xmm0, xmmword ptr [esp+32]
+; PROB90: nop # variant = 3
+; PROB90: pshufd xmm1, xmmword ptr [esp+32], 49
+; PROB90: nop # variant = 4
+; PROB90: pshufd xmm2, xmmword ptr [esp+16], 49
+; PROB90: nop # variant = 0
+; PROB90: pmuludq xmm0, xmmword ptr [esp+16]
+; PROB90: nop # variant = 2
+; PROB90: pmuludq xmm1, xmm2
+; PROB90: nop # variant = 3
+; PROB90: shufps xmm0, xmm1, 136
+; PROB90: nop # variant = 4
+; PROB90: pshufd xmm3, xmm0, 216
+; PROB90: nop # variant = 2
+; PROB90: movups xmmword ptr [esp], xmm3
+; PROB90: nop # variant = 4
+; PROB90: movups xmm0, xmmword ptr [esp]
+; PROB90: nop # variant = 2
+; PROB90: add esp, 60
+; PROB90: nop # variant = 3
+; PROB90: ret
+
+; MAXNOPS2-LABEL: mul_v4i32:
+; MAXNOPS2: sub esp, 60
+; MAXNOPS2: nop # variant = 4
+; MAXNOPS2: movups xmmword ptr [esp+32], xmm0
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: nop # variant = 4
+; MAXNOPS2: movups xmmword ptr [esp+16], xmm1
+; MAXNOPS2: movups xmm0, xmmword ptr [esp+32]
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: pshufd xmm1, xmmword ptr [esp+32], 49
+; MAXNOPS2: nop # variant = 2
+; MAXNOPS2: pshufd xmm2, xmmword ptr [esp+16], 49
+; MAXNOPS2: pmuludq xmm0, xmmword ptr [esp+16]
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: nop # variant = 3
+; MAXNOPS2: pmuludq xmm1, xmm2
+; MAXNOPS2: shufps xmm0, xmm1, 136
+; MAXNOPS2: pshufd xmm3, xmm0, 216
+; MAXNOPS2: nop # variant = 3
+; MAXNOPS2: movups xmmword ptr [esp], xmm3
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: movups xmm0, xmmword ptr [esp]
+; MAXNOPS2: nop # variant = 2
+; MAXNOPS2: add esp, 60
+; MAXNOPS2: nop # variant = 4
+; MAXNOPS2: ret
+}