Subzero: Randomly insert nops. Adds command line options -nop-insertion, -nop-insertion-probability=X, and -max-nops-per-instruction=X. BUG=none R=jvoung@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/463563006

commit: c3302746b21dcb4469efef8d484adba47c9d87e9 [log] [tgz]
author: Matt Wala <wala@chromium.org> Fri Aug 15 16:21:56 2014 -0700
committer: Matt Wala <wala@chromium.org> Fri Aug 15 16:21:56 2014 -0700
tree: 5746d018deb6c088b6d19d787c3c86ab4c6bc118
parent: 9dbe38e31e72aec9869effaa325b2bab23eb8816 [diff]
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 49e9806..faaabe9 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp

@@ -121,6 +121,12 @@
   }
 }
 
+void Cfg::doNopInsertion() {
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->doNopInsertion();
+  }
+}
+
 void Cfg::genCode() {
   for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
     (*I)->genCode();

diff --git a/src/IceCfg.h b/src/IceCfg.h
index 856b145..e12bc9d 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h

@@ -88,6 +88,7 @@
   void deletePhis();
   void doAddressOpt();
   void doArgLowering();
+  void doNopInsertion();
   void genCode();
   void genFrame();
   void livenessLightweight();

diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 22915b2..696a2f0 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp

@@ -191,6 +191,24 @@
   }
 }
 
+void CfgNode::doNopInsertion() {
+  TargetLowering *Target = Func->getTarget();
+  LoweringContext &Context = Target->getContext();
+  Context.init(this);
+  while (!Context.atEnd()) {
+    Target->doNopInsertion();
+    // Ensure Cur=Next, so that the nops are inserted before the current
+    // instruction rather than after.
+    Context.advanceNext();
+    Context.advanceCur();
+  }
+  // Insert before all instructions.
+  Context.setInsertPoint(getInsts().begin());
+  Context.advanceNext();
+  Context.advanceCur();
+  Target->doNopInsertion();
+}
+
 // Drives the target lowering.  Passes the current instruction and the
 // next non-deleted instruction for target lowering.
 void CfgNode::genCode() {

diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index ea50dca..bf11844 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h

@@ -55,6 +55,7 @@
   void placePhiStores();
   void deletePhis();
   void doAddressOpt();
+  void doNopInsertion();
   void genCode();
   void livenessLightweight();
   bool liveness(Liveness *Liveness);

diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 0b99c80..a1a4d68 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp

@@ -241,6 +241,9 @@
   addSource(Source);
 }
 
+InstX8632Nop::InstX8632Nop(Cfg *Func, InstX8632Nop::NopVariant Variant)
+    : InstX8632(Func, InstX8632::Nop, 0, NULL), Variant(Variant) {}
+
 InstX8632Fld::InstX8632Fld(Cfg *Func, Operand *Src)
     : InstX8632(Func, InstX8632::Fld, 1, NULL) {
   addSource(Src);
@@ -1058,6 +1061,17 @@
   dumpSources(Func);
 }
 
+void InstX8632Nop::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  // TODO: Emit the right code for each variant.
+  Str << "\tnop\t# variant = " << Variant << "\n";
+}
+
+void InstX8632Nop::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  Str << "nop (variant = " << Variant << ")";
+}
+
 void InstX8632Fld::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 1);

diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 64ce4a0..834e061 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h

@@ -175,6 +175,7 @@
     Mulps,
     Mulss,
     Neg,
+    Nop,
     Or,
     Padd,
     Pand,
@@ -1060,6 +1061,28 @@
   virtual ~InstX8632Movzx() {}
 };
 
+// Nop instructions of varying length
+class InstX8632Nop : public InstX8632 {
+public:
+  // TODO: Replace with enum.
+  typedef unsigned NopVariant;
+
+  static InstX8632Nop *create(Cfg *Func, NopVariant Variant) {
+    return new (Func->allocate<InstX8632Nop>()) InstX8632Nop(Func, Variant);
+  }
+  virtual void emit(const Cfg *Func) const;
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Nop); }
+
+private:
+  InstX8632Nop(Cfg *Func, SizeT Length);
+  InstX8632Nop(const InstX8632Nop &) LLVM_DELETED_FUNCTION;
+  InstX8632Nop &operator=(const InstX8632Nop &) LLVM_DELETED_FUNCTION;
+  virtual ~InstX8632Nop() {}
+
+  NopVariant Variant;
+};
+
 // Fld - load a value onto the x87 FP stack.
 class InstX8632Fld : public InstX8632 {
 public:

diff --git a/src/IceRNG.cpp b/src/IceRNG.cpp
index 6a9f515..b874d83 100644
--- a/src/IceRNG.cpp
+++ b/src/IceRNG.cpp

@@ -26,6 +26,8 @@
 RandomSeed("rng-seed", cl::desc("Seed the random number generator"),
            cl::init(time(0)));
 
+const unsigned MAX = 2147483647;
+
 } // end of anonymous namespace
 
 // TODO(wala,stichnot): Switch to RNG implementation from LLVM or C++11.
@@ -39,8 +41,12 @@
 
 uint64_t RandomNumberGenerator::next(uint64_t Max) {
   // Lewis, Goodman, and Miller (1969)
-  State = (16807 * State) % 2147483647;
+  State = (16807 * State) % MAX;
   return State % Max;
 }
 
+bool RandomNumberGeneratorWrapper::getTrueWithProbability(float Probability) {
+  return RNG.next(MAX) < Probability * MAX;
+}
+
 } // end of namespace Ice

diff --git a/src/IceRNG.h b/src/IceRNG.h
index 423aee0..4d6c397 100644
--- a/src/IceRNG.h
+++ b/src/IceRNG.h

@@ -16,6 +16,7 @@
 
 #include <stdint.h>
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
 
 namespace Ice {
 
@@ -25,9 +26,31 @@
   uint64_t next(uint64_t Max);
 
 private:
+  RandomNumberGenerator(const RandomNumberGenerator &) LLVM_DELETED_FUNCTION;
+  RandomNumberGenerator &
+  operator=(const RandomNumberGenerator &) LLVM_DELETED_FUNCTION;
+
   uint64_t State;
 };
 
+// This class adds additional random number generator utilities. The
+// reason for the wrapper class is that we want to keep the
+// RandomNumberGenerator interface identical to LLVM's.
+class RandomNumberGeneratorWrapper {
+public:
+  uint64_t next(uint64_t Max) { return RNG.next(Max); }
+  bool getTrueWithProbability(float Probability);
+  RandomNumberGeneratorWrapper(RandomNumberGenerator &RNG) : RNG(RNG) {}
+
+private:
+  RandomNumberGeneratorWrapper(const RandomNumberGeneratorWrapper &)
+      LLVM_DELETED_FUNCTION;
+  RandomNumberGeneratorWrapper &
+  operator=(const RandomNumberGeneratorWrapper &) LLVM_DELETED_FUNCTION;
+
+  RandomNumberGenerator &RNG;
+};
+
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICERNG_H

diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 0034de5..057a3ea 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp

@@ -22,8 +22,25 @@
 #include "IceTargetLowering.h"
 #include "IceTargetLoweringX8632.h"
 
+#include "llvm/Support/CommandLine.h"
+
 namespace Ice {
 
+namespace {
+
+namespace cl = llvm::cl;
+cl::opt<bool> DoNopInsertion("nop-insertion", cl::desc("Randomly insert NOPs"),
+                             cl::init(false));
+
+cl::opt<int> MaxNopsPerInstruction(
+    "max-nops-per-instruction",
+    cl::desc("Max number of nops to insert per instruction"), cl::init(1));
+
+cl::opt<int> NopProbabilityAsPercentage(
+    "nop-insertion-percentage",
+    cl::desc("Nop insertion probability as percentage"), cl::init(10));
+} // end of anonymous namespace
+
 void LoweringContext::init(CfgNode *N) {
   Node = N;
   Begin = getNode()->getInsts().begin();
@@ -90,6 +107,20 @@
   Context.advanceNext();
 }
 
+bool TargetLowering::shouldDoNopInsertion() const { return DoNopInsertion; }
+
+void TargetLowering::doNopInsertion() {
+  Inst *I = *Context.getCur();
+  bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
+                    llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
+                    I->isDeleted();
+  if (!ShouldSkip) {
+    for (int I = 0; I < MaxNopsPerInstruction; ++I) {
+      randomlyInsertNop(NopProbabilityAsPercentage / 100.0);
+    }
+  }
+}
+
 // Lowers a single instruction according to the information in
 // Context, by checking the Context.Cur instruction kind and calling
 // the appropriate lowering method.  The lowering method should insert

diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 39e08de..6bf1424 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h

@@ -121,6 +121,8 @@
 
   // Tries to do address mode optimization on a single instruction.
   void doAddressOpt();
+  // Randomly insert NOPs.
+  void doNopInsertion();
   // Lowers a single instruction.
   void lower();
 
@@ -136,6 +138,7 @@
   virtual SizeT getFrameOrStackReg() const = 0;
   virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
   bool hasComputedFrame() const { return HasComputedFrame; }
+  bool shouldDoNopInsertion() const;
   int32_t getStackAdjustment() const { return StackAdjustment; }
   void updateStackAdjustment(int32_t Offset) { StackAdjustment += Offset; }
   void resetStackAdjustment() { StackAdjustment = 0; }
@@ -193,6 +196,7 @@
 
   virtual void doAddressOptLoad() {}
   virtual void doAddressOptStore() {}
+  virtual void randomlyInsertNop(float Probability) = 0;
   // This gives the target an opportunity to post-process the lowered
   // expansion before returning.  The primary intention is to do some
   // Register Manager activity as necessary, specifically to eagerly

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 35792aa..d36ad16 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp

@@ -134,6 +134,8 @@
 const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
 // The base 2 logarithm of the width in bytes of the largest stack slot
 const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
+// The number of different NOP instructions
+const uint32_t X86_NUM_NOP_VARIANTS = 5;
 
 // Value and Alignment are in bytes.  Return Value adjusted to the next
 // highest multiple of Alignment.
@@ -391,6 +393,11 @@
     return;
   T_genFrame.printElapsedUs(Context, "genFrame()");
   Func->dump("After stack frame mapping");
+
+  // Nop insertion
+  if (shouldDoNopInsertion()) {
+    Func->doNopInsertion();
+  }
 }
 
 void TargetX8632::translateOm1() {
@@ -429,6 +436,11 @@
     return;
   T_genFrame.printElapsedUs(Context, "genFrame()");
   Func->dump("After stack frame mapping");
+
+  // Nop insertion
+  if (shouldDoNopInsertion()) {
+    Func->doNopInsertion();
+  }
 }
 
 IceString TargetX8632::RegNames[] = {
@@ -3670,6 +3682,13 @@
   }
 }
 
+void TargetX8632::randomlyInsertNop(float Probability) {
+  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
+  if (RNG.getTrueWithProbability(Probability)) {
+    _nop(RNG.next(X86_NUM_NOP_VARIANTS));
+  }
+}
+
 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
   Func->setError("Phi found in regular instruction list");
 }

diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index ebe6ba9..499790e 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h

@@ -104,6 +104,7 @@
   virtual void lowerUnreachable(const InstUnreachable *Inst);
   virtual void doAddressOptLoad();
   virtual void doAddressOptStore();
+  virtual void randomlyInsertNop(float Probability);
 
   // Naive lowering of cmpxchg.
   void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
@@ -327,6 +328,9 @@
   void _neg(Variable *SrcDest) {
     Context.insert(InstX8632Neg::create(Func, SrcDest));
   }
+  void _nop(SizeT Variant) {
+    Context.insert(InstX8632Nop::create(Func, Variant));
+  }
   void _or(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Or::create(Func, Dest, Src0));
   }

diff --git a/tests_lit/llvm2ice_tests/nop-insertion.ll b/tests_lit/llvm2ice_tests/nop-insertion.ll
new file mode 100644
index 0000000..8c0d242
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/nop-insertion.ll

@@ -0,0 +1,93 @@
+; This is a smoke test of nop insertion.
+
+; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
+; RUN:    -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB50
+; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=90 \
+; RUN:    -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB90
+; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \
+; RUN:    -max-nops-per-instruction=2 %s | FileCheck %s --check-prefix=MAXNOPS2
+
+define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
+entry:
+  %res = mul <4 x i32> %a, %b
+  ret <4 x i32> %res
+; PROB50-LABEL: mul_v4i32:
+; PROB50: nop # variant = 3
+; PROB50: sub esp, 60
+; PROB50: nop # variant = 4
+; PROB50: movups xmmword ptr [esp+32], xmm0
+; PROB50: movups xmmword ptr [esp+16], xmm1
+; PROB50: nop # variant = 0
+; PROB50: movups xmm0, xmmword ptr [esp+32]
+; PROB50: nop # variant = 4
+; PROB50: pshufd xmm1, xmmword ptr [esp+32], 49
+; PROB50: pshufd xmm2, xmmword ptr [esp+16], 49
+; PROB50: pmuludq xmm0, xmmword ptr [esp+16]
+; PROB50: pmuludq xmm1, xmm2
+; PROB50: nop # variant = 0
+; PROB50: shufps xmm0, xmm1, 136
+; PROB50: pshufd xmm3, xmm0, 216
+; PROB50: nop # variant = 2
+; PROB50: movups xmmword ptr [esp], xmm3
+; PROB50: movups xmm0, xmmword ptr [esp]
+; PROB50: add esp, 60
+; PROB50: nop # variant = 0
+; PROB50: ret
+
+; PROB90-LABEL: mul_v4i32:
+; PROB90: nop # variant = 3
+; PROB90: sub esp, 60
+; PROB90: nop # variant = 4
+; PROB90: movups xmmword ptr [esp+32], xmm0
+; PROB90: nop # variant = 3
+; PROB90: movups xmmword ptr [esp+16], xmm1
+; PROB90: nop # variant = 2
+; PROB90: movups xmm0, xmmword ptr [esp+32]
+; PROB90: nop # variant = 3
+; PROB90: pshufd xmm1, xmmword ptr [esp+32], 49
+; PROB90: nop # variant = 4
+; PROB90: pshufd xmm2, xmmword ptr [esp+16], 49
+; PROB90: nop # variant = 0
+; PROB90: pmuludq xmm0, xmmword ptr [esp+16]
+; PROB90: nop # variant = 2
+; PROB90: pmuludq xmm1, xmm2
+; PROB90: nop # variant = 3
+; PROB90: shufps xmm0, xmm1, 136
+; PROB90: nop # variant = 4
+; PROB90: pshufd xmm3, xmm0, 216
+; PROB90: nop # variant = 2
+; PROB90: movups xmmword ptr [esp], xmm3
+; PROB90: nop # variant = 4
+; PROB90: movups xmm0, xmmword ptr [esp]
+; PROB90: nop # variant = 2
+; PROB90: add esp, 60
+; PROB90: nop # variant = 3
+; PROB90: ret
+
+; MAXNOPS2-LABEL: mul_v4i32:
+; MAXNOPS2: sub esp, 60
+; MAXNOPS2: nop # variant = 4
+; MAXNOPS2: movups xmmword ptr [esp+32], xmm0
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: nop # variant = 4
+; MAXNOPS2: movups xmmword ptr [esp+16], xmm1
+; MAXNOPS2: movups xmm0, xmmword ptr [esp+32]
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: pshufd xmm1, xmmword ptr [esp+32], 49
+; MAXNOPS2: nop # variant = 2
+; MAXNOPS2: pshufd xmm2, xmmword ptr [esp+16], 49
+; MAXNOPS2: pmuludq xmm0, xmmword ptr [esp+16]
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: nop # variant = 3
+; MAXNOPS2: pmuludq xmm1, xmm2
+; MAXNOPS2: shufps xmm0, xmm1, 136
+; MAXNOPS2: pshufd xmm3, xmm0, 216
+; MAXNOPS2: nop # variant = 3
+; MAXNOPS2: movups xmmword ptr [esp], xmm3
+; MAXNOPS2: nop # variant = 0
+; MAXNOPS2: movups xmm0, xmmword ptr [esp]
+; MAXNOPS2: nop # variant = 2
+; MAXNOPS2: add esp, 60
+; MAXNOPS2: nop # variant = 4
+; MAXNOPS2: ret
+}
commit	c3302746b21dcb4469efef8d484adba47c9d87e9	[log] [tgz]
author	Matt Wala <wala@chromium.org>	Fri Aug 15 16:21:56 2014 -0700
committer	Matt Wala <wala@chromium.org>	Fri Aug 15 16:21:56 2014 -0700
tree	5746d018deb6c088b6d19d787c3c86ab4c6bc118
parent	9dbe38e31e72aec9869effaa325b2bab23eb8816 [diff]