Subzero: Randomize register assignment.

Randomize the order that registers appear in the free list.  Only
randomize fully "equivalent" registers to ensure no extra spills.

This adds the -randomize-regalloc option.

This is a continuation of https://codereview.chromium.org/456033003/ which Matt owns.

BUG= none
R=jfb@chromium.org

Review URL: https://codereview.chromium.org/807293003
diff --git a/src/IceDefs.h b/src/IceDefs.h
index 4160fec..6434077 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -122,6 +122,7 @@
   IceV_LinearScan = 1 << 8,
   IceV_Frame = 1 << 9,
   IceV_AddrOpt = 1 << 10,
+  IceV_Random = 1 << 11,
   IceV_All = ~IceV_None,
   IceV_Most = IceV_All & ~IceV_LinearScan
 };
diff --git a/src/IceRNG.h b/src/IceRNG.h
index 5f862a8..bb2ac45 100644
--- a/src/IceRNG.h
+++ b/src/IceRNG.h
@@ -42,7 +42,7 @@
   operator=(const RandomNumberGeneratorWrapper &) = delete;
 
 public:
-  uint64_t next(uint64_t Max) { return RNG.next(Max); }
+  uint64_t operator()(uint64_t Max) { return RNG.next(Max); }
   bool getTrueWithProbability(float Probability);
   RandomNumberGeneratorWrapper(RandomNumberGenerator &RNG) : RNG(RNG) {}
 
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 2d5ff9b..3601846 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -141,7 +141,7 @@
 //
 // Some properties we aren't (yet) taking advantage of:
 //
-// * Because live ranges are a single segment, the Unhandled set will
+// * Because live ranges are a single segment, the Inactive set will
 //   always be empty, and the live range trimming operation is
 //   unnecessary.
 //
@@ -258,7 +258,8 @@
 // Requires running Cfg::liveness(Liveness_Intervals) in
 // preparation.  Results are assigned to Variable::RegNum for each
 // Variable.
-void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
+void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
+                      bool Randomized) {
   TimerMarker T(TimerStack::TT_linearScan, Func);
   assert(RegMaskFull.any()); // Sanity check
   Ostream &Str = Func->getContext()->getStrDump();
@@ -266,6 +267,13 @@
       ALLOW_DUMP && Func->getContext()->isVerbose(IceV_LinearScan);
   Func->resetCurrentNode();
   VariablesMetadata *VMetadata = Func->getVMetadata();
+  const size_t NumRegisters = RegMaskFull.size();
+  llvm::SmallBitVector PreDefinedRegisters(NumRegisters);
+  if (Randomized) {
+    for (Variable *Var : UnhandledPrecolored) {
+      PreDefinedRegisters[Var->getRegNum()] = true;
+    }
+  }
 
   // Build a LiveRange representing the Kills list.
   LiveRange KillsRange(Kills);
@@ -274,7 +282,7 @@
   // RegUses[I] is the number of live ranges (variables) that register
   // I is currently assigned to.  It can be greater than 1 as a result
   // of AllowOverlap inference below.
-  std::vector<int> RegUses(RegMaskFull.size());
+  std::vector<int> RegUses(NumRegisters);
   // Unhandled is already set to all ranges in increasing order of
   // start points.
   assert(Active.empty());
@@ -662,23 +670,39 @@
   Inactive.clear();
   dump(Func);
 
-  // Finish up by assigning RegNumTmp->RegNum for each Variable.
+  // TODO(stichnot): Statically choose the size based on the target
+  // being compiled.
+  const size_t REGS_SIZE = 32;
+  llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters);
+  if (Randomized) {
+    Func->getTarget()->makeRandomRegisterPermutation(
+        Permutation, PreDefinedRegisters | ~RegMaskFull);
+  }
+
+  // Finish up by assigning RegNumTmp->RegNum (or a random permutation
+  // thereof) for each Variable.
   for (Variable *Item : Handled) {
     int32_t RegNum = Item->getRegNumTmp();
+    int32_t AssignedRegNum = RegNum;
+
+    if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
+      AssignedRegNum = Permutation[RegNum];
+    }
     if (Verbose) {
       if (!Item->hasRegTmp()) {
         Str << "Not assigning ";
         Item->dump(Func);
         Str << "\n";
       } else {
-        Str << (RegNum == Item->getRegNum() ? "Reassigning " : "Assigning ")
-            << Func->getTarget()->getRegName(RegNum, IceType_i32) << "(r"
-            << RegNum << ") to ";
+        Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
+                                                    : "Assigning ")
+            << Func->getTarget()->getRegName(AssignedRegNum, IceType_i32)
+            << "(r" << AssignedRegNum << ") to ";
         Item->dump(Func);
         Str << "\n";
       }
     }
-    Item->setRegNum(Item->getRegNumTmp());
+    Item->setRegNum(AssignedRegNum);
   }
 
   // TODO: Consider running register allocation one more time, with
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index 4ecae52..c5c1c9e 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -29,7 +29,7 @@
   LinearScan(Cfg *Func)
       : Func(Func), FindPreference(false), FindOverlap(false) {}
   void init(RegAllocKind Kind);
-  void scan(const llvm::SmallBitVector &RegMask);
+  void scan(const llvm::SmallBitVector &RegMask, bool Randomized);
   void dump(Cfg *Func) const;
 
 private:
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index bab46f6..be1d2e1 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -29,6 +29,7 @@
 
 namespace {
 
+// TODO(stichnot): Move this machinery into llvm2ice.cpp.
 namespace cl = llvm::cl;
 cl::opt<bool> DoNopInsertion("nop-insertion", cl::desc("Randomly insert NOPs"),
                              cl::init(false));
@@ -40,6 +41,11 @@
 cl::opt<int> NopProbabilityAsPercentage(
     "nop-insertion-percentage",
     cl::desc("Nop insertion probability as percentage"), cl::init(10));
+
+cl::opt<bool>
+CLRandomizeRegisterAllocation("randomize-regalloc",
+                              cl::desc("Randomize register allocation"),
+                              cl::init(false));
 } // end of anonymous namespace
 
 void LoweringContext::init(CfgNode *N) {
@@ -95,6 +101,12 @@
   return NULL;
 }
 
+TargetLowering::TargetLowering(Cfg *Func)
+    : Func(Func), Ctx(Func->getContext()),
+      RandomizeRegisterAllocation(CLRandomizeRegisterAllocation),
+      HasComputedFrame(false), CallsReturnsTwice(false), StackAdjustment(0),
+      Context() {}
+
 Assembler *TargetLowering::createAssembler(TargetArch Target, Cfg *Func) {
   // These statements can be #ifdef'd to specialize the assembler
   // to a subset of the available targets.  TODO: use CRTP.
@@ -236,7 +248,7 @@
     RegExclude |= RegSet_FramePointer;
   LinearScan.init(Kind);
   llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
-  LinearScan.scan(RegMask);
+  LinearScan.scan(RegMask, RandomizeRegisterAllocation);
 }
 
 TargetGlobalInitLowering *
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 3e2243a..d2bd6f8 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -191,6 +191,10 @@
   virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0;
   void regAlloc(RegAllocKind Kind);
 
+  virtual void makeRandomRegisterPermutation(
+      llvm::SmallVectorImpl<int32_t> &Permutation,
+      const llvm::SmallBitVector &ExcludeRegisters) const = 0;
+
   virtual void emitVariable(const Variable *Var) const = 0;
 
   // Performs target-specific argument lowering.
@@ -204,9 +208,7 @@
   virtual ~TargetLowering() {}
 
 protected:
-  TargetLowering(Cfg *Func)
-      : Func(Func), Ctx(Func->getContext()), HasComputedFrame(false),
-        CallsReturnsTwice(false), StackAdjustment(0) {}
+  TargetLowering(Cfg *Func);
   virtual void lowerAlloca(const InstAlloca *Inst) = 0;
   virtual void lowerArithmetic(const InstArithmetic *Inst) = 0;
   virtual void lowerAssign(const InstAssign *Inst) = 0;
@@ -235,6 +237,7 @@
 
   Cfg *Func;
   GlobalContext *Ctx;
+  const bool RandomizeRegisterAllocation;
   bool HasComputedFrame;
   bool CallsReturnsTwice;
   // StackAdjustment keeps track of the current stack offset from its
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index d049cbd..71f860f 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -3869,7 +3869,7 @@
 void TargetX8632::randomlyInsertNop(float Probability) {
   RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
   if (RNG.getTrueWithProbability(Probability)) {
-    _nop(RNG.next(X86_NUM_NOP_VARIANTS));
+    _nop(RNG(X86_NUM_NOP_VARIANTS));
   }
 }
 
@@ -4530,6 +4530,72 @@
   }
 }
 
+void TargetX8632::makeRandomRegisterPermutation(
+    llvm::SmallVectorImpl<int32_t> &Permutation,
+    const llvm::SmallBitVector &ExcludeRegisters) const {
+  // TODO(stichnot): Declaring Permutation this way loses type/size
+  // information.  Fix this in conjunction with the caller-side TODO.
+  assert(Permutation.size() >= RegX8632::Reg_NUM);
+  // Expected upper bound on the number of registers in a single
+  // equivalence class.  For x86-32, this would comprise the 8 XMM
+  // registers.  This is for performance, not correctness.
+  static const unsigned MaxEquivalenceClassSize = 8;
+  typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
+  typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
+  EquivalenceClassMap EquivalenceClasses;
+  SizeT NumShuffled = 0, NumPreserved = 0;
+
+// Build up the equivalence classes of registers by looking at the
+// register properties as well as whether the registers should be
+// explicitly excluded from shuffling.
+#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
+          frameptr, isI8, isInt, isFP)                                         \
+  if (ExcludeRegisters[RegX8632::val]) {                                       \
+    /* val stays the same in the resulting permutation. */                     \
+    Permutation[RegX8632::val] = RegX8632::val;                                \
+    ++NumPreserved;                                                            \
+  } else {                                                                     \
+    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
+                           (isInt << 3) | (isFP << 4);                         \
+    /* val is assigned to an equivalence class based on its properties. */     \
+    EquivalenceClasses[Index].push_back(RegX8632::val);                        \
+  }
+  REGX8632_TABLE
+#undef X
+
+  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
+
+  // Shuffle the resulting equivalence classes.
+  for (auto I : EquivalenceClasses) {
+    const RegisterList &List = I.second;
+    RegisterList Shuffled(List);
+    std::random_shuffle(Shuffled.begin(), Shuffled.end(), RNG);
+    for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
+      Permutation[List[SI]] = Shuffled[SI];
+      ++NumShuffled;
+    }
+  }
+
+  assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);
+
+  if (Func->getContext()->isVerbose(IceV_Random)) {
+    Ostream &Str = Func->getContext()->getStrDump();
+    Str << "Register equivalence classes:\n";
+    for (auto I : EquivalenceClasses) {
+      Str << "{";
+      const RegisterList &List = I.second;
+      bool First = true;
+      for (int32_t Register : List) {
+        if (!First)
+          Str << " ";
+        First = false;
+        Str << getRegName(Register, IceType_i32);
+      }
+      Str << "}\n";
+    }
+  }
+}
+
 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
   if (!ALLOW_DUMP)
     return;
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 2846ac9..6e2bed9 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -180,6 +180,10 @@
   OperandX8632Mem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                 uint32_t Offset = 0);
 
+  void makeRandomRegisterPermutation(
+      llvm::SmallVectorImpl<int32_t> &Permutation,
+      const llvm::SmallBitVector &ExcludeRegisters) const;
+
   // The following are helpers that insert lowered x86 instructions
   // with minimal syntactic overhead, so that the lowering code can
   // look as close to assembly as practical.
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index 92bb839..53c90ee 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -47,6 +47,7 @@
         clEnumValN(Ice::IceV_LinearScan, "regalloc", "Linear scan details"),
         clEnumValN(Ice::IceV_Frame, "frame", "Stack frame layout details"),
         clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
+        clEnumValN(Ice::IceV_Random, "random", "Randomization details"),
         clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
         clEnumValN(Ice::IceV_Most, "most",
                    "Use all verbose options except 'regalloc' and 'time'"),