Add -reorder-basic-blocks option and fix nop insertion

1. Basic block reordering can be enabled with -reorder-basic-blocks option enabled.

Blocks will be sorted according to the Reversed Post Traversal Order, but the next
node to visit among all candidate children nodes is selected 'randomly'.

Example:
  A
 / \
B   C
 \ /
  D

This CFG can generate two possible layouts:
A-B-C-D or A-C-B-D

2. Fix nop insetion

Add checks to avoiding insertions in empty basic blocks(dead blocks) and bundle locked instructions.

BUG=
R=jpp@chromium.org, jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1255303004.
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index ef49c15..c19d01a 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -371,6 +371,51 @@
   assert(Nodes.size() == OldSize);
 }
 
+namespace {
+void getRandomPostOrder(CfgNode *Node, llvm::BitVector &ToVisit,
+                        Ice::NodeList &PostOrder,
+                        Ice::RandomNumberGenerator *RNG) {
+  assert(ToVisit[Node->getIndex()]);
+  ToVisit[Node->getIndex()] = false;
+  NodeList Outs = Node->getOutEdges();
+  Ice::RandomShuffle(Outs.begin(), Outs.end(),
+                     [RNG](int N) { return RNG->next(N); });
+  for (CfgNode *Next : Outs) {
+    if (ToVisit[Next->getIndex()])
+      getRandomPostOrder(Next, ToVisit, PostOrder, RNG);
+  }
+  PostOrder.push_back(Node);
+}
+} // end of anonymous namespace
+
+void Cfg::shuffleNodes() {
+  if (!Ctx->getFlags().shouldReorderBasicBlocks())
+    return;
+
+  NodeList ReversedReachable;
+  NodeList Unreachable;
+  llvm::BitVector ToVisit(Nodes.size(), true);
+  // Traverse from entry node.
+  getRandomPostOrder(getEntryNode(), ToVisit, ReversedReachable,
+                     &Ctx->getRNG());
+  // Collect the unreachable nodes.
+  for (CfgNode *Node : Nodes)
+    if (ToVisit[Node->getIndex()])
+      Unreachable.push_back(Node);
+  // Copy the layout list to the Nodes.
+  SizeT OldSize = Nodes.size();
+  (void)OldSize;
+  Nodes.clear();
+  for (CfgNode *Node : reverse_range(ReversedReachable))
+    Nodes.emplace_back(Node);
+  for (CfgNode *Node : Unreachable) {
+    Nodes.emplace_back(Node);
+  }
+  assert(Nodes.size() == OldSize);
+
+  dump("After basic block shuffling");
+}
+
 void Cfg::doArgLowering() {
   TimerMarker T(TimerStack::TT_doArgLowering, this);
   getTarget()->lowerArguments();
@@ -383,6 +428,8 @@
 }
 
 void Cfg::doNopInsertion() {
+  if (!Ctx->getFlags().shouldDoNopInsertion())
+    return;
   TimerMarker T(TimerStack::TT_doNopInsertion, this);
   for (CfgNode *Node : Nodes)
     Node->doNopInsertion();
diff --git a/src/IceCfg.h b/src/IceCfg.h
index f2b5295..499e226 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -180,6 +180,7 @@
   void deletePhis();
   void advancedPhiLowering();
   void reorderNodes();
+  void shuffleNodes();
   void doAddressOpt();
   void doArgLowering();
   void doNopInsertion();
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 78063e3..cae5d37 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -500,18 +500,22 @@
   TargetLowering *Target = Func->getTarget();
   LoweringContext &Context = Target->getContext();
   Context.init(this);
+  Context.setInsertPoint(Context.getCur());
+  // Do not insert nop in bundle locked instructions.
+  bool PauseNopInsertion = false;
   while (!Context.atEnd()) {
-    Target->doNopInsertion();
+    if (llvm::isa<InstBundleLock>(Context.getCur())) {
+      PauseNopInsertion = true;
+    } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
+      PauseNopInsertion = false;
+    }
+    if (!PauseNopInsertion)
+      Target->doNopInsertion();
     // Ensure Cur=Next, so that the nops are inserted before the current
     // instruction rather than after.
-    Context.advanceNext();
     Context.advanceCur();
+    Context.advanceNext();
   }
-  // Insert before all instructions.
-  Context.setInsertPoint(getInsts().begin());
-  Context.advanceNext();
-  Context.advanceCur();
-  Target->doNopInsertion();
 }
 
 // Drives the target lowering.  Passes the current instruction and the
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 5a4abe6..5066a4b 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -284,6 +284,12 @@
     cl::desc("The threshold for immediates randomization and pooling"),
     cl::init(0xffff));
 
+// Command line option for turning on basic block shuffling.
+cl::opt<bool> ReorderBasicBlocks(
+    "reorder-basic-blocks",
+    cl::desc("Shuffle the layout of basic blocks in each functions"),
+    cl::init(false));
+
 // Command line option for turning on function layout reordering.
 cl::opt<bool> ReorderFunctions(
     "reorder-functions",
@@ -341,6 +347,7 @@
   OutFlags.PhiEdgeSplit = false;
   OutFlags.RandomNopInsertion = false;
   OutFlags.RandomRegAlloc = false;
+  OutFlags.ReorderBasicBlocks = false;
   OutFlags.ReorderFunctions = false;
   OutFlags.ReorderGlobalVariables = false;
   OutFlags.ReorderPooledConstants = false;
@@ -398,8 +405,17 @@
   OutFlags.setOptLevel(::OLevel);
   OutFlags.setPhiEdgeSplit(::EnablePhiEdgeSplit);
   OutFlags.setRandomSeed(::RandomSeed);
+  OutFlags.setRandomizeAndPoolImmediatesOption(
+      ::RandomizeAndPoolImmediatesOption);
+  OutFlags.setRandomizeAndPoolImmediatesThreshold(
+      ::RandomizeAndPoolImmediatesThreshold);
+  OutFlags.setReorderFunctionsWindowSize(::ReorderFunctionsWindowSize);
+  OutFlags.setShouldReorderBasicBlocks(::ReorderBasicBlocks);
   OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion);
   OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation);
+  OutFlags.setShouldReorderFunctions(::ReorderFunctions);
+  OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables);
+  OutFlags.setShouldReorderPooledConstants(::ReorderPooledConstants);
   OutFlags.setSkipUnimplemented(::SkipUnimplemented);
   OutFlags.setSubzeroTimingEnabled(::SubzeroTimingEnabled);
   OutFlags.setTargetArch(::TargetArch);
@@ -414,22 +430,6 @@
   OutFlags.setMaxNopsPerInstruction(::MaxNopsPerInstruction);
   OutFlags.setNopProbabilityAsPercentage(::NopProbabilityAsPercentage);
   OutFlags.setVerbose(VMask);
-
-  // Set for immediates randomization or pooling option.
-  OutFlags.setRandomizeAndPoolImmediatesOption(
-      ::RandomizeAndPoolImmediatesOption);
-  OutFlags.setRandomizeAndPoolImmediatesThreshold(
-      ::RandomizeAndPoolImmediatesThreshold);
-
-  // Set for function reordering options.
-  OutFlags.setShouldReorderFunctions(::ReorderFunctions);
-  OutFlags.setReorderFunctionsWindowSize(::ReorderFunctionsWindowSize);
-
-  // Set for global variable reordering option.
-  OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables);
-
-  // Set for pooled constant reordering option.
-  OutFlags.setShouldReorderPooledConstants(::ReorderPooledConstants);
 }
 
 void ClFlags::getParsedClFlagsExtra(ClFlagsExtra &OutFlagsExtra) {
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 549bf16..244790b 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -153,6 +153,11 @@
     return RandomizeAndPoolImmediatesThreshold;
   }
 
+  bool shouldReorderBasicBlocks() const { return ReorderBasicBlocks; }
+  void setShouldReorderBasicBlocks(bool NewValue) {
+    ReorderBasicBlocks = NewValue;
+  }
+
   void setShouldReorderFunctions(bool Option) { ReorderFunctions = Option; }
   bool shouldReorderFunctions() const { return ReorderFunctions; }
 
@@ -229,6 +234,7 @@
   bool PhiEdgeSplit;
   bool RandomNopInsertion;
   bool RandomRegAlloc;
+  bool ReorderBasicBlocks;
   bool ReorderFunctions;
   bool ReorderGlobalVariables;
   bool ReorderPooledConstants;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index b4c4ea3..3c7ad5e 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -400,6 +400,9 @@
   Func->contractEmptyNodes();
   Func->reorderNodes();
 
+  // Shuffle basic block order if -reorder-basic-blocks is enabled.
+  Func->shuffleNodes();
+
   // Branch optimization.  This needs to be done just before code emission.  In
   // particular, no transformations that insert or reorder CfgNodes should be
   // done after branch optimization.  We go ahead and do it before nop insertion
@@ -407,9 +410,8 @@
   Func->doBranchOpt();
   Func->dump("After branch optimization");
 
-  // Nop insertion
-  if (Ctx->getFlags().shouldDoNopInsertion())
-    Func->doNopInsertion();
+  // Nop insertion if -nop-insertion is enabled.
+  Func->doNopInsertion();
 
   // Mark nodes that require sandbox alignment
   if (Ctx->getFlags().getUseSandboxing())
@@ -446,10 +448,11 @@
     return;
   Func->dump("After stack frame mapping");
 
-  // Nop insertion
-  if (Ctx->getFlags().shouldDoNopInsertion()) {
-    Func->doNopInsertion();
-  }
+  // Shuffle basic block order if -reorder-basic-blocks is enabled.
+  Func->shuffleNodes();
+
+  // Nop insertion if -nop-insertion is enabled.
+  Func->doNopInsertion();
 
   // Mark nodes that require sandbox alignment
   if (Ctx->getFlags().getUseSandboxing())