Subzero: Improve regalloc performance by optimizing UnhandledPrecolored.

A lot of time was being spent in the two loops that check precolored ranges in the Unhandled set, specifically in the endsBefore() check.

Solve this by keeping a shadow copy of Unhandled, restricted to the ranges that are precolored.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/622553003
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 3a4c178..69353a1 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -68,6 +68,7 @@
   TimerMarker T(IDscan, Func->getContext());
   assert(RegMaskFull.any()); // Sanity check
   Unhandled.clear();
+  UnhandledPrecolored.clear();
   Handled.clear();
   Inactive.clear();
   Active.clear();
@@ -97,10 +98,12 @@
       // it was never referenced.
       if (Var->getLiveRange().isEmpty())
         continue;
-      Unhandled.insert(LiveRangeWrapper(Var));
+      LiveRangeWrapper R(Var);
+      Unhandled.insert(R);
       if (Var->hasReg()) {
         Var->setRegNumTmp(Var->getRegNum());
         Var->setLiveRangeInfiniteWeight();
+        UnhandledPrecolored.insert(R);
       }
     }
   }
@@ -145,6 +148,9 @@
       Active.push_back(Cur);
       assert(RegUses[RegNum] >= 0);
       ++RegUses[RegNum];
+      assert(!UnhandledPrecolored.empty());
+      assert(UnhandledPrecolored.begin()->Var == Cur.Var);
+      UnhandledPrecolored.erase(UnhandledPrecolored.begin());
       continue;
     }
 
@@ -306,19 +312,25 @@
       }
     }
 
-    // Remove registers from the Free[] list where an Unhandled range
-    // overlaps with the current range and is precolored.
-    // Cur.endsBefore(Item) is an early exit check that turns a
-    // guaranteed O(N^2) algorithm into expected linear complexity.
-    llvm::SmallBitVector PrecoloredUnhandled(RegMask.size());
-    // Note: PrecoloredUnhandled is only used for dumping.
-    for (const LiveRangeWrapper &Item : Unhandled) {
+    std::vector<RegWeight> Weights(RegMask.size());
+
+    // Remove registers from the Free[] list where an Unhandled
+    // precolored range overlaps with the current range, and set those
+    // registers to infinite weight so that they aren't candidates for
+    // eviction.  Cur.endsBefore(Item) is an early exit check that
+    // turns a guaranteed O(N^2) algorithm into expected linear
+    // complexity.
+    llvm::SmallBitVector PrecoloredUnhandledMask(RegMask.size());
+    // Note: PrecoloredUnhandledMask is only used for dumping.
+    for (const LiveRangeWrapper &Item : UnhandledPrecolored) {
+      assert(Item.Var->hasReg());
       if (Cur.endsBefore(Item))
         break;
-      if (Item.Var->hasReg() && Item.overlaps(Cur)) {
+      if (Item.overlaps(Cur)) {
         int32_t ItemReg = Item.Var->getRegNum(); // Note: not getRegNumTmp()
+        Weights[ItemReg].setWeight(RegWeight::Inf);
         Free[ItemReg] = false;
-        PrecoloredUnhandled[ItemReg] = true;
+        PrecoloredUnhandledMask[ItemReg] = true;
         // Disable AllowOverlap if the preferred register is one of
         // these precolored unhandled overlapping ranges.
         if (AllowOverlap && ItemReg == PreferReg) {
@@ -334,7 +346,7 @@
         if (RegMask[i]) {
           Str << Func->getTarget()->getRegName(i, IceType_i32)
               << "(U=" << RegUses[i] << ",F=" << Free[i]
-              << ",P=" << PrecoloredUnhandled[i] << ") ";
+              << ",P=" << PrecoloredUnhandledMask[i] << ") ";
         }
       }
       Str << "\n";
@@ -369,7 +381,6 @@
     } else {
       // Fallback: there are no free registers, so we look for the
       // lowest-weight register and see if Cur has higher weight.
-      std::vector<RegWeight> Weights(RegMask.size());
       // Check Active ranges.
       for (const LiveRangeWrapper &Item : Active) {
         assert(Item.overlaps(Cur));
@@ -384,18 +395,6 @@
         if (Item.overlaps(Cur))
           Weights[RegNum].addWeight(Item.range().getWeight());
       }
-      // Check Unhandled ranges that overlap Cur and are precolored.
-      // Cur.endsBefore(*I) is an early exit check that turns a
-      // guaranteed O(N^2) algorithm into expected linear complexity.
-      for (const LiveRangeWrapper &Item : Unhandled) {
-        if (Cur.endsBefore(Item))
-          break;
-        int32_t RegNum = Item.Var->getRegNumTmp();
-        if (RegNum < 0)
-          continue;
-        if (Item.overlaps(Cur))
-          Weights[RegNum].setWeight(RegWeight::Inf);
-      }
 
       // All the weights are now calculated.  Find the register with
       // smallest weight.
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index 791b349..99ed908 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -71,6 +71,9 @@
   typedef std::set<LiveRangeWrapper, RangeCompare> OrderedRanges;
   typedef std::list<LiveRangeWrapper> UnorderedRanges;
   OrderedRanges Unhandled;
+  // UnhandledPrecolored is a subset of Unhandled, specially collected
+  // for faster processing.
+  OrderedRanges UnhandledPrecolored;
   UnorderedRanges Active, Inactive, Handled;
   LinearScan(const LinearScan &) = delete;
   LinearScan &operator=(const LinearScan &) = delete;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 55f113b..3217141 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -4305,6 +4305,8 @@
 void TargetX8632::postLower() {
   if (Ctx->getOptLevel() != Opt_m1)
     return;
+  static TimerIdT IDpostLower = GlobalContext::getTimerID("postLower");
+  TimerMarker T(IDpostLower, Ctx);
   // TODO: Avoid recomputing WhiteList every instruction.
   RegSetMask RegInclude = RegSet_All;
   RegSetMask RegExclude = RegSet_StackPointer;