Subzero: Improve regalloc performance by optimizing UnhandledPrecolored.
A lot of time was being spent in the two loops that check precolored ranges in the Unhandled set, specifically in the endsBefore() check.
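Solve this by maintaining UnhandledPrecolored, a shadow copy of Unhandled restricted to the ranges that are precolored, and iterating over it instead of Unhandled.
The two loops are also merged into one: the Free[] update now sets the infinite register weights as well, so the separate pass in the eviction fallback path is removed.
Also add a "postLower" timer marker to TargetX8632::postLower().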
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/622553003
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 3a4c178..69353a1 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -68,6 +68,7 @@
TimerMarker T(IDscan, Func->getContext());
assert(RegMaskFull.any()); // Sanity check
Unhandled.clear();
+ UnhandledPrecolored.clear();
Handled.clear();
Inactive.clear();
Active.clear();
@@ -97,10 +98,12 @@
// it was never referenced.
if (Var->getLiveRange().isEmpty())
continue;
- Unhandled.insert(LiveRangeWrapper(Var));
+ LiveRangeWrapper R(Var);
+ Unhandled.insert(R);
if (Var->hasReg()) {
Var->setRegNumTmp(Var->getRegNum());
Var->setLiveRangeInfiniteWeight();
+ UnhandledPrecolored.insert(R);
}
}
}
@@ -145,6 +148,9 @@
Active.push_back(Cur);
assert(RegUses[RegNum] >= 0);
++RegUses[RegNum];
+ assert(!UnhandledPrecolored.empty());
+ assert(UnhandledPrecolored.begin()->Var == Cur.Var);
+ UnhandledPrecolored.erase(UnhandledPrecolored.begin());
continue;
}
@@ -306,19 +312,25 @@
}
}
- // Remove registers from the Free[] list where an Unhandled range
- // overlaps with the current range and is precolored.
- // Cur.endsBefore(Item) is an early exit check that turns a
- // guaranteed O(N^2) algorithm into expected linear complexity.
- llvm::SmallBitVector PrecoloredUnhandled(RegMask.size());
- // Note: PrecoloredUnhandled is only used for dumping.
- for (const LiveRangeWrapper &Item : Unhandled) {
+ std::vector<RegWeight> Weights(RegMask.size());
+
+ // Remove registers from the Free[] list where an Unhandled
+ // precolored range overlaps with the current range, and set those
+ // registers to infinite weight so that they aren't candidates for
+ // eviction. Cur.endsBefore(Item) is an early exit check that
+ // turns a guaranteed O(N^2) algorithm into expected linear
+ // complexity.
+ llvm::SmallBitVector PrecoloredUnhandledMask(RegMask.size());
+ // Note: PrecoloredUnhandledMask is only used for dumping.
+ for (const LiveRangeWrapper &Item : UnhandledPrecolored) {
+ assert(Item.Var->hasReg());
if (Cur.endsBefore(Item))
break;
- if (Item.Var->hasReg() && Item.overlaps(Cur)) {
+ if (Item.overlaps(Cur)) {
int32_t ItemReg = Item.Var->getRegNum(); // Note: not getRegNumTmp()
+ Weights[ItemReg].setWeight(RegWeight::Inf);
Free[ItemReg] = false;
- PrecoloredUnhandled[ItemReg] = true;
+ PrecoloredUnhandledMask[ItemReg] = true;
// Disable AllowOverlap if the preferred register is one of
// these precolored unhandled overlapping ranges.
if (AllowOverlap && ItemReg == PreferReg) {
@@ -334,7 +346,7 @@
if (RegMask[i]) {
Str << Func->getTarget()->getRegName(i, IceType_i32)
<< "(U=" << RegUses[i] << ",F=" << Free[i]
- << ",P=" << PrecoloredUnhandled[i] << ") ";
+ << ",P=" << PrecoloredUnhandledMask[i] << ") ";
}
}
Str << "\n";
@@ -369,7 +381,6 @@
} else {
// Fallback: there are no free registers, so we look for the
// lowest-weight register and see if Cur has higher weight.
- std::vector<RegWeight> Weights(RegMask.size());
// Check Active ranges.
for (const LiveRangeWrapper &Item : Active) {
assert(Item.overlaps(Cur));
@@ -384,18 +395,6 @@
if (Item.overlaps(Cur))
Weights[RegNum].addWeight(Item.range().getWeight());
}
- // Check Unhandled ranges that overlap Cur and are precolored.
- // Cur.endsBefore(*I) is an early exit check that turns a
- // guaranteed O(N^2) algorithm into expected linear complexity.
- for (const LiveRangeWrapper &Item : Unhandled) {
- if (Cur.endsBefore(Item))
- break;
- int32_t RegNum = Item.Var->getRegNumTmp();
- if (RegNum < 0)
- continue;
- if (Item.overlaps(Cur))
- Weights[RegNum].setWeight(RegWeight::Inf);
- }
// All the weights are now calculated. Find the register with
// smallest weight.
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index 791b349..99ed908 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -71,6 +71,9 @@
typedef std::set<LiveRangeWrapper, RangeCompare> OrderedRanges;
typedef std::list<LiveRangeWrapper> UnorderedRanges;
OrderedRanges Unhandled;
+ // UnhandledPrecolored is a subset of Unhandled, specially collected
+ // for faster processing.
+ OrderedRanges UnhandledPrecolored;
UnorderedRanges Active, Inactive, Handled;
LinearScan(const LinearScan &) = delete;
LinearScan &operator=(const LinearScan &) = delete;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 55f113b..3217141 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -4305,6 +4305,8 @@
void TargetX8632::postLower() {
if (Ctx->getOptLevel() != Opt_m1)
return;
+ static TimerIdT IDpostLower = GlobalContext::getTimerID("postLower");
+ TimerMarker T(IDpostLower, Ctx);
// TODO: Avoid recomputing WhiteList every instruction.
RegSetMask RegInclude = RegSet_All;
RegSetMask RegExclude = RegSet_StackPointer;