Subzero: Improve regalloc performance by optimizing UnhandledPrecolored. A lot of time was being spent in the two loops that check precolored ranges in the Unhandled set, specifically in the endsBefore() check. Solve this by keeping a shadow copy of Unhandled, restricted to the ranges that are precolored. BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/622553003
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp index 3a4c178..69353a1 100644 --- a/src/IceRegAlloc.cpp +++ b/src/IceRegAlloc.cpp
@@ -68,6 +68,7 @@ TimerMarker T(IDscan, Func->getContext()); assert(RegMaskFull.any()); // Sanity check Unhandled.clear(); + UnhandledPrecolored.clear(); Handled.clear(); Inactive.clear(); Active.clear(); @@ -97,10 +98,12 @@ // it was never referenced. if (Var->getLiveRange().isEmpty()) continue; - Unhandled.insert(LiveRangeWrapper(Var)); + LiveRangeWrapper R(Var); + Unhandled.insert(R); if (Var->hasReg()) { Var->setRegNumTmp(Var->getRegNum()); Var->setLiveRangeInfiniteWeight(); + UnhandledPrecolored.insert(R); } } } @@ -145,6 +148,9 @@ Active.push_back(Cur); assert(RegUses[RegNum] >= 0); ++RegUses[RegNum]; + assert(!UnhandledPrecolored.empty()); + assert(UnhandledPrecolored.begin()->Var == Cur.Var); + UnhandledPrecolored.erase(UnhandledPrecolored.begin()); continue; } @@ -306,19 +312,25 @@ } } - // Remove registers from the Free[] list where an Unhandled range - // overlaps with the current range and is precolored. - // Cur.endsBefore(Item) is an early exit check that turns a - // guaranteed O(N^2) algorithm into expected linear complexity. - llvm::SmallBitVector PrecoloredUnhandled(RegMask.size()); - // Note: PrecoloredUnhandled is only used for dumping. - for (const LiveRangeWrapper &Item : Unhandled) { + std::vector<RegWeight> Weights(RegMask.size()); + + // Remove registers from the Free[] list where an Unhandled + // precolored range overlaps with the current range, and set those + // registers to infinite weight so that they aren't candidates for + // eviction. Cur.endsBefore(Item) is an early exit check that + // turns a guaranteed O(N^2) algorithm into expected linear + // complexity. + llvm::SmallBitVector PrecoloredUnhandledMask(RegMask.size()); + // Note: PrecoloredUnhandledMask is only used for dumping. + for (const LiveRangeWrapper &Item : UnhandledPrecolored) { + assert(Item.Var->hasReg()); if (Cur.endsBefore(Item)) break; - if (Item.Var->hasReg() && Item.overlaps(Cur)) { + if (Item.overlaps(Cur)) { int32_t ItemReg = Item.Var->getRegNum(); // Note: not getRegNumTmp() + Weights[ItemReg].setWeight(RegWeight::Inf); Free[ItemReg] = false; - PrecoloredUnhandled[ItemReg] = true; + PrecoloredUnhandledMask[ItemReg] = true; // Disable AllowOverlap if the preferred register is one of // these precolored unhandled overlapping ranges. if (AllowOverlap && ItemReg == PreferReg) { @@ -334,7 +346,7 @@ if (RegMask[i]) { Str << Func->getTarget()->getRegName(i, IceType_i32) << "(U=" << RegUses[i] << ",F=" << Free[i] - << ",P=" << PrecoloredUnhandled[i] << ") "; + << ",P=" << PrecoloredUnhandledMask[i] << ") "; } } Str << "\n"; @@ -369,7 +381,6 @@ } else { // Fallback: there are no free registers, so we look for the // lowest-weight register and see if Cur has higher weight. - std::vector<RegWeight> Weights(RegMask.size()); // Check Active ranges. for (const LiveRangeWrapper &Item : Active) { assert(Item.overlaps(Cur)); @@ -384,18 +395,6 @@ if (Item.overlaps(Cur)) Weights[RegNum].addWeight(Item.range().getWeight()); } - // Check Unhandled ranges that overlap Cur and are precolored. - // Cur.endsBefore(*I) is an early exit check that turns a - // guaranteed O(N^2) algorithm into expected linear complexity. - for (const LiveRangeWrapper &Item : Unhandled) { - if (Cur.endsBefore(Item)) - break; - int32_t RegNum = Item.Var->getRegNumTmp(); - if (RegNum < 0) - continue; - if (Item.overlaps(Cur)) - Weights[RegNum].setWeight(RegWeight::Inf); - } // All the weights are now calculated. Find the register with // smallest weight.
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h index 791b349..99ed908 100644 --- a/src/IceRegAlloc.h +++ b/src/IceRegAlloc.h
@@ -71,6 +71,9 @@ typedef std::set<LiveRangeWrapper, RangeCompare> OrderedRanges; typedef std::list<LiveRangeWrapper> UnorderedRanges; OrderedRanges Unhandled; + // UnhandledPrecolored is a subset of Unhandled, specially collected + // for faster processing. + OrderedRanges UnhandledPrecolored; UnorderedRanges Active, Inactive, Handled; LinearScan(const LinearScan &) = delete; LinearScan &operator=(const LinearScan &) = delete;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp index 55f113b..3217141 100644 --- a/src/IceTargetLoweringX8632.cpp +++ b/src/IceTargetLoweringX8632.cpp
@@ -4305,6 +4305,8 @@ void TargetX8632::postLower() { if (Ctx->getOptLevel() != Opt_m1) return; + static TimerIdT IDpostLower = GlobalContext::getTimerID("postLower"); + TimerMarker T(IDpostLower, Ctx); // TODO: Avoid recomputing WhiteList every instruction. RegSetMask RegInclude = RegSet_All; RegSetMask RegExclude = RegSet_StackPointer;