Subzero: Initial O2 lowering
Includes the following:
1. Liveness analysis.
2. Linear-scan register allocation.
3. Address mode optimization.
4. Compare-branch fusing.
All of these depend on liveness analysis. There are three versions of liveness analysis (in order of increasing cost):
1. Lightweight. This computes last-uses for variables local to a single basic block.
2. Full. This computes last-uses for all variables based on global dataflow analysis.
3. Full live ranges. This computes all last-uses, plus calculates the live range intervals in terms of instruction numbers. (The live ranges are needed for register allocation.)
For testing the full live range computation, Cfg::validateLiveness() checks every Variable of every Inst and verifies that the current Inst is contained within the Variable's live range.
The cross tests are run with O2 in addition to Om1.
Some of the lit tests (for what good they do) are updated with O2 code sequences.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/300563003
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 3d720fa..6bd9bf7 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -16,6 +16,7 @@
#include "IceCfgNode.h"
#include "IceDefs.h"
#include "IceInst.h"
+#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"
@@ -24,7 +25,7 @@
Cfg::Cfg(GlobalContext *Ctx)
: Ctx(Ctx), FunctionName(""), ReturnType(IceType_void),
IsInternalLinkage(false), HasError(false), ErrorMessage(""), Entry(NULL),
- NextInstNumber(1),
+ NextInstNumber(1), Live(NULL),
Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
CurrentNode(NULL) {}
@@ -62,14 +63,10 @@
bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
void Cfg::translate() {
- Ostream &Str = Ctx->getStrDump();
if (hasError())
return;
- if (Ctx->isVerbose()) {
- Str << "================ Initial CFG ================\n";
- dump();
- }
+ dump("Initial CFG");
Timer T_translate;
// The set of translation passes and their order are determined by
@@ -77,10 +74,7 @@
getTarget()->translate();
T_translate.printElapsedUs(getContext(), "translate()");
- if (Ctx->isVerbose()) {
- Str << "================ Final output ================\n";
- dump();
- }
+ dump("Final output");
}
void Cfg::computePredecessors() {
@@ -89,6 +83,13 @@
}
}
+void Cfg::renumberInstructions() {
+ NextInstNumber = 1;
+ for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ (*I)->renumberInstructions();
+ }
+}
+
// placePhiLoads() must be called before placePhiStores().
void Cfg::placePhiLoads() {
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
@@ -109,6 +110,12 @@
}
}
+void Cfg::doAddressOpt() {
+ for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ (*I)->doAddressOpt();
+ }
+}
+
void Cfg::genCode() {
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->genCode();
@@ -128,6 +135,150 @@
}
}
+// This is a lightweight version of live-range-end calculation. Marks
+// the last use of only those variables whose definition and uses are
+// completely with a single block. It is a quick single pass and
+// doesn't need to iterate until convergence.
+void Cfg::livenessLightweight() {
+ for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ (*I)->livenessLightweight();
+ }
+}
+
+void Cfg::liveness(LivenessMode Mode) {
+ Live.reset(new Liveness(this, Mode));
+ Live->init();
+ // Initialize with all nodes needing to be processed.
+ llvm::BitVector NeedToProcess(Nodes.size(), true);
+ while (NeedToProcess.any()) {
+ // Iterate in reverse topological order to speed up convergence.
+ for (NodeList::reverse_iterator I = Nodes.rbegin(), E = Nodes.rend();
+ I != E; ++I) {
+ CfgNode *Node = *I;
+ if (NeedToProcess[Node->getIndex()]) {
+ NeedToProcess[Node->getIndex()] = false;
+ bool Changed = Node->liveness(getLiveness());
+ if (Changed) {
+ // If the beginning-of-block liveness changed since the last
+ // iteration, mark all in-edges as needing to be processed.
+ const NodeList &InEdges = Node->getInEdges();
+ for (NodeList::const_iterator I1 = InEdges.begin(),
+ E1 = InEdges.end();
+ I1 != E1; ++I1) {
+ CfgNode *Pred = *I1;
+ NeedToProcess[Pred->getIndex()] = true;
+ }
+ }
+ }
+ }
+ }
+ if (Mode == Liveness_Intervals) {
+ // Reset each variable's live range.
+ for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
+ I != E; ++I) {
+ if (Variable *Var = *I)
+ Var->resetLiveRange();
+ }
+ }
+ // Collect timing for just the portion that constructs the live
+ // range intervals based on the end-of-live-range computation, for a
+ // finer breakdown of the cost.
+ Timer T_liveRange;
+ // Make a final pass over instructions to delete dead instructions
+ // and build each Variable's live range.
+ for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ (*I)->livenessPostprocess(Mode, getLiveness());
+ }
+ if (Mode == Liveness_Intervals) {
+ // Special treatment for live in-args. Their liveness needs to
+ // extend beyond the beginning of the function, otherwise an arg
+ // whose only use is in the first instruction will end up having
+ // the trivial live range [1,1) and will *not* interfere with
+ // other arguments. So if the first instruction of the method is
+ // "r=arg1+arg2", both args may be assigned the same register.
+ for (SizeT I = 0; I < Args.size(); ++I) {
+ Variable *Arg = Args[I];
+ if (!Live->getLiveRange(Arg).isEmpty()) {
+ // Add live range [-1,0) with weight 0. TODO: Here and below,
+ // use better symbolic constants along the lines of
+ // Inst::NumberDeleted and Inst::NumberSentinel instead of -1
+ // and 0.
+ Live->addLiveRange(Arg, -1, 0, 0);
+ }
+ // Do the same for i64 args that may have been lowered into i32
+ // Lo and Hi components.
+ Variable *Lo = Arg->getLo();
+ if (Lo && !Live->getLiveRange(Lo).isEmpty())
+ Live->addLiveRange(Lo, -1, 0, 0);
+ Variable *Hi = Arg->getHi();
+ if (Hi && !Live->getLiveRange(Hi).isEmpty())
+ Live->addLiveRange(Hi, -1, 0, 0);
+ }
+ // Copy Liveness::LiveRanges into individual variables. TODO:
+ // Remove Variable::LiveRange and redirect to
+ // Liveness::LiveRanges. TODO: make sure Variable weights
+ // are applied properly.
+ SizeT NumVars = Variables.size();
+ for (SizeT i = 0; i < NumVars; ++i) {
+ Variable *Var = Variables[i];
+ Var->setLiveRange(Live->getLiveRange(Var));
+ if (Var->getWeight().isInf())
+ Var->setLiveRangeInfiniteWeight();
+ }
+ T_liveRange.printElapsedUs(getContext(), "live range construction");
+ dump();
+ }
+}
+
+// Traverse every Variable of every Inst and verify that it
+// appears within the Variable's computed live range.
+bool Cfg::validateLiveness() const {
+ bool Valid = true;
+ Ostream &Str = Ctx->getStrDump();
+ for (NodeList::const_iterator I1 = Nodes.begin(), E1 = Nodes.end(); I1 != E1;
+ ++I1) {
+ CfgNode *Node = *I1;
+ InstList &Insts = Node->getInsts();
+ for (InstList::const_iterator I2 = Insts.begin(), E2 = Insts.end();
+ I2 != E2; ++I2) {
+ Inst *Inst = *I2;
+ if (Inst->isDeleted())
+ continue;
+ if (llvm::isa<InstFakeKill>(Inst))
+ continue;
+ InstNumberT InstNumber = Inst->getNumber();
+ Variable *Dest = Inst->getDest();
+ if (Dest) {
+ // TODO: This instruction should actually begin Dest's live
+ // range, so we could probably test that this instruction is
+ // the beginning of some segment of Dest's live range. But
+ // this wouldn't work with non-SSA temporaries during
+ // lowering.
+ if (!Dest->getLiveRange().containsValue(InstNumber)) {
+ Valid = false;
+ Str << "Liveness error: inst " << Inst->getNumber() << " dest ";
+ Dest->dump(this);
+ Str << " live range " << Dest->getLiveRange() << "\n";
+ }
+ }
+ for (SizeT I = 0; I < Inst->getSrcSize(); ++I) {
+ Operand *Src = Inst->getSrc(I);
+ SizeT NumVars = Src->getNumVars();
+ for (SizeT J = 0; J < NumVars; ++J) {
+ const Variable *Var = Src->getVar(J);
+ if (!Var->getLiveRange().containsValue(InstNumber)) {
+ Valid = false;
+ Str << "Liveness error: inst " << Inst->getNumber() << " var ";
+ Var->dump(this);
+ Str << " live range " << Var->getLiveRange() << "\n";
+ }
+ }
+ }
+ }
+ }
+ return Valid;
+}
+
// ======================== Dump routines ======================== //
void Cfg::emit() {
@@ -158,8 +309,13 @@
T_emit.printElapsedUs(Ctx, "emit()");
}
-void Cfg::dump() {
+// Dumps the IR with an optional introductory message.
+void Cfg::dump(const IceString &Message) {
+ if (!Ctx->isVerbose())
+ return;
Ostream &Str = Ctx->getStrDump();
+ if (!Message.empty())
+ Str << "================ " << Message << " ================\n";
setCurrentNode(getEntryNode());
// Print function name+args
if (getContext()->isVerbose(IceV_Instructions)) {
@@ -176,6 +332,18 @@
Str << ") {\n";
}
setCurrentNode(NULL);
+ if (getContext()->isVerbose(IceV_Liveness)) {
+ // Print summary info about variables
+ for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
+ I != E; ++I) {
+ Variable *Var = *I;
+ Str << "//"
+ << " multiblock=" << Var->isMultiblockLife() << " "
+ << " weight=" << Var->getWeight() << " ";
+ Var->dump(this);
+ Str << " LIVE=" << Var->getLiveRange() << "\n";
+ }
+ }
// Print each basic block
for (NodeList::const_iterator I = Nodes.begin(), E = Nodes.end(); I != E;
++I) {