Short Circuit Evaluation
Split Nodes whenever an early jump is possible by short circuiting boolean
operations. Nodes are split after conservatively checking for side effects,
which include definition of multi block variables, function calls and
instructions involving memory.
BUG=None
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/2069923004 .
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index c2b4d06..0bcda2b 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -640,6 +640,69 @@
}
}
+void Cfg::shortCircuitJumps() {
+ // Split Nodes whenever an early jump is possible.
+ // __N :
+ // a = <something>
+ // Instruction 1 without side effect
+ // ... b = <something> ...
+ // Instruction N without side effect
+ // t1 = or a b
+ // br t1 __X __Y
+ //
+ // is transformed into:
+ // __N :
+ // a = <something>
+ // br a __X __N_ext
+ //
+ // __N_ext :
+ // Instruction 1 without side effect
+ // ... b = <something> ...
+ // Instruction N without side effect
+ // br b __X __Y
+ // (Similar logic for AND, jump to false instead of true target.)
+
+ TimerMarker T(TimerStack::TT_shortCircuit, this);
+ getVMetadata()->init(VMK_Uses);
+ auto NodeStack = this->getNodes();
+ CfgUnorderedMap<SizeT, CfgVector<CfgNode *>> Splits;
+ while (!NodeStack.empty()) {
+ auto *Node = NodeStack.back();
+ NodeStack.pop_back();
+ auto NewNode = Node->shortCircuit();
+ if (NewNode) {
+ NodeStack.push_back(NewNode);
+ NodeStack.push_back(Node);
+ Splits[Node->getIndex()].push_back(NewNode);
+ }
+ }
+
+ // Insert nodes in the right place
+ NodeList NewList;
+ NewList.reserve(Nodes.size());
+ CfgUnorderedSet<SizeT> Inserted;
+ for (auto *Node : Nodes) {
+ if (Inserted.find(Node->getIndex()) != Inserted.end())
+ continue; // already inserted
+ NodeList Stack{Node};
+ while (!Stack.empty()) {
+ auto *Current = Stack.back();
+ Stack.pop_back();
+ Inserted.insert(Current->getIndex());
+ NewList.push_back(Current);
+ for (auto *Next : Splits[Current->getIndex()]) {
+ Stack.push_back(Next);
+ }
+ }
+ }
+
+ SizeT NodeIndex = 0;
+ for (auto *Node : NewList) {
+ Node->resetIndex(NodeIndex++);
+ }
+ Nodes = NewList;
+}
+
void Cfg::doArgLowering() {
TimerMarker T(TimerStack::TT_doArgLowering, this);
getTarget()->lowerArguments();
diff --git a/src/IceCfg.h b/src/IceCfg.h
index c656961..181585f 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -201,6 +201,7 @@
void reorderNodes();
void shuffleNodes();
void localCSE();
+ void shortCircuitJumps();
/// Scan allocas to determine whether we need to use a frame pointer.
/// If SortAndCombine == true, merge all the fixed-size allocas in the
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 78e3d18..c92c9eb 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -51,6 +51,22 @@
}
}
+void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
+ for (SizeT i = 0; i < InEdges.size(); ++i) {
+ if (InEdges[i] == Old) {
+ InEdges[i] = New;
+ }
+ }
+ for (auto &Inst : getPhis()) {
+ auto &Phi = llvm::cast<InstPhi>(Inst);
+ for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
+ if (Phi.getLabel(i) == Old) {
+ Phi.setLabel(i, New);
+ }
+ }
+ }
+}
+
namespace {
template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
const bool DoDelete =
@@ -1472,4 +1488,156 @@
Insts.push_front(Instr);
}
+void CfgNode::removeInEdge(CfgNode *In) {
+ InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
+}
+
+CfgNode *CfgNode::shortCircuit() {
+ auto *Func = getCfg();
+ auto *Last = &getInsts().back();
+ Variable *Condition = nullptr;
+ InstBr *Br = nullptr;
+ if ((Br = llvm::dyn_cast<InstBr>(Last))) {
+ if (!Br->isUnconditional()) {
+ Condition = llvm::dyn_cast<Variable>(Br->getCondition());
+ }
+ }
+ if (Condition == nullptr)
+ return nullptr;
+
+ auto *JumpOnTrue = Br->getTargetTrue();
+ auto *JumpOnFalse = Br->getTargetFalse();
+
+ bool FoundOr = false;
+ bool FoundAnd = false;
+
+ InstArithmetic *TopLevelBoolOp = nullptr;
+
+ for (auto &Inst : reverse_range(getInsts())) {
+ if (Inst.isDeleted())
+ continue;
+ if (Inst.getDest() == Condition) {
+ if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
+
+ FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
+ FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);
+
+ if (FoundOr || FoundAnd) {
+ TopLevelBoolOp = Arith;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!TopLevelBoolOp)
+ return nullptr;
+
+ auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
+ for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
+ if (Instr->getSrc(i) == Opr)
+ return true;
+ }
+ return false;
+ };
+ Inst *FirstOperandDef = nullptr;
+ for (auto &Inst : getInsts()) {
+ if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
+ FirstOperandDef = &Inst;
+ break;
+ }
+ }
+
+ if (FirstOperandDef == nullptr) {
+ return nullptr;
+ }
+
+ // Check for side effects
+ auto It = Ice::instToIterator(FirstOperandDef);
+ while (It != getInsts().end()) {
+ if (It->isDeleted()) {
+ ++It;
+ continue;
+ }
+ if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
+ break;
+ }
+ auto *Dest = It->getDest();
+ if (It->getDest() == nullptr || It->hasSideEffects() ||
+ !Func->getVMetadata()->isSingleBlock(Dest)) {
+ // Relying on short cicuit eval here.
+ // getVMetadata()->isSingleBlock(Dest)
+ // will segfault if It->getDest() == nullptr
+ return nullptr;
+ }
+ It++;
+ }
+
+ auto *NewNode = Func->makeNode();
+ NewNode->setLoopNestDepth(getLoopNestDepth());
+ It = Ice::instToIterator(FirstOperandDef);
+ It++; // Have to split after the def
+
+ NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
+ getInsts().end());
+
+ if (BuildDefs::dump()) {
+ NewNode->setName(getName().append("_2"));
+ setName(getName().append("_1"));
+ }
+
+ // Point edges properly
+ NewNode->addInEdge(this);
+ for (auto *Out : getOutEdges()) {
+ NewNode->addOutEdge(Out);
+ Out->addInEdge(NewNode);
+ }
+ removeAllOutEdges();
+ addOutEdge(NewNode);
+
+ // Manage Phi instructions of successors
+ for (auto *Succ : NewNode->getOutEdges()) {
+ for (auto &Inst : Succ->getPhis()) {
+ auto *Phi = llvm::cast<InstPhi>(&Inst);
+ for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
+ if (Phi->getLabel(i) == this) {
+ Phi->addArgument(Phi->getSrc(i), NewNode);
+ }
+ }
+ }
+ }
+
+ // Create new Br instruction
+ InstBr *NewInst = nullptr;
+ if (FoundOr) {
+ addOutEdge(JumpOnTrue);
+ JumpOnFalse->removeInEdge(this);
+ NewInst =
+ InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
+ } else if (FoundAnd) {
+ addOutEdge(JumpOnFalse);
+ JumpOnTrue->removeInEdge(this);
+ NewInst =
+ InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
+ } else {
+ return nullptr;
+ }
+
+ assert(NewInst != nullptr);
+ appendInst(NewInst);
+
+ Operand *UnusedOperand = nullptr;
+ assert(TopLevelBoolOp->getSrcSize() == 2);
+ if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
+ UnusedOperand = TopLevelBoolOp->getSrc(1);
+ else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
+ UnusedOperand = TopLevelBoolOp->getSrc(0);
+ assert(UnusedOperand);
+
+ Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br
+
+ TopLevelBoolOp->setDeleted();
+ return NewNode;
+}
+
} // end of namespace Ice
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 45b25fc..40c7d85 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -116,10 +116,14 @@
void addOutEdge(CfgNode *Out) { OutEdges.push_back(Out); }
void addInEdge(CfgNode *In) { InEdges.push_back(In); }
+ void replaceInEdge(CfgNode *Old, CfgNode *New);
+ void removeAllOutEdges() { OutEdges.clear(); }
+ void removeInEdge(CfgNode *In);
bool hasSingleOutEdge() const {
return (getOutEdges().size() == 1 || getOutEdges()[0] == getOutEdges()[1]);
}
+ CfgNode *shortCircuit();
private:
CfgNode(Cfg *Func, SizeT Number)
diff --git a/src/IceClFlags.def b/src/IceClFlags.def
index 7488698..bd0c20f 100644
--- a/src/IceClFlags.def
+++ b/src/IceClFlags.def
@@ -140,13 +140,16 @@
"information to stdout at the end of program execution."), \
cl::init(false)) \
\
- X(EnableExperimental, bool, dev_opt_flag, "enable-experimental", \
+ X(EnableExperimental, bool, dev_opt_flag, "enable-experimental", \
cl::desc("Enable Optimizations not yet part of O2"), \
cl::init(false)) \
\
X(EnablePhiEdgeSplit, bool, dev_opt_flag, "phi-edge-split", \
cl::desc("Enable edge splitting for Phi lowering"), cl::init(true)) \
\
+ X(EnableShortCircuit, bool, dev_opt_flag, "enable-sc", \
+ cl::desc("Split Nodes for short circuit evaluation"), cl::init(false)) \
+ \
X(ExcludedRegisters, std::string, dev_list_flag, "reg-exclude", \
cl::CommaSeparated, cl::desc("Don't use specified registers")) \
\
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index ff577b7..f9cfddd 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -77,7 +77,9 @@
Inst::Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest)
: Kind(Kind), Number(Func->newInstNumber()), Dest(Dest), MaxSrcs(MaxSrcs),
- Srcs(Func->allocateArrayOf<Operand *>(MaxSrcs)), LiveRangesEnded(0) {}
+ LiveRangesEnded(0) {
+ Srcs.reserve(MaxSrcs);
+}
const char *Inst::getInstName() const {
if (!BuildDefs::dump())
@@ -393,7 +395,7 @@
InstPhi::InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest)
: InstHighLevel(Func, Phi, MaxSrcs, Dest) {
- Labels = Func->allocateArrayOf<CfgNode *>(MaxSrcs);
+ Labels.reserve(MaxSrcs);
}
// TODO: A Switch instruction (and maybe others) can add duplicate edges. We
@@ -401,7 +403,8 @@
// are the same for duplicate edges), though it seems the current lowering code
// is OK with this situation.
void InstPhi::addArgument(Operand *Source, CfgNode *Label) {
- Labels[getSrcSize()] = Label;
+ assert(Label);
+ Labels.push_back(Label);
addSource(Source);
}
diff --git a/src/IceInst.h b/src/IceInst.h
index 0f2cac2..029fcc5 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -103,13 +103,13 @@
Variable *getDest() const { return Dest; }
- SizeT getSrcSize() const { return NumSrcs; }
+ SizeT getSrcSize() const { return Srcs.size(); }
Operand *getSrc(SizeT I) const {
assert(I < getSrcSize());
return Srcs[I];
}
void replaceSource(SizeT Index, Operand *Replacement) {
- assert(Index < NumSrcs);
+ assert(Index < getSrcSize());
assert(!isDeleted());
assert(LiveRangesEnded == 0);
// Invalidates liveness info because the use Srcs[Index] is removed.
@@ -189,8 +189,7 @@
Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest);
void addSource(Operand *Src) {
assert(Src);
- assert(NumSrcs < MaxSrcs);
- Srcs[NumSrcs++] = Src;
+ Srcs.push_back(Src);
}
void setLastUse(SizeT VarIndex) {
if (VarIndex < CHAR_BIT * sizeof(LiveRangesEnded))
@@ -199,7 +198,7 @@
void resetLastUses() { LiveRangesEnded = 0; }
/// The destroy() method lets the instruction cleanly release any memory that
/// was allocated via the Cfg's allocator.
- virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); }
+ virtual void destroy(Cfg *) {}
const InstKind Kind;
/// Number is the instruction number for describing live ranges.
@@ -226,8 +225,8 @@
Variable *Dest;
const SizeT MaxSrcs; // only used for assert
- SizeT NumSrcs = 0;
- Operand **Srcs;
+
+ CfgVector<Operand *> Srcs;
/// LiveRangesEnded marks which Variables' live ranges end in this
/// instruction. An instruction can have an arbitrary number of source
@@ -666,15 +665,12 @@
private:
InstPhi(Cfg *Func, SizeT MaxSrcs, Variable *Dest);
- void destroy(Cfg *Func) override {
- Func->deallocateArrayOf<CfgNode *>(Labels);
- Inst::destroy(Func);
- }
+ void destroy(Cfg *Func) override { Inst::destroy(Func); }
/// Labels[] duplicates the InEdges[] information in the enclosing CfgNode,
/// but the Phi instruction is created before InEdges[] is available, so it's
/// more complicated to share the list.
- CfgNode **Labels;
+ CfgVector<CfgNode *> Labels;
};
/// Ret instruction. The return value is captured in getSrc(0), but if there is
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 0cd2789..09bce6d 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -447,6 +447,10 @@
Func->localCSE();
Func->dump("After Local CSE");
}
+ if (getFlags().getEnableShortCircuit()) {
+ Func->shortCircuitJumps();
+ Func->dump("After Short Circuiting");
+ }
if (!getFlags().getEnablePhiEdgeSplit()) {
// Lower Phi instructions.
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def
index 41d73c2..d042175 100644
--- a/src/IceTimerTree.def
+++ b/src/IceTimerTree.def
@@ -61,6 +61,7 @@
X(qTransPush) \
X(regAlloc) \
X(renumberInstructions) \
+ X(shortCircuit) \
X(szmain) \
X(translate) \
X(translateFunctions) \