//===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the CfgNode class, including the complexities of
/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//

#include "IceCfgNode.h"

#include "IceAssembler.h"
#include "IceCfg.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {

// Adds an instruction to either the Phi list or the regular instruction list.
// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Instr) {
  ++InstCountEstimate;

  if (BuildDefs::wasm()) {
    if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
      for (auto *N : Instr->getTerminatorEdges()) {
        N->addInEdge(this);
        addOutEdge(N);
      }
    }
  }

  if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
    if (!Insts.empty()) {
      Func->setError("Phi instruction added to the middle of a block");
      return;
    }
    Phis.push_back(Phi);
  } else {
    Insts.push_back(Instr);
  }
}

void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
  for (SizeT i = 0; i < InEdges.size(); ++i) {
    if (InEdges[i] == Old) {
      InEdges[i] = New;
    }
  }
  for (auto &Inst : getPhis()) {
    auto &Phi = llvm::cast<InstPhi>(Inst);
    for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
      if (Phi.getLabel(i) == Old) {
        Phi.setLabel(i, New);
      }
    }
  }
}

namespace {
template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
  const bool DoDelete =
      BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
  auto I = L->begin(), E = L->end(), Next = I;
  for (++Next; I != E; I = Next++) {
    if (DoDelete && I->isDeleted()) {
      L->remove(I);
    } else {
      I->renumber(Func);
    }
  }
}
} // end of anonymous namespace

void CfgNode::renumberInstructions() {
  InstNumberT FirstNumber = Func->getNextInstNumber();
  removeDeletedAndRenumber(&Phis, Func);
  removeDeletedAndRenumber(&Insts, Func);
  InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}

// When a node is created, the OutEdges are immediately known, but the InEdges
// have to be built up incrementally. After the CFG has been constructed, the
// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
  for (CfgNode *Succ : OutEdges)
    Succ->InEdges.push_back(this);
}

void CfgNode::computeSuccessors() {
  OutEdges.clear();
  InEdges.clear();
  assert(!Insts.empty());
  OutEdges = Insts.rbegin()->getTerminatorEdges();
}

// Ensure each Phi instruction in the node is consistent with respect to
// control flow. For each predecessor, there must be a phi argument with that
// label. If a phi argument's label doesn't appear in the predecessor list
// (which can happen as a result of e.g. unreachable node elimination), its
// value is modified to be zero, to maintain consistency in liveness analysis.
// This allows us to remove some dead control flow without a major rework of
// the phi instructions. We don't check that phi arguments with the same label
// have the same value.
void CfgNode::enforcePhiConsistency() {
  for (Inst &Instr : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&Instr);
    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    // shows up as only about 0.2% of the total translation time. But if
    // necessary, we could improve the complexity by using a hash table to
    // count how many times each node is referenced in the Phi instruction,
    // and how many times each node is referenced in the incoming edge list,
    // and compare the two for equality.
    for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
      CfgNode *Label = Phi->getLabel(i);
      bool Found = false;
      for (CfgNode *InNode : getInEdges()) {
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found) {
        // Predecessor was unreachable, so if (impossibly) the control flow
        // enters from that predecessor, the value should be zero.
        Phi->clearOperandForTarget(Label);
      }
    }
    for (CfgNode *InNode : getInEdges()) {
      bool Found = false;
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        CfgNode *Label = Phi->getLabel(i);
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: missing label for incoming edge");
    }
  }
}

// This does part 1 of Phi lowering, by creating a new dest variable for each
// Phi instruction, replacing the Phi instruction's dest with that variable,
// and adding an explicit assignment of the old dest to the new dest. For
// example,
//   a=phi(...)
// changes to
//   "a_phi=phi(...); a=a_phi".
//
// This is in preparation for part 2 which deletes the Phi instructions and
// appends assignment instructions to predecessor blocks. Note that this
// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
  for (Inst &I : Phis) {
    // The Phi list contains only InstPhi instructions, and the result is
    // dereferenced unconditionally, so use the checked cast form.
    auto *Phi = llvm::cast<InstPhi>(&I);
    Insts.insert(Insts.begin(), Phi->lower(Func));
  }
}

// This does part 2 of Phi lowering. For each Phi instruction at each
// out-edge, create a corresponding assignment instruction, and add all the
// assignments near the end of this block. They need to be added before any
// branch instruction, and if the block ends with a compare instruction
// followed by a branch instruction that we may want to fuse, it's better to
// insert the new assignments before the compare instruction. The
// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
//
// Note that this transformation takes the Phi dest variables out of SSA form,
// as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
  // Find the insertion point.
  InstList::iterator InsertionPoint = Insts.end();
  // Every block must end in a terminator instruction, and therefore must have
  // at least one instruction, so it's valid to decrement InsertionPoint (but
  // assert just in case).
  assert(InsertionPoint != Insts.begin());
  --InsertionPoint;
  // Confirm that InsertionPoint is a terminator instruction. Calling
  // getTerminatorEdges() on a non-terminator instruction will cause an
  // llvm_unreachable().
  (void)InsertionPoint->getTerminatorEdges();
  // SafeInsertionPoint is always immediately before the terminator
  // instruction. If the block ends in a compare and conditional branch, it's
  // better to place the Phi store before the compare so as not to interfere
  // with compare/branch fusing. However, if the compare instruction's dest
  // operand is the same as the new assignment statement's source operand,
  // this can't be done due to data dependences, so we need to fall back to
  // the SafeInsertionPoint. To illustrate:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %98 = icmp ne i8 %97, 0
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
  //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
  // would be Phi-lowered as:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %100_phi = %97 ; can be at InsertionPoint
  //   %98 = icmp ne i8 %97, 0
  //   %101_phi = %98 ; must be at SafeInsertionPoint
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = %100_phi
  //   %101 = %101_phi
  //
  // TODO(stichnot): It may be possible to bypass this whole
  // SafeInsertionPoint mechanism. If a source basic block ends in a
  // conditional branch:
  //   labelSource:
  //   ...
  //   br i1 %foo, label %labelTrue, label %labelFalse
  // and a branch target has a Phi involving the branch operand:
  //   labelTrue:
  //   %bar = phi i1 [ %foo, %labelSource ], ...
  // then we actually know the constant i1 value of the Phi operand:
  //   labelTrue:
  //   %bar = phi i1 [ true, %labelSource ], ...
  // It seems that this optimization should be done by clang or opt, but we
  // could also do it here.
  InstList::iterator SafeInsertionPoint = InsertionPoint;
  // Keep track of the dest variable of a compare instruction, so that we
  // insert the new instruction at the SafeInsertionPoint if the compare's
  // dest matches the Phi-lowered assignment's source.
  Variable *CmpInstDest = nullptr;
  // If the current insertion point is at a conditional branch instruction,
  // and the previous instruction is a compare instruction, then we move the
  // insertion point before the compare instruction so as not to interfere
  // with compare/branch fusing.
  if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    if (!Branch->isUnconditional()) {
      if (InsertionPoint != Insts.begin()) {
        --InsertionPoint;
        if (llvm::isa<InstIcmp>(InsertionPoint) ||
            llvm::isa<InstFcmp>(InsertionPoint)) {
          CmpInstDest = InsertionPoint->getDest();
        } else {
          ++InsertionPoint;
        }
      }
    }
  }

  // Consider every out-edge.
  for (CfgNode *Succ : OutEdges) {
    // Consider every Phi instruction at the out-edge.
    for (Inst &I : Succ->Phis) {
      auto *Phi = llvm::cast<InstPhi>(&I);
      Operand *Src = Phi->getOperandForTarget(this);
      assert(Src);
      Variable *Dest = I.getDest();
      assert(Dest);
      auto *NewInst = InstAssign::create(Func, Dest, Src);
      if (CmpInstDest == Src)
        Insts.insert(SafeInsertionPoint, NewInst);
      else
        Insts.insert(InsertionPoint, NewInst);
    }
  }
}

// Deletes the phi instructions after the loads and stores are placed.
void CfgNode::deletePhis() {
  for (Inst &I : Phis)
    I.setDeleted();
}

// Splits the edge from Pred to this node by creating a new node and hooking
// up the in and out edges appropriately. (The EdgeIndex parameter is only
// used to make the new node's name unique when there are multiple edges
// between the same pair of nodes.) The new node's instruction list is
// initialized to the empty list, with no terminator instruction. There must
// not be multiple edges from Pred to this node, so all
// Inst::getTerminatorEdges implementations must not contain duplicates.
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
  CfgNode *NewNode = Func->makeNode();
  // Use the minimum of the two loop nest depths: if both nodes are at the
  // same depth, either choice works, but if one is outside the loop and the
  // other is inside, the new node should be placed outside so that it is not
  // executed multiple times within the loop.
  NewNode->setLoopNestDepth(
      std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
  if (BuildDefs::dump())
    NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                     std::to_string(EdgeIndex));
  // The new node is added to the end of the node list, and will later need to
  // be sorted into a reasonable topological order.
  NewNode->setNeedsPlacement(true);
  // Repoint Pred's out-edge.
  bool Found = false;
  for (CfgNode *&I : Pred->OutEdges) {
    if (I == this) {
      I = NewNode;
      NewNode->InEdges.push_back(Pred);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint this node's in-edge.
  Found = false;
  for (CfgNode *&I : InEdges) {
    if (I == Pred) {
      I = NewNode;
      NewNode->OutEdges.push_back(this);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint all suitable branch instructions' target and return.
  Found = false;
  for (Inst &I : Pred->getInsts())
    if (!I.isDeleted() && I.repointEdges(this, NewNode))
      Found = true;
  assert(Found);
  (void)Found;
  return NewNode;
}

namespace {

// Helpers for advancedPhiLowering().

class PhiDesc {
  PhiDesc() = delete;
  PhiDesc(const PhiDesc &) = delete;
  PhiDesc &operator=(const PhiDesc &) = delete;

public:
  PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
  PhiDesc(PhiDesc &&) = default;
  InstPhi *Phi = nullptr;
  Variable *Dest = nullptr;
  Operand *Src = nullptr;
  bool Processed = false;
  size_t NumPred = 0; // number of entries whose Src is this Dest
  int32_t Weight = 0; // preference for topological order
};
using PhiDescList = llvm::SmallVector<PhiDesc, 32>;

// Always pick NumPred=0 over NumPred>0.
constexpr int32_t WeightNoPreds = 8;
// Prefer Src as a register because the register might free up.
constexpr int32_t WeightSrcIsReg = 4;
// Prefer Dest not as a register because the register stays free longer.
constexpr int32_t WeightDestNotReg = 2;
// Prefer NumPred=1 over NumPred>1. This is used as a tiebreaker when a
// dependency cycle must be broken so that hopefully only one temporary
// assignment has to be added to break the cycle.
constexpr int32_t WeightOnePred = 1;
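
// For illustration: with these constants, an assignment that has no remaining
// predecessors (WeightNoPreds), a register-allocated Src (WeightSrcIsReg),
// and a stack-allocated Dest (WeightDestNotReg) gets weight 8+4+2=14, so it
// is scheduled ahead of an assignment still inside a dependency cycle
// (NumPred>1) with a stack Src and a register Dest, whose weight is 0.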

bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
                  const Operand *Opnd) {
  if (Var1 == Opnd)
    return true;
  const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
  if (Var2 == nullptr)
    return false;

  // If either operand lacks a register, they cannot be the same.
  if (!Var1->hasReg())
    return false;
  if (!Var2->hasReg())
    return false;

  const auto RegNum1 = Var1->getRegNum();
  const auto RegNum2 = Var2->getRegNum();
  // Quick common-case check.
  if (RegNum1 == RegNum2)
    return true;

  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
         Target->getAliasesForRegister(RegNum2)[RegNum1]);
  return Target->getAliasesForRegister(RegNum1)[RegNum2];
}
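
// Note that two syntactically distinct variables are still treated as "the
// same" by sameVarOrReg() when both are allocated to aliasing registers,
// e.g. (on x86) a 32-bit register and one of its 16-bit or 8-bit
// sub-registers.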

// Update NumPred for all Phi assignments using Var as their Dest variable.
// Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
  for (PhiDesc &Item : Desc) {
    if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
      --Item.NumPred;
      if (Item.NumPred == 1) {
        // If NumPred changed from 2 to 1, add in WeightOnePred.
        Item.Weight += WeightOnePred;
      } else if (Item.NumPred == 0) {
        // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
        // WeightNoPreds.
        Item.Weight += (WeightNoPreds - WeightOnePred);
      }
    }
  }
}

} // end of anonymous namespace

// This is the "advanced" version of Phi lowering for a basic block, in
// contrast to the simple version that lowers through assignments involving
// temporaries.
//
// All Phi instructions in a basic block are conceptually executed in
// parallel. However, if we lower Phis early and commit to a sequential
// ordering, we may end up creating unnecessary interferences which lead to
// worse register allocation. Delaying Phi scheduling until after register
// allocation can help, unless there are no free registers for shuffling
// registers or stack slots, and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
// B. Preexisting register assignments are considered in the topological
// sort. If a topological sort is not possible due to a cycle, the cycle is
// broken by introducing a non-parallel temporary. For example, a cycle
// arising from a permutation like "A=B;B=C;C=A" can become
// "T=A;A=B;B=C;C=T". All else being equal, prefer to schedule assignments
// with register-allocated Src operands earlier, in case that register
// becomes free afterwards, and prefer to schedule assignments with
// register-allocated Dest variables later, to keep that register free for
// longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
// list is lowered by the target lowering layer. Since the assignment
// lowering may create new infinite-weight temporaries, a follow-on register
// allocation pass will be needed. To prepare for this, liveness (including
// live range calculation) of the split nodes needs to be calculated, and
// liveness of the original node needs to be updated to "undo" the effects
// of the phi assignments.
//
// The specific placement of the new node within the Cfg node list is
// deferred until later, including after empty node contraction.
//
// After phi assignments are lowered across all blocks, another register
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already
// been precisely calculated). The register allocator in this mode needs the
// ability to forcibly spill and reload registers in case none are naturally
// available.
void CfgNode::advancedPhiLowering() {
  if (getPhis().empty())
    return;

  PhiDescList Desc;

  for (Inst &I : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&I);
    if (!Phi->isDeleted()) {
      Variable *Dest = Phi->getDest();
      Desc.emplace_back(Phi, Dest);
      // Undo the effect of the phi instruction on this node's live-in set by
      // marking the phi dest variable as live on entry.
      SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
      auto &LiveIn = Func->getLiveness()->getLiveIn(this);
      if (VarNum < LiveIn.size()) {
        assert(!LiveIn[VarNum]);
        LiveIn[VarNum] = true;
      }
      Phi->setDeleted();
    }
  }
  if (Desc.empty())
    return;

  TargetLowering *Target = Func->getTarget();
  SizeT InEdgeIndex = 0;
  for (CfgNode *Pred : InEdges) {
    CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    SizeT Remaining = Desc.size();

    // First pass computes Src and initializes NumPred.
    for (PhiDesc &Item : Desc) {
      Variable *Dest = Item.Dest;
      Operand *Src = Item.Phi->getOperandForTarget(Pred);
      Item.Src = Src;
      Item.Processed = false;
      Item.NumPred = 0;
      // Cherry-pick any trivial assignments, so that they don't contribute
      // to the running complexity of the topological sort.
      if (sameVarOrReg(Target, Dest, Src)) {
        Item.Processed = true;
        --Remaining;
        if (Dest != Src)
          // If Dest and Src are syntactically the same, don't bother adding
          // the assignment, because in all respects it would be redundant,
          // and if Dest/Src are on the stack, the target lowering may
          // naively decide to lower it using a temporary register.
          Split->appendInst(InstAssign::create(Func, Dest, Src));
      }
    }
    // Second pass computes NumPred by comparing every pair of Phi
    // instructions.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      const Variable *Dest = Item.Dest;
      for (PhiDesc &Item2 : Desc) {
        if (Item2.Processed)
          continue;
        // There shouldn't be two different Phis with the same Dest variable
        // or register.
        assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
        if (sameVarOrReg(Target, Dest, Item2.Src))
          ++Item.NumPred;
      }
    }

    // Another pass to compute initial Weight values.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      int32_t Weight = 0;
      if (Item.NumPred == 0)
        Weight += WeightNoPreds;
      if (Item.NumPred == 1)
        Weight += WeightOnePred;
      if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
        if (Var->hasReg())
          Weight += WeightSrcIsReg;
      if (!Item.Dest->hasReg())
        Weight += WeightDestNotReg;
      Item.Weight = Weight;
    }

    // Repeatedly choose and process the best candidate in the topological
    // sort, until no candidates remain. This implementation is O(N^2) where
    // N is the number of Phi instructions, but with a small constant factor
    // compared to a likely O(N) topological sort implementation.
    for (; Remaining; --Remaining) {
      int32_t BestWeight = -1;
      PhiDesc *BestItem = nullptr;
      // Find the best candidate.
      for (PhiDesc &Item : Desc) {
        if (Item.Processed)
          continue;
        const int32_t Weight = Item.Weight;
        if (Weight > BestWeight) {
          BestItem = &Item;
          BestWeight = Weight;
        }
      }
      assert(BestWeight >= 0);
      Variable *Dest = BestItem->Dest;
      Operand *Src = BestItem->Src;
      assert(!sameVarOrReg(Target, Dest, Src));
      // Break a cycle by introducing a temporary.
      while (BestItem->NumPred > 0) {
        bool Found = false;
        // If the target instruction "A=B" is part of a cycle, find the "X=A"
        // assignment in the cycle because it will have to be rewritten as
        // "X=tmp".
        for (PhiDesc &Item : Desc) {
          if (Item.Processed)
            continue;
          Operand *OtherSrc = Item.Src;
          if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
            SizeT VarNum = Func->getNumVariables();
            Variable *Tmp = Func->makeVariable(OtherSrc->getType());
            if (BuildDefs::dump())
              Tmp->setName(Func, "__split_" + std::to_string(VarNum));
            Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
            Item.Src = Tmp;
            updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
            Found = true;
            break;
          }
        }
        assert(Found);
        (void)Found;
      }
      // Now that a cycle (if any) has been broken, create the actual
      // assignment.
      Split->appendInst(InstAssign::create(Func, Dest, Src));
      if (auto *Var = llvm::dyn_cast<Variable>(Src))
        updatePreds(Desc, Target, Var);
      BestItem->Processed = true;
    }
    Split->appendInst(InstBr::create(Func, this));

    Split->genCode();
    Func->getVMetadata()->addNode(Split);
    // Validate to be safe. All items should be marked as processed, and have
    // no predecessors.
    if (BuildDefs::asserts()) {
      for (PhiDesc &Item : Desc) {
        (void)Item;
        assert(Item.Processed);
        assert(Item.NumPred == 0);
      }
    }
  }
}

// Does address mode optimization. Passes each instruction to the
// TargetLowering object. If it returns a new instruction (representing the
// optimized address mode), then the new instruction is inserted and the old
// one deleted.
void CfgNode::doAddressOpt() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  while (!Context.atEnd()) {
    Target->doAddressOpt();
  }
}

void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  Context.setInsertPoint(Context.getCur());
  // Do not insert nops within bundle-locked regions.
  bool PauseNopInsertion = false;
  while (!Context.atEnd()) {
    if (llvm::isa<InstBundleLock>(Context.getCur())) {
      PauseNopInsertion = true;
    } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
      PauseNopInsertion = false;
    }
    if (!PauseNopInsertion)
      Target->doNopInsertion(RNG);
    // Ensure Cur=Next, so that the nops are inserted before the current
    // instruction rather than after.
    Context.advanceCur();
    Context.advanceNext();
  }
}

// Drives the target lowering. Passes the current instruction and the next
// non-deleted instruction for target lowering.
void CfgNode::genCode() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  // Lower the regular instructions.
  Context.init(this);
  Target->initNodeForLowering(this);
  while (!Context.atEnd()) {
    InstList::iterator Orig = Context.getCur();
    if (llvm::isa<InstRet>(*Orig))
      setHasReturn();
    Target->lower();
    // Ensure target lowering actually moved the cursor.
    assert(Context.getCur() != Orig);
  }
  Context.availabilityReset();
  // Do preliminary lowering of the Phi instructions.
  Target->prelowerPhis();
}

void CfgNode::livenessLightweight() {
  SizeT NumVars = Func->getNumVariables();
  LivenessBV Live(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
}

// Performs liveness analysis on the block. Returns true if the incoming
// liveness changed from before, false if it stayed the same. (If it changes,
// the node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
  LivenessBV &Live = Liveness->getScratchBV();
  Live.clear();

  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number
    // of instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }

  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
    assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
    Live |= LiveIn;
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto *Phi = llvm::cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  assert(Live.empty() || Live.size() == NumGlobalVars);
  Liveness->getLiveOut(this) = Live;

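  // Note on layout: liveness indices place the NumGlobalVars multi-block
  // variables in the low bit positions and this block's single-block
  // variables above them, which is what makes the resize/truncate steps
  // below valid.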
  // Expand Live so it can hold locals in addition to globals.
  Live.resize(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions
  // in the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by testing the upper bits
  // of the Live bitvector.
  if (Live.find_next(NumGlobalVars) != -1) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics
      // and abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "Invalid Live =";
      for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
        if (Live.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }
  // Now truncate Live to prevent LiveIn from growing.
  Live.resize(NumGlobalVars);

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
  // Add in the current LiveIn.
  Live |= LiveIn;
  // Check the result, and set LiveIn=Live.
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}

// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
  if (!BuildDefs::asserts())
    return true;

  // Verify there are no duplicates.
  auto ComparePair =
      [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
        return A.first == B.first;
      };
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
          MapBegin.end() &&
      std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
          MapEnd.end())
    return true;

  // There is definitely a liveness error. All paths from here return false.
  if (!BuildDefs::dump())
    return false;

  // Print all the errors.
  if (BuildDefs::dump()) {
    GlobalContext *Ctx = Func->getContext();
    OstreamLocker L(Ctx);
    Ostream &Str = Ctx->getStrDump();
    if (Func->isVerbose()) {
      Str << "Live range errors in the following block:\n";
      dump(Func);
    }
    for (auto Start = MapBegin.begin();
         (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
         MapBegin.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR begin, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
    for (auto Start = MapEnd.begin();
         (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
         MapEnd.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR end, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
  }

  return false;
}

// Once basic liveness is complete, compute actual live ranges. It is assumed
// that within a single basic block, a live range begins at most once and
// ends at most once. This is certainly true for pure SSA form. It is also
// true once phis are lowered, since each assignment to the phi-based
// temporary is in a different basic block, and there is a single read that
// ends the live range in the basic block that contained the actual phi
// instruction.
void CfgNode::livenessAddIntervals(Liveness *Liveness,
                                   InstNumberT FirstInstNum,
                                   InstNumberT LastInstNum) {
  TimerMarker T1(TimerStack::TT_liveRange, Func);

  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const LivenessBV &LiveIn = Liveness->getLiveIn(this);
  const LivenessBV &LiveOut = Liveness->getLiveOut(this);
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  std::sort(MapBegin.begin(), MapBegin.end());
  std::sort(MapEnd.begin(), MapEnd.end());

  if (!livenessValidateIntervals(Liveness)) {
    llvm::report_fatal_error("livenessAddIntervals: Liveness error");
    return;
  }

  LivenessBV &LiveInAndOut = Liveness->getScratchBV();
  LiveInAndOut = LiveIn;
  LiveInAndOut &= LiveOut;

  // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
  auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
  auto IBE = MapBegin.end(), IEE = MapEnd.end();
  while (IBB != IBE || IEB != IEE) {
    SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    SizeT i = std::min(i1, i2);
    // i1 is the Variable number of the next MapBegin entry, and i2 is the
    // Variable number of the next MapEnd entry. If i1==i2, then the
    // Variable's live range begins and ends in this block. If i1<i2, then
    // i1's live range begins at instruction IBB->second and extends through
    // the end of the block. If i1>i2, then i2's live range begins at the
    // first instruction of the block and ends at IEB->second. In any case,
    // we choose the lesser of i1 and i2 and proceed accordingly.
    InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;

    Variable *Var = Liveness->getVariable(i, this);
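    // LB > LE means the variable's live range within this block is
    // discontiguous: live from block entry to LE, dead in the middle, and
    // live again from LB through the block end, so two ranges are added.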
    if (LB > LE) {
      Var->addLiveRange(FirstInstNum, LE, this);
      Var->addLiveRange(LB, LastInstNum + 1, this);
      // Assert that Var is a global variable by checking that its liveness
      // index is less than the number of globals. This ensures that the
      // LiveInAndOut[] access is valid.
      assert(i < Liveness->getNumGlobalVars());
      LiveInAndOut[i] = false;
    } else {
      Var->addLiveRange(LB, LE, this);
    }
    if (i == i1)
      ++IBB;
    if (i == i2)
      ++IEB;
  }
  // Process the variables that are live across the entire block.
  for (int i = LiveInAndOut.find_first(); i != -1;
       i = LiveInAndOut.find_next(i)) {
    Variable *Var = Liveness->getVariable(i, this);
    if (Liveness->getRangeMask(Var->getIndex()))
      Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
  }
}

// If this node contains only deleted instructions, and ends in an
// unconditional branch, contract the node by repointing all its in-edges to
// its successor.
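// For example, a node whose only remaining live instruction is
// "br label %next" is contracted by pointing each predecessor's branch
// directly at %next.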
void CfgNode::contractIfEmpty() {
  if (InEdges.empty())
    return;
  Inst *Branch = nullptr;
  for (Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isUnconditionalBranch())
      Branch = &I;
    else if (!I.isRedundantAssign())
      return;
  }
  // Make sure there is actually a successor to repoint in-edges to.
  if (OutEdges.empty())
    return;
  assert(hasSingleOutEdge());
  // Don't try to delete a self-loop.
  if (OutEdges[0] == this)
    return;
  // Make sure the node actually contains (ends with) an unconditional
  // branch.
  if (Branch == nullptr)
    return;

  Branch->setDeleted();
  CfgNode *Successor = OutEdges.front();
  // Repoint all this node's in-edges to this node's successor, unless this
  // node's successor is actually itself (in which case the statement
  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the
  // iterator over this->InEdges).
  if (Successor != this) {
    for (CfgNode *Pred : InEdges) {
      for (CfgNode *&I : Pred->OutEdges) {
        if (I == this) {
          I = Successor;
          Successor->InEdges.push_back(Pred);
        }
      }
      for (Inst &I : Pred->getInsts()) {
        if (!I.isDeleted())
          I.repointEdges(this, Successor);
      }
    }

    // Remove the in-edge to the successor to allow node reordering to make
    // better decisions. For example it's more helpful to place a node after
    // a reachable predecessor than an unreachable one (like the one we just
    // contracted).
    Successor->InEdges.erase(
        std::find(Successor->InEdges.begin(), Successor->InEdges.end(),
                  this));
  }
  InEdges.clear();
}

void CfgNode::doBranchOpt(const CfgNode *NextNode) {
  TargetLowering *Target = Func->getTarget();
  // Find the first opportunity for branch optimization (which will be the
  // last instruction in the block) and stop. This is sufficient unless there
  // is some target lowering where we have the possibility of multiple
  // optimizations per block. Take care with switch lowering as there are
  // multiple unconditional branches and only the last can be deleted.
  for (Inst &I : reverse_range(Insts)) {
    if (!I.isDeleted()) {
      Target->doBranchOpt(&I, NextNode);
      return;
    }
  }
}

// ======================== Dump routines ======================== //

namespace {

// Helper functions for emit().

void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
                       bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  Liveness *Liveness = Func->getLiveness();
  const LivenessBV *Live;
  const auto StackReg = Func->getTarget()->getStackReg();
  const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
  if (IsLiveIn) {
    Live = &Liveness->getLiveIn(Node);
    Str << "\t\t\t\t/* LiveIn=";
  } else {
    Live = &Liveness->getLiveOut(Node);
    Str << "\t\t\t\t/* LiveOut=";
  }
  if (!Live->empty()) {
    CfgVector<Variable *> LiveRegs;
    for (SizeT i = 0; i < Live->size(); ++i) {
      if (!(*Live)[i])
        continue;
      Variable *Var = Liveness->getVariable(i, Node);
      if (!Var->hasReg())
        continue;
      const auto RegNum = Var->getRegNum();
      if (RegNum == StackReg || RegNum == FrameOrStackReg)
        continue;
      if (IsLiveIn)
        ++LiveRegCount[RegNum];
      LiveRegs.push_back(Var);
    }
    // Sort the variables by regnum so they are always printed in a familiar
    // order.
    std::sort(LiveRegs.begin(), LiveRegs.end(),
              [](const Variable *V1, const Variable *V2) {
                return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
              });
    bool First = true;
    for (Variable *Var : LiveRegs) {
      if (!First)
        Str << ",";
      First = false;
      Var->emit(Func);
    }
  }
  Str << " */\n";
}

/// Returns true if some text was emitted - in which case the caller
/// definitely needs to emit a newline character.
bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         CfgVector<SizeT> &LiveRegCount) {
  bool Printed = false;
  if (!BuildDefs::dump())
    return Printed;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestRedefined flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (Printed)
        Str << ",";
      else
        Str << " \t/* END=";
      Var->emit(Func);
      Printed = true;
    }
  }
  if (Printed)
    Str << " */";
  return Printed;
}

void updateStats(Cfg *Func, const Inst *I) {
  if (!BuildDefs::dump())
    return;
  // Update emitted instruction count, plus fill/spill count for Variable
  // operands without a physical register.
  if (uint32_t Count = I->getEmitInstCount()) {
    Func->getContext()->statsUpdateEmitted(Count);
    if (Variable *Dest = I->getDest()) {
      if (!Dest->hasReg())
        Func->getContext()->statsUpdateFills();
    }
    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
      if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
        if (!Src->hasReg())
          Func->getContext()->statsUpdateSpills();
      }
    }
  }
}

} // end of anonymous namespace

void CfgNode::emit(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrEmit();
  Liveness *Liveness = Func->getLiveness();
  const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
  Str << getAsmName() << ":\n";
  // LiveRegCount keeps track of the number of currently live variables that
  // each register is assigned to. Normally that would be only 0 or 1, but
  // the register allocator's AllowOverlap inference allows it to be greater
  // than 1 for short periods.
  CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
  if (DecorateAsm) {
    constexpr bool IsLiveIn = true;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
    if (getInEdges().size()) {
      Str << "\t\t\t\t/* preds=";
      bool First = true;
      for (CfgNode *I : getInEdges()) {
        if (!First)
          Str << ",";
        First = false;
        Str << "$" << I->getName();
      }
      Str << " */\n";
    }
    if (getLoopNestDepth()) {
      Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
    }
  }

  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emit(Func);
  }
  for (const Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isRedundantAssign()) {
      // Usually, redundant assignments end the live range of the src
      // variable and begin the live range of the dest variable, with no net
      // effect on the liveness of their register. However, if the register
      // allocator infers the AllowOverlap condition, then this may be a
      // redundant assignment that does not end the src variable's live
      // range, in which case the active variable count for that register
      // needs to be bumped. That normally would have happened as part of
      // emitLiveRangesEnded(), but that isn't called for redundant
      // assignments.
      Variable *Dest = I.getDest();
      if (DecorateAsm && Dest->hasReg()) {
        ++LiveRegCount[Dest->getRegNum()];
        if (I.isLastUse(I.getSrc(0)))
          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
      }
      continue;
    }
    I.emit(Func);
    bool Printed = false;
    if (DecorateAsm)
      Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
    if (Printed || llvm::isa<InstTarget>(&I))
      Str << "\n";
    updateStats(Func, &I);
  }
  if (DecorateAsm) {
    constexpr bool IsLiveIn = false;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
  }
}

// Helper class for emitIAS().
namespace {
class BundleEmitHelper {
  BundleEmitHelper() = delete;
  BundleEmitHelper(const BundleEmitHelper &) = delete;
  BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;

public:
  BundleEmitHelper(Assembler *Asm, const InstList &Insts)
      : Asm(Asm), End(Insts.end()), BundleLockStart(End),
        BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
        BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
  // Check whether we're currently within a bundle_lock region.
  bool isInBundleLockRegion() const { return BundleLockStart != End; }
  // Check whether the current bundle_lock region has the align_to_end
  // option.
  bool isAlignToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_AlignToEnd;
  }
  bool isPadToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_PadToEnd;
  }
  // Check whether the entire bundle_lock region falls within the same
  // bundle.
  bool isSameBundle() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre == SizeSnapshotPost ||
           (SizeSnapshotPre & BundleMaskHi) ==
               ((SizeSnapshotPost - 1) & BundleMaskHi);
  }
  // Get the bundle alignment of the first instruction of the bundle_lock
  // region.
  intptr_t getPreAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre & BundleMaskLo;
  }
  // Get the bundle alignment of the first instruction past the bundle_lock
  // region.
  intptr_t getPostAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPost & BundleMaskLo;
  }
  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
  // back the instruction iteration to that point.
  InstList::const_iterator getBundleLockStart() const {
    assert(isInBundleLockRegion());
    return BundleLockStart;
  }
  // Set up bookkeeping when the bundle_lock instruction is first processed.
  void enterBundleLock(InstList::const_iterator I) {
    assert(!isInBundleLockRegion());
    BundleLockStart = I;
    SizeSnapshotPre = Asm->getBufferSize();
    Asm->setPreliminary(true);
    assert(isInBundleLockRegion());
  }
  // Update bookkeeping when the bundle_unlock instruction is processed.
  void enterBundleUnlock() {
    assert(isInBundleLockRegion());
    SizeSnapshotPost = Asm->getBufferSize();
  }
  // Update bookkeeping when we are completely finished with the bundle_lock
  // region.
  void leaveBundleLockRegion() { BundleLockStart = End; }
  // Check whether the instruction sequence fits within the current bundle,
  // and if not, add nop padding to the end of the current bundle.
  void padToNextBundle() {
    assert(isInBundleLockRegion());
    if (!isSameBundle()) {
      intptr_t PadToNextBundle = BundleSize - getPreAlignment();
      Asm->padWithNop(PadToNextBundle);
      SizeSnapshotPre += PadToNextBundle;
      SizeSnapshotPost += PadToNextBundle;
      assert((Asm->getBufferSize() & BundleMaskLo) == 0);
      assert(Asm->getBufferSize() == SizeSnapshotPre);
    }
  }
  // If align_to_end is specified, add padding such that the instruction
  // sequence ends precisely at a bundle boundary.
  void padForAlignToEnd() {
    assert(isInBundleLockRegion());
    if (isAlignToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // If pad_to_end is specified, add padding such that the first instruction
  // after the instruction sequence starts at a bundle boundary.
  void padForPadToEnd() {
    assert(isInBundleLockRegion());
    if (isPadToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // Update bookkeeping when rolling back for the second pass.
  void rollback() {
    assert(isInBundleLockRegion());
    Asm->setBufferSize(SizeSnapshotPre);
    Asm->setPreliminary(false);
  }

private:
  Assembler *const Asm;
  // End is a sentinel value such that BundleLockStart==End implies that we
  // are not in a bundle_lock region.
  const InstList::const_iterator End;
  InstList::const_iterator BundleLockStart;
  const intptr_t BundleSize;
  // Masking with BundleMaskLo identifies an address's bundle offset.
  const intptr_t BundleMaskLo;
  // Masking with BundleMaskHi identifies an address's bundle.
  const intptr_t BundleMaskHi;
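  // For example, if getBundleAlignLog2Bytes() is 5 (32-byte bundles), then
  // BundleSize is 32, BundleMaskLo is 0x1f, and BundleMaskHi is ~0x1f, so a
  // buffer offset of 0x47 has bundle offset 0x47 & 0x1f == 7 within the
  // bundle starting at 0x47 & ~0x1f == 0x40.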
  intptr_t SizeSnapshotPre = 0;
  intptr_t SizeSnapshotPost = 0;
};

} // end of anonymous namespace

void CfgNode::emitIAS(Cfg *Func) const {
  Func->setCurrentNode(this);
  Assembler *Asm = Func->getAssembler<>();
  // TODO(stichnot): When sandboxing, defer binding the node label until just
  // before the first instruction is emitted, to reduce the chance that a
  // padding nop is a branch target.
  Asm->bindCfgNodeLabel(this);
  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emitIAS(Func);
  }

  // Do the simple emission if not sandboxed.
  if (!getFlags().getUseSandboxing()) {
    for (const Inst &I : Insts) {
      if (!I.isDeleted() && !I.isRedundantAssign()) {
        I.emitIAS(Func);
        updateStats(Func, &I);
      }
    }
    return;
  }

  // The remainder of the function handles emission with sandboxing. There
  // are explicit bundle_lock regions delimited by bundle_lock and
  // bundle_unlock instructions. All other instructions are treated as an
  // implicit one-instruction bundle_lock region. Emission is done twice for
  // each bundle_lock region. The first pass is a preliminary pass, after
  // which we can figure out what nop padding is needed, then roll back, and
  // make the final pass.
  //
  // Ideally, the first pass would be speculative and the second pass would
  // only be done if nop padding were needed, but the structure of the
  // integrated assembler makes it hard to roll back the state of label
  // bindings, label links, and relocation fixups. Instead, the first pass
  // just disables all mutation of that state.
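  //
  // In other words, each bundle_lock region goes through roughly this cycle:
  // emit speculatively while snapshotting the buffer size before and after,
  // roll back, pad to the next bundle boundary (and for align_to_end) if
  // needed, then re-emit at the padded offset and keep that result.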

  BundleEmitHelper Helper(Asm, Insts);
  InstList::const_iterator End = Insts.end();
  // Retrying indicates that we had to roll back to the bundle_lock
  // instruction to apply padding before the bundle_lock sequence.
  bool Retrying = false;
  for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
    if (I->isDeleted() || I->isRedundantAssign())
      continue;

    if (llvm::isa<InstBundleLock>(I)) {
      // Set up the initial bundle_lock state. This should not happen while
      // retrying, because the retry rolls back to the instruction following
      // the bundle_lock instruction.
      assert(!Retrying);
      Helper.enterBundleLock(I);
      continue;
    }

    if (llvm::isa<InstBundleUnlock>(I)) {
      Helper.enterBundleUnlock();
      if (Retrying) {
        // Make sure all instructions are in the same bundle.
        assert(Helper.isSameBundle());
        // If align_to_end is specified, make sure the next instruction
        // begins the bundle.
        assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
        Helper.padForPadToEnd();
        Helper.leaveBundleLockRegion();
        Retrying = false;
      } else {
        // This is the first pass, so roll back for the retry pass.
        Helper.rollback();
        // Pad to the next bundle if the instruction sequence crossed a
        // bundle boundary.
        Helper.padToNextBundle();
        // Insert additional padding to make AlignToEnd work.
        Helper.padForAlignToEnd();
        // Prepare for the retry pass after padding is done.
        Retrying = true;
        I = Helper.getBundleLockStart();
      }
      continue;
    }

    // I points to a non bundle_lock/bundle_unlock instruction.
    if (Helper.isInBundleLockRegion()) {
      I->emitIAS(Func);
      // Only update stats during the final pass.
      if (Retrying)
        updateStats(Func, iteratorToInst(I));
    } else {
      // Treat it as though there were an implicit bundle_lock and
      // bundle_unlock wrapping the instruction.
      Helper.enterBundleLock(I);
      I->emitIAS(Func);
      Helper.enterBundleUnlock();
      Helper.rollback();
      Helper.padToNextBundle();
      I->emitIAS(Func);
      updateStats(Func, iteratorToInst(I));
      Helper.leaveBundleLockRegion();
    }
  }

  // Don't allow bundle locking across basic blocks, to keep the backtracking
  // mechanism simple.
  assert(!Helper.isInBundleLockRegion());
  assert(!Retrying);
}

void CfgNode::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrDump();
  Liveness *Liveness = Func->getLiveness();
  if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
    Str << getName() << ":\n";
  // Dump the loop nest depth.
  if (Func->isVerbose(IceV_Loop))
    Str << "    // LoopNestDepth = " << getLoopNestDepth() << "\n";
  // Dump the list of predecessor nodes.
  if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
    Str << "    // preds = ";
    bool First = true;
    for (CfgNode *I : InEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
  // Dump the live-in variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
      const LivenessBV &LiveIn = Liveness->getLiveIn(this);
      Str << "    // LiveIn:";
      for (SizeT i = 0; i < LiveIn.size(); ++i) {
        if (LiveIn[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump each instruction.
  if (Func->isVerbose(IceV_Instructions)) {
    for (const Inst &I : Phis)
      I.dumpDecorated(Func);
    for (const Inst &I : Insts)
      I.dumpDecorated(Func);
  }
  // Dump the live-out variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
      const LivenessBV &LiveOut = Liveness->getLiveOut(this);
      Str << "    // LiveOut:";
      for (SizeT i = 0; i < LiveOut.size(); ++i) {
        if (LiveOut[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump the list of successor nodes.
  if (Func->isVerbose(IceV_Succs)) {
    Str << "    // succs = ";
    bool First = true;
    for (CfgNode *I : OutEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
}

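// Inserts profiling instrumentation at the start of the node. Conceptually,
// the inserted call is equivalent to
//   llvm.nacl.atomic.rmw.i64(AtomicAdd, &Counter, 1, AcquireRelease);
// i.e. an atomic 64-bit increment of this node's execution counter.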
void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
  GlobalContext *Ctx = Func->getContext();
  GlobalString RMW_I64 = Ctx->getGlobalString("llvm.nacl.atomic.rmw.i64");

  bool BadIntrinsic = false;
  const Intrinsics::FullIntrinsicInfo *Info =
      Ctx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
  assert(!BadIntrinsic);
  assert(Info != nullptr);

  Operand *RMWI64Name = Ctx->getConstantExternSym(RMW_I64);
  constexpr RelocOffsetT Offset = 0;
  Constant *Counter = Ctx->getConstantSym(Offset, Var->getName());
  Constant *AtomicRMWOp = Ctx->getConstantInt32(Intrinsics::AtomicAdd);
  Constant *One = Ctx->getConstantInt64(1);
  Constant *OrderAcquireRelease =
      Ctx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);

  auto *Instr = InstIntrinsicCall::create(
      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
  Instr->addArg(AtomicRMWOp);
  Instr->addArg(Counter);
  Instr->addArg(One);
  Instr->addArg(OrderAcquireRelease);
  Insts.push_front(Instr);
}

void CfgNode::removeInEdge(CfgNode *In) {
  InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
}

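// Tries to convert the node to use short-circuit evaluation. If the node
// ends in a conditional branch on a variable defined by a top-level "or" or
// "and" of two operands, the node is split after the definition of the first
// operand, and a new conditional branch is added so that the second operand
// is only evaluated when it can still affect the outcome. Roughly:
//   %c = or i1 %a, %b
//   br i1 %c, label %T, label %F
// becomes
//   br i1 %a, label %T, label %split
// where the new %split node evaluates %b and branches on it. Returns the new
// node, or nullptr if the transformation does not apply.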
CfgNode *CfgNode::shortCircuit() {
  auto *Func = getCfg();
  auto *Last = &getInsts().back();
  Variable *Condition = nullptr;
  InstBr *Br = nullptr;
  if ((Br = llvm::dyn_cast<InstBr>(Last))) {
    if (!Br->isUnconditional()) {
      Condition = llvm::dyn_cast<Variable>(Br->getCondition());
    }
  }
  if (Condition == nullptr)
    return nullptr;

  auto *JumpOnTrue = Br->getTargetTrue();
  auto *JumpOnFalse = Br->getTargetFalse();

  bool FoundOr = false;
  bool FoundAnd = false;

  InstArithmetic *TopLevelBoolOp = nullptr;

  for (auto &Inst : reverse_range(getInsts())) {
    if (Inst.isDeleted())
      continue;
    if (Inst.getDest() == Condition) {
      if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
        FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
        FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);

        if (FoundOr || FoundAnd) {
          TopLevelBoolOp = Arith;
          break;
        }
      }
    }
  }

  if (!TopLevelBoolOp)
    return nullptr;

  auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
    for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
      if (Instr->getSrc(i) == Opr)
        return true;
    }
    return false;
  };
  Inst *FirstOperandDef = nullptr;
  for (auto &Inst : getInsts()) {
    if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
      FirstOperandDef = &Inst;
      break;
    }
  }

  if (FirstOperandDef == nullptr) {
    return nullptr;
  }

  // Check for side effects.
  auto It = Ice::instToIterator(FirstOperandDef);
  while (It != getInsts().end()) {
    if (It->isDeleted()) {
      ++It;
      continue;
    }
    if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
      break;
    }
    auto *Dest = It->getDest();
    // Note: the order of the conditions below matters; short-circuit
    // evaluation ensures isSingleBlock(Dest) is never called with a null
    // Dest.
    if (It->getDest() == nullptr || It->hasSideEffects() ||
        !Func->getVMetadata()->isSingleBlock(Dest)) {
      return nullptr;
    }
    ++It;
  }

  auto *NewNode = Func->makeNode();
  NewNode->setLoopNestDepth(getLoopNestDepth());
  It = Ice::instToIterator(FirstOperandDef);
  ++It; // Have to split after the def.

  NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
                             getInsts().end());

  if (BuildDefs::dump()) {
    NewNode->setName(getName().append("_2"));
    setName(getName().append("_1"));
  }

  // Repoint the edges.
  NewNode->addInEdge(this);
  for (auto *Out : getOutEdges()) {
    NewNode->addOutEdge(Out);
    Out->addInEdge(NewNode);
  }
  removeAllOutEdges();
  addOutEdge(NewNode);

  // Update the successors' Phi instructions to account for the new
  // predecessor.
  for (auto *Succ : NewNode->getOutEdges()) {
    for (auto &Inst : Succ->getPhis()) {
      auto *Phi = llvm::cast<InstPhi>(&Inst);
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        if (Phi->getLabel(i) == this) {
          Phi->addArgument(Phi->getSrc(i), NewNode);
        }
      }
    }
  }

  // Create the new Br instruction.
  InstBr *NewInst = nullptr;
  if (FoundOr) {
    addOutEdge(JumpOnTrue);
    JumpOnFalse->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
  } else if (FoundAnd) {
    addOutEdge(JumpOnFalse);
    JumpOnTrue->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
  } else {
    return nullptr;
  }

  assert(NewInst != nullptr);
  appendInst(NewInst);

  Operand *UnusedOperand = nullptr;
  assert(TopLevelBoolOp->getSrcSize() == 2);
  if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(1);
  else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(0);
  assert(UnusedOperand);

  Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br.

  TopLevelBoolOp->setDeleted();
  return NewNode;
}

} // end of namespace Ice