third_party/subzero/src/IceVariableSplitting.cpp - SwiftShader - Git at Google

 //===- subzero/src/IceVariableSplitting.cpp - Local variable splitting ----===//
 //
 //                        The Subzero Code Generator
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// \brief Aggressive block-local variable splitting to improve linear-scan
 /// register allocation.
 ///
 //===----------------------------------------------------------------------===//

 #include "IceVariableSplitting.h"

 #include "IceCfg.h"
 #include "IceCfgNode.h"
 #include "IceClFlags.h"
 #include "IceInst.h"
 #include "IceOperand.h"
 #include "IceTargetLowering.h"

 namespace Ice {

 namespace {

 /// A Variable is "allocable" if it is a register allocation candidate but
 /// doesn't already have a register.
 bool isAllocable(const Variable *Var) {
   if (Var == nullptr)
     return false;
   return !Var->hasReg() && Var->mayHaveReg();
 }

 /// A Variable is "inf" if it already has a register or is infinite-weight.
 bool isInf(const Variable *Var) {
   if (Var == nullptr)
     return false;
   return Var->hasReg() || Var->mustHaveReg();
 }

 /// VariableMap is a simple helper class that keeps track of the latest split
 /// version of the original Variables, as well as the instruction containing the
 /// last use of the Variable within the current block.  For each entry, the
 /// Variable is tagged with the CfgNode that it is valid in, so that we don't
 /// need to clear the entire Map[] vector for each block.
 class VariableMap {
 private:
   VariableMap() = delete;
   VariableMap(const VariableMap &) = delete;
   VariableMap &operator=(const VariableMap &) = delete;

   struct VarInfo {
     /// MappedVar is the latest mapped/split version of the Variable.
     Variable *MappedVar = nullptr;
     /// MappedVarNode is the block in which MappedVar is valid.
     const CfgNode *MappedVarNode = nullptr;
     /// LastUseInst is the last instruction in the block that uses the Variable
     /// as a source operand.
     const Inst *LastUseInst = nullptr;
     /// LastUseNode is the block in which LastUseInst is valid.
     const CfgNode *LastUseNode = nullptr;
     VarInfo() = default;

   private:
     VarInfo(const VarInfo &) = delete;
     VarInfo &operator=(const VarInfo &) = delete;
   };

 public:
   explicit VariableMap(Cfg *Func)
       : Func(Func), NumVars(Func->getNumVariables()), Map(NumVars) {}
   /// Reset the mappings at the start of a block.
   void reset(const CfgNode *CurNode) {
     Node = CurNode;
     // Do a prepass through all the instructions, marking which instruction is
     // the last use of each Variable within the block.
     for (const Inst &Instr : Node->getInsts()) {
       if (Instr.isDeleted())
         continue;
       for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
         if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr.getSrc(i))) {
           const SizeT VarNum = getVarNum(SrcVar);
           Map[VarNum].LastUseInst = &Instr;
           Map[VarNum].LastUseNode = Node;
         }
       }
     }
   }
   /// Get Var's current mapping (or Var itself if it has no mapping yet).
   Variable *get(Variable *Var) const {
     const SizeT VarNum = getVarNum(Var);
     Variable *MappedVar = Map[VarNum].MappedVar;
     if (MappedVar == nullptr)
       return Var;
     if (Map[VarNum].MappedVarNode != Node)
       return Var;
     return MappedVar;
   }
   /// Create a new linked Variable in the LinkedTo chain, and set it as Var's
   /// latest mapping.
   Variable *makeLinked(Variable *Var) {
     Variable *NewVar = Func->makeVariable(Var->getType());
     NewVar->setRegClass(Var->getRegClass());
     NewVar->setLinkedTo(get(Var));
     const SizeT VarNum = getVarNum(Var);
     Map[VarNum].MappedVar = NewVar;
     Map[VarNum].MappedVarNode = Node;
     return NewVar;
   }
   /// Given Var that is LinkedTo some other variable, re-splice it into the
   /// LinkedTo chain so that the chain is ordered by Variable::getIndex().
   void spliceBlockLocalLinkedToChain(Variable *Var) {
     Variable *LinkedTo = Var->getLinkedTo();
     assert(LinkedTo != nullptr);
     assert(Var->getIndex() > LinkedTo->getIndex());
     const SizeT VarNum = getVarNum(LinkedTo);
     Variable *Link = Map[VarNum].MappedVar;
     if (Link == nullptr || Map[VarNum].MappedVarNode != Node)
       return;
     Variable *LinkParent = Link->getLinkedTo();
     while (LinkParent != nullptr && LinkParent->getIndex() >= Var->getIndex()) {
       Link = LinkParent;
       LinkParent = Link->getLinkedTo();
     }
     Var->setLinkedTo(LinkParent);
     Link->setLinkedTo(Var);
   }
   /// Return whether the given Variable has any uses as a source operand within
   /// the current block.  If it has no source operand uses, but is assigned as a
   /// dest variable in some instruction in the block, then we needn't bother
   /// splitting it.
   bool isDestUsedInBlock(const Variable *Dest) const {
     return Map[getVarNum(Dest)].LastUseNode == Node;
   }
   /// Return whether the given instruction is the last use of the given Variable
   /// within the current block.  If it is, then we needn't bother splitting the
   /// Variable at this instruction.
   bool isInstLastUseOfVar(const Variable *Var, const Inst *Instr) {
     return Map[getVarNum(Var)].LastUseInst == Instr;
   }

 private:
   Cfg *const Func;
   // NumVars is for the size of the Map array.  It can be const because any new
   // Variables created during the splitting pass don't need to be mapped.
   const SizeT NumVars;
   CfgVector<VarInfo> Map;
   const CfgNode *Node = nullptr;
   /// Get Var's VarNum, and do some validation.
   SizeT getVarNum(const Variable *Var) const {
     const SizeT VarNum = Var->getIndex();
     assert(VarNum < NumVars);
     return VarNum;
   }
 };

 /// LocalVariableSplitter tracks the necessary splitting state across
 /// instructions.
 class LocalVariableSplitter {
   LocalVariableSplitter() = delete;
   LocalVariableSplitter(const LocalVariableSplitter &) = delete;
   LocalVariableSplitter &operator=(const LocalVariableSplitter &) = delete;

 public:
   explicit LocalVariableSplitter(Cfg *Func)
       : Target(Func->getTarget()), VarMap(Func) {}
   /// setNode() is called before processing the instructions of a block.
   void setNode(CfgNode *CurNode) {
     Node = CurNode;
     VarMap.reset(Node);
     LinkedToFixups.clear();
   }
   /// finalizeNode() is called after all instructions in the block are
   /// processed.
   void finalizeNode() {
     // Splice in any preexisting LinkedTo links into the single chain.  These
     // are the ones that were recorded during setInst().
     for (Variable *Var : LinkedToFixups) {
       VarMap.spliceBlockLocalLinkedToChain(Var);
     }
   }
   /// setInst() is called before processing the next instruction.  The iterators
   /// are the insertion points for a new instructions, depending on whether the
   /// new instruction should be inserted before or after the current
   /// instruction.
   void setInst(Inst *CurInst, InstList::iterator Cur, InstList::iterator Next) {
     Instr = CurInst;
     Dest = Instr->getDest();
     IterCur = Cur;
     IterNext = Next;
     ShouldSkipRemainingInstructions = false;
     // Note any preexisting LinkedTo relationships that were created during
     // target lowering.  Record them in LinkedToFixups which is then processed
     // in finalizeNode().
     if (Dest != nullptr && Dest->getLinkedTo() != nullptr) {
       LinkedToFixups.emplace_back(Dest);
     }
   }
   bool shouldSkipRemainingInstructions() const {
     return ShouldSkipRemainingInstructions;
   }
   bool isUnconditionallyExecuted() const { return WaitingForLabel == nullptr; }

   /// Note: the handle*() functions return true to indicate that the instruction
   /// has now been handled and that the instruction loop should continue to the
   /// next instruction in the block (and return false otherwise).  In addition,
   /// they set the ShouldSkipRemainingInstructions flag to indicate that no more
   /// instructions in the block should be processed.

   /// Handle an "unwanted" instruction by returning true;
   bool handleUnwantedInstruction() {
     // We can limit the splitting to an arbitrary subset of the instructions,
     // and still expect correct code.  As such, we can do instruction-subset
     // bisection to help debug any problems in this pass.
     static constexpr char AnInstructionHasNoName[] = "";
     if (!BuildDefs::minimal() &&
         !getFlags().matchSplitInsts(AnInstructionHasNoName,
                                     Instr->getNumber())) {
       return true;
     }
     if (!llvm::isa<InstTarget>(Instr)) {
       // Ignore non-lowered instructions like FakeDef/FakeUse.
       return true;
     }
     return false;
   }

   /// Process a potential label instruction.
   bool handleLabel() {
     if (!Instr->isLabel())
       return false;
     // A Label instruction shouldn't have any operands, so it can be handled
     // right here and then move on.
     assert(Dest == nullptr);
     assert(Instr->getSrcSize() == 0);
     if (Instr == WaitingForLabel) {
       // If we found the forward-branch-target Label instruction we're waiting
       // for, then clear the WaitingForLabel state.
       WaitingForLabel = nullptr;
     } else if (WaitingForLabel == nullptr && WaitingForBranchTo == nullptr) {
       // If we found a new Label instruction while the WaitingFor* state is
       // clear, then set things up for this being a backward branch target.
       WaitingForBranchTo = Instr;
     } else {
       // We see something we don't understand, so skip to the next block.
       ShouldSkipRemainingInstructions = true;
     }
     return true;
   }

   /// Process a potential intra-block branch instruction.
   bool handleIntraBlockBranch() {
     const Inst *Label = Instr->getIntraBlockBranchTarget();
     if (Label == nullptr)
       return false;
     // An intra-block branch instruction shouldn't have any operands, so it can
     // be handled right here and then move on.
     assert(Dest == nullptr);
     assert(Instr->getSrcSize() == 0);
     if (WaitingForBranchTo == Label && WaitingForLabel == nullptr) {
       WaitingForBranchTo = nullptr;
     } else if (WaitingForBranchTo == nullptr &&
                (WaitingForLabel == nullptr || WaitingForLabel == Label)) {
       WaitingForLabel = Label;
     } else {
       // We see something we don't understand, so skip to the next block.
       ShouldSkipRemainingInstructions = true;
     }
     return true;
   }

   /// Specially process a potential "Variable=Variable" assignment instruction,
   /// when it conforms to certain patterns.
   bool handleSimpleVarAssign() {
     if (!Instr->isVarAssign())
       return false;
     const bool DestIsInf = isInf(Dest);
     const bool DestIsAllocable = isAllocable(Dest);
     auto *SrcVar = llvm::cast<Variable>(Instr->getSrc(0));
     const bool SrcIsInf = isInf(SrcVar);
     const bool SrcIsAllocable = isAllocable(SrcVar);
     if (DestIsInf && SrcIsInf) {
       // The instruction:
       //   t:inf = u:inf
       // No transformation is needed.
       return true;
     }
     if (DestIsInf && SrcIsAllocable && Dest->getType() == SrcVar->getType()) {
       // The instruction:
       //   t:inf = v
       // gets transformed to:
       //   t:inf = v1
       //   v2 = t:inf
       // where:
       //   v1 := map[v]
       //   v2 := linkTo(v)
       //   map[v] := v2
       //
       // If both v2 and its linkedToStackRoot get a stack slot, then "v2=t:inf"
       // is recognized as a redundant assignment and elided.
       //
       // Note that if the dest and src types are different, then this is
       // actually a truncation operation, which would make "v2=t:inf" an invalid
       // instruction.  In this case, the type test will make it fall through to
       // the general case below.
       Variable *OldMapped = VarMap.get(SrcVar);
       Instr->replaceSource(0, OldMapped);
       if (isUnconditionallyExecuted()) {
         // Only create new mapping state if the instruction is unconditionally
         // executed.
         if (!VarMap.isInstLastUseOfVar(SrcVar, Instr)) {
           Variable *NewMapped = VarMap.makeLinked(SrcVar);
           Inst *Mov = Target->createLoweredMove(NewMapped, Dest);
           Node->getInsts().insert(IterNext, Mov);
         }
       }
       return true;
     }
     if (DestIsAllocable && SrcIsInf) {
       if (!VarMap.isDestUsedInBlock(Dest)) {
         return true;
       }
       // The instruction:
       //   v = t:inf
       // gets transformed to:
       //   v = t:inf
       //   v2 = t:inf
       // where:
       //   v2 := linkTo(v)
       //   map[v] := v2
       //
       // If both v2 and v get a stack slot, then "v2=t:inf" is recognized as a
       // redundant assignment and elided.
       if (isUnconditionallyExecuted()) {
         // Only create new mapping state if the instruction is unconditionally
         // executed.
         Variable *NewMapped = VarMap.makeLinked(Dest);
         Inst *Mov = Target->createLoweredMove(NewMapped, SrcVar);
         Node->getInsts().insert(IterNext, Mov);
       } else {
         // For a conditionally executed instruction, add a redefinition of the
         // original Dest mapping, without creating a new linked variable.
         Variable *OldMapped = VarMap.get(Dest);
         Inst *Mov = Target->createLoweredMove(OldMapped, SrcVar);
         Mov->setDestRedefined();
         Node->getInsts().insert(IterNext, Mov);
       }
       return true;
     }
     assert(!ShouldSkipRemainingInstructions);
     return false;
   }

   /// Process the dest Variable of a Phi instruction.
   bool handlePhi() {
     assert(llvm::isa<InstPhi>(Instr));
     const bool DestIsAllocable = isAllocable(Dest);
     if (!DestIsAllocable)
       return true;
     if (!VarMap.isDestUsedInBlock(Dest))
       return true;
     Variable *NewMapped = VarMap.makeLinked(Dest);
     Inst *Mov = Target->createLoweredMove(NewMapped, Dest);
     Node->getInsts().insert(IterCur, Mov);
     return true;
   }

   /// Process an arbitrary instruction.
   bool handleGeneralInst() {
     const bool DestIsAllocable = isAllocable(Dest);
     // The (non-variable-assignment) instruction:
     //   ... = F(v)
     // where v is not infinite-weight, gets transformed to:
     //   v2 = v1
     //   ... = F(v1)
     // where:
     //   v1 := map[v]
     //   v2 := linkTo(v)
     //   map[v] := v2
     // After that, if the "..." dest=u is not infinite-weight, append:
     //   u2 = u
     // where:
     //   u2 := linkTo(u)
     //   map[u] := u2
     for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
       // Iterate over the top-level src vars.  Don't bother to dig into
       // e.g. MemOperands because their vars should all be infinite-weight.
       // (This assumption would need to change if the pass were done
       // pre-lowering.)
       if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
         const bool SrcIsAllocable = isAllocable(SrcVar);
         if (SrcIsAllocable) {
           Variable *OldMapped = VarMap.get(SrcVar);
           if (isUnconditionallyExecuted()) {
             if (!VarMap.isInstLastUseOfVar(SrcVar, Instr)) {
               Variable *NewMapped = VarMap.makeLinked(SrcVar);
               Inst *Mov = Target->createLoweredMove(NewMapped, OldMapped);
               Node->getInsts().insert(IterCur, Mov);
             }
           }
           Instr->replaceSource(i, OldMapped);
         }
       }
     }
     // Transformation of Dest is the same as the "v=t:inf" case above.
     if (DestIsAllocable && VarMap.isDestUsedInBlock(Dest)) {
       if (isUnconditionallyExecuted()) {
         Variable *NewMapped = VarMap.makeLinked(Dest);
         Inst *Mov = Target->createLoweredMove(NewMapped, Dest);
         Node->getInsts().insert(IterNext, Mov);
       } else {
         Variable *OldMapped = VarMap.get(Dest);
         Inst *Mov = Target->createLoweredMove(OldMapped, Dest);
         Mov->setDestRedefined();
         Node->getInsts().insert(IterNext, Mov);
       }
     }
     return true;
   }

 private:
   TargetLowering *Target;
   CfgNode *Node = nullptr;
   Inst *Instr = nullptr;
   Variable *Dest = nullptr;
   InstList::iterator IterCur;
   InstList::iterator IterNext;
   bool ShouldSkipRemainingInstructions = false;
   VariableMap VarMap;
   CfgVector<Variable *> LinkedToFixups;
   /// WaitingForLabel and WaitingForBranchTo are for tracking intra-block
   /// control flow.
   const Inst *WaitingForLabel = nullptr;
   const Inst *WaitingForBranchTo = nullptr;
 };

 } // end of anonymous namespace

 /// Within each basic block, rewrite Variable references in terms of chained
 /// copies of the original Variable.  For example:
 ///   A = B + C
 /// might be rewritten as:
 ///   B1 = B
 ///   C1 = C
 ///   A = B + C
 ///   A1 = A
 /// and then:
 ///   D = A + B
 /// might be rewritten as:
 ///   A2 = A1
 ///   B2 = B1
 ///   D = A1 + B1
 ///   D1 = D
 ///
 /// The purpose is to present the linear-scan register allocator with smaller
 /// live ranges, to help mitigate its "all or nothing" allocation strategy,
 /// while counting on its preference mechanism to keep the split versions in the
 /// same register when possible.
 ///
 /// When creating new Variables, A2 is linked to A1 which is linked to A, and
 /// similar for the other Variable linked-to chains.  Rewrites apply only to
 /// Variables where mayHaveReg() is true.
 ///
 /// At code emission time, redundant linked-to stack assignments will be
 /// recognized and elided.  To illustrate using the above example, if A1 gets a
 /// register but A and A2 are on the stack, the "A2=A1" store instruction is
 /// redundant since A and A2 share the same stack slot and A1 originated from A.
 ///
 /// Simple assignment instructions are rewritten slightly differently, to take
 /// maximal advantage of Variables known to have registers.
 ///
 /// In general, there may be several valid ways to rewrite an instruction: add
 /// the new assignment instruction either before or after the original
 /// instruction, and rewrite the original instruction with either the old or the
 /// new variable mapping.  We try to pick a strategy most likely to avoid
 /// potential performance problems.  For example, try to avoid storing to the
 /// stack and then immediately reloading from the same location.  One
 /// consequence is that code might be generated that loads a register from a
 /// stack location, followed almost immediately by another use of the same stack
 /// location, despite its value already being available in a register as a
 /// result of the first instruction.  However, the performance impact here is
 /// likely to be negligible, and a simple availability peephole optimization
 /// could clean it up.
 ///
 /// This pass potentially adds a lot of new instructions and variables, and as
 /// such there are compile-time performance concerns, particularly with liveness
 /// analysis and register allocation.  Note that for liveness analysis, the new
 /// variables have single-block liveness, so they don't increase the size of the
 /// liveness bit vectors that need to be merged across blocks.  As a result, the
 /// performance impact is likely to be linearly related to the number of new
 /// instructions, rather than number of new variables times number of blocks
 /// which would be the case if they were multi-block variables.
 void splitBlockLocalVariables(Cfg *Func) {
   if (!getFlags().getSplitLocalVars())
     return;
   TimerMarker _(TimerStack::TT_splitLocalVars, Func);
   LocalVariableSplitter Splitter(Func);
   // TODO(stichnot): Fix this mechanism for LinkedTo variables and stack slot
   // assignment.
   //
   // To work around shortcomings with stack frame mapping, we want to arrange
   // LinkedTo structure such that within one block, the LinkedTo structure
   // leading to a root forms a list, not a tree.  A LinkedTo root can have
   // multiple children linking to it, but only one per block.  Furthermore,
   // because stack slot mapping processes variables in numerical order, the
   // LinkedTo chain needs to be ordered such that when A->getLinkedTo() == B,
   // then A->getIndex() > B->getIndex().
   //
   // To effect this, while processing a block we keep track of preexisting
   // LinkedTo relationships via the LinkedToFixups vector, and at the end of the
   // block we splice them in such that the block has a single chain for each
   // root, ordered by getIndex() value.
   CfgVector<Variable *> LinkedToFixups;
   for (CfgNode *Node : Func->getNodes()) {
     // Clear the VarMap and LinkedToFixups at the start of every block.
     LinkedToFixups.clear();
     Splitter.setNode(Node);
     auto &Insts = Node->getInsts();
     auto Iter = Insts.begin();
     auto IterEnd = Insts.end();
     // TODO(stichnot): Figure out why Phi processing usually degrades
     // performance.  Disable for now.
     static constexpr bool ProcessPhis = false;
     if (ProcessPhis) {
       for (Inst &Instr : Node->getPhis()) {
         if (Instr.isDeleted())
           continue;
         Splitter.setInst(&Instr, Iter, Iter);
         Splitter.handlePhi();
       }
     }
     InstList::iterator NextIter;
     for (; Iter != IterEnd && !Splitter.shouldSkipRemainingInstructions();
          Iter = NextIter) {
       NextIter = Iter;
       ++NextIter;
       Inst *Instr = iteratorToInst(Iter);
       if (Instr->isDeleted())
         continue;
       Splitter.setInst(Instr, Iter, NextIter);

       // Before doing any transformations, take care of the bookkeeping for
       // intra-block branching.
       //
       // This is tricky because the transformation for one instruction may
       // depend on a transformation for a previous instruction, but if that
       // previous instruction is not dynamically executed due to intra-block
       // control flow, it may lead to an inconsistent state and incorrect code.
       //
       // We want to handle some simple cases, and reject some others:
       //
       // 1. For something like a select instruction, we could have:
       //   test cond
       //   dest = src_false
       //   branch conditionally to label
       //   dest = src_true
       //   label:
       //
       // Between the conditional branch and the label, we need to treat dest and
       // src variables specially, specifically not creating any new state.
       //
       // 2. Some 64-bit atomic instructions may be lowered to a loop:
       //   label:
       //   ...
       //   branch conditionally to label
       //
       // No special treatment is needed, but it's worth tracking so that case #1
       // above can also be handled.
       //
       // 3. Advanced switch lowering can create really complex intra-block
       // control flow, so when we recognize this, we should just stop splitting
       // for the remainder of the block (which isn't much since a switch
       // instruction is a terminator).
       //
       // 4. Other complex lowering, e.g. an i64 icmp on a 32-bit architecture,
       // can result in an if/then/else like structure with two labels.  One
       // possibility would be to suspect splitting for the remainder of the
       // lowered instruction, and then resume for the remainder of the block,
       // but since we don't have high-level instruction markers, we might as
       // well just stop splitting for the remainder of the block.
       if (Splitter.handleLabel())
         continue;
       if (Splitter.handleIntraBlockBranch())
         continue;
       if (Splitter.handleUnwantedInstruction())
         continue;

       // Intra-block bookkeeping is complete, now do the transformations.

       // Determine the transformation based on the kind of instruction, and
       // whether its Variables are infinite-weight.  New instructions can be
       // inserted before the current instruction via Iter, or after the current
       // instruction via NextIter.
       if (Splitter.handleSimpleVarAssign())
         continue;
       if (Splitter.handleGeneralInst())
         continue;
     }
     Splitter.finalizeNode();
   }

   Func->dump("After splitting local variables");
 }

 } // end of namespace Ice