//===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the skeleton of the TargetLowering class.
///
/// Specifically, this invokes the appropriate lowering method for a given
/// instruction kind and drives global register allocation. It also implements
/// the non-deleted instruction iteration in LoweringContext.
///
//===----------------------------------------------------------------------===//
#include "IceTargetLowering.h"
#include "IceBitVector.h"
#include "IceCfg.h" // setError()
#include "IceCfgNode.h"
#include "IceGlobalContext.h"
#include "IceGlobalInits.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegAlloc.h"
#include <string>
#include <vector>
#define TARGET_LOWERING_CLASS_FOR(t) Target_##t
// We prevent target-specific implementation details from leaking outside their
// implementations by forbidding #include of target-specific header files
// anywhere outside their own files. To create target-specific objects
// (TargetLowering, TargetDataLowering, and TargetHeaderLowering) we use the
// following named constructors. For reference, each target Foo needs to
// implement the following named constructors and static hooks:
//
// namespace Foo {
// unique_ptr<Ice::TargetLowering> createTargetLowering(Ice::Cfg *);
// unique_ptr<Ice::TargetDataLowering>
// createTargetDataLowering(Ice::GlobalContext *);
// unique_ptr<Ice::TargetHeaderLowering>
// createTargetHeaderLowering(Ice::GlobalContext *);
// void staticInit(::Ice::GlobalContext *);
// bool shouldBePooled(const ::Ice::Constant *);
// ::Ice::Type getPointerType();
// }
#define SUBZERO_TARGET(X) \
namespace X { \
std::unique_ptr<::Ice::TargetLowering> \
createTargetLowering(::Ice::Cfg *Func); \
std::unique_ptr<::Ice::TargetDataLowering> \
createTargetDataLowering(::Ice::GlobalContext *Ctx); \
std::unique_ptr<::Ice::TargetHeaderLowering> \
createTargetHeaderLowering(::Ice::GlobalContext *Ctx); \
void staticInit(::Ice::GlobalContext *Ctx); \
bool shouldBePooled(const ::Ice::Constant *C); \
::Ice::Type getPointerType(); \
} // end of namespace X
#include "SZTargets.def"
#undef SUBZERO_TARGET
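// Note: SZTargets.def is an X-macro list; for each enabled target it expands
// SUBZERO_TARGET(<target>) once, so the block above declares the named
// constructors and hooks for every configured target. For illustration only
// (the actual entries depend on the build configuration), an entry such as
// SUBZERO_TARGET(X8632) would declare ::X8632::createTargetLowering() and the
// other ::X8632 hooks here.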
namespace Ice {
void LoweringContext::init(CfgNode *N) {
Node = N;
End = getNode()->getInsts().end();
rewind();
advanceForward(Next);
}
void LoweringContext::rewind() {
Begin = getNode()->getInsts().begin();
Cur = Begin;
skipDeleted(Cur);
Next = Cur;
availabilityReset();
}
void LoweringContext::insert(Inst *Instr) {
getNode()->getInsts().insert(Next, Instr);
LastInserted = Instr;
}
void LoweringContext::skipDeleted(InstList::iterator &I) const {
while (I != End && I->isDeleted())
++I;
}
void LoweringContext::advanceForward(InstList::iterator &I) const {
if (I != End) {
++I;
skipDeleted(I);
}
}
Inst *LoweringContext::getLastInserted() const {
assert(LastInserted);
return LastInserted;
}
void LoweringContext::availabilityReset() {
LastDest = nullptr;
LastSrc = nullptr;
}
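// Illustration of the availability mechanism below: after lowering inserts a
// simple variable assignment such as "A = B", availabilityUpdate() records
// LastDest = A and LastSrc = B. A subsequent availabilityGet(A) then returns
// B, letting the next lowering step read from B directly instead of A.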
void LoweringContext::availabilityUpdate() {
availabilityReset();
Inst *Instr = LastInserted;
if (Instr == nullptr)
return;
if (!Instr->isVarAssign())
return;
// Since isVarAssign() is true, the source operand must be a Variable.
LastDest = Instr->getDest();
LastSrc = llvm::cast<Variable>(Instr->getSrc(0));
}
Variable *LoweringContext::availabilityGet(Operand *Src) const {
assert(Src);
if (Src == LastDest)
return LastSrc;
return nullptr;
}
namespace {
void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
std::function<std::string(RegNumT)> getRegName,
const std::string &LineIndentString) {
constexpr size_t RegistersPerLine = 16;
size_t Count = 0;
for (RegNumT RegNum : RegNumBVIter(Bitset)) {
if (Count == 0) {
Str << LineIndentString;
} else {
Str << ",";
}
if (Count > 0 && Count % RegistersPerLine == 0)
Str << "\n" << LineIndentString;
++Count;
Str << getRegName(RegNum);
}
if (Count)
Str << "\n";
}
// Splits "<class>:<reg>" into "<class>" plus "<reg>". If there is no <class>
// component, the result is "" plus "<reg>".
void splitToClassAndName(const std::string &RegName, std::string *SplitRegClass,
std::string *SplitRegName) {
constexpr const char Separator[] = ":";
constexpr size_t SeparatorWidth = llvm::array_lengthof(Separator) - 1;
size_t Pos = RegName.find(Separator);
if (Pos == std::string::npos) {
*SplitRegClass = "";
*SplitRegName = RegName;
} else {
*SplitRegClass = RegName.substr(0, Pos);
*SplitRegName = RegName.substr(Pos + SeparatorWidth);
}
}
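// Examples (the register names are purely illustrative):
//   splitToClassAndName("GPR:eax", &C, &N)  ==>  C == "GPR", N == "eax"
//   splitToClassAndName("eax", &C, &N)      ==>  C == "",    N == "eax"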
LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) {
llvm::report_fatal_error("Unsupported target: " +
std::string(targetArchString(Target)));
}
} // end of anonymous namespace
void TargetLowering::filterTypeToRegisterSet(
GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
size_t TypeToRegisterSetSize,
std::function<std::string(RegNumT)> getRegName,
std::function<const char *(RegClass)> getRegClassName) {
std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
SmallBitVector(NumRegs));
std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
SmallBitVector(NumRegs));
std::unordered_map<std::string, RegNumT> RegNameToIndex;
for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
const auto RegNum = RegNumT::fromInt(RegIndex);
RegNameToIndex[getRegName(RegNum)] = RegNum;
}
std::vector<std::string> BadRegNames;
// The processRegList function iterates across the RegNames vector. Each
// entry in the vector is a string of the form "<reg>" or "<class>:<reg>".
// The register class and register number are computed, and the corresponding
// bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set
// for all classes.
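// For example (with illustrative register names), "GPR:eax" sets the eax bit
// only in the row for the GPR class, while a bare "eax" sets it in every
// class row in which eax is present in TypeToRegisterSet.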
auto processRegList = [&](const std::vector<std::string> &RegNames,
std::vector<SmallBitVector> &RegSet) {
for (const std::string &RegClassAndName : RegNames) {
std::string RClass;
std::string RName;
splitToClassAndName(RegClassAndName, &RClass, &RName);
if (!RegNameToIndex.count(RName)) {
BadRegNames.push_back(RName);
continue;
}
const int32_t RegIndex = RegNameToIndex.at(RName);
for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize;
++TypeIndex) {
if (RClass.empty() ||
RClass == getRegClassName(static_cast<RegClass>(TypeIndex))) {
RegSet[TypeIndex][RegIndex] = TypeToRegisterSet[TypeIndex][RegIndex];
}
}
}
};
processRegList(getFlags().getUseRestrictedRegisters(), UseSet);
processRegList(getFlags().getExcludedRegisters(), ExcludeSet);
if (!BadRegNames.empty()) {
std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer);
StrBuf << "Unrecognized use/exclude registers:";
for (const auto &RegName : BadRegNames)
StrBuf << " " << RegName;
llvm::report_fatal_error(StrBuf.str());
}
// Apply filters.
for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
SmallBitVector *UseBitSet = &UseSet[TypeIndex];
SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
if (UseBitSet->any())
*TypeBitSet = *UseBitSet;
(*TypeBitSet).reset(*ExcludeBitSet);
}
// Display filtered register sets, if requested.
if (BuildDefs::dump() && NumRegs &&
(getFlags().getVerbose() & IceV_AvailableRegs)) {
Ostream &Str = Ctx->getStrDump();
const std::string Indent = " ";
const std::string IndentTwice = Indent + Indent;
Str << "Registers available for register allocation:\n";
for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
Str << Indent << getRegClassName(static_cast<RegClass>(TypeIndex))
<< ":\n";
printRegisterSet(Str, TypeToRegisterSet[TypeIndex], getRegName,
IndentTwice);
}
Str << "\n";
}
}
std::unique_ptr<TargetLowering>
TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
switch (Target) {
default:
badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
case TARGET_LOWERING_CLASS_FOR(X): \
return ::X::createTargetLowering(Func);
#include "SZTargets.def"
#undef SUBZERO_TARGET
}
}
void TargetLowering::staticInit(GlobalContext *Ctx) {
const TargetArch Target = getFlags().getTargetArch();
// Call the specified target's static initializer.
switch (Target) {
default:
badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
case TARGET_LOWERING_CLASS_FOR(X): { \
static bool InitGuard##X = false; \
if (InitGuard##X) { \
return; \
} \
InitGuard##X = true; \
::X::staticInit(Ctx); \
} break;
#include "SZTargets.def"
#undef SUBZERO_TARGET
}
}
bool TargetLowering::shouldBePooled(const Constant *C) {
const TargetArch Target = getFlags().getTargetArch();
switch (Target) {
default:
return false;
#define SUBZERO_TARGET(X) \
case TARGET_LOWERING_CLASS_FOR(X): \
return ::X::shouldBePooled(C);
#include "SZTargets.def"
#undef SUBZERO_TARGET
}
}
::Ice::Type TargetLowering::getPointerType() {
const TargetArch Target = getFlags().getTargetArch();
switch (Target) {
default:
return ::Ice::IceType_void;
#define SUBZERO_TARGET(X) \
case TARGET_LOWERING_CLASS_FOR(X): \
return ::X::getPointerType();
#include "SZTargets.def"
#undef SUBZERO_TARGET
}
}
TargetLowering::SandboxType
TargetLowering::determineSandboxTypeFromFlags(const ClFlags &Flags) {
assert(!Flags.getUseSandboxing() || !Flags.getUseNonsfi());
if (Flags.getUseNonsfi()) {
return TargetLowering::ST_Nonsfi;
}
if (Flags.getUseSandboxing()) {
return TargetLowering::ST_NaCl;
}
return TargetLowering::ST_None;
}
TargetLowering::TargetLowering(Cfg *Func)
: Func(Func), Ctx(Func->getContext()),
SandboxingType(determineSandboxTypeFromFlags(getFlags())) {}
TargetLowering::AutoBundle::AutoBundle(TargetLowering *Target,
InstBundleLock::Option Option)
: Target(Target), NeedSandboxing(getFlags().getUseSandboxing()) {
assert(!Target->AutoBundling);
Target->AutoBundling = true;
if (NeedSandboxing) {
Target->_bundle_lock(Option);
}
}
TargetLowering::AutoBundle::~AutoBundle() {
assert(Target->AutoBundling);
Target->AutoBundling = false;
if (NeedSandboxing) {
Target->_bundle_unlock();
}
}
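// AutoBundle is an RAII helper. A typical (hypothetical) use in a target's
// lowering code looks like:
//
//   {
//     AutoBundle _(this, InstBundleLock::Opt_AlignToEnd);
//     // ... emit the instructions that must stay within one bundle ...
//   } // the matching _bundle_unlock() is emitted here when sandboxing is on
//
// When sandboxing is disabled, construction and destruction only maintain the
// AutoBundling flag used by the asserts above.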
void TargetLowering::genTargetHelperCalls() {
TimerMarker T(TimerStack::TT_genHelpers, Func);
Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
while (!Context.atEnd()) {
PostIncrLoweringContext _(Context);
genTargetHelperCallFor(iteratorToInst(Context.getCur()));
}
}
}
void TargetLowering::doAddressOpt() {
doAddressOptOther();
if (llvm::isa<InstLoad>(*Context.getCur()))
doAddressOptLoad();
else if (llvm::isa<InstStore>(*Context.getCur()))
doAddressOptStore();
else if (auto *Intrinsic =
llvm::dyn_cast<InstIntrinsic>(&*Context.getCur())) {
if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::LoadSubVector)
doAddressOptLoadSubVector();
else if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::StoreSubVector)
doAddressOptStoreSubVector();
}
Context.advanceCur();
Context.advanceNext();
}
void TargetLowering::doNopInsertion(RandomNumberGenerator &RNG) {
Inst *I = iteratorToInst(Context.getCur());
bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
I->isDeleted();
if (!ShouldSkip) {
int Probability = getFlags().getNopProbabilityAsPercentage();
for (int I = 0; I < getFlags().getMaxNopsPerInstruction(); ++I) {
randomlyInsertNop(Probability / 100.0, RNG);
}
}
}
// Lowers a single instruction according to the information in Context, by
// checking the Context.Cur instruction kind and calling the appropriate
// lowering method. The lowering method should insert target instructions at
// the Cur.Next insertion point, and should not delete the Context.Cur
// instruction or advance Context.Cur.
//
// The lowering method may look ahead in the instruction stream as desired, and
// lower additional instructions in conjunction with the current one, for
// example fusing a compare and branch. If it does, it should advance
// Context.Cur to point to the next non-deleted instruction to process, and it
// should delete any additional instructions it consumes.
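// For instance, a target that fuses an icmp with the br consuming its result
// can lower both instructions here, mark the br as deleted, and advance
// Context.Cur past it.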
void TargetLowering::lower() {
assert(!Context.atEnd());
Inst *Instr = iteratorToInst(Context.getCur());
Instr->deleteIfDead();
if (!Instr->isDeleted() && !llvm::isa<InstFakeDef>(Instr) &&
!llvm::isa<InstFakeUse>(Instr)) {
// Mark the current instruction as deleted before lowering, otherwise the
// Dest variable will likely get marked as non-SSA. See
// Variable::setDefinition(). However, just pass-through FakeDef and
// FakeUse instructions that might have been inserted prior to lowering.
Instr->setDeleted();
switch (Instr->getKind()) {
case Inst::Alloca:
lowerAlloca(llvm::cast<InstAlloca>(Instr));
break;
case Inst::Arithmetic:
lowerArithmetic(llvm::cast<InstArithmetic>(Instr));
break;
case Inst::Assign:
lowerAssign(llvm::cast<InstAssign>(Instr));
break;
case Inst::Br:
lowerBr(llvm::cast<InstBr>(Instr));
break;
case Inst::Breakpoint:
lowerBreakpoint(llvm::cast<InstBreakpoint>(Instr));
break;
case Inst::Call:
lowerCall(llvm::cast<InstCall>(Instr));
break;
case Inst::Cast:
lowerCast(llvm::cast<InstCast>(Instr));
break;
case Inst::ExtractElement:
lowerExtractElement(llvm::cast<InstExtractElement>(Instr));
break;
case Inst::Fcmp:
lowerFcmp(llvm::cast<InstFcmp>(Instr));
break;
case Inst::Icmp:
lowerIcmp(llvm::cast<InstIcmp>(Instr));
break;
case Inst::InsertElement:
lowerInsertElement(llvm::cast<InstInsertElement>(Instr));
break;
case Inst::Intrinsic: {
auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
if (Intrinsic->getIntrinsicInfo().ReturnsTwice)
setCallsReturnsTwice(true);
lowerIntrinsic(Intrinsic);
break;
}
case Inst::Load:
lowerLoad(llvm::cast<InstLoad>(Instr));
break;
case Inst::Phi:
lowerPhi(llvm::cast<InstPhi>(Instr));
break;
case Inst::Ret:
lowerRet(llvm::cast<InstRet>(Instr));
break;
case Inst::Select:
lowerSelect(llvm::cast<InstSelect>(Instr));
break;
case Inst::ShuffleVector:
lowerShuffleVector(llvm::cast<InstShuffleVector>(Instr));
break;
case Inst::Store:
lowerStore(llvm::cast<InstStore>(Instr));
break;
case Inst::Switch:
lowerSwitch(llvm::cast<InstSwitch>(Instr));
break;
case Inst::Unreachable:
lowerUnreachable(llvm::cast<InstUnreachable>(Instr));
break;
default:
lowerOther(Instr);
break;
}
postLower();
}
Context.advanceCur();
Context.advanceNext();
}
void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next,
InstHighLevel *Instr) {
// TODO(stichnot): Consider modifying the design/implementation to avoid
// multiple init() calls when using lowerInst() to lower several instructions
// in the same node.
Context.init(Node);
Context.setNext(Next);
Context.insert(Instr);
--Next;
assert(iteratorToInst(Next) == Instr);
Context.setCur(Next);
lower();
}
void TargetLowering::lowerOther(const Inst *Instr) {
(void)Instr;
Func->setError("Can't lower unsupported instruction type");
}
// Drives register allocation, allowing all physical registers (except perhaps
// for the frame pointer) to be allocated. This set of registers could
// potentially be parameterized if we want to restrict registers e.g. for
// performance testing.
void TargetLowering::regAlloc(RegAllocKind Kind) {
TimerMarker T(TimerStack::TT_regAlloc, Func);
LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None;
RegSetMask RegExclude = RegSet_None;
RegInclude |= RegSet_CallerSave;
RegInclude |= RegSet_CalleeSave;
if (hasFramePointer())
RegExclude |= RegSet_FramePointer;
SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
bool Repeat = (Kind == RAK_Global && getFlags().getRepeatRegAlloc());
CfgSet<Variable *> EmptySet;
do {
LinearScan.init(Kind, EmptySet);
LinearScan.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
if (!LinearScan.hasEvictions())
Repeat = false;
Kind = RAK_SecondChance;
} while (Repeat);
// TODO(stichnot): Run the register allocator one more time to do stack slot
// coalescing. The idea would be to initialize the Unhandled list with the
// set of Variables that have no register and a non-empty live range, and
// model an infinite number of registers. Maybe use the register aliasing
// mechanism to get better packing of narrower slots.
if (getFlags().getSplitGlobalVars())
postRegallocSplitting(RegMask);
}
namespace {
CfgVector<Inst *> getInstructionsInRange(CfgNode *Node, InstNumberT Start,
InstNumberT End) {
CfgVector<Inst *> Result;
bool Started = false;
auto Process = [&](InstList &Insts) {
for (auto &Instr : Insts) {
if (Instr.isDeleted()) {
continue;
}
if (Instr.getNumber() == Start) {
Started = true;
}
if (Started) {
Result.emplace_back(&Instr);
}
if (Instr.getNumber() == End) {
break;
}
}
};
Process(Node->getPhis());
Process(Node->getInsts());
// TODO(manasijm): Investigate why checking >= End significantly changes
// output. Should not happen when renumbering produces monotonically
// increasing instruction numbers and live ranges begin and end on non-deleted
// instructions.
return Result;
}
} // namespace
void TargetLowering::postRegallocSplitting(const SmallBitVector &RegMask) {
// Splits the live ranges of global (multi-block) variables and runs the
// register allocator to find registers for as many of the new variables as
// possible.
// TODO(manasijm): Merge the small live ranges back into multi-block ones when
// the variables get the same register. This will reduce the number of new
// instructions inserted. This might involve a full dataflow analysis.
// Also, modify the preference mechanism in the register allocator to match.
TimerMarker _(TimerStack::TT_splitGlobalVars, Func);
CfgSet<Variable *> SplitCandidates;
// Find variables that do not have registers but are allowed to. Also skip
// variables with single-segment live ranges, as they are not split further in
// this function.
for (Variable *Var : Func->getVariables()) {
if (!Var->mustNotHaveReg() && !Var->hasReg()) {
if (Var->getLiveRange().getNumSegments() > 1)
SplitCandidates.insert(Var);
}
}
if (SplitCandidates.empty())
return;
CfgSet<Variable *> ExtraVars;
struct UseInfo {
Variable *Replacing = nullptr;
Inst *FirstUse = nullptr;
Inst *LastDef = nullptr;
SizeT UseCount = 0;
};
CfgUnorderedMap<Variable *, UseInfo> VarInfo;
// Split the live ranges of the viable variables by node.
// Compute metadata (UseInfo) for each of the resulting variables.
for (auto *Var : SplitCandidates) {
for (auto &Segment : Var->getLiveRange().getSegments()) {
UseInfo Info;
Info.Replacing = Var;
auto *Node = Var->getLiveRange().getNodeForSegment(Segment.first);
for (auto *Instr :
getInstructionsInRange(Node, Segment.first, Segment.second)) {
for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
// It's safe to iterate over the top-level src operands rather than
// using FOREACH_VAR_IN_INST(), because any variables inside e.g.
// mem operands should already have registers.
if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
if (Var == Info.Replacing) {
if (Info.FirstUse == nullptr && !llvm::isa<InstPhi>(Instr)) {
Info.FirstUse = Instr;
}
Info.UseCount++;
}
}
}
if (Instr->getDest() == Info.Replacing && !llvm::isa<InstPhi>(Instr)) {
Info.LastDef = Instr;
}
}
static constexpr SizeT MinUseThreshold = 3;
// Skip if the variable has fewer than `MinUseThreshold` uses in the segment.
if (Info.UseCount < MinUseThreshold)
continue;
if (!Info.FirstUse && !Info.LastDef) {
continue;
}
LiveRange LR;
LR.addSegment(Segment);
Variable *NewVar = Func->makeVariable(Var->getType());
NewVar->setLiveRange(LR);
VarInfo[NewVar] = Info;
ExtraVars.insert(NewVar);
}
}
// Run the register allocator with all these new variables included
LinearScan RegAlloc(Func);
RegAlloc.init(RAK_Global, SplitCandidates);
RegAlloc.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
// Modify the Cfg to use the new variables that now have registers.
for (auto *ExtraVar : ExtraVars) {
if (!ExtraVar->hasReg()) {
continue;
}
auto &Info = VarInfo[ExtraVar];
assert(ExtraVar->getLiveRange().getSegments().size() == 1);
auto Segment = ExtraVar->getLiveRange().getSegments()[0];
auto *Node =
Info.Replacing->getLiveRange().getNodeForSegment(Segment.first);
auto RelevantInsts =
getInstructionsInRange(Node, Segment.first, Segment.second);
if (RelevantInsts.empty())
continue;
// Replace old variables
for (auto *Instr : RelevantInsts) {
if (llvm::isa<InstPhi>(Instr))
continue;
// TODO(manasijm): Figure out how to safely enable replacing phi dest
// variables. The issue is that we cannot insert low-level mov
// instructions into the PhiList.
for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
// FOREACH_VAR_IN_INST() not needed. Same logic as above.
if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
if (Var == Info.Replacing) {
Instr->replaceSource(i, ExtraVar);
}
}
}
if (Instr->getDest() == Info.Replacing) {
Instr->replaceDest(ExtraVar);
}
}
assert(Info.FirstUse != Info.LastDef);
assert(Info.FirstUse || Info.LastDef);
// Insert spill code
if (Info.FirstUse != nullptr) {
auto *NewInst =
Func->getTarget()->createLoweredMove(ExtraVar, Info.Replacing);
Node->getInsts().insert(instToIterator(Info.FirstUse), NewInst);
}
if (Info.LastDef != nullptr) {
auto *NewInst =
Func->getTarget()->createLoweredMove(Info.Replacing, ExtraVar);
Node->getInsts().insertAfter(instToIterator(Info.LastDef), NewInst);
}
}
}
void TargetLowering::markRedefinitions() {
// Find (non-SSA) instructions where the Dest variable appears in some source
// operand, and set the IsDestRedefined flag to keep liveness analysis
// consistent.
for (auto Instr = Context.getCur(), E = Context.getNext(); Instr != E;
++Instr) {
if (Instr->isDeleted())
continue;
Variable *Dest = Instr->getDest();
if (Dest == nullptr)
continue;
FOREACH_VAR_IN_INST(Var, *Instr) {
if (Var == Dest) {
Instr->setDestRedefined();
break;
}
}
}
}
void TargetLowering::addFakeDefUses(const Inst *Instr) {
FOREACH_VAR_IN_INST(Var, *Instr) {
if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
Context.insert<InstFakeUse>(Var64->getLo());
Context.insert<InstFakeUse>(Var64->getHi());
} else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) {
for (Variable *Var : VarVec->getContainers()) {
Context.insert<InstFakeUse>(Var);
}
} else {
Context.insert<InstFakeUse>(Var);
}
}
Variable *Dest = Instr->getDest();
if (Dest == nullptr)
return;
if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
Context.insert<InstFakeDef>(Var64->getLo());
Context.insert<InstFakeDef>(Var64->getHi());
} else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) {
for (Variable *Var : VarVec->getContainers()) {
Context.insert<InstFakeDef>(Var);
}
} else {
Context.insert<InstFakeDef>(Dest);
}
}
void TargetLowering::sortVarsByAlignment(VarList &Dest,
const VarList &Source) const {
Dest = Source;
// Instead of std::sort, we could do a bucket sort with log2(alignment) as
// the buckets, if performance is an issue.
std::sort(Dest.begin(), Dest.end(),
[this](const Variable *V1, const Variable *V2) {
const size_t WidthV1 = typeWidthInBytesOnStack(V1->getType());
const size_t WidthV2 = typeWidthInBytesOnStack(V2->getType());
if (WidthV1 == WidthV2)
return V1->getIndex() < V2->getIndex();
return WidthV1 > WidthV2;
});
}
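// For example, variables whose on-stack widths are {4, 16, 8, 4} end up
// ordered as {16, 8, 4, 4}; equal-width variables are ordered by their index,
// so the sort is deterministic.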
void TargetLowering::getVarStackSlotParams(
VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
std::function<bool(Variable *)> TargetVarHook) {
const VariablesMetadata *VMetadata = Func->getVMetadata();
BitVector IsVarReferenced(Func->getNumVariables());
for (CfgNode *Node : Func->getNodes()) {
for (Inst &Instr : Node->getInsts()) {
if (Instr.isDeleted())
continue;
if (const Variable *Var = Instr.getDest())
IsVarReferenced[Var->getIndex()] = true;
FOREACH_VAR_IN_INST(Var, Instr) {
IsVarReferenced[Var->getIndex()] = true;
}
}
}
// If SimpleCoalescing is false, each variable without a register gets its
// own unique stack slot, which leads to large stack frames. If
// SimpleCoalescing is true, then each "global" variable without a register
// gets its own slot, but "local" variable slots are reused across basic
// blocks. E.g., if A and B are local to block 1 and C is local to block 2,
// then C may share a slot with A or B.
//
// We cannot coalesce stack slots if this function calls a "returns twice"
// function. In that case, basic blocks may be revisited, and variables local
// to those basic blocks are actually live until after the called function
// returns a second time.
const bool SimpleCoalescing = !callsReturnsTwice();
CfgVector<size_t> LocalsSize(Func->getNumNodes());
const VarList &Variables = Func->getVariables();
VarList SpilledVariables;
for (Variable *Var : Variables) {
if (Var->hasReg()) {
// Don't consider a rematerializable variable to be an actual register use
// (specifically of the frame pointer). Otherwise, the prolog may decide
// to save the frame pointer twice - once because of the explicit need for
// a frame pointer, and once because of an active use of a callee-save
// register.
if (!Var->isRematerializable())
RegsUsed[Var->getRegNum()] = true;
continue;
}
// An argument either does not need a stack slot (if passed in a register)
// or already has one (if passed on the stack).
if (Var->getIsArg()) {
if (!Var->hasReg()) {
assert(!Var->hasStackOffset());
Var->setHasStackOffset();
}
continue;
}
// An unreferenced variable doesn't need a stack slot.
if (!IsVarReferenced[Var->getIndex()])
continue;
// Give the target a chance to handle the variable; a target-specific
// variable may end up sharing stack slots and not need accounting here.
if (TargetVarHook(Var))
continue;
assert(!Var->hasStackOffset());
Var->setHasStackOffset();
SpilledVariables.push_back(Var);
}
SortedSpilledVariables.reserve(SpilledVariables.size());
sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);
for (Variable *Var : SortedSpilledVariables) {
size_t Increment = typeWidthInBytesOnStack(Var->getType());
// We have sorted by alignment, so the first variable we encounter that is
// located in each area determines the max alignment for the area.
if (!*SpillAreaAlignmentBytes)
*SpillAreaAlignmentBytes = Increment;
if (SimpleCoalescing && VMetadata->isTracked(Var)) {
if (VMetadata->isMultiBlock(Var)) {
*GlobalsSize += Increment;
} else {
SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
LocalsSize[NodeIndex] += Increment;
if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
*SpillAreaSizeBytes = LocalsSize[NodeIndex];
if (!*LocalsSlotsAlignmentBytes)
*LocalsSlotsAlignmentBytes = Increment;
}
} else {
*SpillAreaSizeBytes += Increment;
}
}
// For testing legalization of large stack offsets on targets with limited
// offset bits in instruction encodings, add some padding.
*SpillAreaSizeBytes += getFlags().getTestStackExtra();
}
void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
uint32_t SpillAreaAlignmentBytes,
size_t GlobalsSize,
uint32_t LocalsSlotsAlignmentBytes,
uint32_t *SpillAreaPaddingBytes,
uint32_t *LocalsSlotsPaddingBytes) {
if (SpillAreaAlignmentBytes) {
uint32_t PaddingStart = SpillAreaStartOffset;
uint32_t SpillAreaStart =
Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
*SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
}
// If there are separate globals and locals areas, make sure the locals area
// is aligned by padding the end of the globals area.
if (LocalsSlotsAlignmentBytes) {
uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
GlobalsAndSubsequentPaddingSize =
Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
*LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
}
}
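// Worked example: with SpillAreaStartOffset = 4 and SpillAreaAlignmentBytes =
// 16, the spill area is moved up to offset 16, so *SpillAreaPaddingBytes = 12.
// With GlobalsSize = 20 and LocalsSlotsAlignmentBytes = 8, the globals area is
// padded out to 24 bytes, so *LocalsSlotsPaddingBytes = 4.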
void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
size_t SpillAreaPaddingBytes,
size_t SpillAreaSizeBytes,
size_t GlobalsAndSubsequentPaddingSize,
bool UsesFramePointer) {
const VariablesMetadata *VMetadata = Func->getVMetadata();
// For testing legalization of large stack offsets on targets with limited
// offset bits in instruction encodings, add some padding. This assumes that
// SpillAreaSizeBytes has accounted for the extra test padding. When
// UsesFramePointer is true, the offset depends on the padding, not just the
// SpillAreaSizeBytes. On the other hand, when UsesFramePointer is false, the
// offsets depend on the gap between SpillAreaSizeBytes and
// SpillAreaPaddingBytes, so we don't increment that.
size_t TestPadding = getFlags().getTestStackExtra();
if (UsesFramePointer)
SpillAreaPaddingBytes += TestPadding;
size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
size_t NextStackOffset = SpillAreaPaddingBytes;
CfgVector<size_t> LocalsSize(Func->getNumNodes());
const bool SimpleCoalescing = !callsReturnsTwice();
for (Variable *Var : SortedSpilledVariables) {
size_t Increment = typeWidthInBytesOnStack(Var->getType());
if (SimpleCoalescing && VMetadata->isTracked(Var)) {
if (VMetadata->isMultiBlock(Var)) {
GlobalsSpaceUsed += Increment;
NextStackOffset = GlobalsSpaceUsed;
} else {
SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
LocalsSize[NodeIndex] += Increment;
NextStackOffset = SpillAreaPaddingBytes +
GlobalsAndSubsequentPaddingSize +
LocalsSize[NodeIndex];
}
} else {
NextStackOffset += Increment;
}
if (UsesFramePointer)
Var->setStackOffset(-NextStackOffset);
else
Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
}
}
InstCall *TargetLowering::makeHelperCall(RuntimeHelper FuncID, Variable *Dest,
SizeT MaxSrcs) {
constexpr bool HasTailCall = false;
Constant *CallTarget = Ctx->getRuntimeHelperFunc(FuncID);
InstCall *Call =
InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
return Call;
}
bool TargetLowering::shouldOptimizeMemIntrins() {
return Func->getOptLevel() >= Opt_1 || getFlags().getForceMemIntrinOpt();
}
void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
Variable *Dest, Operand *Src0,
Operand *Src1) {
scalarizeInstruction(
Dest,
[this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) {
return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
},
Src0, Src1);
}
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
const char *Suffix) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Ctx->getStrEmit();
const std::string &EmitStr = C->getEmitString();
if (!EmitStr.empty()) {
// C has a custom emit string, so we use it instead of the canonical
// Name + Offset form.
Str << EmitStr;
return;
}
Str << C->getName() << Suffix;
RelocOffsetT Offset = C->getOffset();
if (Offset) {
if (Offset > 0)
Str << "+";
Str << Offset;
}
}
std::unique_ptr<TargetDataLowering>
TargetDataLowering::createLowering(GlobalContext *Ctx) {
TargetArch Target = getFlags().getTargetArch();
switch (Target) {
default:
badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
case TARGET_LOWERING_CLASS_FOR(X): \
return ::X::createTargetDataLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
}
}
TargetDataLowering::~TargetDataLowering() = default;
namespace {
// dataSectionSuffix decides whether to use SectionSuffix or VarName as the
// data section suffix. Essentially, when using separate data sections for
// globals, SectionSuffix is not necessary.
std::string dataSectionSuffix(const std::string &SectionSuffix,
const std::string &VarName,
const bool DataSections) {
if (SectionSuffix.empty() && !DataSections) {
return "";
}
if (DataSections) {
// With data sections we don't need to use the SectionSuffix.
return "." + VarName;
}
assert(!SectionSuffix.empty());
return "." + SectionSuffix;
}
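// Examples: dataSectionSuffix("", "MyVar", /*DataSections=*/true) yields
// ".MyVar"; dataSectionSuffix("mysuffix", "MyVar", false) yields ".mysuffix";
// with an empty suffix and DataSections disabled the result is "".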
} // end of anonymous namespace
void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
const std::string &SectionSuffix) {
if (!BuildDefs::dump())
return;
// If external and not initialized, this must be a cross test. Don't generate
// a declaration for such cases.
const bool IsExternal = Var.isExternal() || getFlags().getDisableInternal();
if (IsExternal && !Var.hasInitializer())
return;
Ostream &Str = Ctx->getStrEmit();
const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
const bool IsConstant = Var.getIsConstant();
const SizeT Size = Var.getNumBytes();
const std::string Name = Var.getName().toString();
Str << "\t.type\t" << Name << ",%object\n";
const bool UseDataSections = getFlags().getDataSections();
const bool UseNonsfi = getFlags().getUseNonsfi();
const std::string Suffix =
dataSectionSuffix(SectionSuffix, Name, UseDataSections);
if (IsConstant && UseNonsfi)
Str << "\t.section\t.data.rel.ro" << Suffix << ",\"aw\",%progbits\n";
else if (IsConstant)
Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
else if (HasNonzeroInitializer)
Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
else
Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";
if (IsExternal)
Str << "\t.globl\t" << Name << "\n";
const uint32_t Align = Var.getAlignment();
if (Align > 1) {
assert(llvm::isPowerOf2_32(Align));
// Use the .p2align directive, since the .align N directive can interpret N
// either as a byte count or as a power-of-2 exponent, depending on the
// target.
Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
}
Str << Name << ":\n";
if (HasNonzeroInitializer) {
for (const auto *Init : Var.getInitializers()) {
switch (Init->getKind()) {
case VariableDeclaration::Initializer::DataInitializerKind: {
const auto &Data =
llvm::cast<VariableDeclaration::DataInitializer>(Init)
->getContents();
for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
}
break;
}
case VariableDeclaration::Initializer::ZeroInitializerKind:
Str << "\t.zero\t" << Init->getNumBytes() << "\n";
break;
case VariableDeclaration::Initializer::RelocInitializerKind: {
const auto *Reloc =
llvm::cast<VariableDeclaration::RelocInitializer>(Init);
Str << "\t" << getEmit32Directive() << "\t";
Str << Reloc->getDeclaration()->getName();
if (Reloc->hasFixup()) {
// TODO(jpp): this is ARM32 specific.
Str << "(GOTOFF)";
}
if (RelocOffsetT Offset = Reloc->getOffset()) {
if (Offset >= 0 || (Offset == INT32_MIN))
Str << " + " << Offset;
else
Str << " - " << -Offset;
}
Str << "\n";
break;
}
}
}
} else {
// NOTE: for non-constant zero initializers, this is BSS (no bits), so an
// ELF writer would not write to the file and would only track virtual
// offsets, but the .s writer still needs this .zero and cannot simply use
// the .size directive to advance offsets.
Str << "\t.zero\t" << Size << "\n";
}
Str << "\t.size\t" << Name << ", " << Size << "\n";
}
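// For illustration, a 4-byte internal global "g" initialized to the bytes
// 1,2,3,4, with data sections enabled and 4-byte alignment, would be emitted
// roughly as:
//
//   .type g,%object
//   .section .data.g,"aw",%progbits
//   .p2align 2
//   g:
//   .byte 1
//   .byte 2
//   .byte 3
//   .byte 4
//   .size g, 4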
std::unique_ptr<TargetHeaderLowering>
TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
TargetArch Target = getFlags().getTargetArch();
switch (Target) {
default:
badTargetFatalError(Target);
#define SUBZERO_TARGET(X) \
case TARGET_LOWERING_CLASS_FOR(X): \
return ::X::createTargetHeaderLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
}
}
TargetHeaderLowering::~TargetHeaderLowering() = default;
} // end of namespace Ice