| //===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===// |
| // |
| // The Subzero Code Generator |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// \brief Implements the skeleton of the TargetLowering class. |
| /// |
| /// Specifically, this invokes the appropriate lowering method for a given |
| /// instruction kind and drives global register allocation. It also implements |
| /// the non-deleted instruction iteration in LoweringContext. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "IceTargetLowering.h" |
| |
| #include "IceBitVector.h" |
| #include "IceCfg.h" // setError() |
| #include "IceCfgNode.h" |
| #include "IceGlobalContext.h" |
| #include "IceGlobalInits.h" |
| #include "IceInstVarIter.h" |
| #include "IceLiveness.h" |
| #include "IceOperand.h" |
| #include "IceRegAlloc.h" |
| |
| #include <string> |
| #include <vector> |
| |
| #define TARGET_LOWERING_CLASS_FOR(t) Target_##t |
| |
| // We prevent target-specific implementation details from leaking outside their |
| // implementations by forbidding #include of target-specific header files |
| // anywhere outside their own files. To create target-specific objects |
| // (TargetLowering, TargetDataLowering, and TargetHeaderLowering) we use the |
| // following named constructors. For reference, each target Foo needs to |
| // implement the following named constructors and helper functions: |
| // |
| // namespace Foo { |
| // unique_ptr<Ice::TargetLowering> createTargetLowering(Ice::Cfg *); |
| // unique_ptr<Ice::TargetDataLowering> |
| // createTargetDataLowering(Ice::GlobalContext *); |
| // unique_ptr<Ice::TargetHeaderLowering> |
| // createTargetHeaderLowering(Ice::GlobalContext *); |
| // void staticInit(::Ice::GlobalContext *); |
| // bool shouldBePooled(const ::Ice::Constant *); |
| // ::Ice::Type getPointerType(); |
| // } |
| #define SUBZERO_TARGET(X) \ |
| namespace X { \ |
| std::unique_ptr<::Ice::TargetLowering> \ |
| createTargetLowering(::Ice::Cfg *Func); \ |
| std::unique_ptr<::Ice::TargetDataLowering> \ |
| createTargetDataLowering(::Ice::GlobalContext *Ctx); \ |
| std::unique_ptr<::Ice::TargetHeaderLowering> \ |
| createTargetHeaderLowering(::Ice::GlobalContext *Ctx); \ |
| void staticInit(::Ice::GlobalContext *Ctx); \ |
| bool shouldBePooled(const ::Ice::Constant *C); \ |
| ::Ice::Type getPointerType(); \ |
| } // end of namespace X |
| #include "SZTargets.def" |
| #undef SUBZERO_TARGET |
| |
| namespace Ice { |
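| // Sets up iteration over the instructions of node N: after init(), Cur points |
| // at the first non-deleted instruction, Next at the following one, and the |
| // assignment-availability state is cleared. |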
| void LoweringContext::init(CfgNode *N) { |
| Node = N; |
| End = getNode()->getInsts().end(); |
| rewind(); |
| advanceForward(Next); |
| } |
| |
| void LoweringContext::rewind() { |
| Begin = getNode()->getInsts().begin(); |
| Cur = Begin; |
| skipDeleted(Cur); |
| Next = Cur; |
| availabilityReset(); |
| } |
| |
| void LoweringContext::insert(Inst *Instr) { |
| getNode()->getInsts().insert(Next, Instr); |
| LastInserted = Instr; |
| } |
| |
| void LoweringContext::skipDeleted(InstList::iterator &I) const { |
| while (I != End && I->isDeleted()) |
| ++I; |
| } |
| |
| void LoweringContext::advanceForward(InstList::iterator &I) const { |
| if (I != End) { |
| ++I; |
| skipDeleted(I); |
| } |
| } |
| |
| Inst *LoweringContext::getLastInserted() const { |
| assert(LastInserted); |
| return LastInserted; |
| } |
| |
| void LoweringContext::availabilityReset() { |
| LastDest = nullptr; |
| LastSrc = nullptr; |
| } |
| |
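| // The availability mechanism remembers the most recently inserted simple |
| // variable-to-variable assignment (LastDest = LastSrc), so that a subsequent |
| // lowering step can substitute LastSrc for a use of LastDest via |
| // availabilityGet(). |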
| void LoweringContext::availabilityUpdate() { |
| availabilityReset(); |
| Inst *Instr = LastInserted; |
| if (Instr == nullptr) |
| return; |
| if (!Instr->isVarAssign()) |
| return; |
| // Since isVarAssign() is true, the source operand must be a Variable. |
| LastDest = Instr->getDest(); |
| LastSrc = llvm::cast<Variable>(Instr->getSrc(0)); |
| } |
| |
| Variable *LoweringContext::availabilityGet(Operand *Src) const { |
| assert(Src); |
| if (Src == LastDest) |
| return LastSrc; |
| return nullptr; |
| } |
| |
| namespace { |
| |
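| // Prints the registers in Bitset as a comma-separated list, RegistersPerLine |
| // (16) names per line, with each line prefixed by LineIndentString. |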
| void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset, |
| std::function<std::string(RegNumT)> getRegName, |
| const std::string &LineIndentString) { |
| constexpr size_t RegistersPerLine = 16; |
| size_t Count = 0; |
| for (RegNumT RegNum : RegNumBVIter(Bitset)) { |
| if (Count == 0) { |
| Str << LineIndentString; |
| } else { |
| Str << ","; |
| } |
| if (Count > 0 && Count % RegistersPerLine == 0) |
| Str << "\n" << LineIndentString; |
| ++Count; |
| Str << getRegName(RegNum); |
| } |
| if (Count) |
| Str << "\n"; |
| } |
| |
| // Splits "<class>:<reg>" into "<class>" plus "<reg>". If there is no <class> |
| // component, the result is "" plus "<reg>". |
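| // For example, "class:reg" yields "class" plus "reg", while plain "reg" |
| // yields "" plus "reg". |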
| void splitToClassAndName(const std::string &RegName, std::string *SplitRegClass, |
| std::string *SplitRegName) { |
| constexpr const char Separator[] = ":"; |
| constexpr size_t SeparatorWidth = llvm::array_lengthof(Separator) - 1; |
| size_t Pos = RegName.find(Separator); |
| if (Pos == std::string::npos) { |
| *SplitRegClass = ""; |
| *SplitRegName = RegName; |
| } else { |
| *SplitRegClass = RegName.substr(0, Pos); |
| *SplitRegName = RegName.substr(Pos + SeparatorWidth); |
| } |
| } |
| |
| LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) { |
| llvm::report_fatal_error("Unsupported target: " + |
| std::string(targetArchString(Target))); |
| } |
| |
| } // end of anonymous namespace |
| |
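| // Filters the per-type register sets according to the command-line register |
| // lists (getUseRestrictedRegisters() and getExcludedRegisters()): when any |
| // "use" registers are named for a class, they become the only candidates for |
| // that class, and "exclude" registers are then removed. Unrecognized names |
| // trigger a fatal error, and the resulting sets can optionally be dumped. |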
| void TargetLowering::filterTypeToRegisterSet( |
| GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[], |
| size_t TypeToRegisterSetSize, |
| std::function<std::string(RegNumT)> getRegName, |
| std::function<const char *(RegClass)> getRegClassName) { |
| std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize, |
| SmallBitVector(NumRegs)); |
| std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize, |
| SmallBitVector(NumRegs)); |
| |
| std::unordered_map<std::string, RegNumT> RegNameToIndex; |
| for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) { |
| const auto RegNum = RegNumT::fromInt(RegIndex); |
| RegNameToIndex[getRegName(RegNum)] = RegNum; |
| } |
| |
| std::vector<std::string> BadRegNames; |
| |
| // The processRegList function iterates across the RegNames vector. Each |
| // entry in the vector is a string of the form "<reg>" or "<class>:<reg>". |
| // The register class and register number are computed, and the corresponding |
| // bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set |
| // for all classes. |
| auto processRegList = [&](const std::vector<std::string> &RegNames, |
| std::vector<SmallBitVector> &RegSet) { |
| for (const std::string &RegClassAndName : RegNames) { |
| std::string RClass; |
| std::string RName; |
| splitToClassAndName(RegClassAndName, &RClass, &RName); |
| if (!RegNameToIndex.count(RName)) { |
| BadRegNames.push_back(RName); |
| continue; |
| } |
| const int32_t RegIndex = RegNameToIndex.at(RName); |
| for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; |
| ++TypeIndex) { |
| if (RClass.empty() || |
| RClass == getRegClassName(static_cast<RegClass>(TypeIndex))) { |
| RegSet[TypeIndex][RegIndex] = TypeToRegisterSet[TypeIndex][RegIndex]; |
| } |
| } |
| } |
| }; |
| |
| processRegList(getFlags().getUseRestrictedRegisters(), UseSet); |
| processRegList(getFlags().getExcludedRegisters(), ExcludeSet); |
| |
| if (!BadRegNames.empty()) { |
| std::string Buffer; |
| llvm::raw_string_ostream StrBuf(Buffer); |
| StrBuf << "Unrecognized use/exclude registers:"; |
| for (const auto &RegName : BadRegNames) |
| StrBuf << " " << RegName; |
| llvm::report_fatal_error(StrBuf.str()); |
| } |
| |
| // Apply filters. |
| for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) { |
| SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex]; |
| SmallBitVector *UseBitSet = &UseSet[TypeIndex]; |
| SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex]; |
| if (UseBitSet->any()) |
| *TypeBitSet = *UseBitSet; |
| (*TypeBitSet).reset(*ExcludeBitSet); |
| } |
| |
| // Display filtered register sets, if requested. |
| if (BuildDefs::dump() && NumRegs && |
| (getFlags().getVerbose() & IceV_AvailableRegs)) { |
| Ostream &Str = Ctx->getStrDump(); |
| const std::string Indent = " "; |
| const std::string IndentTwice = Indent + Indent; |
| Str << "Registers available for register allocation:\n"; |
| for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) { |
| Str << Indent << getRegClassName(static_cast<RegClass>(TypeIndex)) |
| << ":\n"; |
| printRegisterSet(Str, TypeToRegisterSet[TypeIndex], getRegName, |
| IndentTwice); |
| } |
| Str << "\n"; |
| } |
| } |
| |
| std::unique_ptr<TargetLowering> |
| TargetLowering::createLowering(TargetArch Target, Cfg *Func) { |
| switch (Target) { |
| default: |
| badTargetFatalError(Target); |
| #define SUBZERO_TARGET(X) \ |
| case TARGET_LOWERING_CLASS_FOR(X): \ |
| return ::X::createTargetLowering(Func); |
| #include "SZTargets.def" |
| #undef SUBZERO_TARGET |
| } |
| } |
| |
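| // Dispatches to the selected target's static initializer. A per-target guard |
| // ensures each initializer runs at most once even if this function is called |
| // repeatedly. |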
| void TargetLowering::staticInit(GlobalContext *Ctx) { |
| const TargetArch Target = getFlags().getTargetArch(); |
| // Call the specified target's static initializer. |
| switch (Target) { |
| default: |
| badTargetFatalError(Target); |
| #define SUBZERO_TARGET(X) \ |
| case TARGET_LOWERING_CLASS_FOR(X): { \ |
| static bool InitGuard##X = false; \ |
| if (InitGuard##X) { \ |
| return; \ |
| } \ |
| InitGuard##X = true; \ |
| ::X::staticInit(Ctx); \ |
| } break; |
| #include "SZTargets.def" |
| #undef SUBZERO_TARGET |
| } |
| } |
| |
| bool TargetLowering::shouldBePooled(const Constant *C) { |
| const TargetArch Target = getFlags().getTargetArch(); |
| switch (Target) { |
| default: |
| return false; |
| #define SUBZERO_TARGET(X) \ |
| case TARGET_LOWERING_CLASS_FOR(X): \ |
| return ::X::shouldBePooled(C); |
| #include "SZTargets.def" |
| #undef SUBZERO_TARGET |
| } |
| } |
| |
| ::Ice::Type TargetLowering::getPointerType() { |
| const TargetArch Target = getFlags().getTargetArch(); |
| switch (Target) { |
| default: |
| return ::Ice::IceType_void; |
| #define SUBZERO_TARGET(X) \ |
| case TARGET_LOWERING_CLASS_FOR(X): \ |
| return ::X::getPointerType(); |
| #include "SZTargets.def" |
| #undef SUBZERO_TARGET |
| } |
| } |
| |
| TargetLowering::SandboxType |
| TargetLowering::determineSandboxTypeFromFlags(const ClFlags &Flags) { |
| assert(!Flags.getUseSandboxing() || !Flags.getUseNonsfi()); |
| if (Flags.getUseNonsfi()) { |
| return TargetLowering::ST_Nonsfi; |
| } |
| if (Flags.getUseSandboxing()) { |
| return TargetLowering::ST_NaCl; |
| } |
| return TargetLowering::ST_None; |
| } |
| |
| TargetLowering::TargetLowering(Cfg *Func) |
| : Func(Func), Ctx(Func->getContext()), |
| SandboxingType(determineSandboxTypeFromFlags(getFlags())) {} |
| |
| TargetLowering::AutoBundle::AutoBundle(TargetLowering *Target, |
| InstBundleLock::Option Option) |
| : Target(Target), NeedSandboxing(getFlags().getUseSandboxing()) { |
| assert(!Target->AutoBundling); |
| Target->AutoBundling = true; |
| if (NeedSandboxing) { |
| Target->_bundle_lock(Option); |
| } |
| } |
| |
| TargetLowering::AutoBundle::~AutoBundle() { |
| assert(Target->AutoBundling); |
| Target->AutoBundling = false; |
| if (NeedSandboxing) { |
| Target->_bundle_unlock(); |
| } |
| } |
| |
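| // Walks every instruction in the function and gives the target a chance to |
| // insert helper calls for it (via genTargetHelperCallFor) before the main |
| // lowering pass runs. |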
| void TargetLowering::genTargetHelperCalls() { |
| TimerMarker T(TimerStack::TT_genHelpers, Func); |
| Utils::BoolFlagSaver _(GeneratingTargetHelpers, true); |
| for (CfgNode *Node : Func->getNodes()) { |
| Context.init(Node); |
| while (!Context.atEnd()) { |
| PostIncrLoweringContext _(Context); |
| genTargetHelperCallFor(iteratorToInst(Context.getCur())); |
| } |
| } |
| } |
| |
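| // Dispatches address-mode optimization for the current instruction: |
| // doAddressOptOther() runs first, then loads, stores, and the |
| // LoadSubVector/StoreSubVector intrinsics get their specialized hooks, and |
| // finally the context advances past the instruction. |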
| void TargetLowering::doAddressOpt() { |
| doAddressOptOther(); |
| if (llvm::isa<InstLoad>(*Context.getCur())) |
| doAddressOptLoad(); |
| else if (llvm::isa<InstStore>(*Context.getCur())) |
| doAddressOptStore(); |
| else if (auto *Intrinsic = |
| llvm::dyn_cast<InstIntrinsicCall>(&*Context.getCur())) { |
| if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::LoadSubVector) |
| doAddressOptLoadSubVector(); |
| else if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::StoreSubVector) |
| doAddressOptStoreSubVector(); |
| } |
| Context.advanceCur(); |
| Context.advanceNext(); |
| } |
| |
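| // Possibly inserts random NOPs at the current instruction, skipping fake |
| // instructions, redundant assignments, and deleted instructions. Up to |
| // getMaxNopsPerInstruction() insertion attempts are made, each succeeding |
| // with the configured probability. |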
| void TargetLowering::doNopInsertion(RandomNumberGenerator &RNG) { |
| Inst *I = iteratorToInst(Context.getCur()); |
| bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) || |
| llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() || |
| I->isDeleted(); |
| if (!ShouldSkip) { |
| int Probability = getFlags().getNopProbabilityAsPercentage(); |
| for (int N = 0; N < getFlags().getMaxNopsPerInstruction(); ++N) { |
| randomlyInsertNop(Probability / 100.0, RNG); |
| } |
| } |
| } |
| |
| // Lowers a single instruction according to the information in Context, by |
| // checking the Context.Cur instruction kind and calling the appropriate |
| // lowering method. The lowering method should insert target instructions at |
| // the Cur.Next insertion point, and should not delete the Context.Cur |
| // instruction or advance Context.Cur. |
| // |
| // The lowering method may look ahead in the instruction stream as desired, and |
| // lower additional instructions in conjunction with the current one, for |
| // example fusing a compare and branch. If it does, it should advance |
| // Context.Cur to point to the next non-deleted instruction to process, and it |
| // should delete any additional instructions it consumes. |
| void TargetLowering::lower() { |
| assert(!Context.atEnd()); |
| Inst *Instr = iteratorToInst(Context.getCur()); |
| Instr->deleteIfDead(); |
| if (!Instr->isDeleted() && !llvm::isa<InstFakeDef>(Instr) && |
| !llvm::isa<InstFakeUse>(Instr)) { |
| // Mark the current instruction as deleted before lowering, otherwise the |
| // Dest variable will likely get marked as non-SSA. See |
| // Variable::setDefinition(). However, just pass-through FakeDef and |
| // FakeUse instructions that might have been inserted prior to lowering. |
| Instr->setDeleted(); |
| switch (Instr->getKind()) { |
| case Inst::Alloca: |
| lowerAlloca(llvm::cast<InstAlloca>(Instr)); |
| break; |
| case Inst::Arithmetic: |
| lowerArithmetic(llvm::cast<InstArithmetic>(Instr)); |
| break; |
| case Inst::Assign: |
| lowerAssign(llvm::cast<InstAssign>(Instr)); |
| break; |
| case Inst::Br: |
| lowerBr(llvm::cast<InstBr>(Instr)); |
| break; |
| case Inst::Breakpoint: |
| lowerBreakpoint(llvm::cast<InstBreakpoint>(Instr)); |
| break; |
| case Inst::Call: |
| lowerCall(llvm::cast<InstCall>(Instr)); |
| break; |
| case Inst::Cast: |
| lowerCast(llvm::cast<InstCast>(Instr)); |
| break; |
| case Inst::ExtractElement: |
| lowerExtractElement(llvm::cast<InstExtractElement>(Instr)); |
| break; |
| case Inst::Fcmp: |
| lowerFcmp(llvm::cast<InstFcmp>(Instr)); |
| break; |
| case Inst::Icmp: |
| lowerIcmp(llvm::cast<InstIcmp>(Instr)); |
| break; |
| case Inst::InsertElement: |
| lowerInsertElement(llvm::cast<InstInsertElement>(Instr)); |
| break; |
| case Inst::IntrinsicCall: { |
| auto *Call = llvm::cast<InstIntrinsicCall>(Instr); |
| if (Call->getIntrinsicInfo().ReturnsTwice) |
| setCallsReturnsTwice(true); |
| lowerIntrinsicCall(Call); |
| break; |
| } |
| case Inst::Load: |
| lowerLoad(llvm::cast<InstLoad>(Instr)); |
| break; |
| case Inst::Phi: |
| lowerPhi(llvm::cast<InstPhi>(Instr)); |
| break; |
| case Inst::Ret: |
| lowerRet(llvm::cast<InstRet>(Instr)); |
| break; |
| case Inst::Select: |
| lowerSelect(llvm::cast<InstSelect>(Instr)); |
| break; |
| case Inst::ShuffleVector: |
| lowerShuffleVector(llvm::cast<InstShuffleVector>(Instr)); |
| break; |
| case Inst::Store: |
| lowerStore(llvm::cast<InstStore>(Instr)); |
| break; |
| case Inst::Switch: |
| lowerSwitch(llvm::cast<InstSwitch>(Instr)); |
| break; |
| case Inst::Unreachable: |
| lowerUnreachable(llvm::cast<InstUnreachable>(Instr)); |
| break; |
| default: |
| lowerOther(Instr); |
| break; |
| } |
| |
| postLower(); |
| } |
| |
| Context.advanceCur(); |
| Context.advanceNext(); |
| } |
| |
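| // Lowers a single high-level instruction by inserting it immediately before |
| // Next in Node and then running the normal lower() dispatch on it. |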
| void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next, |
| InstHighLevel *Instr) { |
| // TODO(stichnot): Consider modifying the design/implementation to avoid |
| // multiple init() calls when using lowerInst() to lower several instructions |
| // in the same node. |
| Context.init(Node); |
| Context.setNext(Next); |
| Context.insert(Instr); |
| --Next; |
| assert(iteratorToInst(Next) == Instr); |
| Context.setCur(Next); |
| lower(); |
| } |
| |
| void TargetLowering::lowerOther(const Inst *Instr) { |
| (void)Instr; |
| Func->setError("Can't lower unsupported instruction type"); |
| } |
| |
| // Drives register allocation, allowing all physical registers (except perhaps |
| // for the frame pointer) to be allocated. This set of registers could |
| // potentially be parameterized if we want to restrict registers e.g. for |
| // performance testing. |
| void TargetLowering::regAlloc(RegAllocKind Kind) { |
| TimerMarker T(TimerStack::TT_regAlloc, Func); |
| LinearScan LinearScan(Func); |
| RegSetMask RegInclude = RegSet_None; |
| RegSetMask RegExclude = RegSet_None; |
| RegInclude |= RegSet_CallerSave; |
| RegInclude |= RegSet_CalleeSave; |
| if (hasFramePointer()) |
| RegExclude |= RegSet_FramePointer; |
| SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude); |
| bool Repeat = (Kind == RAK_Global && getFlags().getRepeatRegAlloc()); |
| CfgSet<Variable *> EmptySet; |
| do { |
| LinearScan.init(Kind, EmptySet); |
| LinearScan.scan(RegMask, getFlags().getRandomizeRegisterAllocation()); |
| if (!LinearScan.hasEvictions()) |
| Repeat = false; |
| Kind = RAK_SecondChance; |
| } while (Repeat); |
| // TODO(stichnot): Run the register allocator one more time to do stack slot |
| // coalescing. The idea would be to initialize the Unhandled list with the |
| // set of Variables that have no register and a non-empty live range, and |
| // model an infinite number of registers. Maybe use the register aliasing |
| // mechanism to get better packing of narrower slots. |
| if (getFlags().getSplitGlobalVars()) |
| postRegallocSplitting(RegMask); |
| } |
| |
| namespace { |
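| // Collects the non-deleted instructions of Node from the instruction numbered |
| // Start through the one numbered End (inclusive), scanning the phi list first |
| // and then the ordinary instruction list. |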
| CfgVector<Inst *> getInstructionsInRange(CfgNode *Node, InstNumberT Start, |
| InstNumberT End) { |
| CfgVector<Inst *> Result; |
| bool Started = false; |
| auto Process = [&](InstList &Insts) { |
| for (auto &Instr : Insts) { |
| if (Instr.isDeleted()) { |
| continue; |
| } |
| if (Instr.getNumber() == Start) { |
| Started = true; |
| } |
| if (Started) { |
| Result.emplace_back(&Instr); |
| } |
| if (Instr.getNumber() == End) { |
| break; |
| } |
| } |
| }; |
| Process(Node->getPhis()); |
| Process(Node->getInsts()); |
| // TODO(manasijm): Investigate why checking >= End significantly changes |
| // output. Should not happen when renumbering produces monotonically |
| // increasing instruction numbers and live ranges begin and end on non-deleted |
| // instructions. |
| return Result; |
| } |
| } // end of anonymous namespace |
| |
| void TargetLowering::postRegallocSplitting(const SmallBitVector &RegMask) { |
| // Splits the live ranges of global (multi-block) variables and runs the |
| // register allocator to find registers for as many of the new variables as |
| // possible. |
| // TODO(manasijm): Merge the small live ranges back into multi-block ones when |
| // the variables get the same register. This will reduce the number of new |
| // instructions inserted. This might involve a full dataflow analysis. |
| // Also, modify the preference mechanism in the register allocator to match. |
| |
| TimerMarker _(TimerStack::TT_splitGlobalVars, Func); |
| CfgSet<Variable *> SplitCandidates; |
| |
| // Find variables that are allowed to have registers but did not get one. Skip |
| // variables with single-segment live ranges, since this function does not |
| // split them any further. |
| for (Variable *Var : Func->getVariables()) { |
| if (!Var->mustNotHaveReg() && !Var->hasReg()) { |
| if (Var->getLiveRange().getNumSegments() > 1) |
| SplitCandidates.insert(Var); |
| } |
| } |
| if (SplitCandidates.empty()) |
| return; |
| |
| CfgSet<Variable *> ExtraVars; |
| |
| struct UseInfo { |
| Variable *Replacing = nullptr; |
| Inst *FirstUse = nullptr; |
| Inst *LastDef = nullptr; |
| SizeT UseCount = 0; |
| }; |
| CfgUnorderedMap<Variable *, UseInfo> VarInfo; |
| // Split the live ranges of the viable variables by node. |
| // Compute metadata (UseInfo) for each of the resulting variables. |
| for (auto *Var : SplitCandidates) { |
| for (auto &Segment : Var->getLiveRange().getSegments()) { |
| UseInfo Info; |
| Info.Replacing = Var; |
| auto *Node = Var->getLiveRange().getNodeForSegment(Segment.first); |
| |
| for (auto *Instr : |
| getInstructionsInRange(Node, Segment.first, Segment.second)) { |
| for (SizeT i = 0; i < Instr->getSrcSize(); ++i) { |
| // It's safe to iterate over the top-level src operands rather than |
| // using FOREACH_VAR_IN_INST(), because any variables inside e.g. |
| // mem operands should already have registers. |
| if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) { |
| if (Var == Info.Replacing) { |
| if (Info.FirstUse == nullptr && !llvm::isa<InstPhi>(Instr)) { |
| Info.FirstUse = Instr; |
| } |
| Info.UseCount++; |
| } |
| } |
| } |
| if (Instr->getDest() == Info.Replacing && !llvm::isa<InstPhi>(Instr)) { |
| Info.LastDef = Instr; |
| } |
| } |
| |
| static constexpr SizeT MinUseThreshold = 3; |
| // Skip the segment if the variable has fewer than `MinUseThreshold` uses. |
| if (Info.UseCount < MinUseThreshold) |
| continue; |
| |
| if (!Info.FirstUse && !Info.LastDef) { |
| continue; |
| } |
| |
| LiveRange LR; |
| LR.addSegment(Segment); |
| Variable *NewVar = Func->makeVariable(Var->getType()); |
| |
| NewVar->setLiveRange(LR); |
| |
| VarInfo[NewVar] = Info; |
| |
| ExtraVars.insert(NewVar); |
| } |
| } |
| // Run the register allocator with all these new variables included |
| LinearScan RegAlloc(Func); |
| RegAlloc.init(RAK_Global, SplitCandidates); |
| RegAlloc.scan(RegMask, getFlags().getRandomizeRegisterAllocation()); |
| |
| // Modify the Cfg to use the new variables that now have registers. |
| for (auto *ExtraVar : ExtraVars) { |
| if (!ExtraVar->hasReg()) { |
| continue; |
| } |
| |
| auto &Info = VarInfo[ExtraVar]; |
| |
| assert(ExtraVar->getLiveRange().getSegments().size() == 1); |
| auto Segment = ExtraVar->getLiveRange().getSegments()[0]; |
| |
| auto *Node = |
| Info.Replacing->getLiveRange().getNodeForSegment(Segment.first); |
| |
| auto RelevantInsts = |
| getInstructionsInRange(Node, Segment.first, Segment.second); |
| |
| if (RelevantInsts.empty()) |
| continue; |
| |
| // Replace old variables |
| for (auto *Instr : RelevantInsts) { |
| if (llvm::isa<InstPhi>(Instr)) |
| continue; |
| // TODO(manasijm): Figure out how to safely enable replacing phi dest |
| // variables. The issue is that we cannot insert low-level mov |
| // instructions into the PhiList. |
| for (SizeT i = 0; i < Instr->getSrcSize(); ++i) { |
| // FOREACH_VAR_IN_INST() not needed. Same logic as above. |
| if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) { |
| if (Var == Info.Replacing) { |
| Instr->replaceSource(i, ExtraVar); |
| } |
| } |
| } |
| if (Instr->getDest() == Info.Replacing) { |
| Instr->replaceDest(ExtraVar); |
| } |
| } |
| |
| assert(Info.FirstUse != Info.LastDef); |
| assert(Info.FirstUse || Info.LastDef); |
| |
| // Insert spill code |
| if (Info.FirstUse != nullptr) { |
| auto *NewInst = |
| Func->getTarget()->createLoweredMove(ExtraVar, Info.Replacing); |
| Node->getInsts().insert(instToIterator(Info.FirstUse), NewInst); |
| } |
| if (Info.LastDef != nullptr) { |
| auto *NewInst = |
| Func->getTarget()->createLoweredMove(Info.Replacing, ExtraVar); |
| Node->getInsts().insertAfter(instToIterator(Info.LastDef), NewInst); |
| } |
| } |
| } |
| |
| void TargetLowering::markRedefinitions() { |
| // Find (non-SSA) instructions where the Dest variable appears in some source |
| // operand, and set the IsDestRedefined flag to keep liveness analysis |
| // consistent. |
| for (auto Instr = Context.getCur(), E = Context.getNext(); Instr != E; |
| ++Instr) { |
| if (Instr->isDeleted()) |
| continue; |
| Variable *Dest = Instr->getDest(); |
| if (Dest == nullptr) |
| continue; |
| FOREACH_VAR_IN_INST(Var, *Instr) { |
| if (Var == Dest) { |
| Instr->setDestRedefined(); |
| break; |
| } |
| } |
| } |
| } |
| |
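| // Inserts a FakeUse for every source variable of Instr and a FakeDef for its |
| // destination (if any). 64-on-32 and vector-on-32 variables are expanded into |
| // fake uses/defs of their lo/hi halves or container elements. |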
| void TargetLowering::addFakeDefUses(const Inst *Instr) { |
| FOREACH_VAR_IN_INST(Var, *Instr) { |
| if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) { |
| Context.insert<InstFakeUse>(Var64->getLo()); |
| Context.insert<InstFakeUse>(Var64->getHi()); |
| } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) { |
| for (Variable *Var : VarVec->getContainers()) { |
| Context.insert<InstFakeUse>(Var); |
| } |
| } else { |
| Context.insert<InstFakeUse>(Var); |
| } |
| } |
| Variable *Dest = Instr->getDest(); |
| if (Dest == nullptr) |
| return; |
| if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) { |
| Context.insert<InstFakeDef>(Var64->getLo()); |
| Context.insert<InstFakeDef>(Var64->getHi()); |
| } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) { |
| for (Variable *Var : VarVec->getContainers()) { |
| Context.insert<InstFakeDef>(Var); |
| } |
| } else { |
| Context.insert<InstFakeDef>(Dest); |
| } |
| } |
| |
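| // Copies Source into Dest and sorts it by decreasing stack width, breaking |
| // ties by variable index so the resulting order is deterministic. |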
| void TargetLowering::sortVarsByAlignment(VarList &Dest, |
| const VarList &Source) const { |
| Dest = Source; |
| // Instead of std::sort, we could do a bucket sort with log2(alignment) as |
| // the buckets, if performance is an issue. |
| std::sort(Dest.begin(), Dest.end(), |
| [this](const Variable *V1, const Variable *V2) { |
| const size_t WidthV1 = typeWidthInBytesOnStack(V1->getType()); |
| const size_t WidthV2 = typeWidthInBytesOnStack(V2->getType()); |
| if (WidthV1 == WidthV2) |
| return V1->getIndex() < V2->getIndex(); |
| return WidthV1 > WidthV2; |
| }); |
| } |
| |
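| // Computes which variables need stack slots and the size and alignment |
| // parameters of the spill areas. TargetVarHook lets the target claim |
| // variables that it handles specially (e.g. ones that share stack slots). |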
| void TargetLowering::getVarStackSlotParams( |
| VarList &SortedSpilledVariables, SmallBitVector &RegsUsed, |
| size_t *GlobalsSize, size_t *SpillAreaSizeBytes, |
| uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes, |
| std::function<bool(Variable *)> TargetVarHook) { |
| const VariablesMetadata *VMetadata = Func->getVMetadata(); |
| BitVector IsVarReferenced(Func->getNumVariables()); |
| for (CfgNode *Node : Func->getNodes()) { |
| for (Inst &Instr : Node->getInsts()) { |
| if (Instr.isDeleted()) |
| continue; |
| if (const Variable *Var = Instr.getDest()) |
| IsVarReferenced[Var->getIndex()] = true; |
| FOREACH_VAR_IN_INST(Var, Instr) { |
| IsVarReferenced[Var->getIndex()] = true; |
| } |
| } |
| } |
| |
| // If SimpleCoalescing is false, each variable without a register gets its |
| // own unique stack slot, which leads to large stack frames. If |
| // SimpleCoalescing is true, then each "global" variable without a register |
| // gets its own slot, but "local" variable slots are reused across basic |
| // blocks. E.g., if A and B are local to block 1 and C is local to block 2, |
| // then C may share a slot with A or B. |
| // |
| // We cannot coalesce stack slots if this function calls a "returns twice" |
| // function. In that case, basic blocks may be revisited, and variables local |
| // to those basic blocks are actually live until after the called function |
| // returns a second time. |
| const bool SimpleCoalescing = !callsReturnsTwice(); |
| |
| CfgVector<size_t> LocalsSize(Func->getNumNodes()); |
| const VarList &Variables = Func->getVariables(); |
| VarList SpilledVariables; |
| for (Variable *Var : Variables) { |
| if (Var->hasReg()) { |
| // Don't consider a rematerializable variable to be an actual register use |
| // (specifically of the frame pointer). Otherwise, the prolog may decide |
| // to save the frame pointer twice - once because of the explicit need for |
| // a frame pointer, and once because of an active use of a callee-save |
| // register. |
| if (!Var->isRematerializable()) |
| RegsUsed[Var->getRegNum()] = true; |
| continue; |
| } |
| // An argument either does not need a stack slot (if passed in a register) |
| // or already has one (if passed on the stack). |
| if (Var->getIsArg()) { |
| if (!Var->hasReg()) { |
| assert(!Var->hasStackOffset()); |
| Var->setHasStackOffset(); |
| } |
| continue; |
| } |
| // An unreferenced variable doesn't need a stack slot. |
| if (!IsVarReferenced[Var->getIndex()]) |
| continue; |
| // Let the target check for a target-specific variable; such a variable may |
| // end up sharing stack slots and thus needs no accounting here. |
| if (TargetVarHook(Var)) |
| continue; |
| assert(!Var->hasStackOffset()); |
| Var->setHasStackOffset(); |
| SpilledVariables.push_back(Var); |
| } |
| |
| SortedSpilledVariables.reserve(SpilledVariables.size()); |
| sortVarsByAlignment(SortedSpilledVariables, SpilledVariables); |
| |
| for (Variable *Var : SortedSpilledVariables) { |
| size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| // We have sorted by alignment, so the first variable we encounter that is |
| // located in each area determines the max alignment for the area. |
| if (!*SpillAreaAlignmentBytes) |
| *SpillAreaAlignmentBytes = Increment; |
| if (SimpleCoalescing && VMetadata->isTracked(Var)) { |
| if (VMetadata->isMultiBlock(Var)) { |
| *GlobalsSize += Increment; |
| } else { |
| SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex(); |
| LocalsSize[NodeIndex] += Increment; |
| if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes) |
| *SpillAreaSizeBytes = LocalsSize[NodeIndex]; |
| if (!*LocalsSlotsAlignmentBytes) |
| *LocalsSlotsAlignmentBytes = Increment; |
| } |
| } else { |
| *SpillAreaSizeBytes += Increment; |
| } |
| } |
| // For testing legalization of large stack offsets on targets with limited |
| // offset bits in instruction encodings, add some padding. |
| *SpillAreaSizeBytes += getFlags().getTestStackExtra(); |
| } |
| |
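| // Computes the padding needed to align the start of the spill area and, when |
| // the globals and locals areas are separate, the padding needed after the |
| // globals area so that the locals area starts suitably aligned. |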
| void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset, |
| uint32_t SpillAreaAlignmentBytes, |
| size_t GlobalsSize, |
| uint32_t LocalsSlotsAlignmentBytes, |
| uint32_t *SpillAreaPaddingBytes, |
| uint32_t *LocalsSlotsPaddingBytes) { |
| if (SpillAreaAlignmentBytes) { |
| uint32_t PaddingStart = SpillAreaStartOffset; |
| uint32_t SpillAreaStart = |
| Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes); |
| *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart; |
| } |
| |
| // If there are separate globals and locals areas, make sure the locals area |
| // is aligned by padding the end of the globals area. |
| if (LocalsSlotsAlignmentBytes) { |
| uint32_t GlobalsAndSubsequentPaddingSize = |
| Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); |
| *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize; |
| } |
| } |
| |
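| // Assigns a concrete stack offset to each spilled variable, reusing slots for |
| // block-local variables when coalescing is possible. Offsets are negated when |
| // a frame pointer is used; otherwise they are computed as SpillAreaSizeBytes |
| // minus the running offset. |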
| void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables, |
| size_t SpillAreaPaddingBytes, |
| size_t SpillAreaSizeBytes, |
| size_t GlobalsAndSubsequentPaddingSize, |
| bool UsesFramePointer) { |
| const VariablesMetadata *VMetadata = Func->getVMetadata(); |
| // For testing legalization of large stack offsets on targets with limited |
| // offset bits in instruction encodings, add some padding. This assumes that |
| // SpillAreaSizeBytes has accounted for the extra test padding. When |
| // UseFramePointer is true, the offset depends on the padding, not just the |
| // SpillAreaSizeBytes. On the other hand, when UseFramePointer is false, the |
| // offsets depend on the gap between SpillAreaSizeBytes and |
| // SpillAreaPaddingBytes, so we don't increment that. |
| size_t TestPadding = getFlags().getTestStackExtra(); |
| if (UsesFramePointer) |
| SpillAreaPaddingBytes += TestPadding; |
| size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; |
| size_t NextStackOffset = SpillAreaPaddingBytes; |
| CfgVector<size_t> LocalsSize(Func->getNumNodes()); |
| const bool SimpleCoalescing = !callsReturnsTwice(); |
| |
| for (Variable *Var : SortedSpilledVariables) { |
| size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| if (SimpleCoalescing && VMetadata->isTracked(Var)) { |
| if (VMetadata->isMultiBlock(Var)) { |
| GlobalsSpaceUsed += Increment; |
| NextStackOffset = GlobalsSpaceUsed; |
| } else { |
| SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex(); |
| LocalsSize[NodeIndex] += Increment; |
| NextStackOffset = SpillAreaPaddingBytes + |
| GlobalsAndSubsequentPaddingSize + |
| LocalsSize[NodeIndex]; |
| } |
| } else { |
| NextStackOffset += Increment; |
| } |
| if (UsesFramePointer) |
| Var->setStackOffset(-NextStackOffset); |
| else |
| Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset); |
| } |
| } |
| |
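| // Creates (but does not insert) a call to the given runtime helper with room |
| // for MaxSrcs arguments; the call is never treated as a tail call. |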
| InstCall *TargetLowering::makeHelperCall(RuntimeHelper FuncID, Variable *Dest, |
| SizeT MaxSrcs) { |
| constexpr bool HasTailCall = false; |
| Constant *CallTarget = Ctx->getRuntimeHelperFunc(FuncID); |
| InstCall *Call = |
| InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall); |
| return Call; |
| } |
| |
| bool TargetLowering::shouldOptimizeMemIntrins() { |
| return Func->getOptLevel() >= Opt_1 || getFlags().getForceMemIntrinOpt(); |
| } |
| |
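| // Scalarizes a vector arithmetic instruction: a maker lambda that emits a |
| // scalar InstArithmetic of the given kind is handed to the generic |
| // scalarizeInstruction() helper along with the operands. |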
| void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| Variable *Dest, Operand *Src0, |
| Operand *Src1) { |
| scalarizeInstruction( |
| Dest, [this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) { |
| return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1); |
| }, Src0, Src1); |
| } |
| |
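| // Emits the relocatable constant without any target-specific immediate |
| // prefix: either its custom emit string, or its name followed by Suffix and, |
| // if nonzero, an explicit signed offset. |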
| void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C, |
| const char *Suffix) const { |
| if (!BuildDefs::dump()) |
| return; |
| Ostream &Str = Ctx->getStrEmit(); |
| const std::string &EmitStr = C->getEmitString(); |
| if (!EmitStr.empty()) { |
| // C has a custom emit string, so we use it instead of the canonical |
| // Name + Offset form. |
| Str << EmitStr; |
| return; |
| } |
| Str << C->getName() << Suffix; |
| RelocOffsetT Offset = C->getOffset(); |
| if (Offset) { |
| if (Offset > 0) |
| Str << "+"; |
| Str << Offset; |
| } |
| } |
| |
| std::unique_ptr<TargetDataLowering> |
| TargetDataLowering::createLowering(GlobalContext *Ctx) { |
| TargetArch Target = getFlags().getTargetArch(); |
| switch (Target) { |
| default: |
| badTargetFatalError(Target); |
| #define SUBZERO_TARGET(X) \ |
| case TARGET_LOWERING_CLASS_FOR(X): \ |
| return ::X::createTargetDataLowering(Ctx); |
| #include "SZTargets.def" |
| #undef SUBZERO_TARGET |
| } |
| } |
| |
| TargetDataLowering::~TargetDataLowering() = default; |
| |
| namespace { |
| |
| // dataSectionSuffix decides whether to use SectionSuffix or VarName as the |
| // data section suffix. When separate data sections are used for globals, |
| // SectionSuffix is not necessary. |
| std::string dataSectionSuffix(const std::string &SectionSuffix, |
| const std::string &VarName, |
| const bool DataSections) { |
| if (SectionSuffix.empty() && !DataSections) { |
| return ""; |
| } |
| |
| if (DataSections) { |
| // With data sections we don't need to use the SectionSuffix. |
| return "." + VarName; |
| } |
| |
| assert(!SectionSuffix.empty()); |
| return "." + SectionSuffix; |
| } |
| |
| } // end of anonymous namespace |
| |
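| // Emits assembly for one global variable: its .type and .section directives, |
| // alignment, symbol definition, and either the initializer bytes, relocs, and |
| // zero runs, or a single .zero directive for BSS-style data. |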
| void TargetDataLowering::emitGlobal(const VariableDeclaration &Var, |
| const std::string &SectionSuffix) { |
| if (!BuildDefs::dump()) |
| return; |
| |
| // If external and not initialized, this must be a cross test. Don't generate |
| // a declaration for such cases. |
| const bool IsExternal = Var.isExternal() || getFlags().getDisableInternal(); |
| if (IsExternal && !Var.hasInitializer()) |
| return; |
| |
| Ostream &Str = Ctx->getStrEmit(); |
| const bool HasNonzeroInitializer = Var.hasNonzeroInitializer(); |
| const bool IsConstant = Var.getIsConstant(); |
| const SizeT Size = Var.getNumBytes(); |
| const std::string Name = Var.getName().toString(); |
| |
| Str << "\t.type\t" << Name << ",%object\n"; |
| |
| const bool UseDataSections = getFlags().getDataSections(); |
| const bool UseNonsfi = getFlags().getUseNonsfi(); |
| const std::string Suffix = |
| dataSectionSuffix(SectionSuffix, Name, UseDataSections); |
| if (IsConstant && UseNonsfi) |
| Str << "\t.section\t.data.rel.ro" << Suffix << ",\"aw\",%progbits\n"; |
| else if (IsConstant) |
| Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n"; |
| else if (HasNonzeroInitializer) |
| Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n"; |
| else |
| Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n"; |
| |
| if (IsExternal) |
| Str << "\t.globl\t" << Name << "\n"; |
| |
| const uint32_t Align = Var.getAlignment(); |
| if (Align > 1) { |
| assert(llvm::isPowerOf2_32(Align)); |
| // Use the .p2align directive, since the .align N directive interprets N |
| // either as a byte count or as a power of two, depending on the target. |
| Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n"; |
| } |
| |
| Str << Name << ":\n"; |
| |
| if (HasNonzeroInitializer) { |
| for (const auto *Init : Var.getInitializers()) { |
| switch (Init->getKind()) { |
| case VariableDeclaration::Initializer::DataInitializerKind: { |
| const auto &Data = |
| llvm::cast<VariableDeclaration::DataInitializer>(Init) |
| ->getContents(); |
| for (SizeT i = 0; i < Init->getNumBytes(); ++i) { |
| Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| } |
| break; |
| } |
| case VariableDeclaration::Initializer::ZeroInitializerKind: |
| Str << "\t.zero\t" << Init->getNumBytes() << "\n"; |
| break; |
| case VariableDeclaration::Initializer::RelocInitializerKind: { |
| const auto *Reloc = |
| llvm::cast<VariableDeclaration::RelocInitializer>(Init); |
| Str << "\t" << getEmit32Directive() << "\t"; |
| Str << Reloc->getDeclaration()->getName(); |
| if (Reloc->hasFixup()) { |
| // TODO(jpp): this is ARM32 specific. |
| Str << "(GOTOFF)"; |
| } |
| if (RelocOffsetT Offset = Reloc->getOffset()) { |
| if (Offset >= 0 || (Offset == INT32_MIN)) |
| Str << " + " << Offset; |
| else |
| Str << " - " << -Offset; |
| } |
| Str << "\n"; |
| break; |
| } |
| } |
| } |
| } else { |
| // NOTE: for non-constant zero initializers, this is BSS (no bits), so an ELF |
| // writer would not write to the file and would only track virtual offsets, |
| // but the .s writer still needs this .zero directive and cannot simply use |
| // .size to advance offsets. |
| Str << "\t.zero\t" << Size << "\n"; |
| } |
| |
| Str << "\t.size\t" << Name << ", " << Size << "\n"; |
| } |
| |
| std::unique_ptr<TargetHeaderLowering> |
| TargetHeaderLowering::createLowering(GlobalContext *Ctx) { |
| TargetArch Target = getFlags().getTargetArch(); |
| switch (Target) { |
| default: |
| badTargetFatalError(Target); |
| #define SUBZERO_TARGET(X) \ |
| case TARGET_LOWERING_CLASS_FOR(X): \ |
| return ::X::createTargetHeaderLowering(Ctx); |
| #include "SZTargets.def" |
| #undef SUBZERO_TARGET |
| } |
| } |
| |
| TargetHeaderLowering::~TargetHeaderLowering() = default; |
| |
| } // end of namespace Ice |