| //===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===// |
| // |
| // The Subzero Code Generator |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the skeleton of the TargetLowering class, |
| // specifically invoking the appropriate lowering method for a given |
| // instruction kind and driving global register allocation. It also |
| // implements the iteration over non-deleted instructions in |
| // LoweringContext. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "IceTargetLowering.h" |
| |
| #include "IceAssemblerARM32.h" |
| #include "IceAssemblerX8632.h" |
| #include "IceAssemblerX8664.h" |
| #include "assembler_mips32.h" |
| #include "IceCfg.h" // setError() |
| #include "IceCfgNode.h" |
| #include "IceGlobalInits.h" |
| #include "IceOperand.h" |
| #include "IceRegAlloc.h" |
| #include "IceTargetLoweringARM32.h" |
| #include "IceTargetLoweringMIPS32.h" |
| #include "IceTargetLoweringX8632.h" |
| #include "IceTargetLoweringX8664.h" |
| |
| namespace Ice { |
| |
| void LoweringContext::init(CfgNode *N) { |
| Node = N; |
| End = getNode()->getInsts().end(); |
| rewind(); |
| advanceForward(Next); |
| } |
| |
| void LoweringContext::rewind() { |
| Begin = getNode()->getInsts().begin(); |
| Cur = Begin; |
| skipDeleted(Cur); |
| Next = Cur; |
| } |
| |
| void LoweringContext::insert(Inst *Inst) { |
| getNode()->getInsts().insert(Next, Inst); |
| LastInserted = Inst; |
| } |
| |
| void LoweringContext::skipDeleted(InstList::iterator &I) const { |
| while (I != End && I->isDeleted()) |
| ++I; |
| } |
| |
| void LoweringContext::advanceForward(InstList::iterator &I) const { |
| if (I != End) { |
| ++I; |
| skipDeleted(I); |
| } |
| } |
| |
| Inst *LoweringContext::getLastInserted() const { |
| assert(LastInserted); |
| return LastInserted; |
| } |
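| |
| // Taken together, these helpers let a per-node driver (e.g. CfgNode::genCode()) |
| // walk the non-deleted instructions roughly like this (illustrative sketch |
| // only; the real driver also manages insertion points and other details): |
| // |
| // Context.init(Node); |
| // while (!Context.atEnd()) { |
| // Target->lower(); // advances Cur/Next past the lowered instruction |
| // } |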
| |
| TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) { |
| #define SUBZERO_TARGET(X) \ |
| if (Target == Target_##X) \ |
| return Target##X::create(Func); |
| #include "llvm/Config/SZTargets.def" |
| |
| Func->setError("Unsupported target"); |
| return nullptr; |
| } |
| |
| TargetLowering::TargetLowering(Cfg *Func) |
| : Func(Func), Ctx(Func->getContext()), Context() {} |
| |
| std::unique_ptr<Assembler> TargetLowering::createAssembler(TargetArch Target, |
| Cfg *Func) { |
| #define SUBZERO_TARGET(X) \ |
| if (Target == Target_##X) \ |
| return std::unique_ptr<Assembler>(new X::Assembler##X()); |
| #include "llvm/Config/SZTargets.def" |
| |
| Func->setError("Unsupported target assembler"); |
| return nullptr; |
| } |
| |
| void TargetLowering::doAddressOpt() { |
| if (llvm::isa<InstLoad>(*Context.getCur())) |
| doAddressOptLoad(); |
| else if (llvm::isa<InstStore>(*Context.getCur())) |
| doAddressOptStore(); |
| Context.advanceCur(); |
| Context.advanceNext(); |
| } |
| |
| void TargetLowering::doNopInsertion() { |
| Inst *I = Context.getCur(); |
| bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) || |
| llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() || |
| I->isDeleted(); |
| if (!ShouldSkip) { |
| int Probability = Ctx->getFlags().getNopProbabilityAsPercentage(); |
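| // Each attempt inserts a nop with the flag-specified probability; the flag |
| // is a percentage, hence the division by 100 below. |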
| for (int N = 0; N < Ctx->getFlags().getMaxNopsPerInstruction(); ++N) { |
| randomlyInsertNop(Probability / 100.0); |
| } |
| } |
| } |
| |
| // Lowers a single instruction according to the information in |
| // Context, by checking the Context.Cur instruction kind and calling |
| // the appropriate lowering method. The lowering method should insert |
| // target instructions at the Context.Next insertion point, and should not |
| // delete the Context.Cur instruction or advance Context.Cur. |
| // |
| // The lowering method may look ahead in the instruction stream as |
| // desired, and lower additional instructions in conjunction with the |
| // current one, for example fusing a compare and branch. If it does, |
| // it should advance Context.Cur to point to the next non-deleted |
| // instruction to process, and it should delete any additional |
| // instructions it consumes. |
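| // |
| // For example, lowerIcmp() on some targets peeks ahead at an immediately |
| // following conditional branch that consumes the compare result, emits a |
| // fused compare-and-branch sequence, deletes the branch, and advances |
| // Context.Cur past it (illustrative; the fusing details are target-specific). |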
| void TargetLowering::lower() { |
| assert(!Context.atEnd()); |
| Inst *Inst = Context.getCur(); |
| Inst->deleteIfDead(); |
| if (!Inst->isDeleted() && !llvm::isa<InstFakeDef>(Inst) && |
| !llvm::isa<InstFakeUse>(Inst)) { |
| // Mark the current instruction as deleted before lowering, |
| // otherwise the Dest variable will likely get marked as non-SSA. |
| // See Variable::setDefinition(). However, just pass through |
| // FakeDef and FakeUse instructions that might have been inserted |
| // prior to lowering. |
| Inst->setDeleted(); |
| switch (Inst->getKind()) { |
| case Inst::Alloca: |
| lowerAlloca(llvm::cast<InstAlloca>(Inst)); |
| break; |
| case Inst::Arithmetic: |
| lowerArithmetic(llvm::cast<InstArithmetic>(Inst)); |
| break; |
| case Inst::Assign: |
| lowerAssign(llvm::cast<InstAssign>(Inst)); |
| break; |
| case Inst::Br: |
| lowerBr(llvm::cast<InstBr>(Inst)); |
| break; |
| case Inst::Call: |
| lowerCall(llvm::cast<InstCall>(Inst)); |
| break; |
| case Inst::Cast: |
| lowerCast(llvm::cast<InstCast>(Inst)); |
| break; |
| case Inst::ExtractElement: |
| lowerExtractElement(llvm::cast<InstExtractElement>(Inst)); |
| break; |
| case Inst::Fcmp: |
| lowerFcmp(llvm::cast<InstFcmp>(Inst)); |
| break; |
| case Inst::Icmp: |
| lowerIcmp(llvm::cast<InstIcmp>(Inst)); |
| break; |
| case Inst::InsertElement: |
| lowerInsertElement(llvm::cast<InstInsertElement>(Inst)); |
| break; |
| case Inst::IntrinsicCall: { |
| InstIntrinsicCall *Call = llvm::cast<InstIntrinsicCall>(Inst); |
| if (Call->getIntrinsicInfo().ReturnsTwice) |
| setCallsReturnsTwice(true); |
| lowerIntrinsicCall(Call); |
| break; |
| } |
| case Inst::Load: |
| lowerLoad(llvm::cast<InstLoad>(Inst)); |
| break; |
| case Inst::Phi: |
| lowerPhi(llvm::cast<InstPhi>(Inst)); |
| break; |
| case Inst::Ret: |
| lowerRet(llvm::cast<InstRet>(Inst)); |
| break; |
| case Inst::Select: |
| lowerSelect(llvm::cast<InstSelect>(Inst)); |
| break; |
| case Inst::Store: |
| lowerStore(llvm::cast<InstStore>(Inst)); |
| break; |
| case Inst::Switch: |
| lowerSwitch(llvm::cast<InstSwitch>(Inst)); |
| break; |
| case Inst::Unreachable: |
| lowerUnreachable(llvm::cast<InstUnreachable>(Inst)); |
| break; |
| default: |
| lowerOther(Inst); |
| break; |
| } |
| |
| postLower(); |
| } |
| |
| Context.advanceCur(); |
| Context.advanceNext(); |
| } |
| |
| void TargetLowering::lowerOther(const Inst *Instr) { |
| (void)Instr; |
| Func->setError("Can't lower unsupported instruction type"); |
| } |
| |
| // Drives register allocation, allowing all physical registers (except |
| // perhaps for the frame pointer) to be allocated. This set of |
| // registers could potentially be parameterized if we want to restrict |
| // registers, e.g., for performance testing. |
| void TargetLowering::regAlloc(RegAllocKind Kind) { |
| TimerMarker T(TimerStack::TT_regAlloc, Func); |
| LinearScan LinearScan(Func); |
| RegSetMask RegInclude = RegSet_None; |
| RegSetMask RegExclude = RegSet_None; |
| RegInclude |= RegSet_CallerSave; |
| RegInclude |= RegSet_CalleeSave; |
| if (hasFramePointer()) |
| RegExclude |= RegSet_FramePointer; |
| LinearScan.init(Kind); |
| llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude); |
| LinearScan.scan(RegMask, Ctx->getFlags().shouldRandomizeRegAlloc()); |
| } |
| |
| void TargetLowering::inferTwoAddress() { |
| // Find two-address non-SSA instructions where Dest==Src0, and set |
| // the DestNonKillable flag to keep liveness analysis consistent. |
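| // For example, an x86-style two-address add comes out of lowering as |
| // "A = add A, B", i.e. getDest() == getSrc(0); such an instruction is what |
| // this loop is looking for. |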
| for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) { |
| if (Inst->isDeleted()) |
| continue; |
| if (Variable *Dest = Inst->getDest()) { |
| // TODO(stichnot): We may need to consider all source |
| // operands, not just the first one, if using 3-address |
| // instructions. |
| if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest) |
| Inst->setDestNonKillable(); |
| } |
| } |
| } |
| |
| void TargetLowering::sortVarsByAlignment(VarList &Dest, |
| const VarList &Source) const { |
| Dest = Source; |
| // Instead of std::sort, we could do a bucket sort with log2(alignment) |
| // as the buckets, if performance is an issue. |
| std::sort(Dest.begin(), Dest.end(), |
| [this](const Variable *V1, const Variable *V2) { |
| return typeWidthInBytesOnStack(V1->getType()) > |
| typeWidthInBytesOnStack(V2->getType()); |
| }); |
| } |
| |
| void TargetLowering::getVarStackSlotParams( |
| VarList &SortedSpilledVariables, llvm::SmallBitVector &RegsUsed, |
| size_t *GlobalsSize, size_t *SpillAreaSizeBytes, |
| uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes, |
| std::function<bool(Variable *)> TargetVarHook) { |
| const VariablesMetadata *VMetadata = Func->getVMetadata(); |
| llvm::BitVector IsVarReferenced(Func->getNumVariables()); |
| for (CfgNode *Node : Func->getNodes()) { |
| for (Inst &Inst : Node->getInsts()) { |
| if (Inst.isDeleted()) |
| continue; |
| if (const Variable *Var = Inst.getDest()) |
| IsVarReferenced[Var->getIndex()] = true; |
| for (SizeT I = 0; I < Inst.getSrcSize(); ++I) { |
| Operand *Src = Inst.getSrc(I); |
| SizeT NumVars = Src->getNumVars(); |
| for (SizeT J = 0; J < NumVars; ++J) { |
| const Variable *Var = Src->getVar(J); |
| IsVarReferenced[Var->getIndex()] = true; |
| } |
| } |
| } |
| } |
| |
| // If SimpleCoalescing is false, each variable without a register |
| // gets its own unique stack slot, which leads to large stack |
| // frames. If SimpleCoalescing is true, then each "global" variable |
| // without a register gets its own slot, but "local" variable slots |
| // are reused across basic blocks. E.g., if A and B are local to |
| // block 1 and C is local to block 2, then C may share a slot with A or B. |
| // |
| // We cannot coalesce stack slots if this function calls a "returns twice" |
| // function. In that case, basic blocks may be revisited, and variables |
| // local to those basic blocks are actually live until after the |
| // called function returns a second time. |
| const bool SimpleCoalescing = !callsReturnsTwice(); |
| |
| std::vector<size_t> LocalsSize(Func->getNumNodes()); |
| const VarList &Variables = Func->getVariables(); |
| VarList SpilledVariables; |
| for (Variable *Var : Variables) { |
| if (Var->hasReg()) { |
| RegsUsed[Var->getRegNum()] = true; |
| continue; |
| } |
| // An argument either does not need a stack slot (if passed in a |
| // register) or already has one (if passed on the stack). |
| if (Var->getIsArg()) |
| continue; |
| // An unreferenced variable doesn't need a stack slot. |
| if (!IsVarReferenced[Var->getIndex()]) |
| continue; |
| // A target-specific variable handled by TargetVarHook may end up sharing |
| // stack slots with others and therefore needs no accounting here. |
| if (TargetVarHook(Var)) |
| continue; |
| SpilledVariables.push_back(Var); |
| } |
| |
| SortedSpilledVariables.reserve(SpilledVariables.size()); |
| sortVarsByAlignment(SortedSpilledVariables, SpilledVariables); |
| |
| for (Variable *Var : SortedSpilledVariables) { |
| size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| // The variables are sorted by alignment (largest first; see |
| // sortVarsByAlignment()), so the first variable placed in each area |
| // determines the maximum alignment for that area. |
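| // E.g., if the widest spilled variable occupies 16 bytes on the stack, |
| // SpillAreaAlignmentBytes is set to 16 here. |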
| if (!*SpillAreaAlignmentBytes) |
| *SpillAreaAlignmentBytes = Increment; |
| if (SimpleCoalescing && VMetadata->isTracked(Var)) { |
| if (VMetadata->isMultiBlock(Var)) { |
| *GlobalsSize += Increment; |
| } else { |
| SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex(); |
| LocalsSize[NodeIndex] += Increment; |
| if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes) |
| *SpillAreaSizeBytes = LocalsSize[NodeIndex]; |
| if (!*LocalsSlotsAlignmentBytes) |
| *LocalsSlotsAlignmentBytes = Increment; |
| } |
| } else { |
| *SpillAreaSizeBytes += Increment; |
| } |
| } |
| } |
| |
| void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset, |
| uint32_t SpillAreaAlignmentBytes, |
| size_t GlobalsSize, |
| uint32_t LocalsSlotsAlignmentBytes, |
| uint32_t *SpillAreaPaddingBytes, |
| uint32_t *LocalsSlotsPaddingBytes) { |
| if (SpillAreaAlignmentBytes) { |
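| // E.g., with SpillAreaStartOffset == 20 and 16-byte alignment, the spill |
| // area starts at offset 32 and 12 bytes of padding are recorded (assuming |
| // Utils::applyAlignment() rounds up to the next multiple of the alignment). |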
| uint32_t PaddingStart = SpillAreaStartOffset; |
| uint32_t SpillAreaStart = |
| Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes); |
| *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart; |
| } |
| |
| // If there are separate globals and locals areas, make sure the |
| // locals area is aligned by padding the end of the globals area. |
| if (LocalsSlotsAlignmentBytes) { |
| uint32_t GlobalsAndSubsequentPaddingSize = |
| Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); |
| *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize; |
| } |
| } |
| |
| void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables, |
| size_t SpillAreaPaddingBytes, |
| size_t SpillAreaSizeBytes, |
| size_t GlobalsAndSubsequentPaddingSize, |
| bool UsesFramePointer) { |
| const VariablesMetadata *VMetadata = Func->getVMetadata(); |
| size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; |
| size_t NextStackOffset = SpillAreaPaddingBytes; |
| std::vector<size_t> LocalsSize(Func->getNumNodes()); |
| const bool SimpleCoalescing = !callsReturnsTwice(); |
| for (Variable *Var : SortedSpilledVariables) { |
| size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| if (SimpleCoalescing && VMetadata->isTracked(Var)) { |
| if (VMetadata->isMultiBlock(Var)) { |
| GlobalsSpaceUsed += Increment; |
| NextStackOffset = GlobalsSpaceUsed; |
| } else { |
| SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex(); |
| LocalsSize[NodeIndex] += Increment; |
| NextStackOffset = SpillAreaPaddingBytes + |
| GlobalsAndSubsequentPaddingSize + |
| LocalsSize[NodeIndex]; |
| } |
| } else { |
| NextStackOffset += Increment; |
| } |
| if (UsesFramePointer) |
| Var->setStackOffset(-NextStackOffset); |
| else |
| Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset); |
| } |
| } |
| |
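| // A typical use from a target lowering looks roughly like the following |
| // sketch (the helper name and arguments are illustrative only): |
| // |
| // InstCall *Call = makeHelperCall("memcpy", nullptr, 3); |
| // Call->addArg(Dest); |
| // Call->addArg(Src); |
| // Call->addArg(Count); |
| // lowerCall(Call); |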
| InstCall *TargetLowering::makeHelperCall(const IceString &Name, Variable *Dest, |
| SizeT MaxSrcs) { |
| const bool HasTailCall = false; |
| Constant *CallTarget = Ctx->getConstantExternSym(Name); |
| InstCall *Call = |
| InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall); |
| return Call; |
| } |
| |
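| // Emits the (possibly mangled) symbol name followed by an optional offset, |
| // e.g. "foo+4" or "foo-8". |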
| void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C) const { |
| if (!BuildDefs::dump()) |
| return; |
| Ostream &Str = Ctx->getStrEmit(); |
| if (C->getSuppressMangling()) |
| Str << C->getName(); |
| else |
| Str << Ctx->mangleName(C->getName()); |
| RelocOffsetT Offset = C->getOffset(); |
| if (Offset) { |
| if (Offset > 0) |
| Str << "+"; |
| Str << Offset; |
| } |
| } |
| |
| void TargetLowering::emit(const ConstantRelocatable *C) const { |
| if (!BuildDefs::dump()) |
| return; |
| Ostream &Str = Ctx->getStrEmit(); |
| Str << getConstantPrefix(); |
| emitWithoutPrefix(C); |
| } |
| |
| std::unique_ptr<TargetDataLowering> |
| TargetDataLowering::createLowering(GlobalContext *Ctx) { |
| TargetArch Target = Ctx->getFlags().getTargetArch(); |
| #define SUBZERO_TARGET(X) \ |
| if (Target == Target_##X) \ |
| return TargetData##X::create(Ctx); |
| #include "llvm/Config/SZTargets.def" |
| |
| llvm::report_fatal_error("Unsupported target data lowering"); |
| } |
| |
| TargetDataLowering::~TargetDataLowering() = default; |
| |
| namespace { |
| |
| // dataSectionSuffix decides whether to use SectionSuffix or MangledVarName |
| // as the data section suffix. When per-global data sections are enabled, |
| // the mangled variable name is used and an explicit SectionSuffix is not |
| // necessary. |
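| // For example, with data sections enabled a global mangled as "foo" is |
| // placed in ".rodata.foo", ".data.foo", or ".bss.foo" by emitGlobal() below; |
| // otherwise an explicit SectionSuffix such as "str" yields ".rodata.str". |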
| IceString dataSectionSuffix(const IceString &SectionSuffix, |
| const IceString &MangledVarName, |
| const bool DataSections) { |
| if (SectionSuffix.empty() && !DataSections) { |
| return ""; |
| } |
| |
| if (DataSections) { |
| // With data sections we don't need to use the SectionSuffix. |
| return "." + MangledVarName; |
| } |
| |
| assert(!SectionSuffix.empty()); |
| return "." + SectionSuffix; |
| } |
| |
| } // end of anonymous namespace |
| |
| void TargetDataLowering::emitGlobal(const VariableDeclaration &Var, |
| const IceString &SectionSuffix) { |
| if (!BuildDefs::dump()) |
| return; |
| |
| // If external and not initialized, this must be a cross test. |
| // Don't generate a declaration for such cases. |
| const bool IsExternal = |
| Var.isExternal() || Ctx->getFlags().getDisableInternal(); |
| if (IsExternal && !Var.hasInitializer()) |
| return; |
| |
| Ostream &Str = Ctx->getStrEmit(); |
| const bool HasNonzeroInitializer = Var.hasNonzeroInitializer(); |
| const bool IsConstant = Var.getIsConstant(); |
| const SizeT Size = Var.getNumBytes(); |
| const IceString MangledName = Var.mangleName(Ctx); |
| |
| Str << "\t.type\t" << MangledName << ",%object\n"; |
| |
| const bool UseDataSections = Ctx->getFlags().getDataSections(); |
| const IceString Suffix = |
| dataSectionSuffix(SectionSuffix, MangledName, UseDataSections); |
| if (IsConstant) |
| Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n"; |
| else if (HasNonzeroInitializer) |
| Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n"; |
| else |
| Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n"; |
| |
| if (IsExternal) |
| Str << "\t.globl\t" << MangledName << "\n"; |
| |
| const uint32_t Align = Var.getAlignment(); |
| if (Align > 1) { |
| assert(llvm::isPowerOf2_32(Align)); |
| // Use the .p2align directive, since the .align N directive can interpret |
| // N either as a byte count or as a power of two, depending on the target. |
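| // E.g., a 16-byte alignment is emitted as ".p2align 4". |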
| Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n"; |
| } |
| |
| Str << MangledName << ":\n"; |
| |
| if (HasNonzeroInitializer) { |
| for (const std::unique_ptr<VariableDeclaration::Initializer> &Init : |
| Var.getInitializers()) { |
| switch (Init->getKind()) { |
| case VariableDeclaration::Initializer::DataInitializerKind: { |
| const auto &Data = |
| llvm::cast<VariableDeclaration::DataInitializer>(Init.get()) |
| ->getContents(); |
| for (SizeT i = 0; i < Init->getNumBytes(); ++i) { |
| Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| } |
| break; |
| } |
| case VariableDeclaration::Initializer::ZeroInitializerKind: |
| Str << "\t.zero\t" << Init->getNumBytes() << "\n"; |
| break; |
| case VariableDeclaration::Initializer::RelocInitializerKind: { |
| const auto *Reloc = |
| llvm::cast<VariableDeclaration::RelocInitializer>(Init.get()); |
| Str << "\t" << getEmit32Directive() << "\t"; |
| Str << Reloc->getDeclaration()->mangleName(Ctx); |
| if (RelocOffsetT Offset = Reloc->getOffset()) { |
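| // Note: negating INT32_MIN would overflow, so it deliberately takes the |
| // "+" branch and is printed with its own sign. |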
| if (Offset >= 0 || (Offset == INT32_MIN)) |
| Str << " + " << Offset; |
| else |
| Str << " - " << -Offset; |
| } |
| Str << "\n"; |
| break; |
| } |
| } |
| } |
| } else { |
| // NOTE: for non-constant zero initializers, this is BSS (no bits), so an |
| // ELF writer would not write to the file and would only track virtual |
| // offsets; the .s writer, however, still needs this .zero directive and |
| // cannot simply use the .size to advance offsets. |
| Str << "\t.zero\t" << Size << "\n"; |
| } |
| |
| Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| } |
| |
| std::unique_ptr<TargetHeaderLowering> |
| TargetHeaderLowering::createLowering(GlobalContext *Ctx) { |
| TargetArch Target = Ctx->getFlags().getTargetArch(); |
| #define SUBZERO_TARGET(X) \ |
| if (Target == Target_##X) \ |
| return TargetHeader##X::create(Ctx); |
| #include "llvm/Config/SZTargets.def" |
| |
| llvm::report_fatal_error("Unsupported target header lowering"); |
| } |
| |
| TargetHeaderLowering::~TargetHeaderLowering() = default; |
| |
| } // end of namespace Ice |