| //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //==-----------------------------------------------------------------------===// |
| // |
| /// \file |
| /// AMDGPU specific subclass of TargetSubtarget. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H |
| #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H |
| |
| #include "AMDGPU.h" |
| #include "AMDGPUCallLowering.h" |
| #include "R600FrameLowering.h" |
| #include "R600ISelLowering.h" |
| #include "R600InstrInfo.h" |
| #include "SIFrameLowering.h" |
| #include "SIISelLowering.h" |
| #include "SIInstrInfo.h" |
| #include "Utils/AMDGPUBaseInfo.h" |
| #include "llvm/ADT/Triple.h" |
| #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
| #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
| #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/SelectionDAGTargetInfo.h" |
| #include "llvm/MC/MCInstrItineraries.h" |
| #include "llvm/Support/MathExtras.h" |
| #include <cassert> |
| #include <cstdint> |
| #include <memory> |
| #include <utility> |
| |
| #define GET_SUBTARGETINFO_HEADER |
| #include "AMDGPUGenSubtargetInfo.inc" |
| #define GET_SUBTARGETINFO_HEADER |
| #include "R600GenSubtargetInfo.inc" |
| |
| namespace llvm { |
| |
| class StringRef; |
| |
| class AMDGPUSubtarget { |
| public: |
| enum Generation { |
| R600 = 0, |
| R700 = 1, |
| EVERGREEN = 2, |
| NORTHERN_ISLANDS = 3, |
| SOUTHERN_ISLANDS = 4, |
| SEA_ISLANDS = 5, |
| VOLCANIC_ISLANDS = 6, |
| GFX9 = 7 |
| }; |
| |
| private: |
| Triple TargetTriple; |
| |
| protected: |
| const FeatureBitset &SubtargetFeatureBits; |
| bool Has16BitInsts; |
| bool HasMadMixInsts; |
| bool FP32Denormals; |
| bool FPExceptions; |
| bool HasSDWA; |
| bool HasVOP3PInsts; |
| bool HasMulI24; |
| bool HasMulU24; |
| bool HasFminFmaxLegacy; |
| bool EnablePromoteAlloca; |
| int LocalMemorySize; |
| unsigned WavefrontSize; |
| |
| public: |
| AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); |
| |
| static const AMDGPUSubtarget &get(const MachineFunction &MF); |
| static const AMDGPUSubtarget &get(const TargetMachine &TM, |
| const Function &F); |
| |
| /// \returns Default range flat work group size for a calling convention. |
| std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; |
| |
| /// \returns Subtarget's default pair of minimum/maximum flat work group sizes |
| /// for function \p F, or minimum/maximum flat work group sizes explicitly |
| /// requested using "amdgpu-flat-work-group-size" attribute attached to |
| /// function \p F. |
| /// |
| /// \returns Subtarget's default values if explicitly requested values cannot |
| /// be converted to integer, or violate subtarget's specifications. |
| std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; |
| |
| /// \returns Subtarget's default pair of minimum/maximum number of waves per |
| /// execution unit for function \p F, or minimum/maximum number of waves per |
| /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute |
| /// attached to function \p F. |
| /// |
| /// \returns Subtarget's default values if explicitly requested values cannot |
| /// be converted to integer, violate subtarget's specifications, or are not |
| /// compatible with minimum/maximum number of waves limited by flat work group |
| /// size, register usage, and/or lds usage. |
| std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; |
| |
| /// Return the amount of LDS that can be used that will not restrict the |
| /// occupancy lower than WaveCount. |
| unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, |
| const Function &) const; |
| |
| /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if |
| /// the given LDS memory size is the only constraint. |
| unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; |
| |
| unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; |
| |
| bool isAmdHsaOS() const { |
| return TargetTriple.getOS() == Triple::AMDHSA; |
| } |
| |
| bool isAmdPalOS() const { |
| return TargetTriple.getOS() == Triple::AMDPAL; |
| } |
| |
| bool isMesa3DOS() const { |
| return TargetTriple.getOS() == Triple::Mesa3D; |
| } |
| |
| bool isMesaKernel(const Function &F) const { |
| return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv()); |
| } |
| |
| bool isAmdCodeObjectV2(const Function &F) const { |
| return isAmdHsaOS() || isMesaKernel(F); |
| } |
| |
| bool has16BitInsts() const { |
| return Has16BitInsts; |
| } |
| |
| bool hasMadMixInsts() const { |
| return HasMadMixInsts; |
| } |
| |
| bool hasFP32Denormals() const { |
| return FP32Denormals; |
| } |
| |
| bool hasFPExceptions() const { |
| return FPExceptions; |
| } |
| |
| bool hasSDWA() const { |
| return HasSDWA; |
| } |
| |
| bool hasVOP3PInsts() const { |
| return HasVOP3PInsts; |
| } |
| |
| bool hasMulI24() const { |
| return HasMulI24; |
| } |
| |
| bool hasMulU24() const { |
| return HasMulU24; |
| } |
| |
| bool hasFminFmaxLegacy() const { |
| return HasFminFmaxLegacy; |
| } |
| |
| bool isPromoteAllocaEnabled() const { |
| return EnablePromoteAlloca; |
| } |
| |
| unsigned getWavefrontSize() const { |
| return WavefrontSize; |
| } |
| |
| int getLocalMemorySize() const { |
| return LocalMemorySize; |
| } |
| |
| unsigned getAlignmentForImplicitArgPtr() const { |
| return isAmdHsaOS() ? 8 : 4; |
| } |
| |
| /// Returns the offset in bytes from the start of the input buffer |
| /// of the first explicit kernel argument. |
| unsigned getExplicitKernelArgOffset(const Function &F) const { |
| return isAmdCodeObjectV2(F) ? 0 : 36; |
| } |
| |
| /// \returns Maximum number of work groups per compute unit supported by the |
| /// subtarget and limited by given \p FlatWorkGroupSize. |
| unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { |
| return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, |
| FlatWorkGroupSize); |
| } |
| |
| /// \returns Minimum flat work group size supported by the subtarget. |
| unsigned getMinFlatWorkGroupSize() const { |
| return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); |
| } |
| |
| /// \returns Maximum flat work group size supported by the subtarget. |
| unsigned getMaxFlatWorkGroupSize() const { |
| return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); |
| } |
| |
| /// \returns Maximum number of waves per execution unit supported by the |
| /// subtarget and limited by given \p FlatWorkGroupSize. |
| unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { |
| return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, |
| FlatWorkGroupSize); |
| } |
| |
| /// \returns Minimum number of waves per execution unit supported by the |
| /// subtarget. |
| unsigned getMinWavesPerEU() const { |
| return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); |
| } |
| |
| unsigned getMaxWavesPerEU() const { return 10; } |
| |
| /// Creates value range metadata on an workitemid.* inrinsic call or load. |
| bool makeLIDRangeMetadata(Instruction *I) const; |
| |
| /// \returns Number of bytes of arguments that are passed to a shader or |
| /// kernel in addition to the explicit ones declared for the function. |
| unsigned getImplicitArgNumBytes(const Function &F) const { |
| if (isMesaKernel(F)) |
| return 16; |
| return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0); |
| } |
| uint64_t getExplicitKernArgSize(const Function &F, |
| unsigned &MaxAlign) const; |
| unsigned getKernArgSegmentSize(const Function &F, |
| unsigned &MaxAlign) const; |
| |
| virtual ~AMDGPUSubtarget() {} |
| }; |
| |
| class GCNSubtarget : public AMDGPUGenSubtargetInfo, |
| public AMDGPUSubtarget { |
| public: |
| enum { |
| ISAVersion0_0_0, |
| ISAVersion6_0_0, |
| ISAVersion6_0_1, |
| ISAVersion7_0_0, |
| ISAVersion7_0_1, |
| ISAVersion7_0_2, |
| ISAVersion7_0_3, |
| ISAVersion7_0_4, |
| ISAVersion8_0_1, |
| ISAVersion8_0_2, |
| ISAVersion8_0_3, |
| ISAVersion8_1_0, |
| ISAVersion9_0_0, |
| ISAVersion9_0_2, |
| ISAVersion9_0_4, |
| ISAVersion9_0_6, |
| }; |
| |
| enum TrapHandlerAbi { |
| TrapHandlerAbiNone = 0, |
| TrapHandlerAbiHsa = 1 |
| }; |
| |
| enum TrapID { |
| TrapIDHardwareReserved = 0, |
| TrapIDHSADebugTrap = 1, |
| TrapIDLLVMTrap = 2, |
| TrapIDLLVMDebugTrap = 3, |
| TrapIDDebugBreakpoint = 7, |
| TrapIDDebugReserved8 = 8, |
| TrapIDDebugReservedFE = 0xfe, |
| TrapIDDebugReservedFF = 0xff |
| }; |
| |
| enum TrapRegValues { |
| LLVMTrapHandlerRegValue = 1 |
| }; |
| |
| private: |
| /// GlobalISel related APIs. |
| std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; |
| std::unique_ptr<InstructionSelector> InstSelector; |
| std::unique_ptr<LegalizerInfo> Legalizer; |
| std::unique_ptr<RegisterBankInfo> RegBankInfo; |
| |
| protected: |
| // Basic subtarget description. |
| Triple TargetTriple; |
| unsigned Gen; |
| unsigned IsaVersion; |
| int LDSBankCount; |
| unsigned MaxPrivateElementSize; |
| |
| // Possibly statically set by tablegen, but may want to be overridden. |
| bool FastFMAF32; |
| bool HalfRate64Ops; |
| |
| // Dynamially set bits that enable features. |
| bool FP64FP16Denormals; |
| bool DX10Clamp; |
| bool FlatForGlobal; |
| bool AutoWaitcntBeforeBarrier; |
| bool CodeObjectV3; |
| bool UnalignedScratchAccess; |
| bool UnalignedBufferAccess; |
| bool HasApertureRegs; |
| bool EnableXNACK; |
| bool TrapHandler; |
| bool DebuggerInsertNops; |
| bool DebuggerEmitPrologue; |
| |
| // Used as options. |
| bool EnableHugePrivateBuffer; |
| bool EnableVGPRSpilling; |
| bool EnableLoadStoreOpt; |
| bool EnableUnsafeDSOffsetFolding; |
| bool EnableSIScheduler; |
| bool EnableDS128; |
| bool DumpCode; |
| |
| // Subtarget statically properties set by tablegen |
| bool FP64; |
| bool FMA; |
| bool MIMG_R128; |
| bool IsGCN; |
| bool GCN3Encoding; |
| bool CIInsts; |
| bool GFX9Insts; |
| bool SGPRInitBug; |
| bool HasSMemRealTime; |
| bool HasIntClamp; |
| bool HasFmaMixInsts; |
| bool HasMovrel; |
| bool HasVGPRIndexMode; |
| bool HasScalarStores; |
| bool HasScalarAtomics; |
| bool HasInv2PiInlineImm; |
| bool HasSDWAOmod; |
| bool HasSDWAScalar; |
| bool HasSDWASdst; |
| bool HasSDWAMac; |
| bool HasSDWAOutModsVOPC; |
| bool HasDPP; |
| bool HasDLInsts; |
| bool D16PreservesUnusedBits; |
| bool FlatAddressSpace; |
| bool FlatInstOffsets; |
| bool FlatGlobalInsts; |
| bool FlatScratchInsts; |
| bool AddNoCarryInsts; |
| bool HasUnpackedD16VMem; |
| bool R600ALUInst; |
| bool CaymanISA; |
| bool CFALUBug; |
| bool HasVertexCache; |
| short TexVTXClauseSize; |
| bool ScalarizeGlobal; |
| |
| // Dummy feature to use for assembler in tablegen. |
| bool FeatureDisable; |
| |
| SelectionDAGTargetInfo TSInfo; |
| AMDGPUAS AS; |
| private: |
| SIInstrInfo InstrInfo; |
| SITargetLowering TLInfo; |
| SIFrameLowering FrameLowering; |
| |
| public: |
| GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, |
| const GCNTargetMachine &TM); |
| ~GCNSubtarget() override; |
| |
| GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, |
| StringRef GPU, StringRef FS); |
| |
| const SIInstrInfo *getInstrInfo() const override { |
| return &InstrInfo; |
| } |
| |
| const SIFrameLowering *getFrameLowering() const override { |
| return &FrameLowering; |
| } |
| |
| const SITargetLowering *getTargetLowering() const override { |
| return &TLInfo; |
| } |
| |
| const SIRegisterInfo *getRegisterInfo() const override { |
| return &InstrInfo.getRegisterInfo(); |
| } |
| |
| const CallLowering *getCallLowering() const override { |
| return CallLoweringInfo.get(); |
| } |
| |
| const InstructionSelector *getInstructionSelector() const override { |
| return InstSelector.get(); |
| } |
| |
| const LegalizerInfo *getLegalizerInfo() const override { |
| return Legalizer.get(); |
| } |
| |
| const RegisterBankInfo *getRegBankInfo() const override { |
| return RegBankInfo.get(); |
| } |
| |
| // Nothing implemented, just prevent crashes on use. |
| const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { |
| return &TSInfo; |
| } |
| |
| void ParseSubtargetFeatures(StringRef CPU, StringRef FS); |
| |
| Generation getGeneration() const { |
| return (Generation)Gen; |
| } |
| |
| unsigned getWavefrontSizeLog2() const { |
| return Log2_32(WavefrontSize); |
| } |
| |
| int getLDSBankCount() const { |
| return LDSBankCount; |
| } |
| |
| unsigned getMaxPrivateElementSize() const { |
| return MaxPrivateElementSize; |
| } |
| |
| AMDGPUAS getAMDGPUAS() const { |
| return AS; |
| } |
| |
| bool hasIntClamp() const { |
| return HasIntClamp; |
| } |
| |
| bool hasFP64() const { |
| return FP64; |
| } |
| |
| bool hasMIMG_R128() const { |
| return MIMG_R128; |
| } |
| |
| bool hasHWFP64() const { |
| return FP64; |
| } |
| |
| bool hasFastFMAF32() const { |
| return FastFMAF32; |
| } |
| |
| bool hasHalfRate64Ops() const { |
| return HalfRate64Ops; |
| } |
| |
| bool hasAddr64() const { |
| return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); |
| } |
| |
| bool hasBFE() const { |
| return true; |
| } |
| |
| bool hasBFI() const { |
| return true; |
| } |
| |
| bool hasBFM() const { |
| return hasBFE(); |
| } |
| |
| bool hasBCNT(unsigned Size) const { |
| return true; |
| } |
| |
| bool hasFFBL() const { |
| return true; |
| } |
| |
| bool hasFFBH() const { |
| return true; |
| } |
| |
| bool hasMed3_16() const { |
| return getGeneration() >= AMDGPUSubtarget::GFX9; |
| } |
| |
| bool hasMin3Max3_16() const { |
| return getGeneration() >= AMDGPUSubtarget::GFX9; |
| } |
| |
| bool hasFmaMixInsts() const { |
| return HasFmaMixInsts; |
| } |
| |
| bool hasCARRY() const { |
| return true; |
| } |
| |
| bool hasFMA() const { |
| return FMA; |
| } |
| |
| TrapHandlerAbi getTrapHandlerAbi() const { |
| return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; |
| } |
| |
| bool enableHugePrivateBuffer() const { |
| return EnableHugePrivateBuffer; |
| } |
| |
| bool unsafeDSOffsetFoldingEnabled() const { |
| return EnableUnsafeDSOffsetFolding; |
| } |
| |
| bool dumpCode() const { |
| return DumpCode; |
| } |
| |
| /// Return the amount of LDS that can be used that will not restrict the |
| /// occupancy lower than WaveCount. |
| unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, |
| const Function &) const; |
| |
| bool hasFP16Denormals() const { |
| return FP64FP16Denormals; |
| } |
| |
| bool hasFP64Denormals() const { |
| return FP64FP16Denormals; |
| } |
| |
| bool supportsMinMaxDenormModes() const { |
| return getGeneration() >= AMDGPUSubtarget::GFX9; |
| } |
| |
| bool enableDX10Clamp() const { |
| return DX10Clamp; |
| } |
| |
| bool enableIEEEBit(const MachineFunction &MF) const { |
| return AMDGPU::isCompute(MF.getFunction().getCallingConv()); |
| } |
| |
| bool useFlatForGlobal() const { |
| return FlatForGlobal; |
| } |
| |
| /// \returns If target supports ds_read/write_b128 and user enables generation |
| /// of ds_read/write_b128. |
| bool useDS128() const { |
| return CIInsts && EnableDS128; |
| } |
| |
| /// \returns If MUBUF instructions always perform range checking, even for |
| /// buffer resources used for private memory access. |
| bool privateMemoryResourceIsRangeChecked() const { |
| return getGeneration() < AMDGPUSubtarget::GFX9; |
| } |
| |
| bool hasAutoWaitcntBeforeBarrier() const { |
| return AutoWaitcntBeforeBarrier; |
| } |
| |
| bool hasCodeObjectV3() const { |
| return CodeObjectV3; |
| } |
| |
| bool hasUnalignedBufferAccess() const { |
| return UnalignedBufferAccess; |
| } |
| |
| bool hasUnalignedScratchAccess() const { |
| return UnalignedScratchAccess; |
| } |
| |
| bool hasApertureRegs() const { |
| return HasApertureRegs; |
| } |
| |
| bool isTrapHandlerEnabled() const { |
| return TrapHandler; |
| } |
| |
| bool isXNACKEnabled() const { |
| return EnableXNACK; |
| } |
| |
| bool hasFlatAddressSpace() const { |
| return FlatAddressSpace; |
| } |
| |
| bool hasFlatInstOffsets() const { |
| return FlatInstOffsets; |
| } |
| |
| bool hasFlatGlobalInsts() const { |
| return FlatGlobalInsts; |
| } |
| |
| bool hasFlatScratchInsts() const { |
| return FlatScratchInsts; |
| } |
| |
| bool hasFlatLgkmVMemCountInOrder() const { |
| return getGeneration() > GFX9; |
| } |
| |
| bool hasD16LoadStore() const { |
| return getGeneration() >= GFX9; |
| } |
| |
| /// Return if most LDS instructions have an m0 use that require m0 to be |
| /// iniitalized. |
| bool ldsRequiresM0Init() const { |
| return getGeneration() < GFX9; |
| } |
| |
| bool hasAddNoCarry() const { |
| return AddNoCarryInsts; |
| } |
| |
| bool hasUnpackedD16VMem() const { |
| return HasUnpackedD16VMem; |
| } |
| |
| // Covers VS/PS/CS graphics shaders |
| bool isMesaGfxShader(const Function &F) const { |
| return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); |
| } |
| |
| bool hasMad64_32() const { |
| return getGeneration() >= SEA_ISLANDS; |
| } |
| |
| bool hasSDWAOmod() const { |
| return HasSDWAOmod; |
| } |
| |
| bool hasSDWAScalar() const { |
| return HasSDWAScalar; |
| } |
| |
| bool hasSDWASdst() const { |
| return HasSDWASdst; |
| } |
| |
| bool hasSDWAMac() const { |
| return HasSDWAMac; |
| } |
| |
| bool hasSDWAOutModsVOPC() const { |
| return HasSDWAOutModsVOPC; |
| } |
| |
| bool vmemWriteNeedsExpWaitcnt() const { |
| return getGeneration() < SEA_ISLANDS; |
| } |
| |
| bool hasDLInsts() const { |
| return HasDLInsts; |
| } |
| |
| bool d16PreservesUnusedBits() const { |
| return D16PreservesUnusedBits; |
| } |
| |
| // Scratch is allocated in 256 dword per wave blocks for the entire |
| // wavefront. When viewed from the perspecive of an arbitrary workitem, this |
| // is 4-byte aligned. |
| // |
| // Only 4-byte alignment is really needed to access anything. Transformations |
| // on the pointer value itself may rely on the alignment / known low bits of |
| // the pointer. Set this to something above the minimum to avoid needing |
| // dynamic realignment in common cases. |
| unsigned getStackAlignment() const { |
| return 16; |
| } |
| |
| bool enableMachineScheduler() const override { |
| return true; |
| } |
| |
| bool enableSubRegLiveness() const override { |
| return true; |
| } |
| |
| void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } |
| bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } |
| |
| /// \returns Number of execution units per compute unit supported by the |
| /// subtarget. |
| unsigned getEUsPerCU() const { |
| return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Maximum number of waves per compute unit supported by the |
| /// subtarget without any kind of limitation. |
| unsigned getMaxWavesPerCU() const { |
| return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Maximum number of waves per compute unit supported by the |
| /// subtarget and limited by given \p FlatWorkGroupSize. |
| unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { |
| return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(), |
| FlatWorkGroupSize); |
| } |
| |
| /// \returns Maximum number of waves per execution unit supported by the |
| /// subtarget without any kind of limitation. |
| unsigned getMaxWavesPerEU() const { |
| return AMDGPU::IsaInfo::getMaxWavesPerEU(); |
| } |
| |
| /// \returns Number of waves per work group supported by the subtarget and |
| /// limited by given \p FlatWorkGroupSize. |
| unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { |
| return AMDGPU::IsaInfo::getWavesPerWorkGroup( |
| MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); |
| } |
| |
| // static wrappers |
| static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); |
| |
| // XXX - Why is this here if it isn't in the default pass set? |
| bool enableEarlyIfConversion() const override { |
| return true; |
| } |
| |
| void overrideSchedPolicy(MachineSchedPolicy &Policy, |
| unsigned NumRegionInstrs) const override; |
| |
| bool isVGPRSpillingEnabled(const Function &F) const; |
| |
| unsigned getMaxNumUserSGPRs() const { |
| return 16; |
| } |
| |
| bool hasSMemRealTime() const { |
| return HasSMemRealTime; |
| } |
| |
| bool hasMovrel() const { |
| return HasMovrel; |
| } |
| |
| bool hasVGPRIndexMode() const { |
| return HasVGPRIndexMode; |
| } |
| |
| bool useVGPRIndexMode(bool UserEnable) const { |
| return !hasMovrel() || (UserEnable && hasVGPRIndexMode()); |
| } |
| |
| bool hasScalarCompareEq64() const { |
| return getGeneration() >= VOLCANIC_ISLANDS; |
| } |
| |
| bool hasScalarStores() const { |
| return HasScalarStores; |
| } |
| |
| bool hasScalarAtomics() const { |
| return HasScalarAtomics; |
| } |
| |
| bool hasInv2PiInlineImm() const { |
| return HasInv2PiInlineImm; |
| } |
| |
| bool hasDPP() const { |
| return HasDPP; |
| } |
| |
| bool enableSIScheduler() const { |
| return EnableSIScheduler; |
| } |
| |
| bool debuggerSupported() const { |
| return debuggerInsertNops() && debuggerEmitPrologue(); |
| } |
| |
| bool debuggerInsertNops() const { |
| return DebuggerInsertNops; |
| } |
| |
| bool debuggerEmitPrologue() const { |
| return DebuggerEmitPrologue; |
| } |
| |
| bool loadStoreOptEnabled() const { |
| return EnableLoadStoreOpt; |
| } |
| |
| bool hasSGPRInitBug() const { |
| return SGPRInitBug; |
| } |
| |
| bool has12DWordStoreHazard() const { |
| return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; |
| } |
| |
| bool hasSMovFedHazard() const { |
| return getGeneration() >= AMDGPUSubtarget::GFX9; |
| } |
| |
| bool hasReadM0MovRelInterpHazard() const { |
| return getGeneration() >= AMDGPUSubtarget::GFX9; |
| } |
| |
| bool hasReadM0SendMsgHazard() const { |
| return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; |
| } |
| |
| /// Return the maximum number of waves per SIMD for kernels using \p SGPRs |
| /// SGPRs |
| unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; |
| |
| /// Return the maximum number of waves per SIMD for kernels using \p VGPRs |
| /// VGPRs |
| unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; |
| |
| /// \returns true if the flat_scratch register should be initialized with the |
| /// pointer to the wave's scratch memory rather than a size and offset. |
| bool flatScratchIsPointer() const { |
| return getGeneration() >= AMDGPUSubtarget::GFX9; |
| } |
| |
| /// \returns true if the machine has merged shaders in which s0-s7 are |
| /// reserved by the hardware and user SGPRs start at s8 |
| bool hasMergedShaders() const { |
| return getGeneration() >= GFX9; |
| } |
| |
| /// \returns SGPR allocation granularity supported by the subtarget. |
| unsigned getSGPRAllocGranule() const { |
| return AMDGPU::IsaInfo::getSGPRAllocGranule( |
| MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns SGPR encoding granularity supported by the subtarget. |
| unsigned getSGPREncodingGranule() const { |
| return AMDGPU::IsaInfo::getSGPREncodingGranule( |
| MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Total number of SGPRs supported by the subtarget. |
| unsigned getTotalNumSGPRs() const { |
| return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Addressable number of SGPRs supported by the subtarget. |
| unsigned getAddressableNumSGPRs() const { |
| return AMDGPU::IsaInfo::getAddressableNumSGPRs( |
| MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Minimum number of SGPRs that meets the given number of waves per |
| /// execution unit requirement supported by the subtarget. |
| unsigned getMinNumSGPRs(unsigned WavesPerEU) const { |
| return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), |
| WavesPerEU); |
| } |
| |
| /// \returns Maximum number of SGPRs that meets the given number of waves per |
| /// execution unit requirement supported by the subtarget. |
| unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { |
| return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), |
| WavesPerEU, Addressable); |
| } |
| |
| /// \returns Reserved number of SGPRs for given function \p MF. |
| unsigned getReservedNumSGPRs(const MachineFunction &MF) const; |
| |
| /// \returns Maximum number of SGPRs that meets number of waves per execution |
| /// unit requirement for function \p MF, or number of SGPRs explicitly |
| /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. |
| /// |
| /// \returns Value that meets number of waves per execution unit requirement |
| /// if explicitly requested value cannot be converted to integer, violates |
| /// subtarget's specifications, or does not meet number of waves per execution |
| /// unit requirement. |
| unsigned getMaxNumSGPRs(const MachineFunction &MF) const; |
| |
| /// \returns VGPR allocation granularity supported by the subtarget. |
| unsigned getVGPRAllocGranule() const { |
| return AMDGPU::IsaInfo::getVGPRAllocGranule( |
| MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns VGPR encoding granularity supported by the subtarget. |
| unsigned getVGPREncodingGranule() const { |
| return AMDGPU::IsaInfo::getVGPREncodingGranule( |
| MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Total number of VGPRs supported by the subtarget. |
| unsigned getTotalNumVGPRs() const { |
| return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Addressable number of VGPRs supported by the subtarget. |
| unsigned getAddressableNumVGPRs() const { |
| return AMDGPU::IsaInfo::getAddressableNumVGPRs( |
| MCSubtargetInfo::getFeatureBits()); |
| } |
| |
| /// \returns Minimum number of VGPRs that meets given number of waves per |
| /// execution unit requirement supported by the subtarget. |
| unsigned getMinNumVGPRs(unsigned WavesPerEU) const { |
| return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), |
| WavesPerEU); |
| } |
| |
| /// \returns Maximum number of VGPRs that meets given number of waves per |
| /// execution unit requirement supported by the subtarget. |
| unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { |
| return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), |
| WavesPerEU); |
| } |
| |
| /// \returns Maximum number of VGPRs that meets number of waves per execution |
| /// unit requirement for function \p MF, or number of VGPRs explicitly |
| /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. |
| /// |
| /// \returns Value that meets number of waves per execution unit requirement |
| /// if explicitly requested value cannot be converted to integer, violates |
| /// subtarget's specifications, or does not meet number of waves per execution |
| /// unit requirement. |
| unsigned getMaxNumVGPRs(const MachineFunction &MF) const; |
| |
| void getPostRAMutations( |
| std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) |
| const override; |
| }; |
| |
| class R600Subtarget final : public R600GenSubtargetInfo, |
| public AMDGPUSubtarget { |
| private: |
| R600InstrInfo InstrInfo; |
| R600FrameLowering FrameLowering; |
| bool FMA; |
| bool CaymanISA; |
| bool CFALUBug; |
| bool DX10Clamp; |
| bool HasVertexCache; |
| bool R600ALUInst; |
| bool FP64; |
| short TexVTXClauseSize; |
| Generation Gen; |
| R600TargetLowering TLInfo; |
| InstrItineraryData InstrItins; |
| SelectionDAGTargetInfo TSInfo; |
| AMDGPUAS AS; |
| |
| public: |
| R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, |
| const TargetMachine &TM); |
| |
| const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } |
| |
| const R600FrameLowering *getFrameLowering() const override { |
| return &FrameLowering; |
| } |
| |
| const R600TargetLowering *getTargetLowering() const override { |
| return &TLInfo; |
| } |
| |
| const R600RegisterInfo *getRegisterInfo() const override { |
| return &InstrInfo.getRegisterInfo(); |
| } |
| |
| const InstrItineraryData *getInstrItineraryData() const override { |
| return &InstrItins; |
| } |
| |
| // Nothing implemented, just prevent crashes on use. |
| const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { |
| return &TSInfo; |
| } |
| |
| void ParseSubtargetFeatures(StringRef CPU, StringRef FS); |
| |
| Generation getGeneration() const { |
| return Gen; |
| } |
| |
| unsigned getStackAlignment() const { |
| return 4; |
| } |
| |
| R600Subtarget &initializeSubtargetDependencies(const Triple &TT, |
| StringRef GPU, StringRef FS); |
| |
| bool hasBFE() const { |
| return (getGeneration() >= EVERGREEN); |
| } |
| |
| bool hasBFI() const { |
| return (getGeneration() >= EVERGREEN); |
| } |
| |
| bool hasBCNT(unsigned Size) const { |
| if (Size == 32) |
| return (getGeneration() >= EVERGREEN); |
| |
| return false; |
| } |
| |
| bool hasBORROW() const { |
| return (getGeneration() >= EVERGREEN); |
| } |
| |
| bool hasCARRY() const { |
| return (getGeneration() >= EVERGREEN); |
| } |
| |
| bool hasCaymanISA() const { |
| return CaymanISA; |
| } |
| |
| bool hasFFBL() const { |
| return (getGeneration() >= EVERGREEN); |
| } |
| |
| bool hasFFBH() const { |
| return (getGeneration() >= EVERGREEN); |
| } |
| |
| bool hasFMA() const { return FMA; } |
| |
| bool hasCFAluBug() const { return CFALUBug; } |
| |
| bool hasVertexCache() const { return HasVertexCache; } |
| |
| short getTexVTXClauseSize() const { return TexVTXClauseSize; } |
| |
| AMDGPUAS getAMDGPUAS() const { return AS; } |
| |
| bool enableMachineScheduler() const override { |
| return true; |
| } |
| |
| bool enableSubRegLiveness() const override { |
| return true; |
| } |
| }; |
| |
| } // end namespace llvm |
| |
| #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H |