| //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H |
| #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H |
| |
| #include "SIDefines.h" |
| #include "llvm/ADT/FloatingPointMode.h" |
| #include "llvm/IR/CallingConv.h" |
| #include "llvm/Support/Alignment.h" |
| #include <array> |
| #include <functional> |
| #include <utility> |
| |
| struct amd_kernel_code_t; |
| |
| namespace llvm { |
| |
| struct Align; |
| class Argument; |
| class Function; |
| class GCNSubtarget; |
| class GlobalValue; |
| class MCInstrInfo; |
| class MCRegisterClass; |
| class MCRegisterInfo; |
| class MCSubtargetInfo; |
| class StringRef; |
| class Triple; |
| class raw_ostream; |
| |
| namespace amdhsa { |
| struct kernel_descriptor_t; |
| } |
| |
| namespace AMDGPU { |
| |
| struct IsaVersion; |
| |
| /// \returns HSA OS ABI Version identification. |
| std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI); |
| /// \returns True if HSA OS ABI Version identification is 2, |
| /// false otherwise. |
| bool isHsaAbiVersion2(const MCSubtargetInfo *STI); |
| /// \returns True if HSA OS ABI Version identification is 3, |
| /// false otherwise. |
| bool isHsaAbiVersion3(const MCSubtargetInfo *STI); |
| /// \returns True if HSA OS ABI Version identification is 4, |
| /// false otherwise. |
| bool isHsaAbiVersion4(const MCSubtargetInfo *STI); |
| /// \returns True if HSA OS ABI Version identification is 5, |
| /// false otherwise. |
| bool isHsaAbiVersion5(const MCSubtargetInfo *STI); |
| /// \returns True if HSA OS ABI Version identification is 3 and above, |
| /// false otherwise. |
| bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); |
| |
/// \returns The offset of the multigrid_sync_arg argument, measured from
/// implicitarg_ptr.
unsigned getMultigridSyncArgImplicitArgPosition();

/// \returns The offset of the hostcall pointer argument, measured from
/// implicitarg_ptr.
unsigned getHostcallImplicitArgPosition();

// NOTE(review): the next two declarations carry no documentation in this
// header. By analogy with the two above they presumably return offsets from
// implicitarg_ptr as well -- confirm against the definitions.
unsigned getDefaultQueueImplicitArgPosition();
unsigned getCompletionActionImplicitArgPosition();

/// \returns The code object version.
unsigned getAmdhsaCodeObjectVersion();
| |
/// Plain record describing one buffer format. Pointers to these records are
/// handed out by the getGcnBufferFormatInfo() overloads declared in this
/// header.
///
/// NOTE(review): this struct is defined ahead of the generated searchable
/// tables include, so the member order presumably has to match the generated
/// rows -- do not reorder without checking.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};
| |
/// Per-opcode record for MAI instructions: the opcode plus two boolean
/// properties.
///
/// NOTE(review): presumably these flags back getMAIIsDGEMM() and
/// getMAIIsGFX940XDL() -- confirm against the definitions.
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};
| |
| #define GET_MIMGBaseOpcode_DECL |
| #define GET_MIMGDim_DECL |
| #define GET_MIMGEncoding_DECL |
| #define GET_MIMGLZMapping_DECL |
| #define GET_MIMGMIPMapping_DECL |
| #define GET_MIMGBiASMapping_DECL |
| #define GET_MAIInstInfoTable_DECL |
| #include "AMDGPUGenSearchableTables.inc" |
| |
| namespace IsaInfo { |
| |
enum {
  // 96 is the value the closed Vulkan driver uses. It caps the wave count at 8
  // but spills fewer SGPRs than a setting of 80 would.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};
| |
/// State of a single target ID feature. AMDGPUTargetID keeps one of these for
/// xnack and one for sramecc.
enum class TargetIDSetting { Unsupported, Any, Off, On };
| |
| class AMDGPUTargetID { |
| private: |
| const MCSubtargetInfo &STI; |
| TargetIDSetting XnackSetting; |
| TargetIDSetting SramEccSetting; |
| |
| public: |
| explicit AMDGPUTargetID(const MCSubtargetInfo &STI); |
| ~AMDGPUTargetID() = default; |
| |
| /// \return True if the current xnack setting is not "Unsupported". |
| bool isXnackSupported() const { |
| return XnackSetting != TargetIDSetting::Unsupported; |
| } |
| |
| /// \returns True if the current xnack setting is "On" or "Any". |
| bool isXnackOnOrAny() const { |
| return XnackSetting == TargetIDSetting::On || |
| XnackSetting == TargetIDSetting::Any; |
| } |
| |
| /// \returns True if current xnack setting is "On" or "Off", |
| /// false otherwise. |
| bool isXnackOnOrOff() const { |
| return getXnackSetting() == TargetIDSetting::On || |
| getXnackSetting() == TargetIDSetting::Off; |
| } |
| |
| /// \returns The current xnack TargetIDSetting, possible options are |
| /// "Unsupported", "Any", "Off", and "On". |
| TargetIDSetting getXnackSetting() const { |
| return XnackSetting; |
| } |
| |
| /// Sets xnack setting to \p NewXnackSetting. |
| void setXnackSetting(TargetIDSetting NewXnackSetting) { |
| XnackSetting = NewXnackSetting; |
| } |
| |
| /// \return True if the current sramecc setting is not "Unsupported". |
| bool isSramEccSupported() const { |
| return SramEccSetting != TargetIDSetting::Unsupported; |
| } |
| |
| /// \returns True if the current sramecc setting is "On" or "Any". |
| bool isSramEccOnOrAny() const { |
| return SramEccSetting == TargetIDSetting::On || |
| SramEccSetting == TargetIDSetting::Any; |
| } |
| |
| /// \returns True if current sramecc setting is "On" or "Off", |
| /// false otherwise. |
| bool isSramEccOnOrOff() const { |
| return getSramEccSetting() == TargetIDSetting::On || |
| getSramEccSetting() == TargetIDSetting::Off; |
| } |
| |
| /// \returns The current sramecc TargetIDSetting, possible options are |
| /// "Unsupported", "Any", "Off", and "On". |
| TargetIDSetting getSramEccSetting() const { |
| return SramEccSetting; |
| } |
| |
| /// Sets sramecc setting to \p NewSramEccSetting. |
| void setSramEccSetting(TargetIDSetting NewSramEccSetting) { |
| SramEccSetting = NewSramEccSetting; |
| } |
| |
| void setTargetIDFromFeaturesString(StringRef FS); |
| void setTargetIDFromTargetIDStream(StringRef TargetID); |
| |
| /// \returns String representation of an object. |
| std::string toString() const; |
| }; |
| |
| /// \returns Wavefront size for given subtarget \p STI. |
| unsigned getWavefrontSize(const MCSubtargetInfo *STI); |
| |
| /// \returns Local memory size in bytes for given subtarget \p STI. |
| unsigned getLocalMemorySize(const MCSubtargetInfo *STI); |
| |
| /// \returns Maximum addressable local memory size in bytes for given subtarget |
| /// \p STI. |
| unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI); |
| |
| /// \returns Number of execution units per compute unit for given subtarget \p |
| /// STI. |
| unsigned getEUsPerCU(const MCSubtargetInfo *STI); |
| |
| /// \returns Maximum number of work groups per compute unit for given subtarget |
| /// \p STI and limited by given \p FlatWorkGroupSize. |
| unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, |
| unsigned FlatWorkGroupSize); |
| |
| /// \returns Minimum number of waves per execution unit for given subtarget \p |
| /// STI. |
| unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); |
| |
| /// \returns Maximum number of waves per execution unit for given subtarget \p |
| /// STI without any kind of limitation. |
| unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI); |
| |
| /// \returns Number of waves per execution unit required to support the given \p |
| /// FlatWorkGroupSize. |
| unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, |
| unsigned FlatWorkGroupSize); |
| |
| /// \returns Minimum flat work group size for given subtarget \p STI. |
| unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI); |
| |
| /// \returns Maximum flat work group size for given subtarget \p STI. |
| unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI); |
| |
| /// \returns Number of waves per work group for given subtarget \p STI and |
| /// \p FlatWorkGroupSize. |
| unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, |
| unsigned FlatWorkGroupSize); |
| |
| /// \returns SGPR allocation granularity for given subtarget \p STI. |
| unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI); |
| |
| /// \returns SGPR encoding granularity for given subtarget \p STI. |
| unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI); |
| |
| /// \returns Total number of SGPRs for given subtarget \p STI. |
| unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI); |
| |
| /// \returns Addressable number of SGPRs for given subtarget \p STI. |
| unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI); |
| |
| /// \returns Minimum number of SGPRs that meets the given number of waves per |
| /// execution unit requirement for given subtarget \p STI. |
| unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); |
| |
| /// \returns Maximum number of SGPRs that meets the given number of waves per |
| /// execution unit requirement for given subtarget \p STI. |
| unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, |
| bool Addressable); |
| |
| /// \returns Number of extra SGPRs implicitly required by given subtarget \p |
| /// STI when the given special registers are used. |
| unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, |
| bool FlatScrUsed, bool XNACKUsed); |
| |
| /// \returns Number of extra SGPRs implicitly required by given subtarget \p |
| /// STI when the given special registers are used. XNACK is inferred from |
| /// \p STI. |
| unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, |
| bool FlatScrUsed); |
| |
| /// \returns Number of SGPR blocks needed for given subtarget \p STI when |
| /// \p NumSGPRs are used. \p NumSGPRs should already include any special |
| /// register counts. |
| unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); |
| |
| /// \returns VGPR allocation granularity for given subtarget \p STI. |
| /// |
| /// For subtargets which support it, \p EnableWavefrontSize32 should match |
| /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. |
| unsigned |
| getVGPRAllocGranule(const MCSubtargetInfo *STI, |
| std::optional<bool> EnableWavefrontSize32 = std::nullopt); |
| |
| /// \returns VGPR encoding granularity for given subtarget \p STI. |
| /// |
| /// For subtargets which support it, \p EnableWavefrontSize32 should match |
| /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. |
| unsigned getVGPREncodingGranule( |
| const MCSubtargetInfo *STI, |
| std::optional<bool> EnableWavefrontSize32 = std::nullopt); |
| |
| /// \returns Total number of VGPRs for given subtarget \p STI. |
| unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); |
| |
| /// \returns Addressable number of VGPRs for given subtarget \p STI. |
| unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI); |
| |
| /// \returns Minimum number of VGPRs that meets given number of waves per |
| /// execution unit requirement for given subtarget \p STI. |
| unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); |
| |
| /// \returns Maximum number of VGPRs that meets given number of waves per |
| /// execution unit requirement for given subtarget \p STI. |
| unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); |
| |
| /// \returns Number of waves reachable for a given \p NumVGPRs usage for given |
| /// subtarget \p STI. |
| unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, |
| unsigned NumVGPRs); |
| |
| /// \returns Number of VGPR blocks needed for given subtarget \p STI when |
| /// \p NumVGPRs are used. |
| /// |
| /// For subtargets which support it, \p EnableWavefrontSize32 should match the |
| /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. |
| unsigned |
| getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, |
| std::optional<bool> EnableWavefrontSize32 = std::nullopt); |
| |
| } // end namespace IsaInfo |
| |
| LLVM_READONLY |
| int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); |
| |
| LLVM_READONLY |
| inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) { |
| return getNamedOperandIdx(Opcode, NamedIdx) != -1; |
| } |
| |
| LLVM_READONLY |
| int getSOPPWithRelaxation(uint16_t Opcode); |
| |
| struct MIMGBaseOpcodeInfo { |
| MIMGBaseOpcode BaseOpcode; |
| bool Store; |
| bool Atomic; |
| bool AtomicX2; |
| bool Sampler; |
| bool Gather4; |
| |
| uint8_t NumExtraArgs; |
| bool Gradients; |
| bool G16; |
| bool Coordinates; |
| bool LodOrClampOrMip; |
| bool HasD16; |
| bool MSAA; |
| bool BVH; |
| }; |
| |
| LLVM_READONLY |
| const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc); |
| |
| LLVM_READONLY |
| const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); |
| |
| struct MIMGDimInfo { |
| MIMGDim Dim; |
| uint8_t NumCoords; |
| uint8_t NumGradients; |
| bool MSAA; |
| bool DA; |
| uint8_t Encoding; |
| const char *AsmSuffix; |
| }; |
| |
| LLVM_READONLY |
| const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); |
| |
| LLVM_READONLY |
| const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); |
| |
| LLVM_READONLY |
| const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); |
| |
| struct MIMGLZMappingInfo { |
| MIMGBaseOpcode L; |
| MIMGBaseOpcode LZ; |
| }; |
| |
| struct MIMGMIPMappingInfo { |
| MIMGBaseOpcode MIP; |
| MIMGBaseOpcode NONMIP; |
| }; |
| |
| struct MIMGBiasMappingInfo { |
| MIMGBaseOpcode Bias; |
| MIMGBaseOpcode NoBias; |
| }; |
| |
| struct MIMGOffsetMappingInfo { |
| MIMGBaseOpcode Offset; |
| MIMGBaseOpcode NoOffset; |
| }; |
| |
| struct MIMGG16MappingInfo { |
| MIMGBaseOpcode G; |
| MIMGBaseOpcode G16; |
| }; |
| |
| LLVM_READONLY |
| const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); |
| |
/// Pairs the two-address and three-address opcodes of one WMMA instruction.
///
/// NOTE(review): presumably the record behind mapWMMA2AddrTo3AddrOpcode() and
/// mapWMMA3AddrTo2AddrOpcode() -- confirm against the definitions.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
| |
| LLVM_READONLY |
| const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); |
| |
| LLVM_READONLY |
| const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias); |
| |
| LLVM_READONLY |
| const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset); |
| |
| LLVM_READONLY |
| const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G); |
| |
| LLVM_READONLY |
| int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, |
| unsigned VDataDwords, unsigned VAddrDwords); |
| |
| LLVM_READONLY |
| int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels); |
| |
| LLVM_READONLY |
| unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, |
| const MIMGDimInfo *Dim, bool IsA16, |
| bool IsG16Supported); |
| |
/// Record tying a MIMG instruction opcode to its base opcode, encoding and
/// operand sizes. getMIMGInfo() returns a pointer to one of these.
///
/// NOTE(review): presumably mirrors a generated table row, so keep the member
/// order as is -- confirm before reordering.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
| |
| LLVM_READONLY |
| const MIMGInfo *getMIMGInfo(unsigned Opc); |
| |
| LLVM_READONLY |
| int getMTBUFBaseOpcode(unsigned Opc); |
| |
| LLVM_READONLY |
| int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements); |
| |
| LLVM_READONLY |
| int getMTBUFElements(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMTBUFHasVAddr(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMTBUFHasSrsrc(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMTBUFHasSoffset(unsigned Opc); |
| |
| LLVM_READONLY |
| int getMUBUFBaseOpcode(unsigned Opc); |
| |
| LLVM_READONLY |
| int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements); |
| |
| LLVM_READONLY |
| int getMUBUFElements(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMUBUFHasVAddr(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMUBUFHasSrsrc(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMUBUFHasSoffset(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMUBUFIsBufferInv(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getSMEMIsBuffer(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getVOP1IsSingle(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getVOP2IsSingle(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getVOP3IsSingle(unsigned Opc); |
| |
| LLVM_READONLY |
| bool isVOPC64DPP(unsigned Opc); |
| |
| /// Returns true if MAI operation is a double precision GEMM. |
| LLVM_READONLY |
| bool getMAIIsDGEMM(unsigned Opc); |
| |
| LLVM_READONLY |
| bool getMAIIsGFX940XDL(unsigned Opc); |
| |
/// Result type of getCanBeVOPD(): one flag for X and one for Y.
///
/// NOTE(review): presumably each flag says whether the opcode may serve as
/// that VOPD component -- confirm against the definition of getCanBeVOPD().
struct CanBeVOPD {
  bool X;
  bool Y;
};
| |
| LLVM_READONLY |
| CanBeVOPD getCanBeVOPD(unsigned Opc); |
| |
| LLVM_READONLY |
| const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, |
| uint8_t NumComponents, |
| uint8_t NumFormat, |
| const MCSubtargetInfo &STI); |
| LLVM_READONLY |
| const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, |
| const MCSubtargetInfo &STI); |
| |
| LLVM_READONLY |
| int getMCOpcode(uint16_t Opcode, unsigned Gen); |
| |
| LLVM_READONLY |
| unsigned getVOPDOpcode(unsigned Opc); |
| |
| LLVM_READONLY |
| int getVOPDFull(unsigned OpX, unsigned OpY); |
| |
| LLVM_READONLY |
| bool isVOPD(unsigned Opc); |
| |
| LLVM_READNONE |
| bool isMAC(unsigned Opc); |
| |
| LLVM_READNONE |
| bool isPermlane16(unsigned Opc); |
| |
| namespace VOPD { |
| |
// Operand positions inside one VOPD component, followed by operand counts.
// The counts deliberately reuse small values that the positions also use.
enum Component : unsigned {
  DST = 0,
  SRC0 = 1,
  SRC1 = 2,
  SRC2 = 3,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};

// Number of VGPR banks per VOPD component operand.
// NOTE(review): four entries, presumably indexed by Component (DST, SRC0,
// SRC1, SRC2) -- confirm against the users of this table.
constexpr unsigned BANKS_NUM[] = {2, 4, 4, 2};

// Index of a component within a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };

// All component indices, in order, and how many there are.
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
| |
| // Properties of VOPD components. |
| class ComponentProps { |
| private: |
| unsigned SrcOperandsNum = 0; |
| std::optional<unsigned> MandatoryLiteralIdx; |
| bool HasSrc2Acc = false; |
| |
| public: |
| ComponentProps() = default; |
| ComponentProps(const MCInstrDesc &OpDesc); |
| |
| // Return the total number of src operands this component has. |
| unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; } |
| |
| // Return the number of src operands of this component visible to the parser. |
| unsigned getCompParsedSrcOperandsNum() const { |
| return SrcOperandsNum - HasSrc2Acc; |
| } |
| |
| // Return true iif this component has a mandatory literal. |
| bool hasMandatoryLiteral() const { return MandatoryLiteralIdx.has_value(); } |
| |
| // If this component has a mandatory literal, return component operand |
| // index of this literal (i.e. either Component::SRC1 or Component::SRC2). |
| unsigned getMandatoryLiteralCompOperandIndex() const { |
| assert(hasMandatoryLiteral()); |
| return *MandatoryLiteralIdx; |
| } |
| |
| // Return true iif this component has operand |
| // with component index CompSrcIdx and this operand may be a register. |
| bool hasRegSrcOperand(unsigned CompSrcIdx) const { |
| assert(CompSrcIdx < Component::MAX_SRC_NUM); |
| return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx); |
| } |
| |
| // Return true iif this component has tied src2. |
| bool hasSrc2Acc() const { return HasSrc2Acc; } |
| |
| private: |
| bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const { |
| assert(CompSrcIdx < Component::MAX_SRC_NUM); |
| return hasMandatoryLiteral() && |
| *MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx; |
| } |
| }; |
| |
// Kind of a component. The values double as indices into the per-kind tables
// of ComponentLayout.
enum ComponentKind : unsigned {
  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  SINGLE = 0,
  // A VOPD instruction, X component.
  COMPONENT_X,
  // A VOPD instruction, Y component.
  COMPONENT_Y,
  MAX = COMPONENT_Y
};
| |
| // Interface functions of this class map VOPD component operand indices |
| // to indices of operands in MachineInstr/MCInst or parsed operands array. |
| // |
| // Note that this class operates with 3 kinds of indices: |
| // - VOPD component operand indices (Component::DST, Component::SRC0, etc.); |
| // - MC operand indices (they refer operands in a MachineInstr/MCInst); |
| // - parsed operand indices (they refer operands in parsed operands array). |
| // |
| // For SINGLE components mapping between these indices is trivial. |
| // But things get more complicated for COMPONENT_X and |
| // COMPONENT_Y because these components share the same |
| // MachineInstr/MCInst and the same parsed operands array. |
| // Below is an example of component operand to parsed operand |
| // mapping for the following instruction: |
| // |
| // v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1 |
| // |
| // PARSED COMPONENT PARSED |
| // COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX |
| // ------------------------------------------------------------------- |
| // "v_dual_add_f32" 0 |
| // v_dual_add_f32 v255 0 (DST) --> 1 |
| // v4 1 (SRC0) --> 2 |
| // v5 2 (SRC1) --> 3 |
| // "::" 4 |
| // "v_dual_mov_b32" 5 |
| // v_dual_mov_b32 v6 0 (DST) --> 6 |
| // v1 1 (SRC0) --> 7 |
| // ------------------------------------------------------------------- |
| // |
| class ComponentLayout { |
| private: |
| // Regular MachineInstr/MCInst operands are ordered as follows: |
| // dst, src0 [, other src operands] |
| // VOPD MachineInstr/MCInst operands are ordered as follows: |
| // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] |
| // Each ComponentKind has operand indices defined below. |
| static constexpr unsigned MC_DST_IDX[] = {0, 0, 1}; |
| static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */}; |
| |
| // Parsed operands of regular instructions are ordered as follows: |
| // Mnemo dst src0 [vsrc1 ...] |
| // Parsed VOPD operands are ordered as follows: |
| // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' |
| // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] |
| // Each ComponentKind has operand indices defined below. |
| static constexpr unsigned PARSED_DST_IDX[] = {1, 1, |
| 4 /* + OpX.ParsedSrcNum */}; |
| static constexpr unsigned FIRST_PARSED_SRC_IDX[] = { |
| 2, 2, 5 /* + OpX.ParsedSrcNum */}; |
| |
| private: |
| const ComponentKind Kind; |
| const ComponentProps PrevComp; |
| |
| public: |
| // Create layout for COMPONENT_X or SINGLE component. |
| ComponentLayout(ComponentKind Kind) : Kind(Kind) { |
| assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X); |
| } |
| |
| // Create layout for COMPONENT_Y which depends on COMPONENT_X layout. |
| ComponentLayout(const ComponentProps &OpXProps) |
| : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {} |
| |
| public: |
| // Return the index of dst operand in MCInst operands. |
| unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; } |
| |
| // Return the index of the specified src operand in MCInst operands. |
| unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const { |
| assert(CompSrcIdx < Component::MAX_SRC_NUM); |
| return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx; |
| } |
| |
| // Return the index of dst operand in the parsed operands array. |
| unsigned getIndexOfDstInParsedOperands() const { |
| return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum(); |
| } |
| |
| // Return the index of the specified src operand in the parsed operands array. |
| unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const { |
| assert(CompSrcIdx < Component::MAX_SRC_NUM); |
| return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx; |
| } |
| |
| private: |
| unsigned getPrevCompSrcNum() const { |
| return PrevComp.getCompSrcOperandsNum(); |
| } |
| unsigned getPrevCompParsedSrcNum() const { |
| return PrevComp.getCompParsedSrcOperandsNum(); |
| } |
| }; |
| |
| // Layout and properties of VOPD components. |
| class ComponentInfo : public ComponentLayout, public ComponentProps { |
| public: |
| // Create ComponentInfo for COMPONENT_X or SINGLE component. |
| ComponentInfo(const MCInstrDesc &OpDesc, |
| ComponentKind Kind = ComponentKind::SINGLE) |
| : ComponentLayout(Kind), ComponentProps(OpDesc) {} |
| |
| // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout. |
| ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps) |
| : ComponentLayout(OpXProps), ComponentProps(OpDesc) {} |
| |
| // Map component operand index to parsed operand index. |
| // Return 0 if the specified operand does not exist. |
| unsigned getIndexInParsedOperands(unsigned CompOprIdx) const; |
| }; |
| |
| // Properties of VOPD instructions. |
| class InstInfo { |
| private: |
| const ComponentInfo CompInfo[COMPONENTS_NUM]; |
| |
| public: |
| using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>; |
| |
| InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) |
| : CompInfo{OpX, OpY} {} |
| |
| InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY) |
| : CompInfo{OprInfoX, OprInfoY} {} |
| |
| const ComponentInfo &operator[](size_t ComponentIdx) const { |
| assert(ComponentIdx < COMPONENTS_NUM); |
| return CompInfo[ComponentIdx]; |
| } |
| |
| // Check VOPD operands constraints. |
| // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index |
| // for the specified component and MC operand. The callback must return 0 |
| // if the operand is not a register or not a VGPR. |
| bool hasInvalidOperand( |
| std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { |
| return getInvalidCompOperandIndex(GetRegIdx).has_value(); |
| } |
| |
| // Check VOPD operands constraints. |
| // Return the index of an invalid component operand, if any. |
| std::optional<unsigned> getInvalidCompOperandIndex( |
| std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; |
| |
| private: |
| RegIndices |
| getRegIndices(unsigned ComponentIdx, |
| std::function<unsigned(unsigned, unsigned)> GetRegIdx) const; |
| }; |
| |
| } // namespace VOPD |
| |
| LLVM_READONLY |
| std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode); |
| |
| LLVM_READONLY |
| // Get properties of 2 single VOP1/VOP2 instructions |
| // used as components to create a VOPD instruction. |
| VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY); |
| |
| LLVM_READONLY |
| // Get properties of VOPD X and Y components. |
| VOPD::InstInfo |
| getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); |
| |
| LLVM_READONLY |
| bool isTrue16Inst(unsigned Opc); |
| |
| LLVM_READONLY |
| unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); |
| |
| LLVM_READONLY |
| unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); |
| |
| void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, |
| const MCSubtargetInfo *STI); |
| |
| amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( |
| const MCSubtargetInfo *STI); |
| |
| bool isGroupSegment(const GlobalValue *GV); |
| bool isGlobalSegment(const GlobalValue *GV); |
| bool isReadOnlySegment(const GlobalValue *GV); |
| |
| /// \returns True if constants should be emitted to .text section for given |
| /// target triple \p TT, false otherwise. |
| bool shouldEmitConstantsToTextSection(const Triple &TT); |
| |
| /// \returns Integer value requested using \p F's \p Name attribute. |
| /// |
| /// \returns \p Default if attribute is not present. |
| /// |
| /// \returns \p Default and emits error if requested value cannot be converted |
| /// to integer. |
| int getIntegerAttribute(const Function &F, StringRef Name, int Default); |
| |
| /// \returns A pair of integer values requested using \p F's \p Name attribute |
| /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired |
| /// is false). |
| /// |
| /// \returns \p Default if attribute is not present. |
| /// |
| /// \returns \p Default and emits error if one of the requested values cannot be |
| /// converted to integer, or \p OnlyFirstRequired is false and "second" value is |
| /// not present. |
| std::pair<int, int> getIntegerPairAttribute(const Function &F, |
| StringRef Name, |
| std::pair<int, int> Default, |
| bool OnlyFirstRequired = false); |
| |
/// Counter values that an s_waitcnt instruction waits for.
///
/// A large value (up to and including the maximum unsigned integer) stands
/// for a "don't care" wait. Every counter defaults to ~0u, i.e. no wait.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A wait on zero for VmCnt, ExpCnt and LgkmCnt. VsCnt is zero as
  /// well when \p HasVscnt is set and left as "don't care" otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    Waitcnt Result(0, 0, 0, ~0u);
    if (HasVscnt)
      Result.VsCnt = 0;
    return Result;
  }

  /// \returns A wait on zero for every counter except VsCnt, which is left as
  /// "don't care".
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns True if at least one counter differs from ~0u.
  bool hasWait() const { return hasWaitExceptVsCnt() || hasWaitVsCnt(); }

  /// \returns True if at least one of VmCnt, ExpCnt and LgkmCnt differs from
  /// ~0u.
  bool hasWaitExceptVsCnt() const {
    return !(VmCnt == ~0u && ExpCnt == ~0u && LgkmCnt == ~0u);
  }

  /// \returns True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const { return VsCnt != ~0u; }

  /// \returns True if every counter of this wait is less than or equal to
  /// the corresponding counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    if (VmCnt > Other.VmCnt)
      return false;
    if (ExpCnt > Other.ExpCnt)
      return false;
    if (LgkmCnt > Other.LgkmCnt)
      return false;
    return VsCnt <= Other.VsCnt;
  }

  /// \returns The counter-wise minimum of this wait and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Merged;
    Merged.VmCnt = std::min(VmCnt, Other.VmCnt);
    Merged.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Merged.LgkmCnt = std::min(LgkmCnt, Other.LgkmCnt);
    Merged.VsCnt = std::min(VsCnt, Other.VsCnt);
    return Merged;
  }
};
| |
| /// \returns Vmcnt bit mask for given isa \p Version. |
| unsigned getVmcntBitMask(const IsaVersion &Version); |
| |
| /// \returns Expcnt bit mask for given isa \p Version. |
| unsigned getExpcntBitMask(const IsaVersion &Version); |
| |
| /// \returns Lgkmcnt bit mask for given isa \p Version. |
| unsigned getLgkmcntBitMask(const IsaVersion &Version); |
| |
| /// \returns Waitcnt bit mask for given isa \p Version. |
| unsigned getWaitcntBitMask(const IsaVersion &Version); |
| |
| /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. |
| unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); |
| |
| /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. |
| unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); |
| |
| /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. |
| unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); |
| |
| /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa |
| /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and |
| /// \p Lgkmcnt respectively. |
| /// |
| /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: |
| /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) |
| /// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) |
| /// \p Vmcnt = \p Waitcnt[15:10] (gfx11+) |
| /// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) |
| /// \p Expcnt = \p Waitcnt[2:0] (gfx11+) |
| /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) |
| /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) |
| /// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11+) |
| void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, |
| unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); |
| |
| Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); |
| |
| /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. |
| unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, |
| unsigned Vmcnt); |
| |
| /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. |
| unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, |
| unsigned Expcnt); |
| |
| /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. |
| unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, |
| unsigned Lgkmcnt); |
| |
| /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa |
| /// \p Version. |
| /// |
| /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: |
| /// Waitcnt[2:0] = \p Expcnt (gfx11+) |
| /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) |
| /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) |
| /// Waitcnt[6:4] = \p Expcnt (pre-gfx11) |
| /// Waitcnt[9:4] = \p Lgkmcnt (gfx11+) |
| /// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) |
| /// Waitcnt[13:8] = \p Lgkmcnt (gfx10) |
| /// Waitcnt[15:10] = \p Vmcnt (gfx11+) |
| /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) |
| /// |
| /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given |
| /// isa \p Version. |
| unsigned encodeWaitcnt(const IsaVersion &Version, |
| unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); |
| |
| /// Overload of encodeWaitcnt that takes the counters bundled in a Waitcnt |
| /// (\p Decoded) instead of as separate arguments. |
| /// NOTE(review): presumably produces the same bit layout as the overload |
| /// above for isa \p Version -- confirm against the definition. |
| unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded); |
| |
| // Helpers for the hardware-register (hwreg) operand. Only declarations are |
| // visible here. NOTE(review): the per-function notes below are inferred from |
| // the signatures -- confirm against the definitions. |
| namespace Hwreg { |
| |
| /// Presumably maps the symbolic register name \p Name to its numeric id for |
| /// subtarget \p STI. The value returned for an unknown name is not visible |
| /// here. |
| LLVM_READONLY |
| int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI); |
| |
| /// Validity check for a hardware register id \p Id. |
| LLVM_READNONE |
| bool isValidHwreg(int64_t Id); |
| |
| /// Validity check for the \p Offset field of a hwreg operand. |
| LLVM_READNONE |
| bool isValidHwregOffset(int64_t Offset); |
| |
| /// Validity check for the \p Width field of a hwreg operand. |
| LLVM_READNONE |
| bool isValidHwregWidth(int64_t Width); |
| |
| /// Combines \p Id, \p Offset and \p Width into a single encoded value. |
| /// NOTE(review): the bit layout is defined elsewhere. |
| LLVM_READNONE |
| uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width); |
| |
| /// Presumably the inverse of getHwregId: the name for \p Id on \p STI. |
| LLVM_READNONE |
| StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI); |
| |
| /// Splits the encoded value \p Val; results are written through the |
| /// reference parameters \p Id, \p Offset and \p Width. |
| void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width); |
| |
| } // namespace Hwreg |
| |
| // Helpers for the depctr operand. Only declarations are visible here. |
| // NOTE(review): the notes below are inferred from the signatures -- confirm |
| // against the definitions. |
| namespace DepCtr { |
| |
| /// Presumably the default depctr encoding for subtarget \p STI. |
| int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI); |
| /// Encodes the counter named \p Name with value \p Val for \p STI. |
| /// \p UsedOprMask is a non-const reference, so the callee may read and update |
| /// it. NOTE(review): the meaning of the int result (including any error |
| /// values) is not visible here. |
| int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, |
| const MCSubtargetInfo &STI); |
| /// Tests the encoded value \p Code for \p STI; additional information is |
| /// reported through \p HasNonDefaultVal. |
| bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, |
| const MCSubtargetInfo &STI); |
| /// Decodes from \p Code for \p STI; \p Id, \p Name, \p Val and \p IsDefault |
| /// are reference parameters used for the results. \p Id is also a non-const |
| /// reference, so it may additionally serve as input (e.g. as a cursor). |
| /// NOTE(review): confirm the in/out roles and the meaning of the bool result. |
| bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, |
| bool &IsDefault, const MCSubtargetInfo &STI); |
| |
| } // namespace DepCtr |
| |
| // Helpers for export (exp) target operands. Only declarations are visible |
| // here. NOTE(review): the notes below are inferred from the signatures -- |
| // confirm against the definitions. |
| namespace Exp { |
| |
| /// Looks up target id \p Id; results are written through \p Name and |
| /// \p Index. NOTE(review): presumably returns false when \p Id is unknown. |
| bool getTgtName(unsigned Id, StringRef &Name, int &Index); |
| |
| /// Presumably the inverse of getTgtName: the target id for \p Name. The value |
| /// returned for an unknown name is not visible here. |
| LLVM_READONLY |
| unsigned getTgtId(const StringRef Name); |
| |
| /// Tests whether target id \p Id is supported on subtarget \p STI. |
| LLVM_READNONE |
| bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI); |
| |
| } // namespace Exp |
| |
| // Helpers for MTBUF format operands: split data/numeric formats (dfmt/nfmt) |
| // and the unified format. Only declarations are visible here. |
| // NOTE(review): the notes below are inferred from the signatures -- confirm |
| // against the definitions. |
| namespace MTBUFFormat { |
| |
| /// Combines \p Dfmt and \p Nfmt into a single encoded format value. |
| LLVM_READNONE |
| int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt); |
| |
| /// Splits \p Format; results are written through \p Dfmt and \p Nfmt. |
| void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt); |
| |
| /// Name -> dfmt id lookup. The value for an unknown name is not visible here. |
| int64_t getDfmt(const StringRef Name); |
| |
| /// dfmt id -> name lookup. |
| StringRef getDfmtName(unsigned Id); |
| |
| /// Name -> nfmt id lookup for subtarget \p STI. |
| int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI); |
| |
| /// nfmt id -> name lookup for subtarget \p STI. |
| StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI); |
| |
| /// Validity check for a combined dfmt/nfmt value \p Val on \p STI. |
| bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); |
| |
| /// Validity check for an nfmt value \p Val on \p STI. |
| bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); |
| |
| /// Name -> unified format id lookup for subtarget \p STI. |
| int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI); |
| |
| /// Unified format id -> name lookup for subtarget \p STI. |
| StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI); |
| |
| /// Validity check for a unified format value \p Val on \p STI. |
| bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI); |
| |
| /// Converts a \p Dfmt / \p Nfmt pair to a unified format for \p STI. |
| /// NOTE(review): the result when no unified format matches is not visible |
| /// here. |
| int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, |
| const MCSubtargetInfo &STI); |
| |
| /// Validity check for a raw format encoding \p Val on \p STI. |
| bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); |
| |
| /// Presumably the default format encoding for subtarget \p STI. |
| unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); |
| |
| } // namespace MTBUFFormat |
| |
| // Helpers for the sendmsg operand (message id, operation id, stream id). |
| // Only declarations are visible here. NOTE(review): the notes below are |
| // inferred from the signatures -- confirm against the definitions. |
| namespace SendMsg { |
| |
| /// Name -> message id lookup for subtarget \p STI. The value for an unknown |
| /// name is not visible here. |
| LLVM_READONLY |
| int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI); |
| |
| /// Name -> operation id lookup within message \p MsgId. |
| LLVM_READONLY |
| int64_t getMsgOpId(int64_t MsgId, const StringRef Name); |
| |
| /// Message id -> name lookup for subtarget \p STI. |
| LLVM_READNONE |
| StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI); |
| |
| /// Operation id -> name lookup for message \p MsgId on \p STI. |
| LLVM_READNONE |
| StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); |
| |
| /// Validity check for message id \p MsgId on \p STI. |
| LLVM_READNONE |
| bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI); |
| |
| /// Validity check for operation \p OpId of message \p MsgId on \p STI. |
| /// \p Strict defaults to true; its exact effect is defined elsewhere. |
| LLVM_READNONE |
| bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, |
| bool Strict = true); |
| |
| /// Validity check for \p StreamId given \p MsgId and \p OpId on \p STI. |
| /// \p Strict defaults to true; its exact effect is defined elsewhere. |
| LLVM_READNONE |
| bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, |
| const MCSubtargetInfo &STI, bool Strict = true); |
| |
| /// Tests whether message \p MsgId requires an operation id on \p STI. |
| LLVM_READNONE |
| bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI); |
| |
| /// Tests whether \p MsgId / \p OpId accepts a stream id on \p STI. |
| LLVM_READNONE |
| bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); |
| |
| /// Splits the encoded value \p Val for \p STI; results are written through |
| /// \p MsgId, \p OpId and \p StreamId. |
| void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, |
| uint16_t &StreamId, const MCSubtargetInfo &STI); |
| |
| /// Combines \p MsgId, \p OpId and \p StreamId into a single encoded value. |
| /// NOTE(review): the bit layout is defined elsewhere. |
| LLVM_READNONE |
| uint64_t encodeMsg(uint64_t MsgId, |
| uint64_t OpId, |
| uint64_t StreamId); |
| |
| } // namespace SendMsg |
| |
| |
| /// Queries a per-function property of \p F. |
| /// NOTE(review): presumably the initial pixel-shader input address value |
| /// recorded on the function; the source of the value is not visible here. |
| unsigned getInitialPSInputAddr(const Function &F); |
| |
| /// NOTE(review): presumably reports whether \p F has a color export; how this |
| /// is determined is not visible here. |
| bool getHasColorExport(const Function &F); |
| |
| /// NOTE(review): presumably reports whether \p F has a depth export; how this |
| /// is determined is not visible here. |
| bool getHasDepthExport(const Function &F); |
| |
| // Calling-convention classification helpers. Only declarations are visible |
| // here; which conventions fall in each class is decided by the definitions. |
| |
| /// Tests whether \p CC is classified as a shader calling convention. |
| LLVM_READNONE |
| bool isShader(CallingConv::ID CC); |
| |
| /// Tests whether \p CC is classified as a graphics calling convention. |
| LLVM_READNONE |
| bool isGraphics(CallingConv::ID CC); |
| |
| /// Tests whether \p CC is classified as a compute calling convention. |
| LLVM_READNONE |
| bool isCompute(CallingConv::ID CC); |
| |
| /// Only true for calling conventions of functions that are entered by the |
| /// hardware (contrast with isModuleEntryFunctionCC below). |
| LLVM_READNONE |
| bool isEntryFunctionCC(CallingConv::ID CC); |
| |
| // These functions are considered entrypoints into the current module, i.e. they |
| // are allowed to be called from outside the current module. This is different |
| // from isEntryFunctionCC, which is only true for functions that are entered by |
| // the hardware. Module entry points include all entry functions but also |
| // include functions that can be called from other functions inside or outside |
| // the current module. Module entry functions are allowed to allocate LDS. |
| LLVM_READNONE |
| bool isModuleEntryFunctionCC(CallingConv::ID CC); |
| |
| /// Kernel test taking the function itself (by pointer) rather than a calling |
| /// convention. NOTE(review): null handling for \p Func is not visible here. |
| bool isKernelCC(const Function *Func); |
| |
| // FIXME: Remove this when calling conventions cleaned up |
| /// \returns true when \p CC is CallingConv::AMDGPU_KERNEL or |
| /// CallingConv::SPIR_KERNEL, and false for every other calling convention. |
| LLVM_READNONE |
| inline bool isKernel(CallingConv::ID CC) { |
|   return CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL; |
| } |
| |
| // Subtarget feature queries on \p STI. Only declarations are visible here; |
| // the exact feature bits consulted are determined by the definitions. |
| bool hasXNACK(const MCSubtargetInfo &STI); |
| bool hasSRAMECC(const MCSubtargetInfo &STI); |
| bool hasMIMG_R128(const MCSubtargetInfo &STI); |
| bool hasA16(const MCSubtargetInfo &STI); |
| bool hasG16(const MCSubtargetInfo &STI); |
| bool hasPackedD16(const MCSubtargetInfo &STI); |
| |
| // Generation, encoding and instruction-set predicates on \p STI. The names |
| // indicate the generation (or range of generations) being tested. |
| // NOTE(review): confirm exact ranges (e.g. what "Plus" includes) against the |
| // definitions. |
| bool isSI(const MCSubtargetInfo &STI); |
| bool isCI(const MCSubtargetInfo &STI); |
| bool isVI(const MCSubtargetInfo &STI); |
| bool isGFX9(const MCSubtargetInfo &STI); |
| bool isGFX9_GFX10(const MCSubtargetInfo &STI); |
| bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI); |
| bool isGFX8Plus(const MCSubtargetInfo &STI); |
| bool isGFX9Plus(const MCSubtargetInfo &STI); |
| bool isGFX10(const MCSubtargetInfo &STI); |
| bool isGFX10Plus(const MCSubtargetInfo &STI); |
| bool isNotGFX10Plus(const MCSubtargetInfo &STI); |
| bool isGFX10Before1030(const MCSubtargetInfo &STI); |
| bool isGFX11(const MCSubtargetInfo &STI); |
| bool isGFX11Plus(const MCSubtargetInfo &STI); |
| bool isNotGFX11Plus(const MCSubtargetInfo &STI); |
| bool isGCN3Encoding(const MCSubtargetInfo &STI); |
| bool isGFX10_AEncoding(const MCSubtargetInfo &STI); |
| bool isGFX10_BEncoding(const MCSubtargetInfo &STI); |
| bool hasGFX10_3Insts(const MCSubtargetInfo &STI); |
| bool isGFX90A(const MCSubtargetInfo &STI); |
| bool isGFX940(const MCSubtargetInfo &STI); |
| bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); |
| bool hasMAIInsts(const MCSubtargetInfo &STI); |
| bool hasVOPD(const MCSubtargetInfo &STI); |
| /// Computes a total VGPR count from \p ArgNumAGPR and \p ArgNumVGPR; |
| /// \p has90AInsts selects the variant of the computation. |
| /// NOTE(review): the exact formula is defined elsewhere -- confirm there. |
| int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); |
| |
| /// Is \p Reg a scalar register (SGPR)? \p TRI supplies the register info |
| /// used for the query. |
| bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); |
| |
| /// If \p Reg is a pseudo reg, return the correct hardware register given |
| /// \p STI otherwise return \p Reg. |
| unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); |
| |
| /// Convert hardware register \p Reg to a pseudo register |
| LLVM_READNONE |
| unsigned mc2PseudoReg(unsigned Reg); |
| |
| /// NOTE(review): presumably tests whether \p Reg denotes an inline value |
| /// rather than an ordinary register; confirm against the definition. |
| LLVM_READNONE |
| bool isInlineValue(unsigned Reg); |
| |
| /// Is this an AMDGPU specific source operand? These include registers, |
| /// inline constants, literals and mandatory literals (KImm). |
| bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); |
| |
| /// Is this a KImm operand? |
| bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo); |
| |
| /// Is this floating-point operand? |
| bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); |
| |
| /// Does this operand support only inlinable literals? |
| bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); |
| |
| /// Get the size in bits of a register from the register class identified by |
| /// \p RCID. |
| unsigned getRegBitWidth(unsigned RCID); |
| |
| /// Get the size in bits of a register from the register class \p RC. |
| unsigned getRegBitWidth(const MCRegisterClass &RC); |
| |
| /// Get size of register operand \p OpNo of \p Desc, using \p MRI for the |
| /// register information. NOTE(review): the unit of the result is not stated |
| /// here -- confirm against the definition. |
| unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, |
| unsigned OpNo); |
| |
| /// Maps the operand type of \p OpInfo to a size: 2 for the 16-bit (and packed |
| /// V2x16) operand types, 4 for the 32-bit (and V2x32) operand types and both |
| /// KIMM types, and 8 for the 64-bit operand types. Any other operand type is |
| /// treated as unreachable. |
| LLVM_READNONE |
| inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { |
|   switch (OpInfo.OperandType) { |
|   // 16-bit scalar and packed 2x16 operands. |
|   case AMDGPU::OPERAND_REG_IMM_INT16: |
|   case AMDGPU::OPERAND_REG_IMM_FP16: |
|   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
|   case AMDGPU::OPERAND_REG_IMM_V2INT16: |
|   case AMDGPU::OPERAND_REG_IMM_V2FP16: |
|   case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
|   case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
|   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
|   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
|   case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
|   case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
|   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
|   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
|     return 2; |
| |
|   // 32-bit scalar and 2x32 operands, plus the mandatory literals. |
|   case AMDGPU::OPERAND_REG_IMM_INT32: |
|   case AMDGPU::OPERAND_REG_IMM_FP32: |
|   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
|   case AMDGPU::OPERAND_REG_IMM_V2INT32: |
|   case AMDGPU::OPERAND_REG_IMM_V2FP32: |
|   case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
|   case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
|   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
|   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
|   case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
|   case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
|   case AMDGPU::OPERAND_KIMM32: |
|   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 |
|     return 4; |
| |
|   // 64-bit operands. |
|   case AMDGPU::OPERAND_REG_IMM_INT64: |
|   case AMDGPU::OPERAND_REG_IMM_FP64: |
|   case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
|   case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
|   case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
|     return 8; |
| |
|   default: |
|     break; |
|   } |
| |
|   // Every operand type without an explicit size above lands here. |
|   llvm_unreachable("unhandled operand type"); |
| } |
| |
| /// Convenience overload: looks up operand number \p OpNo in the operand list |
| /// of \p Desc and returns getOperandSize for that operand's info. |
| LLVM_READNONE |
| inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { |
|   const MCOperandInfo &SelectedOpInfo = Desc.operands()[OpNo]; |
|   return getOperandSize(SelectedOpInfo); |
| } |
| |
| /// \returns true when \p Literal lies in the closed range [-16, 64], i.e. it |
| /// is inlinable and not one of the values intended for floating point |
| /// values. |
| LLVM_READNONE |
| inline bool isInlinableIntLiteral(int64_t Literal) { |
|   constexpr int64_t MinInlinable = -16; |
|   constexpr int64_t MaxInlinable = 64; |
|   return !(Literal < MinInlinable || Literal > MaxInlinable); |
| } |
| |
| /// Is this 64-bit literal inlinable? \p HasInv2Pi is forwarded to the check. |
| /// NOTE(review): presumably \p HasInv2Pi says whether the 1/(2*pi) inline |
| /// constant is available -- confirm against the definition. |
| LLVM_READNONE |
| bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); |
| |
| /// 32-bit counterpart of isInlinableLiteral64. |
| LLVM_READNONE |
| bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); |
| |
| /// 16-bit counterpart of isInlinableLiteral64. |
| LLVM_READNONE |
| bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); |
| |
| /// Variant for a packed value carried in a 32-bit \p Literal. |
| /// NOTE(review): presumably two 16-bit halves (V2x16) -- confirm how the |
| /// halves are checked against the definition. |
| LLVM_READNONE |
| bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); |
| |
| /// Integer-only variant of isInlinableLiteralV216 (takes no \p HasInv2Pi). |
| LLVM_READNONE |
| bool isInlinableIntLiteralV216(int32_t Literal); |
| |
| /// NOTE(review): presumably tests whether the packed \p Literal can be folded |
| /// into an instruction; the exact criterion is defined elsewhere. |
| LLVM_READNONE |
| bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi); |
| |
| /// Tests whether argument \p Arg is passed in SGPRs. |
| /// NOTE(review): null handling for \p Arg is not visible here. |
| bool isArgPassedInSGPR(const Argument *Arg); |
| |
| /// Tests whether the already-encoded unsigned SMRD offset \p EncodedOffset is |
| /// legal for subtarget \p ST. |
| LLVM_READONLY |
| bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, |
| int64_t EncodedOffset); |
| |
| /// Tests whether the already-encoded signed SMRD offset \p EncodedOffset is |
| /// legal for subtarget \p ST. \p IsBuffer is an additional input to the check. |
| /// NOTE(review): presumably \p IsBuffer selects the S_BUFFER form. |
| LLVM_READONLY |
| bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, |
| int64_t EncodedOffset, |
| bool IsBuffer); |
| |
| /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate |
| /// offsets. |
| uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); |
| |
| /// \returns The encoding that will be used for \p ByteOffset in the |
| /// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10 |
| /// S_LOAD instructions have a signed offset, on other subtargets it is |
| /// unsigned. S_BUFFER has an unsigned offset for all subtargets. |
| std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, |
| int64_t ByteOffset, bool IsBuffer); |
| |
| /// \return The encoding that can be used for a 32-bit literal offset in an SMRD |
| /// instruction. This is only useful on CI. |
| std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, |
| int64_t ByteOffset); |
| |
| /// For FLAT segment the offset must be positive; |
| /// MSB is ignored and forced to zero. |
| /// |
| /// \return The number of bits available for the signed offset field in flat |
| /// instructions. Note that some forms of the instruction disallow negative |
| /// offsets. |
| unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST); |
| |
| /// \returns true if this offset is small enough to fit in the SMRD |
| /// offset field. \p ByteOffset should be the offset in bytes and |
| /// not the encoded offset. |
| bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); |
| |
| /// Splits the MUBUF offset \p Imm; results are written through \p SOffset and |
| /// \p ImmOffset. \p Alignment defaults to Align(4). |
| /// NOTE(review): presumably returns false when no legal split exists, and the |
| /// role of \p Alignment in the split is defined elsewhere -- confirm. |
| bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, |
| const GCNSubtarget *Subtarget, |
| Align Alignment = Align(4)); |
| |
| /// \returns true if \p DC falls within the inclusive range |
| /// [DPP::ROW_NEWBCAST_FIRST, DPP::ROW_NEWBCAST_LAST]. |
| LLVM_READNONE |
| inline bool isLegal64BitDPPControl(unsigned DC) { |
|   const bool BelowRange = DC < DPP::ROW_NEWBCAST_FIRST; |
|   const bool AboveRange = DC > DPP::ROW_NEWBCAST_LAST; |
|   return !BelowRange && !AboveRange; |
| } |
| |
| /// \returns true if the intrinsic identified by \p IntrID is divergent, i.e. |
| /// it is a source of divergence. |
| bool isIntrinsicSourceOfDivergence(unsigned IntrID); |
| |
| // Track defaults for fields in the MODE register. |
| struct SIModeRegisterDefaults { |
|   /// Floating point opcodes that support exception flag gathering quiet and |
|   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 |
|   /// become IEEE 754-2008 compliant due to signaling NaN propagation and |
|   /// quieting. |
|   bool IEEE : 1; |
| |
|   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set, |
|   /// clamp NaN to zero; otherwise, pass NaN through. |
|   bool DX10Clamp : 1; |
| |
|   /// If this is set, neither input nor output denormals are flushed for most |
|   /// f32 instructions. |
|   DenormalMode FP32Denormals; |
| |
|   /// If this is set, neither input nor output denormals are flushed for both |
|   /// f64 and f16/v2f16 instructions. |
|   DenormalMode FP64FP16Denormals; |
| |
|   /// Default state: IEEE and DX10Clamp set, both denormal modes IEEE. |
|   SIModeRegisterDefaults() |
|       : IEEE(true), DX10Clamp(true), FP32Denormals(DenormalMode::getIEEE()), |
|         FP64FP16Denormals(DenormalMode::getIEEE()) {} |
| |
|   SIModeRegisterDefaults(const Function &F); |
| |
|   /// Default state adjusted for \p CC: the IEEE bit is cleared when \p CC is a |
|   /// shader calling convention; everything else keeps its default. |
|   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { |
|     SIModeRegisterDefaults Defaults; |
|     if (AMDGPU::isShader(CC)) |
|       Defaults.IEEE = false; |
|     return Defaults; |
|   } |
| |
|   /// Field-wise equality over all four tracked settings. |
|   bool operator ==(const SIModeRegisterDefaults Other) const { |
|     const bool FlagsMatch = |
|         IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp; |
|     const bool DenormalsMatch = |
|         FP32Denormals == Other.FP32Denormals && |
|         FP64FP16Denormals == Other.FP64FP16Denormals; |
|     return FlagsMatch && DenormalsMatch; |
|   } |
| |
|   /// True when the FP32 denormal mode is IEEE for both input and output. |
|   bool allFP32Denormals() const { |
|     const DenormalMode NoFlushing = DenormalMode::getIEEE(); |
|     return FP32Denormals == NoFlushing; |
|   } |
| |
|   /// True when the FP64/FP16 denormal mode is IEEE for both input and output. |
|   bool allFP64FP16Denormals() const { |
|     const DenormalMode NoFlushing = DenormalMode::getIEEE(); |
|     return FP64FP16Denormals == NoFlushing; |
|   } |
| |
|   /// Get the encoding value for the FP_DENORM bits of the mode register for |
|   /// the FP32 denormal mode. |
|   uint32_t fpDenormModeSPValue() const { |
|     return encodeFPDenormMode(FP32Denormals); |
|   } |
| |
|   /// Get the encoding value for the FP_DENORM bits of the mode register for |
|   /// the FP64/FP16 denormal mode. |
|   uint32_t fpDenormModeDPValue() const { |
|     return encodeFPDenormMode(FP64FP16Denormals); |
|   } |
| |
|   /// A flag is compatible unless it is enabled in the caller but disabled in |
|   /// the callee. |
|   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) { |
|     return !CallerMode || CalleeMode; |
|   } |
| |
|   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should |
|   // be able to override. |
|   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { |
|     // The clamp and IEEE bits have to agree exactly. |
|     if (DX10Clamp != CalleeMode.DX10Clamp || IEEE != CalleeMode.IEEE) |
|       return false; |
| |
|     // Allow inlining denormals enabled into denormals flushed functions. |
|     const DenormalMode &CalleeF64F16 = CalleeMode.FP64FP16Denormals; |
|     const DenormalMode &CalleeF32 = CalleeMode.FP32Denormals; |
| |
|     if (!oneWayCompatible( |
|             FP64FP16Denormals.Input != DenormalMode::PreserveSign, |
|             CalleeF64F16.Input != DenormalMode::PreserveSign)) |
|       return false; |
|     if (!oneWayCompatible( |
|             FP64FP16Denormals.Output != DenormalMode::PreserveSign, |
|             CalleeF64F16.Output != DenormalMode::PreserveSign)) |
|       return false; |
|     if (!oneWayCompatible(FP32Denormals.Input != DenormalMode::PreserveSign, |
|                           CalleeF32.Input != DenormalMode::PreserveSign)) |
|       return false; |
|     return oneWayCompatible(FP32Denormals.Output != DenormalMode::PreserveSign, |
|                             CalleeF32.Output != DenormalMode::PreserveSign); |
|   } |
| |
| private: |
|   /// Shared encoder for both FP_DENORM fields: PreserveSign on a side means |
|   /// that side is flushed. |
|   static uint32_t encodeFPDenormMode(const DenormalMode &Mode) { |
|     const bool FlushIn = Mode.Input == DenormalMode::PreserveSign; |
|     const bool FlushOut = Mode.Output == DenormalMode::PreserveSign; |
|     if (FlushIn && FlushOut) |
|       return FP_DENORM_FLUSH_IN_FLUSH_OUT; |
|     if (FlushOut) |
|       return FP_DENORM_FLUSH_OUT; |
|     if (FlushIn) |
|       return FP_DENORM_FLUSH_IN; |
|     return FP_DENORM_FLUSH_NONE; |
|   } |
| }; |
| |
| } // end namespace AMDGPU |
| |
| /// Stream insertion operator for AMDGPU::IsaInfo::TargetIDSetting, declared |
| /// in namespace llvm (outside namespace AMDGPU). Writes \p S to \p OS and |
| /// returns a raw_ostream reference. |
| /// NOTE(review): the printed representation is defined elsewhere. |
| raw_ostream &operator<<(raw_ostream &OS, |
| const AMDGPU::IsaInfo::TargetIDSetting S); |
| |
| } // end namespace llvm |
| |
| #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H |