| //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass does combining of machine instructions at the generic MI level, |
| // after register banks are known. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPU.h" |
| #include "AMDGPULegalizerInfo.h" |
| #include "AMDGPURegisterBankInfo.h" |
| #include "GCNSubtarget.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "SIMachineFunctionInfo.h" |
| #include "llvm/CodeGen/GlobalISel/Combiner.h" |
| #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" |
| #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" |
| #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" |
| #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
| #include "llvm/CodeGen/MachineDominators.h" |
| #include "llvm/CodeGen/TargetPassConfig.h" |
| #include "llvm/IR/IntrinsicsAMDGPU.h" |
| #include "llvm/Target/TargetMachine.h" |
| #define DEBUG_TYPE "amdgpu-regbank-combiner" |
| |
| using namespace llvm; |
| using namespace MIPatternMatch; |
| |
| class AMDGPURegBankCombinerHelper { |
| protected: |
| MachineIRBuilder &B; |
| MachineFunction &MF; |
| MachineRegisterInfo &MRI; |
| const GCNSubtarget &Subtarget; |
| const RegisterBankInfo &RBI; |
| const TargetRegisterInfo &TRI; |
| const SIInstrInfo &TII; |
| CombinerHelper &Helper; |
| |
| public: |
| AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) |
| : B(B), MF(B.getMF()), MRI(*B.getMRI()), |
| Subtarget(MF.getSubtarget<GCNSubtarget>()), |
| RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()), |
| TII(*Subtarget.getInstrInfo()), Helper(Helper){}; |
| |
| bool isVgprRegBank(Register Reg); |
| Register getAsVgpr(Register Reg); |
| |
| struct MinMaxMedOpc { |
| unsigned Min, Max, Med; |
| }; |
| |
| struct Med3MatchInfo { |
| unsigned Opc; |
| Register Val0, Val1, Val2; |
| }; |
| |
| MinMaxMedOpc getMinMaxPair(unsigned Opc); |
| |
| template <class m_Cst, typename CstTy> |
| bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, |
| Register &Val, CstTy &K0, CstTy &K1); |
| |
| bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); |
| bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); |
| bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg); |
| bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg); |
| void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo); |
| void applyClamp(MachineInstr &MI, Register &Reg); |
| |
| private: |
| AMDGPU::SIModeRegisterDefaults getMode(); |
| bool getIEEE(); |
| bool getDX10Clamp(); |
| bool isFminnumIeee(const MachineInstr &MI); |
| bool isFCst(MachineInstr *MI); |
| bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1); |
| }; |
| |
| bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) { |
| return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID; |
| } |
| |
| Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) { |
| if (isVgprRegBank(Reg)) |
| return Reg; |
| |
| // Search for existing copy of Reg to vgpr. |
| for (MachineInstr &Use : MRI.use_instructions(Reg)) { |
| Register Def = Use.getOperand(0).getReg(); |
| if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def)) |
| return Def; |
| } |
| |
| // Copy Reg to vgpr. |
| Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0); |
| MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID)); |
| return VgprReg; |
| } |
| |
| AMDGPURegBankCombinerHelper::MinMaxMedOpc |
| AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) { |
| switch (Opc) { |
| default: |
| llvm_unreachable("Unsupported opcode"); |
| case AMDGPU::G_SMAX: |
| case AMDGPU::G_SMIN: |
| return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3}; |
| case AMDGPU::G_UMAX: |
| case AMDGPU::G_UMIN: |
| return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3}; |
| case AMDGPU::G_FMAXNUM: |
| case AMDGPU::G_FMINNUM: |
| return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3}; |
| case AMDGPU::G_FMAXNUM_IEEE: |
| case AMDGPU::G_FMINNUM_IEEE: |
| return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE, |
| AMDGPU::G_AMDGPU_FMED3}; |
| } |
| } |
| |
| template <class m_Cst, typename CstTy> |
| bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI, |
| MachineRegisterInfo &MRI, |
| MinMaxMedOpc MMMOpc, Register &Val, |
| CstTy &K0, CstTy &K1) { |
| // 4 operand commutes of: min(max(Val, K0), K1). |
| // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)). |
| // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0). |
| // 4 operand commutes of: max(min(Val, K1), K0). |
| // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)). |
| // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1). |
| return mi_match( |
| MI, MRI, |
| m_any_of( |
| m_CommutativeBinOp( |
| MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)), |
| m_Cst(K1)), |
| m_CommutativeBinOp( |
| MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)), |
| m_Cst(K0)))); |
| } |
| |
| bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3( |
| MachineInstr &MI, Med3MatchInfo &MatchInfo) { |
| Register Dst = MI.getOperand(0).getReg(); |
| if (!isVgprRegBank(Dst)) |
| return false; |
| |
| // med3 for i16 is only available on gfx9+, and not available for v2i16. |
| LLT Ty = MRI.getType(Dst); |
| if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) && |
| Ty != LLT::scalar(32)) |
| return false; |
| |
| MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode()); |
| Register Val; |
| std::optional<ValueAndVReg> K0, K1; |
| // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1. |
| if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1)) |
| return false; |
| |
| if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value)) |
| return false; |
| if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value)) |
| return false; |
| |
| MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg}; |
| return true; |
| } |
| |
| // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) |
| // ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K |
| // ieee = false : min/max(NaN, K) = K |
| // clamp(NaN) = dx10_clamp ? 0.0 : NaN |
| // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input. |
| // Other operand commutes (see matchMed) give same result since min and max are |
| // commutative. |
| |
// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
| // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0. |
| // Val = SNaN only for ieee = true |
| // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1 |
| // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1 |
| // max(min(SNaN, K1), K0) = max(K1, K0) = K1 |
| // Val = NaN,ieee = false or Val = QNaN,ieee = true |
| // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0 |
| // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true) |
| // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0 |
| bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3( |
| MachineInstr &MI, Med3MatchInfo &MatchInfo) { |
| Register Dst = MI.getOperand(0).getReg(); |
| LLT Ty = MRI.getType(Dst); |
| |
| // med3 for f16 is only available on gfx9+, and not available for v2f16. |
| if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) && |
| Ty != LLT::scalar(32)) |
| return false; |
| |
| auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); |
| |
| Register Val; |
| std::optional<FPValueAndVReg> K0, K1; |
| // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1. |
| if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1)) |
| return false; |
| |
| if (K0->Value > K1->Value) |
| return false; |
| |
| // For IEEE=false perform combine only when it's safe to assume that there are |
| // no NaN inputs. Most often MI is marked with nnan fast math flag. |
| // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to |
| // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner |
| // nodes(max/min) have same behavior when one input is NaN and other isn't. |
| // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN, |
| // also post-legalizer inputs to min/max are fcanonicalized (never SNaN). |
| if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) { |
| // Don't fold single use constant that can't be inlined. |
| if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) && |
| (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) { |
| MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg}; |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI, |
| Register &Reg) { |
| // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16). |
| auto OpcodeTriple = getMinMaxPair(MI.getOpcode()); |
| Register Val; |
| std::optional<FPValueAndVReg> K0, K1; |
| // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). |
| if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1)) |
| return false; |
| |
| if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0)) |
| return false; |
| |
| // For IEEE=false perform combine only when it's safe to assume that there are |
| // no NaN inputs. Most often MI is marked with nnan fast math flag. |
| // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates |
| // to 0.0 requires dx10_clamp = true. |
| if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) && |
| isKnownNeverSNaN(Val, MRI)) || |
| isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) { |
| Reg = Val; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true. |
| // Val = SNaN only for ieee = true. It is important which operand is NaN. |
| // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0 |
| // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0 |
| // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN |
| // Val = NaN,ieee = false or Val = QNaN,ieee = true |
| // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0 |
| // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0 |
| // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0 |
| bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI, |
| Register &Reg) { |
| if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3) |
| return false; |
| |
| // In llvm-ir, clamp is often represented as an intrinsic call to |
| // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders. |
| MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI); |
| MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI); |
| MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI); |
| |
| if (isFCst(Src0) && !isFCst(Src1)) |
| std::swap(Src0, Src1); |
| if (isFCst(Src1) && !isFCst(Src2)) |
| std::swap(Src1, Src2); |
| if (isFCst(Src0) && !isFCst(Src1)) |
| std::swap(Src0, Src1); |
| if (!isClampZeroToOne(Src1, Src2)) |
| return false; |
| |
| Register Val = Src0->getOperand(0).getReg(); |
| |
| auto isOp3Zero = [&]() { |
| MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI); |
| if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT) |
| return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0); |
| return false; |
| }; |
| // For IEEE=false perform combine only when it's safe to assume that there are |
| // no NaN inputs. Most often MI is marked with nnan fast math flag. |
| // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold |
| // when Val could be QNaN. If Val can also be SNaN third input should be 0.0. |
| if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) || |
| (getIEEE() && getDX10Clamp() && |
| (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) { |
| Reg = Val; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) { |
| B.setInstrAndDebugLoc(MI); |
| B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg}, |
| MI.getFlags()); |
| MI.eraseFromParent(); |
| } |
| |
| void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI, |
| Med3MatchInfo &MatchInfo) { |
| B.setInstrAndDebugLoc(MI); |
| B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)}, |
| {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1), |
| getAsVgpr(MatchInfo.Val2)}, |
| MI.getFlags()); |
| MI.eraseFromParent(); |
| } |
| |
| AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() { |
| return MF.getInfo<SIMachineFunctionInfo>()->getMode(); |
| } |
| |
| bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; } |
| |
| bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; } |
| |
| bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) { |
| return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE; |
| } |
| |
| bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) { |
| return MI->getOpcode() == AMDGPU::G_FCONSTANT; |
| } |
| |
| bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0, |
| MachineInstr *K1) { |
| if (isFCst(K0) && isFCst(K1)) { |
| const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm(); |
| const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm(); |
| return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) || |
| (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0)); |
| } |
| return false; |
| } |
| |
| class AMDGPURegBankCombinerHelperState { |
| protected: |
| CombinerHelper &Helper; |
| AMDGPURegBankCombinerHelper &RegBankHelper; |
| |
| public: |
| AMDGPURegBankCombinerHelperState(CombinerHelper &Helper, |
| AMDGPURegBankCombinerHelper &RegBankHelper) |
| : Helper(Helper), RegBankHelper(RegBankHelper) {} |
| }; |
| |
| #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS |
| #include "AMDGPUGenRegBankGICombiner.inc" |
| #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS |
| |
| namespace { |
| #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H |
| #include "AMDGPUGenRegBankGICombiner.inc" |
| #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H |
| |
| class AMDGPURegBankCombinerInfo final : public CombinerInfo { |
| GISelKnownBits *KB; |
| MachineDominatorTree *MDT; |
| |
| public: |
| AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg; |
| |
| AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, |
| const AMDGPULegalizerInfo *LI, |
| GISelKnownBits *KB, MachineDominatorTree *MDT) |
| : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, |
| /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), |
| KB(KB), MDT(MDT) { |
| if (!GeneratedRuleCfg.parseCommandLineOption()) |
| report_fatal_error("Invalid rule identifier"); |
| } |
| |
| bool combine(GISelChangeObserver &Observer, MachineInstr &MI, |
| MachineIRBuilder &B) const override; |
| }; |
| |
| bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer, |
| MachineInstr &MI, |
| MachineIRBuilder &B) const { |
| CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT); |
| AMDGPURegBankCombinerHelper RegBankHelper(B, Helper); |
| AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper, |
| RegBankHelper); |
| |
| if (Generated.tryCombineAll(Observer, MI, B)) |
| return true; |
| |
| return false; |
| } |
| |
| #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP |
| #include "AMDGPUGenRegBankGICombiner.inc" |
| #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP |
| |
| // Pass boilerplate |
| // ================ |
| |
| class AMDGPURegBankCombiner : public MachineFunctionPass { |
| public: |
| static char ID; |
| |
| AMDGPURegBankCombiner(bool IsOptNone = false); |
| |
| StringRef getPassName() const override { |
| return "AMDGPURegBankCombiner"; |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override; |
| private: |
| bool IsOptNone; |
| }; |
| } // end anonymous namespace |
| |
| void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const { |
| AU.addRequired<TargetPassConfig>(); |
| AU.setPreservesCFG(); |
| getSelectionDAGFallbackAnalysisUsage(AU); |
| AU.addRequired<GISelKnownBitsAnalysis>(); |
| AU.addPreserved<GISelKnownBitsAnalysis>(); |
| if (!IsOptNone) { |
| AU.addRequired<MachineDominatorTree>(); |
| AU.addPreserved<MachineDominatorTree>(); |
| } |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone) |
| : MachineFunctionPass(ID), IsOptNone(IsOptNone) { |
| initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) { |
| if (MF.getProperties().hasProperty( |
| MachineFunctionProperties::Property::FailedISel)) |
| return false; |
| auto *TPC = &getAnalysis<TargetPassConfig>(); |
| const Function &F = MF.getFunction(); |
| bool EnableOpt = |
| MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); |
| |
| const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| const AMDGPULegalizerInfo *LI |
| = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); |
| |
| GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); |
| MachineDominatorTree *MDT = |
| IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); |
| AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), |
| F.hasMinSize(), LI, KB, MDT); |
| Combiner C(PCInfo, TPC); |
| return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); |
| } |
| |
| char AMDGPURegBankCombiner::ID = 0; |
| INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE, |
| "Combine AMDGPU machine instrs after regbankselect", |
| false, false) |
| INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
| INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) |
| INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE, |
| "Combine AMDGPU machine instrs after regbankselect", false, |
| false) |
| |
| namespace llvm { |
| FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) { |
| return new AMDGPURegBankCombiner(IsOptNone); |
| } |
| } // end namespace llvm |