third_party/llvm-16.0/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp - SwiftShader - Git at Google

 //===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file This file contains the AMDGPU DAG scheduling
 /// mutation to pair VOPD instructions back to back. It also contains
 /// subroutines useful in the creation of VOPD instructions.
 //
 //===----------------------------------------------------------------------===//

 #include "GCNVOPDUtils.h"
 #include "AMDGPUSubtarget.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MacroFusion.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/MC/MCInst.h"

 using namespace llvm;

 #define DEBUG_TYPE "gcn-vopd-utils"

 bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                    const MachineInstr &FirstMI,
                                    const MachineInstr &SecondMI) {
   namespace VOPD = AMDGPU::VOPD;

   const MachineFunction *MF = FirstMI.getMF();
   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
   const MachineRegisterInfo &MRI = MF->getRegInfo();
   // Literals also count against scalar bus limit
   SmallVector<const MachineOperand *> UniqueLiterals;
   auto addLiteral = [&](const MachineOperand &Op) {
     for (auto &Literal : UniqueLiterals) {
       if (Literal->isIdenticalTo(Op))
         return;
     }
     UniqueLiterals.push_back(&Op);
   };
   SmallVector<Register> UniqueScalarRegs;
   assert([&]() -> bool {
     for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
          MII != FirstMI.getParent()->instr_end(); ++MII) {
       if (&*MII == &SecondMI)
         return true;
     }
     return false;
   }() && "Expected FirstMI to precede SecondMI");
   // Cannot pair dependent instructions
   for (const auto &Use : SecondMI.uses())
     if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg()))
       return false;

   auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
     const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
     const MachineOperand &Operand = MI.getOperand(OperandIdx);
     if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
       return Operand.getReg();
     return Register();
   };

   auto InstInfo =
       AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());

   for (auto CompIdx : VOPD::COMPONENTS) {
     const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

     const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
     if (Src0.isReg()) {
       if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
         if (!is_contained(UniqueScalarRegs, Src0.getReg()))
           UniqueScalarRegs.push_back(Src0.getReg());
       }
     } else {
       if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
         addLiteral(Src0);
     }

     if (InstInfo[CompIdx].hasMandatoryLiteral()) {
       auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
       addLiteral(MI.getOperand(CompOprIdx));
     }
     if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
       UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
   }

   if (UniqueLiterals.size() > 1)
     return false;
   if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
     return false;
   if (InstInfo.hasInvalidOperand(getVRegIdx))
     return false;

   LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                     << "\n\tY: " << SecondMI << "\n");
   return true;
 }

 /// Check if the instr pair, FirstMI and SecondMI, should be scheduled
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
 static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                        const TargetSubtargetInfo &TSI,
                                        const MachineInstr *FirstMI,
                                        const MachineInstr &SecondMI) {
   const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
   unsigned Opc2 = SecondMI.getOpcode();
   auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);

   // One instruction case
   if (!FirstMI)
     return SecondCanBeVOPD.Y;

   unsigned Opc = FirstMI->getOpcode();
   auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);

   if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
         (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
     return false;

   return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
 }

 namespace {
 /// Adapts design from MacroFusion
 /// Puts valid candidate instructions back-to-back so they can easily
 /// be turned into VOPD instructions
 /// Greedily pairs instruction candidates. O(n^2) algorithm.
 struct VOPDPairingMutation : ScheduleDAGMutation {
   ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer

   VOPDPairingMutation(
       ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

   void apply(ScheduleDAGInstrs *DAG) override {
     const TargetInstrInfo &TII = *DAG->TII;
     const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
     if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
       LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
       return;
     }

     std::vector<SUnit>::iterator ISUI, JSUI;
     for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
       const MachineInstr *IMI = ISUI->getInstr();
       if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
         continue;
       if (!hasLessThanNumFused(*ISUI, 2))
         continue;

       for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
         if (JSUI->isBoundaryNode())
           continue;
         const MachineInstr *JMI = JSUI->getInstr();
         if (!hasLessThanNumFused(*JSUI, 2) ||
             !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
           continue;
         if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
           break;
       }
     }
     LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
   }
 };
 } // namespace

 /// Factory for the VOPD pairing mutation, wired to the VOPD adjacency
 /// predicate defined in this file.
 std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
   std::unique_ptr<ScheduleDAGMutation> Mutation =
       std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
   return Mutation;
 }
	//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file This file contains the AMDGPU DAG scheduling
	/// mutation to pair VOPD instructions back to back. It also contains
	/// subroutines useful in the creation of VOPD instructions.
	//
	//===----------------------------------------------------------------------===//

	#include "GCNVOPDUtils.h"
	#include "AMDGPUSubtarget.h"
	#include "GCNSubtarget.h"
	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
	#include "SIInstrInfo.h"
	#include "Utils/AMDGPUBaseInfo.h"
	#include "llvm/ADT/STLExtras.h"
	#include "llvm/ADT/SmallVector.h"
	#include "llvm/CodeGen/MachineBasicBlock.h"
	#include "llvm/CodeGen/MachineInstr.h"
	#include "llvm/CodeGen/MachineOperand.h"
	#include "llvm/CodeGen/MachineRegisterInfo.h"
	#include "llvm/CodeGen/MacroFusion.h"
	#include "llvm/CodeGen/ScheduleDAG.h"
	#include "llvm/CodeGen/ScheduleDAGMutation.h"
	#include "llvm/CodeGen/TargetInstrInfo.h"
	#include "llvm/MC/MCInst.h"

	using namespace llvm;

	#define DEBUG_TYPE "gcn-vopd-utils"

	bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
	const MachineInstr &FirstMI,
	const MachineInstr &SecondMI) {
	namespace VOPD = AMDGPU::VOPD;

	const MachineFunction *MF = FirstMI.getMF();
	const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
	const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
	const MachineRegisterInfo &MRI = MF->getRegInfo();
	// Literals also count against scalar bus limit
	SmallVector<const MachineOperand *> UniqueLiterals;
	auto addLiteral = [&](const MachineOperand &Op) {
	for (auto &Literal : UniqueLiterals) {
	if (Literal->isIdenticalTo(Op))
	return;
	}
	UniqueLiterals.push_back(&Op);
	};
	SmallVector<Register> UniqueScalarRegs;
	assert([&]() -> bool {
	for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
	MII != FirstMI.getParent()->instr_end(); ++MII) {
	if (&*MII == &SecondMI)
	return true;
	}
	return false;
	}() && "Expected FirstMI to precede SecondMI");
	// Cannot pair dependent instructions
	for (const auto &Use : SecondMI.uses())
	if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg()))
	return false;

	auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
	const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
	const MachineOperand &Operand = MI.getOperand(OperandIdx);
	if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
	return Operand.getReg();
	return Register();
	};

	auto InstInfo =
	AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());

	for (auto CompIdx : VOPD::COMPONENTS) {
	const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

	const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
	if (Src0.isReg()) {
	if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
	if (!is_contained(UniqueScalarRegs, Src0.getReg()))
	UniqueScalarRegs.push_back(Src0.getReg());
	}
	} else {
	if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
	addLiteral(Src0);
	}

	if (InstInfo[CompIdx].hasMandatoryLiteral()) {
	auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
	addLiteral(MI.getOperand(CompOprIdx));
	}
	if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
	UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
	}

	if (UniqueLiterals.size() > 1)
	return false;
	if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
	return false;
	if (InstInfo.hasInvalidOperand(getVRegIdx))
	return false;

	LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
	<< "\n\tY: " << SecondMI << "\n");
	return true;
	}

	/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
	/// together. Given SecondMI, when FirstMI is unspecified, then check if
	/// SecondMI may be part of a fused pair at all.
	///
	/// \returns true when the opcodes can take the X/Y roles in some order and
	/// the operands satisfy checkVOPDRegConstraints; for a null \p FirstMI,
	/// true when \p SecondMI can be a Y component.
	static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
	                                       const TargetSubtargetInfo &TSI,
	                                       const MachineInstr *FirstMI,
	                                       const MachineInstr &SecondMI) {
	  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
	  unsigned Opc2 = SecondMI.getOpcode();
	  auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);

	  // One instruction case
	  if (!FirstMI)
	    return SecondCanBeVOPD.Y;

	  unsigned Opc = FirstMI->getOpcode();
	  auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);

	  // Either instruction may serve as X as long as the other can serve as Y.
	  if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
	        (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
	    return false;

	  return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
	}

	namespace {
	/// Adapts design from MacroFusion
	/// Puts valid candidate instructions back-to-back so they can easily
	/// be turned into VOPD instructions
	/// Greedily pairs instruction candidates. O(n^2) algorithm.
	struct VOPDPairingMutation : ScheduleDAGMutation {
	  ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer

	  VOPDPairingMutation(
	      ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
	      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

	  /// Walk all scheduling units and try to fuse each candidate with the first
	  /// later unit that the predicate accepts.
	  void apply(ScheduleDAGInstrs *DAG) override {
	    const TargetInstrInfo &TII = *DAG->TII;
	    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
	    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
	      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
	      return;
	    }

	    std::vector<SUnit>::iterator ISUI, JSUI;
	    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
	      const MachineInstr *IMI = ISUI->getInstr();
	      // Skip units that cannot be in a pair or have no fusion slot left.
	      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
	        continue;
	      if (!hasLessThanNumFused(*ISUI, 2))
	        continue;

	      // Greedy search: stop at the first later unit that fuses.
	      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
	        if (JSUI->isBoundaryNode())
	          continue;
	        const MachineInstr *JMI = JSUI->getInstr();
	        if (!hasLessThanNumFused(*JSUI, 2) ||
	            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
	          continue;
	        // fuseInstructionPair takes the DAG and both units by reference.
	        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
	          break;
	      }
	    }
	    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
	  }
	};
	} // namespace

	/// Factory for the VOPD pairing mutation, wired to the VOPD adjacency
	/// predicate defined in this file.
	std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
	  std::unique_ptr<ScheduleDAGMutation> Mutation =
	      std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
	  return Mutation;
	}