third_party/llvm-16.0/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp - SwiftShader - Git at Google

 //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// Pass to temporarily raise the wave priority from the start of the
 /// shader function until its last VMEM instructions, to allow younger
 /// waves to issue their VMEM instructions as well.
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/Allocator.h"

 using namespace llvm;

 #define DEBUG_TYPE "amdgpu-set-wave-priority"

 /// Command-line default for the VALU instruction count threshold.
 /// runOnMachineFunction starts from this value and lets the
 /// "amdgpu-wave-priority-threshold" function attribute replace it.
 static cl::opt<unsigned> DefaultVALUInstsThreshold(
     "amdgpu-set-wave-priority-valu-insts-threshold", cl::Hidden, cl::init(100),
     cl::desc("VALU instruction count threshold for adjusting wave priority"));

 namespace {

 /// Per-block summary filled in by the analysis loop of
 /// AMDGPUSetWavePriority::runOnMachineFunction.
 struct MBBInfo {
   MBBInfo() = default;

   /// VALU instructions counted from the top of the block while neither a
   /// VMEM load nor a DS instruction has been seen. Reset to zero when a VMEM
   /// load is found; if the whole block is such a prefix, the largest value
   /// among its successors is added on top.
   unsigned NumVALUInstsAtStart = 0;

   /// Set when any successor has this flag set, or when the block contains a
   /// VMEM load followed by at least the threshold number of VALU
   /// instructions.
   bool MayReachVMEMLoad = false;

   /// Last instruction of the block classified as a VMEM load, or null if the
   /// block has none.
   MachineInstr *LastVMEMLoad = nullptr;
 };

 /// Analysis results keyed by basic block.
 using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;

 /// Machine function pass that inserts S_SETPRIO instructions: one raising
 /// the priority in the entry block and others lowering it again in the
 /// blocks selected by runOnMachineFunction.
 class AMDGPUSetWavePriority : public MachineFunctionPass {
 public:
   static char ID;

   AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}

   StringRef getPassName() const override { return "Set wave priority"; }

   bool runOnMachineFunction(MachineFunction &MF) override;

 private:
   /// Inserts an S_SETPRIO with immediate \p priority before \p I in \p MBB
   /// and returns the new instruction.
   MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                unsigned priority) const;

   /// Instruction info of the function being processed. Assigned at the start
   /// of runOnMachineFunction; null until then so that an accidental early
   /// use does not read an indeterminate pointer.
   const SIInstrInfo *TII = nullptr;
 };

 } // End anonymous namespace.

 // Registers the pass under the DEBUG_TYPE argument string with the
 // human-readable name "Set wave priority". The two trailing flags are the
 // macro's CFG-only and is-analysis arguments (see llvm/PassSupport.h).
 INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
                 false)

 // Storage for the pass identifier that the constructor hands to
 // MachineFunctionPass.
 char AMDGPUSetWavePriority::ID = 0;

 /// Factory for the pass: returns a newly allocated AMDGPUSetWavePriority.
 /// The caller receives the raw pointer and is responsible for it.
 FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
   FunctionPass *NewPass = new AMDGPUSetWavePriority();
   return NewPass;
 }

 /// Emits `S_SETPRIO priority` immediately before \p I in \p MBB, using an
 /// empty debug location, and returns the created instruction.
 MachineInstr *
 AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned priority) const {
   const MCInstrDesc &SetprioDesc = TII->get(AMDGPU::S_SETPRIO);
   MachineInstrBuilder Builder = BuildMI(MBB, I, DebugLoc(), SetprioDesc);
   Builder.addImm(priority);
   return Builder;
 }

 // Checks that for every predecessor Pred that can reach a VMEM load,
 // none of Pred's successors can reach a VMEM load.
 static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
                                                    MBBInfoSet &MBBInfos) {
   for (const MachineBasicBlock *Pred : MBB.predecessors()) {
     if (!MBBInfos[Pred].MayReachVMEMLoad)
       continue;
     for (const MachineBasicBlock *Succ : Pred->successors()) {
       if (MBBInfos[Succ].MayReachVMEMLoad)
         return false;
     }
   }
   return true;
 }

 // True when MI is classified as VMEM by SIInstrInfo and may load.
 static bool isVMEMLoad(const MachineInstr &MI) {
   if (!SIInstrInfo::isVMEM(MI))
     return false;
   return MI.mayLoad();
 }

 /// Raises the wave priority in the entry block and lowers it again in the
 /// blocks where control leaves the region from which qualifying VMEM loads
 /// are reachable. Returns true iff instructions were inserted.
 bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
   const unsigned HighPriority = 3;
   const unsigned LowPriority = 0;

   // Only entry functions that are not skipped are processed.
   Function &Fn = MF.getFunction();
   if (skipFunction(Fn))
     return false;
   if (!AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
     return false;

   TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

   // Start from the command-line default; a parsable function attribute
   // overrides it. The parse result is ignored on purpose: on failure the
   // default stays in place.
   unsigned Threshold = DefaultVALUInstsThreshold;
   Attribute ThresholdAttr =
       Fn.getFnAttribute("amdgpu-wave-priority-threshold");
   if (ThresholdAttr.isValid())
     ThresholdAttr.getValueAsString().getAsInteger(0, Threshold);

   // Find VMEM loads that may be executed before long-enough sequences of
   // VALU instructions. Backedges/loops, branch probabilities and other
   // details are ignored, so this determines the largest number of VALU
   // instructions along every path from the start of the function that may
   // be executed provided no backedge is ever taken.
   MBBInfoSet MBBInfos;
   for (MachineBasicBlock *MBB : post_order(&MF)) {
     bool InLeadingRun = true;
     unsigned LongestInnerRun = 0;
     unsigned TrailingRun = 0;

     for (MachineInstr &MI : *MBB) {
       if (isVMEMLoad(MI)) {
         // A VMEM load restarts all counters and becomes the block's latest
         // load.
         MBBInfo &Info = MBBInfos[MBB];
         Info.LastVMEMLoad = &MI;
         Info.NumVALUInstsAtStart = 0;
         InLeadingRun = false;
         LongestInnerRun = 0;
         TrailingRun = 0;
         continue;
       }
       if (SIInstrInfo::isDS(MI)) {
         // A DS instruction ends the current run of VALU instructions.
         InLeadingRun = false;
         if (TrailingRun > LongestInnerRun)
           LongestInnerRun = TrailingRun;
         TrailingRun = 0;
         continue;
       }
       if (SIInstrInfo::isVALU(MI)) {
         if (InLeadingRun)
           ++MBBInfos[MBB].NumVALUInstsAtStart;
         ++TrailingRun;
       }
     }

     // Merge in what the successors contribute. Successors that have not
     // been visited yet get a default entry here.
     bool AnySuccReachesLoad = false;
     unsigned MaxSuccLeadingRun = 0;
     for (const MachineBasicBlock *Succ : MBB->successors()) {
       const MBBInfo &SuccInfo = MBBInfos[Succ];
       AnySuccReachesLoad |= SuccInfo.MayReachVMEMLoad;
       MaxSuccLeadingRun =
           std::max(MaxSuccLeadingRun, SuccInfo.NumVALUInstsAtStart);
     }

     // Fetched only after the successor lookups above, which may grow the
     // map and invalidate earlier references.
     MBBInfo &Info = MBBInfos[MBB];
     if (InLeadingRun)
       Info.NumVALUInstsAtStart += MaxSuccLeadingRun;
     TrailingRun += MaxSuccLeadingRun;

     const unsigned LongestRun = std::max(LongestInnerRun, TrailingRun);
     const bool LocalLoadQualifies =
         Info.LastVMEMLoad != nullptr && LongestRun >= Threshold;
     Info.MayReachVMEMLoad = AnySuccReachesLoad || LocalLoadQualifies;
   }

   MachineBasicBlock &Entry = MF.front();
   if (!MBBInfos[&Entry].MayReachVMEMLoad)
     return false;

   // Raise the priority at the beginning of the shader: before the first
   // VALU instruction or terminator of the entry block, or at its end.
   MachineBasicBlock::iterator RaisePos = Entry.begin();
   for (MachineBasicBlock::iterator EntryEnd = Entry.end();
        RaisePos != EntryEnd; ++RaisePos) {
     if (SIInstrInfo::isVALU(*RaisePos) || RaisePos->isTerminator())
       break;
   }
   BuildSetprioMI(Entry, RaisePos, HighPriority);

   // Lower the priority on edges where control leaves blocks from which
   // the VMEM loads are reachable.
   SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
   for (MachineBasicBlock &MBB : MF) {
     const bool ReachesLoad = MBBInfos[&MBB].MayReachVMEMLoad;
     if (ReachesLoad) {
       // Only exit blocks of the reaching region need lowering themselves.
       if (MBB.succ_empty())
         PriorityLoweringBlocks.insert(&MBB);
     } else if (!CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
       // Where lowering the priority in predecessors is not possible, the
       // block receiving control either was not part of a loop in the first
       // place or the loop simplification/canonicalization pass should have
       // already tried to split the edge and insert a preheader, and if for
       // whatever reason it failed to do so, then this leaves us with the
       // only option of lowering the priority within the loop.
       PriorityLoweringBlocks.insert(&MBB);
     } else {
       for (MachineBasicBlock *Pred : MBB.predecessors()) {
         if (MBBInfos[Pred].MayReachVMEMLoad)
           PriorityLoweringBlocks.insert(Pred);
       }
     }
   }

   // Place the lowering right after the block's last VMEM load, or at the
   // top of the block when it has none.
   for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
     MachineInstr *LastLoad = MBBInfos[MBB].LastVMEMLoad;
     MachineBasicBlock::iterator LowerPos = MBB->begin();
     if (LastLoad)
       LowerPos = std::next(MachineBasicBlock::iterator(LastLoad));
     BuildSetprioMI(*MBB, LowerPos, LowPriority);
   }

   return true;
 }
	//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file
	/// Pass to temporarily raise the wave priority from the start of the
	/// shader function until its last VMEM instructions, to allow younger
	/// waves to issue their VMEM instructions as well.
	//
	//===----------------------------------------------------------------------===//

	#include "AMDGPU.h"
	#include "GCNSubtarget.h"
	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
	#include "SIInstrInfo.h"
	#include "llvm/ADT/PostOrderIterator.h"
	#include "llvm/CodeGen/MachineFunctionPass.h"
	#include "llvm/InitializePasses.h"
	#include "llvm/Support/Allocator.h"

	using namespace llvm;

	#define DEBUG_TYPE "amdgpu-set-wave-priority"

	/// Command-line default for the VALU instruction count threshold.
	/// runOnMachineFunction starts from this value and lets the
	/// "amdgpu-wave-priority-threshold" function attribute replace it.
	static cl::opt<unsigned> DefaultVALUInstsThreshold(
	    "amdgpu-set-wave-priority-valu-insts-threshold", cl::Hidden, cl::init(100),
	    cl::desc("VALU instruction count threshold for adjusting wave priority"));

	namespace {

	/// Per-block summary filled in by the analysis loop of
	/// AMDGPUSetWavePriority::runOnMachineFunction.
	struct MBBInfo {
	  MBBInfo() = default;

	  /// VALU instructions counted from the top of the block while neither a
	  /// VMEM load nor a DS instruction has been seen. Reset to zero when a VMEM
	  /// load is found; if the whole block is such a prefix, the largest value
	  /// among its successors is added on top.
	  unsigned NumVALUInstsAtStart = 0;

	  /// Set when any successor has this flag set, or when the block contains a
	  /// VMEM load followed by at least the threshold number of VALU
	  /// instructions.
	  bool MayReachVMEMLoad = false;

	  /// Last instruction of the block classified as a VMEM load, or null if the
	  /// block has none.
	  MachineInstr *LastVMEMLoad = nullptr;
	};

	/// Analysis results keyed by basic block.
	using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;

	/// Machine function pass that inserts S_SETPRIO instructions: one raising
	/// the priority in the entry block and others lowering it again in the
	/// blocks selected by runOnMachineFunction.
	class AMDGPUSetWavePriority : public MachineFunctionPass {
	public:
	  static char ID;

	  AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}

	  StringRef getPassName() const override { return "Set wave priority"; }

	  bool runOnMachineFunction(MachineFunction &MF) override;

	private:
	  /// Inserts an S_SETPRIO with immediate \p priority before \p I in \p MBB
	  /// and returns the new instruction.
	  MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
	                               MachineBasicBlock::iterator I,
	                               unsigned priority) const;

	  /// Instruction info of the function being processed. Assigned at the start
	  /// of runOnMachineFunction; null until then so that an accidental early
	  /// use does not read an indeterminate pointer.
	  const SIInstrInfo *TII = nullptr;
	};

	} // End anonymous namespace.

	// Registers the pass under the DEBUG_TYPE argument string with the
	// human-readable name "Set wave priority". The two trailing flags are the
	// macro's CFG-only and is-analysis arguments (see llvm/PassSupport.h).
	INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
	false)

	// Storage for the pass identifier that the constructor hands to
	// MachineFunctionPass.
	char AMDGPUSetWavePriority::ID = 0;

	/// Factory for the pass: returns a newly allocated AMDGPUSetWavePriority.
	/// The caller receives the raw pointer and is responsible for it.
	FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
	  FunctionPass *NewPass = new AMDGPUSetWavePriority();
	  return NewPass;
	}

	/// Emits `S_SETPRIO priority` immediately before \p I in \p MBB, using an
	/// empty debug location, and returns the created instruction.
	MachineInstr *
	AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
	                                      MachineBasicBlock::iterator I,
	                                      unsigned priority) const {
	  const MCInstrDesc &SetprioDesc = TII->get(AMDGPU::S_SETPRIO);
	  MachineInstrBuilder Builder = BuildMI(MBB, I, DebugLoc(), SetprioDesc);
	  Builder.addImm(priority);
	  return Builder;
	}

	// Checks that for every predecessor Pred that can reach a VMEM load,
	// none of Pred's successors can reach a VMEM load.
	static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
	MBBInfoSet &MBBInfos) {
	for (const MachineBasicBlock *Pred : MBB.predecessors()) {
	if (!MBBInfos[Pred].MayReachVMEMLoad)
	continue;
	for (const MachineBasicBlock *Succ : Pred->successors()) {
	if (MBBInfos[Succ].MayReachVMEMLoad)
	return false;
	}
	}
	return true;
	}

	// True when MI is classified as VMEM by SIInstrInfo and may load.
	static bool isVMEMLoad(const MachineInstr &MI) {
	  if (!SIInstrInfo::isVMEM(MI))
	    return false;
	  return MI.mayLoad();
	}

	/// Raises the wave priority in the entry block and lowers it again in the
	/// blocks where control leaves the region from which qualifying VMEM loads
	/// are reachable. Returns true iff instructions were inserted.
	bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
	  const unsigned HighPriority = 3;
	  const unsigned LowPriority = 0;

	  // Only entry functions that are not skipped are processed.
	  Function &F = MF.getFunction();
	  if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
	    return false;

	  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
	  TII = ST.getInstrInfo();

	  // Start from the command-line default; a parsable function attribute
	  // overrides it. The parse result is ignored on purpose: on failure the
	  // default stays in place.
	  unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
	  Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");
	  if (A.isValid())
	    A.getValueAsString().getAsInteger(0, VALUInstsThreshold);

	  // Find VMEM loads that may be executed before long-enough sequences of
	  // VALU instructions. We currently assume that backedges/loops, branch
	  // probabilities and other details can be ignored, so we essentially
	  // determine the largest number of VALU instructions along every
	  // possible path from the start of the function that may potentially be
	  // executed provided no backedge is ever taken.
	  MBBInfoSet MBBInfos;
	  for (MachineBasicBlock *MBB : post_order(&MF)) {
	    bool AtStart = true;
	    unsigned MaxNumVALUInstsInMiddle = 0;
	    unsigned NumVALUInstsAtEnd = 0;
	    for (MachineInstr &MI : *MBB) {
	      if (isVMEMLoad(MI)) {
	        // A VMEM load restarts all counters and becomes the block's latest
	        // load.
	        AtStart = false;
	        MBBInfo &Info = MBBInfos[MBB];
	        Info.NumVALUInstsAtStart = 0;
	        MaxNumVALUInstsInMiddle = 0;
	        NumVALUInstsAtEnd = 0;
	        Info.LastVMEMLoad = &MI;
	      } else if (SIInstrInfo::isDS(MI)) {
	        // A DS instruction ends the current run of VALU instructions.
	        AtStart = false;
	        MaxNumVALUInstsInMiddle =
	            std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
	        NumVALUInstsAtEnd = 0;
	      } else if (SIInstrInfo::isVALU(MI)) {
	        if (AtStart)
	          ++MBBInfos[MBB].NumVALUInstsAtStart;
	        ++NumVALUInstsAtEnd;
	      }
	    }

	    // Merge in what the successors contribute. Successors that have not
	    // been visited yet get a default entry here.
	    bool SuccsMayReachVMEMLoad = false;
	    unsigned NumFollowingVALUInsts = 0;
	    for (const MachineBasicBlock *Succ : MBB->successors()) {
	      SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;
	      NumFollowingVALUInsts =
	          std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart);
	    }

	    // Fetched only after the successor lookups above, which may grow the
	    // map and invalidate earlier references.
	    MBBInfo &Info = MBBInfos[MBB];
	    if (AtStart)
	      Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
	    NumVALUInstsAtEnd += NumFollowingVALUInsts;

	    unsigned MaxNumVALUInsts =
	        std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
	    Info.MayReachVMEMLoad =
	        SuccsMayReachVMEMLoad ||
	        (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
	  }

	  MachineBasicBlock &Entry = MF.front();
	  if (!MBBInfos[&Entry].MayReachVMEMLoad)
	    return false;

	  // Raise the priority at the beginning of the shader: before the first
	  // VALU instruction or terminator of the entry block, or at its end.
	  MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
	  while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
	    ++I;
	  BuildSetprioMI(Entry, I, HighPriority);

	  // Lower the priority on edges where control leaves blocks from which
	  // the VMEM loads are reachable.
	  SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
	  for (MachineBasicBlock &MBB : MF) {
	    if (MBBInfos[&MBB].MayReachVMEMLoad) {
	      // Only exit blocks of the reaching region need lowering themselves.
	      if (MBB.succ_empty())
	        PriorityLoweringBlocks.insert(&MBB);
	      continue;
	    }

	    if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
	      for (MachineBasicBlock *Pred : MBB.predecessors()) {
	        if (MBBInfos[Pred].MayReachVMEMLoad)
	          PriorityLoweringBlocks.insert(Pred);
	      }
	      continue;
	    }

	    // Where lowering the priority in predecessors is not possible, the
	    // block receiving control either was not part of a loop in the first
	    // place or the loop simplification/canonicalization pass should have
	    // already tried to split the edge and insert a preheader, and if for
	    // whatever reason it failed to do so, then this leaves us with the
	    // only option of lowering the priority within the loop.
	    PriorityLoweringBlocks.insert(&MBB);
	  }

	  // Place the lowering right after the block's last VMEM load, or at the
	  // top of the block when it has none.
	  for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
	    BuildSetprioMI(
	        *MBB,
	        MBBInfos[MBB].LastVMEMLoad
	            ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))
	            : MBB->begin(),
	        LowPriority);
	  }

	  return true;
	}