//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {
namespace AMDGPU {
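
// Return the alignment to use for the global variable GV: its pointer
// alignment as seen by the data layout, falling back to the ABI alignment
// of its value type.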
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}
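
// Returns true if the LDS variable GV should be moved into a struct. For
// kernel LDS lowering (F is a kernel), that is when GV is used directly from
// F; for module LDS lowering (F is null), when GV is used from a non-kernel
// function. Uses reached only through constant expressions are followed
// recursively.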
static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
                                   const Function *F) {
  // We are not interested in kernel LDS lowering for module LDS itself.
  if (F && GV.getName() == "llvm.amdgcn.module.lds")
    return false;

  bool Ret = false;
  SmallPtrSet<const User *, 8> Visited;
  SmallVector<const User *, 16> Stack(GV.users());

  assert(!F || isKernelCC(F));

  while (!Stack.empty()) {
    const User *V = Stack.pop_back_val();
    Visited.insert(V);

    if (isa<GlobalValue>(V)) {
      // This use of the LDS variable is the initializer of a global variable.
      // This is ill-formed. The address of an LDS variable is kernel dependent
      // and unknown until runtime. It can't be written to a global variable.
      continue;
    }

    if (auto *I = dyn_cast<Instruction>(V)) {
      const Function *UF = I->getFunction();
      if (UF == F) {
        // Used from this kernel, we want to put it into the structure.
        Ret = true;
      } else if (!F) {
        // For module LDS lowering, lowering is required if the user
        // instruction is from a non-kernel function.
        Ret |= !isKernelCC(UF);
      }
      continue;
    }

    // User V should be a constant; recursively visit users of V.
    assert(isa<Constant>(V) && "Expected a constant.");
    append_range(Stack, V->users());
  }

  return Ret;
}
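
// Returns true if GV is an LDS (addrspace(3)) variable that this lowering
// should handle: it must have an undef initializer and must not be a
// constant. Extern __shared__ style variables (no initializer) and variables
// with real initializers are left alone.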
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (!GV.hasInitializer()) {
    // addrspace(3) without an initializer implies cuda/hip extern __shared__.
    // The semantics of such a variable appear to be that all extern
    // __shared__ variables alias one another, in which case this transform
    // is not required.
    return false;
  }
  if (!isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  return true;
}
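
// Collect all LDS variables in M that should be lowered for the given
// function F (or for module LDS lowering when F is null), combining the two
// checks above.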
std::vector<GlobalVariable *> findLDSVariablesToLower(Module &M,
                                                      const Function *F) {
  std::vector<llvm::GlobalVariable *> LocalVars;
  for (auto &GV : M.globals()) {
    if (!isLDSVariableToLower(GV)) {
      continue;
    }
    if (!shouldLowerLDSToStruct(GV, F)) {
      continue;
    }
    LocalVars.push_back(&GV);
  }
  return LocalVars;
}
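
// Given a MemoryDef that MemorySSA reports as a potential clobber of Ptr,
// decide whether it really writes memory that may alias Ptr. Fences, AMDGPU
// barrier intrinsics, and atomics proven not to alias Ptr are not treated as
// clobbers.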
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics not aliasing with the original load. Any atomic is a
  // universal MemoryDef from MSSA's point of view too, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}
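
// Walk MemorySSA upwards from Load to decide whether any MemoryDef in the
// function can actually clobber the loaded location, using isReallyAClobber
// to filter out barriers, fences, and non-aliasing atomics.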
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access. It will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In that
  // case add all Defs to the WorkList and continue going up, checking all
  // the definitions of this memory location until the root. When all the
  // defs are exhausted and we have reached the entry state, there is no
  // clobber. Along the scan ignore barriers and fences, which MemorySSA
  // considers clobbers but which do not really write anything into memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << " -> no clobber\n");
  return false;
}
} // end namespace AMDGPU
} // end namespace llvm