|  | //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPUMachineFunction.h" | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUPerfHintAnalysis.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "llvm/CodeGen/MachineModuleInfo.h" | 
|  | #include "llvm/IR/Constants.h" | 
|  | #include "llvm/Target/TargetMachine.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, | 
|  | const AMDGPUSubtarget &ST) | 
|  | : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), | 
|  | IsModuleEntryFunction( | 
|  | AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), | 
|  | NoSignedZerosFPMath(false) { | 
|  |  | 
|  | // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, | 
|  | // except reserved size is not correctly aligned. | 
|  |  | 
|  | Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); | 
|  | MemoryBound = MemBoundAttr.getValueAsBool(); | 
|  |  | 
|  | Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); | 
|  | WaveLimiter = WaveLimitAttr.getValueAsBool(); | 
|  |  | 
|  | // FIXME: How is this attribute supposed to interact with statically known | 
|  | // global sizes? | 
|  | StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); | 
|  | if (!S.empty()) | 
|  | S.consumeInteger(0, GDSSize); | 
|  |  | 
|  | // Assume the attribute allocates before any known GDS globals. | 
|  | StaticGDSSize = GDSSize; | 
|  |  | 
|  | CallingConv::ID CC = F.getCallingConv(); | 
|  | if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) | 
|  | ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); | 
|  |  | 
|  | // FIXME: Shouldn't be target specific | 
|  | Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); | 
|  | NoSignedZerosFPMath = | 
|  | NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; | 
|  | } | 
|  |  | 
|  | unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, | 
|  | const GlobalVariable &GV, | 
|  | Align Trailing) { | 
|  | auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); | 
|  | if (!Entry.second) | 
|  | return Entry.first->second; | 
|  |  | 
|  | Align Alignment = | 
|  | DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); | 
|  |  | 
|  | unsigned Offset; | 
|  | if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { | 
|  | /// TODO: We should sort these to minimize wasted space due to alignment | 
|  | /// padding. Currently the padding is decided by the first encountered use | 
|  | /// during lowering. | 
|  | Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); | 
|  |  | 
|  | StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); | 
|  |  | 
|  | // Align LDS size to trailing, e.g. for aligning dynamic shared memory | 
|  | LDSSize = alignTo(StaticLDSSize, Trailing); | 
|  | } else { | 
|  | assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && | 
|  | "expected region address space"); | 
|  |  | 
|  | Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); | 
|  | StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); | 
|  |  | 
|  | // FIXME: Apply alignment of dynamic GDS | 
|  | GDSSize = StaticGDSSize; | 
|  | } | 
|  |  | 
|  | Entry.first->second = Offset; | 
|  | return Offset; | 
|  | } | 
|  |  | 
|  | static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds"; | 
|  |  | 
|  | bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) { | 
|  | auto name = GV.getName(); | 
|  | return (name == ModuleLDSName) || | 
|  | (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds")); | 
|  | } | 
|  |  | 
|  | const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal( | 
|  | const GlobalVariable &GV) { | 
|  | const Module &M = *GV.getParent(); | 
|  | StringRef N(GV.getName()); | 
|  | if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) { | 
|  | return M.getFunction(N); | 
|  | } | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | const GlobalVariable * | 
|  | AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) { | 
|  | const Module *M = F.getParent(); | 
|  | std::string KernelLDSName = "llvm.amdgcn.kernel."; | 
|  | KernelLDSName += F.getName(); | 
|  | KernelLDSName += ".lds"; | 
|  | return M->getNamedGlobal(KernelLDSName); | 
|  | } | 
|  |  | 
|  | // This kernel calls no functions that require the module lds struct | 
|  | static bool canElideModuleLDS(const Function &F) { | 
|  | return F.hasFnAttribute("amdgpu-elide-module-lds"); | 
|  | } | 
|  |  | 
|  | unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal( | 
|  | const GlobalVariable &GV) { | 
|  | // module.lds, then alignment padding, then kernel.lds, then other variables | 
|  | // if any | 
|  |  | 
|  | assert(isKnownAddressLDSGlobal(GV)); | 
|  | unsigned Offset = 0; | 
|  |  | 
|  | if (GV.getName() == ModuleLDSName) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | const Module *M = GV.getParent(); | 
|  | const DataLayout &DL = M->getDataLayout(); | 
|  |  | 
|  | const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName); | 
|  | const Function *f = getKernelLDSFunctionFromGlobal(GV); | 
|  |  | 
|  | // Account for module.lds if allocated for this function | 
|  | if (GVM && f && !canElideModuleLDS(*f)) { | 
|  | // allocator aligns this to var align, but it's zero to begin with | 
|  | Offset += DL.getTypeAllocSize(GVM->getValueType()); | 
|  | } | 
|  |  | 
|  | // No dynamic LDS alignment done by allocateModuleLDSGlobal | 
|  | Offset = alignTo( | 
|  | Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType())); | 
|  |  | 
|  | return Offset; | 
|  | } | 
|  |  | 
|  | void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) { | 
|  | const Module *M = F.getParent(); | 
|  |  | 
|  | // This function is called before allocating any other LDS so that it can | 
|  | // reliably put values at known addresses. Consequently, dynamic LDS, if | 
|  | // present, will not yet have been allocated | 
|  |  | 
|  | assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated"); | 
|  |  | 
|  | if (isModuleEntryFunction()) { | 
|  |  | 
|  | // Pointer values start from zero, memory allocated per-kernel-launch | 
|  | // Variables can be grouped into a module level struct and a struct per | 
|  | // kernel function by AMDGPULowerModuleLDSPass. If that is done, they | 
|  | // are allocated at statically computable addresses here. | 
|  | // | 
|  | // Address 0 | 
|  | // { | 
|  | //   llvm.amdgcn.module.lds | 
|  | // } | 
|  | // alignment padding | 
|  | // { | 
|  | //   llvm.amdgcn.kernel.some-name.lds | 
|  | // } | 
|  | // other variables, e.g. dynamic lds, allocated after this call | 
|  |  | 
|  | const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName); | 
|  | const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F); | 
|  |  | 
|  | if (GV && !canElideModuleLDS(F)) { | 
|  | assert(isKnownAddressLDSGlobal(*GV)); | 
|  | unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align()); | 
|  | (void)Offset; | 
|  | assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) && | 
|  | "Module LDS expected to be allocated before other LDS"); | 
|  | } | 
|  |  | 
|  | if (KV) { | 
|  | // The per-kernel offset is deterministic because it is allocated | 
|  | // before any other non-module LDS variables. | 
|  | assert(isKnownAddressLDSGlobal(*KV)); | 
|  | unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align()); | 
|  | (void)Offset; | 
|  | assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) && | 
|  | "Kernel LDS expected to be immediately after module LDS"); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | std::optional<uint32_t> | 
|  | AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { | 
|  | auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); | 
|  | if (MD && MD->getNumOperands() == 1) { | 
|  | ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); | 
|  | if (KnownSize) { | 
|  | uint64_t V = KnownSize->getZExtValue(); | 
|  | if (V <= UINT32_MAX) { | 
|  | return V; | 
|  | } | 
|  | } | 
|  | } | 
|  | return {}; | 
|  | } | 
|  |  | 
|  | void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, | 
|  | const GlobalVariable &GV) { | 
|  | assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); | 
|  |  | 
|  | Align Alignment = | 
|  | DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); | 
|  | if (Alignment <= DynLDSAlign) | 
|  | return; | 
|  |  | 
|  | LDSSize = alignTo(StaticLDSSize, Alignment); | 
|  | DynLDSAlign = Alignment; | 
|  | } |