| //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPUMachineFunction.h" |
| #include "AMDGPU.h" |
| #include "AMDGPUPerfHintAnalysis.h" |
| #include "AMDGPUSubtarget.h" |
| #include "llvm/CodeGen/MachineModuleInfo.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/Target/TargetMachine.h" |
| |
| using namespace llvm; |
| |
| AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, |
| const AMDGPUSubtarget &ST) |
| : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), |
| IsModuleEntryFunction( |
| AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), |
| NoSignedZerosFPMath(false) { |
| |
| // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, |
| // except reserved size is not correctly aligned. |
| |
| Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); |
| MemoryBound = MemBoundAttr.getValueAsBool(); |
| |
| Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); |
| WaveLimiter = WaveLimitAttr.getValueAsBool(); |
| |
| // FIXME: How is this attribute supposed to interact with statically known |
| // global sizes? |
| StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); |
| if (!S.empty()) |
| S.consumeInteger(0, GDSSize); |
| |
| // Assume the attribute allocates before any known GDS globals. |
| StaticGDSSize = GDSSize; |
| |
| CallingConv::ID CC = F.getCallingConv(); |
| if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) |
| ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); |
| |
| // FIXME: Shouldn't be target specific |
| Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); |
| NoSignedZerosFPMath = |
| NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; |
| } |
| |
| unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, |
| const GlobalVariable &GV, |
| Align Trailing) { |
| auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); |
| if (!Entry.second) |
| return Entry.first->second; |
| |
| Align Alignment = |
| DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); |
| |
| unsigned Offset; |
| if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { |
| /// TODO: We should sort these to minimize wasted space due to alignment |
| /// padding. Currently the padding is decided by the first encountered use |
| /// during lowering. |
| Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); |
| |
| StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); |
| |
| // Align LDS size to trailing, e.g. for aligning dynamic shared memory |
| LDSSize = alignTo(StaticLDSSize, Trailing); |
| } else { |
| assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && |
| "expected region address space"); |
| |
| Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); |
| StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); |
| |
| // FIXME: Apply alignment of dynamic GDS |
| GDSSize = StaticGDSSize; |
| } |
| |
| Entry.first->second = Offset; |
| return Offset; |
| } |
| |
// Name of the module-scope LDS struct created by AMDGPULowerModuleLDSPass
// when it groups LDS variables (see the layout comment in
// allocateKnownAddressLDSGlobal below).
static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
| |
| bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) { |
| auto name = GV.getName(); |
| return (name == ModuleLDSName) || |
| (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds")); |
| } |
| |
| const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal( |
| const GlobalVariable &GV) { |
| const Module &M = *GV.getParent(); |
| StringRef N(GV.getName()); |
| if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) { |
| return M.getFunction(N); |
| } |
| return nullptr; |
| } |
| |
| const GlobalVariable * |
| AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) { |
| const Module *M = F.getParent(); |
| std::string KernelLDSName = "llvm.amdgcn.kernel."; |
| KernelLDSName += F.getName(); |
| KernelLDSName += ".lds"; |
| return M->getNamedGlobal(KernelLDSName); |
| } |
| |
| // This kernel calls no functions that require the module lds struct |
| static bool canElideModuleLDS(const Function &F) { |
| return F.hasFnAttribute("amdgpu-elide-module-lds"); |
| } |
| |
| unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal( |
| const GlobalVariable &GV) { |
| // module.lds, then alignment padding, then kernel.lds, then other variables |
| // if any |
| |
| assert(isKnownAddressLDSGlobal(GV)); |
| unsigned Offset = 0; |
| |
| if (GV.getName() == ModuleLDSName) { |
| return 0; |
| } |
| |
| const Module *M = GV.getParent(); |
| const DataLayout &DL = M->getDataLayout(); |
| |
| const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName); |
| const Function *f = getKernelLDSFunctionFromGlobal(GV); |
| |
| // Account for module.lds if allocated for this function |
| if (GVM && f && !canElideModuleLDS(*f)) { |
| // allocator aligns this to var align, but it's zero to begin with |
| Offset += DL.getTypeAllocSize(GVM->getValueType()); |
| } |
| |
| // No dynamic LDS alignment done by allocateModuleLDSGlobal |
| Offset = alignTo( |
| Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType())); |
| |
| return Offset; |
| } |
| |
| void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) { |
| const Module *M = F.getParent(); |
| |
| // This function is called before allocating any other LDS so that it can |
| // reliably put values at known addresses. Consequently, dynamic LDS, if |
| // present, will not yet have been allocated |
| |
| assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated"); |
| |
| if (isModuleEntryFunction()) { |
| |
| // Pointer values start from zero, memory allocated per-kernel-launch |
| // Variables can be grouped into a module level struct and a struct per |
| // kernel function by AMDGPULowerModuleLDSPass. If that is done, they |
| // are allocated at statically computable addresses here. |
| // |
| // Address 0 |
| // { |
| // llvm.amdgcn.module.lds |
| // } |
| // alignment padding |
| // { |
| // llvm.amdgcn.kernel.some-name.lds |
| // } |
| // other variables, e.g. dynamic lds, allocated after this call |
| |
| const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName); |
| const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F); |
| |
| if (GV && !canElideModuleLDS(F)) { |
| assert(isKnownAddressLDSGlobal(*GV)); |
| unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align()); |
| (void)Offset; |
| assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) && |
| "Module LDS expected to be allocated before other LDS"); |
| } |
| |
| if (KV) { |
| // The per-kernel offset is deterministic because it is allocated |
| // before any other non-module LDS variables. |
| assert(isKnownAddressLDSGlobal(*KV)); |
| unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align()); |
| (void)Offset; |
| assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) && |
| "Kernel LDS expected to be immediately after module LDS"); |
| } |
| } |
| } |
| |
| std::optional<uint32_t> |
| AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { |
| auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); |
| if (MD && MD->getNumOperands() == 1) { |
| ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); |
| if (KnownSize) { |
| uint64_t V = KnownSize->getZExtValue(); |
| if (V <= UINT32_MAX) { |
| return V; |
| } |
| } |
| } |
| return {}; |
| } |
| |
| void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, |
| const GlobalVariable &GV) { |
| assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); |
| |
| Align Alignment = |
| DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); |
| if (Alignment <= DynLDSAlign) |
| return; |
| |
| LDSSize = alignTo(StaticLDSSize, Alignment); |
| DynLDSAlign = Alignment; |
| } |