| //===-- AMDGPUAlwaysInlinePass.cpp - Always Inline ------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
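| /// This pass redirects uses of function aliases to the aliased function, marks |
| /// every non-entry function that (transitively) uses an LDS or region global |
| /// as always_inline, and, when function calls are disabled or stress-tested, |
| /// marks the remaining defined and used functions always_inline or noinline. |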
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPU.h" |
| #include "AMDGPUTargetMachine.h" |
| #include "Utils/AMDGPUBaseInfo.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/Transforms/Utils/Cloning.h" |
| |
| using namespace llvm; |
| |
| namespace { |
| |
| static cl::opt<bool> StressCalls( |
| "amdgpu-stress-function-calls", |
| cl::Hidden, |
| cl::desc("Force all functions to be noinline"), |
| cl::init(false)); |
| |
| class AMDGPUAlwaysInline : public ModulePass { |
| bool GlobalOpt; |
| |
| void recursivelyVisitUsers(GlobalValue &GV, |
| SmallPtrSetImpl<Function *> &FuncsToAlwaysInline); |
| public: |
| static char ID; |
| |
| AMDGPUAlwaysInline(bool GlobalOpt = false) : |
| ModulePass(ID), GlobalOpt(GlobalOpt) { } |
| bool runOnModule(Module &M) override; |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.setPreservesAll(); |
| } |
| }; |
| |
| } // End anonymous namespace |
| |
| INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", |
| "AMDGPU Inline All Functions", false, false) |
| |
| char AMDGPUAlwaysInline::ID = 0; |
| |
| void AMDGPUAlwaysInline::recursivelyVisitUsers( |
| GlobalValue &GV, |
| SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) { |
| SmallVector<User *, 16> Stack; |
| |
| SmallPtrSet<const Value *, 8> Visited; |
| |
| for (User *U : GV.users()) |
| Stack.push_back(U); |
| |
| while (!Stack.empty()) { |
| User *U = Stack.pop_back_val(); |
| if (!Visited.insert(U).second) |
| continue; |
| |
| if (Instruction *I = dyn_cast<Instruction>(U)) { |
| Function *F = I->getParent()->getParent(); |
| if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { |
| FuncsToAlwaysInline.insert(F); |
| Stack.push_back(F); |
| } |
| |
| // No need to look at further users, but we do need to inline any callers. |
| continue; |
| } |
| |
| for (User *UU : U->users()) |
| Stack.push_back(UU); |
| } |
| } |
| |
| bool AMDGPUAlwaysInline::runOnModule(Module &M) { |
| AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M); |
| |
| std::vector<GlobalAlias*> AliasesToRemove; |
| |
| SmallPtrSet<Function *, 8> FuncsToAlwaysInline; |
| SmallPtrSet<Function *, 8> FuncsToNoInline; |
| |
| for (GlobalAlias &A : M.aliases()) { |
| if (Function* F = dyn_cast<Function>(A.getAliasee())) { |
| A.replaceAllUsesWith(F); |
| AliasesToRemove.push_back(&A); |
| } |
| |
| // FIXME: If the aliasee isn't a function, it's some kind of constant expr |
| // cast that won't be inlined through. |
| } |
| |
| if (GlobalOpt) { |
| for (GlobalAlias* A : AliasesToRemove) { |
| A->eraseFromParent(); |
| } |
| } |
| |
| // Always force inlining of any function that uses an LDS global address. This |
| // is something of a workaround because we don't have a way of supporting LDS |
| // objects defined in functions. LDS is always allocated by a kernel, and it |
| // is difficult to manage LDS usage if a function may be used by multiple |
| // kernels. |
| // |
| // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this |
| // should only appear when IPO passes manages to move LDs defined in a kernel |
| // into a single user function. |
| |
| for (GlobalVariable &GV : M.globals()) { |
| // TODO: Region address |
| unsigned AS = GV.getType()->getAddressSpace(); |
| if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS) |
| continue; |
| |
| recursivelyVisitUsers(GV, FuncsToAlwaysInline); |
| } |
| |
| if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) { |
| auto IncompatAttr |
| = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline; |
| |
| for (Function &F : M) { |
| if (!F.isDeclaration() && !F.use_empty() && |
| !F.hasFnAttribute(IncompatAttr)) { |
| if (StressCalls) { |
| if (!FuncsToAlwaysInline.count(&F)) |
| FuncsToNoInline.insert(&F); |
| } else |
| FuncsToAlwaysInline.insert(&F); |
| } |
| } |
| } |
| |
| for (Function *F : FuncsToAlwaysInline) |
| F->addFnAttr(Attribute::AlwaysInline); |
| |
| for (Function *F : FuncsToNoInline) |
| F->addFnAttr(Attribute::NoInline); |
| |
| return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); |
| } |
| |
| ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { |
| return new AMDGPUAlwaysInline(GlobalOpt); |
| } |
| |