third_party/llvm-7.0/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp - SwiftShader - Git at Google

 //===-- AMDGPUAlwaysInlinePass.cpp - Force Inlining -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
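 /// This pass marks functions as always_inline: every non-entry function that
 /// uses an LDS or region global (and its callers) and, when function calls are
 /// disabled, every used function definition. With the stress option set it
 /// marks the remaining used function definitions as noinline instead.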
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Cloning.h"

 using namespace llvm;

 namespace {

 // Hidden testing switch. When set, runOnModule marks used function
 // definitions noinline (unless they must be inlined) rather than
 // always_inline.
 static cl::opt<bool> StressCalls(
   "amdgpu-stress-function-calls",
   cl::desc("Force all functions to be noinline"),
   cl::init(false),
   cl::Hidden);

 /// Module pass that chooses, per function, whether to attach the
 /// always_inline or the noinline attribute.
 class AMDGPUAlwaysInline : public ModulePass {
 public:
   static char ID;

   AMDGPUAlwaysInline(bool GlobalOpt = false)
       : ModulePass(ID), GlobalOpt(GlobalOpt) {}

   /// Applies the attributes to the functions of \p M.
   bool runOnModule(Module &M) override;

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
   }

 private:
   /// Collects into \p FuncsToAlwaysInline the non-entry functions that use
   /// \p GV, following users transitively.
   void recursivelyVisitUsers(GlobalValue &GV,
                              SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);

   // When true, aliases whose uses were redirected to the aliased function are
   // also erased from the module.
   bool GlobalOpt;
 };

 } // End anonymous namespace

 INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
                 "AMDGPU Inline All Functions", false, false)

 char AMDGPUAlwaysInline::ID = 0;

 /// Walks the transitive users of \p GV and records every non-entry function
 /// that contains an instruction user.
 ///
 /// Each recorded function is itself pushed on the worklist, so the
 /// instructions that use it (and therefore the functions containing them) are
 /// visited as well.
 ///
 /// \param GV                   Global value whose users are explored.
 /// \param FuncsToAlwaysInline  Receives the functions found by the walk.
 void AMDGPUAlwaysInline::recursivelyVisitUsers(
   GlobalValue &GV,
   SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
   SmallPtrSet<const Value *, 8> Seen;
   SmallVector<User *, 16> Worklist;
   Worklist.append(GV.user_begin(), GV.user_end());

   while (!Worklist.empty()) {
     User *Cur = Worklist.pop_back_val();

     // Skip anything already processed; this also terminates cyclic use chains.
     const bool FirstVisit = Seen.insert(Cur).second;
     if (!FirstVisit)
       continue;

     Instruction *Inst = dyn_cast<Instruction>(Cur);
     if (!Inst) {
       // Not an instruction (e.g. a constant, or a function queued below):
       // keep following its own users.
       Worklist.append(Cur->user_begin(), Cur->user_end());
       continue;
     }

     // An instruction's users are not followed further, but the function that
     // contains it must be inlined, and so must that function's callers.
     Function *Parent = Inst->getParent()->getParent();
     if (AMDGPU::isEntryFunctionCC(Parent->getCallingConv()))
       continue;

     FuncsToAlwaysInline.insert(Parent);
     Worklist.push_back(Parent);
   }
 }

 /// Selects the functions of \p M that must be marked always_inline or
 /// noinline and attaches the attributes.
 ///
 /// Steps:
 ///  1. Uses of every alias whose aliasee is a function are redirected to that
 ///     function; with GlobalOpt set the alias is erased afterwards.
 ///  2. Every non-entry function using a local (LDS) or region address space
 ///     global, directly or through its callees, is queued for always_inline.
 ///  3. If function calls are disabled, every used function definition not
 ///     already noinline is queued for always_inline. If StressCalls is set,
 ///     every used definition that is neither always_inline nor queued for
 ///     always_inline is queued for noinline instead.
 ///
 /// \returns true if any alias was rewritten or any attribute was queued.
 bool AMDGPUAlwaysInline::runOnModule(Module &M) {
   AMDGPUAS ASInfo = AMDGPU::getAMDGPUAS(M);

   // Tracks every kind of modification, so alias rewriting alone is also
   // reflected in the return value.
   bool Changed = false;

   std::vector<GlobalAlias*> AliasesToRemove;

   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
   SmallPtrSet<Function *, 8> FuncsToNoInline;

   for (GlobalAlias &A : M.aliases()) {
     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
       A.replaceAllUsesWith(F);
       AliasesToRemove.push_back(&A);
       Changed = true;
     }

     // FIXME: If the aliasee isn't a function, it's some kind of constant expr
     // cast that won't be inlined through.
   }

   // Erasing is deferred until the iteration over M.aliases() has finished.
   if (GlobalOpt) {
     for (GlobalAlias* A : AliasesToRemove) {
       A->eraseFromParent();
     }
   }

   // Always force inlining of any function that uses an LDS global address. This
   // is something of a workaround because we don't have a way of supporting LDS
   // objects defined in functions. LDS is always allocated by a kernel, and it
   // is difficult to manage LDS usage if a function may be used by multiple
   // kernels.
   //
   // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
   // should only appear when IPO passes manage to move LDS defined in a kernel
   // into a single user function.

   for (GlobalVariable &GV : M.globals()) {
     // Globals in the region address space are treated the same way as LDS.
     unsigned AS = GV.getType()->getAddressSpace();
     if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != ASInfo.REGION_ADDRESS)
       continue;

     recursivelyVisitUsers(GV, FuncsToAlwaysInline);
   }

   if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
     // Functions already carrying the opposite attribute are left untouched.
     auto IncompatAttr
       = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

     for (Function &F : M) {
       if (!F.isDeclaration() && !F.use_empty() &&
           !F.hasFnAttribute(IncompatAttr)) {
         if (StressCalls) {
           // Functions that must be inlined keep priority over the stress mode.
           if (!FuncsToAlwaysInline.count(&F))
             FuncsToNoInline.insert(&F);
         } else
           FuncsToAlwaysInline.insert(&F);
       }
     }
   }

   // NOTE(review): functions collected by recursivelyVisitUsers are not checked
   // for an existing noinline attribute before always_inline is added here;
   // confirm whether that combination can occur in practice.
   for (Function *F : FuncsToAlwaysInline)
     F->addFnAttr(Attribute::AlwaysInline);

   for (Function *F : FuncsToNoInline)
     F->addFnAttr(Attribute::NoInline);

   if (!FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty())
     Changed = true;

   return Changed;
 }

 /// Creates a new AMDGPUAlwaysInline pass instance.
 ///
 /// \param GlobalOpt  Forwarded to the pass constructor; when true the pass
 ///                   erases the function aliases it has rewritten.
 /// \returns a heap-allocated pass returned as a raw pointer.
 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
   ModulePass *Pass = new AMDGPUAlwaysInline(GlobalOpt);
   return Pass;
 }
	//===-- AMDGPUAlwaysInlinePass.cpp - Force Inlining -----------------------===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file
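	/// This pass marks functions as always_inline: every non-entry function that
	/// uses an LDS or region global (and its callers) and, when function calls are
	/// disabled, every used function definition. With the stress option set it
	/// marks the remaining used function definitions as noinline instead.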
	//
	//===----------------------------------------------------------------------===//

	#include "AMDGPU.h"
	#include "AMDGPUTargetMachine.h"
	#include "Utils/AMDGPUBaseInfo.h"
	#include "llvm/ADT/SmallPtrSet.h"
	#include "llvm/IR/Module.h"
	#include "llvm/Transforms/Utils/Cloning.h"

	using namespace llvm;

	namespace {

	// Hidden testing switch. When set, runOnModule marks used function
	// definitions noinline (unless they must be inlined) rather than
	// always_inline.
	static cl::opt<bool> StressCalls(
	  "amdgpu-stress-function-calls",
	  cl::desc("Force all functions to be noinline"),
	  cl::init(false),
	  cl::Hidden);

	/// Module pass that chooses, per function, whether to attach the
	/// always_inline or the noinline attribute.
	class AMDGPUAlwaysInline : public ModulePass {
	public:
	  static char ID;

	  AMDGPUAlwaysInline(bool GlobalOpt = false)
	      : ModulePass(ID), GlobalOpt(GlobalOpt) {}

	  /// Applies the attributes to the functions of \p M.
	  bool runOnModule(Module &M) override;

	  void getAnalysisUsage(AnalysisUsage &AU) const override {
	    AU.setPreservesAll();
	  }

	private:
	  /// Collects into \p FuncsToAlwaysInline the non-entry functions that use
	  /// \p GV, following users transitively.
	  void recursivelyVisitUsers(GlobalValue &GV,
	                             SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);

	  // When true, aliases whose uses were redirected to the aliased function
	  // are also erased from the module.
	  bool GlobalOpt;
	};

	} // End anonymous namespace

	INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
	"AMDGPU Inline All Functions", false, false)

	char AMDGPUAlwaysInline::ID = 0;

	/// Walks the transitive users of \p GV and records every non-entry function
	/// that contains an instruction user.
	///
	/// Each recorded function is itself pushed on the worklist, so the
	/// instructions that use it (and therefore the functions containing them)
	/// are visited as well.
	///
	/// \param GV                   Global value whose users are explored.
	/// \param FuncsToAlwaysInline  Receives the functions found by the walk.
	void AMDGPUAlwaysInline::recursivelyVisitUsers(
	  GlobalValue &GV,
	  SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
	  SmallPtrSet<const Value *, 8> Seen;
	  SmallVector<User *, 16> Worklist;
	  Worklist.append(GV.user_begin(), GV.user_end());

	  while (!Worklist.empty()) {
	    User *Cur = Worklist.pop_back_val();

	    // Skip anything already processed; this also terminates cyclic use
	    // chains.
	    const bool FirstVisit = Seen.insert(Cur).second;
	    if (!FirstVisit)
	      continue;

	    Instruction *Inst = dyn_cast<Instruction>(Cur);
	    if (!Inst) {
	      // Not an instruction (e.g. a constant, or a function queued below):
	      // keep following its own users.
	      Worklist.append(Cur->user_begin(), Cur->user_end());
	      continue;
	    }

	    // An instruction's users are not followed further, but the function
	    // that contains it must be inlined, and so must that function's callers.
	    Function *Parent = Inst->getParent()->getParent();
	    if (AMDGPU::isEntryFunctionCC(Parent->getCallingConv()))
	      continue;

	    FuncsToAlwaysInline.insert(Parent);
	    Worklist.push_back(Parent);
	  }
	}

	/// Selects the functions of \p M that must be marked always_inline or
	/// noinline and attaches the attributes.
	///
	/// Steps:
	///  1. Uses of every alias whose aliasee is a function are redirected to
	///     that function; with GlobalOpt set the alias is erased afterwards.
	///  2. Every non-entry function using a local (LDS) or region address space
	///     global, directly or through its callees, is queued for always_inline.
	///  3. If function calls are disabled, every used function definition not
	///     already noinline is queued for always_inline. If StressCalls is set,
	///     every used definition that is neither always_inline nor queued for
	///     always_inline is queued for noinline instead.
	///
	/// \returns true if any alias was rewritten or any attribute was queued.
	bool AMDGPUAlwaysInline::runOnModule(Module &M) {
	  AMDGPUAS ASInfo = AMDGPU::getAMDGPUAS(M);

	  // Tracks every kind of modification, so alias rewriting alone is also
	  // reflected in the return value.
	  bool Changed = false;

	  std::vector<GlobalAlias*> AliasesToRemove;

	  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
	  SmallPtrSet<Function *, 8> FuncsToNoInline;

	  for (GlobalAlias &A : M.aliases()) {
	    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
	      A.replaceAllUsesWith(F);
	      AliasesToRemove.push_back(&A);
	      Changed = true;
	    }

	    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
	    // cast that won't be inlined through.
	  }

	  // Erasing is deferred until the iteration over M.aliases() has finished.
	  if (GlobalOpt) {
	    for (GlobalAlias* A : AliasesToRemove) {
	      A->eraseFromParent();
	    }
	  }

	  // Always force inlining of any function that uses an LDS global address.
	  // This is something of a workaround because we don't have a way of
	  // supporting LDS objects defined in functions. LDS is always allocated by
	  // a kernel, and it is difficult to manage LDS usage if a function may be
	  // used by multiple kernels.
	  //
	  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
	  // should only appear when IPO passes manage to move LDS defined in a
	  // kernel into a single user function.

	  for (GlobalVariable &GV : M.globals()) {
	    // Globals in the region address space are treated the same way as LDS.
	    unsigned AS = GV.getType()->getAddressSpace();
	    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != ASInfo.REGION_ADDRESS)
	      continue;

	    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
	  }

	  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
	    // Functions already carrying the opposite attribute are left untouched.
	    auto IncompatAttr
	      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

	    for (Function &F : M) {
	      if (!F.isDeclaration() && !F.use_empty() &&
	          !F.hasFnAttribute(IncompatAttr)) {
	        if (StressCalls) {
	          // Functions that must be inlined keep priority over the stress
	          // mode.
	          if (!FuncsToAlwaysInline.count(&F))
	            FuncsToNoInline.insert(&F);
	        } else
	          FuncsToAlwaysInline.insert(&F);
	      }
	    }
	  }

	  // NOTE(review): functions collected by recursivelyVisitUsers are not
	  // checked for an existing noinline attribute before always_inline is added
	  // here; confirm whether that combination can occur in practice.
	  for (Function *F : FuncsToAlwaysInline)
	    F->addFnAttr(Attribute::AlwaysInline);

	  for (Function *F : FuncsToNoInline)
	    F->addFnAttr(Attribute::NoInline);

	  if (!FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty())
	    Changed = true;

	  return Changed;
	}

	/// Creates a new AMDGPUAlwaysInline pass instance.
	///
	/// \param GlobalOpt  Forwarded to the pass constructor; when true the pass
	///                   erases the function aliases it has rewritten.
	/// \returns a heap-allocated pass returned as a raw pointer.
	ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
	  ModulePass *Pass = new AMDGPUAlwaysInline(GlobalOpt);
	  return Pass;
	}