blob: 7a7addd0f5cfe8cf160403a6aca5d6a6e410beb5 [file] [log] [blame]
//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated from
/// the caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
/// call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABIs. If a function is cloned, all call sites will
/// be updated to use the correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <string>
#define DEBUG_TYPE "amdgpu-propagate-attributes"
using namespace llvm;
namespace llvm {
// Table of subtarget feature key/value entries, defined in another translation
// unit. getFeatureString() in this file walks it to turn feature bits back
// into "+name"/"-name" strings.
// NOTE(review): presumably the TableGen-generated feature table; the "-1" in
// the extent must match the defining declaration — confirm.
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
namespace {
// Implementation shared by the early (function) and late (module) passes.
// Starting from a set of root functions, it makes the TargetFeatures bits of
// the functions called from those roots agree with the caller, either by
// rewriting the callee's "target-features" attribute in place or by cloning
// the callee and redirecting the call site to the clone.
class AMDGPUPropagateAttributes {
// The subset of subtarget features that is propagated from caller to callee.
// All other feature bits of a callee are preserved.
const FeatureBitset TargetFeatures = {
AMDGPU::FeatureWavefrontSize16,
AMDGPU::FeatureWavefrontSize32,
AMDGPU::FeatureWavefrontSize64
};
// Bookkeeping record for one clone: the function it was cloned from, the
// clone itself, and the propagated feature bits it was created for.
class Clone{
public:
Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
FeatureBitset FeatureMask;
Function *OrigF;
Function *NewF;
};
// Target machine used to look up the subtarget (and thus the feature bits)
// of each function.
const TargetMachine *TM;
// Clone functions as needed or just set attributes.
bool AllowClone;
// Option propagation roots.
SmallSet<Function *, 32> Roots;
// Clones of functions with their attributes.
SmallVector<Clone, 32> Clones;
// Find a clone with required features.
// Returns nullptr if no clone of OrigF with exactly these features is
// recorded in Clones.
Function *findFunction(const FeatureBitset &FeaturesNeeded,
Function *OrigF);
// Clone function F and set NewFeatures on the clone.
// Clone takes the name of original function.
Function *cloneWithFeatures(Function &F,
const FeatureBitset &NewFeatures);
// Set new function's features in place.
void setFeatures(Function &F, const FeatureBitset &NewFeatures);
// Render Features as a comma-separated "target-features" string: set features
// are emitted as "+name", and unset members of TargetFeatures as "-name".
std::string getFeatureString(const FeatureBitset &Features) const;
// Propagate attributes from Roots.
bool process();
public:
AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
TM(TM), AllowClone(AllowClone) {}
// Use F as a root and propagate its attributes.
bool process(Function &F);
// Propagate attributes starting from kernel functions.
bool process(Module &M);
};
// Allows attributes to be propagated early, but no cloning is allowed as it
// must be a function pass to run before any optimizations.
// TODO: We should only need one instance of the module pass, but that needs to
// be in the linker pipeline, which is currently not possible.
class AMDGPUPropagateAttributesEarly : public FunctionPass {
// Target machine handed to the propagation helper; runOnFunction() does
// nothing when it is null.
const TargetMachine *TM;
public:
static char ID; // Pass identification
// Registers the pass with the global PassRegistry on construction.
AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
FunctionPass(ID), TM(TM) {
initializeAMDGPUPropagateAttributesEarlyPass(
*PassRegistry::getPassRegistry());
}
// Propagates attributes from F when F is an entry function; never clones.
bool runOnFunction(Function &F) override;
};
// Allows attributes to be propagated with cloning, but does that late in the
// pipeline.
class AMDGPUPropagateAttributesLate : public ModulePass {
// Target machine handed to the propagation helper; runOnModule() does
// nothing when it is null.
const TargetMachine *TM;
public:
static char ID; // Pass identification
// Registers the pass with the global PassRegistry on construction.
AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
ModulePass(ID), TM(TM) {
initializeAMDGPUPropagateAttributesLatePass(
*PassRegistry::getPassRegistry());
}
// Propagates attributes from all entry functions of M; cloning is allowed.
bool runOnModule(Module &M) override;
};
} // end anonymous namespace.
// Definitions of the static pass-identification members declared in the two
// pass classes above.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;
// Register the early (function) pass under its command-line name.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS — confirm.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
"amdgpu-propagate-attributes-early",
"Early propagate attributes from kernels to functions",
false, false)
// Register the late (module) pass under its command-line name.
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
"amdgpu-propagate-attributes-late",
"Late propagate attributes from kernels to functions",
false, false)
/// Look up a previously created clone of \p OrigF whose recorded feature mask
/// is exactly \p FeaturesNeeded.
///
/// \returns the matching clone, or nullptr when none has been recorded.
Function *
AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
                                        Function *OrigF) {
  // TODO: search for clone's clones.
  for (Clone &Candidate : Clones) {
    if (Candidate.OrigF != OrigF)
      continue;
    if (FeaturesNeeded == Candidate.FeatureMask)
      return Candidate.NewF;
  }
  return nullptr;
}
/// Seed the root set with every function in \p M that uses an entry-function
/// calling convention, then run the propagation.
///
/// \returns whatever process() reports, i.e. true if the module was changed.
bool AMDGPUPropagateAttributes::process(Module &M) {
  for (Function &Fn : M.functions()) {
    if (!AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
      continue;
    Roots.insert(&Fn);
  }
  return process();
}
// Add F to the set of propagation roots and run the propagation from all
// roots collected so far. Returns the result of process().
bool AMDGPUPropagateAttributes::process(Function &F) {
Roots.insert(&F);
return process();
}
/// Propagate the TargetFeatures bits from the functions in Roots to the
/// functions they call, sweeping the module until a sweep finds no new roots.
///
/// For every defined, non-root function F, each call site that lives in a root
/// function and directly calls F is inspected:
///  - if caller and callee already agree on TargetFeatures, F becomes a root
///    for the next sweep;
///  - otherwise, if cloning is not allowed, F's features are overwritten in
///    place and F becomes a root;
///  - otherwise a clone of F carrying the caller's features is looked up or
///    created, and the call site is redirected to that clone.
/// Functions whose call sites were redirected are erased at the end if they
/// have no remaining uses.
///
/// \returns true if any function attribute or call site was changed.
bool AMDGPUPropagateAttributes::process() {
  if (Roots.empty())
    return false;

  bool Changed = false;
  SmallSet<Function *, 32> NewRoots;
  SmallSet<Function *, 32> Replaced;
  Module &M = *(*Roots.begin())->getParent();

  do {
    // Functions settled during the previous sweep act as callers in this one.
    Roots.insert(NewRoots.begin(), NewRoots.end());
    NewRoots.clear();

    for (auto &F : M.functions()) {
      if (F.isDeclaration() || Roots.count(&F))
        continue;

      const FeatureBitset &CalleeBits =
          TM->getSubtargetImpl(F)->getFeatureBits();
      // Invariant across the call sites of F: F's features are only changed
      // on the !AllowClone path, which leaves the loop immediately.
      const FeatureBitset CalleeTargetBits = CalleeBits & TargetFeatures;

      // Call sites are rewritten after the walk so that the use list of F is
      // not modified while it is being iterated.
      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;

      for (User *U : F.users()) {
        CallBase *CI = dyn_cast<CallBase>(U);
        // Only consider call sites where F is the called function. A call
        // that merely passes F as an argument must neither drive propagation
        // nor have its (unrelated) callee overwritten below.
        if (!CI || CI->getCalledFunction() != &F)
          continue;

        Function *Caller = CI->getCaller();
        if (!Caller || !Roots.count(Caller))
          continue;

        const FeatureBitset CallerBits =
            TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;

        if (CallerBits == CalleeTargetBits) {
          NewRoots.insert(&F);
          continue;
        }

        Function *NewF = findFunction(CallerBits, &F);
        if (!NewF) {
          // Keep the callee's unrelated features, take the propagated ones
          // from the caller.
          FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
                                    CallerBits);
          if (!AllowClone) {
            // This may set different features on different iterations if
            // there is a contradiction in callers' attributes. In this case
            // we rely on a second pass running on Module, which is allowed
            // to clone.
            setFeatures(F, NewFeatures);
            NewRoots.insert(&F);
            Changed = true;
            break;
          }

          NewF = cloneWithFeatures(F, NewFeatures);
          Clones.push_back(Clone(CallerBits, &F, NewF));
          NewRoots.insert(NewF);
        }

        ToReplace.push_back(std::make_pair(CI, NewF));
        Replaced.insert(&F);
        Changed = true;
      }

      while (!ToReplace.empty()) {
        auto R = ToReplace.pop_back_val();
        R.first->setCalledFunction(R.second);
      }
    }
  } while (!NewRoots.empty());

  // Drop originals that became dead once their call sites were redirected.
  for (Function *F : Replaced) {
    if (F->use_empty())
      F->eraseFromParent();
  }
  return Changed;
}
/// Clone \p F, give the clone the feature set \p NewFeatures, and swap names
/// so that the clone carries the original's name.
///
/// When \p F is named, the original receives the name the clone was created
/// with and is switched to default visibility and internal linkage.
///
/// \returns the newly created clone.
Function *
AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
                                             const FeatureBitset &NewFeatures) {
  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');

  ValueToValueMapTy VMap; // Required by CloneFunction; not inspected here.
  Function *NewF = CloneFunction(&F, VMap);
  setFeatures(*NewF, NewFeatures);

  // Swap names. If that is the only clone it will retain the name of the now
  // dead value.
  if (F.hasName()) {
    // Copy the clone's name into owned storage before takeName() replaces it.
    // The explicit .str() avoids relying on an implicit StringRef conversion.
    std::string NewName = NewF->getName().str();
    NewF->takeName(&F);
    F.setName(NewName);

    // Name has changed, it does not need an external symbol.
    F.setVisibility(GlobalValue::DefaultVisibility);
    F.setLinkage(GlobalValue::InternalLinkage);
  }

  return NewF;
}
/// Replace the "target-features" attribute of \p F with the string encoding
/// of \p NewFeatures. Debug output shows only the propagated subset.
void AMDGPUPropagateAttributes::setFeatures(Function &F,
                                            const FeatureBitset &NewFeatures) {
  const std::string Encoded = getFeatureString(NewFeatures);

  LLVM_DEBUG(dbgs() << "Set features "
                    << getFeatureString(NewFeatures & TargetFeatures)
                    << " on " << F.getName() << '\n');

  // Drop any existing attribute first, then install the new value.
  const char *const AttrName = "target-features";
  F.removeFnAttr(AttrName);
  F.addFnAttr(AttrName, Encoded);
}
std::string
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
{
std::string Ret;
for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
if (Features[KV.Value])
Ret += (StringRef("+") + KV.Key + ",").str();
else if (TargetFeatures[KV.Value])
Ret += (StringRef("-") + KV.Key + ",").str();
}
Ret.pop_back(); // Remove last comma.
return Ret;
}
/// Early pass entry point: propagate attributes from \p F when it is an entry
/// function. Nothing is done without a target machine, and cloning is never
/// allowed here.
bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
  if (!TM)
    return false;
  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/false);
  return Propagator.process(F);
}
/// Late pass entry point: propagate attributes from all entry functions of
/// \p M, cloning callees where callers disagree. Nothing is done without a
/// target machine.
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
  if (TM) {
    AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/true);
    return Propagator.process(M);
  }
  return false;
}
// Factory for the early (function) pass. Returns a newly allocated pass bound
// to TM.
// NOTE(review): ownership presumably passes to the pass manager the result is
// added to — confirm at call sites.
FunctionPass
*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
return new AMDGPUPropagateAttributesEarly(TM);
}
// Factory for the late (module) pass. Returns a newly allocated pass bound to
// TM.
// NOTE(review): ownership presumably passes to the pass manager the result is
// added to — confirm at call sites.
ModulePass
*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
return new AMDGPUPropagateAttributesLate(TM);
}