| //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass merges conditional blocks of code and reduces the number of |
| // conditional branches in the hot paths based on profiles. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/StringSet.h" |
| #include "llvm/Analysis/BlockFrequencyInfo.h" |
| #include "llvm/Analysis/GlobalsModRef.h" |
| #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| #include "llvm/Analysis/ProfileSummaryInfo.h" |
| #include "llvm/Analysis/RegionInfo.h" |
| #include "llvm/Analysis/RegionIterator.h" |
| #include "llvm/Analysis/ValueTracking.h" |
| #include "llvm/IR/CFG.h" |
| #include "llvm/IR/Dominators.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/MDBuilder.h" |
| #include "llvm/InitializePasses.h" |
| #include "llvm/Support/BranchProbability.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/MemoryBuffer.h" |
| #include "llvm/Transforms/Utils.h" |
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| #include "llvm/Transforms/Utils/Cloning.h" |
| #include "llvm/Transforms/Utils/ValueMapper.h" |
| |
| #include <set> |
| #include <sstream> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "chr" |
| |
| #define CHR_DEBUG(X) LLVM_DEBUG(X) |
| |
| static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, |
| cl::desc("Apply CHR for all functions")); |
| |
| static cl::opt<double> CHRBiasThreshold( |
| "chr-bias-threshold", cl::init(0.99), cl::Hidden, |
| cl::desc("CHR considers a branch bias greater than this ratio as biased")); |
| |
| static cl::opt<unsigned> CHRMergeThreshold( |
| "chr-merge-threshold", cl::init(2), cl::Hidden, |
| cl::desc("CHR merges a group of N branches/selects where N >= this value")); |
| |
| static cl::opt<std::string> CHRModuleList( |
| "chr-module-list", cl::init(""), cl::Hidden, |
| cl::desc("Specify file to retrieve the list of modules to apply CHR to")); |
| |
| static cl::opt<std::string> CHRFunctionList( |
| "chr-function-list", cl::init(""), cl::Hidden, |
| cl::desc("Specify file to retrieve the list of functions to apply CHR to")); |
| |
| static StringSet<> CHRModules; |
| static StringSet<> CHRFunctions; |
| |
| static void parseCHRFilterFiles() { |
| if (!CHRModuleList.empty()) { |
| auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); |
| if (!FileOrErr) { |
| errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; |
| std::exit(1); |
| } |
| StringRef Buf = FileOrErr->get()->getBuffer(); |
| SmallVector<StringRef, 0> Lines; |
| Buf.split(Lines, '\n'); |
| for (StringRef Line : Lines) { |
| Line = Line.trim(); |
| if (!Line.empty()) |
| CHRModules.insert(Line); |
| } |
| } |
| if (!CHRFunctionList.empty()) { |
| auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); |
| if (!FileOrErr) { |
| errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; |
| std::exit(1); |
| } |
| StringRef Buf = FileOrErr->get()->getBuffer(); |
| SmallVector<StringRef, 0> Lines; |
| Buf.split(Lines, '\n'); |
| for (StringRef Line : Lines) { |
| Line = Line.trim(); |
| if (!Line.empty()) |
| CHRFunctions.insert(Line); |
| } |
| } |
| } |
| |
| namespace { |
| class ControlHeightReductionLegacyPass : public FunctionPass { |
| public: |
| static char ID; |
| |
| ControlHeightReductionLegacyPass() : FunctionPass(ID) { |
| initializeControlHeightReductionLegacyPassPass( |
| *PassRegistry::getPassRegistry()); |
| parseCHRFilterFiles(); |
| } |
| |
| bool runOnFunction(Function &F) override; |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.addRequired<BlockFrequencyInfoWrapperPass>(); |
| AU.addRequired<DominatorTreeWrapperPass>(); |
| AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
| AU.addRequired<RegionInfoPass>(); |
| AU.addPreserved<GlobalsAAWrapperPass>(); |
| } |
| }; |
| } // end anonymous namespace |
| |
| char ControlHeightReductionLegacyPass::ID = 0; |
| |
| INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, |
| "chr", |
| "Reduce control height in the hot paths", |
| false, false) |
| INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) |
| INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
| INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) |
| INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, |
| "chr", |
| "Reduce control height in the hot paths", |
| false, false) |
| |
| FunctionPass *llvm::createControlHeightReductionLegacyPass() { |
| return new ControlHeightReductionLegacyPass(); |
| } |
| |
| namespace { |
| |
| struct CHRStats { |
| CHRStats() : NumBranches(0), NumBranchesDelta(0), |
| WeightedNumBranchesDelta(0) {} |
| void print(raw_ostream &OS) const { |
| OS << "CHRStats: NumBranches " << NumBranches |
| << " NumBranchesDelta " << NumBranchesDelta |
| << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; |
| } |
| uint64_t NumBranches; // The original number of conditional branches / |
| // selects |
| uint64_t NumBranchesDelta; // The decrease of the number of conditional |
| // branches / selects in the hot paths due to CHR. |
| uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile |
| // count at the scope entry. |
| }; |
| |
| // RegInfo - some properties of a Region. |
| struct RegInfo { |
| RegInfo() : R(nullptr), HasBranch(false) {} |
| RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} |
| Region *R; |
| bool HasBranch; |
| SmallVector<SelectInst *, 8> Selects; |
| }; |
| |
| typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; |
| |
| // CHRScope - a sequence of regions to CHR together. It corresponds to a |
| // sequence of conditional blocks. It can have subscopes which correspond to |
| // nested conditional blocks. Nested CHRScopes form a tree. |
| class CHRScope { |
| public: |
| CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { |
| assert(RI.R && "Null RegionIn"); |
| RegInfos.push_back(RI); |
| } |
| |
| Region *getParentRegion() { |
| assert(RegInfos.size() > 0 && "Empty CHRScope"); |
| Region *Parent = RegInfos[0].R->getParent(); |
| assert(Parent && "Unexpected to call this on the top-level region"); |
| return Parent; |
| } |
| |
| BasicBlock *getEntryBlock() { |
| assert(RegInfos.size() > 0 && "Empty CHRScope"); |
| return RegInfos.front().R->getEntry(); |
| } |
| |
| BasicBlock *getExitBlock() { |
| assert(RegInfos.size() > 0 && "Empty CHRScope"); |
| return RegInfos.back().R->getExit(); |
| } |
| |
| bool appendable(CHRScope *Next) { |
| // The next scope is appendable only if this scope is directly connected to |
| // it (which implies it post-dominates this scope) and this scope dominates |
| // it (no edge to the next scope outside this scope). |
| BasicBlock *NextEntry = Next->getEntryBlock(); |
| if (getExitBlock() != NextEntry) |
| // Not directly connected. |
| return false; |
| Region *LastRegion = RegInfos.back().R; |
| for (BasicBlock *Pred : predecessors(NextEntry)) |
| if (!LastRegion->contains(Pred)) |
| // There's an edge going into the entry of the next scope from outside |
| // of this scope. |
| return false; |
| return true; |
| } |
| |
| void append(CHRScope *Next) { |
| assert(RegInfos.size() > 0 && "Empty CHRScope"); |
| assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); |
| assert(getParentRegion() == Next->getParentRegion() && |
| "Must be siblings"); |
| assert(getExitBlock() == Next->getEntryBlock() && |
| "Must be adjacent"); |
| for (RegInfo &RI : Next->RegInfos) |
| RegInfos.push_back(RI); |
| for (CHRScope *Sub : Next->Subs) |
| Subs.push_back(Sub); |
| } |
| |
| void addSub(CHRScope *SubIn) { |
| #ifndef NDEBUG |
| bool IsChild = false; |
| for (RegInfo &RI : RegInfos) |
| if (RI.R == SubIn->getParentRegion()) { |
| IsChild = true; |
| break; |
| } |
| assert(IsChild && "Must be a child"); |
| #endif |
| Subs.push_back(SubIn); |
| } |
| |
| // Split this scope at the boundary region into two, which will belong to the |
| // tail and returns the tail. |
| CHRScope *split(Region *Boundary) { |
| assert(Boundary && "Boundary null"); |
| assert(RegInfos.begin()->R != Boundary && |
| "Can't be split at beginning"); |
| auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), |
| [&Boundary](const RegInfo& RI) { |
| return Boundary == RI.R; |
| }); |
| if (BoundaryIt == RegInfos.end()) |
| return nullptr; |
| SmallVector<RegInfo, 8> TailRegInfos; |
| SmallVector<CHRScope *, 8> TailSubs; |
| TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); |
| RegInfos.resize(BoundaryIt - RegInfos.begin()); |
| DenseSet<Region *> TailRegionSet; |
| for (RegInfo &RI : TailRegInfos) |
| TailRegionSet.insert(RI.R); |
| for (auto It = Subs.begin(); It != Subs.end(); ) { |
| CHRScope *Sub = *It; |
| assert(Sub && "null Sub"); |
| Region *Parent = Sub->getParentRegion(); |
| if (TailRegionSet.count(Parent)) { |
| TailSubs.push_back(Sub); |
| It = Subs.erase(It); |
| } else { |
| assert(std::find_if(RegInfos.begin(), RegInfos.end(), |
| [&Parent](const RegInfo& RI) { |
| return Parent == RI.R; |
| }) != RegInfos.end() && |
| "Must be in head"); |
| ++It; |
| } |
| } |
| assert(HoistStopMap.empty() && "MapHoistStops must be empty"); |
| return new CHRScope(TailRegInfos, TailSubs); |
| } |
| |
| bool contains(Instruction *I) const { |
| BasicBlock *Parent = I->getParent(); |
| for (const RegInfo &RI : RegInfos) |
| if (RI.R->contains(Parent)) |
| return true; |
| return false; |
| } |
| |
| void print(raw_ostream &OS) const; |
| |
| SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope |
| SmallVector<CHRScope *, 8> Subs; // Subscopes. |
| |
| // The instruction at which to insert the CHR conditional branch (and hoist |
| // the dependent condition values). |
| Instruction *BranchInsertPoint; |
| |
| // True-biased and false-biased regions (conditional blocks), |
| // respectively. Used only for the outermost scope and includes regions in |
| // subscopes. The rest are unbiased. |
| DenseSet<Region *> TrueBiasedRegions; |
| DenseSet<Region *> FalseBiasedRegions; |
| // Among the biased regions, the regions that get CHRed. |
| SmallVector<RegInfo, 8> CHRRegions; |
| |
| // True-biased and false-biased selects, respectively. Used only for the |
| // outermost scope and includes ones in subscopes. |
| DenseSet<SelectInst *> TrueBiasedSelects; |
| DenseSet<SelectInst *> FalseBiasedSelects; |
| |
| // Map from one of the above regions to the instructions to stop |
| // hoisting instructions at through use-def chains. |
| HoistStopMapTy HoistStopMap; |
| |
| private: |
| CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, |
| SmallVector<CHRScope *, 8> &SubsIn) |
| : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} |
| }; |
| |
| class CHR { |
| public: |
| CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, |
| ProfileSummaryInfo &PSIin, RegionInfo &RIin, |
| OptimizationRemarkEmitter &OREin) |
| : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} |
| |
| ~CHR() { |
| for (CHRScope *Scope : Scopes) { |
| delete Scope; |
| } |
| } |
| |
| bool run(); |
| |
| private: |
| // See the comments in CHR::run() for the high level flow of the algorithm and |
| // what the following functions do. |
| |
| void findScopes(SmallVectorImpl<CHRScope *> &Output) { |
| Region *R = RI.getTopLevelRegion(); |
| CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); |
| if (Scope) { |
| Output.push_back(Scope); |
| } |
| } |
| CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, |
| SmallVectorImpl<CHRScope *> &Scopes); |
| CHRScope *findScope(Region *R); |
| void checkScopeHoistable(CHRScope *Scope); |
| |
| void splitScopes(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output); |
| SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, |
| CHRScope *Outer, |
| DenseSet<Value *> *OuterConditionValues, |
| Instruction *OuterInsertPoint, |
| SmallVectorImpl<CHRScope *> &Output, |
| DenseSet<Instruction *> &Unhoistables); |
| |
| void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); |
| void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); |
| |
| void filterScopes(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output); |
| |
| void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output); |
| void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); |
| |
| void sortScopes(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output); |
| |
| void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); |
| void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); |
| void cloneScopeBlocks(CHRScope *Scope, |
| BasicBlock *PreEntryBlock, |
| BasicBlock *ExitBlock, |
| Region *LastRegion, |
| ValueToValueMapTy &VMap); |
| BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, |
| BasicBlock *EntryBlock, |
| BasicBlock *NewEntryBlock, |
| ValueToValueMapTy &VMap); |
| void fixupBranchesAndSelects(CHRScope *Scope, |
| BasicBlock *PreEntryBlock, |
| BranchInst *MergedBR, |
| uint64_t ProfileCount); |
| void fixupBranch(Region *R, |
| CHRScope *Scope, |
| IRBuilder<> &IRB, |
| Value *&MergedCondition, BranchProbability &CHRBranchBias); |
| void fixupSelect(SelectInst* SI, |
| CHRScope *Scope, |
| IRBuilder<> &IRB, |
| Value *&MergedCondition, BranchProbability &CHRBranchBias); |
| void addToMergedCondition(bool IsTrueBiased, Value *Cond, |
| Instruction *BranchOrSelect, |
| CHRScope *Scope, |
| IRBuilder<> &IRB, |
| Value *&MergedCondition); |
| |
| Function &F; |
| BlockFrequencyInfo &BFI; |
| DominatorTree &DT; |
| ProfileSummaryInfo &PSI; |
| RegionInfo &RI; |
| OptimizationRemarkEmitter &ORE; |
| CHRStats Stats; |
| |
| // All the true-biased regions in the function |
| DenseSet<Region *> TrueBiasedRegionsGlobal; |
| // All the false-biased regions in the function |
| DenseSet<Region *> FalseBiasedRegionsGlobal; |
| // All the true-biased selects in the function |
| DenseSet<SelectInst *> TrueBiasedSelectsGlobal; |
| // All the false-biased selects in the function |
| DenseSet<SelectInst *> FalseBiasedSelectsGlobal; |
| // A map from biased regions to their branch bias |
| DenseMap<Region *, BranchProbability> BranchBiasMap; |
| // A map from biased selects to their branch bias |
| DenseMap<SelectInst *, BranchProbability> SelectBiasMap; |
| // All the scopes. |
| DenseSet<CHRScope *> Scopes; |
| }; |
| |
| } // end anonymous namespace |
| |
| static inline |
| raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, |
| const CHRStats &Stats) { |
| Stats.print(OS); |
| return OS; |
| } |
| |
| static inline |
| raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { |
| Scope.print(OS); |
| return OS; |
| } |
| |
| static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { |
| if (ForceCHR) |
| return true; |
| |
| if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { |
| if (CHRModules.count(F.getParent()->getName())) |
| return true; |
| return CHRFunctions.count(F.getName()); |
| } |
| |
| assert(PSI.hasProfileSummary() && "Empty PSI?"); |
| return PSI.isFunctionEntryHot(&F); |
| } |
| |
| static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, |
| CHRStats *Stats) { |
| StringRef FuncName = F.getName(); |
| StringRef ModuleName = F.getParent()->getName(); |
| (void)(FuncName); // Unused in release build. |
| (void)(ModuleName); // Unused in release build. |
| CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " |
| << FuncName); |
| if (Stats) |
| CHR_DEBUG(dbgs() << " " << *Stats); |
| CHR_DEBUG(dbgs() << "\n"); |
| CHR_DEBUG(F.dump()); |
| } |
| |
| void CHRScope::print(raw_ostream &OS) const { |
| assert(RegInfos.size() > 0 && "Empty CHRScope"); |
| OS << "CHRScope["; |
| OS << RegInfos.size() << ", Regions["; |
| for (const RegInfo &RI : RegInfos) { |
| OS << RI.R->getNameStr(); |
| if (RI.HasBranch) |
| OS << " B"; |
| if (RI.Selects.size() > 0) |
| OS << " S" << RI.Selects.size(); |
| OS << ", "; |
| } |
| if (RegInfos[0].R->getParent()) { |
| OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); |
| } else { |
| // top level region |
| OS << "]"; |
| } |
| OS << ", Subs["; |
| for (CHRScope *Sub : Subs) { |
| OS << *Sub << ", "; |
| } |
| OS << "]]"; |
| } |
| |
| // Return true if the given instruction type can be hoisted by CHR. |
| static bool isHoistableInstructionType(Instruction *I) { |
| return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || |
| isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || |
| isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || |
| isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || |
| isa<InsertValueInst>(I); |
| } |
| |
| // Return true if the given instruction can be hoisted by CHR. |
| static bool isHoistable(Instruction *I, DominatorTree &DT) { |
| if (!isHoistableInstructionType(I)) |
| return false; |
| return isSafeToSpeculativelyExecute(I, nullptr, &DT); |
| } |
| |
| // Recursively traverse the use-def chains of the given value and return a set |
| // of the unhoistable base values defined within the scope (excluding the |
| // first-region entry block) or the (hoistable or unhoistable) base values that |
| // are defined outside (including the first-region entry block) of the |
| // scope. The returned set doesn't include constants. |
| static std::set<Value *> getBaseValues( |
| Value *V, DominatorTree &DT, |
| DenseMap<Value *, std::set<Value *>> &Visited) { |
| if (Visited.count(V)) { |
| return Visited[V]; |
| } |
| std::set<Value *> Result; |
| if (auto *I = dyn_cast<Instruction>(V)) { |
| // We don't stop at a block that's not in the Scope because we would miss some |
| // instructions that are based on the same base values if we stop there. |
| if (!isHoistable(I, DT)) { |
| Result.insert(I); |
| Visited.insert(std::make_pair(V, Result)); |
| return Result; |
| } |
| // I is hoistable above the Scope. |
| for (Value *Op : I->operands()) { |
| std::set<Value *> OpResult = getBaseValues(Op, DT, Visited); |
| Result.insert(OpResult.begin(), OpResult.end()); |
| } |
| Visited.insert(std::make_pair(V, Result)); |
| return Result; |
| } |
| if (isa<Argument>(V)) { |
| Result.insert(V); |
| Visited.insert(std::make_pair(V, Result)); |
| return Result; |
| } |
| // We don't include others like constants because those won't lead to any |
| // chance of folding of conditions (eg two bit checks merged into one check) |
| // after CHR. |
| Visited.insert(std::make_pair(V, Result)); |
| return Result; // empty |
| } |
| |
| // Return true if V is already hoisted or can be hoisted (along with its |
| // operands) above the insert point. When it returns true and HoistStops is |
| // non-null, the instructions to stop hoisting at through the use-def chains are |
| // inserted into HoistStops. |
| static bool |
| checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, |
| DenseSet<Instruction *> &Unhoistables, |
| DenseSet<Instruction *> *HoistStops, |
| DenseMap<Instruction *, bool> &Visited) { |
| assert(InsertPoint && "Null InsertPoint"); |
| if (auto *I = dyn_cast<Instruction>(V)) { |
| if (Visited.count(I)) { |
| return Visited[I]; |
| } |
| assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); |
| assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); |
| if (Unhoistables.count(I)) { |
| // Don't hoist if they are not to be hoisted. |
| Visited[I] = false; |
| return false; |
| } |
| if (DT.dominates(I, InsertPoint)) { |
| // We are already above the insert point. Stop here. |
| if (HoistStops) |
| HoistStops->insert(I); |
| Visited[I] = true; |
| return true; |
| } |
| // We aren't not above the insert point, check if we can hoist it above the |
| // insert point. |
| if (isHoistable(I, DT)) { |
| // Check operands first. |
| DenseSet<Instruction *> OpsHoistStops; |
| bool AllOpsHoisted = true; |
| for (Value *Op : I->operands()) { |
| if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, |
| Visited)) { |
| AllOpsHoisted = false; |
| break; |
| } |
| } |
| if (AllOpsHoisted) { |
| CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); |
| if (HoistStops) |
| HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); |
| Visited[I] = true; |
| return true; |
| } |
| } |
| Visited[I] = false; |
| return false; |
| } |
| // Non-instructions are considered hoistable. |
| return true; |
| } |
| |
| // Returns true and sets the true probability and false probability of an |
| // MD_prof metadata if it's well-formed. |
| static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, |
| BranchProbability &FalseProb) { |
| if (!MD) return false; |
| MDString *MDName = cast<MDString>(MD->getOperand(0)); |
| if (MDName->getString() != "branch_weights" || |
| MD->getNumOperands() != 3) |
| return false; |
| ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); |
| ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); |
| if (!TrueWeight || !FalseWeight) |
| return false; |
| uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); |
| uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); |
| uint64_t SumWt = TrueWt + FalseWt; |
| |
| assert(SumWt >= TrueWt && SumWt >= FalseWt && |
| "Overflow calculating branch probabilities."); |
| |
| // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. |
| if (SumWt == 0) |
| return false; |
| |
| TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); |
| FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); |
| return true; |
| } |
| |
| static BranchProbability getCHRBiasThreshold() { |
| return BranchProbability::getBranchProbability( |
| static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); |
| } |
| |
| // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= |
| // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= |
| // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return |
| // false. |
| template <typename K, typename S, typename M> |
| static bool checkBias(K *Key, BranchProbability TrueProb, |
| BranchProbability FalseProb, S &TrueSet, S &FalseSet, |
| M &BiasMap) { |
| BranchProbability Threshold = getCHRBiasThreshold(); |
| if (TrueProb >= Threshold) { |
| TrueSet.insert(Key); |
| BiasMap[Key] = TrueProb; |
| return true; |
| } else if (FalseProb >= Threshold) { |
| FalseSet.insert(Key); |
| BiasMap[Key] = FalseProb; |
| return true; |
| } |
| return false; |
| } |
| |
| // Returns true and insert a region into the right biased set and the map if the |
| // branch of the region is biased. |
| static bool checkBiasedBranch(BranchInst *BI, Region *R, |
| DenseSet<Region *> &TrueBiasedRegionsGlobal, |
| DenseSet<Region *> &FalseBiasedRegionsGlobal, |
| DenseMap<Region *, BranchProbability> &BranchBiasMap) { |
| if (!BI->isConditional()) |
| return false; |
| BranchProbability ThenProb, ElseProb; |
| if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), |
| ThenProb, ElseProb)) |
| return false; |
| BasicBlock *IfThen = BI->getSuccessor(0); |
| BasicBlock *IfElse = BI->getSuccessor(1); |
| assert((IfThen == R->getExit() || IfElse == R->getExit()) && |
| IfThen != IfElse && |
| "Invariant from findScopes"); |
| if (IfThen == R->getExit()) { |
| // Swap them so that IfThen/ThenProb means going into the conditional code |
| // and IfElse/ElseProb means skipping it. |
| std::swap(IfThen, IfElse); |
| std::swap(ThenProb, ElseProb); |
| } |
| CHR_DEBUG(dbgs() << "BI " << *BI << " "); |
| CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); |
| CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); |
| return checkBias(R, ThenProb, ElseProb, |
| TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, |
| BranchBiasMap); |
| } |
| |
| // Returns true and insert a select into the right biased set and the map if the |
| // select is biased. |
| static bool checkBiasedSelect( |
| SelectInst *SI, Region *R, |
| DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, |
| DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, |
| DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { |
| BranchProbability TrueProb, FalseProb; |
| if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), |
| TrueProb, FalseProb)) |
| return false; |
| CHR_DEBUG(dbgs() << "SI " << *SI << " "); |
| CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); |
| CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); |
| return checkBias(SI, TrueProb, FalseProb, |
| TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, |
| SelectBiasMap); |
| } |
| |
| // Returns the instruction at which to hoist the dependent condition values and |
| // insert the CHR branch for a region. This is the terminator branch in the |
| // entry block or the first select in the entry block, if any. |
| static Instruction* getBranchInsertPoint(RegInfo &RI) { |
| Region *R = RI.R; |
| BasicBlock *EntryBB = R->getEntry(); |
| // The hoist point is by default the terminator of the entry block, which is |
| // the same as the branch instruction if RI.HasBranch is true. |
| Instruction *HoistPoint = EntryBB->getTerminator(); |
| for (SelectInst *SI : RI.Selects) { |
| if (SI->getParent() == EntryBB) { |
| // Pick the first select in Selects in the entry block. Note Selects is |
| // sorted in the instruction order within a block (asserted below). |
| HoistPoint = SI; |
| break; |
| } |
| } |
| assert(HoistPoint && "Null HoistPoint"); |
| #ifndef NDEBUG |
| // Check that HoistPoint is the first one in Selects in the entry block, |
| // if any. |
| DenseSet<Instruction *> EntryBlockSelectSet; |
| for (SelectInst *SI : RI.Selects) { |
| if (SI->getParent() == EntryBB) { |
| EntryBlockSelectSet.insert(SI); |
| } |
| } |
| for (Instruction &I : *EntryBB) { |
| if (EntryBlockSelectSet.count(&I) > 0) { |
| assert(&I == HoistPoint && |
| "HoistPoint must be the first one in Selects"); |
| break; |
| } |
| } |
| #endif |
| return HoistPoint; |
| } |
| |
| // Find a CHR scope in the given region. |
| CHRScope * CHR::findScope(Region *R) { |
| CHRScope *Result = nullptr; |
| BasicBlock *Entry = R->getEntry(); |
| BasicBlock *Exit = R->getExit(); // null if top level. |
| assert(Entry && "Entry must not be null"); |
| assert((Exit == nullptr) == (R->isTopLevelRegion()) && |
| "Only top level region has a null exit"); |
| if (Entry) |
| CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); |
| else |
| CHR_DEBUG(dbgs() << "Entry null\n"); |
| if (Exit) |
| CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); |
| else |
| CHR_DEBUG(dbgs() << "Exit null\n"); |
| // Exclude cases where Entry is part of a subregion (hence it doesn't belong |
| // to this region). |
| bool EntryInSubregion = RI.getRegionFor(Entry) != R; |
| if (EntryInSubregion) |
| return nullptr; |
| // Exclude loops |
| for (BasicBlock *Pred : predecessors(Entry)) |
| if (R->contains(Pred)) |
| return nullptr; |
| if (Exit) { |
| // Try to find an if-then block (check if R is an if-then). |
| // if (cond) { |
| // ... |
| // } |
| auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); |
| if (BI) |
| CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); |
| else |
| CHR_DEBUG(dbgs() << "BI null\n"); |
| if (BI && BI->isConditional()) { |
| BasicBlock *S0 = BI->getSuccessor(0); |
| BasicBlock *S1 = BI->getSuccessor(1); |
| CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); |
| CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); |
| if (S0 != S1 && (S0 == Exit || S1 == Exit)) { |
| RegInfo RI(R); |
| RI.HasBranch = checkBiasedBranch( |
| BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, |
| BranchBiasMap); |
| Result = new CHRScope(RI); |
| Scopes.insert(Result); |
| CHR_DEBUG(dbgs() << "Found a region with a branch\n"); |
| ++Stats.NumBranches; |
| if (!RI.HasBranch) { |
| ORE.emit([&]() { |
| return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) |
| << "Branch not biased"; |
| }); |
| } |
| } |
| } |
| } |
| { |
| // Try to look for selects in the direct child blocks (as opposed to in |
| // subregions) of R. |
| // ... |
| // if (..) { // Some subregion |
| // ... |
| // } |
| // if (..) { // Some subregion |
| // ... |
| // } |
| // ... |
| // a = cond ? b : c; |
| // ... |
| SmallVector<SelectInst *, 8> Selects; |
| for (RegionNode *E : R->elements()) { |
| if (E->isSubRegion()) |
| continue; |
| // This returns the basic block of E if E is a direct child of R (not a |
| // subregion.) |
| BasicBlock *BB = E->getEntry(); |
| // Need to push in the order to make it easier to find the first Select |
| // later. |
| for (Instruction &I : *BB) { |
| if (auto *SI = dyn_cast<SelectInst>(&I)) { |
| Selects.push_back(SI); |
| ++Stats.NumBranches; |
| } |
| } |
| } |
| if (Selects.size() > 0) { |
| auto AddSelects = [&](RegInfo &RI) { |
| for (auto *SI : Selects) |
| if (checkBiasedSelect(SI, RI.R, |
| TrueBiasedSelectsGlobal, |
| FalseBiasedSelectsGlobal, |
| SelectBiasMap)) |
| RI.Selects.push_back(SI); |
| else |
| ORE.emit([&]() { |
| return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) |
| << "Select not biased"; |
| }); |
| }; |
| if (!Result) { |
| CHR_DEBUG(dbgs() << "Found a select-only region\n"); |
| RegInfo RI(R); |
| AddSelects(RI); |
| Result = new CHRScope(RI); |
| Scopes.insert(Result); |
| } else { |
| CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); |
| AddSelects(Result->RegInfos[0]); |
| } |
| } |
| } |
| |
| if (Result) { |
| checkScopeHoistable(Result); |
| } |
| return Result; |
| } |
| |
| // Check that any of the branch and the selects in the region could be |
| // hoisted above the the CHR branch insert point (the most dominating of |
| // them, either the branch (at the end of the first block) or the first |
| // select in the first block). If the branch can't be hoisted, drop the |
| // selects in the first blocks. |
| // |
| // For example, for the following scope/region with selects, we want to insert |
| // the merged branch right before the first select in the first/entry block by |
| // hoisting c1, c2, c3, and c4. |
| // |
| // // Branch insert point here. |
| // a = c1 ? b : c; // Select 1 |
| // d = c2 ? e : f; // Select 2 |
| // if (c3) { // Branch |
| // ... |
| // c4 = foo() // A call. |
| // g = c4 ? h : i; // Select 3 |
| // } |
| // |
| // But suppose we can't hoist c4 because it's dependent on the preceding |
| // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop |
| // Select 2. If we can't hoist c3, we drop Selects 1 & 2. |
| void CHR::checkScopeHoistable(CHRScope *Scope) { |
| RegInfo &RI = Scope->RegInfos[0]; |
| Region *R = RI.R; |
| BasicBlock *EntryBB = R->getEntry(); |
| auto *Branch = RI.HasBranch ? |
| cast<BranchInst>(EntryBB->getTerminator()) : nullptr; |
| SmallVector<SelectInst *, 8> &Selects = RI.Selects; |
| if (RI.HasBranch || !Selects.empty()) { |
| Instruction *InsertPoint = getBranchInsertPoint(RI); |
| CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); |
| // Avoid a data dependence from a select or a branch to a(nother) |
| // select. Note no instruction can't data-depend on a branch (a branch |
| // instruction doesn't produce a value). |
| DenseSet<Instruction *> Unhoistables; |
| // Initialize Unhoistables with the selects. |
| for (SelectInst *SI : Selects) { |
| Unhoistables.insert(SI); |
| } |
| // Remove Selects that can't be hoisted. |
| for (auto it = Selects.begin(); it != Selects.end(); ) { |
| SelectInst *SI = *it; |
| if (SI == InsertPoint) { |
| ++it; |
| continue; |
| } |
| DenseMap<Instruction *, bool> Visited; |
| bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, |
| DT, Unhoistables, nullptr, Visited); |
| if (!IsHoistable) { |
| CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); |
| ORE.emit([&]() { |
| return OptimizationRemarkMissed(DEBUG_TYPE, |
| "DropUnhoistableSelect", SI) |
| << "Dropped unhoistable select"; |
| }); |
| it = Selects.erase(it); |
| // Since we are dropping the select here, we also drop it from |
| // Unhoistables. |
| Unhoistables.erase(SI); |
| } else |
| ++it; |
| } |
| // Update InsertPoint after potentially removing selects. |
| InsertPoint = getBranchInsertPoint(RI); |
| CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); |
| if (RI.HasBranch && InsertPoint != Branch) { |
| DenseMap<Instruction *, bool> Visited; |
| bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, |
| DT, Unhoistables, nullptr, Visited); |
| if (!IsHoistable) { |
| // If the branch isn't hoistable, drop the selects in the entry |
| // block, preferring the branch, which makes the branch the hoist |
| // point. |
| assert(InsertPoint != Branch && "Branch must not be the hoist point"); |
| CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); |
| CHR_DEBUG( |
| for (SelectInst *SI : Selects) { |
| dbgs() << "SI " << *SI << "\n"; |
| }); |
| for (SelectInst *SI : Selects) { |
| ORE.emit([&]() { |
| return OptimizationRemarkMissed(DEBUG_TYPE, |
| "DropSelectUnhoistableBranch", SI) |
| << "Dropped select due to unhoistable branch"; |
| }); |
| } |
| Selects.erase(std::remove_if(Selects.begin(), Selects.end(), |
| [EntryBB](SelectInst *SI) { |
| return SI->getParent() == EntryBB; |
| }), Selects.end()); |
| Unhoistables.clear(); |
| InsertPoint = Branch; |
| } |
| } |
| CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); |
| #ifndef NDEBUG |
| if (RI.HasBranch) { |
| assert(!DT.dominates(Branch, InsertPoint) && |
| "Branch can't be already above the hoist point"); |
| DenseMap<Instruction *, bool> Visited; |
| assert(checkHoistValue(Branch->getCondition(), InsertPoint, |
| DT, Unhoistables, nullptr, Visited) && |
| "checkHoistValue for branch"); |
| } |
| for (auto *SI : Selects) { |
| assert(!DT.dominates(SI, InsertPoint) && |
| "SI can't be already above the hoist point"); |
| DenseMap<Instruction *, bool> Visited; |
| assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, |
| Unhoistables, nullptr, Visited) && |
| "checkHoistValue for selects"); |
| } |
| CHR_DEBUG(dbgs() << "Result\n"); |
| if (RI.HasBranch) { |
| CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); |
| } |
| for (auto *SI : Selects) { |
| CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); |
| } |
| #endif |
| } |
| } |
| |
| // Traverse the region tree, find all nested scopes and merge them if possible. |
| CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, |
| SmallVectorImpl<CHRScope *> &Scopes) { |
| CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); |
| CHRScope *Result = findScope(R); |
| // Visit subscopes. |
| CHRScope *ConsecutiveSubscope = nullptr; |
| SmallVector<CHRScope *, 8> Subscopes; |
| for (auto It = R->begin(); It != R->end(); ++It) { |
| const std::unique_ptr<Region> &SubR = *It; |
| auto NextIt = std::next(It); |
| Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; |
| CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() |
| << "\n"); |
| CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); |
| if (SubCHRScope) { |
| CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); |
| } else { |
| CHR_DEBUG(dbgs() << "Subregion Scope null\n"); |
| } |
| if (SubCHRScope) { |
| if (!ConsecutiveSubscope) |
| ConsecutiveSubscope = SubCHRScope; |
| else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { |
| Subscopes.push_back(ConsecutiveSubscope); |
| ConsecutiveSubscope = SubCHRScope; |
| } else |
| ConsecutiveSubscope->append(SubCHRScope); |
| } else { |
| if (ConsecutiveSubscope) { |
| Subscopes.push_back(ConsecutiveSubscope); |
| } |
| ConsecutiveSubscope = nullptr; |
| } |
| } |
| if (ConsecutiveSubscope) { |
| Subscopes.push_back(ConsecutiveSubscope); |
| } |
| for (CHRScope *Sub : Subscopes) { |
| if (Result) { |
| // Combine it with the parent. |
| Result->addSub(Sub); |
| } else { |
| // Push Subscopes as they won't be combined with the parent. |
| Scopes.push_back(Sub); |
| } |
| } |
| return Result; |
| } |
| |
| static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { |
| DenseSet<Value *> ConditionValues; |
| if (RI.HasBranch) { |
| auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); |
| ConditionValues.insert(BI->getCondition()); |
| } |
| for (SelectInst *SI : RI.Selects) { |
| ConditionValues.insert(SI->getCondition()); |
| } |
| return ConditionValues; |
| } |
| |
| |
| // Determine whether to split a scope depending on the sets of the branch |
| // condition values of the previous region and the current region. We split |
| // (return true) it if 1) the condition values of the inner/lower scope can't be |
| // hoisted up to the outer/upper scope, or 2) the two sets of the condition |
| // values have an empty intersection (because the combined branch conditions |
| // won't probably lead to a simpler combined condition). |
| static bool shouldSplit(Instruction *InsertPoint, |
| DenseSet<Value *> &PrevConditionValues, |
| DenseSet<Value *> &ConditionValues, |
| DominatorTree &DT, |
| DenseSet<Instruction *> &Unhoistables) { |
| assert(InsertPoint && "Null InsertPoint"); |
| CHR_DEBUG( |
| dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; |
| for (Value *V : PrevConditionValues) { |
| dbgs() << *V << ", "; |
| } |
| dbgs() << " ConditionValues "; |
| for (Value *V : ConditionValues) { |
| dbgs() << *V << ", "; |
| } |
| dbgs() << "\n"); |
| // If any of Bases isn't hoistable to the hoist point, split. |
| for (Value *V : ConditionValues) { |
| DenseMap<Instruction *, bool> Visited; |
| if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { |
| CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); |
| return true; // Not hoistable, split. |
| } |
| } |
| // If PrevConditionValues or ConditionValues is empty, don't split to avoid |
| // unnecessary splits at scopes with no branch/selects. If |
| // PrevConditionValues and ConditionValues don't intersect at all, split. |
| if (!PrevConditionValues.empty() && !ConditionValues.empty()) { |
| // Use std::set as DenseSet doesn't work with set_intersection. |
| std::set<Value *> PrevBases, Bases; |
| DenseMap<Value *, std::set<Value *>> Visited; |
| for (Value *V : PrevConditionValues) { |
| std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); |
| PrevBases.insert(BaseValues.begin(), BaseValues.end()); |
| } |
| for (Value *V : ConditionValues) { |
| std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); |
| Bases.insert(BaseValues.begin(), BaseValues.end()); |
| } |
| CHR_DEBUG( |
| dbgs() << "PrevBases "; |
| for (Value *V : PrevBases) { |
| dbgs() << *V << ", "; |
| } |
| dbgs() << " Bases "; |
| for (Value *V : Bases) { |
| dbgs() << *V << ", "; |
| } |
| dbgs() << "\n"); |
| std::set<Value *> Intersection; |
| std::set_intersection(PrevBases.begin(), PrevBases.end(), |
| Bases.begin(), Bases.end(), |
| std::inserter(Intersection, Intersection.begin())); |
| if (Intersection.empty()) { |
| // Empty intersection, split. |
| CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); |
| return true; |
| } |
| } |
| CHR_DEBUG(dbgs() << "No split\n"); |
| return false; // Don't split. |
| } |
| |
| static void getSelectsInScope(CHRScope *Scope, |
| DenseSet<Instruction *> &Output) { |
| for (RegInfo &RI : Scope->RegInfos) |
| for (SelectInst *SI : RI.Selects) |
| Output.insert(SI); |
| for (CHRScope *Sub : Scope->Subs) |
| getSelectsInScope(Sub, Output); |
| } |
| |
| void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output) { |
| for (CHRScope *Scope : Input) { |
| assert(!Scope->BranchInsertPoint && |
| "BranchInsertPoint must not be set"); |
| DenseSet<Instruction *> Unhoistables; |
| getSelectsInScope(Scope, Unhoistables); |
| splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); |
| } |
| #ifndef NDEBUG |
| for (CHRScope *Scope : Output) { |
| assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); |
| } |
| #endif |
| } |
| |
| SmallVector<CHRScope *, 8> CHR::splitScope( |
| CHRScope *Scope, |
| CHRScope *Outer, |
| DenseSet<Value *> *OuterConditionValues, |
| Instruction *OuterInsertPoint, |
| SmallVectorImpl<CHRScope *> &Output, |
| DenseSet<Instruction *> &Unhoistables) { |
| if (Outer) { |
| assert(OuterConditionValues && "Null OuterConditionValues"); |
| assert(OuterInsertPoint && "Null OuterInsertPoint"); |
| } |
| bool PrevSplitFromOuter = true; |
| DenseSet<Value *> PrevConditionValues; |
| Instruction *PrevInsertPoint = nullptr; |
| SmallVector<CHRScope *, 8> Splits; |
| SmallVector<bool, 8> SplitsSplitFromOuter; |
| SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; |
| SmallVector<Instruction *, 8> SplitsInsertPoints; |
| SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy |
| for (RegInfo &RI : RegInfos) { |
| Instruction *InsertPoint = getBranchInsertPoint(RI); |
| DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); |
| CHR_DEBUG( |
| dbgs() << "ConditionValues "; |
| for (Value *V : ConditionValues) { |
| dbgs() << *V << ", "; |
| } |
| dbgs() << "\n"); |
| if (RI.R == RegInfos[0].R) { |
| // First iteration. Check to see if we should split from the outer. |
| if (Outer) { |
| CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); |
| CHR_DEBUG(dbgs() << "Should split from outer at " |
| << RI.R->getNameStr() << "\n"); |
| if (shouldSplit(OuterInsertPoint, *OuterConditionValues, |
| ConditionValues, DT, Unhoistables)) { |
| PrevConditionValues = ConditionValues; |
| PrevInsertPoint = InsertPoint; |
| ORE.emit([&]() { |
| return OptimizationRemarkMissed(DEBUG_TYPE, |
| "SplitScopeFromOuter", |
| RI.R->getEntry()->getTerminator()) |
| << "Split scope from outer due to unhoistable branch/select " |
| << "and/or lack of common condition values"; |
| }); |
| } else { |
| // Not splitting from the outer. Use the outer bases and insert |
| // point. Union the bases. |
| PrevSplitFromOuter = false; |
| PrevConditionValues = *OuterConditionValues; |
| PrevConditionValues.insert(ConditionValues.begin(), |
| ConditionValues.end()); |
| PrevInsertPoint = OuterInsertPoint; |
| } |
| } else { |
| CHR_DEBUG(dbgs() << "Outer null\n"); |
| PrevConditionValues = ConditionValues; |
| PrevInsertPoint = InsertPoint; |
| } |
| } else { |
| CHR_DEBUG(dbgs() << "Should split from prev at " |
| << RI.R->getNameStr() << "\n"); |
| if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, |
| DT, Unhoistables)) { |
| CHRScope *Tail = Scope->split(RI.R); |
| Scopes.insert(Tail); |
| Splits.push_back(Scope); |
| SplitsSplitFromOuter.push_back(PrevSplitFromOuter); |
| SplitsConditionValues.push_back(PrevConditionValues); |
| SplitsInsertPoints.push_back(PrevInsertPoint); |
| Scope = Tail; |
| PrevConditionValues = ConditionValues; |
| PrevInsertPoint = InsertPoint; |
| PrevSplitFromOuter = true; |
| ORE.emit([&]() { |
| return OptimizationRemarkMissed(DEBUG_TYPE, |
| "SplitScopeFromPrev", |
| RI.R->getEntry()->getTerminator()) |
| << "Split scope from previous due to unhoistable branch/select " |
| << "and/or lack of common condition values"; |
| }); |
| } else { |
| // Not splitting. Union the bases. Keep the hoist point. |
| PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); |
| } |
| } |
| } |
| Splits.push_back(Scope); |
| SplitsSplitFromOuter.push_back(PrevSplitFromOuter); |
| SplitsConditionValues.push_back(PrevConditionValues); |
| assert(PrevInsertPoint && "Null PrevInsertPoint"); |
| SplitsInsertPoints.push_back(PrevInsertPoint); |
| assert(Splits.size() == SplitsConditionValues.size() && |
| Splits.size() == SplitsSplitFromOuter.size() && |
| Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); |
| for (size_t I = 0; I < Splits.size(); ++I) { |
| CHRScope *Split = Splits[I]; |
| DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; |
| Instruction *SplitInsertPoint = SplitsInsertPoints[I]; |
| SmallVector<CHRScope *, 8> NewSubs; |
| DenseSet<Instruction *> SplitUnhoistables; |
| getSelectsInScope(Split, SplitUnhoistables); |
| for (CHRScope *Sub : Split->Subs) { |
| SmallVector<CHRScope *, 8> SubSplits = splitScope( |
| Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, |
| SplitUnhoistables); |
| NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); |
| } |
| Split->Subs = NewSubs; |
| } |
| SmallVector<CHRScope *, 8> Result; |
| for (size_t I = 0; I < Splits.size(); ++I) { |
| CHRScope *Split = Splits[I]; |
| if (SplitsSplitFromOuter[I]) { |
| // Split from the outer. |
| Output.push_back(Split); |
| Split->BranchInsertPoint = SplitsInsertPoints[I]; |
| CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] |
| << "\n"); |
| } else { |
| // Connected to the outer. |
| Result.push_back(Split); |
| } |
| } |
| if (!Outer) |
| assert(Result.empty() && |
| "If no outer (top-level), must return no nested ones"); |
| return Result; |
| } |
| |
| void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { |
| for (CHRScope *Scope : Scopes) { |
| assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); |
| classifyBiasedScopes(Scope, Scope); |
| CHR_DEBUG( |
| dbgs() << "classifyBiasedScopes " << *Scope << "\n"; |
| dbgs() << "TrueBiasedRegions "; |
| for (Region *R : Scope->TrueBiasedRegions) { |
| dbgs() << R->getNameStr() << ", "; |
| } |
| dbgs() << "\n"; |
| dbgs() << "FalseBiasedRegions "; |
| for (Region *R : Scope->FalseBiasedRegions) { |
| dbgs() << R->getNameStr() << ", "; |
| } |
| dbgs() << "\n"; |
| dbgs() << "TrueBiasedSelects "; |
| for (SelectInst *SI : Scope->TrueBiasedSelects) { |
| dbgs() << *SI << ", "; |
| } |
| dbgs() << "\n"; |
| dbgs() << "FalseBiasedSelects "; |
| for (SelectInst *SI : Scope->FalseBiasedSelects) { |
| dbgs() << *SI << ", "; |
| } |
| dbgs() << "\n";); |
| } |
| } |
| |
| void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { |
| for (RegInfo &RI : Scope->RegInfos) { |
| if (RI.HasBranch) { |
| Region *R = RI.R; |
| if (TrueBiasedRegionsGlobal.count(R) > 0) |
| OutermostScope->TrueBiasedRegions.insert(R); |
| else if (FalseBiasedRegionsGlobal.count(R) > 0) |
| OutermostScope->FalseBiasedRegions.insert(R); |
| else |
| llvm_unreachable("Must be biased"); |
| } |
| for (SelectInst *SI : RI.Selects) { |
| if (TrueBiasedSelectsGlobal.count(SI) > 0) |
| OutermostScope->TrueBiasedSelects.insert(SI); |
| else if (FalseBiasedSelectsGlobal.count(SI) > 0) |
| OutermostScope->FalseBiasedSelects.insert(SI); |
| else |
| llvm_unreachable("Must be biased"); |
| } |
| } |
| for (CHRScope *Sub : Scope->Subs) { |
| classifyBiasedScopes(Sub, OutermostScope); |
| } |
| } |
| |
| static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { |
| unsigned NumBiased = Scope->TrueBiasedRegions.size() + |
| Scope->FalseBiasedRegions.size() + |
| Scope->TrueBiasedSelects.size() + |
| Scope->FalseBiasedSelects.size(); |
| return NumBiased >= CHRMergeThreshold; |
| } |
| |
| void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output) { |
| for (CHRScope *Scope : Input) { |
| // Filter out the ones with only one region and no subs. |
| if (!hasAtLeastTwoBiasedBranches(Scope)) { |
| CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " |
| << Scope->TrueBiasedRegions.size() |
| << " falsy-regions " << Scope->FalseBiasedRegions.size() |
| << " true-selects " << Scope->TrueBiasedSelects.size() |
| << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); |
| ORE.emit([&]() { |
| return OptimizationRemarkMissed( |
| DEBUG_TYPE, |
| "DropScopeWithOneBranchOrSelect", |
| Scope->RegInfos[0].R->getEntry()->getTerminator()) |
| << "Drop scope with < " |
| << ore::NV("CHRMergeThreshold", CHRMergeThreshold) |
| << " biased branch(es) or select(s)"; |
| }); |
| continue; |
| } |
| Output.push_back(Scope); |
| } |
| } |
| |
| void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output) { |
| for (CHRScope *Scope : Input) { |
| assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && |
| "Empty"); |
| setCHRRegions(Scope, Scope); |
| Output.push_back(Scope); |
| CHR_DEBUG( |
| dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; |
| for (auto pair : Scope->HoistStopMap) { |
| Region *R = pair.first; |
| dbgs() << "Region " << R->getNameStr() << "\n"; |
| for (Instruction *I : pair.second) { |
| dbgs() << "HoistStop " << *I << "\n"; |
| } |
| } |
| dbgs() << "CHRRegions" << "\n"; |
| for (RegInfo &RI : Scope->CHRRegions) { |
| dbgs() << RI.R->getNameStr() << "\n"; |
| }); |
| } |
| } |
| |
| void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { |
| DenseSet<Instruction *> Unhoistables; |
| // Put the biased selects in Unhoistables because they should stay where they |
| // are and constant-folded after CHR (in case one biased select or a branch |
| // can depend on another biased select.) |
| for (RegInfo &RI : Scope->RegInfos) { |
| for (SelectInst *SI : RI.Selects) { |
| Unhoistables.insert(SI); |
| } |
| } |
| Instruction *InsertPoint = OutermostScope->BranchInsertPoint; |
| for (RegInfo &RI : Scope->RegInfos) { |
| Region *R = RI.R; |
| DenseSet<Instruction *> HoistStops; |
| bool IsHoisted = false; |
| if (RI.HasBranch) { |
| assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || |
| OutermostScope->FalseBiasedRegions.count(R) > 0) && |
| "Must be truthy or falsy"); |
| auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); |
| // Note checkHoistValue fills in HoistStops. |
| DenseMap<Instruction *, bool> Visited; |
| bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, |
| Unhoistables, &HoistStops, Visited); |
| assert(IsHoistable && "Must be hoistable"); |
| (void)(IsHoistable); // Unused in release build |
| IsHoisted = true; |
| } |
| for (SelectInst *SI : RI.Selects) { |
| assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || |
| OutermostScope->FalseBiasedSelects.count(SI) > 0) && |
| "Must be true or false biased"); |
| // Note checkHoistValue fills in HoistStops. |
| DenseMap<Instruction *, bool> Visited; |
| bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, |
| Unhoistables, &HoistStops, Visited); |
| assert(IsHoistable && "Must be hoistable"); |
| (void)(IsHoistable); // Unused in release build |
| IsHoisted = true; |
| } |
| if (IsHoisted) { |
| OutermostScope->CHRRegions.push_back(RI); |
| OutermostScope->HoistStopMap[R] = HoistStops; |
| } |
| } |
| for (CHRScope *Sub : Scope->Subs) |
| setCHRRegions(Sub, OutermostScope); |
| } |
| |
| bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { |
| return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); |
| } |
| |
| void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, |
| SmallVectorImpl<CHRScope *> &Output) { |
| Output.resize(Input.size()); |
| llvm::copy(Input, Output.begin()); |
| llvm::stable_sort(Output, CHRScopeSorter); |
| } |
| |
| // Return true if V is already hoisted or was hoisted (along with its operands) |
| // to the insert point. |
| static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, |
| HoistStopMapTy &HoistStopMap, |
| DenseSet<Instruction *> &HoistedSet, |
| DenseSet<PHINode *> &TrivialPHIs, |
| DominatorTree &DT) { |
| auto IT = HoistStopMap.find(R); |
| assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); |
| DenseSet<Instruction *> &HoistStops = IT->second; |
| if (auto *I = dyn_cast<Instruction>(V)) { |
| if (I == HoistPoint) |
| return; |
| if (HoistStops.count(I)) |
| return; |
| if (auto *PN = dyn_cast<PHINode>(I)) |
| if (TrivialPHIs.count(PN)) |
| // The trivial phi inserted by the previous CHR scope could replace a |
| // non-phi in HoistStops. Note that since this phi is at the exit of a |
| // previous CHR scope, which dominates this scope, it's safe to stop |
| // hoisting there. |
| return; |
| if (HoistedSet.count(I)) |
| // Already hoisted, return. |
| return; |
| assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); |
| assert(DT.getNode(I->getParent()) && "DT must contain I's block"); |
| assert(DT.getNode(HoistPoint->getParent()) && |
| "DT must contain HoistPoint block"); |
| if (DT.dominates(I, HoistPoint)) |
| // We are already above the hoist point. Stop here. This may be necessary |
| // when multiple scopes would independently hoist the same |
| // instruction. Since an outer (dominating) scope would hoist it to its |
| // entry before an inner (dominated) scope would to its entry, the inner |
| // scope may see the instruction already hoisted, in which case it |
| // potentially wrong for the inner scope to hoist it and could cause bad |
| // IR (non-dominating def), but safe to skip hoisting it instead because |
| // it's already in a block that dominates the inner scope. |
| return; |
| for (Value *Op : I->operands()) { |
| hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); |
| } |
| I->moveBefore(HoistPoint); |
| HoistedSet.insert(I); |
| CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); |
| } |
| } |
| |
| // Hoist the dependent condition values of the branches and the selects in the |
| // scope to the insert point. |
| static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, |
| DenseSet<PHINode *> &TrivialPHIs, |
| DominatorTree &DT) { |
| DenseSet<Instruction *> HoistedSet; |
| for (const RegInfo &RI : Scope->CHRRegions) { |
| Region *R = RI.R; |
| bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); |
| bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); |
| if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { |
| auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); |
| hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, |
| HoistedSet, TrivialPHIs, DT); |
| } |
| for (SelectInst *SI : RI.Selects) { |
| bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); |
| bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); |
| if (!(IsTrueBiased || IsFalseBiased)) |
| continue; |
| hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, |
| HoistedSet, TrivialPHIs, DT); |
| } |
| } |
| } |
| |
| // Negate the predicate if an ICmp if it's used only by branches or selects by |
| // swapping the operands of the branches or the selects. Returns true if success. |
| static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, |
| Instruction *ExcludedUser, |
| CHRScope *Scope) { |
| for (User *U : ICmp->users()) { |
| if (U == ExcludedUser) |
| continue; |
| if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) |
| continue; |
| if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) |
| continue; |
| return false; |
| } |
| for (User *U : ICmp->users()) { |
| if (U == ExcludedUser) |
| continue; |
| if (auto *BI = dyn_cast<BranchInst>(U)) { |
| assert(BI->isConditional() && "Must be conditional"); |
| BI->swapSuccessors(); |
| // Don't need to swap this in terms of |
| // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based |
| // mean whehter the branch is likely go into the if-then rather than |
| // successor0/successor1 and because we can tell which edge is the then or |
| // the else one by comparing the destination to the region exit block. |
| continue; |
| } |
| if (auto *SI = dyn_cast<SelectInst>(U)) { |
| // Swap operands |
| SI->swapValues(); |
| SI->swapProfMetadata(); |
| if (Scope->TrueBiasedSelects.count(SI)) { |
| assert(Scope->FalseBiasedSelects.count(SI) == 0 && |
| "Must not be already in"); |
| Scope->FalseBiasedSelects.insert(SI); |
| } else if (Scope->FalseBiasedSelects.count(SI)) { |
| assert(Scope->TrueBiasedSelects.count(SI) == 0 && |
| "Must not be already in"); |
| Scope->TrueBiasedSelects.insert(SI); |
| } |
| continue; |
| } |
| llvm_unreachable("Must be a branch or a select"); |
| } |
| ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); |
| return true; |
| } |
| |
| // A helper for transformScopes. Insert a trivial phi at the scope exit block |
| // for a value that's defined in the scope but used outside it (meaning it's |
| // alive at the exit block). |
| static void insertTrivialPHIs(CHRScope *Scope, |
| BasicBlock *EntryBlock, BasicBlock *ExitBlock, |
| DenseSet<PHINode *> &TrivialPHIs) { |
| DenseSet<BasicBlock *> BlocksInScopeSet; |
| SmallVector<BasicBlock *, 8> BlocksInScopeVec; |
| for (RegInfo &RI : Scope->RegInfos) { |
| for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the |
| // sub-Scopes. |
| BlocksInScopeSet.insert(BB); |
| BlocksInScopeVec.push_back(BB); |
| } |
| } |
| CHR_DEBUG( |
| dbgs() << "Inserting redudant phis\n"; |
| for (BasicBlock *BB : BlocksInScopeVec) { |
| dbgs() << "BlockInScope " << BB->getName() << "\n"; |
| }); |
| for (BasicBlock *BB : BlocksInScopeVec) { |
| for (Instruction &I : *BB) { |
| SmallVector<Instruction *, 8> Users; |
| for (User *U : I.users()) { |
| if (auto *UI = dyn_cast<Instruction>(U)) { |
| if (BlocksInScopeSet.count(UI->getParent()) == 0 && |
| // Unless there's already a phi for I at the exit block. |
| !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { |
| CHR_DEBUG(dbgs() << "V " << I << "\n"); |
| CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); |
| Users.push_back(UI); |
| } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { |
| // There's a loop backedge from a block that's dominated by this |
| // scope to the entry block. |
| CHR_DEBUG(dbgs() << "V " << I << "\n"); |
| CHR_DEBUG(dbgs() |
| << "Used at entry block (for a back edge) by a phi user " |
| << *UI << "\n"); |
| Users.push_back(UI); |
| } |
| } |
| } |
| if (Users.size() > 0) { |
| // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at |
| // ExitBlock. Replace I with the new phi in UI unless UI is another |
| // phi at ExitBlock. |
| unsigned PredCount = std::distance(pred_begin(ExitBlock), |
| pred_end(ExitBlock)); |
| PHINode *PN = PHINode::Create(I.getType(), PredCount, "", |
| &ExitBlock->front()); |
| for (BasicBlock *Pred : predecessors(ExitBlock)) { |
| PN->addIncoming(&I, Pred); |
| } |
| TrivialPHIs.insert(PN); |
| CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); |
| for (Instruction *UI : Users) { |
| for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { |
| if (UI->getOperand(J) == &I) { |
| UI->setOperand(J, PN); |
| } |
| } |
| CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); |
| } |
| } |
| } |
| } |
| } |
| |
| // Assert that all the CHR regions of the scope have a biased branch or select. |
| static void LLVM_ATTRIBUTE_UNUSED |
| assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { |
| #ifndef NDEBUG |
| auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { |
| if (Scope->TrueBiasedRegions.count(RI.R) || |
| Scope->FalseBiasedRegions.count(RI.R)) |
| return true; |
| for (SelectInst *SI : RI.Selects) |
| if (Scope->TrueBiasedSelects.count(SI) || |
| Scope->FalseBiasedSelects.count(SI)) |
| return true; |
| return false; |
| }; |
| for (RegInfo &RI : Scope->CHRRegions) { |
| assert(HasBiasedBranchOrSelect(RI, Scope) && |
| "Must have biased branch or select"); |
| } |
| #endif |
| } |
| |
| // Assert that all the condition values of the biased branches and selects have |
| // been hoisted to the pre-entry block or outside of the scope. |
| static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( |
| CHRScope *Scope, BasicBlock *PreEntryBlock) { |
| CHR_DEBUG(dbgs() << "Biased regions condition values \n"); |
| for (RegInfo &RI : Scope->CHRRegions) { |
| Region *R = RI.R; |
| bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); |
| bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); |
| if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { |
| auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); |
| Value *V = BI->getCondition(); |
| CHR_DEBUG(dbgs() << *V << "\n"); |
| if (auto *I = dyn_cast<Instruction>(V)) { |
| (void)(I); // Unused in release build. |
| assert((I->getParent() == PreEntryBlock || |
| !Scope->contains(I)) && |
| "Must have been hoisted to PreEntryBlock or outside the scope"); |
| } |
| } |
| for (SelectInst *SI : RI.Selects) { |
| bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); |
| bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); |
| if (!(IsTrueBiased || IsFalseBiased)) |
| continue; |
| Value *V = SI->getCondition(); |
| CHR_DEBUG(dbgs() << *V << "\n"); |
| if (auto *I = dyn_cast<Instruction>(V)) { |
| (void)(I); // Unused in release build. |
| assert((I->getParent() == PreEntryBlock || |
| !Scope->contains(I)) && |
| "Must have been hoisted to PreEntryBlock or outside the scope"); |
| } |
| } |
| } |
| } |
| |
| void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { |
| CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); |
| |
| assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); |
| Region *FirstRegion = Scope->RegInfos[0].R; |
| BasicBlock *EntryBlock = FirstRegion->getEntry(); |
| Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; |
| BasicBlock *ExitBlock = LastRegion->getExit(); |
| Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); |
| |
| if (ExitBlock) { |
| // Insert a trivial phi at the exit block (where the CHR hot path and the |
| // cold path merges) for a value that's defined in the scope but used |
| // outside it (meaning it's alive at the exit block). We will add the |
| // incoming values for the CHR cold paths to it below. Without this, we'd |
| // miss updating phi's for such values unless there happens to already be a |
| // phi for that value there. |
| insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); |
| } |
| |
| // Split the entry block of the first region. The new block becomes the new |
| // entry block of the first region. The old entry block becomes the block to |
| // insert the CHR branch into. Note DT gets updated. Since DT gets updated |
| // through the split, we update the entry of the first region after the split, |
| // and Region only points to the entry and the exit blocks, rather than |
| // keeping everything in a list or set, the blocks membership and the |
| // entry/exit blocks of the region are still valid after the split. |
| CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() |
| << " at " << *Scope->BranchInsertPoint << "\n"); |
| BasicBlock *NewEntryBlock = |
| SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); |
| assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && |
| "NewEntryBlock's only pred must be EntryBlock"); |
| FirstRegion->replaceEntryRecursive(NewEntryBlock); |
| BasicBlock *PreEntryBlock = EntryBlock; |
| |
| ValueToValueMapTy VMap; |
| // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a |
| // hot path (originals) and a cold path (clones) and update the PHIs at the |
| // exit block. |
| cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); |
| |
| // Replace the old (placeholder) branch with the new (merged) conditional |
| // branch. |
| BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, |
| NewEntryBlock, VMap); |
| |
| #ifndef NDEBUG |
| assertCHRRegionsHaveBiasedBranchOrSelect(Scope); |
| #endif |
| |
| // Hoist the conditional values of the branches/selects. |
| hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); |
| |
| #ifndef NDEBUG |
| assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); |
| #endif |
| |
| // Create the combined branch condition and constant-fold the branches/selects |
| // in the hot path. |
| fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, |
| ProfileCount ? ProfileCount.getValue() : 0); |
| } |
| |
| // A helper for transformScopes. Clone the blocks in the scope (excluding the |
| // PreEntryBlock) to split into a hot path and a cold path and update the PHIs |
| // at the exit block. |
| void CHR::cloneScopeBlocks(CHRScope *Scope, |
| BasicBlock *PreEntryBlock, |
| BasicBlock *ExitBlock, |
| Region *LastRegion, |
| ValueToValueMapTy &VMap) { |
| // Clone all the blocks. The original blocks will be the hot-path |
| // CHR-optimized code and the cloned blocks will be the original unoptimized |
| // code. This is so that the block pointers from the |
| // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code |
| // which CHR should apply to. |
| SmallVector<BasicBlock*, 8> NewBlocks; |
| for (RegInfo &RI : Scope->RegInfos) |
| for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the |
| // sub-Scopes. |
| assert(BB != PreEntryBlock && "Don't copy the preetntry block"); |
| BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); |
| NewBlocks.push_back(NewBB); |
| VMap[BB] = NewBB; |
| } |
| |
| // Place the cloned blocks right after the original blocks (right before the |
| // exit block of.) |
| if (ExitBlock) |
| F.getBasicBlockList().splice(ExitBlock->getIterator(), |
| F.getBasicBlockList(), |
| NewBlocks[0]->getIterator(), F.end()); |
| |
| // Update the cloned blocks/instructions to refer to themselves. |
| for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) |
| for (Instruction &I : *NewBlocks[i]) |
| RemapInstruction(&I, VMap, |
| RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); |
| |
| // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for |
| // the top-level region but we don't need to add PHIs. The trivial PHIs |
| // inserted above will be updated here. |
| if (ExitBlock) |
| for (PHINode &PN : ExitBlock->phis()) |
| for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; |
| ++I) { |
| BasicBlock *Pred = PN.getIncomingBlock(I); |
| if (LastRegion->contains(Pred)) { |
| Value *V = PN.getIncomingValue(I); |
| auto It = VMap.find(V); |
| if (It != VMap.end()) V = It->second; |
| assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); |
| PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); |
| } |
| } |
| } |
| |
| // A helper for transformScope. Replace the old (placeholder) branch with the |
| // new (merged) conditional branch. |
| BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, |
| BasicBlock *EntryBlock, |
| BasicBlock *NewEntryBlock, |
| ValueToValueMapTy &VMap) { |
| BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); |
| assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && |
| "SplitBlock did not work correctly!"); |
| assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && |
| "NewEntryBlock's only pred must be EntryBlock"); |
| assert(VMap.find(NewEntryBlock) != VMap.end() && |
| "NewEntryBlock must have been copied"); |
| OldBR->dropAllReferences(); |
| OldBR->eraseFromParent(); |
| // The true predicate is a placeholder. It will be replaced later in |
| // fixupBranchesAndSelects(). |
| BranchInst *NewBR = BranchInst::Create(NewEntryBlock, |
| cast<BasicBlock>(VMap[NewEntryBlock]), |
| ConstantInt::getTrue(F.getContext())); |
| PreEntryBlock->getInstList().push_back(NewBR); |
| assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && |
| "NewEntryBlock's only pred must be EntryBlock"); |
| return NewBR; |
| } |
| |
| // A helper for transformScopes. Create the combined branch condition and |
| // constant-fold the branches/selects in the hot path. |
| void CHR::fixupBranchesAndSelects(CHRScope *Scope, |
| BasicBlock *PreEntryBlock, |
| BranchInst *MergedBR, |
| uint64_t ProfileCount) { |
| Value *MergedCondition = ConstantInt::getTrue(F.getContext()); |
| BranchProbability CHRBranchBias(1, 1); |
| uint64_t NumCHRedBranches = 0; |
| IRBuilder<> IRB(PreEntryBlock->getTerminator()); |
| for (RegInfo &RI : Scope->CHRRegions) { |
| Region *R = RI.R; |
| if (RI.HasBranch) { |
| fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); |
| ++NumCHRedBranches; |
| } |
| for (SelectInst *SI : RI.Selects) { |
| fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); |
| ++NumCHRedBranches; |
| } |
| } |
| Stats.NumBranchesDelta += NumCHRedBranches - 1; |
| Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; |
| ORE.emit([&]() { |
| return OptimizationRemark(DEBUG_TYPE, |
| "CHR", |
| // Refer to the hot (original) path |
| MergedBR->getSuccessor(0)->getTerminator()) |
| << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) |
| << " branches or selects"; |
| }); |
| MergedBR->setCondition(MergedCondition); |
| SmallVector<uint32_t, 2> Weights; |
| Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); |
| Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); |
| MDBuilder MDB(F.getContext()); |
| MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); |
| CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] |
| << "\n"); |
| } |
| |
| // A helper for fixupBranchesAndSelects. Add to the combined branch condition |
| // and constant-fold a branch in the hot path. |
| void CHR::fixupBranch(Region *R, CHRScope *Scope, |
| IRBuilder<> &IRB, |
| Value *&MergedCondition, |
| BranchProbability &CHRBranchBias) { |
| bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); |
| assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && |
| "Must be truthy or falsy"); |
| auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); |
| assert(BranchBiasMap.find(R) != BranchBiasMap.end() && |
| "Must be in the bias map"); |
| BranchProbability Bias = BranchBiasMap[R]; |
| assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); |
| // Take the min. |
| if (CHRBranchBias > Bias) |
| CHRBranchBias = Bias; |
| BasicBlock *IfThen = BI->getSuccessor(1); |
| BasicBlock *IfElse = BI->getSuccessor(0); |
| BasicBlock *RegionExitBlock = R->getExit(); |
| assert(RegionExitBlock && "Null ExitBlock"); |
| assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && |
| IfThen != IfElse && "Invariant from findScopes"); |
| if (IfThen == RegionExitBlock) { |
| // Swap them so that IfThen means going into it and IfElse means skipping |
| // it. |
| std::swap(IfThen, IfElse); |
| } |
| CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() |
| << " IfElse " << IfElse->getName() << "\n"); |
| Value *Cond = BI->getCondition(); |
| BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; |
| bool ConditionTrue = HotTarget == BI->getSuccessor(0); |
| addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, |
| MergedCondition); |
| // Constant-fold the branch at ClonedEntryBlock. |
| assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && |
| "The successor shouldn't change"); |
| Value *NewCondition = ConditionTrue ? |
| ConstantInt::getTrue(F.getContext()) : |
| ConstantInt::getFalse(F.getContext()); |
| BI->setCondition(NewCondition); |
| } |
| |
| // A helper for fixupBranchesAndSelects. Add to the combined branch condition |
| // and constant-fold a select in the hot path. |
| void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, |
| IRBuilder<> &IRB, |
| Value *&MergedCondition, |
| BranchProbability &CHRBranchBias) { |
| bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); |
| assert((IsTrueBiased || |
| Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); |
| assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && |
| "Must be in the bias map"); |
| BranchProbability Bias = SelectBiasMap[SI]; |
| assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); |
| // Take the min. |
| if (CHRBranchBias > Bias) |
| CHRBranchBias = Bias; |
| Value *Cond = SI->getCondition(); |
| addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, |
| MergedCondition); |
| Value *NewCondition = IsTrueBiased ? |
| ConstantInt::getTrue(F.getContext()) : |
| ConstantInt::getFalse(F.getContext()); |
| SI->setCondition(NewCondition); |
| } |
| |
| // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged |
| // condition. |
| void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, |
| Instruction *BranchOrSelect, |
| CHRScope *Scope, |
| IRBuilder<> &IRB, |
| Value *&MergedCondition) { |
| if (IsTrueBiased) { |
| MergedCondition = IRB.CreateAnd(MergedCondition, Cond); |
| } else { |
| // If Cond is an icmp and all users of V except for BranchOrSelect is a |
| // branch, negate the icmp predicate and swap the branch targets and avoid |
| // inserting an Xor to negate Cond. |
| bool Done = false; |
| if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) |
| if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { |
| MergedCondition = IRB.CreateAnd(MergedCondition, Cond); |
| Done = true; |
| } |
| if (!Done) { |
| Value *Negate = IRB.CreateXor( |
| ConstantInt::getTrue(F.getContext()), Cond); |
| MergedCondition = IRB.CreateAnd(MergedCondition, Negate); |
| } |
| } |
| } |
| |
| void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { |
| unsigned I = 0; |
| DenseSet<PHINode *> TrivialPHIs; |
| for (CHRScope *Scope : CHRScopes) { |
| transformScopes(Scope, TrivialPHIs); |
| CHR_DEBUG( |
| std::ostringstream oss; |
| oss << " after transformScopes " << I++; |
| dumpIR(F, oss.str().c_str(), nullptr)); |
| (void)I; |
| } |
| } |
| |
| static void LLVM_ATTRIBUTE_UNUSED |
| dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { |
| dbgs() << Label << " " << Scopes.size() << "\n"; |
| for (CHRScope *Scope : Scopes) { |
| dbgs() << *Scope << "\n"; |
| } |
| } |
| |
| bool CHR::run() { |
| if (!shouldApply(F, PSI)) |
| return false; |
| |
| CHR_DEBUG(dumpIR(F, "before", nullptr)); |
| |
| bool Changed = false; |
| { |
| CHR_DEBUG( |
| dbgs() << "RegionInfo:\n"; |
| RI.print(dbgs())); |
| |
| // Recursively traverse the region tree and find regions that have biased |
| // branches and/or selects and create scopes. |
| SmallVector<CHRScope *, 8> AllScopes; |
| findScopes(AllScopes); |
| CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); |
| |
| // Split the scopes if 1) the conditiona values of the biased |
| // branches/selects of the inner/lower scope can't be hoisted up to the |
| // outermost/uppermost scope entry, or 2) the condition values of the biased |
| // branches/selects in a scope (including subscopes) don't share at least |
| // one common value. |
| SmallVector<CHRScope *, 8> SplitScopes; |
| splitScopes(AllScopes, SplitScopes); |
| CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); |
| |
| // After splitting, set the biased regions and selects of a scope (a tree |
| // root) that include those of the subscopes. |
| classifyBiasedScopes(SplitScopes); |
| CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); |
| |
| // Filter out the scopes that has only one biased region or select (CHR |
| // isn't useful in such a case). |
| SmallVector<CHRScope *, 8> FilteredScopes; |
| filterScopes(SplitScopes, FilteredScopes); |
| CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); |
| |
| // Set the regions to be CHR'ed and their hoist stops for each scope. |
| SmallVector<CHRScope *, 8> SetScopes; |
| setCHRRegions(FilteredScopes, SetScopes); |
| CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); |
| |
| // Sort CHRScopes by the depth so that outer CHRScopes comes before inner |
| // ones. We need to apply CHR from outer to inner so that we apply CHR only |
| // to the hot path, rather than both hot and cold paths. |
| SmallVector<CHRScope *, 8> SortedScopes; |
| sortScopes(SetScopes, SortedScopes); |
| CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); |
| |
| CHR_DEBUG( |
| dbgs() << "RegionInfo:\n"; |
| RI.print(dbgs())); |
| |
| // Apply the CHR transformation. |
| if (!SortedScopes.empty()) { |
| transformScopes(SortedScopes); |
| Changed = true; |
| } |
| } |
| |
| if (Changed) { |
| CHR_DEBUG(dumpIR(F, "after", &Stats)); |
| ORE.emit([&]() { |
| return OptimizationRemark(DEBUG_TYPE, "Stats", &F) |
| << ore::NV("Function", &F) << " " |
| << "Reduced the number of branches in hot paths by " |
| << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) |
| << " (static) and " |
| << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) |
| << " (weighted by PGO count)"; |
| }); |
| } |
| |
| return Changed; |
| } |
| |
| bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { |
| BlockFrequencyInfo &BFI = |
| getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); |
| DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| ProfileSummaryInfo &PSI = |
| getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
| RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); |
| std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = |
| std::make_unique<OptimizationRemarkEmitter>(&F); |
| return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); |
| } |
| |
| namespace llvm { |
| |
| ControlHeightReductionPass::ControlHeightReductionPass() { |
| parseCHRFilterFiles(); |
| } |
| |
| PreservedAnalyses ControlHeightReductionPass::run( |
| Function &F, |
| FunctionAnalysisManager &FAM) { |
| auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); |
| auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); |
| auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); |
| auto &MAM = MAMProxy.getManager(); |
| auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); |
| auto &RI = FAM.getResult<RegionInfoAnalysis>(F); |
| auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); |
| bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); |
| if (!Changed) |
| return PreservedAnalyses::all(); |
| auto PA = PreservedAnalyses(); |
| PA.preserve<GlobalsAA>(); |
| return PA; |
| } |
| |
| } // namespace llvm |