| //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass removes the computation of provably redundant expressions that have |
| // been computed earlier in a previous iteration. It relies on the use of PHIs |
| // to identify loop carried dependences. This is scalar replacement for vector |
| // types. |
| // |
| //----------------------------------------------------------------------------- |
| // Motivation: Consider the case where we have the following loop structure. |
| // |
| // Loop: |
| // t0 = a[i]; |
| // t1 = f(t0); |
| // t2 = g(t1); |
| // ... |
| // t3 = a[i+1]; |
| // t4 = f(t3); |
| // t5 = g(t4); |
| // t6 = op(t2, t5) |
| // cond_branch <Loop> |
| // |
| // This can be converted to |
| // t00 = a[0]; |
| // t10 = f(t00); |
| // t20 = g(t10); |
| // Loop: |
| // t2 = t20; |
| // t3 = a[i+1]; |
| // t4 = f(t3); |
| // t5 = g(t4); |
| // t6 = op(t2, t5) |
| // t20 = t5 |
| // cond_branch <Loop> |
| // |
| // SROA does a good job of reusing a[i+1] as a[i] in the next iteration. |
| // Such a loop comes to this pass in the following form. |
| // |
| // LoopPreheader: |
| // X0 = a[0]; |
| // Loop: |
| // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> |
| // t1 = f(X2) <-- I1 |
| // t2 = g(t1) |
| // ... |
| // X1 = a[i+1] |
| // t4 = f(X1) <-- I2 |
| // t5 = g(t4) |
| // t6 = op(t2, t5) |
| // cond_branch <Loop> |
| // |
| // In this pass, we look for PHIs such as X2 whose incoming values come only |
| // from the Loop Preheader and over the backedge and additionaly, both these |
| // values are the results of the same operation in terms of opcode. We call such |
| // a PHI node a dependence chain or DepChain. In this case, the dependence of X2 |
| // over X1 is carried over only one iteration and so the DepChain is only one |
| // PHI node long. |
| // |
| // Then, we traverse the uses of the PHI (X2) and the uses of the value of the |
| // PHI coming over the backedge (X1). We stop at the first pair of such users |
| // I1 (of X2) and I2 (of X1) that meet the following conditions. |
| // 1. I1 and I2 are the same operation, but with different operands. |
| // 2. X2 and X1 are used at the same operand number in the two instructions. |
| // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a |
| // a DepChain from Op1 to Op2 of the same length as that between X2 and X1. |
| // |
| // We then make the following transformation |
| // LoopPreheader: |
| // X0 = a[0]; |
| // Y0 = f(X0); |
| // Loop: |
| // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> |
| // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> |
| // t1 = f(X2) <-- Will be removed by DCE. |
| // t2 = g(Y2) |
| // ... |
| // X1 = a[i+1] |
| // t4 = f(X1) |
| // t5 = g(t4) |
| // t6 = op(t2, t5) |
| // cond_branch <Loop> |
| // |
| // We proceed until we cannot find any more such instructions I1 and I2. |
| // |
| // --- DepChains & Loop carried dependences --- |
| // Consider a single basic block loop such as |
| // |
| // LoopPreheader: |
| // X0 = ... |
| // Y0 = ... |
| // Loop: |
| // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> |
| // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> |
| // ... |
| // X1 = ... |
| // ... |
| // cond_branch <Loop> |
| // |
| // Then there is a dependence between X2 and X1 that goes back one iteration, |
| // i.e. X1 is used as X2 in the very next iteration. We represent this as a |
| // DepChain from X2 to X1 (X2->X1). |
| // Similarly, there is a dependence between Y2 and X1 that goes back two |
| // iterations. X1 is used as Y2 two iterations after it is computed. This is |
| // represented by a DepChain as (Y2->X2->X1). |
| // |
| // A DepChain has the following properties. |
| // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of |
| // iterations of carried dependence + 1. |
| // 2. All instructions in the DepChain except the last are PHIs. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Analysis/LoopInfo.h" |
| #include "llvm/Analysis/LoopPass.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Intrinsics.h" |
| #include "llvm/IR/IntrinsicsHexagon.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/User.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/InitializePasses.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Transforms/Scalar.h" |
| #include "llvm/Transforms/Utils.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstddef> |
| #include <map> |
| #include <memory> |
| #include <set> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "hexagon-vlcr" |
| |
| STATISTIC(HexagonNumVectorLoopCarriedReuse, |
| "Number of values that were reused from a previous iteration."); |
| |
| static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", |
| cl::Hidden, |
| cl::desc("Maximum distance of loop carried dependences that are handled"), |
| cl::init(2), cl::ZeroOrMore); |
| |
| namespace llvm { |
| |
| void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); |
| Pass *createHexagonVectorLoopCarriedReusePass(); |
| |
| } // end namespace llvm |
| |
| namespace { |
| |
| // See info about DepChain in the comments at the top of this file. |
| using ChainOfDependences = SmallVector<Instruction *, 4>; |
| |
| class DepChain { |
| ChainOfDependences Chain; |
| |
| public: |
| bool isIdentical(DepChain &Other) const { |
| if (Other.size() != size()) |
| return false; |
| ChainOfDependences &OtherChain = Other.getChain(); |
| for (int i = 0; i < size(); ++i) { |
| if (Chain[i] != OtherChain[i]) |
| return false; |
| } |
| return true; |
| } |
| |
| ChainOfDependences &getChain() { |
| return Chain; |
| } |
| |
| int size() const { |
| return Chain.size(); |
| } |
| |
| void clear() { |
| Chain.clear(); |
| } |
| |
| void push_back(Instruction *I) { |
| Chain.push_back(I); |
| } |
| |
| int iterations() const { |
| return size() - 1; |
| } |
| |
| Instruction *front() const { |
| return Chain.front(); |
| } |
| |
| Instruction *back() const { |
| return Chain.back(); |
| } |
| |
| Instruction *&operator[](const int index) { |
| return Chain[index]; |
| } |
| |
| friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); |
| }; |
| |
| LLVM_ATTRIBUTE_UNUSED |
| raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { |
| const ChainOfDependences &CD = D.Chain; |
| int ChainSize = CD.size(); |
| OS << "**DepChain Start::**\n"; |
| for (int i = 0; i < ChainSize -1; ++i) { |
| OS << *(CD[i]) << " -->\n"; |
| } |
| OS << *CD[ChainSize-1] << "\n"; |
| return OS; |
| } |
| |
| struct ReuseValue { |
| Instruction *Inst2Replace = nullptr; |
| |
| // In the new PHI node that we'll construct this is the value that'll be |
| // used over the backedge. This is the value that gets reused from a |
| // previous iteration. |
| Instruction *BackedgeInst = nullptr; |
| std::map<Instruction *, DepChain *> DepChains; |
| int Iterations = -1; |
| |
| ReuseValue() = default; |
| |
| void reset() { |
| Inst2Replace = nullptr; |
| BackedgeInst = nullptr; |
| DepChains.clear(); |
| Iterations = -1; |
| } |
| bool isDefined() { return Inst2Replace != nullptr; } |
| }; |
| |
| LLVM_ATTRIBUTE_UNUSED |
| raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { |
| OS << "** ReuseValue ***\n"; |
| OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; |
| OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; |
| return OS; |
| } |
| |
| class HexagonVectorLoopCarriedReuse : public LoopPass { |
| public: |
| static char ID; |
| |
| explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) { |
| PassRegistry *PR = PassRegistry::getPassRegistry(); |
| initializeHexagonVectorLoopCarriedReusePass(*PR); |
| } |
| |
| StringRef getPassName() const override { |
| return "Hexagon-specific loop carried reuse for HVX vectors"; |
| } |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.addRequired<LoopInfoWrapperPass>(); |
| AU.addRequiredID(LoopSimplifyID); |
| AU.addRequiredID(LCSSAID); |
| AU.addPreservedID(LCSSAID); |
| AU.setPreservesCFG(); |
| } |
| |
| bool runOnLoop(Loop *L, LPPassManager &LPM) override; |
| |
| private: |
| SetVector<DepChain *> Dependences; |
| std::set<Instruction *> ReplacedInsts; |
| Loop *CurLoop; |
| ReuseValue ReuseCandidate; |
| |
| bool doVLCR(); |
| void findLoopCarriedDeps(); |
| void findValueToReuse(); |
| void findDepChainFromPHI(Instruction *I, DepChain &D); |
| void reuseValue(); |
| Value *findValueInBlock(Value *Op, BasicBlock *BB); |
| DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters); |
| bool isEquivalentOperation(Instruction *I1, Instruction *I2); |
| bool canReplace(Instruction *I); |
| bool isCallInstCommutative(CallInst *C); |
| }; |
| |
| } // end anonymous namespace |
| |
| char HexagonVectorLoopCarriedReuse::ID = 0; |
| |
| INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", |
| "Hexagon-specific predictive commoning for HVX vectors", false, false) |
| INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| INITIALIZE_PASS_DEPENDENCY(LoopSimplify) |
| INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) |
| INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", |
| "Hexagon-specific predictive commoning for HVX vectors", false, false) |
| |
| bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) { |
| if (skipLoop(L)) |
| return false; |
| |
| if (!L->getLoopPreheader()) |
| return false; |
| |
| // Work only on innermost loops. |
| if (!L->getSubLoops().empty()) |
| return false; |
| |
| // Work only on single basic blocks loops. |
| if (L->getNumBlocks() != 1) |
| return false; |
| |
| CurLoop = L; |
| |
| return doVLCR(); |
| } |
| |
| bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) { |
| switch (C->getCalledFunction()->getIntrinsicID()) { |
| case Intrinsic::hexagon_V6_vaddb: |
| case Intrinsic::hexagon_V6_vaddb_128B: |
| case Intrinsic::hexagon_V6_vaddh: |
| case Intrinsic::hexagon_V6_vaddh_128B: |
| case Intrinsic::hexagon_V6_vaddw: |
| case Intrinsic::hexagon_V6_vaddw_128B: |
| case Intrinsic::hexagon_V6_vaddubh: |
| case Intrinsic::hexagon_V6_vaddubh_128B: |
| case Intrinsic::hexagon_V6_vadduhw: |
| case Intrinsic::hexagon_V6_vadduhw_128B: |
| case Intrinsic::hexagon_V6_vaddhw: |
| case Intrinsic::hexagon_V6_vaddhw_128B: |
| case Intrinsic::hexagon_V6_vmaxb: |
| case Intrinsic::hexagon_V6_vmaxb_128B: |
| case Intrinsic::hexagon_V6_vmaxh: |
| case Intrinsic::hexagon_V6_vmaxh_128B: |
| case Intrinsic::hexagon_V6_vmaxw: |
| case Intrinsic::hexagon_V6_vmaxw_128B: |
| case Intrinsic::hexagon_V6_vmaxub: |
| case Intrinsic::hexagon_V6_vmaxub_128B: |
| case Intrinsic::hexagon_V6_vmaxuh: |
| case Intrinsic::hexagon_V6_vmaxuh_128B: |
| case Intrinsic::hexagon_V6_vminub: |
| case Intrinsic::hexagon_V6_vminub_128B: |
| case Intrinsic::hexagon_V6_vminuh: |
| case Intrinsic::hexagon_V6_vminuh_128B: |
| case Intrinsic::hexagon_V6_vminb: |
| case Intrinsic::hexagon_V6_vminb_128B: |
| case Intrinsic::hexagon_V6_vminh: |
| case Intrinsic::hexagon_V6_vminh_128B: |
| case Intrinsic::hexagon_V6_vminw: |
| case Intrinsic::hexagon_V6_vminw_128B: |
| case Intrinsic::hexagon_V6_vmpyub: |
| case Intrinsic::hexagon_V6_vmpyub_128B: |
| case Intrinsic::hexagon_V6_vmpyuh: |
| case Intrinsic::hexagon_V6_vmpyuh_128B: |
| case Intrinsic::hexagon_V6_vavgub: |
| case Intrinsic::hexagon_V6_vavgub_128B: |
| case Intrinsic::hexagon_V6_vavgh: |
| case Intrinsic::hexagon_V6_vavgh_128B: |
| case Intrinsic::hexagon_V6_vavguh: |
| case Intrinsic::hexagon_V6_vavguh_128B: |
| case Intrinsic::hexagon_V6_vavgw: |
| case Intrinsic::hexagon_V6_vavgw_128B: |
| case Intrinsic::hexagon_V6_vavgb: |
| case Intrinsic::hexagon_V6_vavgb_128B: |
| case Intrinsic::hexagon_V6_vavguw: |
| case Intrinsic::hexagon_V6_vavguw_128B: |
| case Intrinsic::hexagon_V6_vabsdiffh: |
| case Intrinsic::hexagon_V6_vabsdiffh_128B: |
| case Intrinsic::hexagon_V6_vabsdiffub: |
| case Intrinsic::hexagon_V6_vabsdiffub_128B: |
| case Intrinsic::hexagon_V6_vabsdiffuh: |
| case Intrinsic::hexagon_V6_vabsdiffuh_128B: |
| case Intrinsic::hexagon_V6_vabsdiffw: |
| case Intrinsic::hexagon_V6_vabsdiffw_128B: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, |
| Instruction *I2) { |
| if (!I1->isSameOperationAs(I2)) |
| return false; |
| // This check is in place specifically for intrinsics. isSameOperationAs will |
| // return two for any two hexagon intrinsics because they are essentially the |
| // same instruciton (CallInst). We need to scratch the surface to see if they |
| // are calls to the same function. |
| if (CallInst *C1 = dyn_cast<CallInst>(I1)) { |
| if (CallInst *C2 = dyn_cast<CallInst>(I2)) { |
| if (C1->getCalledFunction() != C2->getCalledFunction()) |
| return false; |
| } |
| } |
| |
| // If both the Instructions are of Vector Type and any of the element |
| // is integer constant, check their values too for equivalence. |
| if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { |
| unsigned NumOperands = I1->getNumOperands(); |
| for (unsigned i = 0; i < NumOperands; ++i) { |
| ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i)); |
| ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i)); |
| if(!C1) continue; |
| assert(C2); |
| if (C1->getSExtValue() != C2->getSExtValue()) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { |
| const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); |
| if (!II) |
| return true; |
| |
| switch (II->getIntrinsicID()) { |
| case Intrinsic::hexagon_V6_hi: |
| case Intrinsic::hexagon_V6_lo: |
| case Intrinsic::hexagon_V6_hi_128B: |
| case Intrinsic::hexagon_V6_lo_128B: |
| LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n"); |
| return false; |
| default: |
| return true; |
| } |
| } |
| void HexagonVectorLoopCarriedReuse::findValueToReuse() { |
| for (auto *D : Dependences) { |
| LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n"); |
| if (D->iterations() > HexagonVLCRIterationLim) { |
| LLVM_DEBUG( |
| dbgs() |
| << ".. Skipping because number of iterations > than the limit\n"); |
| continue; |
| } |
| |
| PHINode *PN = cast<PHINode>(D->front()); |
| Instruction *BEInst = D->back(); |
| int Iters = D->iterations(); |
| BasicBlock *BB = PN->getParent(); |
| LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN |
| << " can be reused\n"); |
| |
| SmallVector<Instruction *, 4> PNUsers; |
| for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { |
| Use &U = *UI; |
| Instruction *User = cast<Instruction>(U.getUser()); |
| |
| if (User->getParent() != BB) |
| continue; |
| if (ReplacedInsts.count(User)) { |
| LLVM_DEBUG(dbgs() << *User |
| << " has already been replaced. Skipping...\n"); |
| continue; |
| } |
| if (isa<PHINode>(User)) |
| continue; |
| if (User->mayHaveSideEffects()) |
| continue; |
| if (!canReplace(User)) |
| continue; |
| |
| PNUsers.push_back(User); |
| } |
| LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n"); |
| |
| // For each interesting use I of PN, find an Instruction BEUser that |
| // performs the same operation as I on BEInst and whose other operands, |
| // if any, can also be rematerialized in OtherBB. We stop when we find the |
| // first such Instruction BEUser. This is because once BEUser is |
| // rematerialized in OtherBB, we may find more such "fixup" opportunities |
| // in this block. So, we'll start over again. |
| for (Instruction *I : PNUsers) { |
| for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E; |
| ++UI) { |
| Use &U = *UI; |
| Instruction *BEUser = cast<Instruction>(U.getUser()); |
| |
| if (BEUser->getParent() != BB) |
| continue; |
| if (!isEquivalentOperation(I, BEUser)) |
| continue; |
| |
| int NumOperands = I->getNumOperands(); |
| |
| // Take operands of each PNUser one by one and try to find DepChain |
| // with every operand of the BEUser. If any of the operands of BEUser |
| // has DepChain with current operand of the PNUser, break the matcher |
| // loop. Keep doing this for Every PNUser operand. If PNUser operand |
| // does not have DepChain with any of the BEUser operand, break the |
| // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate. |
| // This ensures that DepChain exist for all the PNUser operand with |
| // BEUser operand. This also ensures that DepChains are independent of |
| // the positions in PNUser and BEUser. |
| std::map<Instruction *, DepChain *> DepChains; |
| CallInst *C1 = dyn_cast<CallInst>(I); |
| if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) { |
| bool Found = false; |
| for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { |
| Value *Op = I->getOperand(OpNo); |
| Instruction *OpInst = dyn_cast<Instruction>(Op); |
| Found = false; |
| for (int T = 0; T < NumOperands; ++T) { |
| Value *BEOp = BEUser->getOperand(T); |
| Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); |
| if (!OpInst && !BEOpInst) { |
| if (Op == BEOp) { |
| Found = true; |
| break; |
| } |
| } |
| |
| if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst)) |
| continue; |
| |
| DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); |
| |
| if (D) { |
| Found = true; |
| DepChains[OpInst] = D; |
| break; |
| } |
| } |
| if (!Found) { |
| BEUser = nullptr; |
| break; |
| } |
| } |
| } else { |
| |
| for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { |
| Value *Op = I->getOperand(OpNo); |
| Value *BEOp = BEUser->getOperand(OpNo); |
| |
| Instruction *OpInst = dyn_cast<Instruction>(Op); |
| if (!OpInst) { |
| if (Op == BEOp) |
| continue; |
| // Do not allow reuse to occur when the operands may be different |
| // values. |
| BEUser = nullptr; |
| break; |
| } |
| |
| Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); |
| DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); |
| |
| if (D) { |
| DepChains[OpInst] = D; |
| } else { |
| BEUser = nullptr; |
| break; |
| } |
| } |
| } |
| if (BEUser) { |
| LLVM_DEBUG(dbgs() << "Found Value for reuse.\n"); |
| ReuseCandidate.Inst2Replace = I; |
| ReuseCandidate.BackedgeInst = BEUser; |
| ReuseCandidate.DepChains = DepChains; |
| ReuseCandidate.Iterations = Iters; |
| return; |
| } |
| ReuseCandidate.reset(); |
| } |
| } |
| } |
| ReuseCandidate.reset(); |
| } |
| |
| Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, |
| BasicBlock *BB) { |
| PHINode *PN = dyn_cast<PHINode>(Op); |
| assert(PN); |
| Value *ValueInBlock = PN->getIncomingValueForBlock(BB); |
| return ValueInBlock; |
| } |
| |
| void HexagonVectorLoopCarriedReuse::reuseValue() { |
| LLVM_DEBUG(dbgs() << ReuseCandidate); |
| Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; |
| Instruction *BEInst = ReuseCandidate.BackedgeInst; |
| int NumOperands = Inst2Replace->getNumOperands(); |
| std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains; |
| int Iterations = ReuseCandidate.Iterations; |
| BasicBlock *LoopPH = CurLoop->getLoopPreheader(); |
| assert(!DepChains.empty() && "No DepChains"); |
| LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n"); |
| |
| SmallVector<Instruction *, 4> InstsInPreheader; |
| for (int i = 0; i < Iterations; ++i) { |
| Instruction *InstInPreheader = Inst2Replace->clone(); |
| SmallVector<Value *, 4> Ops; |
| for (int j = 0; j < NumOperands; ++j) { |
| Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j)); |
| if (!I) |
| continue; |
| // Get the DepChain corresponding to this operand. |
| DepChain &D = *DepChains[I]; |
| // Get the PHI for the iteration number and find |
| // the incoming value from the Loop Preheader for |
| // that PHI. |
| Value *ValInPreheader = findValueInBlock(D[i], LoopPH); |
| InstInPreheader->setOperand(j, ValInPreheader); |
| } |
| InstsInPreheader.push_back(InstInPreheader); |
| InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); |
| InstInPreheader->insertBefore(LoopPH->getTerminator()); |
| LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to " |
| << LoopPH->getName() << "\n"); |
| } |
| BasicBlock *BB = BEInst->getParent(); |
| IRBuilder<> IRB(BB); |
| IRB.SetInsertPoint(BB->getFirstNonPHI()); |
| Value *BEVal = BEInst; |
| PHINode *NewPhi; |
| for (int i = Iterations-1; i >=0 ; --i) { |
| Instruction *InstInPreheader = InstsInPreheader[i]; |
| NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); |
| NewPhi->addIncoming(InstInPreheader, LoopPH); |
| NewPhi->addIncoming(BEVal, BB); |
| LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() |
| << "\n"); |
| BEVal = NewPhi; |
| } |
| // We are in LCSSA form. So, a value defined inside the Loop is used only |
| // inside the loop. So, the following is safe. |
| Inst2Replace->replaceAllUsesWith(NewPhi); |
| ReplacedInsts.insert(Inst2Replace); |
| ++HexagonNumVectorLoopCarriedReuse; |
| } |
| |
| bool HexagonVectorLoopCarriedReuse::doVLCR() { |
| assert(CurLoop->getSubLoops().empty() && |
| "Can do VLCR on the innermost loop only"); |
| assert((CurLoop->getNumBlocks() == 1) && |
| "Can do VLCR only on single block loops"); |
| |
| bool Changed = false; |
| bool Continue; |
| |
| LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); |
| do { |
| // Reset datastructures. |
| Dependences.clear(); |
| Continue = false; |
| |
| findLoopCarriedDeps(); |
| findValueToReuse(); |
| if (ReuseCandidate.isDefined()) { |
| reuseValue(); |
| Changed = true; |
| Continue = true; |
| } |
| llvm::for_each(Dependences, std::default_delete<DepChain>()); |
| } while (Continue); |
| return Changed; |
| } |
| |
| void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, |
| DepChain &D) { |
| PHINode *PN = dyn_cast<PHINode>(I); |
| if (!PN) { |
| D.push_back(I); |
| return; |
| } else { |
| auto NumIncomingValues = PN->getNumIncomingValues(); |
| if (NumIncomingValues != 2) { |
| D.clear(); |
| return; |
| } |
| |
| BasicBlock *BB = PN->getParent(); |
| if (BB != CurLoop->getHeader()) { |
| D.clear(); |
| return; |
| } |
| |
| Value *BEVal = PN->getIncomingValueForBlock(BB); |
| Instruction *BEInst = dyn_cast<Instruction>(BEVal); |
| // This is a single block loop with a preheader, so at least |
| // one value should come over the backedge. |
| assert(BEInst && "There should be a value over the backedge"); |
| |
| Value *PreHdrVal = |
| PN->getIncomingValueForBlock(CurLoop->getLoopPreheader()); |
| if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) { |
| D.clear(); |
| return; |
| } |
| D.push_back(PN); |
| findDepChainFromPHI(BEInst, D); |
| } |
| } |
| |
| DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, |
| Instruction *I2, |
| int Iters) { |
| for (auto *D : Dependences) { |
| if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters) |
| return D; |
| } |
| return nullptr; |
| } |
| |
| void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { |
| BasicBlock *BB = CurLoop->getHeader(); |
| for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { |
| auto *PN = cast<PHINode>(I); |
| if (!isa<VectorType>(PN->getType())) |
| continue; |
| |
| DepChain *D = new DepChain(); |
| findDepChainFromPHI(PN, *D); |
| if (D->size() != 0) |
| Dependences.insert(D); |
| else |
| delete D; |
| } |
| LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); |
| LLVM_DEBUG(for (size_t i = 0; i < Dependences.size(); |
| ++i) { dbgs() << *Dependences[i] << "\n"; }); |
| } |
| |
| Pass *llvm::createHexagonVectorLoopCarriedReusePass() { |
| return new HexagonVectorLoopCarriedReuse(); |
| } |