| //===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a shrink wrapping variant of prolog/epilog insertion: |
| // - Spills and restores of callee-saved registers (CSRs) are placed in the |
| // machine CFG to tightly surround their uses so that execution paths that |
| // do not use CSRs do not pay the spill/restore penalty. |
| // |
| // - Avoiding placement of spills/restores in loops: if a CSR is used inside a |
| // loop the spills are placed in the loop preheader, and restores are |
| // placed in the loop exit nodes (the successors of loop _exiting_ nodes). |
| // |
| // - Covering paths without CSR uses: |
| // If a region in a CFG uses CSRs and has multiple entry and/or exit points, |
| // the use info for the CSRs inside the region is propagated outward in the |
| // CFG to ensure validity of the spill/restore placements. This decreases |
| // the effectiveness of shrink wrapping but does not require edge splitting |
| // in the machine CFG. |
| // |
| // This shrink wrapping implementation uses an iterative analysis to determine |
| // which basic blocks require spills and restores for CSRs. |
| // |
| // This pass uses MachineDominators and MachineLoopInfo. Loop information |
| // is used to prevent placement of callee-saved register spills/restores |
| // in the bodies of loops. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #define DEBUG_TYPE "shrink-wrap" |
| |
| #include "PrologEpilogInserter.h" |
| #include "llvm/CodeGen/MachineDominators.h" |
| #include "llvm/CodeGen/MachineLoopInfo.h" |
| #include "llvm/CodeGen/MachineInstr.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetRegisterInfo.h" |
| #include "llvm/ADT/SparseBitVector.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/PostOrderIterator.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/Statistic.h" |
| #include <sstream> |
| |
| using namespace llvm; |
| |
| STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); |
| |
| // Shrink Wrapping: command-line switch that turns the optimization on for every function. |
| static cl::opt<bool> |
| ShrinkWrapping("shrink-wrap", |
| cl::desc("Shrink wrap callee-saved register spills/restores")); |
| |
| // Shrink wrap only the specified function, a debugging aid (the name filter itself is applied only when NDEBUG is not defined). |
| static cl::opt<std::string> |
| ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, |
| cl::desc("Shrink wrap the specified function"), |
| cl::value_desc("funcname"), |
| cl::init("")); |
| |
| // Debugging verbosity levels for shrink wrapping, listed in increasing order of detail; the debug output code compares them with >=. |
| enum ShrinkWrapDebugLevel { |
| None, BasicInfo, Iterations, Details |
| }; |
| |
| static cl::opt<enum ShrinkWrapDebugLevel> |
| ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, |
| cl::desc("Print shrink wrapping debugging information"), |
| cl::values( |
| clEnumVal(None , "disable debug output"), |
| clEnumVal(BasicInfo , "print basic DF sets"), |
| clEnumVal(Iterations, "print SR sets for each iteration"), |
| clEnumVal(Details , "print all DF sets"), |
| clEnumValEnd)); |
| |
| |
| void PEI::getAnalysisUsage(AnalysisUsage &AU) const { |
| AU.setPreservesCFG(); |
| if (ShrinkWrapping || ShrinkWrapFunc != "") { |
| AU.addRequired<MachineLoopInfo>(); |
| AU.addRequired<MachineDominatorTree>(); |
| } |
| AU.addPreserved<MachineLoopInfo>(); |
| AU.addPreserved<MachineDominatorTree>(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // ShrinkWrapping implementation |
| //===----------------------------------------------------------------------===// |
| |
| // Conveniences for dealing with machine loops. |
| MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { |
| assert(LP && "Machine loop is NULL."); |
| MachineBasicBlock* PHDR = LP->getLoopPreheader(); |
| MachineLoop* PLP = LP->getParentLoop(); |
| while (PLP) { |
| PHDR = PLP->getLoopPreheader(); |
| PLP = PLP->getParentLoop(); |
| } |
| return PHDR; |
| } |
| |
| MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { |
| if (LP == 0) |
| return 0; |
| MachineLoop* PLP = LP->getParentLoop(); |
| while (PLP) { |
| LP = PLP; |
| PLP = PLP->getParentLoop(); |
| } |
| return LP; |
| } |
| |
| bool PEI::isReturnBlock(MachineBasicBlock* MBB) { |
| return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); |
| } |
| |
| // Initialize shrink wrapping DFA sets, called before iterations. |
| void PEI::clearAnticAvailSets() { |
| AnticIn.clear(); |
| AnticOut.clear(); |
| AvailIn.clear(); |
| AvailOut.clear(); |
| } |
| |
| // Clear all sets constructed by shrink wrapping. |
| void PEI::clearAllSets() { |
| ReturnBlocks.clear(); |
| clearAnticAvailSets(); |
| UsedCSRegs.clear(); |
| CSRUsed.clear(); |
| TLLoops.clear(); |
| CSRSave.clear(); |
| CSRRestore.clear(); |
| } |
| |
| // Initialize all shrink wrapping data. |
| void PEI::initShrinkWrappingInfo() { |
| clearAllSets(); |
| EntryBlock = 0; |
| #ifndef NDEBUG |
| HasFastExitPath = false; |
| #endif |
| ShrinkWrapThisFunction = ShrinkWrapping; |
| // DEBUG: enable or disable shrink wrapping for the current function |
| // via --shrink-wrap-func=<funcname>. |
| #ifndef NDEBUG |
| if (ShrinkWrapFunc != "") { |
| std::string MFName = MF->getFunction()->getNameStr(); |
| ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); |
| } |
| #endif |
| } |
| |
| |
| /// placeCSRSpillsAndRestores - determine which MBBs of the function |
| /// need save, restore code for callee-saved registers by doing a DF analysis |
| /// similar to the one used in code motion (GVNPRE). This produces maps of MBBs |
| /// to sets of registers (CSRs) for saves and restores. MachineLoopInfo |
| /// is used to ensure that CSR save/restore code is not placed inside loops. |
| /// This function computes the maps of MBBs -> CSRs to spill and restore |
| /// in CSRSave, CSRRestore. |
| /// |
| /// If shrink wrapping is not being performed, place all spills in |
| /// the entry block, all restores in return blocks. In this case, |
| /// CSRSave has a single mapping, CSRRestore has mappings for each |
| /// return block. |
| /// |
| void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { |
| |
| DEBUG(MF = &Fn); |
| |
| initShrinkWrappingInfo(); |
| |
| DEBUG(if (ShrinkWrapThisFunction) { |
| dbgs() << "Place CSR spills/restores for " |
| << MF->getFunction()->getName() << "\n"; |
| }); |
| |
| if (calculateSets(Fn)) |
| placeSpillsAndRestores(Fn); |
| } |
| |
| /// calcAnticInOut - calculate the anticipated in/out reg sets |
| /// for the given MBB by looking forward in the MCFG at MBB's |
| /// successors. |
| /// |
| bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { |
| bool changed = false; |
| |
| // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) |
| SmallVector<MachineBasicBlock*, 4> successors; |
| for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), |
| SE = MBB->succ_end(); SI != SE; ++SI) { |
| MachineBasicBlock* SUCC = *SI; |
| if (SUCC != MBB) |
| successors.push_back(SUCC); |
| } |
| |
| unsigned i = 0, e = successors.size(); |
| if (i != e) { |
| CSRegSet prevAnticOut = AnticOut[MBB]; |
| MachineBasicBlock* SUCC = successors[i]; |
| |
| AnticOut[MBB] = AnticIn[SUCC]; |
| for (++i; i != e; ++i) { |
| SUCC = successors[i]; |
| AnticOut[MBB] &= AnticIn[SUCC]; |
| } |
| if (prevAnticOut != AnticOut[MBB]) |
| changed = true; |
| } |
| |
| // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); |
| CSRegSet prevAnticIn = AnticIn[MBB]; |
| AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; |
| if (prevAnticIn != AnticIn[MBB]) |
| changed = true; |
| return changed; |
| } |
| |
| /// calcAvailInOut - calculate the available in/out reg sets |
| /// for the given MBB by looking backward in the MCFG at MBB's |
| /// predecessors. |
| /// |
| bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { |
| bool changed = false; |
| |
| // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) |
| SmallVector<MachineBasicBlock*, 4> predecessors; |
| for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), |
| PE = MBB->pred_end(); PI != PE; ++PI) { |
| MachineBasicBlock* PRED = *PI; |
| if (PRED != MBB) |
| predecessors.push_back(PRED); |
| } |
| |
| unsigned i = 0, e = predecessors.size(); |
| if (i != e) { |
| CSRegSet prevAvailIn = AvailIn[MBB]; |
| MachineBasicBlock* PRED = predecessors[i]; |
| |
| AvailIn[MBB] = AvailOut[PRED]; |
| for (++i; i != e; ++i) { |
| PRED = predecessors[i]; |
| AvailIn[MBB] &= AvailOut[PRED]; |
| } |
| if (prevAvailIn != AvailIn[MBB]) |
| changed = true; |
| } |
| |
| // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); |
| CSRegSet prevAvailOut = AvailOut[MBB]; |
| AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; |
| if (prevAvailOut != AvailOut[MBB]) |
| changed = true; |
| return changed; |
| } |
| |
| /// calculateAnticAvail - build the sets anticipated and available |
| /// registers in the MCFG of the current function iteratively, |
| /// doing a combined forward and backward analysis. |
| /// |
| void PEI::calculateAnticAvail(MachineFunction &Fn) { |
| // Initialize data flow sets. |
| clearAnticAvailSets(); |
| |
| // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. |
| bool changed = true; |
| unsigned iterations = 0; |
| while (changed) { |
| changed = false; |
| ++iterations; |
| for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| |
| // Calculate anticipated in, out regs at MBB from |
| // anticipated at successors of MBB. |
| changed |= calcAnticInOut(MBB); |
| |
| // Calculate available in, out regs at MBB from |
| // available at predecessors of MBB. |
| changed |= calcAvailInOut(MBB); |
| } |
| } |
| |
| DEBUG({ |
| if (ShrinkWrapDebugging >= Details) { |
| dbgs() |
| << "-----------------------------------------------------------\n" |
| << " Antic/Avail Sets:\n" |
| << "-----------------------------------------------------------\n" |
| << "iterations = " << iterations << "\n" |
| << "-----------------------------------------------------------\n" |
| << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" |
| << "-----------------------------------------------------------\n"; |
| |
| for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| dumpSets(MBB); |
| } |
| |
| dbgs() |
| << "-----------------------------------------------------------\n"; |
| } |
| }); |
| } |
| |
| /// propagateUsesAroundLoop - copy used register info from MBB to all blocks |
| /// of the loop given by LP and its parent loops. This prevents spills/restores |
| /// from being placed in the bodies of loops. |
| /// |
| void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { |
| if (! MBB || !LP) |
| return; |
| |
| std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); |
| for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { |
| MachineBasicBlock* LBB = loopBlocks[i]; |
| if (LBB == MBB) |
| continue; |
| if (CSRUsed[LBB].contains(CSRUsed[MBB])) |
| continue; |
| CSRUsed[LBB] |= CSRUsed[MBB]; |
| } |
| } |
| |
| /// calculateSets - collect the CSRs used in this function, compute |
| /// the DF sets that describe the initial minimal regions in the |
| /// Machine CFG around which CSR spills and restores must be placed. |
| /// |
| /// Additionally, this function decides if shrink wrapping should |
| /// be disabled for the current function, checking the following: |
| /// 1. the current function has more than 500 MBBs: heuristic limit |
| /// on function size to reduce compile time impact of the current |
| /// iterative algorithm. |
| /// 2. all CSRs are used in the entry block. |
| /// 3. all CSRs are used in all immediate successors of the entry block. |
| /// 4. all CSRs are used in a subset of blocks, each of which dominates |
| /// all return blocks. These blocks, taken as a subgraph of the MCFG, |
| /// are equivalent to the entry block since all execution paths pass |
| /// through them. |
| /// |
| bool PEI::calculateSets(MachineFunction &Fn) { |
| // Sets used to compute spill, restore placement sets. |
| const std::vector<CalleeSavedInfo> CSI = |
| Fn.getFrameInfo()->getCalleeSavedInfo(); |
| |
| // If no CSRs used, we are done. |
| if (CSI.empty()) { |
| DEBUG(if (ShrinkWrapThisFunction) |
| dbgs() << "DISABLED: " << Fn.getFunction()->getName() |
| << ": uses no callee-saved registers\n"); |
| return false; |
| } |
| |
| // Save refs to entry and return blocks. |
| EntryBlock = Fn.begin(); |
| for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); |
| MBB != E; ++MBB) |
| if (isReturnBlock(MBB)) |
| ReturnBlocks.push_back(MBB); |
| |
| // Determine if this function has fast exit paths. |
| DEBUG(if (ShrinkWrapThisFunction) |
| findFastExitPath()); |
| |
| // Limit shrink wrapping via the current iterative bit vector |
| // implementation to functions with <= 500 MBBs. |
| if (Fn.size() > 500) { |
| DEBUG(if (ShrinkWrapThisFunction) |
| dbgs() << "DISABLED: " << Fn.getFunction()->getName() |
| << ": too large (" << Fn.size() << " MBBs)\n"); |
| ShrinkWrapThisFunction = false; |
| } |
| |
| // Return now if not shrink wrapping. |
| if (! ShrinkWrapThisFunction) |
| return false; |
| |
| // Collect set of used CSRs. |
| for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { |
| UsedCSRegs.set(inx); |
| } |
| |
| // Walk instructions in all MBBs, create CSRUsed[] sets, choose |
| // whether or not to shrink wrap this function. |
| MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); |
| MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); |
| const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); |
| |
| bool allCSRUsesInEntryBlock = true; |
| for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { |
| for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { |
| unsigned Reg = CSI[inx].getReg(); |
| // If instruction I reads or modifies Reg, add it to UsedCSRegs, |
| // CSRUsed map for the current block. |
| for (unsigned opInx = 0, opEnd = I->getNumOperands(); |
| opInx != opEnd; ++opInx) { |
| const MachineOperand &MO = I->getOperand(opInx); |
| if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) |
| continue; |
| unsigned MOReg = MO.getReg(); |
| if (!MOReg) |
| continue; |
| if (MOReg == Reg || |
| (TargetRegisterInfo::isPhysicalRegister(MOReg) && |
| TargetRegisterInfo::isPhysicalRegister(Reg) && |
| TRI->isSubRegister(Reg, MOReg))) { |
| // CSR Reg is defined/used in block MBB. |
| CSRUsed[MBB].set(inx); |
| // Check for uses in EntryBlock. |
| if (MBB != EntryBlock) |
| allCSRUsesInEntryBlock = false; |
| } |
| } |
| } |
| } |
| |
| if (CSRUsed[MBB].empty()) |
| continue; |
| |
| // Propagate CSRUsed[MBB] in loops |
| if (MachineLoop* LP = LI.getLoopFor(MBB)) { |
| // Add top level loop to work list. |
| MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); |
| MachineLoop* PLP = getTopLevelLoopParent(LP); |
| |
| if (! HDR) { |
| HDR = PLP->getHeader(); |
| assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); |
| MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); |
| HDR = *PI; |
| } |
| TLLoops[HDR] = PLP; |
| |
| // Push uses from inside loop to its parent loops, |
| // or to all other MBBs in its loop. |
| if (LP->getLoopDepth() > 1) { |
| for (MachineLoop* PLP = LP->getParentLoop(); PLP; |
| PLP = PLP->getParentLoop()) { |
| propagateUsesAroundLoop(MBB, PLP); |
| } |
| } else { |
| propagateUsesAroundLoop(MBB, LP); |
| } |
| } |
| } |
| |
| if (allCSRUsesInEntryBlock) { |
| DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() |
| << ": all CSRs used in EntryBlock\n"); |
| ShrinkWrapThisFunction = false; |
| } else { |
| bool allCSRsUsedInEntryFanout = true; |
| for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), |
| SE = EntryBlock->succ_end(); SI != SE; ++SI) { |
| MachineBasicBlock* SUCC = *SI; |
| if (CSRUsed[SUCC] != UsedCSRegs) |
| allCSRsUsedInEntryFanout = false; |
| } |
| if (allCSRsUsedInEntryFanout) { |
| DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() |
| << ": all CSRs used in imm successors of EntryBlock\n"); |
| ShrinkWrapThisFunction = false; |
| } |
| } |
| |
| if (ShrinkWrapThisFunction) { |
| // Check if MBB uses CSRs and dominates all exit nodes. |
| // Such nodes are equiv. to the entry node w.r.t. |
| // CSR uses: every path through the function must |
| // pass through this node. If each CSR is used at least |
| // once by these nodes, shrink wrapping is disabled. |
| CSRegSet CSRUsedInChokePoints; |
| for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) |
| continue; |
| bool dominatesExitNodes = true; |
| for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) |
| if (! DT.dominates(MBB, ReturnBlocks[ri])) { |
| dominatesExitNodes = false; |
| break; |
| } |
| if (dominatesExitNodes) { |
| CSRUsedInChokePoints |= CSRUsed[MBB]; |
| if (CSRUsedInChokePoints == UsedCSRegs) { |
| DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() |
| << ": all CSRs used in choke point(s) at " |
| << getBasicBlockName(MBB) << "\n"); |
| ShrinkWrapThisFunction = false; |
| break; |
| } |
| } |
| } |
| } |
| |
| // Return now if we have decided not to apply shrink wrapping |
| // to the current function. |
| if (! ShrinkWrapThisFunction) |
| return false; |
| |
| DEBUG({ |
| dbgs() << "ENABLED: " << Fn.getFunction()->getName(); |
| if (HasFastExitPath) |
| dbgs() << " (fast exit path)"; |
| dbgs() << "\n"; |
| if (ShrinkWrapDebugging >= BasicInfo) { |
| dbgs() << "------------------------------" |
| << "-----------------------------\n"; |
| dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; |
| if (ShrinkWrapDebugging >= Details) { |
| dbgs() << "------------------------------" |
| << "-----------------------------\n"; |
| dumpAllUsed(); |
| } |
| } |
| }); |
| |
| // Build initial DF sets to determine minimal regions in the |
| // Machine CFG around which CSRs must be spilled and restored. |
| calculateAnticAvail(Fn); |
| |
| return true; |
| } |
| |
| /// addUsesForMEMERegion - add uses of CSRs spilled or restored in |
| /// multi-entry, multi-exit (MEME) regions so spill and restore |
| /// placement will not break code that enters or leaves a |
| /// shrink-wrapped region by inducing spills with no matching |
| /// restores or restores with no matching spills. A MEME region |
| /// is a subgraph of the MCFG with multiple entry edges, multiple |
| /// exit edges, or both. This code propagates use information |
| /// through the MCFG until all paths requiring spills and restores |
| /// _outside_ the computed minimal placement regions have been covered. |
| /// |
| bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, |
| SmallVector<MachineBasicBlock*, 4>& blks) { |
| if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { |
| bool processThisBlock = false; |
| for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), |
| SE = MBB->succ_end(); SI != SE; ++SI) { |
| MachineBasicBlock* SUCC = *SI; |
| if (SUCC->pred_size() > 1) { |
| processThisBlock = true; |
| break; |
| } |
| } |
| if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { |
| for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), |
| PE = MBB->pred_end(); PI != PE; ++PI) { |
| MachineBasicBlock* PRED = *PI; |
| if (PRED->succ_size() > 1) { |
| processThisBlock = true; |
| break; |
| } |
| } |
| } |
| if (! processThisBlock) |
| return false; |
| } |
| |
| CSRegSet prop; |
| if (!CSRSave[MBB].empty()) |
| prop = CSRSave[MBB]; |
| else if (!CSRRestore[MBB].empty()) |
| prop = CSRRestore[MBB]; |
| else |
| prop = CSRUsed[MBB]; |
| if (prop.empty()) |
| return false; |
| |
| // Propagate selected bits to successors, predecessors of MBB. |
| bool addedUses = false; |
| for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), |
| SE = MBB->succ_end(); SI != SE; ++SI) { |
| MachineBasicBlock* SUCC = *SI; |
| // Self-loop |
| if (SUCC == MBB) |
| continue; |
| if (! CSRUsed[SUCC].contains(prop)) { |
| CSRUsed[SUCC] |= prop; |
| addedUses = true; |
| blks.push_back(SUCC); |
| DEBUG(if (ShrinkWrapDebugging >= Iterations) |
| dbgs() << getBasicBlockName(MBB) |
| << "(" << stringifyCSRegSet(prop) << ")->" |
| << "successor " << getBasicBlockName(SUCC) << "\n"); |
| } |
| } |
| for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), |
| PE = MBB->pred_end(); PI != PE; ++PI) { |
| MachineBasicBlock* PRED = *PI; |
| // Self-loop |
| if (PRED == MBB) |
| continue; |
| if (! CSRUsed[PRED].contains(prop)) { |
| CSRUsed[PRED] |= prop; |
| addedUses = true; |
| blks.push_back(PRED); |
| DEBUG(if (ShrinkWrapDebugging >= Iterations) |
| dbgs() << getBasicBlockName(MBB) |
| << "(" << stringifyCSRegSet(prop) << ")->" |
| << "predecessor " << getBasicBlockName(PRED) << "\n"); |
| } |
| } |
| return addedUses; |
| } |
| |
| /// addUsesForTopLevelLoops - add uses for CSRs used inside top |
| /// level loops to the exit blocks of those loops. |
| /// |
| bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { |
| bool addedUses = false; |
| |
| // Place restores for top level loops where needed. |
| for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator |
| I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { |
| MachineBasicBlock* MBB = I->first; |
| MachineLoop* LP = I->second; |
| MachineBasicBlock* HDR = LP->getHeader(); |
| SmallVector<MachineBasicBlock*, 4> exitBlocks; |
| CSRegSet loopSpills; |
| |
| loopSpills = CSRSave[MBB]; |
| if (CSRSave[MBB].empty()) { |
| loopSpills = CSRUsed[HDR]; |
| assert(!loopSpills.empty() && "No CSRs used in loop?"); |
| } else if (CSRRestore[MBB].contains(CSRSave[MBB])) |
| continue; |
| |
| LP->getExitBlocks(exitBlocks); |
| assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); |
| for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { |
| MachineBasicBlock* EXB = exitBlocks[i]; |
| if (! CSRUsed[EXB].contains(loopSpills)) { |
| CSRUsed[EXB] |= loopSpills; |
| addedUses = true; |
| DEBUG(if (ShrinkWrapDebugging >= Iterations) |
| dbgs() << "LOOP " << getBasicBlockName(MBB) |
| << "(" << stringifyCSRegSet(loopSpills) << ")->" |
| << getBasicBlockName(EXB) << "\n"); |
| if (EXB->succ_size() > 1 || EXB->pred_size() > 1) |
| blks.push_back(EXB); |
| } |
| } |
| } |
| return addedUses; |
| } |
| |
| /// calcSpillPlacements - determine which CSRs should be spilled |
| /// in MBB using AnticIn sets of MBB's predecessors, keeping track |
| /// of changes to spilled reg sets. Add MBB to the set of blocks |
| /// that need to be processed for propagating use info to cover |
| /// multi-entry/exit regions. |
| /// |
| bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, |
| SmallVector<MachineBasicBlock*, 4> &blks, |
| CSRegBlockMap &prevSpills) { |
| bool placedSpills = false; |
| // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) |
| CSRegSet anticInPreds; |
| SmallVector<MachineBasicBlock*, 4> predecessors; |
| for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), |
| PE = MBB->pred_end(); PI != PE; ++PI) { |
| MachineBasicBlock* PRED = *PI; |
| if (PRED != MBB) |
| predecessors.push_back(PRED); |
| } |
| unsigned i = 0, e = predecessors.size(); |
| if (i != e) { |
| MachineBasicBlock* PRED = predecessors[i]; |
| anticInPreds = UsedCSRegs - AnticIn[PRED]; |
| for (++i; i != e; ++i) { |
| PRED = predecessors[i]; |
| anticInPreds &= (UsedCSRegs - AnticIn[PRED]); |
| } |
| } else { |
| // Handle uses in entry blocks (which have no predecessors). |
| // This is necessary because the DFA formulation assumes the |
| // entry and (multiple) exit nodes cannot have CSR uses, which |
| // is not the case in the real world. |
| anticInPreds = UsedCSRegs; |
| } |
| // Compute spills required at MBB: |
| CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; |
| |
| if (! CSRSave[MBB].empty()) { |
| if (MBB == EntryBlock) { |
| for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) |
| CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; |
| } else { |
| // Reset all regs spilled in MBB that are also spilled in EntryBlock. |
| if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { |
| CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; |
| } |
| } |
| } |
| placedSpills = (CSRSave[MBB] != prevSpills[MBB]); |
| prevSpills[MBB] = CSRSave[MBB]; |
| // Remember this block for adding restores to successor |
| // blocks for multi-entry region. |
| if (placedSpills) |
| blks.push_back(MBB); |
| |
| DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) |
| dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(CSRSave[MBB]) << "\n"); |
| |
| return placedSpills; |
| } |
| |
| /// calcRestorePlacements - determine which CSRs should be restored |
| /// in MBB using AvailOut sets of MBB's succcessors, keeping track |
| /// of changes to restored reg sets. Add MBB to the set of blocks |
| /// that need to be processed for propagating use info to cover |
| /// multi-entry/exit regions. |
| /// |
| bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, |
| SmallVector<MachineBasicBlock*, 4> &blks, |
| CSRegBlockMap &prevRestores) { |
| bool placedRestores = false; |
| // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) |
| CSRegSet availOutSucc; |
| SmallVector<MachineBasicBlock*, 4> successors; |
| for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), |
| SE = MBB->succ_end(); SI != SE; ++SI) { |
| MachineBasicBlock* SUCC = *SI; |
| if (SUCC != MBB) |
| successors.push_back(SUCC); |
| } |
| unsigned i = 0, e = successors.size(); |
| if (i != e) { |
| MachineBasicBlock* SUCC = successors[i]; |
| availOutSucc = UsedCSRegs - AvailOut[SUCC]; |
| for (++i; i != e; ++i) { |
| SUCC = successors[i]; |
| availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); |
| } |
| } else { |
| if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { |
| // Handle uses in return blocks (which have no successors). |
| // This is necessary because the DFA formulation assumes the |
| // entry and (multiple) exit nodes cannot have CSR uses, which |
| // is not the case in the real world. |
| availOutSucc = UsedCSRegs; |
| } |
| } |
| // Compute restores required at MBB: |
| CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; |
| |
| // Postprocess restore placements at MBB. |
| // Remove the CSRs that are restored in the return blocks. |
| // Lest this be confusing, note that: |
| // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. |
| if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { |
| if (! CSRSave[EntryBlock].empty()) |
| CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; |
| } |
| placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); |
| prevRestores[MBB] = CSRRestore[MBB]; |
| // Remember this block for adding saves to predecessor |
| // blocks for multi-entry region. |
| if (placedRestores) |
| blks.push_back(MBB); |
| |
| DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) |
| dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); |
| |
| return placedRestores; |
| } |
| |
| /// placeSpillsAndRestores - place spills and restores of CSRs |
| /// used in MBBs in minimal regions that contain the uses. |
| /// |
| void PEI::placeSpillsAndRestores(MachineFunction &Fn) { |
| CSRegBlockMap prevCSRSave; |
| CSRegBlockMap prevCSRRestore; |
| SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; |
| bool changed = true; |
| unsigned iterations = 0; |
| |
| // Iterate computation of spill and restore placements in the MCFG until: |
| // 1. CSR use info has been fully propagated around the MCFG, and |
| // 2. computation of CSRSave[], CSRRestore[] reach fixed points. |
| while (changed) { |
| changed = false; |
| ++iterations; |
| |
| DEBUG(if (ShrinkWrapDebugging >= Iterations) |
| dbgs() << "iter " << iterations |
| << " --------------------------------------------------\n"); |
| |
| // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, |
| // which determines the placements of spills and restores. |
| // Keep track of changes to spills, restores in each iteration to |
| // minimize the total iterations. |
| bool SRChanged = false; |
| for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| |
| // Place spills for CSRs in MBB. |
| SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); |
| |
| // Place restores for CSRs in MBB. |
| SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); |
| } |
| |
| // Add uses of CSRs used inside loops where needed. |
| changed |= addUsesForTopLevelLoops(cvBlocks); |
| |
| // Add uses for CSRs spilled or restored at branch, join points. |
| if (changed || SRChanged) { |
| while (! cvBlocks.empty()) { |
| MachineBasicBlock* MBB = cvBlocks.pop_back_val(); |
| changed |= addUsesForMEMERegion(MBB, ncvBlocks); |
| } |
| if (! ncvBlocks.empty()) { |
| cvBlocks = ncvBlocks; |
| ncvBlocks.clear(); |
| } |
| } |
| |
| if (changed) { |
| calculateAnticAvail(Fn); |
| CSRSave.clear(); |
| CSRRestore.clear(); |
| } |
| } |
| |
| // Check for effectiveness: |
| // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} |
| // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] |
| // Gives a measure of how many CSR spills have been moved from EntryBlock |
| // to minimal regions enclosing their uses. |
| CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); |
| unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); |
| numSRReduced += numSRReducedThisFunc; |
| DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { |
| dbgs() << "-----------------------------------------------------------\n"; |
| dbgs() << "total iterations = " << iterations << " ( " |
| << Fn.getFunction()->getName() |
| << " " << numSRReducedThisFunc |
| << " " << Fn.size() |
| << " )\n"; |
| dbgs() << "-----------------------------------------------------------\n"; |
| dumpSRSets(); |
| dbgs() << "-----------------------------------------------------------\n"; |
| if (numSRReducedThisFunc) |
| verifySpillRestorePlacement(); |
| }); |
| } |
| |
| // Debugging methods. |
| #ifndef NDEBUG |
| /// findFastExitPath - debugging method used to detect functions |
| /// with at least one path from the entry block to a return block |
| /// directly or which has a very small number of edges. |
| /// |
| void PEI::findFastExitPath() { |
| if (! EntryBlock) |
| return; |
| // Fina a path from EntryBlock to any return block that does not branch: |
| // Entry |
| // | ... |
| // v | |
| // B1<-----+ |
| // | |
| // v |
| // Return |
| for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), |
| SE = EntryBlock->succ_end(); SI != SE; ++SI) { |
| MachineBasicBlock* SUCC = *SI; |
| |
| // Assume positive, disprove existence of fast path. |
| HasFastExitPath = true; |
| |
| // Check the immediate successors. |
| if (isReturnBlock(SUCC)) { |
| if (ShrinkWrapDebugging >= BasicInfo) |
| dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) |
| << "->" << getBasicBlockName(SUCC) << "\n"; |
| break; |
| } |
| // Traverse df from SUCC, look for a branch block. |
| std::string exitPath = getBasicBlockName(SUCC); |
| for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), |
| BE = df_end(SUCC); BI != BE; ++BI) { |
| MachineBasicBlock* SBB = *BI; |
| // Reject paths with branch nodes. |
| if (SBB->succ_size() > 1) { |
| HasFastExitPath = false; |
| break; |
| } |
| exitPath += "->" + getBasicBlockName(SBB); |
| } |
| if (HasFastExitPath) { |
| if (ShrinkWrapDebugging >= BasicInfo) |
| dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) |
| << "->" << exitPath << "\n"; |
| break; |
| } |
| } |
| } |
| |
| /// verifySpillRestorePlacement - check the current spill/restore |
| /// sets for safety. Attempt to find spills without restores or |
| /// restores without spills. |
| /// Spills: walk df from each MBB in spill set ensuring that |
| /// all CSRs spilled at MMBB are restored on all paths |
| /// from MBB to all exit blocks. |
| /// Restores: walk idf from each MBB in restore set ensuring that |
| /// all CSRs restored at MBB are spilled on all paths |
| /// reaching MBB. |
| /// |
| void PEI::verifySpillRestorePlacement() { |
| unsigned numReturnBlocks = 0; |
| for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| if (isReturnBlock(MBB) || MBB->succ_size() == 0) |
| ++numReturnBlocks; |
| } |
| for (CSRegBlockMap::iterator BI = CSRSave.begin(), |
| BE = CSRSave.end(); BI != BE; ++BI) { |
| MachineBasicBlock* MBB = BI->first; |
| CSRegSet spilled = BI->second; |
| CSRegSet restored; |
| |
| if (spilled.empty()) |
| continue; |
| |
| DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(spilled) |
| << " RESTORE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); |
| |
| if (CSRRestore[MBB].intersects(spilled)) { |
| restored |= (CSRRestore[MBB] & spilled); |
| } |
| |
| // Walk depth first from MBB to find restores of all CSRs spilled at MBB: |
| // we must find restores for all spills w/no intervening spills on all |
| // paths from MBB to all return blocks. |
| for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), |
| BE = df_end(MBB); BI != BE; ++BI) { |
| MachineBasicBlock* SBB = *BI; |
| if (SBB == MBB) |
| continue; |
| // Stop when we encounter spills of any CSRs spilled at MBB that |
| // have not yet been seen to be restored. |
| if (CSRSave[SBB].intersects(spilled) && |
| !restored.contains(CSRSave[SBB] & spilled)) |
| break; |
| // Collect the CSRs spilled at MBB that are restored |
| // at this DF successor of MBB. |
| if (CSRRestore[SBB].intersects(spilled)) |
| restored |= (CSRRestore[SBB] & spilled); |
| // If we are at a retun block, check that the restores |
| // we have seen so far exhaust the spills at MBB, then |
| // reset the restores. |
| if (isReturnBlock(SBB) || SBB->succ_size() == 0) { |
| if (restored != spilled) { |
| CSRegSet notRestored = (spilled - restored); |
| DEBUG(dbgs() << MF->getFunction()->getName() << ": " |
| << stringifyCSRegSet(notRestored) |
| << " spilled at " << getBasicBlockName(MBB) |
| << " are never restored on path to return " |
| << getBasicBlockName(SBB) << "\n"); |
| } |
| restored.clear(); |
| } |
| } |
| } |
| |
| // Check restore placements. |
| for (CSRegBlockMap::iterator BI = CSRRestore.begin(), |
| BE = CSRRestore.end(); BI != BE; ++BI) { |
| MachineBasicBlock* MBB = BI->first; |
| CSRegSet restored = BI->second; |
| CSRegSet spilled; |
| |
| if (restored.empty()) |
| continue; |
| |
| DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(CSRSave[MBB]) |
| << " RESTORE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(restored) << "\n"); |
| |
| if (CSRSave[MBB].intersects(restored)) { |
| spilled |= (CSRSave[MBB] & restored); |
| } |
| // Walk inverse depth first from MBB to find spills of all |
| // CSRs restored at MBB: |
| for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), |
| BE = idf_end(MBB); BI != BE; ++BI) { |
| MachineBasicBlock* PBB = *BI; |
| if (PBB == MBB) |
| continue; |
| // Stop when we encounter restores of any CSRs restored at MBB that |
| // have not yet been seen to be spilled. |
| if (CSRRestore[PBB].intersects(restored) && |
| !spilled.contains(CSRRestore[PBB] & restored)) |
| break; |
| // Collect the CSRs restored at MBB that are spilled |
| // at this DF predecessor of MBB. |
| if (CSRSave[PBB].intersects(restored)) |
| spilled |= (CSRSave[PBB] & restored); |
| } |
| if (spilled != restored) { |
| CSRegSet notSpilled = (restored - spilled); |
| DEBUG(dbgs() << MF->getFunction()->getName() << ": " |
| << stringifyCSRegSet(notSpilled) |
| << " restored at " << getBasicBlockName(MBB) |
| << " are never spilled\n"); |
| } |
| } |
| } |
| |
| // Debugging print methods. |
| std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { |
| if (!MBB) |
| return ""; |
| |
| if (MBB->getBasicBlock()) |
| return MBB->getBasicBlock()->getNameStr(); |
| |
| std::ostringstream name; |
| name << "_MBB_" << MBB->getNumber(); |
| return name.str(); |
| } |
| |
| std::string PEI::stringifyCSRegSet(const CSRegSet& s) { |
| const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); |
| const std::vector<CalleeSavedInfo> CSI = |
| MF->getFrameInfo()->getCalleeSavedInfo(); |
| |
| std::ostringstream srep; |
| if (CSI.size() == 0) { |
| srep << "[]"; |
| return srep.str(); |
| } |
| srep << "["; |
| CSRegSet::iterator I = s.begin(), E = s.end(); |
| if (I != E) { |
| unsigned reg = CSI[*I].getReg(); |
| srep << TRI->getName(reg); |
| for (++I; I != E; ++I) { |
| reg = CSI[*I].getReg(); |
| srep << ","; |
| srep << TRI->getName(reg); |
| } |
| } |
| srep << "]"; |
| return srep.str(); |
| } |
| |
| void PEI::dumpSet(const CSRegSet& s) { |
| DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); |
| } |
| |
| void PEI::dumpUsed(MachineBasicBlock* MBB) { |
| DEBUG({ |
| if (MBB) |
| dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; |
| }); |
| } |
| |
| void PEI::dumpAllUsed() { |
| for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| dumpUsed(MBB); |
| } |
| } |
| |
| void PEI::dumpSets(MachineBasicBlock* MBB) { |
| DEBUG({ |
| if (MBB) |
| dbgs() << getBasicBlockName(MBB) << " | " |
| << stringifyCSRegSet(CSRUsed[MBB]) << " | " |
| << stringifyCSRegSet(AnticIn[MBB]) << " | " |
| << stringifyCSRegSet(AnticOut[MBB]) << " | " |
| << stringifyCSRegSet(AvailIn[MBB]) << " | " |
| << stringifyCSRegSet(AvailOut[MBB]) << "\n"; |
| }); |
| } |
| |
| void PEI::dumpSets1(MachineBasicBlock* MBB) { |
| DEBUG({ |
| if (MBB) |
| dbgs() << getBasicBlockName(MBB) << " | " |
| << stringifyCSRegSet(CSRUsed[MBB]) << " | " |
| << stringifyCSRegSet(AnticIn[MBB]) << " | " |
| << stringifyCSRegSet(AnticOut[MBB]) << " | " |
| << stringifyCSRegSet(AvailIn[MBB]) << " | " |
| << stringifyCSRegSet(AvailOut[MBB]) << " | " |
| << stringifyCSRegSet(CSRSave[MBB]) << " | " |
| << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; |
| }); |
| } |
| |
| void PEI::dumpAllSets() { |
| for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); |
| MBBI != MBBE; ++MBBI) { |
| MachineBasicBlock* MBB = MBBI; |
| dumpSets1(MBB); |
| } |
| } |
| |
| void PEI::dumpSRSets() { |
| DEBUG({ |
| for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); |
| MBB != E; ++MBB) { |
| if (!CSRSave[MBB].empty()) { |
| dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(CSRSave[MBB]); |
| if (CSRRestore[MBB].empty()) |
| dbgs() << '\n'; |
| } |
| |
| if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) |
| dbgs() << " " |
| << "RESTORE[" << getBasicBlockName(MBB) << "] = " |
| << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; |
| } |
| }); |
| } |
| #endif |