| //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| /// Computations in WWM can overwrite values in inactive channels for |
| /// variables that the register allocator thinks are dead. This pass adds fake |
| /// uses of those variables to WWM instructions to make sure that they aren't |
| /// overwritten. |
| /// |
| /// As an example, consider this snippet: |
| /// %vgpr0 = V_MOV_B32_e32 0.0 |
| /// if (...) { |
| /// %vgpr1 = ... |
| /// %vgpr2 = WWM killed %vgpr1 |
| /// ... = killed %vgpr2 |
| /// %vgpr0 = V_MOV_B32_e32 1.0 |
| /// } |
| /// ... = %vgpr0 |
| /// |
| /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, |
| /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since |
| /// writing %vgpr1 would only write to channels that would be clobbered by the |
| /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, |
| /// it would clobber even the inactive channels for which the if-condition is |
| /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use |
| /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the |
| /// same register. |
| /// |
| /// In general, we need to figure out what registers might have their inactive |
| /// channels which are eventually used accidentally clobbered by a WWM |
| /// instruction. We approximate this using two conditions: |
| /// |
| /// 1. A definition of the variable reaches the WWM instruction. |
| /// 2. The variable would be live at the WWM instruction if all its defs were |
| /// partial defs (i.e. considered as a use), ignoring normal uses. |
| /// |
| /// If a register matches both conditions, then we add an implicit use of it to |
| /// the WWM instruction. Condition #2 is the heart of the matter: every |
| /// definition is really a partial definition, since every VALU instruction is |
| /// implicitly predicated. We can usually ignore this, but WWM forces us not |
| /// to. Condition #1 prevents false positives if the variable is undefined at |
| /// the WWM instruction anyways. This is overly conservative in certain cases, |
| /// especially in uniform control flow, but this is a workaround anyways until |
| /// LLVM gains the notion of predicated uses and definitions of variables. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPU.h" |
| #include "AMDGPUSubtarget.h" |
| #include "SIInstrInfo.h" |
| #include "SIRegisterInfo.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "llvm/ADT/DepthFirstIterator.h" |
| #include "llvm/ADT/SparseBitVector.h" |
| #include "llvm/CodeGen/LiveIntervals.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include "llvm/CodeGen/Passes.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "si-fix-wwm-liveness" |
| |
| namespace { |
| |
| class SIFixWWMLiveness : public MachineFunctionPass { |
| private: |
| LiveIntervals *LIS = nullptr; |
| const SIRegisterInfo *TRI; |
| MachineRegisterInfo *MRI; |
| |
| public: |
| static char ID; |
| |
| SIFixWWMLiveness() : MachineFunctionPass(ID) { |
| initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| bool runOnWWMInstruction(MachineInstr &MI); |
| |
| void addDefs(const MachineInstr &MI, SparseBitVector<> &set); |
| |
| StringRef getPassName() const override { return "SI Fix WWM Liveness"; } |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| // Should preserve the same set that TwoAddressInstructions does. |
| AU.addPreserved<SlotIndexes>(); |
| AU.addPreserved<LiveIntervals>(); |
| AU.addPreservedID(LiveVariablesID); |
| AU.addPreservedID(MachineLoopInfoID); |
| AU.addPreservedID(MachineDominatorsID); |
| AU.setPreservesCFG(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| }; |
| |
| } // End anonymous namespace. |
| |
| INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE, |
| "SI fix WWM liveness", false, false) |
| |
| char SIFixWWMLiveness::ID = 0; |
| |
| char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; |
| |
| FunctionPass *llvm::createSIFixWWMLivenessPass() { |
| return new SIFixWWMLiveness(); |
| } |
| |
| void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs) |
| { |
| for (const MachineOperand &Op : MI.defs()) { |
| if (Op.isReg()) { |
| unsigned Reg = Op.getReg(); |
| if (TRI->isVGPR(*MRI, Reg)) |
| Regs.set(Reg); |
| } |
| } |
| } |
| |
| bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) { |
| MachineBasicBlock *MBB = WWM.getParent(); |
| |
| // Compute the registers that are live out of MI by figuring out which defs |
| // are reachable from MI. |
| SparseBitVector<> LiveOut; |
| |
| for (auto II = MachineBasicBlock::iterator(WWM), IE = |
| MBB->end(); II != IE; ++II) { |
| addDefs(*II, LiveOut); |
| } |
| |
| for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB), |
| E = df_end(MBB); |
| I != E; ++I) { |
| for (const MachineInstr &MI : **I) { |
| addDefs(MI, LiveOut); |
| } |
| } |
| |
| // Compute the registers that reach MI. |
| SparseBitVector<> Reachable; |
| |
| for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE = |
| MBB->rend(); II != IE; ++II) { |
| addDefs(*II, Reachable); |
| } |
| |
| for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB), |
| E = idf_end(MBB); |
| I != E; ++I) { |
| for (const MachineInstr &MI : **I) { |
| addDefs(MI, Reachable); |
| } |
| } |
| |
| // find the intersection, and add implicit uses. |
| LiveOut &= Reachable; |
| |
| bool Modified = false; |
| for (unsigned Reg : LiveOut) { |
| WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); |
| if (LIS) { |
| // FIXME: is there a better way to update the live interval? |
| LIS->removeInterval(Reg); |
| LIS->createAndComputeVirtRegInterval(Reg); |
| } |
| Modified = true; |
| } |
| |
| return Modified; |
| } |
| |
| bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { |
| bool Modified = false; |
| |
| // This doesn't actually need LiveIntervals, but we can preserve them. |
| LIS = getAnalysisIfAvailable<LiveIntervals>(); |
| |
| const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| const SIInstrInfo *TII = ST.getInstrInfo(); |
| |
| TRI = &TII->getRegisterInfo(); |
| MRI = &MF.getRegInfo(); |
| |
| for (MachineBasicBlock &MBB : MF) { |
| for (MachineInstr &MI : MBB) { |
| if (MI.getOpcode() == AMDGPU::EXIT_WWM) { |
| Modified |= runOnWWMInstruction(MI); |
| } |
| } |
| } |
| |
| return Modified; |
| } |