| //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| /// Any MIMG instructions that use tfe or lwe require an initialization of the |
| /// result register that will be written in the case of a memory access failure. |
| /// The required code is also added to tie this init code to the result of the |
| /// img instruction. |
| /// |
| //===----------------------------------------------------------------------===// |
| // |
| |
| #include "AMDGPU.h" |
| #include "AMDGPUSubtarget.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "SIInstrInfo.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Target/TargetMachine.h" |
| |
| #define DEBUG_TYPE "si-img-init" |
| |
| using namespace llvm; |
| |
| namespace { |
| |
| class SIAddIMGInit : public MachineFunctionPass { |
| public: |
| static char ID; |
| |
| public: |
| SIAddIMGInit() : MachineFunctionPass(ID) { |
| initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.setPreservesCFG(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| }; |
| |
| } // End anonymous namespace. |
| |
| INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) |
| |
| char SIAddIMGInit::ID = 0; |
| |
| char &llvm::SIAddIMGInitID = SIAddIMGInit::ID; |
| |
| FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } |
| |
| bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { |
| MachineRegisterInfo &MRI = MF.getRegInfo(); |
| const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| const SIInstrInfo *TII = ST.getInstrInfo(); |
| const SIRegisterInfo *RI = ST.getRegisterInfo(); |
| bool Changed = false; |
| |
| for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; |
| ++BI) { |
| MachineBasicBlock &MBB = *BI; |
| MachineBasicBlock::iterator I, Next; |
| for (I = MBB.begin(); I != MBB.end(); I = Next) { |
| Next = std::next(I); |
| MachineInstr &MI = *I; |
| |
| auto Opcode = MI.getOpcode(); |
| if (TII->isMIMG(Opcode) && !MI.mayStore()) { |
| MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); |
| MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); |
| MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); |
| |
| // Check for instructions that don't have tfe or lwe fields |
| // There shouldn't be any at this point. |
| assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); |
| |
| unsigned TFEVal = TFE->getImm(); |
| unsigned LWEVal = LWE->getImm(); |
| unsigned D16Val = D16 ? D16->getImm() : 0; |
| |
| if (TFEVal || LWEVal) { |
| // At least one of TFE or LWE are non-zero |
| // We have to insert a suitable initialization of the result value and |
| // tie this to the dest of the image instruction. |
| |
| const DebugLoc &DL = MI.getDebugLoc(); |
| |
| int DstIdx = |
| AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); |
| |
| // Calculate which dword we have to initialize to 0. |
| MachineOperand *MO_Dmask = |
| TII->getNamedOperand(MI, AMDGPU::OpName::dmask); |
| |
| // check that dmask operand is found. |
| assert(MO_Dmask && "Expected dmask operand in instruction"); |
| |
| unsigned dmask = MO_Dmask->getImm(); |
| // Determine the number of active lanes taking into account the |
| // Gather4 special case |
| unsigned ActiveLanes = |
| TII->isGather4(Opcode) ? 4 : countPopulation(dmask); |
| |
| // Subreg indices are counted from 1 |
| // When D16 then we want next whole VGPR after write data. |
| static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); |
| |
| bool Packed = !ST.hasUnpackedD16VMem(); |
| |
| unsigned InitIdx = |
| D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; |
| |
| // Abandon attempt if the dst size isn't large enough |
| // - this is in fact an error but this is picked up elsewhere and |
| // reported correctly. |
| uint32_t DstSize = |
| RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; |
| if (DstSize < InitIdx) |
| continue; |
| |
| // Create a register for the intialization value. |
| Register PrevDst = |
| MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); |
| unsigned NewDst = 0; // Final initialized value will be in here |
| |
| // If PRTStrictNull feature is enabled (the default) then initialize |
| // all the result registers to 0, otherwise just the error indication |
| // register (VGPRn+1) |
| unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; |
| unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx; |
| |
| if (DstSize == 1) { |
| // In this case we can just initialize the result directly |
| BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) |
| .addImm(0); |
| NewDst = PrevDst; |
| } else { |
| BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); |
| for (; SizeLeft; SizeLeft--, CurrIdx++) { |
| NewDst = |
| MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); |
| // Initialize dword |
| Register SubReg = |
| MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
| BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) |
| .addImm(0); |
| // Insert into the super-reg |
| BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) |
| .addReg(PrevDst) |
| .addReg(SubReg) |
| .addImm(CurrIdx); |
| |
| PrevDst = NewDst; |
| } |
| } |
| |
| // Add as an implicit operand |
| MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); |
| |
| // Tie the just added implicit operand to the dst |
| MI.tieOperands(DstIdx, MI.getNumOperands() - 1); |
| |
| Changed = true; |
| } |
| } |
| } |
| } |
| |
| return Changed; |
| } |