//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers.
/// It inserts an ldtilecfg ahead of each group of tile registers. The
/// algorithm walks the instructions of each basic block in reverse order.
/// Every tile register that lives out of a basic block is spilled and
/// reloaded before its users. The pass also checks the dependencies of the
/// shape operands to ensure each shape is defined before its ldtilecfg.
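///
/// For example (illustrative, in the same pseudo notation used by the
/// comments below):
///   %t = tilezero(row, col)
///   call foo
///   tilestore %r, %c, %t
/// -->
///   %t = tilezero(row, col)
///   spill %t
///   call foo
///   ldtilecfg
///   %t1 = tileload %r, %c
///   tilestore %r, %c, %t1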
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "fastpretileconfig"
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");
namespace {
class X86FastPreTileConfig : public MachineFunctionPass {
MachineFunction *MF = nullptr;
const X86Subtarget *ST = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
X86MachineFunctionInfo *X86FI = nullptr;
MachineFrameInfo *MFI = nullptr;
const TargetRegisterInfo *TRI = nullptr;
MachineBasicBlock *MBB = nullptr;
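// Stack slot for the tile configuration memory; created lazily.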
int CfgSS = -1;
struct PHIInfo {
Register Row;
Register Col;
Register StackAddr;
};
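// Map each visited tile PHI to the row/column/stack-address PHIs created
// for it. This is used to break circular PHI references.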
DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;
/// Maps virtual regs to the frame index where these values are spilled.
IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
/// Has a bit set for each tile virtual register that was determined to be
/// live across blocks.
BitVector MayLiveAcrossBlocks;
int getStackSpaceFor(Register VirtReg);
void InitializeTileConfigStackSpace();
bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
MachineOperand *RowMO, MachineOperand *ColMO);
void canonicalizePHIs(MachineBasicBlock &MBB);
void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
void convertPHIs(MachineBasicBlock &MBB);
bool configBasicBlock(MachineBasicBlock &MBB);
public:
X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
/// Return the pass name.
StringRef getPassName() const override {
return "Fast Tile Register Preconfigure";
}
/// Perform tile register configure.
bool runOnMachineFunction(MachineFunction &MFunc) override;
static char ID;
};
} // end anonymous namespace
char X86FastPreTileConfig::ID = 0;
INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
"Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
"Fast Tile Register Preconfigure", false, false)
static bool dominates(MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator A,
MachineBasicBlock::const_iterator B) {
auto MBBEnd = MBB.end();
if (B == MBBEnd)
return true;
MachineBasicBlock::const_iterator I = MBB.begin();
for (; &*I != A && &*I != B; ++I)
;
return &*I == A;
}
/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
if (SS != -1)
return SS;
// Allocate a new stack object for this spill location...
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
unsigned Size = TRI->getSpillSize(RC);
Align Alignment = TRI->getSpillAlign(RC);
int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
// Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
return FrameIdx;
}
/// Returns false if \p VirtReg is known to not live out of the current
/// config. If \p VirtReg lives out of the current MBB, it must live out of
/// the current config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
return true;
for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
if (UseInst.getParent() != MBB) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
// The use and def are in the same MBB. If the tile register is
// reconfigured, it is clobbered and we need to spill and reload
// the tile register.
if (CfgMI) {
if (dominates(*MBB, *CfgMI, UseInst)) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
}
}
return false;
}
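/// Zero-initialize the stack object that holds the tile configuration in
/// the entry block, using the widest available vector stores, and set the
/// palette (the first byte of the config) to 1.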
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
MachineBasicBlock &MBB = MF->front();
MachineInstr *MI = &*MBB.getFirstNonPHI();
DebugLoc DL;
if (ST->hasAVX512()) {
Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
.addReg(Zmm);
} else if (ST->hasAVX2()) {
Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
.addReg(Ymm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
32)
.addReg(Ymm);
} else {
assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
.addReg(Xmm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
.addReg(Xmm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
.addReg(Xmm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
.addReg(Xmm);
}
// Fill in the palette first.
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
.addImm(1);
}
/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands to use the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
Register VirtReg, bool Kill) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
int FI = getStackSpaceFor(VirtReg);
LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
// The tile store does not need shape information, because it is adjacent
// to the tile def instruction.
TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
Register());
++NumStores;
// TODO: update DBG_VALUEs
}
/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
Register OrigReg, MachineOperand *RowMO,
MachineOperand *ColMO) {
int FI = getStackSpaceFor(OrigReg);
const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
Register TileReg;
// Fold copy to tileload
// BB1:
// spill src to s
//
// BB2:
// t = copy src
// -->
// t = tileload (s)
if (UseMI->isCopy())
TileReg = UseMI->getOperand(0).getReg();
else
TileReg = MRI->createVirtualRegister(&RC);
// Can't use TII->loadRegFromStackSlot(), because we need the shape
// information for reload.
// tileloadd (%sp, %idx), %tmm
unsigned Opc = X86::PTILELOADDV;
Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
// FIXME: MBB is not the parent of UseMI.
MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
TII->get(X86::MOV64ri), StrideReg)
.addImm(64);
NewMI = addFrameReference(
BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
.addReg(RowMO->getReg())
.addReg(ColMO->getReg()),
FI);
MachineOperand &MO = NewMI->getOperand(5);
MO.setReg(StrideReg);
MO.setIsKill(true);
RowMO->setIsKill(false);
ColMO->setIsKill(false);
// Erase copy instruction after it is folded.
if (UseMI->isCopy()) {
UseMI->eraseFromParent();
} else {
// Replace the register in the user MI.
for (auto &MO : UseMI->operands()) {
if (MO.isReg() && MO.getReg() == OrigReg)
MO.setReg(TileReg);
}
}
++NumLoads;
LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
<< printReg(TileReg, TRI) << '\n');
}
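/// Return true if \p MI is a pseudo instruction that defines a tile
/// register; by convention, operands 1 and 2 of such a def carry the row
/// and column shape.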
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
// The instruction must have at least 3 operands: tile def, row, col.
if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
return false;
MachineOperand &MO = MI.getOperand(0);
if (MO.isReg()) {
Register Reg = MO.getReg();
// FIXME it may be used after Greedy RA and the physical
// register is not rewritten yet.
if (Reg.isVirtual() &&
MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
return true;
if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
return true;
}
return false;
}
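/// Return the shape (row and column operands) of \p TileReg, looking
/// through COPYs to the defining tile instruction.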
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
MachineInstr *MI = MRI->getVRegDef(TileReg);
if (isTileDef(MRI, *MI)) {
MachineOperand *RowMO = &MI->getOperand(1);
MachineOperand *ColMO = &MI->getOperand(2);
return ShapeT(RowMO, ColMO, MRI);
} else if (MI->isCopy()) {
TileReg = MI->getOperand(1).getReg();
return getShape(MRI, TileReg);
}
// The def should not be a PHI node: PHIs are converted to tile loads
// before shapes are queried, since we walk the MBBs in reverse post
// order.
assert(MI->isPHI() && "Unexpected PHI when get shape.");
llvm_unreachable("Unexpected MI when get shape.");
}
// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
// s = phi [s0, bb0] [s1, bb1]
// t = tileload row, col, s
// The new instructions are inserted right after the PHI node. The
// relative order of the original PHI nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
MachineInstr &PHI) {
// 1. Create instruction to get stack slot address of each incoming block.
// 2. Create PHI node for the stack address.
// 3. Create PHI node for shape. If one of the incoming shapes is an
//    immediate, use the immediate and delete the PHI node.
// 4. Create tileload instruction from the stack address.
Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
TII->get(X86::PHI), StackAddrReg);
Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
TII->get(X86::PHI), RowReg);
Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
TII->get(X86::PHI), ColReg);
// Record the mapping of phi node and its row/column information.
VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};
for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
// Get the incoming tile register and its incoming MBB.
Register InTileReg = PHI.getOperand(I).getReg();
// Mark it as live out, so that it will be spilled when the incoming
// MBB is visited. Otherwise, since the PHI will be deleted, the spill
// would be missed when visiting the incoming MBB.
MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();
MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
MachineBasicBlock::iterator InsertPos;
if (TileDefMI->isPHI()) {
InsertPos = TileDefMI->getParent()->getFirstNonPHI();
if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
// def t1
// / \
// def t2 t3 = phi(t1, t4) <--
// \ / |
// t4 = phi(t2, t3)-------------
//
// For each of row, column and stack address, append the PHI incoming
// value.
// Create r3 = phi(r1, r4)
// Create r4 = phi(r2, r3)
Register InRowReg = VisitedPHIs[TileDefMI].Row;
Register InColReg = VisitedPHIs[TileDefMI].Col;
Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
RowPHI.addReg(InRowReg).addMBB(InMBB);
ColPHI.addReg(InColReg).addMBB(InMBB);
AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
continue;
} else {
// Recursively convert the PHI to a tileload.
convertPHI(TileDefMI->getParent(), *TileDefMI);
// The PHI node has been converted to a tileload instruction. Get the
// stack address from the tileload operands.
MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
Register InRowReg = TileLoad->getOperand(1).getReg();
Register InColReg = TileLoad->getOperand(2).getReg();
Register InStackAddrReg = TileLoad->getOperand(3).getReg();
RowPHI.addReg(InRowReg).addMBB(InMBB);
ColPHI.addReg(InColReg).addMBB(InMBB);
AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
}
} else {
InsertPos = TileDefMI->getIterator();
// Fill the incoming operands of the row/column PHI instructions.
ShapeT Shape = getShape(MRI, InTileReg);
Shape.getRow()->setIsKill(false);
Shape.getCol()->setIsKill(false);
RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);
// The incoming tile register lives out of its defining BB, so it will
// be spilled. Create an MI to get the spill stack slot address for it.
int FI = getStackSpaceFor(InTileReg);
Register InStackAddrReg =
MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
TII->get(X86::LEA64r), InStackAddrReg)
.addFrameIndex(FI),
0);
AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
}
}
MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
.addImm(64);
Register TileReg = PHI.getOperand(0).getReg();
MachineInstr *NewMI = addDirectMem(
BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
.addReg(RowReg)
.addReg(ColReg),
StackAddrReg);
MachineOperand &MO = NewMI->getOperand(5);
MO.setReg(StrideReg);
MO.setIsKill(true);
PHI.eraseFromParent();
VisitedPHIs.erase(&PHI);
}
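/// Return true if \p MI defines a virtual tile register.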
static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(0);
if (MO.isReg() && MO.getReg().isVirtual() &&
MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
return true;
return false;
}
void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 8> PHIs;
for (MachineInstr &MI : MBB) {
if (!MI.isPHI())
break;
if (!isTileRegDef(MRI, MI))
continue;
PHIs.push_back(&MI);
}
// Canonicalize the PHI nodes first. One tile PHI may depend on a previous
// PHI node in the same MBB. For the case below, we need to convert %t4.
//
// BB0:
// %t3 = phi (t1 BB1, t2 BB0)
// %t4 = phi (t5 BB1, t3 BB0)
// -->
// %t3 = phi (t1 BB1, t2 BB0)
// %t4 = phi (t5 BB1, t2 BB0)
//
while (!PHIs.empty()) {
MachineInstr *PHI = PHIs.pop_back_val();
// Find the operand whose incoming block is this MBB and whose def is
// also a PHI node.
MachineOperand *InMO = nullptr;
MachineInstr *DefMI = nullptr;
for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
Register InTileReg = PHI->getOperand(I).getReg();
MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
DefMI = MRI->getVRegDef(InTileReg);
if (InMBB != &MBB || !DefMI->isPHI())
continue;
InMO = &PHI->getOperand(I);
break;
}
// If no such operand is found, do nothing.
if (!InMO)
continue;
// The current PHI node depends on a previous PHI node. Break the
// dependency.
Register DefTileReg;
for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
if (InMBB != &MBB)
continue;
DefTileReg = DefMI->getOperand(I).getReg();
InMO->setReg(DefTileReg);
break;
}
}
}
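/// Convert each tile PHI in \p MBB into a tileload from the spill slots
/// of its incoming values.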
void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 8> PHIs;
for (MachineInstr &MI : MBB) {
if (!MI.isPHI())
break;
if (!isTileRegDef(MRI, MI))
continue;
PHIs.push_back(&MI);
}
while (!PHIs.empty()) {
MachineInstr *MI = PHIs.pop_back_val();
VisitedPHIs.clear();
convertPHI(&MBB, *MI);
}
}
// PreTileConfig configures the tile registers on a per-basic-block basis.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
bool Change = false;
MachineInstr *LastShapeMI = nullptr;
MachineInstr *LastTileCfg = nullptr;
bool HasUnconfigTile = false;
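// Insert an ldtilecfg (PLDTILECFGV) before the given instruction, creating
// the config stack slot on first use. A new config invalidates the
// currently tracked shape def.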
auto Config = [&](MachineInstr &Before) {
if (CfgSS == -1)
CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
ST->getTileConfigAlignment(), false);
LastTileCfg = addFrameReference(
BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
LastShapeMI = nullptr;
Change = true;
};
auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
if (Reg.isVirtual() &&
MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
return true;
}
return false;
};
for (MachineInstr &MI : reverse(MBB)) {
// PHI nodes have already been converted before configuring the BB.
if (MI.isPHI())
break;
// Don't collect the shape of a used tile; the tile must be defined
// before its use. A spill and reload happen if there is only a tile
// use after the ldtilecfg, so the shape can be collected from the
// reload. Take the code below as an example: %t is reloaded before the
// tilestore.
// call
// ....
// tilestore %r, %c, %t
// -->
// call
// ldtilecfg
// %t = tileload %r, %c
// tilestore %r, %c, %t
if (HasTileOperand(MRI, MI))
HasUnconfigTile = true;
// According to the AMX ABI, all tile registers, including the config
// register, are volatile. The caller needs to save/restore the config
// register.
if (MI.isCall() && HasUnconfigTile) {
MachineBasicBlock::iterator I;
if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
I = ++LastShapeMI->getIterator();
else
I = ++MI.getIterator();
Config(*I);
HasUnconfigTile = false;
continue;
}
if (!isTileDef(MRI, MI))
continue;
//
//---------------------------------------------------------------------
// Don't handle COPY instructions. If the src and dst of the COPY can be
// in the same config, as in the case below, we just check the shape of
// t0.
// def row0
// def col0
// ldtilecfg
// t0 = tilezero(row0, col0)
// t1 = copy t0
// ...
// If the src and dst of the COPY can NOT be in the same config, as in
// the case below, a reload is generated before the copy instruction.
// def row0
// def col0
// t0 = tilezero(row0, col0)
// spill t0
// ...
// def row1
// def col1
// ldtilecfg
// t1 = tilezero(row1, col1)
// reload t0
// t1 = copy t0
//---------------------------------------------------------------------
//
//
// If MI dominates the last shape def instruction, we need to insert an
// ldtilecfg right after LastShapeMI now. That config does not include
// the current MI.
// def row0
// def col0
// tilezero(row0, col0) <- MI
// def row1
// def col1
// ldtilecfg <- insert
// tilezero(row1, col1)
if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
Config(*(++LastShapeMI->getIterator()));
MachineOperand *RowMO = &MI.getOperand(1);
MachineOperand *ColMO = &MI.getOperand(2);
MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
// If the shape is defined in the current MBB, check the domination.
// FIXME: how about loops?
if (RowMI->getParent() == &MBB) {
if (!LastShapeMI)
LastShapeMI = RowMI;
else if (dominates(MBB, LastShapeMI, RowMI))
LastShapeMI = RowMI;
}
if (ColMI->getParent() == &MBB) {
if (!LastShapeMI)
LastShapeMI = ColMI;
else if (dominates(MBB, LastShapeMI, ColMI))
LastShapeMI = ColMI;
}
// If a user of the tile register lives out of the tile config, spill
// the register and reload it before the user.
Register TileReg = MI.getOperand(0).getReg();
if (mayLiveOut(TileReg, LastTileCfg))
spill(++MI.getIterator(), TileReg, false);
for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
if (UseMI.getParent() == &MBB) {
// Only reload if the use is after an ldtilecfg, i.e. the tile has
// been clobbered by a reconfiguration.
if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
continue;
// Reload before UseMI.
reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
} else {
// Don't reload for PHI instructions; PHI reloads are handled separately.
// TODO: merge the reload for the same user MBB.
if (!UseMI.isPHI())
reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
}
}
}
// Configure the tile registers at the head of the MBB.
if (HasUnconfigTile) {
MachineInstr *Before;
if (LastShapeMI == nullptr || LastShapeMI->isPHI())
Before = &*MBB.getFirstNonPHI();
else
Before = &*(++LastShapeMI->getIterator());
Config(*Before);
}
return Change;
}
bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
MF = &MFunc;
MRI = &MFunc.getRegInfo();
ST = &MFunc.getSubtarget<X86Subtarget>();
TII = ST->getInstrInfo();
X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
MFI = &MFunc.getFrameInfo();
TRI = ST->getRegisterInfo();
CfgSS = -1;
unsigned NumVirtRegs = MRI->getNumVirtRegs();
// Bail out early if there is no tile register to configure.
bool HasVirtTileReg = false;
for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
Register VirtReg = Register::index2VirtReg(I);
if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
HasVirtTileReg = true;
break;
}
}
if (!HasVirtTileReg)
return false;
StackSlotForVirtReg.resize(NumVirtRegs);
MayLiveAcrossBlocks.clear();
// We will create new virtual registers during configuration. The *3
// factor is to make sure the new virtual register numbers do not exceed
// the size of the bit vector.
MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
bool Change = false;
assert(MRI->isSSA());
// Canonicalize the phi node first.
for (MachineBasicBlock &MBB : MFunc)
canonicalizePHIs(MBB);
// Loop over all of the basic blocks in reverse post order and insert
// ldtilecfg for tile registers. The reverse post order facilitates
// PHI node conversion.
ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
for (MachineBasicBlock *MBB : RPOT) {
convertPHIs(*MBB);
Change |= configBasicBlock(*MBB);
}
if (Change)
InitializeTileConfigStackSpace();
StackSlotForVirtReg.clear();
return Change;
}
FunctionPass *llvm::createX86FastPreTileConfigPass() {
return new X86FastPreTileConfig();
}