third_party/llvm-10.0/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp - SwiftShader - Git at Google

 //===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
 /// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA
 /// with sequential versions where possible.
 ///
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>

 using namespace llvm;

 #define DEBUG_TYPE "amdgpu-nsa-reassign"

 STATISTIC(NumNSAInstructions,
           "Number of NSA instructions with non-sequential address found");
 STATISTIC(NumNSAConverted,
           "Number of NSA instructions changed to sequential");

 namespace {

 class GCNNSAReassign : public MachineFunctionPass {
 public:
   static char ID;

   GCNNSAReassign() : MachineFunctionPass(ID) {
     initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
   }

   bool runOnMachineFunction(MachineFunction &MF) override;

   StringRef getPassName() const override { return "GCN NSA Reassign"; }

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<LiveIntervals>();
     AU.addRequired<VirtRegMap>();
     AU.addRequired<LiveRegMatrix>();
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }

 private:
   typedef enum {
     NOT_NSA,        // Not an NSA instruction
     FIXED,          // NSA which we cannot modify
     NON_CONTIGUOUS, // NSA with non-sequential address which we can try
                     // to optimize.
     CONTIGUOUS      // NSA with all sequential address registers
   } NSA_Status;

   const GCNSubtarget *ST;

   const MachineRegisterInfo *MRI;

   const SIRegisterInfo *TRI;

   VirtRegMap *VRM;

   LiveRegMatrix *LRM;

   LiveIntervals *LIS;

   unsigned MaxNumVGPRs;

   const MCPhysReg *CSRegs;

   NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

   bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                           unsigned StartReg) const;

   bool canAssign(unsigned StartReg, unsigned NumRegs) const;

   bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
 };

 } // End anonymous namespace.

 INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
 INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                     false, false)


 char GCNNSAReassign::ID = 0;

 char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;

 bool
 GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                                    unsigned StartReg) const {
   unsigned NumRegs = Intervals.size();

   for (unsigned N = 0; N < NumRegs; ++N)
     if (VRM->hasPhys(Intervals[N]->reg))
       LRM->unassign(*Intervals[N]);

   for (unsigned N = 0; N < NumRegs; ++N)
     if (LRM->checkInterference(*Intervals[N], StartReg + N))
       return false;

   for (unsigned N = 0; N < NumRegs; ++N)
     LRM->assign(*Intervals[N], StartReg + N);

   return true;
 }

 bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
   for (unsigned N = 0; N < NumRegs; ++N) {
     unsigned Reg = StartReg + N;
     if (!MRI->isAllocatable(Reg))
       return false;

     for (unsigned I = 0; CSRegs[I]; ++I)
       if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
           !LRM->isPhysRegUsed(CSRegs[I]))
       return false;
   }

   return true;
 }

 bool
 GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
   unsigned NumRegs = Intervals.size();

   if (NumRegs > MaxNumVGPRs)
     return false;
   unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

   for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
     if (!canAssign(Reg, NumRegs))
       continue;

     if (tryAssignRegisters(Intervals, Reg))
       return true;
   }

   return false;
 }

 GCNNSAReassign::NSA_Status
 GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
   if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
     return NSA_Status::NOT_NSA;

   int VAddr0Idx =
     AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

   unsigned VgprBase = 0;
   bool NSA = false;
   for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
     const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
     Register Reg = Op.getReg();
     if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
       return NSA_Status::FIXED;

     Register PhysReg = VRM->getPhys(Reg);

     if (!Fast) {
       if (!PhysReg)
         return NSA_Status::FIXED;

       // Bail if address is not a VGPR32. That should be possible to extend the
       // optimization to work with subregs of a wider register tuples, but the
       // logic to find free registers will be much more complicated with much
       // less chances for success. That seems reasonable to assume that in most
       // cases a tuple is used because a vector variable contains different
       // parts of an address and it is either already consequitive or cannot
       // be reassigned if not. If needed it is better to rely on register
       // coalescer to process such address tuples.
       if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
         return NSA_Status::FIXED;

       const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

       if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
         return NSA_Status::FIXED;

       for (auto U : MRI->use_nodbg_operands(Reg)) {
         if (U.isImplicit())
           return NSA_Status::FIXED;
         const MachineInstr *UseInst = U.getParent();
         if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
           return NSA_Status::FIXED;
       }

       if (!LIS->hasInterval(Reg))
         return NSA_Status::FIXED;
     }

     if (I == 0)
       VgprBase = PhysReg;
     else if (VgprBase + I != PhysReg)
       NSA = true;
   }

   return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
 }

 bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
   ST = &MF.getSubtarget<GCNSubtarget>();
   if (ST->getGeneration() < GCNSubtarget::GFX10)
     return false;

   MRI = &MF.getRegInfo();
   TRI = ST->getRegisterInfo();
   VRM = &getAnalysis<VirtRegMap>();
   LRM = &getAnalysis<LiveRegMatrix>();
   LIS = &getAnalysis<LiveIntervals>();

   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
   MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
   CSRegs = MRI->getCalleeSavedRegs();

   using Candidate = std::pair<const MachineInstr*, bool>;
   SmallVector<Candidate, 32> Candidates;
   for (const MachineBasicBlock &MBB : MF) {
     for (const MachineInstr &MI : MBB) {
       switch (CheckNSA(MI)) {
       default:
         continue;
       case NSA_Status::CONTIGUOUS:
         Candidates.push_back(std::make_pair(&MI, true));
         break;
       case NSA_Status::NON_CONTIGUOUS:
         Candidates.push_back(std::make_pair(&MI, false));
         ++NumNSAInstructions;
         break;
       }
     }
   }

   bool Changed = false;
   for (auto &C : Candidates) {
     if (C.second)
       continue;

     const MachineInstr *MI = C.first;
     if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
       // Already happen to be fixed.
       C.second = true;
       ++NumNSAConverted;
       continue;
     }

     const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
     int VAddr0Idx =
       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

     SmallVector<LiveInterval *, 16> Intervals;
     SmallVector<unsigned, 16> OrigRegs;
     SlotIndex MinInd, MaxInd;
     for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
       const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
       Register Reg = Op.getReg();
       LiveInterval *LI = &LIS->getInterval(Reg);
       if (llvm::find(Intervals, LI) != Intervals.end()) {
         // Same register used, unable to make sequential
         Intervals.clear();
         break;
       }
       Intervals.push_back(LI);
       OrigRegs.push_back(VRM->getPhys(Reg));
       MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
       MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
     }

     if (Intervals.empty())
       continue;

     LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
                       << "\tOriginal allocation:\t";
                for(auto *LI : Intervals)
                  dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
                dbgs() << '\n');

     bool Success = scavengeRegs(Intervals);
     if (!Success) {
       LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
       if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
         continue;
     } else {
       // Check we did not make it worse for other instructions.
       auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
                                 [this](const Candidate &C, SlotIndex I) {
                                   return LIS->getInstructionIndex(*C.first) < I;
                                 });
       for (auto E = Candidates.end(); Success && I != E &&
               LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
         if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
           Success = false;
           LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
         }
       }
     }

     if (!Success) {
       for (unsigned I = 0; I < Info->VAddrDwords; ++I)
         if (VRM->hasPhys(Intervals[I]->reg))
           LRM->unassign(*Intervals[I]);

       for (unsigned I = 0; I < Info->VAddrDwords; ++I)
         LRM->assign(*Intervals[I], OrigRegs[I]);

       continue;
     }

     C.second = true;
     ++NumNSAConverted;
     LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
                  << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
                  << " : "
                  << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
                  << "]\n");
     Changed = true;
   }

   return Changed;
 }
	//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file
	/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
	/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA
	/// with sequential versions where possible.
	///
	//===----------------------------------------------------------------------===//

	#include "AMDGPU.h"
	#include "AMDGPUSubtarget.h"
	#include "SIInstrInfo.h"
	#include "SIMachineFunctionInfo.h"
	#include "llvm/ADT/Statistic.h"
	#include "llvm/CodeGen/LiveInterval.h"
	#include "llvm/CodeGen/LiveIntervals.h"
	#include "llvm/CodeGen/LiveRegMatrix.h"
	#include "llvm/CodeGen/MachineFunctionPass.h"
	#include "llvm/CodeGen/VirtRegMap.h"
	#include "llvm/InitializePasses.h"
	#include "llvm/Support/MathExtras.h"
	#include <algorithm>

	using namespace llvm;

	#define DEBUG_TYPE "amdgpu-nsa-reassign"

	STATISTIC(NumNSAInstructions,
	"Number of NSA instructions with non-sequential address found");
	STATISTIC(NumNSAConverted,
	"Number of NSA instructions changed to sequential");

	namespace {

	class GCNNSAReassign : public MachineFunctionPass {
	public:
	static char ID;

	GCNNSAReassign() : MachineFunctionPass(ID) {
	initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
	}

	bool runOnMachineFunction(MachineFunction &MF) override;

	StringRef getPassName() const override { return "GCN NSA Reassign"; }

	void getAnalysisUsage(AnalysisUsage &AU) const override {
	AU.addRequired<LiveIntervals>();
	AU.addRequired<VirtRegMap>();
	AU.addRequired<LiveRegMatrix>();
	AU.setPreservesAll();
	MachineFunctionPass::getAnalysisUsage(AU);
	}

	private:
	typedef enum {
	NOT_NSA, // Not an NSA instruction
	FIXED, // NSA which we cannot modify
	NON_CONTIGUOUS, // NSA with non-sequential address which we can try
	// to optimize.
	CONTIGUOUS // NSA with all sequential address registers
	} NSA_Status;

	const GCNSubtarget *ST;

	const MachineRegisterInfo *MRI;

	const SIRegisterInfo *TRI;

	VirtRegMap *VRM;

	LiveRegMatrix *LRM;

	LiveIntervals *LIS;

	unsigned MaxNumVGPRs;

	const MCPhysReg *CSRegs;

	NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

	bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
	unsigned StartReg) const;

	bool canAssign(unsigned StartReg, unsigned NumRegs) const;

	bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
	};

	} // End anonymous namespace.

	INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
	false, false)
	INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
	INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
	INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
	INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
	false, false)


	char GCNNSAReassign::ID = 0;

	char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;

	bool
	GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
	unsigned StartReg) const {
	unsigned NumRegs = Intervals.size();

	for (unsigned N = 0; N < NumRegs; ++N)
	if (VRM->hasPhys(Intervals[N]->reg))
	LRM->unassign(*Intervals[N]);

	for (unsigned N = 0; N < NumRegs; ++N)
	if (LRM->checkInterference(*Intervals[N], StartReg + N))
	return false;

	for (unsigned N = 0; N < NumRegs; ++N)
	LRM->assign(*Intervals[N], StartReg + N);

	return true;
	}

	bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
	for (unsigned N = 0; N < NumRegs; ++N) {
	unsigned Reg = StartReg + N;
	if (!MRI->isAllocatable(Reg))
	return false;

	for (unsigned I = 0; CSRegs[I]; ++I)
	if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
	!LRM->isPhysRegUsed(CSRegs[I]))
	return false;
	}

	return true;
	}

	bool
	GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
	unsigned NumRegs = Intervals.size();

	if (NumRegs > MaxNumVGPRs)
	return false;
	unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

	for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
	if (!canAssign(Reg, NumRegs))
	continue;

	if (tryAssignRegisters(Intervals, Reg))
	return true;
	}

	return false;
	}

	GCNNSAReassign::NSA_Status
	GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
	const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
	if (!Info \|\| Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
	return NSA_Status::NOT_NSA;

	int VAddr0Idx =
	AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

	unsigned VgprBase = 0;
	bool NSA = false;
	for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
	const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
	Register Reg = Op.getReg();
	if (Register::isPhysicalRegister(Reg) \|\| !VRM->isAssignedReg(Reg))
	return NSA_Status::FIXED;

	Register PhysReg = VRM->getPhys(Reg);

	if (!Fast) {
	if (!PhysReg)
	return NSA_Status::FIXED;

	// Bail if address is not a VGPR32. That should be possible to extend the
	// optimization to work with subregs of a wider register tuples, but the
	// logic to find free registers will be much more complicated with much
	// less chances for success. That seems reasonable to assume that in most
	// cases a tuple is used because a vector variable contains different
	// parts of an address and it is either already consequitive or cannot
	// be reassigned if not. If needed it is better to rely on register
	// coalescer to process such address tuples.
	if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass \|\| Op.getSubReg())
	return NSA_Status::FIXED;

	const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

	if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
	return NSA_Status::FIXED;

	for (auto U : MRI->use_nodbg_operands(Reg)) {
	if (U.isImplicit())
	return NSA_Status::FIXED;
	const MachineInstr *UseInst = U.getParent();
	if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
	return NSA_Status::FIXED;
	}

	if (!LIS->hasInterval(Reg))
	return NSA_Status::FIXED;
	}

	if (I == 0)
	VgprBase = PhysReg;
	else if (VgprBase + I != PhysReg)
	NSA = true;
	}

	return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
	}

	bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
	ST = &MF.getSubtarget<GCNSubtarget>();
	if (ST->getGeneration() < GCNSubtarget::GFX10)
	return false;

	MRI = &MF.getRegInfo();
	TRI = ST->getRegisterInfo();
	VRM = &getAnalysis<VirtRegMap>();
	LRM = &getAnalysis<LiveRegMatrix>();
	LIS = &getAnalysis<LiveIntervals>();

	const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
	MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
	MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
	CSRegs = MRI->getCalleeSavedRegs();

	using Candidate = std::pair<const MachineInstr*, bool>;
	SmallVector<Candidate, 32> Candidates;
	for (const MachineBasicBlock &MBB : MF) {
	for (const MachineInstr &MI : MBB) {
	switch (CheckNSA(MI)) {
	default:
	continue;
	case NSA_Status::CONTIGUOUS:
	Candidates.push_back(std::make_pair(&MI, true));
	break;
	case NSA_Status::NON_CONTIGUOUS:
	Candidates.push_back(std::make_pair(&MI, false));
	++NumNSAInstructions;
	break;
	}
	}
	}

	bool Changed = false;
	for (auto &C : Candidates) {
	if (C.second)
	continue;

	const MachineInstr *MI = C.first;
	if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
	// Already happen to be fixed.
	C.second = true;
	++NumNSAConverted;
	continue;
	}

	const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
	int VAddr0Idx =
	AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

	SmallVector<LiveInterval *, 16> Intervals;
	SmallVector<unsigned, 16> OrigRegs;
	SlotIndex MinInd, MaxInd;
	for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
	const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
	Register Reg = Op.getReg();
	LiveInterval *LI = &LIS->getInterval(Reg);
	if (llvm::find(Intervals, LI) != Intervals.end()) {
	// Same register used, unable to make sequential
	Intervals.clear();
	break;
	}
	Intervals.push_back(LI);
	OrigRegs.push_back(VRM->getPhys(Reg));
	MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
	MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
	}

	if (Intervals.empty())
	continue;

	LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
	<< "\tOriginal allocation:\t";
	for(auto *LI : Intervals)
	dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
	dbgs() << '\n');

	bool Success = scavengeRegs(Intervals);
	if (!Success) {
	LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
	if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
	continue;
	} else {
	// Check we did not make it worse for other instructions.
	auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
	[this](const Candidate &C, SlotIndex I) {
	return LIS->getInstructionIndex(*C.first) < I;
	});
	for (auto E = Candidates.end(); Success && I != E &&
	LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
	if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
	Success = false;
	LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
	}
	}
	}

	if (!Success) {
	for (unsigned I = 0; I < Info->VAddrDwords; ++I)
	if (VRM->hasPhys(Intervals[I]->reg))
	LRM->unassign(*Intervals[I]);

	for (unsigned I = 0; I < Info->VAddrDwords; ++I)
	LRM->assign(*Intervals[I], OrigRegs[I]);

	continue;
	}

	C.second = true;
	++NumNSAConverted;
	LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
	<< llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
	<< " : "
	<< llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
	<< "]\n");
	Changed = true;
	}

	return Changed;
	}