Update SwiftShader to April code dump.
April code dump from TransGaming. Adds the new shader compiler and updates the bundled LLVM sources.
diff --git a/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 62e40e9..25842a7 100644
--- a/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "post-RA-sched"
#include "AggressiveAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -114,12 +115,13 @@
AggressiveAntiDepBreaker::
AggressiveAntiDepBreaker(MachineFunction& MFi,
- TargetSubtarget::RegClassVector& CriticalPathRCs) :
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF)),
+ RegClassInfo(RCI),
State(NULL) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
@@ -155,16 +157,11 @@
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
}
}
}
@@ -176,16 +173,11 @@
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
}
}
@@ -197,12 +189,8 @@
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias) {
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
@@ -371,7 +359,7 @@
RegRefs = State->GetRegRefs();
// Handle dead defs by simulating a last-use of the register just
- // after the def. A dead def can occur because the def is truely
+ // after the def. A dead def can occur because the def is truly
// dead, or because only a subregister is live at the def. If we
// don't do this the dead def will be incorrectly merged into the
// previous def.
@@ -416,7 +404,7 @@
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -435,12 +423,9 @@
continue;
// Update def for Reg and aliases.
- DefIndices[Reg] = Count;
- for (const unsigned *Alias = TRI->getAliasSet(Reg);
- *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
DefIndices[AliasReg] = Count;
- }
}
}
@@ -494,7 +479,7 @@
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -635,9 +620,8 @@
const TargetRegisterClass *SuperRC =
TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
- const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
- const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
- if (RB == RE) {
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
return false;
}
@@ -645,15 +629,17 @@
DEBUG(dbgs() << "\tFind Registers:");
if (RenameOrder.count(SuperRC) == 0)
- RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
- const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
- const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
- TargetRegisterClass::iterator R = OrigR;
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
do {
- if (R == RB) R = RE;
+ if (R == 0) R = Order.size();
--R;
- const unsigned NewSuperReg = *R;
+ const unsigned NewSuperReg = Order[R];
+ // Don't consider non-allocatable registers
+ if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -734,7 +720,9 @@
const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
- unsigned InsertPosIndex) {
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
@@ -832,7 +820,7 @@
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!AllocatableSet.test(AntiDepReg)) {
+ if (!RegClassInfo.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
@@ -938,14 +926,10 @@
// sure to update that as well.
const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
if (!SU) continue;
- for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
- MachineInstr *DI = SU->DbgInstrList[i];
- assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg()
- && "Non register dbg_value attached to SUnit!");
- if (DI->getOperand(0).getReg() == AntiDepReg)
- DI->getOperand(0).setReg(NewReg);
- }
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second.Operand->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
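The recurring change in this file replaces the getAliasSet() loops, which had to handle the register itself as a separate case, with getOverlaps(), whose null-terminated list begins with the register itself. A minimal sketch of the two idioms, using the TargetRegisterInfo interface of this LLVM snapshot:

    // Before: the register and its aliases are two separate cases.
    DefIndices[Reg] = Count;
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
      DefIndices[*Alias] = Count;

    // After: getOverlaps(Reg) yields Reg first, then its aliases, and is
    // 0-terminated, so a single loop covers both cases.
    for (const unsigned *Overlap = TRI->getOverlaps(Reg);
         unsigned OverlapReg = *Overlap; ++Overlap)
      DefIndices[OverlapReg] = Count;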
diff --git a/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.h b/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.h
index 9d715cc..7067784 100644
--- a/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/src/LLVM/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -23,13 +23,15 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
+class RegisterClassInfo;
+
/// Class AggressiveAntiDepState
/// Contains all the state necessary for anti-dep breaking.
class AggressiveAntiDepState {
@@ -117,11 +119,7 @@
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
-
- /// AllocatableSet - The set of allocatable registers.
- /// We'll be ignoring anti-dependencies on non-allocatable registers,
- /// because they may not be safe to break.
- const BitVector AllocatableSet;
+ const RegisterClassInfo &RegClassInfo;
/// CriticalPathSet - The set of registers that should only be
/// renamed if they are on the critical path.
@@ -133,7 +131,8 @@
public:
AggressiveAntiDepBreaker(MachineFunction& MFi,
- TargetSubtarget::RegClassVector& CriticalPathRCs);
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
/// Start - Initialize anti-dep breaking for a new basic block.
@@ -146,7 +145,8 @@
unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
- unsigned InsertPosIndex);
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
@@ -157,8 +157,8 @@
void FinishBlock();
private:
- typedef std::map<const TargetRegisterClass *,
- TargetRegisterClass::const_iterator> RenameOrderType;
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
/// IsImplicitDefUse - Return true if MO represents a register
/// that is both implicitly used and defined in MI
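RenameOrderType now maps each register class to an index into the cached allocation order from RegisterClassInfo instead of an iterator into the class itself. A condensed sketch of the circular scan this supports, mirroring the rename loop in the .cpp hunk above (the loop's closing condition is reconstructed, as the hunk cuts off before it):

    ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
    unsigned R = RenameOrder[SuperRC];            // resume the previous scan
    unsigned EndR = (R == Order.size()) ? 0 : R;  // full-circle sentinel
    do {
      if (R == 0) R = Order.size();               // wrap around
      --R;
      unsigned NewSuperReg = Order[R];
      // ... skip non-allocatable registers, try NewSuperReg as a rename ...
    } while (R != EndR);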
diff --git a/src/LLVM/lib/CodeGen/AllocationOrder.cpp b/src/LLVM/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 0000000..1005f10
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,79 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "RegisterClassInfo.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo)
+ : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) {
+ const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
+ std::pair<unsigned, unsigned> HintPair =
+ VRM.getRegInfo().getRegAllocationHint(VirtReg);
+
+ // HintPair.second is a register, phys or virt.
+ Hint = HintPair.second;
+
+ // Translate to physreg, or 0 if not assigned yet.
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = VRM.getPhys(Hint);
+
+ // The first hint pair component indicates a target-specific hint.
+ if (HintPair.first) {
+ const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
+ // The remaining allocation order may depend on the hint.
+ ArrayRef<unsigned> Order =
+ TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
+ VRM.getMachineFunction());
+ if (Order.empty())
+ return;
+
+ // Copy the allocation order with reserved registers removed.
+ OwnedBegin = true;
+ unsigned *P = new unsigned[Order.size()];
+ Begin = P;
+ for (unsigned i = 0; i != Order.size(); ++i)
+ if (!RCI.isReserved(Order[i]))
+ *P++ = Order[i];
+ End = P;
+
+ // Target-dependent hints require resolution.
+ Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint,
+ VRM.getMachineFunction());
+ } else {
+ // If there is no hint or just a normal hint, use the cached allocation
+ // order from RegisterClassInfo.
+ ArrayRef<unsigned> O = RCI.getOrder(RC);
+ Begin = O.begin();
+ End = O.end();
+ }
+
+ // The hint must be a valid physreg for allocation.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || RCI.isReserved(Hint)))
+ Hint = 0;
+}
+
+AllocationOrder::~AllocationOrder() {
+ if (OwnedBegin)
+ delete [] Begin;
+}
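The constructor interprets the hint pair from getRegAllocationHint() as (target-specific kind, register): a virtual register hint is translated through the VirtRegMap, and a nonzero kind defers to the target hook. A short summary sketch of that interpretation, under the register-hint interface of this snapshot:

    std::pair<unsigned, unsigned> HintPair =
        MRI.getRegAllocationHint(VirtReg);
    unsigned Hint = HintPair.second;            // physreg or virtreg
    if (TargetRegisterInfo::isVirtualRegister(Hint))
      Hint = VRM.getPhys(Hint);                 // 0 if not yet assigned
    if (HintPair.first)                         // target-specific hint kind
      Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint, MF);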
diff --git a/src/LLVM/lib/CodeGen/AllocationOrder.h b/src/LLVM/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 0000000..d1e48a1
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,73 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+namespace llvm {
+
+class RegisterClassInfo;
+class VirtRegMap;
+
+class AllocationOrder {
+ const unsigned *Begin;
+ const unsigned *End;
+ const unsigned *Pos;
+ const RegisterClassInfo &RCI;
+ unsigned Hint;
+ bool OwnedBegin;
+public:
+
+ /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param RegClassInfo Reserved-register and allocation-order information
+ /// for the current function, as computed by RegisterClassInfo.
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo);
+
+ ~AllocationOrder();
+
+ /// next - Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next again after it returned 0.
+ /// It will keep returning 0 until rewind() is called.
+ unsigned next() {
+ // First take the hint.
+ if (!Pos) {
+ Pos = Begin;
+ if (Hint)
+ return Hint;
+ }
+ // Then look at the order from TRI.
+ while (Pos != End) {
+ unsigned Reg = *Pos++;
+ if (Reg != Hint)
+ return Reg;
+ }
+ return 0;
+ }
+
+ /// rewind - Start over from the beginning.
+ void rewind() { Pos = 0; }
+
+ /// isHint - Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
+};
+
+} // end namespace llvm
+
+#endif
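AllocationOrder is consumed as a simple cursor by the register allocators; a minimal usage sketch, where VirtReg, VRM, and RegClassInfo are assumed to come from the surrounding allocator:

    AllocationOrder Order(VirtReg, VRM, RegClassInfo);
    while (unsigned PhysReg = Order.next()) {
      // The hinted register, if any, is returned first.
      if (Order.isHint(PhysReg))
        ; // prefer PhysReg on a tie
      // ... check interference and assign PhysReg if it is free ...
    }
    Order.rewind(); // restart the scan; next() begins with the hint again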
diff --git a/src/LLVM/lib/CodeGen/Analysis.cpp b/src/LLVM/lib/CodeGen/Analysis.cpp
index 2acfa77..fafc010 100644
--- a/src/LLVM/lib/CodeGen/Analysis.cpp
+++ b/src/LLVM/lib/CodeGen/Analysis.cpp
@@ -19,6 +19,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -30,7 +31,7 @@
/// of insertvalue or extractvalue indices that identify a member, return
/// the linearized index of the start of the member.
///
-unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+unsigned llvm::ComputeLinearIndex(Type *Ty,
const unsigned *Indices,
const unsigned *IndicesEnd,
unsigned CurIndex) {
@@ -39,24 +40,24 @@
return CurIndex;
// Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
EE = STy->element_end();
EI != EE; ++EI) {
if (Indices && *Indices == unsigned(EI - EB))
- return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
}
return CurIndex;
}
// Given an array type, recursively traverse the elements.
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
if (Indices && *Indices == i)
- return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
}
return CurIndex;
}
@@ -71,12 +72,12 @@
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<uint64_t> *Offsets,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
@@ -87,8 +88,8 @@
return;
}
// Given an array type, recursively traverse the elements.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const Type *EltTy = ATy->getElementType();
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
@@ -125,7 +126,7 @@
/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
/// processed uses a memory 'm' constraint.
bool
-llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
const TargetLowering &TLI) {
for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
InlineAsm::ConstraintInfo &CI = CInfos[i];
@@ -210,7 +211,6 @@
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
- const Function *F = ExitBB->getParent();
// The block must end in a return statement or unreachable.
//
@@ -232,7 +232,7 @@
if (&*BBI == I)
break;
// Debug info intrinsics do not get in the way of tail call optimization.
- if (ISA_DEBUG_INFO_INTRINSIC(BBI))
+ if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!BBI->isSafeToSpeculativelyExecute())
@@ -249,6 +249,7 @@
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
+ const Function *F = ExitBB->getParent();
unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
return false;
@@ -283,3 +284,20 @@
return true;
}
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (CallerRetAttr & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node);
+}
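ComputeLinearIndex flattens a chain of insertvalue/extractvalue indices into a single scalar index by walking the aggregate in element order. A worked example for a hypothetical nested struct, following the recursion above:

    // %s = type { i32, { float, double }, i8 }
    //   member {0}    (i32)    -> linear index 0
    //   member {1, 0} (float)  -> linear index 1
    //   member {1, 1} (double) -> linear index 2
    //   member {2}    (i8)     -> linear index 3
    unsigned Indices[] = { 1, 1 };                     // select the double
    unsigned Linear = ComputeLinearIndex(STy, Indices, Indices + 2, 0); // 2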
diff --git a/src/LLVM/lib/CodeGen/AntiDepBreaker.h b/src/LLVM/lib/CodeGen/AntiDepBreaker.h
index 086b757..df47f98 100644
--- a/src/LLVM/lib/CodeGen/AntiDepBreaker.h
+++ b/src/LLVM/lib/CodeGen/AntiDepBreaker.h
@@ -30,6 +30,9 @@
/// anti-dependencies.
class AntiDepBreaker {
public:
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+
virtual ~AntiDepBreaker();
/// Start - Initialize anti-dep breaking for a new basic block.
@@ -40,9 +43,10 @@
/// the number of anti-dependencies broken.
///
virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
- MachineBasicBlock::iterator Begin,
- MachineBasicBlock::iterator End,
- unsigned InsertPosIndex) =0;
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) = 0;
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
@@ -52,6 +56,14 @@
/// Finish - Finish anti-dep breaking for a basic block.
virtual void FinishBlock() =0;
+
+ /// UpdateDbgValue - Update DBG_VALUE if dependency breaker is updating
+ /// other machine instruction to use NewReg.
+ void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
+ assert(MI && MI->isDebugValue() && "MI is not DBG_VALUE!");
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg)
+ MI->getOperand(0).setReg(NewReg);
+ }
};
}
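DbgValueVector pairs each DBG_VALUE machine instruction with the instruction whose result it describes, so a dependency breaker can patch debug info when it renames a register. A sketch of the fix-up flow, with hypothetical DbgValueMI/DefMI values (the real loop is in AggressiveAntiDepBreaker above):

    // The scheduler records each DBG_VALUE with its defining instruction.
    AntiDepBreaker::DbgValueVector DbgValues;
    DbgValues.push_back(std::make_pair(DbgValueMI, DefMI));

    // After renaming AntiDepReg to NewReg on DefMI, patch any DBG_VALUE
    // that still refers to the old register.
    for (AntiDepBreaker::DbgValueVector::iterator I = DbgValues.begin(),
           E = DbgValues.end(); I != E; ++I)
      if (I->second == DefMI)
        Breaker->UpdateDbgValue(I->first, AntiDepReg, NewReg);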
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/ARMException.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 0000000..3f23873
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,85 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+ARMException::ARMException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+ {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+ Asm->OutStreamer.EmitFnStart();
+ if (Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+ if (!Asm->MF->getFunction()->needsUnwindTableEntry())
+ Asm->OutStreamer.EmitCantUnwind();
+ else {
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ Asm->OutStreamer.EmitHandlerData();
+
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
+
+ Asm->OutStreamer.EmitFnEnd();
+}
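Taken together, BeginFunction and EndFunction walk the streamer through the ARM EHABI directive sequence; a condensed sketch, with the directive each streamer call produces noted in comments:

    OutStreamer.EmitFnStart();            // .fnstart
    // eh_func_begin label, function body, eh_func_end label ...
    OutStreamer.EmitPersonality(PerSym);  // .personality <symbol>
    OutStreamer.EmitHandlerData();        // .handlerdata, then the table
    OutStreamer.EmitFnEnd();              // .fnend
    // Functions without an unwind table entry instead emit:
    //   OutStreamer.EmitCantUnwind();    // .cantunwind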
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 0000000..1999f36
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,2095 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+ if (P == 0)
+ P = new gcp_map_type();
+ return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+ unsigned InBits = 0) {
+ unsigned NumBits = 0;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ NumBits = TD.getPreferredAlignmentLog(GVar);
+
+ // If InBits is specified, round it to it.
+ if (InBits > NumBits)
+ NumBits = InBits;
+
+ // If the GV has a specified alignment, take it into account.
+ if (GV->getAlignment() == 0)
+ return NumBits;
+
+ unsigned GVAlign = Log2_32(GV->getAlignment());
+
+ // If the GVAlign is larger than NumBits, or if we are required to obey
+ // NumBits because the GV has an assigned section, obey it.
+ if (GVAlign > NumBits || GV->hasSection())
+ NumBits = GVAlign;
+ return NumBits;
+}
+
+
+
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+ : MachineFunctionPass(ID),
+ TM(tm), MAI(tm.getMCAsmInfo()),
+ OutContext(Streamer.getContext()),
+ OutStreamer(Streamer),
+ LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
+ DD = 0; DE = 0; MMI = 0; LI = 0;
+ GCMetadataPrinters = 0;
+ VerboseAsm = Streamer.isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+ assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+
+ if (GCMetadataPrinters != 0) {
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+
+ for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
+ delete I->second;
+ delete &GCMap;
+ GCMetadataPrinters = 0;
+ }
+
+ delete &OutStreamer;
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return TM.getTargetLowering()->getObjFileLowering();
+}
+
+
+/// getTargetData - Return information about data layout.
+const TargetData &AsmPrinter::getTargetData() const {
+ return *TM.getTargetData();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer.getCurrentSection();
+}
+
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+ if (isVerbose())
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MMI->AnalyzeModule(M);
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+ OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(*this);
+
+ // Emit module-level inline asm if it exists.
+ if (!M.getModuleInlineAsm().empty()) {
+ OutStreamer.AddComment("Start of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+ OutStreamer.AddComment("End of file scope inline assembly");
+ OutStreamer.AddBlankLine();
+ }
+
+ if (MAI->doesSupportDebugInformation())
+ DD = new DwarfDebug(this, &M);
+
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ return false;
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ DE = new DwarfCFIException(this);
+ return false;
+ case ExceptionHandling::ARM:
+ DE = new ARMException(this);
+ return false;
+ case ExceptionHandling::Win64:
+ DE = new Win64Exception(this);
+ return false;
+ }
+
+ llvm_unreachable("Unknown exception type.");
+}
+
+void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
+ switch ((GlobalValue::LinkageTypes)Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ if (MAI->getWeakDefDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->getLinkOnceDirective() != 0) {
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ //NOTE: linkonce is handled by the section the symbol was assigned to.
+ } else {
+ // .weak _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol.
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ break;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ break;
+ default:
+ llvm_unreachable("Unknown linkage type!");
+ }
+}
+
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+ }
+
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+
+ if (!GV->hasInitializer()) // External globals require no extra code.
+ return;
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const TargetData *TD = TM.getTargetData();
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+
+ // Handle common and BSS local symbols (.lcomm).
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ unsigned Align = 1 << AlignLog;
+
+ // Handle common symbols.
+ if (GVKind.isCommon()) {
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ // Handle local BSS symbols.
+ if (MAI->hasMachoZeroFillDirective()) {
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
+ return;
+ }
+
+ if (MAI->getLCOMMDirectiveType() != LCOMM::None &&
+ (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) {
+ // .lcomm _foo, 42
+ OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .local _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ const MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+ // Handle the zerofill directive on darwin, which is a special form of BSS
+ // emission.
+ if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+ if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
+
+ // .globl _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ // .zerofill __DATA, __common, _foo, 400, 5
+ OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ return;
+ }
+
+ // Handle thread local data for mach-o which requires us to output an
+ // additional structure of data and mangle the original symbol so that we
+ // can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS())
+ OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ else if (GVKind.isThreadData()) {
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitAlignment(AlignLog, GV);
+ OutStreamer.EmitLabel(MangSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Emit the variable struct for the runtime.
+ const MCSection *TLVSect
+ = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer.SwitchSection(TLVSect);
+ // Emit the linkage here.
+ EmitLinkage(GV->getLinkage(), GVSym);
+ OutStreamer.EmitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
+ unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ PtrSize, 0);
+ OutStreamer.EmitIntValue(0, PtrSize, 0);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitLinkage(GV->getLinkage(), GVSym);
+ EmitAlignment(AlignLog, GV);
+
+ OutStreamer.EmitLabel(GVSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+
+ OutStreamer.AddBlankLine();
+}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+ // Print out constants referenced by the function
+ EmitConstantPool();
+
+ // Print the 'header' of function.
+ const Function *F = MF->getFunction();
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+ EmitVisibility(CurrentFnSym, F->getVisibility());
+
+ EmitLinkage(F->getLinkage(), CurrentFnSym);
+ EmitAlignment(MF->getAlignment(), F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), F,
+ /*PrintType=*/false, F->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to
+ // do their wild and crazy things as required.
+ EmitFunctionEntryLabel();
+
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+ for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ OutStreamer.AddComment("Address taken block that was later removed");
+ OutStreamer.EmitLabel(DeadBlockSyms[i]);
+ }
+
+ // Add a workaround for linkonce linkage on Cygwin/MinGW.
+ if (MAI->getLinkOnceDirective() != 0 &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
+ // FIXME: What is this?
+ MCSymbol *FakeStub =
+ OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
+ CurrentFnSym->getName());
+ OutStreamer.EmitLabel(FakeStub);
+ }
+
+ // Emit pre-function debug and/or EH information.
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->BeginFunction(MF);
+ }
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginFunction(MF);
+ }
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isUndefined()) {
+ OutStreamer.ForceCodeRegion();
+ return OutStreamer.EmitLabel(CurrentFnSym);
+ }
+
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+}
+
+
+/// EmitComments - Pretty-print comments for instructions.
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ const MachineMemOperand *MMO;
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Reload\n";
+ }
+ } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Spill\n";
+ }
+ } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ }
+
+ // Check for spill-induced copies
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+}
+
+/// EmitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+ unsigned RegNo = MI->getOperand(0).getReg();
+ AP.OutStreamer.AddComment(Twine("implicit-def: ") +
+ AP.TM.getRegisterInfo()->getName(RegNo));
+ AP.OutStreamer.AddBlankLine();
+}
+
+static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
+ std::string Str = "kill:";
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ assert(Op.isReg() && "KILL instruction must have only register operands");
+ Str += ' ';
+ Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+ Str += (Op.isDef() ? "<def>" : "<kill>");
+ }
+ AP.OutStreamer.AddComment(Str);
+ AP.OutStreamer.AddBlankLine();
+}
+
+/// EmitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 3-operand target-independent form.
+ if (MI->getNumOperands() != 3)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+ if (V.getContext().isSubprogram())
+ OS << DISubprogram(V.getContext()).getDisplayName() << ":";
+ OS << V.getName() << " <- ";
+
+ // Register or immediate value. Register 0 means undef.
+ if (MI->getOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+ if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ OS << (double)APF.convertToFloat();
+ } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ } else if (MI->getOperand(0).isImm()) {
+ OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else {
+ assert(MI->getOperand(0).isReg() && "Unknown operand type");
+ if (MI->getOperand(0).getReg() == 0) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+ }
+ OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+ }
+
+ OS << '+' << MI->getOperand(1).getImm();
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer.EmitRawText(OS.str());
+ return true;
+}
+
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+ MF->getFunction()->needsUnwindTableEntry())
+ return CFI_M_EH;
+
+ if (MMI->hasDebugInfo())
+ return CFI_M_Debug;
+
+ return CFI_M_None;
+}
+
+bool AsmPrinter::needsSEHMoves() {
+ return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+ MF->getFunction()->needsUnwindTableEntry();
+}
+
+void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ if (MMI->getCompactUnwindEncoding() != 0)
+ OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
+
+ MachineModuleInfo &MMI = MF->getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool FoundOne = false;
+ (void)FoundOne;
+ for (std::vector<MachineMove>::iterator I = Moves.begin(),
+ E = Moves.end(); I != E; ++I) {
+ if (I->getLabel() == Label) {
+ EmitCFIFrameMove(*I);
+ FoundOne = true;
+ }
+ }
+ assert(FoundOne);
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+ // Emit target-specific gunk before the function body.
+ EmitFunctionBodyStart();
+
+ bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(I);
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ LastMI = II;
+
+ // Print the assembly for the instruction.
+ if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+ !II->isDebugValue()) {
+ HasAnyRealCode = true;
+ ++EmittedInsts;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginInstruction(II);
+ }
+
+ if (isVerbose())
+ EmitComments(*II, OutStreamer.GetCommentOS());
+
+ switch (II->getOpcode()) {
+ case TargetOpcode::PROLOG_LABEL:
+ emitPrologLabel(*II);
+ break;
+
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::INLINEASM:
+ EmitInlineAsm(II);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose()) {
+ if (!EmitDebugValueComment(II, *this))
+ EmitInstruction(II);
+ }
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ if (isVerbose()) EmitImplicitDef(II, *this);
+ break;
+ case TargetOpcode::KILL:
+ if (isVerbose()) EmitKill(II, *this);
+ break;
+ default:
+ if (!TM.hasMCUseLoc())
+ MCLineEntry::Make(&OutStreamer, getCurrentSection());
+
+ EmitInstruction(II);
+ break;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endInstruction(II);
+ }
+ }
+ }
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a noop.
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+ MCInst Noop;
+ TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+ if (Noop.getOpcode()) {
+ OutStreamer.AddComment("avoids zero-length function");
+ OutStreamer.EmitInstruction(Noop);
+ } else // Target not mc-ized yet.
+ OutStreamer.EmitRawText(StringRef("\tnop\n"));
+ }
+
+ // Emit target-specific gunk after the function body.
+ EmitFunctionBodyEnd();
+
+ // If the target wants a .size directive for the size of the function, emit
+ // it.
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function, so we can get the size as
+ // difference between the function label and the temp label.
+ MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(FnEndLabel);
+
+ const MCExpr *SizeExp =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+ OutContext);
+ OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+ }
+
+ // Emit post-function debug information.
+ if (DD) {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endFunction(MF);
+ }
+ if (DE) {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndFunction();
+ }
+ MMI->EndFunction();
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
+ OutStreamer.AddBlankLine();
+}
+
+/// getDebugValueLocation - Get location information encoded by DBG_VALUE
+/// operands.
+MachineLocation AsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ // Target specific DBG_VALUE instructions are handled by each target.
+ return MachineLocation();
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+
+ for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg());
+ *SR && Reg < 0; ++SR) {
+ Reg = TRI->getDwarfRegNum(*SR, false);
+ // FIXME: Get the bit range this register uses of the superregister
+ // so that we can produce a DW_OP_bit_piece
+ }
+
+ // FIXME: Handle cases like a super register being encoded as
+ // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33
+
+ // FIXME: We have no reasonable way of handling errors in here. The
+ // caller might be in the middle of an dwarf expression. We should
+ // probably assert that Reg >= 0 once debug info generation is more mature.
+
+ if (int Offset = MLoc.getOffset()) {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
+ EmitInt8(dwarf::DW_OP_breg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_bregx");
+ EmitInt8(dwarf::DW_OP_bregx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ EmitSLEB128(Offset);
+ } else {
+ if (Reg < 32) {
+ OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+ EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ OutStreamer.AddComment("DW_OP_regx");
+ EmitInt8(dwarf::DW_OP_regx);
+ OutStreamer.AddComment(Twine(Reg));
+ EmitULEB128(Reg);
+ }
+ }
+
+ // FIXME: Produce a DW_OP_bit_piece if we used a superregister
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Emit global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobalVariable(I);
+
+ // Emit visibility info for declarations
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = Mang->getSymbol(&F);
+ EmitVisibility(Name, V, false);
+ }
+
+ // Finalize debug and EH information.
+ if (DE) {
+ {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndModule();
+ }
+ delete DE; DE = 0;
+ }
+ if (DD) {
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endModule();
+ }
+ delete DD; DD = 0;
+ }
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy, it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->hasExternalWeakLinkage()) continue;
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ }
+ }
+
+ if (MAI->hasSetDirective()) {
+ OutStreamer.AddBlankLine();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ MCSymbol *Name = Mang->getSymbol(I);
+
+ const GlobalValue *GV = I->getAliasedGlobal();
+ MCSymbol *Target = Mang->getSymbol(GV);
+
+ if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (I->hasWeakLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+ EmitVisibility(Name, I->getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name,
+ MCSymbolRefExpr::Create(Target, OutContext));
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(*this);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer.SwitchSection(S);
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
+ delete Mang; Mang = 0;
+ MMI = 0;
+
+ OutStreamer.Finish();
+ return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ // Get the function symbol.
+ CurrentFnSym = Mang->getSymbol(MF.getFunction());
+
+ if (isVerbose())
+ LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+ // SectionCPs - Keep track of the alignment and constant pool entries per section.
+ struct SectionCPs {
+ const MCSection *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+ SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // Calculate sections for constant pool entries. We collect entries that go
+ // into the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16();break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+    // The number of sections is small, so just do a linear search from the
+    // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
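+      // For example, with Offset == 5 and a 4-byte entry alignment,
+      // AlignMask == 3 and NewOffset == (5 + 3) & ~3 == 8, so three zero
+      // bytes of padding are emitted before the entry's label.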
+
+ Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+ OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (MJTI == 0) return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function *F = MF->getFunction();
+ bool JTInDiffSection = false;
+ if (// In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+ // FIXME: Need a better predicate for this: what about custom entries?
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      // We should also do this if the section name is NULL or the function is
+      // declared in a discardable section.
+ // FIXME: this isn't the right predicate, should be based on the MCSection
+ // for the function.
+ F->isWeakForLinker()) {
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+ } else {
+ // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection =
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ReadOnlySection);
+ JTInDiffSection = true;
+ }
+
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+
+ // If we know the form of the jump table, go ahead and tag it as such.
+ if (!JTInDiffSection) {
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
+ OutStreamer.EmitJumpTable32Region();
+ } else {
+ OutStreamer.EmitDataRegion();
+ }
+ }
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+ // .set directive for each unique entry. This reduces the number of
+ // relocations the assembler will generate for the jump table.
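+    // For example, rather than several entries each carrying the relocatable
+    // expression "LBB0_3 - LJTI0_0", the assembler sees one assignment:
+    //   .set L0_0_set_3, LBB0_3 - LJTI0_0
+    //   .word L0_0_set_3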
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->hasSetDirective()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = TM.getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ const MachineBasicBlock *MBB = JTBBs[ii];
+ if (!EmittedSets.insert(MBB)) continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+ }
+ }
+
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+ // FIXME: This doesn't have to have any specific name, just any randomly
+ // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+ OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ }
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+ const MCExpr *Value = 0;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
+ OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+    // EK_BlockAddress - Each entry is a plain address of a block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+ // EK_GPRel32BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // EK_LabelDifference32 - Each entry is the address of the block minus
+ // the address of the jump table. This is used for PIC jump tables where
+ // gprel32 is not supported. e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive is supported, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ if (MAI->hasSetDirective()) {
+ // If we used .set, reference the .set's symbol.
+ Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ // Otherwise, use the difference as the jump table entry.
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+ const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+ OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+    if (MAI->hasNoDeadStrip()) // Otherwise, there is no need to emit this at all.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ if (GV->getName() == "llvm.global_ctors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
+ EmitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".constructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
+ EmitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer());
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".destructors_used");
+ OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+ OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+ }
+}
+
+typedef std::pair<int, Constant*> Structor;
+
+static bool priority_order(const Structor& lhs, const Structor& rhs) {
+ return lhs.first < rhs.first;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
+void AsmPrinter::EmitXXStructorList(const Constant *List) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority.
+
+  // Sanity check the structors list.
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  if (!InitList) return; // Not an array!
+ StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+ if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs!
+ if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
+
+ // Gather the structors in a form that's convenient for sorting by priority.
+ SmallVector<Structor, 8> Structors;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (!CS) continue; // Malformed.
+ if (CS->getOperand(1)->isNullValue())
+ break; // Found a null terminator, skip the rest.
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!Priority) continue; // Malformed.
+ Structors.push_back(std::make_pair(Priority->getLimitedValue(65535),
+ CS->getOperand(1)));
+ }
+
+  // Emit the function pointers in the output order requested by the target.
+ switch (MAI->getStructorOutputOrder()) {
+ case Structors::None:
+ break;
+ case Structors::PriorityOrder:
+ std::sort(Structors.begin(), Structors.end(), priority_order);
+ break;
+ case Structors::ReversePriorityOrder:
+ std::sort(Structors.rbegin(), Structors.rend(), priority_order);
+ break;
+ }
+ for (unsigned i = 0, e = Structors.size(); i != e; ++i)
+ EmitGlobalConstant(Structors[i].second);
+}
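+// For illustration, a typical llvm.global_ctors initializer walked above has
+// the form (a sketch; the function name is hypothetical):
+//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+//                        [{ i32, void ()* } { i32 65535, void ()* @init }]
+// Each entry is gathered as a (priority, function) pair, sorted, and emitted
+// as a bare function pointer.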
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+}
+
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) const {
+ // Get the Hi-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+ if (!MAI->hasSetDirective()) {
+ OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+ return;
+ }
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ OutStreamer.EmitAssignment(SetLabel, Diff);
+ OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+}
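+// With .set support, a 4-byte label difference is emitted as, e.g.:
+//   .set Lset0, Lhi - Llo
+//   .long Lset0
+// and without it as a single relocatable expression: ".long Lhi-Llo".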
+
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+ const MCSymbol *Lo, unsigned Size)
+ const {
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(Plus,
+ MCSymbolRefExpr::Create(Lo, OutContext),
+ OutContext);
+
+  if (!MAI->hasSetDirective()) {
+    OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+  } else {
+    // Otherwise, emit with .set (aka assignment).
+    MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+    OutStreamer.EmitAssignment(SetLabel, Diff);
+    OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+  }
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size)
+ const {
+
+ // Emit Label+Offset
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+  OutStreamer.EmitValue(Plus, Size, 0/*AddrSpace*/);
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
+
+ if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+
+ if (getCurrentSection()->getKind().isText())
+ OutStreamer.EmitCodeAlignment(1 << NumBits);
+ else
+ OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
+}
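+// Example: EmitAlignment(3) pads to an 8-byte (1 << 3) boundary, using a
+// code-alignment directive (which may pad with nops) in text sections and
+// zero fill elsewhere; EmitAlignment(0) emits nothing.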
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+///
+static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0) {
+ llvm_unreachable("Unknown constant value to lower!");
+ return MCConstantExpr::Create(0, Ctx);
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using TargetData as a
+ // last resort before giving up.
+ if (Constant *C =
+ ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ if (C != CE)
+ return LowerConstant(C, AP);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ return MCConstantExpr::Create(0, Ctx);
+ case Instruction::GetElementPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Generate a symbolic expression for the byte address
+ const Constant *PtrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+
+ const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ if (Offset == 0)
+ return Base;
+
+ // Truncate/sext the offset to the pointer size.
+ if (TD.getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD.getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
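+    // E.g. on a 32-bit target an offset of 0x100000004 keeps only its low 32
+    // bits, since (0x100000004 << 32) >> 32 == 4, and a low word with bit 31
+    // set is sign-extended to a negative value, keeping the expression in
+    // pointer range.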
+
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return LowerConstant(CE->getOperand(0), AP);
+
+ case Instruction::IntToPtr: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return LowerConstant(Op, AP);
+ }
+
+ case Instruction::PtrToInt: {
+ const TargetData &TD = *AP.TM.getTargetData();
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
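+  // For the mask above: a 32-bit pointer emitted into a 64-bit slot is ANDed
+  // with ~0ULL >> (64-32) == 0xffffffff, guaranteeing a zero-extended
+  // truncation even when the operand is a folded constant expression.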
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+ AsmPrinter &AP);
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64) return -1;
+
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType());
+ uint64_t Value = CI->getZExtValue();
+
+ // Make sure the constant is at least 8 bits long and has a power
+ // of 2 bit width. This guarantees the constant bit width is
+ // always a multiple of 8 bits, avoiding issues with padding out
+ // to Size and other such corner cases.
+ if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1;
+
+ uint8_t Byte = static_cast<uint8_t>(Value);
+
+ for (unsigned i = 1; i < Size; ++i) {
+ Value >>= 8;
+ if (static_cast<uint8_t>(Value) != Byte) return -1;
+ }
+ return Byte;
+ }
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ // Make sure all array elements are sequences of the same repeated
+ // byte.
+ if (CA->getNumOperands() == 0) return -1;
+
+ int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
+ if (Byte == -1) return -1;
+
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM);
+ if (ThisByte == -1) return -1;
+ if (Byte != ThisByte) return -1;
+ }
+ return Byte;
+ }
+
+ return -1;
+}
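+// Examples: an i32 with value 0x01010101 yields 0x01, as does an array of
+// such constants, letting the caller emit a single fill directive; an i32
+// with value 0x01020304 yields -1.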
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ if (AddrSpace != 0 || !CA->isString()) {
+ // Not a string. Print the values in successive locations.
+
+    // See if we can aggregate some values: check whether the whole constant
+    // can be represented as a repeated sequence of a single byte value.
+    int Value = isRepeatedByteSequence(CA, AP.TM);
+
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+ AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+    } else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ }
+ return;
+ }
+
+ // Otherwise, it can be emitted as .ascii.
+ SmallVector<char, 128> TmpVec;
+ TmpVec.reserve(CA->getNumOperands());
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+
+ AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantVector(const ConstantVector *CV,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+}
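+// E.g. for a <3 x float> global on typical targets, the elements cover 12
+// bytes while the vector's alloc size is 16, so 4 zero bytes of tail padding
+// follow the last element.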
+
+static void EmitGlobalConstantStruct(const ConstantStruct *CS,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = TD->getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+ - Layout->getElementOffset(i)) - FieldSize;
+ SizeSoFar += FieldSize + PadSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstantImpl(Field, AddrSpace, AP);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ if (CFP->getType()->isDoubleTy()) {
+ if (AP.isVerbose()) {
+ double Val = CFP->getValueAPF().convertToDouble();
+ AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+ }
+
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isFloatTy()) {
+ if (AP.isVerbose()) {
+ float Val = CFP->getValueAPF().convertToFloat();
+ AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+ }
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
+ return;
+ }
+
+ if (CFP->getType()->isX86_FP80Ty()) {
+    // All long double variants are printed as hex.
+    // API needed to prevent premature destruction.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.isVerbose()) {
+ // Convert to double so we can print the approximate val as a comment.
+ APFloat DoubleVal = CFP->getValueAPF();
+ bool ignored;
+ DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
+ << DoubleVal.convertToDouble() << '\n';
+ }
+
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+ }
+
+ // Emit the tail padding for the long double.
+ const TargetData &TD = *AP.TM.getTargetData();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
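+    // E.g. on x86-32 the x86_fp80 store size is 10 bytes with an alloc size
+    // of (typically) 12, giving 2 bytes of padding; on x86-64 the alloc size
+    // is 16, giving 6.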
+ return;
+ }
+
+ assert(CFP->getType()->isPPC_FP128Ty() &&
+ "Floating point constant type not handled");
+ // All long double variants are printed as hex
+ // API needed to prevent premature destruction.
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = API.getRawData();
+ if (AP.TM.getTargetData()->isBigEndian()) {
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ } else {
+ AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ }
+}
+
+static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace, AsmPrinter &AP) {
+ const TargetData *TD = AP.TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ }
+}
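+// E.g. an i128 constant is emitted as two 8-byte values: the low word
+// (RawData[0]) first on little-endian targets, and the high word
+// (RawData[1]) first on big-endian ones.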
+
+static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
+ uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ switch (Size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ return;
+ default:
+ EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
+ return;
+ }
+ }
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return EmitGlobalConstantFP(CFP, AddrSpace, AP);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+ return;
+ }
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return EmitGlobalConstantVector(V, AddrSpace, AP);
+
+  // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr and emit it
+  // through the streamer with EmitValue.
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
+ AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
+ AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ if (Size)
+ EmitGlobalConstantImpl(CV, AddrSpace, *this);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer.EmitIntValue(0, 1, AddrSpace);
+ }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+ if (Offset > 0)
+ OS << '+' << Offset;
+ else if (Offset < 0)
+ OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ Name + Twine(ID));
+}
+
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
+ return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+ Name);
+}
+
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+ return MMI->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ + "_" + Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ return OutContext.GetOrCreateSymbol
+ (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+/// global value name as its base, with the specified suffix, and where the
+/// symbol is forced to have private linkage if ForcePrivate is true.
+MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix,
+ bool ForcePrivate) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
+ NameStr.append(Suffix.begin(), Suffix.end());
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+/// ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym);
+ return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (Loop == 0) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+ for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){
+ OS.indent((*CL)->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, *CL, FunctionNumber);
+ }
+}
+
+/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (Loop == 0) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+  // If this block is not a loop header, just print out which block is the
+  // loop header and return.
+ if (Header != &MBB) {
+ AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->empty())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+ // Emit an alignment directive for this block, if needed.
+ if (unsigned Align = MBB->getAlignment())
+ EmitAlignment(Log2_32(Align));
+
+ // If the block has its address taken, emit any labels that were used to
+ // reference the block. It is possible that there is more than one label
+ // here, because multiple LLVM BB's may have been RAUW'd to this block after
+ // the references were generated.
+ if (MBB->hasAddressTaken()) {
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (isVerbose())
+ OutStreamer.AddComment("Block address taken");
+
+ std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+ for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+ OutStreamer.EmitLabel(Syms[i]);
+ }
+
+ // Print the main label for the block.
+ if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+ Twine(MBB->getNumber()) + ":");
+ }
+ } else {
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
+ OutStreamer.EmitLabel(MBB->getSymbol());
+ }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ if (IsDefinition)
+ Attr = MAI->getHiddenVisibilityAttr();
+ else
+ Attr = MAI->getHiddenDeclarationVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+  // Check the terminators in the previous block.
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ return false;
+
+    // If this block is an operand of one of the branches, this is not
+    // a fall through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+ gcp_map_type::iterator GCPI = GCMap.find(S);
+ if (GCPI != GCMap.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMap.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+ return 0;
+}
+
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 0000000..4d6c281
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,233 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dwarf emissions parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ if (MAI->hasLEB128()) {
+ OutStreamer.EmitSLEB128IntValue(Value);
+ return;
+ }
+
+ // If we don't have .sleb128, emit as .bytes.
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ if (IsMore) Byte |= 0x80;
+ OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (IsMore);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer.AddComment(Desc);
+
+ // FIXME: Should we add a PadTo option to the streamer?
+ if (MAI->hasLEB128() && PadTo == 0) {
+ OutStreamer.EmitULEB128IntValue(Value);
+ return;
+ }
+
+ // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ if (Value || PadTo != 0) Byte |= 0x80;
+ OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+ } while (Value);
+
+ if (PadTo) {
+ if (PadTo > 1)
+ OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+ OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+ }
+}
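+// A worked example: 624485 encodes as the ULEB128 bytes 0xe5 0x8e 0x26 (seven
+// value bits per byte, MSB set on all but the last), and -2 encodes as the
+// single SLEB128 byte 0x7e.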
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+ if (isVerbose()) {
+ if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ Twine(Val-dwarf::DW_CFA_offset) + ")");
+ else
+ OutStreamer.AddComment(dwarf::CallFrameString(Val));
+ }
+ OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr: return "absptr";
+ case dwarf::DW_EH_PE_omit: return "omit";
+ case dwarf::DW_EH_PE_pcrel: return "pcrel";
+ case dwarf::DW_EH_PE_udata4: return "udata4";
+ case dwarf::DW_EH_PE_udata8: return "udata8";
+ case dwarf::DW_EH_PE_sdata4: return "sdata4";
+ case dwarf::DW_EH_PE_sdata8: return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+ if (isVerbose()) {
+ if (Desc != 0)
+ OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ OutStreamer.AddComment(Twine("Encoding = ") +
+ DecodeDWARFEncoding(Val));
+ }
+
+ OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ default: assert(0 && "Invalid encoded value.");
+ case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+ case dwarf::DW_EH_PE_udata2: return 2;
+ case dwarf::DW_EH_PE_udata4: return 4;
+ case dwarf::DW_EH_PE_udata8: return 8;
+ }
+}
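+// E.g. dwarf::DW_EH_PE_sdata4 (0x0b) masks to dwarf::DW_EH_PE_udata4 (0x03),
+// so both the signed and unsigned 4-byte encodings report a size of 4.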
+
+void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer);
+ OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+}
+
+/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
+/// section. This can be done with a special directive if the target supports
+/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
+/// of the section.
+///
+/// SectionLabel is a temporary label emitted at the start of the section that
+/// Label lives in.
+void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
+ const MCSymbol *SectionLabel) const {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
+ // FIXME: MCize.
+ OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+ return;
+ }
+
+ // Get the section that we're referring to, based on SectionLabel.
+ const MCSection &Section = SectionLabel->getSection();
+
+ // If Label has already been emitted, verify that it is in the same section as
+ // section label for sanity.
+ assert((!Label->isInSection() || &Label->getSection() == &Section) &&
+ "Section offset using wrong section base for label");
+
+ // If the section in question will end up with an address of 0 anyway, we can
+ // just emit an absolute reference to save a relocation.
+ if (Section.isBaseAddressKnownZero()) {
+ OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+ return;
+ }
+
+ // Otherwise, emit it as a label difference from the start of the section.
+ EmitLabelDifference(Label, SectionLabel, 4);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitCFIFrameMove - Emit a frame instruction.
+void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+ } else {
+ // Reg + Offset
+ OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true),
+ Src.getOffset());
+ }
+ } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(!Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+ Dst.getOffset());
+ }
+}
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 0000000..8eda889
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,420 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
+ assert(!Str.empty() && "Can't emit empty inline asm block");
+
+ // Remember if the buffer is nul terminated or not so we can avoid a copy.
+ bool isNullTerminated = Str.back() == 0;
+ if (isNullTerminated)
+ Str = Str.substr(0, Str.size()-1);
+
+ // If the output streamer is actually a .s file, just emit the blob textually.
+ // This is useful in case the asm parser doesn't handle something but the
+ // system assembler does.
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText(Str);
+ return;
+ }
+
+ SourceMgr SrcMgr;
+ SrcMgrDiagInfo DiagInfo;
+
+ // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ bool HasDiagHandler = false;
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
+ HasDiagHandler = true;
+ }
+
+ MemoryBuffer *Buffer;
+ if (isNullTerminated)
+ Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+ else
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+
+  // FIXME: It would be nice if we could avoid creating a new instance of
+  // MCSubtargetInfo here given that TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives which can change subtarget
+ // state. e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo>
+ STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+ TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OwningPtr<MCTargetAsmParser>
+ TAP(TM.getTarget().createMCAsmParser(*STI, *Parser));
+ if (!TAP)
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
+ Parser->setTargetParser(*TAP.get());
+
+ // Don't implicitly switch to the text section before the asm.
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
+ if (Res && !HasDiagHandler)
+ report_fatal_error("Error parsing inline asm\n");
+}
+
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ unsigned NumOperands = MI->getNumOperands();
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != NumOperands-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+  // If this asm string is empty, just print the #APP/#NOAPP markers.
+  // These are useful to see where empty asm blocks wound up.
+ if (AsmStr[0] == 0) {
+ // Don't emit the comments if writing to a .o file.
+ if (!OutStreamer.hasRawTextSupport()) return;
+
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ OS << '\t';
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+      OS << '\n';             // Emit the newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+      // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ if (OpNo >= MI->getNumOperands()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0] == 'l') // labels are target independent
+ // FIXME: What if the operand isn't an MBB, report error?
+ OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
+ else {
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0,
+ OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0, OS);
+ }
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ OS << '\n' << (char)0; // null terminate string.
+ EmitInlineAsm(OS.str(), LocMD);
+
+ // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+}
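+
+// For example (operand numbers illustrative), an asm string such as
+//   "$(mov $0, $1$|movl $1, $0$)"
+// emits "mov $0, $1" or "movl $1, $0" depending on the assembler dialect
+// MAI reports, "$$" emits a literal '$', and "${0:l}" prints operand 0 as a
+// label.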
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+ const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ OS << MAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ OS << MAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+    // Comparing the address of MI isn't sufficient, because MachineInstrs may
+    // be allocated at the same address across functions.
+
+    // If this is a new instruction or function, bump the counter.
+ if (LastMI != MI || LastFn != getFunctionNumber()) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = getFunctionNumber();
+ }
+ OS << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+}
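+
+// For example (label name hypothetical), an asm string of the form
+//   "jne ${:private}skip${:uid}\n ...\n ${:private}skip${:uid}:"
+// uses ${:private} for the target's private-label prefix and ${:uid} for a
+// per-instance counter, so each expansion of the asm gets a unique local
+// label.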
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
+
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DIE.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 0000000..9c1ce76
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,368 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(AsmPrinter *AP) const {
+ // Emit its Dwarf tag type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ // FIXME: Doing work even in non-asm-verbose runs.
+ AP->EmitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ AP->EmitULEB128(0, "EOM(1)");
+ AP->EmitULEB128(0, "EOM(2)");
+}
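+
+// For example, an abbreviation for DW_TAG_variable with children disabled and
+// a single DW_AT_name/DW_FORM_string pair is emitted as the ULEB128 values:
+//   DW_TAG_variable, DW_CHILDREN_no, DW_AT_name, DW_FORM_string, 0, 0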
+
+#ifndef NDEBUG
+void DIEAbbrev::print(raw_ostream &O) {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << '\n';
+ }
+}
+void DIEAbbrev::dump() { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
+ DIEInteger *DI = new (A) DIEInteger(0);
+ Values.insert(Values.begin(), DI);
+ Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+ return DI;
+}
+
+#ifndef NDEBUG
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << format("0x%lx", (long)(intptr_t)this)
+ << ", Offset: " << Offset
+ << ", Size: " << Size << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
+ } else {
+ O << "Size: " << Size << "\n";
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(dbgs());
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ unsigned Size = ~0U;
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Size = 1; break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Size = 4; break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+ Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
+ default: llvm_unreachable("DIE Value form not supported yet"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEInteger::print(raw_ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << format(" 0x%llx", (unsigned long long)Integer);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitBytes(Str, /*addrspace*/0);
+ // Emit nul terminator.
+ AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
+}
+
+#ifndef NDEBUG
+void DIEString::print(raw_ostream &O) {
+ O << "Str: \"" << Str << "\"";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIELabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return AP->getTargetData().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIELabel::print(raw_ostream &O) {
+ O << "Lbl: " << Label->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return AP->getTargetData().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(raw_ostream &O) {
+ O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+ AP->EmitInt32(Entry->getOffset());
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(raw_ostream &O) {
+ O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - Calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
+ if (!Size) {
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+ switch (Form) {
+ default: assert(0 && "Improper form for block"); break;
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block"); break;
+ }
+ return 0;
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(raw_ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DIE.h b/src/LLVM/lib/CodeGen/AsmPrinter/DIE.h
index d56c094..7d61f1e 100644
--- a/src/LLVM/lib/CodeGen/AsmPrinter/DIE.h
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DIE.h
@@ -70,7 +70,6 @@
public:
DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
- virtual ~DIEAbbrev() {}
// Accessors.
unsigned getTag() const { return Tag; }
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 0000000..8ed4f4c
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,153 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+ if (moveTypeModule == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer.EmitCFISections(false, true);
+
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+ return;
+
+ // Emit references to all used personality functions
+ bool AtLeastOne = false;
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+ if (!Personalities[i])
+ continue;
+ MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]);
+ TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
+ AtLeastOne = true;
+ }
+
+ if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
+ // This is a temporary hack to keep sections in the same order they
+ // were before. This lets us produce bit identical outputs while
+ // transitioning to CFI.
+ Asm->OutStreamer.SwitchSection(
+ const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
+ }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+ if (MoveType == AsmPrinter::CFI_M_EH ||
+ (MoveType == AsmPrinter::CFI_M_Debug &&
+ moveTypeModule == AsmPrinter::CFI_M_None))
+ moveTypeModule = MoveType;
+
+ shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIStartProc();
+
+ // Indicate personality routine, if any.
+ if (!shouldEmitPersonality)
+ return;
+
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+ Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ // Provide LSDA information.
+ if (!shouldEmitLSDA)
+ return;
+
+ Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()),
+ LSDAEncoding);
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality)
+ EmitExceptionTable();
+}
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 0000000..6fe476d
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,1413 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+/// CompileUnit - Compile unit constructor.
+CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW)
+ : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+ DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+}
+
+/// ~CompileUnit - Destructor for compile unit.
+CompileUnit::~CompileUnit() {
+ for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+ DIEBlocks[j]->~DIEBlock();
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+ DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+ return Value;
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+ DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add a signed integer attribute data and value.
+///
+void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps a reference to the string.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form,
+ StringRef String) {
+ DIEValue *Value = new (DIEValueAllocator) DIEString(String);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+ Die->addValue(Attribute, Form, Value);
+}
+
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+ DIE *Entry) {
+ Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+
+/// addBlock - Add block data.
+///
+void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(Asm);
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
+ // Verify variable.
+ if (!V.Verify())
+ return;
+
+ unsigned Line = V.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(),
+ V.getContext().getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+ // Verify global variable.
+ if (!G.Verify())
+ return;
+
+ unsigned Line = G.getLineNumber();
+ if (Line == 0)
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(),
+ G.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+ // Verify subprogram.
+ if (!SP.Verify())
+ return;
+ // If the line number is 0, don't add it.
+ if (SP.getLineNumber() == 0)
+ return;
+
+ unsigned Line = SP.getLineNumber();
+ if (!SP.getContext().Verify())
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0 || !Ty.getContext().Verify())
+ return;
+ unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+ // Verify namespace.
+ if (!NS.Verify())
+ return;
+
+ unsigned Line = NS.getLineNumber();
+ if (Line == 0)
+ return;
+ StringRef FN = NS.getFilename();
+
+ unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on provided MachineLocation.
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+ MachineLocation Location) {
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// addRegisterOp - Add register operand.
+void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+}
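+
+// For example, a register whose DWARF number is 5 encodes as the single
+// opcode DW_OP_reg5, while DWARF number 40 encodes as DW_OP_regx followed by
+// ULEB128(40).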
+
+/// addRegisterOffset - Add register offset.
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+ int64_t Offset) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+  if (Reg == RI->getFrameRegister(*Asm->MF))
+    // If the variable's offset is based on the frame register, use DW_OP_fbreg.
+    addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ else if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+ addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+}
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable
+/// given the extra address information encoded in the DIVariable, starting from
+/// the starting location. Add the DWARF information to the die.
+///
+void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned N = DV->getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) {
+      // If the first address element is OpPlus, emit
+      // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1));
+ i = 2;
+ } else
+ addRegisterOp(Block, Location.getReg());
+ }
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+  for (; i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+ 2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
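+
+/* For example, with hypothetical field offsets (forwarding at byte 8 and
+   VarName at byte 24) and a pointer to the struct as the starting location,
+   the ops appended by the function below would be:
+
+     DW_OP_deref
+     DW_OP_plus_uconst 8
+     DW_OP_deref
+     DW_OP_plus_uconst 24                                                   */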
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read large comment just above here.
+///
+void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIType Ty = DV->getType();
+ DIType TmpTy = Ty;
+ unsigned Tag = Ty.getTag();
+ bool isPointer = false;
+
+ StringRef varName = DV->getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ TmpTy = DTy.getTypeDerivedFrom();
+ isPointer = true;
+ }
+
+ DICompositeType blockStruct = DICompositeType(TmpTy);
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DIArray Fields = blockStruct.getTypeArray();
+ DIDescriptor varField = DIDescriptor();
+ DIDescriptor forwardingField = DIDescriptor();
+
+ for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Fields.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ StringRef fieldName = DT.getName();
+ if (fieldName == "__forwarding")
+ forwardingField = Element;
+ else if (fieldName == varName)
+ varField = Element;
+ }
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned forwardingFieldOffset =
+ DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+ unsigned varFieldOffset =
+ DIDerivedType(varField).getOffsetInBits() >> 3;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg()) {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ else {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// isTypeSigned - Return true if the type is signed.
+static bool isTypeSigned(DIType Ty, int *SizeInBits) {
+ if (Ty.isDerivedType())
+ return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits);
+ if (Ty.isBasicType())
+ if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed
+ || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) {
+ *SizeInBits = Ty.getSizeInBits();
+ return true;
+ }
+ return false;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
+ DIType Ty) {
+ assert (MO.isImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ int SizeInBits = -1;
+ bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
+ unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata;
+ switch (SizeInBits) {
+ case 8: Form = dwarf::DW_FORM_data1; break;
+ case 16: Form = dwarf::DW_FORM_data2; break;
+ case 32: Form = dwarf::DW_FORM_data4; break;
+ case 64: Form = dwarf::DW_FORM_data8; break;
+ default: break;
+ }
+ SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
+ : addUInt(Block, 0, Form, MO.getImm());
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
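+
+// For example, a constant for a 32-bit signed type is written into the block
+// with DW_FORM_data4; if the type's size is unknown, the form falls back to
+// DW_FORM_sdata or DW_FORM_udata depending on signedness.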
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+ assert (MO.isFPImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+ // Get the raw data form of the floating point.
+ const APInt FltVal = FPImm.bitcastToAPInt();
+ const char *FltPtr = (const char*)FltVal.getRawData();
+
+ int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & FltPtr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
+ bool Unsigned) {
+ unsigned CIBitWidth = CI->getBitWidth();
+ if (CIBitWidth <= 64) {
+ unsigned form = 0;
+ switch (CIBitWidth) {
+ case 8: form = dwarf::DW_FORM_data1; break;
+ case 16: form = dwarf::DW_FORM_data2; break;
+ case 32: form = dwarf::DW_FORM_data4; break;
+ case 64: form = dwarf::DW_FORM_data8; break;
+ default:
+ form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
+ }
+ if (Unsigned)
+ addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue());
+ else
+ addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue());
+ return true;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ // Get the raw data form of the large APInt.
+ const APInt Val = CI->getValue();
+ const uint64_t *Ptr64 = Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+
+ // Output the constant to DWARF one byte at a time.
+ for (int i = 0; i < NumBytes; i++) {
+ uint8_t c;
+ if (LittleEndian)
+ c = Ptr64[i / 8] >> (8 * (i & 7));
+ else
+ c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, c);
+ }
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
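+
+// For example, a 128-bit constant on a little-endian target is emitted as the
+// 16 bytes of the APInt's raw words from least to most significant; on a
+// big-endian target the loop walks the same bytes in reverse.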
+
+/// addTemplateParams - Add template parameters to the buffer.
+void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+ // Add template parameters.
+ for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
+ DIDescriptor Element = TParams.getElement(i);
+ if (Element.isTemplateTypeParameter())
+ Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+ DITemplateTypeParameter(Element)));
+ else if (Element.isTemplateValueParameter())
+ Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+ DITemplateValueParameter(Element)));
+ }
+}
+
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
+ if (Context.isType()) {
+ DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
+ ContextDIE->addChild(Die);
+ } else if (Context.isNameSpace()) {
+ DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
+ ContextDIE->addChild(Die);
+ } else if (Context.isSubprogram()) {
+ DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context));
+ ContextDIE->addChild(Die);
+ } else if (DIE *ContextDIE = getDIE(Context))
+ ContextDIE->addChild(Die);
+ else
+ addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+ DIType Ty(TyNode);
+ if (!Ty.Verify())
+ return NULL;
+ DIE *TyDIE = getDIE(Ty);
+ if (TyDIE)
+ return TyDIE;
+
+ // Create new type.
+ TyDIE = new DIE(dwarf::DW_TAG_base_type);
+ insertDIE(Ty, TyDIE);
+ if (Ty.isBasicType())
+ constructTypeDIE(*TyDIE, DIBasicType(Ty));
+ else if (Ty.isCompositeType())
+ constructTypeDIE(*TyDIE, DICompositeType(Ty));
+ else {
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+ }
+
+ addToContextOwner(TyDIE, Ty.getContext());
+ return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void CompileUnit::addType(DIE *Entity, DIType Ty) {
+ if (!Ty.Verify())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *Entry = getDIEEntry(Ty);
+ // If it exists then use the existing value.
+ if (Entry) {
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ return;
+ }
+
+ // Construct type.
+ DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+ // Set up proxy.
+ Entry = createDIEEntry(Buffer);
+ insertDIEEntry(Ty, Entry);
+ Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+
+ // If this is a complete composite type then include it in the
+ // list of global types.
+ addGlobalType(Ty);
+}
+
+/// addGlobalType - Add a new global type to the compile unit.
+///
+void CompileUnit::addGlobalType(DIType Ty) {
+ DIDescriptor Context = Ty.getContext();
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (!Context || Context.isCompileUnit() || Context.isFile()
+ || Context.isNameSpace()))
+ if (DIEEntry *Entry = getDIEEntry(Ty))
+ GlobalTypes[Ty.getName()] = Entry->getEntry();
+}
+
+/// addPubTypes - Add type for pubtypes section.
+void CompileUnit::addPubTypes(DISubprogram SP) {
+ DICompositeType SPTy = SP.getType();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
+ return;
+
+ DIArray Args = SPTy.getTypeArray();
+ for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+ DIType ATy(Args.getElement(i));
+ if (!ATy.Verify())
+ continue;
+ addGlobalType(ATy);
+ }
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+ // Get core information.
+ StringRef Name = BTy.getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
+ Buffer.setTag(dwarf::DW_TAG_unspecified_type);
+    // An unspecified type has only a name, nothing else.
+ return;
+ }
+
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+ // Get core information.
+ StringRef Name = DTy.getName();
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to main type, void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ addType(&Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+  // Add source line info if available and this is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ // Get core information.
+ StringRef Name = CTy.getName();
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIDescriptor Enum(Elements.getElement(i));
+ if (Enum.isEnumerator()) {
+ ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
+ Buffer.addChild(ElemDie);
+ }
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ addType(&Buffer, DIType(RTy));
+
+ bool isPrototyped = true;
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Ty = Elements.getElement(i);
+ if (Ty.isUnspecifiedParameter()) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+ Buffer.addChild(Arg);
+ isPrototyped = false;
+ } else {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Ty));
+ Buffer.addChild(Arg);
+ }
+ }
+ // Add prototype flag.
+ if (isPrototyped)
+ addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+    // A forward-declared struct type may not have elements available.
+ unsigned N = Elements.getNumElements();
+ if (N == 0)
+ break;
+
+ // Add elements to structure type.
+ for (unsigned i = 0; i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIE *ElemDie = NULL;
+ if (Element.isSubprogram()) {
+ DISubprogram SP(Element);
+ ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
+ if (SP.isProtected())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (SP.isPrivate())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (SP.isExplicit())
+ addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+ }
+ else if (Element.isVariable()) {
+ DIVariable DV(Element);
+ ElemDie = new DIE(dwarf::DW_TAG_variable);
+ addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ DV.getName());
+ addType(ElemDie, DV.getType());
+ addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addSourceLine(ElemDie, DV);
+ } else if (Element.isDerivedType())
+ ElemDie = createMemberDIE(DIDerivedType(Element));
+ else
+ continue;
+ Buffer.addChild(ElemDie);
+ }
+
+ if (CTy.isAppleBlockExtension())
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+
+ DICompositeType ContainingType = CTy.getContainingType();
+ if (DIDescriptor(ContainingType).isCompositeType())
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DIType(ContainingType)));
+ else {
+ DIDescriptor Context = CTy.getContext();
+ addToContextOwner(&Buffer, Context);
+ }
+
+ if (CTy.isObjcClassComplete())
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
+ dwarf::DW_FORM_flag, 1);
+
+ if (Tag == dwarf::DW_TAG_class_type)
+ addTemplateParams(Buffer, CTy.getTemplateParams());
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+ || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ {
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else {
+ // Add zero size if it is not a forward declaration.
+ if (CTy.isForwardDecl())
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ else
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ }
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ addSourceLine(&Buffer, CTy);
+ }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ DIE *ParamDIE = getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ DIE *NDie = getDIE(NS);
+ if (NDie)
+ return NDie;
+ NDie = new DIE(dwarf::DW_TAG_namespace);
+ insertDIE(NS, NDie);
+ if (!NS.getName().empty())
+ addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ addSourceLine(NDie, NS);
+ addToContextOwner(NDie, NS.getContext());
+ return NDie;
+}
+
+/// getRealLinkageName - If the linkage name starts with the special LLVM
+/// prefix that tells the asm printer not to emit the usual symbol prefix,
+/// return the linkage name with that prefix skipped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
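+
+// For example, a linkage name stored as "\1_ZN3foo3barEv" is returned as
+// "_ZN3foo3barEv"; a name without the '\1' prefix is returned unchanged.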
+
+/// getOrCreateSubprogramDIE - Create new DIE using SP.
+DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+ DIE *SPDie = getDIE(SP);
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ insertDIE(SP, SPDie);
+
+ // Add to context owner.
+ addToContextOwner(SPDie, SP.getContext());
+
+ // Add function template parameters.
+ addTemplateParams(*SPDie, SP.getTemplateParams());
+
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+
+  // If this DIE is going to refer to declaration info using AT_specification
+  // then there is no need to add other attributes.
+ if (SP.getFunctionDeclaration().isSubprogram())
+ return SPDie;
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ SP.getName());
+
+ addSourceLine(SPDie, SP);
+
+ if (SP.isPrototyped())
+ addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
+ addType(SPDie, SPTy);
+ else
+ addType(SPDie, DIType(Args.getElement(0)));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ DIEBlock *Block = getDIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie,
+ SP.getContainingType()));
+ }
+
+ if (!SP.isDefinition()) {
+ addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        DIType ATy = DIType(Args.getElement(i));
+ addType(Arg, ATy);
+ if (ATy.isArtificial())
+ addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ }
+
+ if (SP.isArtificial())
+ addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+ if (!SP.isLocalToUnit())
+ addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ if (SP.isOptimized())
+ addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ if (unsigned isa = Asm->getISAEncoding()) {
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ return SPDie;
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global struct.
+ Value *Ptr = CE->getOperand(0);
+ if (!isa<GlobalValue>(Ptr) ||
+ !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
+
+/// createGlobalVariableDIE - create global variable DIE.
+void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
+ // Check for pre-existence.
+ if (getDIE(N))
+ return;
+
+ DIGlobalVariable GV(N);
+ if (!GV.Verify())
+ return;
+
+ DIE *VariableDIE = new DIE(GV.getTag());
+ // Add to map.
+ insertDIE(N, VariableDIE);
+
+ // Add name.
+ addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+ StringRef LinkageName = GV.getLinkageName();
+ bool isGlobalVariable = GV.getGlobal() != NULL;
+ if (!LinkageName.empty() && isGlobalVariable)
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+ // Add type.
+ DIType GTy = GV.getType();
+ addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ // Expose as global.
+ addGlobal(GV.getName(), VariableDIE);
+ }
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to context owner.
+ DIDescriptor GVContext = GV.getContext();
+ addToContextOwner(VariableDIE, GVContext);
+ // Add location.
+ if (isGlobalVariable) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(GV.getGlobal()));
+    // Do not create a specification DIE if the context is either the
+    // compile unit or a subprogram.
+ if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag,
+ 1);
+ addDie(VariableSpecDIE);
+ } else {
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+ addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+ else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ Value *Ptr = CE->getOperand(0);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
+ addUInt(Block, 0, dwarf::DW_FORM_udata,
+ Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+
+ return;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+
+  // The L value defines the lower bound, which is typically zero for C/C++.
+  // The H value is the upper bound. Values are 64 bit. H - L + 1 is the size
+  // of the array. If L > H then do not emit the DW_AT_lower_bound and
+  // DW_AT_upper_bound attributes. If L is zero and H is also zero then the
+  // array has one element; in that case do not emit the lower bound.
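+  //
+  // For example, 'int A[10]' is described by a DISubrange with L = 0 and
+  // H = 9, so only DW_AT_upper_bound(9) is emitted below; a subrange with
+  // unknown bounds may be encoded with L > H, in which case neither
+  // attribute is emitted.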
+
+ if (L > H) {
+ Buffer.addChild(DW_Subrange);
+ return;
+ }
+ if (L)
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// constructContainingTypeDIEs - Construct DIEs for types that contain
+/// vtables.
+void CompileUnit::constructContainingTypeDIEs() {
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ const MDNode *N = CI->second;
+ if (!N) continue;
+ DIE *NDie = getDIE(N);
+ if (!NDie) continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ }
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
+ StringRef Name = DV->getName();
+ if (Name.empty())
+ return NULL;
+
+ // Translate tag to proper Dwarf tag.
+ unsigned Tag = DV->getTag();
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ DbgVariable *AbsVar = DV->getAbstractVariable();
+ DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
+ if (AbsDIE)
+ addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
+ else {
+ addString(VariableDie, dwarf::DW_AT_name,
+ dwarf::DW_FORM_string, Name);
+ addSourceLine(VariableDie, DV->getVariable());
+ addType(VariableDie, DV->getType());
+ }
+
+ if (DV->isArtificial())
+ addUInt(VariableDie, dwarf::DW_AT_artificial,
+ dwarf::DW_FORM_flag, 1);
+
+ if (isScopeAbstract) {
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
+ unsigned Offset = DV->getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ addLabel(VariableDie, dwarf::DW_AT_location,
+ dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("debug_loc", Offset));
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ if (const MachineInstr *DVInsn = DV->getMInsn()) {
+ bool updated = false;
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
+ FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+
+ } else if (RegOp.getReg())
+ addVariableAddress(DV, VariableDie,
+ MachineLocation(RegOp.getReg()));
+ updated = true;
+ }
+ else if (DVInsn->getOperand(0).isImm())
+ updated =
+ addConstantValue(VariableDie, DVInsn->getOperand(0),
+ DV->getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
+ addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ DV->getType().isUnsignedDIType());
+ } else {
+ addVariableAddress(DV, VariableDie,
+ Asm->getDebugValueLocation(DVInsn));
+ updated = true;
+ }
+ if (!updated) {
+      // If VariableDie is not updated then the DBG_VALUE instruction does
+      // not have valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ } else {
+    // ... else use the frame index.
+ int FI = DV->getFrameIndex();
+ if (FI != ~0) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, VariableDie, Location);
+ }
+ }
+
+ DV->setDIE(VariableDie);
+ return VariableDie;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getTargetData().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+    // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
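+    // Worked example (little-endian, illustrative): for
+    //   struct S { int a : 4; int b : 3; };
+    // 'b' has OffsetInBits = 4, SizeInBits = 3 and an original type size of
+    // 32 bits, so HiMark = 32, FieldOffset = 0 and the DIE gets
+    // DW_AT_byte_size = 4, DW_AT_bit_size = 3,
+    // DW_AT_bit_offset = 32 - (4 + 3) = 25, with a data member location of
+    // byte 0.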
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+    // For C++, virtual base classes are not at a fixed offset. Use the
+    // following expression to extract the appropriate offset from the vtable.
+    // BaseAddr = ObAddr + *((*ObAddr) - Offset)
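+    // The block built below encodes this as: DW_OP_dup, DW_OP_deref,
+    // DW_OP_constu <OffsetInBits>, DW_OP_minus, DW_OP_deref, DW_OP_plus.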
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
+ StringRef PropertyName = DT.getObjCPropertyName();
+ if (!PropertyName.empty()) {
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
+ PropertyName);
+ StringRef GetterName = DT.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
+ dwarf::DW_FORM_string, GetterName);
+ StringRef SetterName = DT.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
+ dwarf::DW_FORM_string, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (DT.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (DT.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (DT.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (DT.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (DT.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (DT.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+ }
+ return MemberDie;
+}
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..7859265
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,298 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ DwarfDebug *DD;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+ /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+ /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+public:
+ CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobal - Add a new global entity to the compile unit.
+ ///
+ void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(DIType Ty);
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+public:
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+  /// addSourceLine - Add location information to the specified debug
+  /// information entry.
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
+
+ /// addAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addConstantValue - Add constant value entry in variable DIE.
+ bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
+ bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+
+ /// addConstantFPValue - Add constant value entry in variable DIE.
+ bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+
+ /// addTemplateParams - Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+ /// addRegisterOp - Add register operand.
+ void addRegisterOp(DIE *TheDie, unsigned Reg);
+
+ /// addRegisterOffset - Add register offset.
+ void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset);
+
+ /// addComplexAddress - Start with the address based on the location provided,
+ /// and generate the DWARF information necessary to find the actual variable
+ /// (navigating the extra location information encoded in the type) based on
+ /// the starting location. Add the DWARF information to the die.
+ ///
+ void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// addBlockByrefAddress - Start with the address based on the location
+ /// provided, and generate the DWARF information necessary to find the
+ /// actual Block variable (navigating the Block struct) based on the
+ /// starting location. Add the DWARF information to the die. Obsolete,
+ /// please use addComplexAddress instead.
+ ///
+ void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// addVariableAddress - Add DW_AT_location attribute for a
+ /// DbgVariable based on provided MachineLocation.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
+
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+ /// addType - Add a new type attribute to the specified entity.
+ void addType(DIE *Entity, DIType Ty);
+
+ /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+ DIE *getOrCreateNameSpace(DINameSpace NS);
+
+ /// getOrCreateSubprogramDIE - Create new DIE using SP.
+ DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(const MDNode *N);
+
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateTypeParameter.
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateValueParameter.
+ DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry);
+
+ /// createGlobalVariableDIE - create global variable DIE.
+ void createGlobalVariableDIE(const MDNode *N);
+
+ void addPubTypes(DISubprogram SP);
+
+ /// constructTypeDIE - Construct basic type die from DIBasicType.
+ void constructTypeDIE(DIE &Buffer,
+ DIBasicType BTy);
+
+ /// constructTypeDIE - Construct derived type die from DIDerivedType.
+ void constructTypeDIE(DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer,
+ DICompositeType CTy);
+
+ /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+ /// constructContainingTypeDIEs - Construct DIEs for types that contain
+ /// vtables.
+ void constructContainingTypeDIEs();
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract);
+
+ /// createMemberDIE - Create new member DIE.
+ DIE *createMemberDIE(DIDerivedType DT);
+
+private:
+
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+ DIEInteger *DIEIntegerOne;
+};
+
+} // end llvm namespace
+#endif
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 0000000..1b7e370
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,2016 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/Path.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+ cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::init(false));
+
+namespace {
+ const char *DWARFGroupName = "DWARF Emission";
+ const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+DIType DbgVariable::getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ return Ty;
+ }
+ return Ty;
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+ : Asm(A), MMI(Asm->MMI), FirstCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize),
+ PrevLabel(NULL) {
+ NextStringPoolNumber = 0;
+
+ DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+ DwarfStrSectionSym = TextSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ FunctionBeginSym = FunctionEndSym = 0;
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ beginModule(M);
+ }
+}
+DwarfDebug::~DwarfDebug() {
+}
+
+MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+ if (Entry.first) return Entry.first;
+
+ Entry.second = NextStringPoolNumber++;
+ return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+}
+
+
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
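+/// For example, the first distinct abbreviation added receives number 1; a
+/// structurally identical abbreviation encountered later reuses that number.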
+///
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
+
+/// getRealLinkageName - If the special LLVM prefix that is used to inform the
+/// asm printer not to emit the usual symbol prefix is present, return the
+/// linkage name with this special LLVM prefix skipped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
+ const MDNode *SPNode) {
+ DIE *SPDie = SPCU->getDIE(SPNode);
+
+ assert(SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (SPDecl.isSubprogram())
+ // Refer function declaration directly.
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ SPCU->getOrCreateSubprogramDIE(SPDecl));
+ else {
+    // There is no need to generate a specification DIE for a function
+    // defined at compile unit level. If a function is defined inside another
+    // function then gdb prefers the definition at top level and does not
+    // expect a specification DIE in the parent function. So avoid creating
+    // a specification DIE for a function defined inside a function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext())) {
+      SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+          DIType ATy = DIType(Args.getElement(i));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ SPDeclDie);
+ SPCU->addDie(SPDie);
+ }
+ }
+ // Pick up abstract subprogram DIE.
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsSPDIE);
+ SPCU->addDie(SPDie);
+ }
+
+ SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
+ SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ return SPDie;
+}
+
+/// constructLexicalScopeDIE - Construct a new DW_TAG_lexical_block
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
+
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return 0;
+
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size()
+ * Asm->getTargetData().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ return ScopeDIE;
+ }
+
+ const MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ const MCSymbol *End = getLabelAfterInsn(RI->second);
+
+ if (End == 0) return 0;
+
+ assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+ assert(End->isDefined() && "Invalid end label for an inlined scope!");
+
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+
+ return ScopeDIE;
+}
+
+/// constructInlinedScopeDIE - This scope represents the inlined body of
+/// a function. Construct a DIE to represent this concrete inlined copy
+/// of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
+ LexicalScope *Scope) {
+
+ const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
+  assert(!Ranges.empty() &&
+         "LexicalScope does not have instruction markers!");
+
+ if (!Scope->getScopeNode())
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+ if (!OriginDIE) {
+ DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
+ return NULL;
+ }
+
+ SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
+ const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+
+  if (StartLabel == 0 || EndLabel == 0) {
+    assert(0 && "Unexpected Start and End labels for an inlined scope!");
+    return 0;
+  }
+ assert(StartLabel->isDefined() &&
+ "Invalid starting label for an inlined scope!");
+ assert(EndLabel->isDefined() &&
+ "Invalid end label for an inlined scope!");
+
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
+
+ if (Ranges.size() > 1) {
+ // .debug_range section has not been laid out yet. Emit offset in
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // DW_AT_ranges appropriately.
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+ DebugRangeSymbols.size()
+ * Asm->getTargetData().getPointerSize());
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+ }
+ DebugRangeSymbols.push_back(NULL);
+ DebugRangeSymbols.push_back(NULL);
+ } else {
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ StartLabel);
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ EndLabel);
+ }
+
+ InlinedSubprogramDIEs.insert(OriginDIE);
+
+  // Track the start label for this inlined function.
+  // The .debug_inlined section specification does not clearly state how
+  // to emit an inlined scope that is split into multiple instruction ranges.
+  // For now, use the first instruction range and emit the low_pc/high_pc
+  // pair and a corresponding .debug_inlined section entry for this pair.
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP);
+
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
+ ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP);
+ } else
+ I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+
+ DILocation DL(Scope->getInlinedAt());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ return ScopeDIE;
+}
+
+/// constructScopeDIE - Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
+ if (!Scope || !Scope->getScopeNode())
+ return NULL;
+
+ SmallVector <DIE *, 8> Children;
+
+ // Collect arguments for current function.
+ if (LScopes.isCurrentFunctionScope(Scope))
+ for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
+ if (DbgVariable *ArgDV = CurrentFnArguments[i])
+ if (DIE *Arg =
+ TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope()))
+ Children.push_back(Arg);
+
+ // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+ if (DIE *Variable =
+ TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope()))
+ Children.push_back(Variable);
+ const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+ if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
+ Children.push_back(Nested);
+ DIScope DS(Scope->getScopeNode());
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
+ else if (DS.isSubprogram()) {
+ ProcessedSPNodes.insert(DS);
+ if (Scope->isAbstractScope()) {
+ ScopeDIE = TheCU->getDIE(DS);
+ // Note down abstract DIE.
+ if (ScopeDIE)
+ AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+    } else
+      ScopeDIE = updateSubprogramScopeDIE(TheCU, DS);
+  } else {
+    // There is no need to emit an empty lexical block DIE.
+ if (Children.empty())
+ return NULL;
+ ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+ }
+
+ if (!ScopeDIE) return NULL;
+
+ // Add children
+ for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ ScopeDIE->addChild(*I);
+
+ if (DS.isSubprogram())
+ TheCU->addPubTypes(DISubprogram(DS));
+
+ return ScopeDIE;
+}
+
+/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// source file names. If none currently exists, create a new id and insert it
+/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+/// maps as well.
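+/// For example (illustrative), GetOrCreateSourceID("a.c", "/tmp") behaves
+/// like GetOrCreateSourceID("/tmp/a.c", ""): the first call emits a .file
+/// directive for "/tmp/a.c" and returns id 1; later calls with the same
+/// path return the cached id.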
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+  // MCStreamer expects the full path name as the filename.
+ if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+ return SrcId;
+}
+
+/// constructCompileUnit - Create new CompileUnit for the given
+/// metadata node with tag DW_TAG_compile_unit.
+CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(FN, Dir);
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this);
+ NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+ DIUnit.getProducer());
+ NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit.getLanguage());
+ NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+ // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
+ // simplifies debug range entries.
+ NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+  // DW_AT_stmt_list is an offset of the line number information for this
+  // compile unit in the debug_line section.
+  if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("section_line"));
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+
+ if (!Dir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (DIUnit.isOptimized())
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ StringRef Flags = DIUnit.getFlags();
+ if (!Flags.empty())
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string,
+ Flags);
+
+ unsigned RVer = DIUnit.getRunTimeVersion();
+ if (RVer)
+ NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ if (!FirstCU)
+ FirstCU = NewCU;
+ CUMap.insert(std::make_pair(N, NewCU));
+ return NewCU;
+}
+
+/// constructSubprogramDIE - Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+ const MDNode *N) {
+ DISubprogram SP(N);
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+ // class type.
+ return;
+
+ DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
+
+ // Add to map.
+ TheCU->insertDIE(N, SubprogramDie);
+
+ // Add to context owner.
+ TheCU->addToContextOwner(SubprogramDie, SP.getContext());
+
+ // Expose as global.
+ TheCU->addGlobal(SP.getName(), SubprogramDie);
+
+ SPMap[N] = TheCU;
+ return;
+}
+
+/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
+/// as llvm.dbg.enum and llvm.dbg.ty.
+void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) {
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
+ constructSubprogramDIE(CU, N);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
+ CU->createGlobalVariableDIE(N);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
+ CU->getOrCreateTypeDIE(Ty);
+ }
+
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIType Ty(NMD->getOperand(i));
+ if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
+ CU->getOrCreateTypeDIE(Ty);
+ }
+}
+
+/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
+/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(*M);
+
+ bool HasDebugInfo = false;
+ // Scan all the compile-units to see if there are any marked as the main
+ // unit. If not, we do not generate debug info.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ if (DICompileUnit(*I).isMain()) {
+ HasDebugInfo = true;
+ break;
+ }
+ }
+ if (!HasDebugInfo) return false;
+
+ // Create all the compile unit DIEs.
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I)
+ constructCompileUnit(*I);
+
+ // Create DIEs for each global variable.
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I) {
+ const MDNode *N = *I;
+ if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
+ CU->createGlobalVariableDIE(N);
+ }
+
+ // Create DIEs for each subprogram.
+ for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+ E = DbgFinder.subprogram_end(); I != E; ++I) {
+ const MDNode *N = *I;
+ if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
+ constructSubprogramDIE(CU, N);
+ }
+
+ return HasDebugInfo;
+}
+
+/// beginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::beginModule(Module *M) {
+ if (DisableDebugInfoPrinting)
+ return;
+
+  // If the module has named metadata anchors then use them, otherwise scan
+  // the module using the debug info finder to collect debug info.
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CUNode(CU_Nodes->getOperand(i));
+ CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray GVs = CUNode.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+ CU->createGlobalVariableDIE(GVs.getElement(i));
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ constructSubprogramDIE(CU, SPs.getElement(i));
+ DIArray EnumTypes = CUNode.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ DIArray RetainedTypes = CUNode.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+ }
+ } else if (!collectLegacyDebugInfo(M))
+ return;
+
+ collectInfoFromNamedMDNodes(M);
+
+ // Tell MMI that we have debug info.
+ MMI->setDebugInfoAvailability(true);
+
+ // Emit initial sections.
+ EmitSectionLabels();
+
+ // Prime section data.
+ SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+}
+
+/// endModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::endModule() {
+ if (!FirstCU) return;
+ const Module *M = MMI->getModule();
+ DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
+
+ // Collect info for variables that were optimized out.
+ if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit TheCU(CU_Nodes->getOperand(i));
+ DIArray Subprograms = TheCU.getSubprograms();
+ for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
+ DISubprogram SP(Subprograms.getElement(i));
+ if (ProcessedSPNodes.count(SP) != 0) continue;
+ if (!SP.Verify()) continue;
+ if (!SP.isDefinition()) continue;
+ DIArray Variables = SP.getVariables();
+ if (Variables.getNumElements() == 0) continue;
+
+ LexicalScope *Scope =
+ new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
+ DeadFnScopeMap[SP] = Scope;
+
+ // Construct subprogram DIE and add variables DIEs.
+ CompileUnit *SPCU = CUMap.lookup(TheCU);
+        assert(SPCU && "Unable to find Compile Unit!");
+ constructSubprogramDIE(SPCU, SP);
+ DIE *ScopeDIE = SPCU->getDIE(SP);
+ for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+ DIVariable DV(Variables.getElement(vi));
+ if (!DV.Verify()) continue;
+ DbgVariable *NewVar = new DbgVariable(DV, NULL);
+ if (DIE *VariableDIE =
+ SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
+ ScopeDIE->addChild(VariableDIE);
+ }
+ }
+ }
+ }
+
+  // Attach the DW_AT_inline attribute to inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+
+ // Emit DW_AT_containing_type attribute to connect types with their
+ // vtable holding type.
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
+ CUE = CUMap.end(); CUI != CUE; ++CUI) {
+ CompileUnit *TheCU = CUI->second;
+ TheCU->constructContainingTypeDIEs();
+ }
+
+ // Standard sections final addresses.
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+ // End text sections.
+ for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+ Asm->OutStreamer.SwitchSection(SectionMap[i]);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+ }
+
+ // Compute DIE offsets and sizes.
+ computeSizeAndOffsets();
+
+ // Emit all the DIEs into a debug info section
+ emitDebugInfo();
+
+  // Emit corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+
+ // Emit info into a debug pubnames section.
+ emitDebugPubNames();
+
+ // Emit info into a debug pubtypes section.
+ emitDebugPubTypes();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ EmitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ emitDebugInlineInfo();
+
+ // Emit info into a debug str section.
+ emitDebugStr();
+
+  // Clean up.
+ DeleteContainerSeconds(DeadFnScopeMap);
+ SPMap.clear();
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I)
+ delete I->second;
+ FirstCU = NULL; // Reset for the next Module, if any.
+}
+
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
+ DebugLoc ScopeLoc) {
+ LLVMContext &Ctx = DV->getContext();
+  // More than one inlined variable corresponds to one abstract variable.
+ DIVariable Var = cleanseInlinedVariable(DV, Ctx);
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var, NULL);
+ addScopeVariable(Scope, AbsDbgVariable);
+ AbstractVariables[Var] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
+/// addCurrentFnArgument - If Var is a current function argument then add
+/// it to CurrentFnArguments list.
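+/// For example, the variable for the second source-level argument
+/// (ArgNo == 2) is stored at CurrentFnArguments[1]; the vector is grown when
+/// the frontend reports more arguments than llvm::Function carries.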
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, LexicalScope *Scope) {
+ if (!LScopes.isCurrentFunctionScope(Scope))
+ return false;
+ DIVariable DV = Var->getVariable();
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ return false;
+ unsigned ArgNo = DV.getArgNumber();
+ if (ArgNo == 0)
+ return false;
+
+ size_t Size = CurrentFnArguments.size();
+ if (Size == 0)
+ CurrentFnArguments.resize(MF->getFunction()->arg_size());
+  // llvm::Function argument size is not a good indicator of how many
+  // arguments the function has at the source level.
+ if (ArgNo > Size)
+ CurrentFnArguments.resize(ArgNo * 2);
+ CurrentFnArguments[ArgNo - 1] = Var;
+ return true;
+}
+
+/// collectVariableInfoFromMMITable - Collect variable information from
+/// side table maintained by MMI.
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ Processed.insert(Var);
+ DIVariable DV(Var);
+ const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+ LexicalScope *Scope = LScopes.findLexicalScope(VP.second);
+
+ // If variable scope is not found then skip this variable.
+ if (Scope == 0)
+ continue;
+
+ DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+ DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable);
+ RegVar->setFrameIndex(VP.first);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ addScopeVariable(Scope, RegVar);
+ if (AbsDbgVariable)
+ AbsDbgVariable->setFrameIndex(VP.first);
+ }
+}
+
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
+/// DBG_VALUE instruction, is in a defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+ assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
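+ // A register-located DBG_VALUE has exactly three operands: a non-zero
+ // register, a zero offset immediate, and the variable metadata.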
+ return MI->getNumOperands() == 3 &&
+ MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+ MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
+}
+
+/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
+/// at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
+ const MCSymbol *SLabel,
+ const MachineInstr *MI) {
+ const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ if (MI->getNumOperands() != 3) {
+ MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+ MachineLocation MLoc;
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
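+ // Otherwise the value is a constant; choose the entry kind from the
+ // operand type.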
+ if (MI->getOperand(0).isImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+ assert(0 && "Unexpected 3 operand DBG_VALUE instruction!");
+ return DotDebugLocEntry();
+}
+
+/// collectVariableInfo - Find variables for each lexical scope.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
+
+ // Collect variable info from the side table maintained by MMI.
+ collectVariableInfoFromMMITable(MF, Processed);
+
+ for (SmallVectorImpl<const MDNode*>::const_iterator
+ UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
+ ++UVI) {
+ const MDNode *Var = *UVI;
+ if (Processed.count(Var))
+ continue;
+
+ // History contains relevant DBG_VALUE instructions for Var and instructions
+ // clobbering it.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty())
+ continue;
+ const MachineInstr *MInsn = History.front();
+
+ DIVariable DV(Var);
+ LexicalScope *Scope = NULL;
+ if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(DV.getContext()).describes(MF->getFunction()))
+ Scope = LScopes.getCurrentFunctionScope();
+ else {
+ if (DV.getVersion() <= LLVMDebugVersion9)
+ Scope = LScopes.findLexicalScope(MInsn->getDebugLoc());
+ else {
+ if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
+ }
+ }
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ Processed.insert(DV);
+ assert(MInsn->isDebugValue() && "History must begin with debug value");
+ DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
+ DbgVariable *RegVar = new DbgVariable(DV, AbsVar);
+ if (!addCurrentFnArgument(MF, RegVar, Scope))
+ addScopeVariable(Scope, RegVar);
+ if (AbsVar)
+ AbsVar->setMInsn(MInsn);
+
+ // Simple ranges that are fully coalesced.
+ if (History.size() <= 1 || (History.size() == 2 &&
+ MInsn->isIdenticalTo(History.back()))) {
+ RegVar->setMInsn(MInsn);
+ continue;
+ }
+
+ // Handle multiple DBG_VALUE instructions describing one variable.
+ RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
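+ // Record where this variable's entries begin in DotDebugLocEntries; the
+ // variable's DIE will refer back to this offset.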
+
+ for (SmallVectorImpl<const MachineInstr*>::const_iterator
+ HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
+ const MachineInstr *Begin = *HI;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if DBG_VALUE is truncating a range.
+ if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg()
+ && !Begin->getOperand(0).getReg())
+ continue;
+
+ // Compute the range for a register location.
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+ const MCSymbol *SLabel = 0;
+
+ if (HI + 1 == HE)
+ // If Begin is the last instruction in History then its value is valid
+ // until the end of the function.
+ SLabel = FunctionEndSym;
+ else {
+ const MachineInstr *End = HI[1];
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ << "\t" << *Begin << "\t" << *End << "\n");
+ if (End->isDebugValue())
+ SLabel = getLabelBeforeInsn(End);
+ else {
+ // End is a normal instruction clobbering the range.
+ SLabel = getLabelAfterInsn(End);
+ assert(SLabel && "Forgot label after clobber instruction");
+ ++HI;
+ }
+ }
+
+ // The value is valid until the next DBG_VALUE or clobber.
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
+ }
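+ // An empty entry terminates this variable's location list.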
+ DotDebugLocEntries.push_back(DotDebugLocEntry());
+ }
+
+ // Collect info for variables that were optimized out.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !Processed.insert(DV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ }
+}
+
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+/// beginInstruction - Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ // Check if source location changes, but ignore DBG_VALUE locations.
+ if (!MI->isDebugValue()) {
+ DebugLoc DL = MI->getDebugLoc();
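+ // Emit a new line entry only when the location changes; unknown
+ // locations are emitted only if UnknownLocations is set.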
+ if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
+ unsigned Flags = DWARF2_FLAG_IS_STMT;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ }
+ if (!DL.isUnknown()) {
+ const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+ } else
+ recordSourceLine(0, 0, 0, 0);
+ }
+ }
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+/// endInstruction - Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
+ // Don't create a new label after DBG_VALUE instructions.
+ // They don't generate code.
+ if (!MI->isDebugValue())
+ PrevLabel = 0;
+
+ DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+ LabelsAfterInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsAfterInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ // We need a label after this instruction.
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+/// identifyScopeMarkers() -
+/// Each LexicalScope has a first and a last instruction that mark the
+/// beginning and end of the scope respectively. Create an inverse map that
+/// lists the scopes that start (and end) at an instruction. One instruction
+/// may start (or end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
+ SmallVector<LexicalScope *, 4> WorkList;
+ WorkList.push_back(LScopes.getCurrentFunctionScope());
+ while (!WorkList.empty()) {
+ LexicalScope *S = WorkList.pop_back_val();
+
+ const SmallVector<LexicalScope *, 4> &Children = S->getChildren();
+ if (!Children.empty())
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ if (S->isAbstractScope())
+ continue;
+
+ const SmallVector<InsnRange, 4> &Ranges = S->getRanges();
+ if (Ranges.empty())
+ continue;
+ for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+ RE = Ranges.end(); RI != RE; ++RI) {
+ assert(RI->first && "InsnRange does not have first instruction!");
+ assert(RI->second && "InsnRange does not have second instruction!");
+ requestLabelBeforeInsn(RI->first);
+ requestLabelAfterInsn(RI->second);
+ }
+ }
+}
+
+/// getScopeNode - Get MDNode for DebugLoc's scope.
+static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
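+ // Walk the inlined-at chain to find the scope of the outermost call site.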
+ if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
+ return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
+ return DL.getScope(Ctx);
+}
+
+/// getFnDebugLoc - Walk up the scope chain of the given debug loc and find
+/// the line number info for the function.
+static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
+ const MDNode *Scope = getScopeNode(DL, Ctx);
+ DISubprogram SP = getDISubprogram(Scope);
+ if (SP.Verify())
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ return DebugLoc();
+}
+
+/// beginFunction - Gather pre-function debug information. Assumes it is
+/// emitted immediately after the function entry point.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo()) return;
+ LScopes.initialize(*MF);
+ if (LScopes.empty()) return;
+ identifyScopeMarkers();
+
+ FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber());
+ // Assumes we are in the correct section after the function entry point.
+ Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+
+ assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ /// LiveUserVar - Map physreg numbers to the MDNode they contain.
+ std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ bool AtBlockEntry = true;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+
+ if (MI->isDebugValue()) {
+ assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
+
+ // Keep track of user variables.
+ const MDNode *Var =
+ MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ // Variable is in a register, we need to check for clobbers.
+ if (isDbgValueInDefinedReg(MI))
+ LiveUserVar[MI->getOperand(0).getReg()] = Var;
+
+ // Check the history of this variable.
+ SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ if (History.empty()) {
+ UserVariables.push_back(Var);
+ // The first mention of a function argument gets the FunctionBeginSym
+ // label, so arguments are visible when breaking at function entry.
+ DIVariable DV(Var);
+ if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(getDISubprogram(DV.getContext()))
+ .describes(MF->getFunction()))
+ LabelsBeforeInsn[MI] = FunctionBeginSym;
+ } else {
+ // We have seen this variable before. Try to coalesce DBG_VALUEs.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue()) {
+ // Coalesce identical entries at the end of History.
+ if (History.size() >= 2 &&
+ Prev->isIdenticalTo(History[History.size() - 2])) {
+ DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
+ << "\t" << *History[History.size() - 2] << "\n");
+ History.pop_back();
+ }
+
+ // Terminate old register assignments that don't reach MI.
+ MachineFunction::const_iterator PrevMBB = Prev->getParent();
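+ // Keep the assignment only if MI is in the same block or at the entry
+ // of the block immediately following Prev's; otherwise end it there.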
+ if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) &&
+ isDbgValueInDefinedReg(Prev)) {
+ // Previous register assignment needs to terminate at the end of
+ // its basic block.
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end()) {
+ // Drop DBG_VALUE for empty range.
+ DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+ << "\t" << *Prev << "\n");
+ History.pop_back();
+ }
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ }
+ }
+ History.push_back(MI);
+ } else {
+ // Not a DBG_VALUE instruction.
+ if (!MI->isLabel())
+ AtBlockEntry = false;
+
+ // The first known non-DBG_VALUE location marks the beginning of the
+ // function body.
+ if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())
+ PrologEndLoc = MI->getDebugLoc();
+
+ // Check if the instruction clobbers any registers with debug vars.
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+ continue;
+ for (const unsigned *AI = TRI->getOverlaps(MOI->getReg());
+ unsigned Reg = *AI; ++AI) {
+ const MDNode *Var = LiveUserVar[Reg];
+ if (!Var)
+ continue;
+ // Reg is now clobbered.
+ LiveUserVar[Reg] = 0;
+
+ // Was Var last defined by a DBG_VALUE referring to Reg?
+ DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
+ if (HistI == DbgValues.end())
+ continue;
+ SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+ if (History.empty())
+ continue;
+ const MachineInstr *Prev = History.back();
+ // Sanity-check: Register assignments are terminated at the end of
+ // their block.
+ if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
+ continue;
+ // Is the variable still in Reg?
+ if (!isDbgValueInDefinedReg(Prev) ||
+ Prev->getOperand(0).getReg() != Reg)
+ continue;
+ // Var is clobbered. Make sure the next instruction gets a label.
+ History.push_back(MI);
+ }
+ }
+ }
+ }
+ }
+
+ for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
+ I != E; ++I) {
+ SmallVectorImpl<const MachineInstr*> &History = I->second;
+ if (History.empty())
+ continue;
+
+ // Make sure the final register assignments are terminated.
+ const MachineInstr *Prev = History.back();
+ if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
+ const MachineBasicBlock *PrevMBB = Prev->getParent();
+ MachineBasicBlock::const_iterator LastMI =
+ PrevMBB->getLastNonDebugInstr();
+ if (LastMI == PrevMBB->end())
+ // Drop DBG_VALUE for empty range.
+ History.pop_back();
+ else {
+ // Terminate after LastMI.
+ History.push_back(LastMI);
+ }
+ }
+ // Request labels for the full history.
+ for (unsigned i = 0, e = History.size(); i != e; ++i) {
+ const MachineInstr *MI = History[i];
+ if (MI->isDebugValue())
+ requestLabelBeforeInsn(MI);
+ else
+ requestLabelAfterInsn(MI);
+ }
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = FunctionBeginSym;
+
+ // Record beginning of function.
+ if (!PrologEndLoc.isUnknown()) {
+ DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc,
+ MF->getFunction()->getContext());
+ recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
+ FnStartDL.getScope(MF->getFunction()->getContext()),
+ DWARF2_FLAG_IS_STMT);
+ }
+}
+
+void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+ ScopeVariables[LS].push_back(Var);
+}
+
+/// endFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo() || LScopes.empty()) return;
+
+ // Define end label for subprogram.
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+ // Assumes we are in the correct section after the function entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
+
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert(TheCU && "Unable to find compile unit!");
+
+ // Construct abstract scopes.
+ ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
+ for (unsigned i = 0, e = AList.size(); i != e; ++i) {
+ LexicalScope *AScope = AList[i];
+ DISubprogram SP(AScope->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ DIArray Variables = SP.getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
+ continue;
+ if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
+ addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ }
+ }
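+ // Construct the abstract scope DIE only if this subprogram was not
+ // already processed.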
+ if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
+ constructScopeDIE(TheCU, AScope);
+ }
+
+ DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
+
+ if (!DisableFramePointerElim(*MF))
+ TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
+ dwarf::DW_FORM_flag, 1);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+ MMI->getFrameMoves()));
+
+ // Clear debug info
+ for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator
+ I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I)
+ DeleteContainerPointers(I->second);
+ ScopeVariables.clear();
+ DeleteContainerPointers(CurrentFnArguments);
+ UserVariables.clear();
+ DbgValues.clear();
+ AbstractVariables.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ PrevLabel = NULL;
+}
+
+/// recordSourceLine - Register a source line with debug info, emitting a
+/// .loc directive that provides correspondence to the source line list.
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
+ unsigned Flags) {
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF(S);
+ Fn = DBF.getFilename();
+ Dir = DBF.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ assert(0 && "Unexpected scope info");
+
+ Src = GetOrCreateSourceID(Fn, Dir);
+ }
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
+///
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // If not last sibling and has children then add sibling offset attribute.
+ if (!Last && !Children.empty())
+ Die->addSiblingOffset(DIEValueAllocator);
+
+ // Record the abbreviation.
+ assignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
+
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::computeSizeAndOffsets() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ // Compute size of compile unit header.
+ unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+ computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+ }
+}
+
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+/// EmitSectionLabels - Emit initial Dwarf sections with a label at
+/// the start of each one.
+void DwarfDebug::EmitSectionLabels() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Dwarf sections base addresses.
+ DwarfInfoSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ DwarfAbbrevSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+
+ if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+ EmitSectionSym(Asm, MacroInfo);
+
+ EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ EmitSectionSym(Asm, TLOF.getDwarfLocSection());
+ EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+ EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ DwarfStrSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
+ DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ "debug_range");
+
+ DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ "section_debug_loc");
+
+ TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ EmitSectionSym(Asm, TLOF.getDataSection());
+}
+
+/// emitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::emitDIE(DIE *Die) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Emit the code (index) for the abbreviation.
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+ Twine::utohexstr(Die->getOffset()) + ":0x" +
+ Twine::utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag()));
+ Asm->EmitULEB128(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+ switch (Attr) {
+ case dwarf::DW_AT_sibling:
+ Asm->EmitInt32(Die->getSiblingOffset());
+ break;
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
+ unsigned Addr = Origin->getOffset();
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ case dwarf::DW_AT_ranges: {
+ // The DW_AT_ranges value encodes an offset into the debug_range section.
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+
+ if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ 4);
+ } else {
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ }
+ break;
+ }
+ case dwarf::DW_AT_location: {
+ if (DIELabel *L = dyn_cast<DIELabel>(Values[i]))
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ else
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ case dwarf::DW_AT_accessibility: {
+ if (Asm->isVerbose()) {
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+ }
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ emitDIE(Children[j]);
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("End Of Children Mark");
+ Asm->EmitInt8(0);
+ }
+}
+
+/// emitDebugInfo - Emit the debug info section.
+///
+void DwarfDebug::emitDebugInfo() {
+ // Start debug info section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfInfoSection());
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ DIE *Die = TheCU->getCUDie();
+
+ // Emit the compile unit header.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+ TheCU->getID()));
+
+ // Emit the size of the content, not including the length field itself.
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+
+ Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+ DwarfAbbrevSectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+ emitDIE(Die);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+ }
+}
+
+/// emitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::emitAbbreviations() const {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAbbrevSection());
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin"));
+
+ // For each abbreviation.
+ for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ // Get the abbreviation data.
+ const DIEAbbrev *Abbrev = Abbreviations[i];
+
+ // Emit the abbreviation code (base-1 index).
+ Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128(0, "EOM(3)");
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end"));
+ }
+}
+
+/// emitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->OutStreamer.AddComment("Extended Op");
+ Asm->EmitInt8(0);
+
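+ // The operand is one sub-opcode byte plus a pointer-sized address.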
+ Asm->OutStreamer.AddComment("Op size");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+ Asm->OutStreamer.AddComment("DW_LNE_set_address");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+ Asm->OutStreamer.AddComment("Section end label");
+
+ Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
+ Asm->getTargetData().getPointerSize(),
+ 0/*AddrSpace*/);
+
+ // Mark end of matrix.
+ Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(1);
+ Asm->EmitInt8(1);
+}
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+ TheCU->getID()));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobals();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+ TheCU->getID()));
+ }
+}
+
+void DwarfDebug::emitDebugPubTypes() {
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.AddComment("Length of Public Types Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+ TheCU->getID()));
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+ TheCU->getID()));
+ }
+}
+
+/// emitDebugStr - Emit the string pool into a debug str section.
+///
+void DwarfDebug::emitDebugStr() {
+ // Check to see if it is worth the effort.
+ if (StringPool.empty()) return;
+
+ // Start the dwarf str section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfStrSection());
+
+ // Get all of the string pool entries and put them in an array by their ID so
+ // we can sort them.
+ SmallVector<std::pair<unsigned,
+ StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+
+ for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+ I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &*I));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+
+ // Emit the string itself.
+ Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
+ }
+}
+
+/// emitDebugLoc - Emit location lists into a debug loc section.
+///
+void DwarfDebug::emitDebugLoc() {
+ if (DotDebugLocEntries.empty())
+ return;
+
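+ // Coalesce adjacent entries that describe the same location.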
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I) {
+ DotDebugLocEntry &Entry = *I;
+ if (I + 1 != DotDebugLocEntries.end())
+ Entry.Merge(I+1);
+ }
+
+ // Start the dwarf loc section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+ unsigned index = 1;
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I, ++index) {
+ DotDebugLocEntry &Entry = *I;
+ if (Entry.isMerged()) continue;
+ if (Entry.isEmpty()) {
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+ DIVariable DV(Entry.Variable);
+ Asm->OutStreamer.AddComment("Loc expr size");
+ MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+ MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+ Asm->EmitLabelDifference(end, begin, 2);
+ Asm->OutStreamer.EmitLabel(begin);
+ if (Entry.isInt()) {
+ DIBasicType BTy(DV.getType());
+ if (BTy.Verify() &&
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
+ || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+ Asm->OutStreamer.AddComment("DW_OP_consts");
+ Asm->EmitInt8(dwarf::DW_OP_consts);
+ Asm->EmitSLEB128(Entry.getInt());
+ } else {
+ Asm->OutStreamer.AddComment("DW_OP_constu");
+ Asm->EmitInt8(dwarf::DW_OP_constu);
+ Asm->EmitULEB128(Entry.getInt());
+ }
+ } else if (Entry.isLocation()) {
+ if (!DV.hasComplexAddress())
+ // Regular entry.
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ else {
+ // Complex address entry.
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Entry.Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ Asm->OutStreamer.AddComment("DW_OP_deref");
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Loc);
+ i = 2;
+ }
+ } else {
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ }
+
+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref)
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ else llvm_unreachable("unknown Opcode found in complex address");
+ }
+ }
+ }
+ // else ... ignore constant fp. There is no good way to
+ // represent them here in DWARF.
+ Asm->OutStreamer.EmitLabel(end);
+ }
+ }
+}
+
+/// EmitDebugARanges - Emit address ranges into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+}
+
+/// emitDebugRanges - Emit range lists into a debug ranges section.
+///
+void DwarfDebug::emitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ for (SmallVector<const MCSymbol *, 8>::iterator
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I != E; ++I) {
+ if (*I)
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ }
+}
+
+/// emitDebugMacInfo - Emit macro info into a debug macinfo section.
+///
+void DwarfDebug::emitDebugMacInfo() {
+ if (const MCSection *LineInfo =
+ Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer.SwitchSection(LineInfo);
+ }
+}
+
+/// emitDebugInlineInfo - Emit inline info using the following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+/// otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+/// instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in the
+/// __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
+void DwarfDebug::emitDebugInlineInfo() {
+ if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
+ return;
+
+ if (!FirstCU)
+ return;
+
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+ Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+ Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
+
+ Asm->OutStreamer.AddComment("Dwarf Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+ for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+ const MDNode *Node = *I;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ = InlineInfo.find(Node);
+ SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+ DISubprogram SP(Node);
+ StringRef LName = SP.getLinkageName();
+ StringRef Name = SP.getName();
+
+ Asm->OutStreamer.AddComment("MIPS linkage name");
+ if (LName.empty()) {
+ Asm->OutStreamer.EmitBytes(Name, 0);
+ Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
+ } else
+ Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
+ DwarfStrSectionSym);
+
+ Asm->OutStreamer.AddComment("Function name");
+ Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ Asm->EmitULEB128(Labels.size(), "Inline count");
+
+ for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(LI->second->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+ Asm->OutStreamer.EmitSymbolValue(LI->first,
+ Asm->getTargetData().getPointerSize(),0);
+ }
+ }
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
+}
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfDebug.h b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfDebug.h
index d5982a6..35653be 100644
--- a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,7 +15,9 @@
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
@@ -23,12 +25,12 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
namespace llvm {
class CompileUnit;
class DbgConcreteScope;
-class DbgScope;
class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
@@ -39,19 +41,6 @@
class DIEBlock;
class DIEEntry;
-class DIEnumerator;
-class DIDescriptor;
-class DIVariable;
-class DIGlobal;
-class DIGlobalVariable;
-class DISubprogram;
-class DIBasicType;
-class DIDerivedType;
-class DIType;
-class DINameSpace;
-class DISubrange;
-class DICompositeType;
-
//===----------------------------------------------------------------------===//
/// SrcLineInfo - This class is used to record source line correspondence.
///
@@ -71,6 +60,126 @@
MCSymbol *getLabel() const { return Label; }
};
+/// DotDebugLocEntry - This struct describes location entries emitted in the
+/// .debug_loc section.
+typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ const MDNode *Variable;
+ bool Merged;
+ bool Constant;
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt
+ };
+ enum EntryType EntryKind;
+
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constants;
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
+ Constant(false) { Constants.Int = 0;}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+
+ /// Empty entries are also used as a trigger to emit a temp label. Such
+ /// labels are referenced to find the debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ bool isMerged() { return Merged; }
+ void Merge(DotDebugLocEntry *Next) {
+ if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+ return;
+ Next->Begin = Begin;
+ Merged = true;
+ }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() { return Constants.Int; }
+ const ConstantFP *getConstantFP() { return Constants.CFP; }
+ const ConstantInt *getConstantInt() { return Constants.CIP; }
+} DotDebugLocEntry;
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ DIE *TheDIE; // Variable DIE.
+ unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+ DbgVariable *AbsVar; // Corresponding Abstract variable, if any.
+ const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
+ int FrameIndex;
+public:
+ // AbsVar may be NULL.
+ DbgVariable(DIVariable V, DbgVariable *AV)
+ : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
+ FrameIndex(~0) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
+ void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+ unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ DbgVariable *getAbstractVariable() const { return AbsVar; }
+ const MachineInstr *getMInsn() const { return MInsn; }
+ void setMInsn(const MachineInstr *M) { MInsn = M; }
+ int getFrameIndex() const { return FrameIndex; }
+ void setFrameIndex(int FI) { FrameIndex = FI; }
+ // Translate tag to proper Dwarf tag.
+ unsigned getTag() const {
+ if (Var.getTag() == dwarf::DW_TAG_arg_variable)
+ return dwarf::DW_TAG_formal_parameter;
+
+ return dwarf::DW_TAG_variable;
+ }
+ /// isArtificial - Return true if DbgVariable is artificial.
+ bool isArtificial() const {
+ if (Var.isArtificial())
+ return true;
+ if (Var.getTag() == dwarf::DW_TAG_arg_variable
+ && getType().isArtificial())
+ return true;
+ return false;
+ }
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const;
+};
+
class DwarfDebug {
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
@@ -83,8 +192,13 @@
//
CompileUnit *FirstCU;
+
+ /// Maps an MDNode to its corresponding CompileUnit.
DenseMap <const MDNode *, CompileUnit *> CUMap;
+ /// Maps a subprogram MDNode to its corresponding CompileUnit.
+ DenseMap <const MDNode *, CompileUnit *> SPMap;
+
/// AbbreviationsSet - Used to uniquely define abbreviations.
///
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -93,36 +207,9 @@
///
std::vector<DIEAbbrev *> Abbreviations;
- /// DirectoryIdMap - Directory name to directory id map.
- ///
- StringMap<unsigned> DirectoryIdMap;
-
- /// DirectoryNames - A list of directory names.
- SmallVector<std::string, 8> DirectoryNames;
-
- /// SourceFileIdMap - Source file name to source file id map.
- ///
- StringMap<unsigned> SourceFileIdMap;
-
- /// SourceFileNames - A list of source file names.
- SmallVector<std::string, 8> SourceFileNames;
-
/// SourceIdMap - Source id map, i.e. pair of directory id and source file
/// id mapped to a unique id.
- DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
-
- /// SourceIds - Reverse map from source id to directory id + file id pair.
- ///
- SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
-
- /// Lines - List of source line correspondence.
- std::vector<SrcLineInfo> Lines;
-
- /// DIEBlocks - A list of all the DIEBlocks in use.
- std::vector<DIEBlock *> DIEBlocks;
-
- // DIEValueAllocator - All DIEValues are allocated through this allocator.
- BumpPtrAllocator DIEValueAllocator;
+ StringMap<unsigned> SourceIdMap;
/// StringPool - A String->Symbol mapping of strings used by indirect
/// references.
@@ -135,87 +222,27 @@
///
UniqueVector<const MCSection*> SectionMap;
- /// SectionSourceLines - Tracks line numbers per text section.
- ///
- std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
+ /// CurrentFnArguments - List of Arguments (DbgValues) for the current function.
+ SmallVector<DbgVariable *, 8> CurrentFnArguments;
- // CurrentFnDbgScope - Top level scope for the current function.
- //
- DbgScope *CurrentFnDbgScope;
-
- /// DbgScopeMap - Tracks the scopes in the current function. Owns the
- /// contained DbgScope*s.
- ///
- DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
-
- /// ConcreteScopes - Tracks the concrete scopees in the current function.
- /// These scopes are also included in DbgScopeMap.
- DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
-
- /// AbstractScopes - Tracks the abstract scopes a module. These scopes are
- /// not included DbgScopeMap. AbstractScopes owns its DbgScope*s.
- DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+ LexicalScopes LScopes;
/// AbstractSPDies - Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- /// AbstractScopesList - Tracks abstract scopes constructed while processing
- /// a function. This list is cleared during endFunction().
- SmallVector<DbgScope *, 4>AbstractScopesList;
+ /// ScopeVariables - Collection of dbg variables of a scope.
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
- /// AbstractVariables - Collection on abstract variables. Owned by the
- /// DbgScopes in AbstractScopes.
+ /// AbstractVariables - Collection of abstract variables.
DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
- /// DbgVariableToFrameIndexMap - Tracks frame index used to find
- /// variable's value.
- DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
-
- /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE
- /// machine instruction.
- DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
-
- /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol.
- DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap;
-
- /// DotDebugLocEntry - This struct describes location entries emitted in
- /// .debug_loc section.
- typedef struct DotDebugLocEntry {
- const MCSymbol *Begin;
- const MCSymbol *End;
- MachineLocation Loc;
- DotDebugLocEntry() : Begin(0), End(0) {}
- DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
- MachineLocation &L) : Begin(B), End(E), Loc(L) {}
- /// Empty entries are also used as a trigger to emit temp label. Such
- /// labels are referenced is used to find debug_loc offset for a given DIE.
- bool isEmpty() { return Begin == 0 && End == 0; }
- } DotDebugLocEntry;
-
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
- /// UseDotDebugLocEntry - DW_AT_location attributes for the DIEs in this set
- /// idetifies corresponding .debug_loc entry offset.
- SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
-
- /// VarToAbstractVarMap - Maps DbgVariable with corresponding Abstract
- /// DbgVariable, if any.
- DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
-
/// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
- /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
- /// need DW_AT_containing_type attribute. This attribute points to a DIE that
- /// corresponds to the MDNode mapped with the subprogram DIE.
- DenseMap<DIE *, const MDNode *> ContainingTypeMap;
-
- typedef SmallVector<DbgScope *, 2> ScopeVector;
-
- SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
-
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
@@ -234,9 +261,16 @@
/// instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
- /// insnNeedsLabel - Collection of instructions that need a label to mark
- /// a debuggging information entity.
- SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
+ /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction
+ /// in order of appearance.
+ SmallVector<const MDNode*, 8> UserVariables;
+
+ /// DbgValues - For each user variable, keep a list of DBG_VALUE
+ /// instructions in order. The list can also contain normal instructions that
+ /// clobber the previous DBG_VALUE.
+ typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
+ DbgValueHistoryMap;
+ DbgValueHistoryMap DbgValues;
SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
@@ -245,6 +279,10 @@
DebugLoc PrevInstLoc;
MCSymbol *PrevLabel;
+ /// PrologEndLoc - This location indicates the end of the function prologue
+ /// and the beginning of the function body.
+ DebugLoc PrologEndLoc;
+
struct FunctionDebugFrameInfo {
unsigned Number;
std::vector<MachineMove> Moves;
@@ -255,179 +293,24 @@
std::vector<FunctionDebugFrameInfo> DebugFrames;
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
// Section Symbols: these are assembler temporary labels that are emitted at
// the beginning of each supported dwarf section. These are used to form
// section offsets and are created by EmitSectionLabels.
- MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+ MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
MCSymbol *DwarfDebugLocSectionSym;
- MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
- DIEInteger *DIEIntegerOne;
private:
-
- /// getSourceDirectoryAndFileIds - Return the directory and file ids that
- /// maps to the source id. Source id starts at 1.
- std::pair<unsigned, unsigned>
- getSourceDirectoryAndFileIds(unsigned SId) const {
- return SourceIds[SId-1];
- }
-
- /// getNumSourceDirectories - Return the number of source directories in the
- /// debug info.
- unsigned getNumSourceDirectories() const {
- return DirectoryNames.size();
- }
-
- /// getSourceDirectoryName - Return the name of the directory corresponding
- /// to the id.
- const std::string &getSourceDirectoryName(unsigned Id) const {
- return DirectoryNames[Id - 1];
- }
-
- /// getSourceFileName - Return the name of the source file corresponding
- /// to the id.
- const std::string &getSourceFileName(unsigned Id) const {
- return SourceFileNames[Id - 1];
- }
-
- /// getNumSourceIds - Return the number of unique source ids.
- unsigned getNumSourceIds() const {
- return SourceIds.size();
- }
/// assignAbbrevNumber - Define a unique number for the abbreviation.
///
void assignAbbrevNumber(DIEAbbrev &Abbrev);
- /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
- /// information entry.
- DIEEntry *createDIEEntry(DIE *Entry);
-
- /// addUInt - Add an unsigned integer attribute data and value.
- ///
- void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
-
- /// addSInt - Add an signed integer attribute data and value.
- ///
- void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
-
- /// addString - Add a string attribute data and value.
- ///
- void addString(DIE *Die, unsigned Attribute, unsigned Form,
- const StringRef Str);
-
- /// addLabel - Add a Dwarf label attribute data and value.
- ///
- void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
- const MCSymbol *Label);
-
- /// addDelta - Add a label delta attribute data and value.
- ///
- void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
- const MCSymbol *Hi, const MCSymbol *Lo);
-
- /// addDIEEntry - Add a DIE attribute data and value.
- ///
- void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
-
- /// addBlock - Add block data.
- ///
- void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
-
- /// addSourceLine - Add location information to specified debug information
- /// entry.
- void addSourceLine(DIE *Die, DIVariable V);
- void addSourceLine(DIE *Die, DIGlobalVariable G);
- void addSourceLine(DIE *Die, DISubprogram SP);
- void addSourceLine(DIE *Die, DIType Ty);
- void addSourceLine(DIE *Die, DINameSpace NS);
-
- /// addAddress - Add an address attribute to a die based on the location
- /// provided.
- void addAddress(DIE *Die, unsigned Attribute,
- const MachineLocation &Location);
-
- /// addRegisterAddress - Add register location entry in variable DIE.
- bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
-
- /// addConstantValue - Add constant value entry in variable DIE.
- bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
-
- /// addConstantFPValue - Add constant value entry in variable DIE.
- bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
-
- /// addComplexAddress - Start with the address based on the location provided,
- /// and generate the DWARF information necessary to find the actual variable
- /// (navigating the extra location information encoded in the type) based on
- /// the starting location. Add the DWARF information to the die.
- ///
- void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
- const MachineLocation &Location);
-
- // FIXME: Should be reformulated in terms of addComplexAddress.
- /// addBlockByrefAddress - Start with the address based on the location
- /// provided, and generate the DWARF information necessary to find the
- /// actual Block variable (navigating the Block struct) based on the
- /// starting location. Add the DWARF information to the die. Obsolete,
- /// please use addComplexAddress instead.
- ///
- void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
- const MachineLocation &Location);
-
- /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable.
- void addVariableAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
- const MachineLocation &Location);
-
- /// addToContextOwner - Add Die into the list of its context owner's children.
- void addToContextOwner(DIE *Die, DIDescriptor Context);
-
- /// addType - Add a new type attribute to the specified entity.
- void addType(DIE *Entity, DIType Ty);
-
-
- /// getOrCreateNameSpace - Create a DIE for DINameSpace.
- DIE *getOrCreateNameSpace(DINameSpace NS);
-
- /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
- /// given DIType.
- DIE *getOrCreateTypeDIE(DIType Ty);
-
- void addPubTypes(DISubprogram SP);
-
- /// constructTypeDIE - Construct basic type die from DIBasicType.
- void constructTypeDIE(DIE &Buffer,
- DIBasicType BTy);
-
- /// constructTypeDIE - Construct derived type die from DIDerivedType.
- void constructTypeDIE(DIE &Buffer,
- DIDerivedType DTy);
-
- /// constructTypeDIE - Construct type DIE from DICompositeType.
- void constructTypeDIE(DIE &Buffer,
- DICompositeType CTy);
-
- /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
- void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
-
- /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
- void constructArrayTypeDIE(DIE &Buffer,
- DICompositeType *CTy);
-
- /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
- DIE *constructEnumTypeDIE(DIEnumerator ETy);
-
- /// createMemberDIE - Create new member DIE.
- DIE *createMemberDIE(DIDerivedType DT);
-
- /// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);
-
- /// getOrCreateDbgScope - Create DbgScope for the scope.
- DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
-
- DbgScope *getOrCreateAbstractScope(const MDNode *N);
+ void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
/// findAbstractVariable - Find abstract variable associated with Var.
DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
@@ -436,22 +319,22 @@
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
/// DIEs for these variables.
- DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
+ DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
/// constructLexicalScope - Construct new DW_TAG_lexical_block
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
- DIE *constructLexicalScopeDIE(DbgScope *Scope);
+ DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
/// constructInlinedScopeDIE - This scope represents inlined body of
/// a function. Construct DIE to represent this concrete inlined copy
/// of the function.
- DIE *constructInlinedScopeDIE(DbgScope *Scope);
+ DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
- DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
+ DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S);
/// constructScopeDIE - Construct a DIE for this scope.
- DIE *constructScopeDIE(DbgScope *Scope);
+ DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
/// EmitSectionLabels - Emit initial Dwarf sections with a label at
/// the start of each one.
@@ -482,18 +365,6 @@
///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitDebugLines - Emit source line information.
- ///
- void emitDebugLines();
-
- /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
- ///
- void emitCommonDebugFrame();
-
- /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
- /// section.
- void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
-
/// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
void emitDebugPubNames();
@@ -542,59 +413,29 @@
/// inlining instance.
void emitDebugInlineInfo();
- /// GetOrCreateSourceID - Look up the source id with the given directory and
- /// source file names. If none currently exists, create a new id and insert it
- /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
- /// maps as well.
- unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
-
/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
- void constructCompileUnit(const MDNode *N);
-
- /// getCompielUnit - Get CompileUnit DIE.
- CompileUnit *getCompileUnit(const MDNode *N) const;
-
- /// constructGlobalVariableDIE - Construct global variable DIE.
- void constructGlobalVariableDIE(const MDNode *N);
+ CompileUnit *constructCompileUnit(const MDNode *N);
/// constructSubprogramDIE - Construct subprogram DIE.
- void constructSubprogramDIE(const MDNode *N);
+ void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
- MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
+ void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+ unsigned Flags);
- /// getSourceLineCount - Return the number of source lines in the debug
- /// info.
- unsigned getSourceLineCount() const {
- return Lines.size();
- }
-
- /// recordVariableFrameIndex - Record a variable's index.
- void recordVariableFrameIndex(const DbgVariable *V, int Index);
-
- /// findVariableFrameIndex - Return true if frame index for the variable
- /// is found. Update FI to hold value of the index.
- bool findVariableFrameIndex(const DbgVariable *V, int *FI);
-
- /// findVariableLabel - Find MCSymbol for the variable.
- const MCSymbol *findVariableLabel(const DbgVariable *V);
-
- /// findDbgScope - Find DbgScope for the debug loc attached with an
- /// instruction.
- DbgScope *findDbgScope(const MachineInstr *MI);
-
/// identifyScopeMarkers() - Identify instructions that are marking
/// beginning of or end of a scope.
void identifyScopeMarkers();
- /// extractScopeInformation - Scan machine instructions in this function
- /// and collect DbgScopes. Return true, if atleast one scope was found.
- bool extractScopeInformation();
-
- /// collectVariableInfo - Populate DbgScope entries with variables' info.
+ /// addCurrentFnArgument - If Var is a current function argument then add
+ /// it to the CurrentFnArguments list.
+ bool addCurrentFnArgument(const MachineFunction *MF,
+ DbgVariable *Var, LexicalScope *Scope);
+
+ /// collectVariableInfo - Populate LexicalScope entries with variables' info.
void collectVariableInfo(const MachineFunction *,
SmallPtrSet<const MDNode *, 16> &ProcessedVars);
@@ -602,6 +443,23 @@
/// side table maintained by MMI.
void collectVariableInfoFromMMITable(const MachineFunction * MF,
SmallPtrSet<const MDNode *, 16> &P);
+
+ /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI.
+ void requestLabelBeforeInsn(const MachineInstr *MI) {
+ LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// getLabelBeforeInsn - Return Label preceding the instruction.
+ const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// requestLabelAfterInsn - Ensure that a label will be emitted after MI.
+ void requestLabelAfterInsn(const MachineInstr *MI) {
+ LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+ }
+
+ /// getLabelAfterInsn - Return Label immediately following the instruction.
+ const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -609,6 +467,14 @@
DwarfDebug(AsmPrinter *A, Module *M);
~DwarfDebug();
+ /// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
+ /// as llvm.dbg.enum and llvm.dbg.ty
+ void collectInfoFromNamedMDNodes(Module *M);
+
+ /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
+ /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+ bool collectLegacyDebugInfo(Module *M);
+
/// beginModule - Emit all Dwarf sections that should come prior to the
/// content.
void beginModule(Module *M);
@@ -625,17 +491,19 @@
///
void endFunction(const MachineFunction *MF);
- /// getLabelBeforeInsn - Return Label preceding the instruction.
- const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+ /// beginInstruction - Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI);
- /// getLabelAfterInsn - Return Label immediately following the instruction.
- const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+ /// endInstruction - Process end of an instruction.
+ void endInstruction(const MachineInstr *MI);
- /// beginScope - Process beginning of a scope.
- void beginScope(const MachineInstr *MI);
+ /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// source file names. If none currently exists, create a new id and insert it
+ /// in the SourceIds map.
+ unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
- /// endScope - Prcess end of a scope.
- void endScope(const MachineInstr *MI);
+ /// createSubprogramDIE - Create new DIE using SP.
+ DIE *createSubprogramDIE(DISubprogram SP);
};
} // End of namespace llvm
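The requestLabelBeforeInsn/getLabelBeforeInsn pairs declared above follow a
two-phase protocol: a pass first records a null placeholder keyed by the
instruction, and the later emission pass creates the real MCSymbol and fills
the placeholder in, so the get* queries are only meaningful once emission has
run. A minimal self-contained sketch of that pattern, with stand-in types and
std::map in place of LLVM's DenseMap:

    #include <map>
    #include <utility>

    struct Insn;  // stand-in for MachineInstr
    struct Sym;   // stand-in for MCSymbol

    static std::map<const Insn *, Sym *> LabelsBeforeInsn;

    // Phase 1: request. insert() is a no-op if an entry (placeholder or
    // already-filled label) exists, matching the DenseMap usage above.
    void requestLabelBeforeInsn(const Insn *MI) {
      LabelsBeforeInsn.insert(std::make_pair(MI, (Sym *)0));
    }

    // Phase 2: query, after emission has replaced the placeholder.
    Sym *getLabelBeforeInsn(const Insn *MI) {
      std::map<const Insn *, Sym *>::const_iterator I =
          LabelsBeforeInsn.find(MI);
      return I == LabelsBeforeInsn.end() ? 0 : I->second;
    }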
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfException.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 0000000..18b726b
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,735 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfException::DwarfException(AsmPrinter *A)
+ : Asm(A), MMI(Asm->MMI) {}
+
+DwarfException::~DwarfException() {}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
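As a concrete example of the sharing this enables (assumed data): landing pads
with TypeIds {1, 2, 3} and {1, 2, 7} have SharedTypeIds equal to 2, so once
the pads are sorted, ComputeActionsTable below can reuse the tail of the first
pad's action chain for the two shared entries instead of re-emitting them.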
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
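PadLT is exactly a lexicographic comparison of the two TypeIds vectors, where
a shorter sequence that is a prefix of a longer one sorts first. An equivalent
formulation (a sketch for clarity, not what the file uses) via the standard
library, whose comparator has precisely these semantics:

    #include <algorithm>
    #include <vector>

    // Equivalent to PadLT above, expressed over the TypeIds vectors alone.
    bool padLessThan(const std::vector<int> &L, const std::vector<int> &R) {
      return std::lexicographical_compare(L.begin(), L.end(),
                                          R.begin(), R.end());
    }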
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= MCAsmInfo::getULEB128Size(*I);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = 0;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeAction =
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeAction -=
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to the size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
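The size bookkeeping above leans on the LEB128 variable-width encodings:
FilterIds entries are written as ULEB128, one byte per entry while values stay
below 128, which is why a FilterOffsets byte offset usually matches the type
ID, and action records are written as SLEB128. A minimal sketch of the two
size rules that MCAsmInfo::getULEB128Size and getSLEB128Size compute:

    #include <stdint.h>

    // Bytes needed to encode Value as ULEB128: 7 payload bits per byte.
    unsigned ulebSize(uint64_t Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;
        ++Size;
      } while (Value != 0);
      return Size;
    }

    // Bytes needed to encode Value as SLEB128: stop once the rest of the
    // value is pure sign extension of the last byte's bit 6.
    unsigned slebSize(int64_t Value) {
      unsigned Size = 0;
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;  // assumes arithmetic shift for negative values
        More = !((Value == 0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        ++Size;
      } while (More);
      return Size;
    }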
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (F == 0) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For Dwarf exception handling (SjLj handling doesn't use this): if some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SJLJ doesn't do this.
+ if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (Asm->MAI->isExceptionHandlingDwarf())
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSites.push_back(Site);
+ }
+}
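A worked illustration under DWARF exception handling (hypothetical label
names; the struct mirrors the CallSiteEntry fields used above): a function
with an invoke (landing pad P1, first-action index 1) followed later by an
ordinary call gets the invoke's try-range entry plus a pad-less entry for the
trailing may-throw region, while a nounwind call's bracketed range produces no
entry at all and simply leaves a gap:

    struct Entry { const char *Begin, *End, *Pad; unsigned Action; };

    const Entry Expected[] = {
      { "invoke.begin", "invoke.end", "P1", 1 },  // the invoke's try-range
      { "invoke.end",   0,            0,    0 },  // region after the last
                                                  // try-range that may throw
    };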
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+void DwarfException::EmitExceptionTable() {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced when using DWARF exception handling.
+ RangeMapType PadMap;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+ unsigned CallSiteTableLength;
+ if (IsSJLJ)
+ CallSiteTableLength = 0;
+ else {
+ unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+ CallSiteTableLength =
+ CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+ }
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+ }
+
+ // Type infos.
+ const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeEncoding;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ // dwarf::DW_EH_PE_absptr
+ TypeFormatSize = Asm->getTargetData().getPointerSize();
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ // 3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+ }
+
+ // Begin the exception table.
+ // Sometimes we do not want to emit the data into a separate section (e.g. ARM
+ // EHABI). In that case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->EmitAlignment(2);
+
+ // Emit the LSDA.
+ MCSymbol *GCCETSym =
+ Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+ Twine(Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(GCCETSym);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()));
+
+ if (IsSJLJ)
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+ Asm->getFunctionNumber()));
+
+ // Emit the LSDA header.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does; instead, output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned CallSiteTableLengthSize =
+ MCAsmInfo::getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
+ }
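  // Worked example of this padding scheme (assumed sizes, not from a real
  // function): with a one-byte call-site format, CallSiteTableLength = 70
  // (ULEB128 size 1), SizeActions = 40 and SizeTypes = 8, we get
  // TTypeBaseOffset = 1 + 1 + 70 + 40 + 8 = 120 (ULEB128 size 1) and
  // TotalSize = 1 + 1 + 1 + 120 = 123, so SizeAlign = (4 - 123) & 3 = 1.
  // That one pad byte is absorbed by over-padding the ULEB128 field emitted
  // just above, rather than the table body, which is what avoids GCC's
  // iterate-until-stable resizing loop described earlier.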
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (VerboseAsm) {
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(idx) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
+ llvm::utostr(idx));
+ }
+ Asm->EmitULEB128(idx);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" Action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ } else {
+ // DWARF Exception handling
+ assert(Asm->MAI->isExceptionHandlingDwarf());
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as a LEB128-compressed
+ // unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
+
+ // Emit the landing pad call site table.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ unsigned Entry = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (BeginLabel == 0)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (EndLabel == 0)
+ EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(++Entry) + " <<");
+ Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.PadLabel) {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(" has no landing pad");
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
+ Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+ }
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" On action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ }
+
+ // Emit the Action Table.
+ int Entry = 0;
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
+ llvm::utostr(++Entry) + " <<");
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ if (VerboseAsm) {
+ if (Action.ValueForTypeID > 0)
+ Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+ else if (Action.ValueForTypeID < 0)
+ Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(" Cleanup");
+ }
+ Asm->EmitSLEB128(Action.ValueForTypeID);
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ if (VerboseAsm) {
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer.AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
+ llvm::utostr(NextAction));
+ }
+ }
+ Asm->EmitSLEB128(Action.NextAction);
+ }
+
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
+ if (GV)
+ Asm->EmitReference(GV, TTypeEncoding);
+ else
+ Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
+ 0);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
+ }
+
+ Asm->EmitAlignment(2);
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ assert(0 && "Should be implemented");
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+ assert(0 && "Should be implemented");
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ assert(0 && "Should be implemented");
+}
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfException.h b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfException.h
index 94a305a..b5f86ab 100644
--- a/src/LLVM/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
#include <vector>
namespace llvm {
@@ -35,59 +36,32 @@
/// DwarfException - Emits Dwarf exception handling directives.
///
class DwarfException {
+protected:
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
/// MMI - Collected machine module information.
MachineModuleInfo *MMI;
- struct FunctionEHFrameInfo {
- MCSymbol *FunctionEHSym; // L_foo.eh
- unsigned Number;
- unsigned PersonalityIndex;
- bool adjustsStack;
- bool hasLandingPads;
- std::vector<MachineMove> Moves;
- const Function *function;
-
- FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
- bool hC, bool hL,
- const std::vector<MachineMove> &M,
- const Function *f):
- FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
- adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
- };
-
- std::vector<FunctionEHFrameInfo> EHFrames;
-
- /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
- /// uses an LSDA. If so, then we need to encode that information in the CIE's
- /// augmentation.
- DenseMap<unsigned, bool> UsesLSDA;
-
- /// shouldEmitTable - Per-function flag to indicate if EH tables should
- /// be emitted.
- bool shouldEmitTable;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
- /// shouldEmitTableModule - Per-module flag to indicate if EH tables
- /// should be emitted.
- bool shouldEmitTableModule;
-
- /// shouldEmitFrameModule - Per-module flag to indicate if frame moves
- /// should be emitted.
- bool shouldEmitMovesModule;
-
- /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
- /// that is shared among many Frame Description Entries. There is at least
- /// one CIE in every non-empty .debug_frame section.
- void EmitCIE(const Function *Personality, unsigned Index);
-
- /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
- void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This table is reverse indexed, base 1.
/// SharedTypeIds - How many leading type ids two landing pads have in common.
static unsigned SharedTypeIds(const LandingPadInfo *L,
@@ -152,18 +126,115 @@
// Main entry points.
//
DwarfException(AsmPrinter *A);
- ~DwarfException();
+ virtual ~DwarfException();
/// EndModule - Emit all exception information that should come after the
/// content.
- void EndModule();
+ virtual void EndModule();
/// BeginFunction - Gather pre-function exception information. Assumes being
/// emitted immediately after the function entry point.
- void BeginFunction(const MachineFunction *MF);
+ virtual void BeginFunction(const MachineFunction *MF);
/// EndFunction - Gather and emit post-function exception information.
- void EndFunction();
+ virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
+ /// should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ AsmPrinter::CFIMoveType moveTypeModule;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ virtual ~DwarfCFIException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class ARMException : public DwarfException {
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ ARMException(AsmPrinter *A);
+ virtual ~ARMException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class Win64Exception : public DwarfException {
+ /// shouldEmitPersonality - Per-function flag to indicate if personality
+ /// info should be emitted.
+ bool shouldEmitPersonality;
+
+ /// shouldEmitLSDA - Per-function flag to indicate if the LSDA
+ /// should be emitted.
+ bool shouldEmitLSDA;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ Win64Exception(AsmPrinter *A);
+ virtual ~Win64Exception();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
};
} // End of namespace llvm
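The header above completes the restructuring visible in the new .cpp:
DwarfException is now a polymorphic base whose EndModule/BeginFunction/
EndFunction assert if ever reached, while DwarfCFIException, ARMException, and
Win64Exception carry the per-format state and override those hooks. A minimal
self-contained sketch of that dispatch arrangement (illustrative stand-in
names, not the LLVM classes):

    #include <cassert>
    #include <cstdio>

    struct ExceptionWriter {              // stand-in for DwarfException
      virtual ~ExceptionWriter() {}
      virtual void EndFunction() { assert(0 && "Should be implemented"); }
    };

    struct CFIWriter : ExceptionWriter {  // stand-in for DwarfCFIException
      virtual void EndFunction() { std::puts("emit .cfi_*-based EH"); }
    };

    int main() {
      ExceptionWriter *EH = new CFIWriter();  // chosen per target/EH model
      EH->EndFunction();                      // virtual dispatch: CFIWriter
      delete EH;
      return 0;
    }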
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/INSTALL.vcxproj b/src/LLVM/lib/CodeGen/AsmPrinter/INSTALL.vcxproj
new file mode 100644
index 0000000..1849fa5
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/INSTALL.vcxproj
@@ -0,0 +1,261 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="MinSizeRel|Win32">
+ <Configuration>MinSizeRel</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="RelWithDebInfo|Win32">
+ <Configuration>RelWithDebInfo</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>INSTALL</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\INSTALL_force.rule">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeLists.txt">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj">
+ <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
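Every generated <Command> in these project files wraps its real work in the same CMake error trampoline. Annotated once for reference (the rem comments are added here for illustration; the generated files carry none):

    setlocal
    rem The rule's real command(s) run here; "cd ." is the no-op used by forced rules.
    cd .
    if %errorlevel% neq 0 goto :cmEnd
    :cmEnd
    rem Carry the exit code out of the setlocal scope via the :cmErrorLevel subroutine...
    endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
    :cmErrorLevel
    exit /b %1
    :cmDone
    rem ...so a failing command still reaches :VCEnd, the label MSBuild appends to the step.
    if %errorlevel% neq 0 goto :VCEnd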
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/INSTALL.vcxproj.filters b/src/LLVM/lib/CodeGen/AsmPrinter/INSTALL.vcxproj.filters
new file mode 100644
index 0000000..251dd1d
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\INSTALL_force.rule">
+ <Filter>CMake Rules</Filter>
+ </CustomBuild>
+ <CustomBuild Include="CMakeLists.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="CMake Rules">
+ <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/LLVMAsmPrinter.vcxproj b/src/LLVM/lib/CodeGen/AsmPrinter/LLVMAsmPrinter.vcxproj
new file mode 100644
index 0000000..84d509a
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/LLVMAsmPrinter.vcxproj
@@ -0,0 +1,382 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Profile|Win32">
+ <Configuration>Profile</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Profile|x64">
+ <Configuration>Profile</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{CB52073C-DE5B-4EC6-9FB3-96C58DD3B9FF}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>LLVMAsmPrinter</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <InlineFunctionExpansion>Disabled</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Debug</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Debug/LLVMAsmPrinter.pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <InlineFunctionExpansion>Disabled</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Debug</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Debug/LLVMAsmPrinter.pdb</ProgramDataBaseFileName>
+ <OmitFramePointers>false</OmitFramePointers>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMAsmPrinter.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMAsmPrinter.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>false</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMAsmPrinter.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMAsmPrinter.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>false</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="ARMException.cpp" />
+ <ClCompile Include="AsmPrinter.cpp" />
+ <ClCompile Include="AsmPrinterDwarf.cpp" />
+ <ClCompile Include="AsmPrinterInlineAsm.cpp" />
+ <ClCompile Include="DIE.cpp" />
+ <ClCompile Include="DwarfCFIException.cpp" />
+ <ClCompile Include="DwarfCompileUnit.cpp" />
+ <ClCompile Include="DwarfDebug.cpp" />
+ <ClCompile Include="DwarfException.cpp" />
+ <ClCompile Include="OcamlGCPrinter.cpp" />
+ <ClCompile Include="Win64Exception.cpp" />
+ <ClInclude Include="DIE.h" />
+ <ClInclude Include="DwarfCompileUnit.h" />
+ <ClInclude Include="DwarfDebug.h" />
+ <ClInclude Include="DwarfException.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj">
+ <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\LLVMCodeGen.vcxproj">
+ <Project>7BE5F26F-0525-42BB-AAED-56C5B4582B99</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj">
+ <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\MC/LLVMMC.vcxproj">
+ <Project>8AB70E5D-2814-4682-AF9F-3062758BAEAB</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\MC/MCParser/LLVMMCParser.vcxproj">
+ <Project>0557A55A-8920-464E-A8C0-BEFCFA17E0E1</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\Support/LLVMSupport.vcxproj">
+ <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\Target/LLVMTarget.vcxproj">
+ <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\..\include/llvm/intrinsics_gen.vcxproj">
+ <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/LLVMAsmPrinter.vcxproj.filters b/src/LLVM/lib/CodeGen/AsmPrinter/LLVMAsmPrinter.vcxproj.filters
new file mode 100644
index 0000000..acde067
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/LLVMAsmPrinter.vcxproj.filters
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <ClCompile Include="ARMException.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="AsmPrinter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="AsmPrinterDwarf.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="AsmPrinterInlineAsm.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="DIE.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="DwarfCFIException.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="DwarfCompileUnit.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="DwarfDebug.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="DwarfException.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="OcamlGCPrinter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="Win64Exception.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="DIE.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="DwarfCompileUnit.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="DwarfDebug.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="DwarfException.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{CFA0CD99-0550-4E94-A4D9-080C3F5D695C}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 0000000..1153817
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+ class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string SymName;
+ SymName += "caml";
+ size_t Letter = SymName.size();
+ SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName += "__";
+ SymName += Id;
+
+ // Capitalize the first letter of the module name.
+ SymName[Letter] = toupper(SymName[Letter]);
+
+ SmallString<128> TmpStr;
+ AP.Mang->getNameWithPrefix(TmpStr, SymName);
+
+ MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+
+ AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer.EmitLabel(Sym);
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_begin");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is as follows:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). The frametable printer will
+/// abort if either condition is detected in a function that uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(getModule(), AP, "code_end");
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "data_end");
+
+ // FIXME: Why does ocaml emit this??
+ AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
+
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(getModule(), AP, "frametable");
+
+ int NumDescriptors = 0;
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ NumDescriptors++;
+ }
+ }
+
+ if (NumDescriptors >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Too many descriptors for ocaml GC");
+ }
+ AP.EmitInt16(NumDescriptors);
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " + Twine(FrameSize) + ">= 65536.\n"
+ "(" + Twine(uintptr_t(&FI)) + ")");
+ }
+
+ AP.OutStreamer.AddComment("live roots for " +
+ Twine(FI.getFunction().getName()));
+ AP.OutStreamer.AddBlankLine();
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count "+Twine(LiveCount)+" >= 65536.");
+ }
+
+ AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
+ AP.EmitInt16(FrameSize);
+ AP.EmitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ if (K->StackOffset >= 1<<16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.EmitInt16(K->StackOffset);
+ }
+
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ }
+ }
+}
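The only subtle part of OcamlGCPrinter.cpp is the symbol mangling in EmitCamlGlobal: "caml", then the module identifier up to its first '.' with that stem capitalized, then "__" and the Id. The uint16_t fields in the frametable layout documented above are also why every size check in this file compares against 1<<16. A minimal standalone sketch of the mangling (hypothetical helper, not part of the patch):

    #include <algorithm>
    #include <cctype>
    #include <string>

    // Mirrors EmitCamlGlobal's name construction.
    static std::string camlSymbolName(const std::string &ModuleId,
                                      const std::string &Id) {
      std::string SymName = "caml";
      size_t Letter = SymName.size();
      // Module stem: everything before the first '.', e.g. "foo.bc" -> "foo".
      SymName.append(ModuleId.begin(),
                     std::find(ModuleId.begin(), ModuleId.end(), '.'));
      SymName += "__";
      SymName += Id;
      // Ocaml expects the module component capitalized.
      SymName[Letter] = static_cast<char>(toupper(SymName[Letter]));
      return SymName;
    }
    // camlSymbolName("foo.bc", "frametable") yields "camlFoo__frametable".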
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/PACKAGE.vcxproj b/src/LLVM/lib/CodeGen/AsmPrinter/PACKAGE.vcxproj
new file mode 100644
index 0000000..0987b8e
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/PACKAGE.vcxproj
@@ -0,0 +1,277 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="MinSizeRel|Win32">
+ <Configuration>MinSizeRel</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="RelWithDebInfo|Win32">
+ <Configuration>RelWithDebInfo</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>PACKAGE</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\AsmPrinter;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\PACKAGE_force.rule">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeLists.txt">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj">
+ <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/PACKAGE.vcxproj.filters b/src/LLVM/lib/CodeGen/AsmPrinter/PACKAGE.vcxproj.filters
new file mode 100644
index 0000000..a570359
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\PACKAGE_force.rule">
+ <Filter>CMake Rules</Filter>
+ </CustomBuild>
+ <CustomBuild Include="CMakeLists.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="CMake Rules">
+ <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
diff --git a/src/LLVM/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/src/LLVM/lib/CodeGen/AsmPrinter/Win64Exception.cpp
new file mode 100644
index 0000000..b83aa5a
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -0,0 +1,115 @@
+//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Win64 exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+Win64Exception::Win64Exception(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
+ {}
+
+Win64Exception::~Win64Exception() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void Win64Exception::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void Win64Exception::BeginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ shouldEmitMoves = Asm->needsSEHMoves();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+
+ shouldEmitPersonality = hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+
+ if (!shouldEmitPersonality)
+ return;
+
+ MCSymbol *GCCHandlerSym =
+ Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
+ Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void Win64Exception::EndFunction() {
+ if (!shouldEmitPersonality && !shouldEmitMoves)
+ return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+
+ Asm->OutStreamer.PushSection();
+ Asm->OutStreamer.EmitWin64EHHandlerData();
+ Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
+ 4);
+ EmitExceptionTable();
+ Asm->OutStreamer.PopSection();
+ }
+ Asm->OutStreamer.EmitWin64EHEndProc();
+}
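BeginFunction and EndFunction above must agree on whether a SEH procedure was opened, since EmitWin64EHStartProc and EmitWin64EHEndProc have to stay paired. A standalone sketch of the shared gating (illustrative only; the struct and names are hypothetical):

    // Unwind info is needed when the target wants SEH moves; a personality
    // (and its LSDA) only when landing pads survived and neither encoding
    // is DW_EH_PE_omit.
    struct EHNeeds {
      bool hasLandingPads;
      bool needsSEHMoves;
      bool personalityOmitted;
      bool lsdaOmitted;
    };

    static bool shouldOpenSEHProc(const EHNeeds &N) {
      bool emitPersonality = N.hasLandingPads && !N.personalityOmitted;
      bool emitLSDA = emitPersonality && !N.lsdaOmitted;
      (void)emitLSDA; // consumed later by the handler-data emission path
      return emitPersonality || N.needsSEHMoves;
    }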
diff --git a/src/LLVM/lib/CodeGen/BranchFolding.cpp b/src/LLVM/lib/CodeGen/BranchFolding.cpp
index 4ff84c9..75288b0 100644
--- a/src/LLVM/lib/CodeGen/BranchFolding.cpp
+++ b/src/LLVM/lib/CodeGen/BranchFolding.cpp
@@ -41,6 +41,7 @@
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -65,7 +66,7 @@
public:
static char ID;
explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}
+ : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Control Flow Optimizer"; }
@@ -86,12 +87,14 @@
}
-BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
case cl::BOU_FALSE: EnableTailMerge = false; break;
}
+
+ EnableHoistCommonCode = CommonHoist;
}
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
@@ -105,6 +108,9 @@
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end()-1);
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
// Remove the block.
MF->erase(MBB);
}
@@ -168,6 +174,8 @@
MachineModuleInfo *mmi) {
if (!tii) return false;
+ TriedMerging.clear();
+
TII = tii;
TRI = tri;
MMI = mmi;
@@ -186,9 +194,10 @@
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
- MadeChangeThisIteration = false;
- MadeChangeThisIteration |= TailMergeBlocks(MF);
- MadeChangeThisIteration |= OptimizeBranches(MF);
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
MadeChange |= MadeChangeThisIteration;
}
@@ -319,7 +328,13 @@
--I2;
}
// I1, I2==first (untested) non-DBGs preceding known match
- if (!I1->isIdenticalTo(I2)) {
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->isInlineAsm()) {
++I1; ++I2;
break;
}
@@ -351,11 +366,31 @@
return TailLen;
}
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB) {
+ if (RS) {
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+}
+
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
+ MachineBasicBlock *CurMBB = OldInst->getParent();
+
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(CurMBB, NewDest);
+
++NumTailMerge;
}
@@ -384,16 +419,7 @@
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// For targets that use the register scavenger, we must maintain LiveIns.
- if (RS) {
- RS->enterBasicBlock(&CurMBB);
- if (!CurMBB.empty())
- RS->forward(prior(CurMBB.end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
- if (RegsLiveAtExit[i])
- NewMBB->addLiveIn(i);
- }
+ MaintainLiveIns(&CurMBB, NewMBB);
return NewMBB;
}
@@ -406,10 +432,10 @@
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const TargetInstrDesc &TID = I->getDesc();
- if (TID.isCall())
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall())
Time += 10;
- else if (TID.mayLoad() || TID.mayStore())
+ else if (MCID.mayLoad() || MCID.mayStore())
Time += 2;
else
++Time;
@@ -495,10 +521,11 @@
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
- MachineFunction *MF = MBB1->getParent();
-
if (CommonTailLen == 0)
return false;
+ DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
+ << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+ << '\n');
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
@@ -535,6 +562,7 @@
// we don't have to split a block. At worst we will be introducing 1 new
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
+ MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
@@ -787,14 +815,21 @@
// First find blocks with no successors.
MergePotentials.clear();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+ if (TriedMerging.count(I))
+ continue;
if (I->succ_empty())
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
// See if we can do any tail merging on those.
- if (MergePotentials.size() < TailMergeThreshold &&
- MergePotentials.size() >= 2)
+ if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(NULL, NULL);
// Look at blocks (IBB) with multiple predecessors (PBB).
@@ -818,15 +853,17 @@
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I) {
- if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+ if (I->pred_size() >= 2) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
E2 = I->pred_end();
- P != E2; ++P) {
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
// Skip blocks that loop to themselves, can't tail merge these.
if (PBB == IBB)
continue;
@@ -846,6 +883,28 @@
if (!FBB)
FBB = llvm::next(MachineFunction::iterator(PBB));
}
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice
+ // to have a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
+ continue;
+ }
+ }
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
DebugLoc dl; // FIXME: this is nowhere
@@ -857,6 +916,11 @@
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
@@ -880,7 +944,8 @@
// Make sure blocks are numbered in order
MF.RenumberBlocks();
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ) {
MachineBasicBlock *MBB = I++;
MadeChange |= OptimizeBlock(MBB);
@@ -961,7 +1026,7 @@
// explicitly. Landing pads should not do this since the landing-pad table
// points to this block. Blocks with their addresses taken shouldn't be
// optimized away.
- if (IsEmptyBlock(MBB) && !MBB->hasAddressTaken()) {
+ if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
@@ -1018,9 +1083,25 @@
// AnalyzeBranch.
if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
PrevBB.succ_size() == 1 &&
- !MBB->hasAddressTaken()) {
+ !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
DEBUG(dbgs() << "\nMerging into block: " << PrevBB
<< "From MBB: " << *MBB);
+ // Remove redundant DBG_VALUEs first.
+ if (PrevBB.begin() != PrevBB.end()) {
+ MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+ --PrevBBIter;
+ MachineBasicBlock::iterator MBBIter = MBB->begin();
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // DBG_VALUE at the beginning of MBB.
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+ && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+ if (!MBBIter->isIdenticalTo(PrevBBIter))
+ break;
+ MachineInstr *DuplicateDbg = MBBIter;
+ ++MBBIter; --PrevBBIter;
+ DuplicateDbg->eraseFromParent();
+ }
+ }
PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
PrevBB.removeSuccessor(PrevBB.succ_begin());
assert(PrevBB.succ_empty());
@@ -1235,7 +1316,7 @@
// see if it has a fall-through into its successor.
bool CurFallsThru = MBB->canFallThrough();
- {
+ if (!MBB->isLandingPad()) {
// Check all the predecessors of this block. If one of them has no fall
// throughs, move this block right after it.
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
@@ -1284,7 +1365,8 @@
// and if the successor isn't an EH destination, we can arrange for the
// fallthrough to happen.
if (SuccBB != MBB && &*SuccPrev != MBB &&
- !SuccPrev->canFallThrough() && !CurUnAnalyzable) {
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+ !SuccBB->isLandingPad()) {
MBB->moveBefore(SuccBB);
MadeChange = true;
goto ReoptimizeBlock;
@@ -1308,3 +1390,285 @@
return MadeChange;
}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ MadeChange |= HoistCommonCodeInSuccs(MBB);
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is usually just before the terminator;
+/// however, if the terminator is a conditional branch and its previous
+/// instruction is the flag-setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<unsigned,4> &Uses,
+ SmallSet<unsigned,4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(Loc))
+ return MBB->end();
+
+ for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Loc->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+ }
+
+ if (Uses.empty())
+ return Loc;
+ if (Loc == MBB->begin())
+ return MBB->end();
+
+ // The terminator is probably a conditional branch; try not to separate the
+ // branch from the condition-setting instruction.
+ MachineBasicBlock::iterator PI = Loc;
+ --PI;
+ while (PI != MBB->begin() && PI->isDebugValue())
+ --PI;
+
+ bool IsDef = false;
+ for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg))
+ IsDef = true;
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+ // Be conservative: don't insert an instruction above something that may
+ // have side effects. And since it's potentially bad to separate the
+ // flag-setting instruction from the conditional branch, just abort the
+ // optimization completely.
+ // Also avoid moving code above a predicated instruction, since it's hard
+ // to reason about register liveness with predicated instructions.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+ TII->isPredicated(PI))
+ return MBB->end();
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = PI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ Uses.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.insert(*AS);
+ } else {
+ if (Uses.count(Reg)) {
+ Uses.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Uses.erase(*SR); // Use getSubRegisters to be conservative
+ }
+ Defs.insert(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.insert(*AS);
+ }
+ }
+
+ return PI;
+}
+
+/// HoistCommonCodeInSuccs - If the successors of MBB have a common instruction
+/// sequence at the start of their blocks, move those instructions before MBB's
+/// terminator if it's legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+ // Restrict the optimization to cases where MBB is the only predecessor;
+ // there it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<unsigned, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ bool HasDups = false;
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallSet<unsigned, 4> LocalDefsSet;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+ break;
+
+ if (TII->isPredicated(TIB))
+ // Hard to reason about register liveness with predicated instructions.
+ break;
+
+ bool IsSafe = true;
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (Defs.count(Reg) && !MO.isDead()) {
+ // Don't hoist the instruction if the def would be clobbered by the
+ // instruction at the point of insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, r1<kill>
+ IsSafe = false;
+ break;
+ }
+ } else if (!LocalDefsSet.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+ break;
+
+ // Remove kills from LocalDefsSet; these registers had short live ranges.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !LocalDefsSet.count(Reg))
+ continue;
+ for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ LocalDefsSet.erase(*OR);
+ }
+
+ // Track local defs so we can update liveins.
+ for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = TIB->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LocalDefs.push_back(Reg);
+ for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ LocalDefsSet.insert(*OR);
+ }
+
+ HasDups = true;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!HasDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+
+ // Update live-ins.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Def = LocalDefs[i];
+ if (LocalDefsSet.count(Def)) {
+ TBB->addLiveIn(Def);
+ FBB->addLiveIn(Def);
+ }
+ }
+
+ ++NumHoist;
+ return true;
+}
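
The hoisting added above scans the two successor blocks in lockstep and stops at the first pair of instructions that are not identical (isIdenticalTo with CheckKillDead) or not safe to move, then splices the common prefix in front of MBB's terminator. A minimal standalone sketch of that lockstep prefix scan, using plain strings in place of MachineInstrs (all names here are illustrative, not LLVM API):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Standalone illustration of the lockstep scan in HoistCommonCodeInSuccs:
// "instructions" are strings, and an instruction is hoistable when both
// successors start with it (the real pass also checks safety and liveness).
static std::vector<std::string>
hoistCommonPrefix(std::vector<std::string> &TrueBB,
                  std::vector<std::string> &FalseBB) {
  std::size_t N = 0;
  while (N < TrueBB.size() && N < FalseBB.size() && TrueBB[N] == FalseBB[N])
    ++N;
  std::vector<std::string> Hoisted(TrueBB.begin(), TrueBB.begin() + N);
  // Remove the common prefix from both successors, as the real pass does
  // with MBB->splice(...) and FBB->erase(...).
  TrueBB.erase(TrueBB.begin(), TrueBB.begin() + N);
  FalseBB.erase(FalseBB.begin(), FalseBB.begin() + N);
  return Hoisted;
}

int main() {
  std::vector<std::string> T = {"r1 = load", "r2 = add r1, 4", "store r2"};
  std::vector<std::string> F = {"r1 = load", "r2 = add r1, 4", "ret"};
  for (const std::string &I : hoistCommonPrefix(T, F))
    std::cout << "hoisted: " << I << '\n'; // the two shared instructions
  return 0;
}

The real pass additionally skips dbg_value instructions, refuses predicated instructions, and tracks the Uses/Defs sets so hoisted code cannot clobber registers read at the insertion point.
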
diff --git a/src/LLVM/lib/CodeGen/BranchFolding.h b/src/LLVM/lib/CodeGen/BranchFolding.h
index 15dfa7f..df795df 100644
--- a/src/LLVM/lib/CodeGen/BranchFolding.h
+++ b/src/LLVM/lib/CodeGen/BranchFolding.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include <vector>
@@ -19,11 +20,10 @@
class RegScavenger;
class TargetInstrInfo;
class TargetRegisterInfo;
- template<typename T> class SmallVectorImpl;
class BranchFolder {
public:
- explicit BranchFolder(bool defaultEnableTailMerge);
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
bool OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
@@ -48,6 +48,7 @@
};
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
class SameTailElt {
MPIterator MPIter;
@@ -85,6 +86,7 @@
std::vector<SameTailElt> SameTails;
bool EnableTailMerge;
+ bool EnableHoistCommonCode;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
@@ -93,6 +95,8 @@
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+ void MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
@@ -110,6 +114,9 @@
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool HoistCommonCode(MachineFunction &MF);
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
};
}
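
The new TriedMerging set declared above is purely a compile-time bound: once a full batch of TailMergeThreshold candidates has been examined, all of them are marked as tried and skipped on later iterations, so large functions do not rehash the same blocks over and over. A toy model of that cutoff pattern (the integer "blocks" and Threshold value are stand-ins, not the real types):

#include <cstdio>
#include <set>
#include <vector>

int main() {
  const unsigned Threshold = 3; // stands in for TailMergeThreshold
  std::set<int> TriedMerging;   // blocks we will not revisit
  const std::vector<int> Blocks = {1, 2, 3, 4, 5};

  for (int Pass = 0; Pass < 2; ++Pass) {
    std::vector<int> MergePotentials;
    for (int B : Blocks) {
      if (MergePotentials.size() >= Threshold)
        break;                  // cap the work done per iteration
      if (TriedMerging.count(B))
        continue;               // already examined in a full batch
      MergePotentials.push_back(B);
    }
    // Only a full batch gets blacklisted, mirroring the
    // size() == TailMergeThreshold check in TailMergeBlocks.
    if (MergePotentials.size() == Threshold)
      TriedMerging.insert(MergePotentials.begin(), MergePotentials.end());
    std::printf("pass %d examined %zu candidates\n", Pass,
                MergePotentials.size());
  }
  return 0;
}

The first pass examines three candidates and blacklists them; the second pass only sees the remaining two, which is exactly the bounded-revisit behavior the patch is after.
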
diff --git a/src/LLVM/lib/CodeGen/CalcSpillWeights.cpp b/src/LLVM/lib/CodeGen/CalcSpillWeights.cpp
index 8ecce4a..ea16a25 100644
--- a/src/LLVM/lib/CodeGen/CalcSpillWeights.cpp
+++ b/src/LLVM/lib/CodeGen/CalcSpillWeights.cpp
@@ -25,8 +25,12 @@
using namespace llvm;
char CalculateSpillWeights::ID = 0;
-INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
- "Calculate spill weights", false, false);
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<LiveIntervals>();
@@ -83,8 +87,8 @@
}
void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
- MachineRegisterInfo &mri = mf_.getRegInfo();
- const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo();
+ MachineRegisterInfo &mri = MF.getRegInfo();
+ const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
MachineBasicBlock *mbb = 0;
MachineLoop *loop = 0;
unsigned loopDepth = 0;
@@ -99,6 +103,9 @@
// Don't recompute a target specific hint.
bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+ // Don't recompute spill weight for an unspillable register.
+ bool Spillable = li.isSpillable();
+
for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
MachineInstr *mi = I.skipInstruction();) {
if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
@@ -106,34 +113,37 @@
if (!visited.insert(mi))
continue;
- // Get loop info for mi.
- if (mi->getParent() != mbb) {
- mbb = mi->getParent();
- loop = loops_.getLoopFor(mbb);
- loopDepth = loop ? loop->getLoopDepth() : 0;
- isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ float weight = 1.0f;
+ if (Spillable) {
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = Loops.getLoopFor(mbb);
+ loopDepth = loop ? loop->getLoopDepth() : 0;
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
}
- // Calculate instr weight.
- bool reads, writes;
- tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
- float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
-
- // Give extra weight to what looks like a loop induction variable update.
- if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb))
- weight *= 3;
-
- totalWeight += weight;
-
// Get allocation hints from copies.
if (noHint || !mi->isCopy())
continue;
unsigned hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
continue;
- float hweight = hint_[hint] += weight;
+ float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isPhysicalRegister(hint)) {
- if (hweight > bestPhys && lis_.isAllocatable(hint))
+ if (hweight > bestPhys && LIS.isAllocatable(hint))
bestPhys = hweight, hintPhys = hint;
} else {
if (hweight > bestVirt)
@@ -141,17 +151,21 @@
}
}
- hint_.clear();
+ Hint.clear();
// Always prefer the physreg hint.
if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
mri.setRegAllocationHint(li.reg, 0, hint);
- // Weakly boost the spill weifght of hinted registers.
+ // Weakly boost the spill weight of hinted registers.
totalWeight *= 1.01F;
}
+ // If the live interval was already unspillable, leave it that way.
+ if (!Spillable)
+ return;
+
// Mark li as unspillable if all live ranges are tiny.
- if (li.isZeroLength()) {
+ if (li.isZeroLength(LIS.getSlotIndexes())) {
li.markNotSpillable();
return;
}
@@ -162,65 +176,12 @@
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
bool isLoad = false;
- SmallVector<LiveInterval*, 4> spillIs;
- if (lis_.isReMaterializable(li, spillIs, isLoad)) {
+ if (LIS.isReMaterializable(li, 0, isLoad)) {
if (isLoad)
totalWeight *= 0.9F;
else
totalWeight *= 0.5F;
}
- li.weight = totalWeight;
- lis_.normalizeSpillWeight(li);
-}
-
-void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
- MachineRegisterInfo &mri = mf_.getRegInfo();
- const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo();
- const TargetRegisterClass *orc = mri.getRegClass(reg);
- SmallPtrSet<const TargetRegisterClass*,8> rcs;
-
- for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg),
- E = mri.reg_nodbg_end(); I != E; ++I) {
- // The targets don't have accurate enough regclass descriptions that we can
- // handle subregs. We need something similar to
- // TRI::getMatchingSuperRegClass, but returning a super class instead of a
- // sub class.
- if (I.getOperand().getSubReg()) {
- DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n');
- return;
- }
- if (const TargetRegisterClass *rc =
- I->getDesc().getRegClass(I.getOperandNo(), tri))
- rcs.insert(rc);
- }
-
- // If we found no regclass constraints, just leave reg as is.
- // In theory, we could inflate to the largest superclass of reg's existing
- // class, but that might not be legal for the current cpu setting.
- // This could happen if reg is only used by COPY instructions, so we may need
- // to improve on this.
- if (rcs.empty()) {
- DEBUG(dbgs() << "Not inflating unconstrained" << orc->getName() << ":%reg"
- << reg << ".\n");
- return;
- }
-
- // Compute the intersection of all classes in rcs.
- // This ought to be independent of iteration order, but if the target register
- // classes don't form a proper algebra, it is possible to get different
- // results. The solution is to make sure the intersection of any two register
- // classes is also a register class or the null set.
- const TargetRegisterClass *rc = 0;
- for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(),
- E = rcs.end(); I != E; ++I) {
- rc = rc ? getCommonSubClass(rc, *I) : *I;
- assert(rc && "Incompatible regclass constraints found");
- }
-
- if (rc == orc)
- return;
- DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to "
- << rc->getName() <<".\n");
- mri.setRegClass(reg, rc);
+ li.weight = normalizeSpillWeight(totalWeight, li.getSize());
}
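
For reference, the per-use weight accumulated above has a simple shape: each read/write contributes a base cost scaled exponentially by loop depth (LiveIntervals::getSpillWeight of this vintage scales by powers of 10), a loop-exiting write gets the 3x induction-variable boost, and the total is finally normalized against the interval's size. A hedged standalone rendering of that shape; treat the constants as illustrative rather than authoritative:

#include <cmath>
#include <cstdio>

// Illustrative reconstruction of the spill-weight computation above.
// Reads/writes cost 1 each, scaled by 10^loopDepth; a write that exits
// a loop (a likely induction variable update) is boosted 3x.
static float useWeight(bool reads, bool writes, unsigned loopDepth,
                       bool exitingWrite) {
  float W = (float)((int)reads + (int)writes) *
            std::pow(10.0f, (float)loopDepth);
  if (writes && exitingWrite)
    W *= 3.0f;
  return W;
}

int main() {
  // One read+write at loop depth 2 whose write exits the loop:
  float Total = useWeight(true, true, 2, true); // 2 * 100 * 3 = 600
  unsigned IntervalSize = 40;                   // hypothetical li.getSize()
  std::printf("normalized weight = %f\n", Total / IntervalSize);
  return 0;
}

The final normalizeSpillWeight(totalWeight, li.getSize()) call divides the accumulated weight by a function of the interval's length, so that short, hot intervals outrank long, lukewarm ones; the plain division above is only a stand-in for it.
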
diff --git a/src/LLVM/lib/CodeGen/CallingConvLower.cpp b/src/LLVM/lib/CodeGen/CallingConvLower.cpp
index 62ad817..14eb054 100644
--- a/src/LLVM/lib/CodeGen/CallingConvLower.cpp
+++ b/src/LLVM/lib/CodeGen/CallingConvLower.cpp
@@ -13,29 +13,34 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
-CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
- SmallVector<CCValAssign, 16> &locs, LLVMContext &C)
- : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
- TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) {
+CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
+ const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
// No stack is used.
StackOffset = 0;
-
+
+ clearFirstByValReg();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
}
-// HandleByVal - Allocate a stack slot large enough to pass an argument by
-// value. The size and alignment information of the argument is encoded in its
-// parameter attribute.
-void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
+// HandleByVal - Allocate space on the stack large enough to pass an argument
+// by value. The size and alignment information of the argument is encoded in
+// its parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign,
ISD::ArgFlagsTy ArgFlags) {
unsigned Align = ArgFlags.getByValAlign();
@@ -44,18 +49,18 @@
Size = MinSize;
if (MinAlign > (int)Align)
Align = MinAlign;
+ if (MF.getFrameInfo()->getMaxAlignment() < Align)
+ MF.getFrameInfo()->setMaxAlignment(Align);
+ TM.getTargetLowering()->HandleByVal(this, Size);
unsigned Offset = AllocateStack(Size, Align);
-
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- UsedRegs[Reg/32] |= 1 << (Reg&31);
-
- if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
- for (; (Reg = *RegAliases); ++RegAliases)
- UsedRegs[Reg/32] |= 1 << (Reg&31);
+ for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ unsigned Reg = *Alias; ++Alias)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
}
/// AnalyzeFormalArguments - Analyze an array of argument values,
@@ -66,12 +71,12 @@
unsigned NumArgs = Ins.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- EVT ArgVT = Ins[i].VT;
+ MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Formal argument #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -84,7 +89,7 @@
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
+ MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
return false;
@@ -98,12 +103,12 @@
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
+ MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Return operand #" << i << " has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -116,12 +121,12 @@
CCAssignFn Fn) {
unsigned NumOps = Outs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = Outs[i].VT;
+ MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -130,17 +135,17 @@
/// AnalyzeCallOperands - Same as above except it takes vectors of types
/// and argument flags.
-void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn) {
unsigned NumOps = ArgVTs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = ArgVTs[i];
+ MVT ArgVT = ArgVTs[i];
ISD::ArgFlagsTy ArgFlags = Flags[i];
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -152,12 +157,12 @@
void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn) {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT VT = Ins[i].VT;
+ MVT VT = Ins[i].VT;
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString() << "\n";
#endif
llvm_unreachable(0);
}
@@ -166,11 +171,11 @@
/// AnalyzeCallResult - Same as above except it's specialized for calls which
/// produce a single value.
-void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
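
MarkAllocated now walks TRI.getOverlaps(Reg) and sets one bit per overlapping register in the packed UsedRegs array; the UsedRegs[Reg/32] |= 1 << (Reg&31) idiom is a hand-rolled bitset over register numbers. The indexing in isolation (standalone types, not CCState):

#include <cstdint>
#include <cstdio>
#include <vector>

// Standalone model of CCState's UsedRegs bookkeeping: one bit per physical
// register, packed 32 to a word, sized (NumRegs+31)/32 as in the ctor above.
struct RegBitset {
  std::vector<uint32_t> Words;
  explicit RegBitset(unsigned NumRegs) : Words((NumRegs + 31) / 32, 0) {}
  void mark(unsigned Reg) { Words[Reg / 32] |= 1u << (Reg & 31); }
  bool marked(unsigned Reg) const {
    return (Words[Reg / 32] >> (Reg & 31)) & 1u;
  }
};

int main() {
  RegBitset Used(64);
  // The real code marks Reg plus every register getOverlaps(Reg) returns
  // (e.g. AX/EAX/RAX); here we just mark two hypothetical overlapping
  // register numbers directly.
  Used.mark(3);
  Used.mark(35);
  std::printf("%d %d %d\n", Used.marked(3), Used.marked(35), Used.marked(4));
  return 0;
}
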
diff --git a/src/LLVM/lib/CodeGen/CodeGen.cpp b/src/LLVM/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 0000000..424535b
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,60 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeCalculateSpillWeightsPass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializePEIPass(Registry);
+ initializeRALinScanPass(Registry);
+ initializeRegisterCoalescerPass(Registry);
+ initializeRenderMachineFunctionPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeLoopSplitterPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStrongPHIEliminationPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
diff --git a/src/LLVM/lib/CodeGen/CodePlacementOpt.cpp b/src/LLVM/lib/CodeGen/CodePlacementOpt.cpp
index a554745..270c337 100644
--- a/src/LLVM/lib/CodeGen/CodePlacementOpt.cpp
+++ b/src/LLVM/lib/CodeGen/CodePlacementOpt.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the pass that optimize code placement and align loop
-// headers to target specific alignment boundary.
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to target-specific alignment boundaries.
//
//===----------------------------------------------------------------------===//
@@ -40,7 +40,7 @@
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
- return "Code Placement Optimizater";
+ return "Code Placement Optimizer";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -99,6 +99,9 @@
/// whenever possible.
///
bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
+ // Conservatively ignore EH landing pads.
+ if (MBB->isLandingPad()) return false;
+
// Aggressively handle return blocks and similar constructs.
if (MBB->succ_empty()) return true;
@@ -251,7 +254,7 @@
// Determine a position to move orphaned loop blocks to. If TopMBB is not
// entered via fallthrough and BotMBB is exited via fallthrough, prepend them
- // to the top of the loop to avoid loosing that fallthrough. Otherwise append
+ // to the top of the loop to avoid losing that fallthrough. Otherwise append
// them to the bottom, even if it previously had a fallthrough, on the theory
// that it's worth an extra branch to keep the loop contiguous.
MachineFunction::iterator InsertPt =
diff --git a/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.cpp b/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 98d4d41..84c4d59 100644
--- a/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -27,12 +27,12 @@
using namespace llvm;
CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi) :
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF)),
+ RegClassInfo(RCI),
Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
KillIndices(TRI->getNumRegs(), 0),
DefIndices(TRI->getNumRegs(), 0) {}
@@ -130,21 +130,25 @@
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
- // Any register which was defined within the previous scheduling region
- // may have been rescheduled and its lifetime may overlap with registers
- // in ways not reflected in our current liveness state. For each such
- // register, adjust the liveness state to be conservatively correct.
- for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
- if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
- assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
-
- // Mark this register to be non-renamable.
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Move the def index to the end of the previous region, to reflect
// that the def could theoretically have been scheduled at the end.
DefIndices[Reg] = InsertPosIndex;
}
+ }
PrescanInstruction(MI);
ScanInstruction(MI, Count);
@@ -177,7 +181,7 @@
// that have special allocation requirements. Also assume all registers
// used in a call must not be changed (ABI).
// FIXME: The issue with predicated instruction is more complex. We are being
- // conservatively here because the kill markers cannot be trusted after
+ // conservative here because the kill markers cannot be trusted after
// if-conversion:
// %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
// ...
@@ -203,7 +207,7 @@
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -291,7 +295,7 @@
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -321,25 +325,79 @@
}
}
+// Check all machine operands that reference the anti-dependent register and must
+// be replaced by NewReg. Return true if any of their parent instructions may
+// clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg.
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
const TargetRegisterClass *RC)
{
- for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
- RE = RC->allocation_order_end(MF); R != RE; ++R) {
- unsigned NewReg = *R;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
// an anti-dependence with this AntiDepReg, because that would
// re-introduce that anti-dependence.
if (NewReg == LastNewReg) continue;
- // If the instruction already has a def of the NewReg, it's not suitable.
- // For example, Instruction with multiple definitions can result in this
- // condition.
- if (MI->modifiesRegister(NewReg, TRI)) continue;
+ // If any instructions that define AntiDepReg also define the NewReg, it's
+ // not suitable. For example, instructions with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
// If NewReg is dead and NewReg's most recent def is not before
// AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
@@ -361,7 +419,8 @@
BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
- unsigned InsertPosIndex) {
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
// The code below assumes that there is at least one instruction,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
@@ -473,7 +532,7 @@
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!AllocatableSet.test(AntiDepReg))
+ if (!RegClassInfo.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.count(AntiDepReg))
@@ -546,7 +605,11 @@
// TODO: Instead of picking the first free register, consider which might
// be the best.
if (AntiDepReg != 0) {
- if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
LastNewReg[AntiDepReg],
RC)) {
DEBUG(dbgs() << "Breaking anti-dependence edge on "
@@ -556,9 +619,6 @@
// Update the references to the old register to refer to the new
// register.
- std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
- std::multimap<unsigned, MachineOperand *>::iterator>
- Range = RegRefs.equal_range(AntiDepReg);
for (std::multimap<unsigned, MachineOperand *>::iterator
Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second->setReg(NewReg);
@@ -567,18 +627,14 @@
// as well.
const SUnit *SU = MISUnitMap[Q->second->getParent()];
if (!SU) continue;
- for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
- MachineInstr *DI = SU->DbgInstrList[i];
- assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg()
- && "Non register dbg_value attached to SUnit!");
- if (DI->getOperand(0).getReg() == AntiDepReg)
- DI->getOperand(0).setReg(NewReg);
- }
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second->getParent())
+ UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
- // liveness information for the anti-depenence reg is now
+ // liveness information for the anti-dependence reg is now
// inconsistent. Set the state as if it were dead.
Classes[NewReg] = Classes[AntiDepReg];
DefIndices[NewReg] = DefIndices[AntiDepReg];
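
Breaking an anti-dependence amounts to renaming the register at its def and at every reference recorded in RegRefs, once findSuitableFreeRegister has produced a register that no colliding instruction clobbers. A standalone sketch of the rename step on a toy instruction stream (names and the free register r5 are illustrative assumptions, not LLVM API):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Inst { std::string Def; std::vector<std::string> Uses; };

// Rename Old to New in every def and use from Begin onward, analogous to
// walking the RegRefs multimap entries for the anti-dependent register.
static void renameRange(std::vector<Inst> &Code, std::size_t Begin,
                        const std::string &Old, const std::string &New) {
  for (std::size_t I = Begin; I < Code.size(); ++I) {
    if (Code[I].Def == Old) Code[I].Def = New;
    for (std::string &U : Code[I].Uses)
      if (U == Old) U = New;
  }
}

int main() {
  std::vector<Inst> Code = {
    {"r2", {"r1"}}, // reads r1 ...
    {"r1", {"r3"}}, // ... then r1 is redefined: a WAR (anti) dependence
    {"r4", {"r1"}}, // use of the redefined r1
  };
  // Break the anti-dependence by renaming the second live range of r1,
  // assuming r5 was found free by something like findSuitableFreeRegister.
  renameRange(Code, 1, "r1", "r5");
  for (const Inst &I : Code) {
    std::cout << I.Def << " =";
    for (const std::string &U : I.Uses) std::cout << ' ' << U;
    std::cout << '\n';
  }
  return 0;
}

In the pass itself the rename is only legal after isNewRegClobberedByRefs and the kill/def index checks prove the new register is free across the whole range; the sketch assumes that has already been established.
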
diff --git a/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.h b/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.h
index 0ed7c35..0710780 100644
--- a/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/src/LLVM/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,6 +17,7 @@
#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,6 +28,7 @@
#include <map>
namespace llvm {
+class RegisterClassInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -35,6 +37,7 @@
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
/// AllocatableSet - The set of allocatable registers.
/// We'll be ignoring anti-dependencies on non-allocatable registers,
@@ -48,8 +51,10 @@
/// pointer.
std::vector<const TargetRegisterClass*> Classes;
- /// RegRegs - Map registers to all their references within a live range.
+ /// RegRefs - Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
/// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
@@ -64,7 +69,7 @@
SmallSet<unsigned, 4> KeepRegs;
public:
- CriticalAntiDepBreaker(MachineFunction& MFi);
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
~CriticalAntiDepBreaker();
/// Start - Initialize anti-dep breaking for a new basic block.
@@ -77,7 +82,8 @@
unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
- unsigned InsertPosIndex);
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues);
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
@@ -90,10 +96,14 @@
private:
void PrescanInstruction(MachineInstr *MI);
void ScanInstruction(MachineInstr *MI, unsigned Count);
- unsigned findSuitableFreeRegister(MachineInstr *MI,
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
- const TargetRegisterClass *);
+ const TargetRegisterClass *RC);
};
}
diff --git a/src/LLVM/lib/CodeGen/DeadMachineInstructionElim.cpp b/src/LLVM/lib/CodeGen/DeadMachineInstructionElim.cpp
index 138b83d..6de6c0c 100644
--- a/src/LLVM/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/src/LLVM/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -36,7 +36,9 @@
public:
static char ID; // Pass identification, replacement for typeid
- DeadMachineInstructionElim() : MachineFunctionPass(ID) {}
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
private:
bool isDead(const MachineInstr *MI) const;
@@ -45,13 +47,19 @@
char DeadMachineInstructionElim::ID = 0;
INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
- "Remove dead machine instructions", false, false);
+ "Remove dead machine instructions", false, false)
FunctionPass *llvm::createDeadMachineInstructionElimPass() {
return new DeadMachineInstructionElim();
}
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking, inline asm with no side effects and no defs can
+ // still be deleted. But there is so much bad inline asm code out there
+ // that we should let it be.
+ if (MI->isInlineAsm())
+ return false;
+
// Don't delete instructions with side effects.
bool SawStore = false;
if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
@@ -80,9 +88,8 @@
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
- // Compute a bitvector to represent all non-allocatable physregs.
- BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF);
- NonAllocatableRegs.flip();
+ // Treat reserved registers as always live.
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -91,9 +98,8 @@
I != E; ++I) {
MachineBasicBlock *MBB = &*I;
- // Start out assuming that all non-allocatable registers are live
- // out of this block.
- LivePhysRegs = NonAllocatableRegs;
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
if (!MBB->empty() && MBB->back().getDesc().isReturn())
@@ -104,6 +110,15 @@
LivePhysRegs.set(Reg);
}
+ // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end(); S != E; S++)
+ for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+ LI != (*S)->livein_end(); LI++)
+ LivePhysRegs.set(*LI);
+
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
@@ -149,7 +164,7 @@
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.reset(Reg);
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
@@ -166,7 +181,7 @@
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isUse()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.set(Reg);
for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
*AliasSet; ++AliasSet)
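
The rewritten pass seeds a live-physreg set from reserved registers, return-block live-outs, and (new here) successor live-ins, then scans each block bottom-up: an instruction whose def is not live at that point is dead and can be deleted; otherwise the def ends the earlier liveness and the uses become live. The backward scan reduced to a toy IR (deliberately simplified to single defs and no safety checks; not MachineInstr):

#include <cstddef>
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Inst { std::string Def; std::vector<std::string> Uses; };

int main() {
  std::vector<Inst> Block = {
    {"r1", {}},      // dead: overwritten below before any use
    {"r1", {"r2"}},
    {"r3", {"r1"}},  // r3 is live out of the block
  };
  std::set<std::string> Live = {"r3"}; // seeded like LivePhysRegs above

  // Bottom-up scan, mirroring the reverse_iterator walk in the pass.
  for (std::size_t I = Block.size(); I-- > 0;) {
    if (!Live.count(Block[I].Def)) {   // def never read: instruction is dead
      std::cout << "deleting dead def of " << Block[I].Def << '\n';
      Block.erase(Block.begin() + I);
      continue;                        // its uses never become live
    }
    Live.erase(Block[I].Def);          // the def ends earlier liveness
    Live.insert(Block[I].Uses.begin(), Block[I].Uses.end());
  }
  return 0;
}

Scanning bottom-up is what lets chains of dependent but ultimately dead instructions fall out in a single walk, since deleting a dead instruction keeps its operands from being marked live.
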
diff --git a/src/LLVM/lib/CodeGen/DwarfEHPrepare.cpp b/src/LLVM/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 0000000..ed9e409
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,750 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
+STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
+STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+
+namespace {
+ class DwarfEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLowering *TLI;
+
+ // The eh.exception intrinsic.
+ Function *ExceptionValueIntrinsic;
+
+ // The eh.selector intrinsic.
+ Function *SelectorIntrinsic;
+
+ // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
+ Constant *URoR;
+
+ // The EH language-specific catch-all type.
+ GlobalVariable *EHCatchAllValue;
+
+ // _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ // We both use and preserve dominator info.
+ DominatorTree *DT;
+
+ // The function we are running on.
+ Function *F;
+
+ // The landing pads for this function.
+ typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+ BBSet LandingPads;
+
+ bool InsertUnwindResumeCalls();
+
+ bool NormalizeLandingPads();
+ bool LowerUnwindsAndResumes();
+ bool MoveExceptionValueCalls();
+
+ Instruction *CreateExceptionValueCall(BasicBlock *BB);
+
+ /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+ /// use the "llvm.eh.catch.all.value" call need to be converted to use its
+ /// initializer instead.
+ bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+ bool HasCatchAllInSelector(IntrinsicInst *);
+
+ /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+ void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
+
+ /// FindAllURoRInvokes - Find all URoR invokes in the function.
+ void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
+
+ /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+ /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to
+ /// a landing pad within the current function. This is a candidate to merge
+ /// the selector associated with the URoR invoke with the one from the
+ /// URoR's landing pad.
+ bool HandleURoRInvokes();
+
+ /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
+ /// with the eh.exception call. This recursively looks past instructions
+ /// which don't change the EH pointer value, like casts or PHI nodes.
+ bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+ SmallPtrSet<PHINode*, 32> &SeenPHIs);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetMachine *tm) :
+ FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
+ ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
+ URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &Fn);
+
+ // getAnalysisUsage - We need the dominator tree for handling URoR.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+ return new DwarfEHPrepare(tm);
+}
+
+/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
+/// catch-all.
+bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
+ if (!EHCatchAllValue) return false;
+
+ unsigned ArgIdx = II->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
+ return GV == EHCatchAllValue;
+}
+
+/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+void DwarfEHPrepare::
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
+ for (Value::use_iterator
+ I = SelectorIntrinsic->use_begin(),
+ E = SelectorIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *II = cast<IntrinsicInst>(*I);
+
+ if (II->getParent()->getParent() != F)
+ continue;
+
+ if (!HasCatchAllInSelector(II))
+ Sels.insert(II);
+ else
+ CatchAllSels.insert(II);
+ }
+}
+
+/// FindAllURoRInvokes - Find all URoR invokes in the function.
+void DwarfEHPrepare::
+FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
+ for (Value::use_iterator
+ I = URoR->use_begin(),
+ E = URoR->use_end(); I != E; ++I) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
+ URoRInvokes.insert(II);
+ }
+}
+
+/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
+/// the "llvm.eh.catch.all.value" call need to be converted to use its
+/// initializer instead.
+bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+ if (!EHCatchAllValue) return false;
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ bool Changed = false;
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ I = Sels.begin(), E = Sels.end(); I != E; ++I) {
+ IntrinsicInst *Sel = *I;
+
+ // Index of the "llvm.eh.catch.all.value" variable.
+ unsigned OpIdx = Sel->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
+ if (GV != EHCatchAllValue) continue;
+ Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// FindSelectorAndURoR - Find the eh.selector call associated with the
+/// eh.exception call, and indicate whether there is a URoR "invoke" associated
+/// with the eh.exception call. This recursively looks past instructions which
+/// don't change the EH pointer value, like casts or PHI nodes.
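+/// Illustrative IR (an assumed shape, not from this module):
+///   %exn = call i8* @llvm.eh.exception()
+///   %tmp = bitcast i8* %exn to i8*
+///   %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %tmp, ...)
+/// The bitcast is looked through, so %sel gets associated with %exn.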
+bool
+DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+ SmallPtrSet<PHINode*, 32> &SeenPHIs) {
+ bool Changed = false;
+
+ for (Value::use_iterator
+ I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
+ Instruction *II = dyn_cast<Instruction>(*I);
+ if (!II || II->getParent()->getParent() != F) continue;
+
+ if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
+ if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
+ SelCalls.insert(Sel);
+ } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
+ if (Invoke->getCalledFunction() == URoR)
+ URoRInvoke = true;
+ } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
+ Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs);
+ } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
+ if (SeenPHIs.insert(PN))
+ // Don't process a PHI node more than once.
+ Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs);
+ }
+ }
+
+ return Changed;
+}
+
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a
+/// landing pad within the current function. This is a candidate to merge the
+/// selector associated with the URoR invoke with the one from the URoR's
+/// landing pad.
+bool DwarfEHPrepare::HandleURoRInvokes() {
+ if (!EHCatchAllValue) {
+ EHCatchAllValue =
+ F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
+ if (!EHCatchAllValue) return false;
+ }
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ SmallPtrSet<IntrinsicInst*, 32> Sels;
+ SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
+ FindAllCleanupSelectors(Sels, CatchAllSels);
+
+ if (!URoR) {
+ URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
+ if (!URoR) return CleanupSelectors(CatchAllSels);
+ }
+
+ SmallPtrSet<InvokeInst*, 32> URoRInvokes;
+ FindAllURoRInvokes(URoRInvokes);
+
+ SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
+
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
+ const BasicBlock *SelBB = (*SI)->getParent();
+ for (SmallPtrSet<InvokeInst*, 32>::iterator
+ UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
+ const BasicBlock *URoRBB = (*UI)->getParent();
+ if (DT->dominates(SelBB, URoRBB)) {
+ SelsToConvert.insert(*SI);
+ break;
+ }
+ }
+ }
+
+ bool Changed = false;
+
+ if (Sels.size() != SelsToConvert.size()) {
+ // If we haven't been able to convert all of the clean-up selectors, then
+ // loop through the slow way to see if they still need to be converted.
+ if (!ExceptionValueIntrinsic) {
+ ExceptionValueIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
+ if (!ExceptionValueIntrinsic)
+ return CleanupSelectors(CatchAllSels);
+ }
+
+ for (Value::use_iterator
+ I = ExceptionValueIntrinsic->use_begin(),
+ E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
+ if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
+
+ bool URoRInvoke = false;
+ SmallPtrSet<IntrinsicInst*, 8> SelCalls;
+ SmallPtrSet<PHINode*, 32> SeenPHIs;
+ Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs);
+
+ if (URoRInvoke) {
+ // This EH pointer is being used by an invoke of a URoR call and
+ // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
+ // need to convert it to a 'catch-all'.
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
+ if (!HasCatchAllInSelector(*SI))
+ SelsToConvert.insert(*SI);
+ }
+ }
+ }
+
+ if (!SelsToConvert.empty()) {
+ // Convert all clean-up eh.selectors, which are associated with "invokes" of
+ // URoR calls, into catch-all eh.selectors.
+ Changed = true;
+
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelsToConvert.begin(), SE = SelsToConvert.end();
+ SI != SE; ++SI) {
+ IntrinsicInst *II = *SI;
+
+ // Use the exception object pointer and the personality function
+ // from the original selector.
+ CallSite CS(II);
+ IntrinsicInst::op_iterator I = CS.arg_begin();
+ IntrinsicInst::op_iterator E = CS.arg_end();
+ IntrinsicInst::op_iterator B = prior(E);
+
+ // Exclude last argument if it is an integer.
+ if (isa<ConstantInt>(B)) E = B;
+
+ // Add exception object pointer (front).
+ // Add personality function (next).
+ // Add in any filter IDs (rest).
+ SmallVector<Value*, 8> Args(I, E);
+
+ Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
+
+ CallInst *NewSelector =
+ CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
+
+ NewSelector->setTailCall(II->isTailCall());
+ NewSelector->setAttributes(II->getAttributes());
+ NewSelector->setCallingConv(II->getCallingConv());
+
+ II->replaceAllUsesWith(NewSelector);
+ II->eraseFromParent();
+ }
+ }
+
+ Changed |= CleanupSelectors(CatchAllSels);
+ return Changed;
+}
+
+/// NormalizeLandingPads - Normalize and discover landing pads, noting them
+/// in the LandingPads set. A landing pad is normal if the only CFG edges
+/// that end at it are unwind edges from invoke instructions. If we inlined
+/// through an invoke we could have a normal branch from the previous
+/// unwind block through to the landing pad for the original invoke.
+/// Abnormal landing pads are fixed up by redirecting all unwind edges to
+/// a new basic block which falls through to the original.
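+/// As a sketch (assumed IR, not from this module), given a landing pad %lpad
+/// reached both by an invoke's unwind edge and by a plain branch:
+///   invoke void @f() to label %cont unwind label %lpad
+///   ...
+///   br label %lpad
+/// the unwind edge is redirected to a new block %lpad_unwind_edge that falls
+/// through to %lpad, while the plain branch keeps targeting %lpad.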
+bool DwarfEHPrepare::NormalizeLandingPads() {
+ bool Changed = false;
+
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
+ bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<InvokeInst>(TI))
+ continue;
+ BasicBlock *LPad = TI->getSuccessor(1);
+ // Skip landing pads that have already been normalized.
+ if (LandingPads.count(LPad))
+ continue;
+
+ // Check that only invoke unwind edges end at the landing pad.
+ bool OnlyUnwoundTo = true;
+ bool SwitchOK = usingSjLjEH;
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
+ PI != PE; ++PI) {
+ TerminatorInst *PT = (*PI)->getTerminator();
+ // The SjLj dispatch block uses a switch instruction. This is effectively
+ // an unwind edge, so we can disregard it here. There will only ever
+ // be one dispatch, however, so if there are multiple switches, one
+ // of them truly is a normal edge, not an unwind edge.
+ if (SwitchOK && isa<SwitchInst>(PT)) {
+ SwitchOK = false;
+ continue;
+ }
+ if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
+ OnlyUnwoundTo = false;
+ break;
+ }
+ }
+
+ if (OnlyUnwoundTo) {
+ // Only unwind edges lead to the landing pad. Remember the landing pad.
+ LandingPads.insert(LPad);
+ continue;
+ }
+
+ // At least one normal edge ends at the landing pad. Redirect the unwind
+ // edges to a new basic block which falls through into this one.
+
+ // Create the new basic block.
+ BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
+ LPad->getName() + "_unwind_edge");
+
+ // Insert it into the function right before the original landing pad.
+ LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
+
+ // Redirect unwind edges from the original landing pad to NewBB.
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
+ TerminatorInst *PT = (*PI++)->getTerminator();
+ if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
+ // Unwind to the new block.
+ PT->setSuccessor(1, NewBB);
+ }
+
+ // If there are any PHI nodes in LPad, we need to update them so that they
+ // merge incoming values from NewBB instead.
+ for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
+ PHINode *PN = cast<PHINode>(II);
+ pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
+
+ // Check to see if all of the values coming in via unwind edges are the
+ // same. If so, we don't need to create a new PHI node.
+ Value *InVal = PN->getIncomingValueForBlock(*PB);
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal == 0) {
+ // Different unwind edges have different values. Create a new PHI node
+ // in NewBB.
+ PHINode *NewPN = PHINode::Create(PN->getType(),
+ PN->getNumIncomingValues(),
+ PN->getName()+".unwind", NewBB);
+ // Add an entry for each unwind edge, using the value from the old PHI.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
+
+ // Now use this new PHI as the common incoming value for NewBB in PN.
+ InVal = NewPN;
+ }
+
+ // Revector exactly one entry in the PHI node to come from NewBB
+ // and delete all other entries that come from unwind edges. If
+ // there are both normal and unwind edges from the same predecessor,
+ // this leaves an entry for the normal edge.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ PN->removeIncomingValue(*PI);
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ // Add a fallthrough from NewBB to the original landing pad.
+ BranchInst::Create(LPad, NewBB);
+
+ // Now update DominatorTree analysis information.
+ DT->splitBlock(NewBB);
+
+ // Remember the newly constructed landing pad. The original landing pad
+ // LPad is no longer a landing pad now that all unwind edges have been
+ // revectored to NewBB.
+ LandingPads.insert(NewBB);
+ ++NumLandingPadsSplit;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// LowerUnwindsAndResumes - Turn unwind instructions and eh.resume calls into
+/// calls to _Unwind_Resume, rethrowing any previously caught exception. This
+/// will crash horribly at runtime if there is no such exception: using unwind
+/// to throw a new exception is currently not supported.
+bool DwarfEHPrepare::LowerUnwindsAndResumes() {
+ SmallVector<Instruction*, 16> ResumeInsts;
+
+ for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
+ for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){
+ if (isa<UnwindInst>(bi))
+ ResumeInsts.push_back(bi);
+ else if (CallInst *call = dyn_cast<CallInst>(bi))
+ if (Function *fn = dyn_cast<Function>(call->getCalledValue()))
+ if (fn->getName() == "llvm.eh.resume")
+ ResumeInsts.push_back(bi);
+ }
+ }
+
+ if (ResumeInsts.empty()) return false;
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = ResumeInsts[0]->getContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ bool Changed = false;
+
+ for (SmallVectorImpl<Instruction*>::iterator
+ I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) {
+ Instruction *RI = *I;
+
+ // Replace the resuming instruction with a call to _Unwind_Resume (or the
+ // appropriate target equivalent).
+
+ llvm::Value *ExnValue;
+ if (isa<UnwindInst>(RI))
+ ExnValue = CreateExceptionValueCall(RI->getParent());
+ else
+ ExnValue = cast<CallInst>(RI)->getArgOperand(0);
+
+ // Create the call...
+ CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // ...followed by an UnreachableInst, if it was an unwind.
+ // Calls to llvm.eh.resume are typically already followed by this.
+ if (isa<UnwindInst>(RI))
+ new UnreachableInst(RI->getContext(), RI);
+
+ if (isa<UnwindInst>(RI))
+ ++NumUnwindsLowered;
+ else
+ ++NumResumesLowered;
+
+ // Nuke the resume instruction.
+ RI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
+/// landing pads by replacing calls outside of landing pads with direct use of
+/// a register holding the appropriate value; this requires adding calls inside
+/// all landing pads to initialize the register. Also, move eh.exception calls
+/// inside landing pads to the start of the landing pad (optional, but may make
+/// things simpler for later passes).
+bool DwarfEHPrepare::MoveExceptionValueCalls() {
+ // If the eh.exception intrinsic is not declared in the module then there is
+ // nothing to do. Speed up compilation by checking for this common case.
+ if (!ExceptionValueIntrinsic &&
+ !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
+ return false;
+
+ bool Changed = false;
+
+ // Move calls to eh.exception that are inside a landing pad to the start of
+ // the landing pad.
+ for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ BasicBlock *LP = *LI;
+ for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception.
+ if (!EI->use_empty()) {
+ // If there is already a call to eh.exception at the start of the
+ // landing pad, then get hold of it; otherwise create such a call.
+ Value *CallAtStart = CreateExceptionValueCall(LP);
+
+ // If the call was at the start of a landing pad then leave it alone.
+ if (EI == CallAtStart)
+ continue;
+ EI->replaceAllUsesWith(CallAtStart);
+ }
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
+ }
+
+ // Look for calls to eh.exception that are not in a landing pad. If one is
+ // found, then a register that holds the exception value will be created in
+ // each landing pad, and the SSAUpdater will be used to compute the values
+ // returned by eh.exception calls outside of landing pads.
+ SSAUpdater SSA;
+
+ // Remember where we found the eh.exception call, to avoid rescanning earlier
+ // basic blocks which we already know contain no eh.exception calls.
+ bool FoundCallOutsideLandingPad = false;
+ Function::iterator BB = F->begin();
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II)
+ if (isa<EHExceptionInst>(II)) {
+ SSA.Initialize(II->getType(), II->getName());
+ FoundCallOutsideLandingPad = true;
+ break;
+ }
+
+ if (FoundCallOutsideLandingPad)
+ break;
+ }
+
+ // If all calls to eh.exception are in landing pads then we are done.
+ if (!FoundCallOutsideLandingPad)
+ return Changed;
+
+ // Add a call to eh.exception at the start of each landing pad, and tell the
+ // SSAUpdater that this is the value produced by the landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI)
+ SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
+
+ // Now turn all calls to eh.exception that are not in a landing pad into a use
+ // of the appropriate register.
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception, replace it with the value from any
+ // upstream landing pad(s).
+ EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ }
+ }
+
+ return true;
+}
+
+/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
+/// the start of the basic block (unless there already is one, in which case
+/// the existing call is returned).
+Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHIOrDbg();
+ // Is this a call to eh.exception?
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception)
+ // Reuse the existing call.
+ return Start;
+
+ // Find the eh.exception intrinsic if we didn't already.
+ if (!ExceptionValueIntrinsic)
+ ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::eh_exception);
+
+ // Create the call.
+ return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+}
+
+/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
+/// into calls to the appropriate _Unwind_Resume function.
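+/// Sketch of the intended result (assumed IR shape): each
+///   resume { i8*, i32 } %lp
+/// becomes a branch to a shared %unwind_resume block whose PHI collects the
+/// extracted exception objects and feeds a call to _Unwind_Resume, followed
+/// by unreachable.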
+bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+ bool UsesNewEH = false;
+ SmallVector<ResumeInst*, 16> Resumes;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
+ Resumes.push_back(RI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(TI))
+ UsesNewEH = II->getUnwindDest()->isLandingPad();
+ }
+
+ if (Resumes.empty())
+ return UsesNewEH;
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ LLVMContext &Ctx = Resumes[0]->getContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ // Create the basic block where the _Unwind_Resume call will live.
+ LLVMContext &Ctx = F->getContext();
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(),
+ "exn.obj", UnwindBB);
+
+ // Extract the exception object from the ResumeInst and add it to the PHI node
+ // that feeds the _Unwind_Resume call.
+ BasicBlock *UnwindBBDom = Resumes[0]->getParent();
+ for (SmallVectorImpl<ResumeInst*>::iterator
+ I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
+ ResumeInst *RI = *I;
+ BranchInst::Create(UnwindBB, RI->getParent());
+ ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0),
+ 0, "exn.obj", RI);
+ PN->addIncoming(ExnObj, RI->getParent());
+ UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom);
+ RI->eraseFromParent();
+ }
+
+ // Call the function.
+ CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+
+ // Now update DominatorTree analysis information.
+ DT->addNewBlock(UnwindBB, UnwindBBDom);
+ return true;
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+
+ // Initialize internal state.
+ DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH.
+ F = &Fn;
+
+ if (InsertUnwindResumeCalls()) {
+ // FIXME: The rest of this function can go once the new EH is done.
+ LandingPads.clear();
+ return true;
+ }
+
+ // Ensure that only unwind edges end at landing pads (a landing pad is a
+ // basic block where an invoke unwind edge ends).
+ Changed |= NormalizeLandingPads();
+
+ // Turn unwind instructions and eh.resume calls into libcalls.
+ Changed |= LowerUnwindsAndResumes();
+
+ // TODO: Move eh.selector calls to landing pads and combine them.
+
+ // Move eh.exception calls to landing pads.
+ Changed |= MoveExceptionValueCalls();
+
+ Changed |= HandleURoRInvokes();
+
+ LandingPads.clear();
+
+ return Changed;
+}
diff --git a/src/LLVM/lib/CodeGen/ELF.h b/src/LLVM/lib/CodeGen/ELF.h
index fb884c9..5b63468 100644
--- a/src/LLVM/lib/CodeGen/ELF.h
+++ b/src/LLVM/lib/CodeGen/ELF.h
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Support/ELF.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
class GlobalValue;
@@ -173,7 +173,7 @@
unsigned Offset; // sh_offset - Offset from the file start
unsigned Size; // sh_size - The section size.
unsigned Link; // sh_link - Section header table index link.
- unsigned Info; // sh_info - Auxillary information.
+ unsigned Info; // sh_info - Auxiliary information.
unsigned Align; // sh_addralign - Alignment of section.
unsigned EntSize; // sh_entsize - Size of entries in the section e
diff --git a/src/LLVM/lib/CodeGen/ELFCodeEmitter.cpp b/src/LLVM/lib/CodeGen/ELFCodeEmitter.cpp
new file mode 100644
index 0000000..660424c
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ELFCodeEmitter.cpp
@@ -0,0 +1,205 @@
+//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfce"
+
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "processing function: "
+ << MF.getFunction()->getName() << "\n");
+
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getTextSection(MF.getFunction());
+
+ // Set the desired binary object to be used by the code emitters
+ setBinaryObject(ES);
+
+ // Get the function alignment in bytes
+ unsigned Align = (1 << MF.getAlignment());
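+ // (getAlignment() returns a log2 value here, so the shift converts it
+ // into a byte alignment.)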
+
+ // The function must start on its required alignment
+ ES->emitAlignment(Align);
+
+ // Update the section alignment if needed.
+ ES->Align = std::max(ES->Align, Align);
+
+ // Record the function start offset
+ FnStartOff = ES->getCurrentPCOffset();
+
+ // Emit constant pool and jump tables to their appropriate sections.
+ // They need to be emitted before the function because on some targets
+ // the latter may reference a JT or CP entry address.
+ emitConstantPool(MF.getConstantPool());
+ if (MF.getJumpTableInfo())
+ emitJumpTables(MF.getJumpTableInfo());
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Add a symbol to represent the function.
+ const Function *F = MF.getFunction();
+ ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
+ EW.getGlobalELFVisibility(F));
+ FnSym->SectionIdx = ES->SectionIdx;
+ FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
+ EW.AddPendingGlobalSymbol(F, true);
+
+ // Offset from start of Section
+ FnSym->Value = FnStartOff;
+
+ if (!F->hasPrivateLinkage())
+ EW.SymbolList.push_back(FnSym);
+
+ // Patch up Jump Table Section relocations to use the real MBB offsets
+ // now that the MBB label offsets inside the function are known.
+ if (MF.getJumpTableInfo()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
+ MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+ uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setResultPointer((void*)MBBOffset);
+ MR.setConstantVal(ES->SectionIdx);
+ JTSection.addRelocation(MR);
+ }
+ }
+
+ // If we have emitted any relocations to function-specific objects such as
+ // basic blocks, constant pool entries, or jump tables, record their
+ // addresses now so that we can rewrite them with the correct addresses later.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ intptr_t Addr;
+ if (MR.isGlobalValue()) {
+ EW.AddPendingGlobalSymbol(MR.getGlobalValue());
+ } else if (MR.isExternalSymbol()) {
+ EW.AddPendingExternalSymbol(MR.getExternalSymbol());
+ } else if (MR.isBasicBlock()) {
+ Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setConstantVal(ES->SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isConstantPoolIndex()) {
+ Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isJumpTableIndex()) {
+ ELFSection &JTSection = EW.getJumpTableSection();
+ Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(JTSection.SectionIdx);
+ MR.setResultPointer((void*)Addr);
+ } else {
+ llvm_unreachable("Unhandled relocation type");
+ }
+ ES->addRelocation(MR);
+ }
+
+ // Clear per-function data structures.
+ JTRelocations.clear();
+ Relocations.clear();
+ CPLocations.clear();
+ CPSections.clear();
+ JTLocations.clear();
+ MBBLocations.clear();
+ return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in and emit the constant
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // TODO: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf constant pools!");
+
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = CP[i];
+
+ // Record the constant pool location and the section index
+ ELFSection &CstPool = EW.getConstantPoolSection(CPE);
+ CPLocations.push_back(CstPool.size());
+ CPSections.push_back(CstPool.SectionIdx);
+
+ if (CPE.isMachineConstantPoolEntry())
+ assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
+
+ // Emit the constant to constant pool section
+ EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
+ }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for elf jump tables!");
+
+ const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
+ unsigned EntrySize = 4; //MJTI->getEntrySize();
+
+ // Get the ELF Section to emit the jump table
+ ELFSection &JTSection = EW.getJumpTableSection();
+
+ // For each JT, record its offset from the start of the section
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+
+ // Record JT 'i' offset in the JT section
+ JTLocations.push_back(JTSection.size());
+
+ // Each MBB entry in the Jump table section has a relocation entry
+ // against the current text section.
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
+ MachineRelocation MR =
+ MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
+
+ // Add the relocation to the Jump Table section
+ JTRelocations.push_back(MR);
+
+ // Output placeholder for MBB in the JT section
+ for (unsigned s=0; s < EntrySize; ++s)
+ JTSection.emitByte(0);
+ }
+ }
+}
+
+} // end namespace llvm
diff --git a/src/LLVM/lib/CodeGen/ELFCodeEmitter.h b/src/LLVM/lib/CodeGen/ELFCodeEmitter.h
new file mode 100644
index 0000000..8671c67
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ELFCodeEmitter.h
@@ -0,0 +1,78 @@
+//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFCODEEMITTER_H
+#define ELFCODEEMITTER_H
+
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include <vector>
+
+namespace llvm {
+ class ELFWriter;
+ class ELFSection;
+
+ /// ELFCodeEmitter - This class is used by the ELFWriter to
+ /// emit the code for functions to the ELF file.
+ class ELFCodeEmitter : public ObjectCodeEmitter {
+ ELFWriter &EW;
+
+ /// Target machine description
+ TargetMachine &TM;
+
+ /// Section containing code for functions
+ ELFSection *ES;
+
+ /// Relocations - Record relocations needed by the current function
+ std::vector<MachineRelocation> Relocations;
+
+ /// JTRelocations - Record relocations needed by the jump table
+ /// section.
+ std::vector<MachineRelocation> JTRelocations;
+
+ /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
+ uintptr_t FnStartOff;
+ public:
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
+
+ /// addRelocation - Register new relocations for this function
+ void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ /// emitConstantPool - For each constant pool entry, figure out which
+ /// section the constant should live in and emit data to it
+ void emitConstantPool(MachineConstantPool *MCP);
+
+ /// emitJumpTables - Emit all the jump tables for a given jump table
+ /// info and record them to the appropriate section.
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
+
+ /// emitLabel - Emits a label
+ virtual void emitLabel(MCSymbol *Label) {
+ assert(0 && "emitLabel not implemented");
+ }
+
+ /// getLabelAddress - Return the address of the specified LabelID,
+ /// only usable after the LabelID has been emitted.
+ virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+ assert(0 && "getLabelAddress not implemented");
+ return 0;
+ }
+
+ virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
+
+}; // end class ELFCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
diff --git a/src/LLVM/lib/CodeGen/ELFWriter.cpp b/src/LLVM/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 0000000..f2c2185
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,1105 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+// #1. ELF Header
+// #2. '.text' section
+// #3. '.data' section
+// #4. '.bss' section (conceptual position in file)
+// ...
+// #X. '.shstrtab' section
+// #Y. Section Table
+//
+// The entries in the section table are laid out as:
+// #0. Null entry [required]
+// #1. ".text" entry - the program code
+// #2. ".data" entry - global variables with initializers. [ if needed ]
+// #3. ".bss" entry - global variables without initializers. [ if needed ]
+// ...
+// #N. ".shstrtab" entry - String table for the section names.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfwriter"
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(ID), O(o), TM(tm),
+ OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(),
+ &TM.getTargetLowering()->getObjFileLowering())),
+ TLOF(TM.getTargetLowering()->getObjFileLowering()),
+ is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
+ isLittleEndian(TM.getTargetData()->isLittleEndian()),
+ ElfHdr(isLittleEndian, is64Bit) {
+
+ MAI = TM.getMCAsmInfo();
+ TEW = TM.getELFWriterInfo();
+
+ // Create the object code emitter object for this target.
+ ElfCE = new ELFCodeEmitter(*this);
+
+ // Initial number of sections
+ NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+ delete ElfCE;
+ delete &OutContext;
+
+ while(!SymbolList.empty()) {
+ delete SymbolList.back();
+ SymbolList.pop_back();
+ }
+
+ while(!PrivateSyms.empty()) {
+ delete PrivateSyms.back();
+ PrivateSyms.pop_back();
+ }
+
+ while(!SectionList.empty()) {
+ delete SectionList.back();
+ SectionList.pop_back();
+ }
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
+
+ Mang = new Mangler(OutContext, *TM.getTargetData());
+
+ // ELF Header
+ // ----------
+ // Fields e_shnum and e_shstrndx are only known after all sections have
+ // been emitted. Their locations in the output buffer are recorded so
+ // they can be patched up later.
+ //
+ // Note
+ // ----
+ // The emitWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter for *_off and *_addr ELF types.
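+ //
+ // e.g. the e_entry and e_phoff words emitted below take 4 bytes each
+ // in an ELF32 object and 8 bytes each in an ELF64 object.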
+
+ ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
+ ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
+ ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
+ ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
+
+ ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
+ ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
+ ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
+
+ ElfHdr.emitWord16(ELF::ET_REL); // e_type
+ ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
+ ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
+ ElfHdr.emitWord(0); // e_entry, no entry point in .o file
+ ElfHdr.emitWord(0); // e_phoff, no program header for .o
+ ELFHdr_e_shoff_Offset = ElfHdr.size();
+ ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
+ ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
+ ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
+ ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
+ ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ ElfHdr.emitWord16(TEW->getSHdrSize());
+
+ // e_shnum = # of section header ents
+ ELFHdr_e_shnum_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // e_shstrndx = Section # of '.shstrtab'
+ ELFHdr_e_shstrndx_Offset = ElfHdr.size();
+ ElfHdr.emitWord16(0); // Placeholder
+
+ // Add the null section, which is required to be first in the file.
+ getNullSection();
+
+ // The first entry in the symtab is the null symbol and the second
+ // is a local symbol containing the module/file name
+ SymbolList.push_back(new ELFSym());
+ SymbolList.push_back(ELFSym::getFileSym());
+
+ return false;
+}
+
+// AddPendingGlobalSymbol - Add a global to be processed and to
+// the global symbol lookup; use a zero index because the table
+// index will be determined later.
+void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup /* = false */) {
+ PendingGlobals.insert(GV);
+ if (AddToLookup)
+ GblSymLookup[GV] = 0;
+}
+
+// AddPendingExternalSymbol - Add the external to be processed
+// and to the external symbol lookup; use a zero index because
+// the symbol table index will be determined later.
+void ELFWriter::AddPendingExternalSymbol(const char *External) {
+ PendingExternals.insert(External);
+ ExtSymLookup[External] = 0;
+}
+
+ELFSection &ELFWriter::getDataSection() {
+ const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
+ return getSection(Data->getSectionName(), Data->getType(),
+ Data->getFlags(), 4);
+}
+
+ELFSection &ELFWriter::getBSSSection() {
+ const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
+ return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
+}
+
+// getCtorSection - Get the static constructor section
+ELFSection &ELFWriter::getCtorSection() {
+ const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
+ return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
+}
+
+// getDtorSection - Get the static destructor section
+ELFSection &ELFWriter::getDtorSection() {
+ const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
+ return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
+}
+
+// getTextSection - Get the text section for the specified function
+ELFSection &ELFWriter::getTextSection(const Function *F) {
+ const MCSectionELF *Text =
+ (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
+ return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
+}
+
+// getJumpTableSection - Get a read only section for constants when
+// emitting jump tables. TODO: add PIC support
+ELFSection &ELFWriter::getJumpTableSection() {
+ const MCSectionELF *JT =
+ (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
+ return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
+ TM.getTargetData()->getPointerABIAlignment());
+}
+
+// getConstantPoolSection - Get a constant pool section based on the machine
+// constant pool entry type and relocation info.
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
+ SectionKind Kind;
+ switch (CPE.getRelocationInfo()) {
+ default: llvm_unreachable("Unknown section kind");
+ case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ case 4: Kind = SectionKind::getMergeableConst4(); break;
+ case 8: Kind = SectionKind::getMergeableConst8(); break;
+ case 16: Kind = SectionKind::getMergeableConst16(); break;
+ default: Kind = SectionKind::getMergeableConst(); break;
+ }
+ }
+
+ const MCSectionELF *CPSect =
+ (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
+ return getSection(CPSect->getSectionName(), CPSect->getType(),
+ CPSect->getFlags(), CPE.getAlignment());
+}
+
+// getRelocSection - Return the relocation section of section 'S'. The
+// section type is SHT_RELA if the target uses explicit addends, SHT_REL
+// otherwise.
+ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
+ unsigned SectionType = TEW->hasRelocationAddend() ?
+ ELF::SHT_RELA : ELF::SHT_REL;
+
+ std::string SectionName(".rel");
+ if (TEW->hasRelocationAddend())
+ SectionName.append("a");
+ SectionName.append(S.getName());
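+ // e.g. for a section named ".text" this produces ".rel.text", or
+ // ".rela.text" when the target uses explicit addends.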
+
+ return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
+}
+
+// getGlobalELFVisibility - Returns the ELF specific visibility type
+unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ default:
+ llvm_unreachable("unknown visibility type");
+ case GlobalValue::DefaultVisibility:
+ return ELF::STV_DEFAULT;
+ case GlobalValue::HiddenVisibility:
+ return ELF::STV_HIDDEN;
+ case GlobalValue::ProtectedVisibility:
+ return ELF::STV_PROTECTED;
+ }
+ return 0;
+}
+
+// getGlobalELFBinding - Returns the ELF specific binding type
+unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
+ if (GV->hasInternalLinkage())
+ return ELF::STB_LOCAL;
+
+ if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
+ return ELF::STB_WEAK;
+
+ return ELF::STB_GLOBAL;
+}
+
+// getGlobalELFType - Returns the ELF specific type for a global
+unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
+ if (GV->isDeclaration())
+ return ELF::STT_NOTYPE;
+
+ if (isa<Function>(GV))
+ return ELF::STT_FUNC;
+
+ return ELF::STT_OBJECT;
+}
+
+// IsELFUndefSym - True if the global value must be marked as a symbol
+// which points to a SHN_UNDEF section. This means that the symbol has
+// no definition in the module.
+static bool IsELFUndefSym(const GlobalValue *GV) {
+ return GV->isDeclaration() || (isa<Function>(GV));
+}
+
+// AddToSymbolList - Update the symbol lookup and, if the symbol is
+// private, add it to the PrivateSyms list, otherwise to SymbolList.
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
+ assert(GblSym->isGlobalValue() && "Symbol must be a global value");
+
+ const GlobalValue *GV = GblSym->getGlobalValue();
+ if (GV->hasPrivateLinkage()) {
+ // For private symbols, keep track of the index inside
+ // the private list since they will never go to the symbol
+ // table and won't be patched up later.
+ PrivateSyms.push_back(GblSym);
+ GblSymLookup[GV] = PrivateSyms.size()-1;
+ } else {
+ // Non-private symbols are left with zero indices until
+ // they are patched up during symbol table emission
+ // (where the indices are created).
+ SymbolList.push_back(GblSym);
+ GblSymLookup[GV] = 0;
+ }
+}
+
+/// HasCommonSymbols - True if this section holds common symbols; this is
+/// indicated on the ELF object file by a symbol with SHN_COMMON section
+/// header index.
+static bool HasCommonSymbols(const MCSectionELF &S) {
+ // FIXME: this is wrong, a common symbol can be in .data for example.
+ if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
+ return true;
+
+ return false;
+}
+
+
+// EmitGlobal - Choose the right section for global and emit it
+void ELFWriter::EmitGlobal(const GlobalValue *GV) {
+
+ // Check if the referenced symbol is already emitted
+ if (GblSymLookup.find(GV) != GblSymLookup.end())
+ return;
+
+ // Handle ELF Bind, Visibility and Type for the current symbol
+ unsigned SymBind = getGlobalELFBinding(GV);
+ unsigned SymType = getGlobalELFType(GV);
+ bool IsUndefSym = IsELFUndefSym(GV);
+
+ ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
+ : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
+
+ if (!IsUndefSym) {
+ assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ // Handle special llvm globals
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ // Get the ELF section where this global belongs from TLOF
+ const MCSectionELF *S =
+ (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
+ ELFSection &ES =
+ getSection(S->getSectionName(), S->getType(), S->getFlags());
+ SectionKind Kind = S->getKind();
+
+ // The symbol alignment should update the section alignment if needed
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPreferredAlignment(GVar);
+ unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
+ GblSym->Size = Size;
+
+ if (HasCommonSymbols(*S)) { // Symbol must go to a common section
+ GblSym->SectionIdx = ELF::SHN_COMMON;
+
+ // A new linkonce section is created for each global in the
+ // common section, the default alignment is 1 and the symbol
+ // value contains its alignment.
+ ES.Align = 1;
+ GblSym->Value = Align;
+
+ } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // Round the size up to the alignment so the next object can
+ // start at the right offset in the section.
+ if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
+ ES.Align = std::max(ES.Align, Align);
+
+ // GblSym->Value should contain the virtual offset inside the section.
+ // Virtual because the BSS space is not allocated on ELF objects
+ GblSym->Value = ES.Size;
+ ES.Size += Size;
+
+ } else { // The symbol must go to some kind of data section
+ GblSym->SectionIdx = ES.SectionIdx;
+
+ // GblSym->Value should contain the symbol offset inside the section,
+ // and all symbols should start on their required alignment boundary
+ ES.Align = std::max(ES.Align, Align);
+ ES.emitAlignment(Align);
+ GblSym->Value = ES.size();
+
+ // Emit the global to the data section 'ES'
+ EmitGlobalConstant(GVar->getInitializer(), ES);
+ }
+ }
+
+ AddToSymbolList(GblSym);
+}
+
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS) {
+
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CVS->getType());
+ const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+ uint64_t sizeSoFar = 0;
+ for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+ const Constant* field = CVS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+ uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+ - cvsLayout->getElementOffset(i)) - fieldSize;
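+    // e.g. for an illustrative struct { i8, i32 } laid out with a
+    // 4-byte-aligned i32, the i8 field is followed by three padding bytes.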
+ sizeSoFar += fieldSize + padSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstant(field, GblS);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ GblS.emitZeros(padSize);
+ }
+ assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CV->getType());
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i), GblS);
+ return;
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ GblS.emitZeros(Size);
+ return;
+ } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ EmitGlobalConstantStruct(CVS, GblS);
+ return;
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ APInt Val = CFP->getValueAPF().bitcastToAPInt();
+ if (CFP->getType()->isDoubleTy())
+ GblS.emitWord64(Val.getZExtValue());
+ else if (CFP->getType()->isFloatTy())
+ GblS.emitWord32(Val.getZExtValue());
+ else if (CFP->getType()->isX86_FP80Ty()) {
+ unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
+ TD->getTypeStoreSize(CFP->getType());
+ GblS.emitWordFP80(Val.getRawData(), PadSize);
+ } else if (CFP->getType()->isPPC_FP128Ty())
+ llvm_unreachable("PPC_FP128Ty global emission not implemented");
+ return;
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (Size == 1)
+ GblS.emitByte(CI->getZExtValue());
+ else if (Size == 2)
+ GblS.emitWord16(CI->getZExtValue());
+ else if (Size == 4)
+ GblS.emitWord32(CI->getZExtValue());
+ else
+ EmitGlobalConstantLargeInt(CI, GblS);
+ return;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ VectorType *PTy = CP->getType();
+ for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+ EmitGlobalConstant(CP->getOperand(I), GblS);
+ return;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Resolve a constant expression which returns a (Constant, Offset)
+ // pair. If 'Res.first' is a GlobalValue, emit a relocation with
+ // the offset 'Res.second', otherwise emit a global constant as is
+ // always done for non-constant-expression types.
+ CstExprResTy Res = ResolveConstantExpr(CE);
+ const Constant *Op = Res.first;
+
+ if (isa<GlobalValue>(Op))
+ EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
+ TD->getTypeAllocSize(Op->getType()),
+ GblS, Res.second);
+ else
+ EmitGlobalConstant(Op, GblS);
+
+ return;
+ } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
+ // Fill the data entry with zeros or emit a relocation entry
+ if (isa<ConstantPointerNull>(CV))
+ GblS.emitZeros(Size);
+ else
+ EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
+ Size, GblS);
+ return;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function and
+ // therefore must be referenced using a relocation entry.
+ EmitGlobalDataRelocation(GV, Size, GblS);
+ return;
+ }
+
+ std::string msg;
+ raw_string_ostream ErrorMsg(msg);
+ ErrorMsg << "Constant unimp for type: " << *CV->getType();
+ report_fatal_error(ErrorMsg.str());
+}
+
+// ResolveConstantExpr - Resolve the constant expression until it stops
+// yielding other constant expressions.
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
+ const TargetData *TD = TM.getTargetData();
+
+ // There isn't a constant expression nested inside another anymore.
+ if (!isa<ConstantExpr>(CV))
+ return std::make_pair(CV, 0);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ return ResolveConstantExpr(CE->getOperand(0));
+
+ case Instruction::GetElementPtr: {
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec);
+ return std::make_pair(ptrVal, Offset);
+ }
+ case Instruction::IntToPtr: {
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return ResolveConstantExpr(Op);
+ }
+ case Instruction::PtrToInt: {
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot whose size is greater than or equal to the pointer size.
+ if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
+ return ResolveConstantExpr(Op);
+
+ llvm_unreachable("Integer size less then pointer size");
+ }
+ case Instruction::Add:
+ case Instruction::Sub: {
+ // Only handle cases where there's a constant expression with GlobalValue
+ // as first operand and ConstantInt as second, which are the cases we can
+ // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
+ // 1) Instruction::Add => (global) + CstInt
+ // 2) Instruction::Sub => (global) + -CstInt
+ const Constant *Op0 = CE->getOperand(0);
+ const Constant *Op1 = CE->getOperand(1);
+ assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
+
+ CstExprResTy Res = ResolveConstantExpr(Op0);
+ assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
+
+ const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ return std::make_pair(Res.first, RHS.getSExtValue());
+ case Instruction::Sub:
+ return std::make_pair(Res.first, (-RHS).getSExtValue());
+ }
+ }
+ }
+
+ report_fatal_error(CE->getOpcodeName() +
+ StringRef(": Unsupported ConstantExpr type"));
+
+ return std::make_pair(CV, 0); // silence warning
+}
+
+void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset) {
+ // Create the relocation entry for the global value
+ MachineRelocation MR =
+ MachineRelocation::getGV(GblS.getCurrentPCOffset(),
+ TEW->getAbsoluteLabelMachineRelTy(),
+ const_cast<GlobalValue*>(GV),
+ Offset);
+
+ // Fill the data entry with zeros
+ GblS.emitZeros(Size);
+
+ // Add the relocation entry for the current data section
+ GblS.addRelocation(MR);
+}
+
+void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ ELFSection &S) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert(isPowerOf2_32(BitWidth) &&
+ "Non-power-of-2-sized integers not handled!");
+
+ const uint64_t *RawData = CI->getValue().getRawData();
+ uint64_t Val = 0;
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
+ S.emitWord64(Val);
+ }
+}
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used")
+ llvm_unreachable("not implemented yet");
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = TD->getPointerPrefAlignment();
+ if (GV->getName() == "llvm.global_ctors") {
+ ELFSection &Ctor = getCtorSection();
+ Ctor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Ctor);
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ ELFSection &Dtor = getDtorSection();
+ Dtor.emitAlignment(Align);
+ EmitXXStructorList(GV->getInitializer(), Dtor);
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the
+/// function pointers, ignoring the init priority.
+void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
+ // Should be an array of '{ i32, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
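+  // e.g. an assumed-shape ctor list (illustrative only):
+  //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+  //     [{ i32, void ()* } { i32 65535, void ()* @ctor }]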
+ if (List->isNullValue()) return;
+ const ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ if (InitList->getOperand(i)->isNullValue())
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+
+ if (CS->getOperand(1)->isNullValue())
+ continue;
+
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1), Xtor);
+ }
+}
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the ElfCE object above.
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+ // Emit .data section placeholder
+ getDataSection();
+
+ // Emit .bss section placeholder
+ getBSSSection();
+
+ // Build and emit data, bss and "common" sections.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit all pending globals
+ for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
+ I != E; ++I)
+ EmitGlobal(*I);
+
+ // Emit all pending externals
+ for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
+ I != E; ++I)
+ SymbolList.push_back(ELFSym::getExtSym(*I));
+
+ // Emit a symbol for each section created until now, skipping the null section
+ for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ ELFSym *SectionSym = ELFSym::getSectionSym();
+ SectionSym->SectionIdx = ES.SectionIdx;
+ SymbolList.push_back(SectionSym);
+ ES.Sym = SymbolList.back();
+ }
+
+ // Emit string table
+ EmitStringTable(M.getModuleIdentifier());
+
+ // Emit the symbol table now, if non-empty.
+ EmitSymbolTable();
+
+ // Emit the relocation sections.
+ EmitRelocations();
+
+ // Emit the sections string table.
+ EmitSectionTableStringTable();
+
+ // Dump the sections and section table to the .o file.
+ OutputSectionsAndSectionTable();
+
+ return false;
+}
+
+// RelocateField - Patch the relocatable field at 'Offset' in 'BO'
+// using a 'Value' of known 'Size'.
+void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
+ int64_t Value, unsigned Size) {
+ if (Size == 32)
+ BO.fixWord32(Value, Offset);
+ else if (Size == 64)
+ BO.fixWord64(Value, Offset);
+ else
+ llvm_unreachable("don't know how to patch relocatable field");
+}
+
+/// EmitRelocations - Emit relocations
+void ELFWriter::EmitRelocations() {
+
+ // True if the target uses the relocation entry to hold the addend,
+ // otherwise the addend is written directly to the relocatable field.
+ bool HasRelA = TEW->hasRelocationAddend();
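+
+ // This mirrors the ELF distinction between SHT_REL sections, whose entries
+ // hold only r_offset and r_info, and SHT_RELA sections, whose entries
+ // carry an explicit r_addend field.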
+
+ // Create Relocation sections for each section which needs it.
+ for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
+ ELFSection &S = *SectionList[i];
+
+ // This section does not have relocations
+ if (!S.hasRelocations()) continue;
+ ELFSection &RelSec = getRelocSection(S);
+
+ // 'Link' - Section hdr idx of the associated symbol table
+ // 'Info' - Section hdr idx of the section to which the relocation applies
+ ELFSection &SymTab = getSymbolTableSection();
+ RelSec.Link = SymTab.SectionIdx;
+ RelSec.Info = S.SectionIdx;
+ RelSec.EntSize = TEW->getRelocationEntrySize();
+
+ // Get the relocations from Section
+ std::vector<MachineRelocation> Relos = S.getRelocations();
+ for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
+ MRE = Relos.end(); MRI != MRE; ++MRI) {
+ MachineRelocation &MR = *MRI;
+
+ // Relocatable field offset from the section start
+ unsigned RelOffset = MR.getMachineCodeOffset();
+
+ // Symbol index in the symbol table
+ unsigned SymIdx = 0;
+
+ // Target specific relocation field type and size
+ unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
+ unsigned RelTySize = TEW->getRelocationTySize(RelType);
+ int64_t Addend = 0;
+
+ // There are several machine relocation types, and each one of
+ // them needs a different approach to retrieve the symbol table index.
+ if (MR.isGlobalValue()) {
+ const GlobalValue *G = MR.getGlobalValue();
+ int64_t GlobalOffset = MR.getConstantVal();
+ SymIdx = GblSymLookup[G];
+ if (G->hasPrivateLinkage()) {
+ // If the target uses a section offset in the relocation:
+ // SymIdx + Addend = section sym for global + section offset
+ unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
+ Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+ } else {
+ Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
+ }
+ } else if (MR.isExternalSymbol()) {
+ const char *ExtSym = MR.getExternalSymbol();
+ SymIdx = ExtSymLookup[ExtSym];
+ Addend = TEW->getDefaultAddendForRelTy(RelType);
+ } else {
+ // Get the symbol index for the section symbol
+ unsigned SectionIdx = MR.getConstantVal();
+ SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+
+ // The symbol offset inside the section
+ int64_t SymOffset = (int64_t)MR.getResultPointer();
+
+ // For PC-relative relocations whose symbols are defined in the same
+ // section in which they are referenced, ignore the relocation entry
+ // and patch the relocatable field with the symbol offset directly.
+ if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
+ int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
+ RelocateField(S, RelOffset, Value, RelTySize);
+ continue;
+ }
+
+ Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
+ }
+
+ // For targets that do not store the addend in the relocation entry, the
+ // relocatable field itself must be patched to contain the addend;
+ // otherwise write zeros to make sure there is no garbage there.
+ RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
+
+ // Get the relocation entry and emit to the relocation section
+ ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
+ EmitRelocation(RelSec, Rel, HasRelA);
+ }
+ }
+}
+
+/// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'
+void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
+ bool HasRelA) {
+ RelSec.emitWord(Rel.getOffset());
+ RelSec.emitWord(Rel.getInfo(is64Bit));
+ if (HasRelA)
+ RelSec.emitWord(Rel.getAddend());
+}
+
+/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
+void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
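+ // The field orders below follow Elf64_Sym (st_name, st_info, st_other,
+ // st_shndx, st_value, st_size) and Elf32_Sym, where st_value and st_size
+ // immediately follow st_name; hence the two branches.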
+ if (is64Bit) {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ SymbolTable.emitWord64(Sym.Value);
+ SymbolTable.emitWord64(Sym.Size);
+ } else {
+ SymbolTable.emitWord32(Sym.NameIdx);
+ SymbolTable.emitWord32(Sym.Value);
+ SymbolTable.emitWord32(Sym.Size);
+ SymbolTable.emitByte(Sym.Info);
+ SymbolTable.emitByte(Sym.Other);
+ SymbolTable.emitWord16(Sym.SectionIdx);
+ }
+}
+
+/// EmitSectionHeader - Write the header of section 'SHdr' into the
+/// Section Header Table 'SHdrTab'
+void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
+ const ELFSection &SHdr) {
+ SHdrTab.emitWord32(SHdr.NameIdx);
+ SHdrTab.emitWord32(SHdr.Type);
+ if (is64Bit) {
+ SHdrTab.emitWord64(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord64(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord64(SHdr.Align);
+ SHdrTab.emitWord64(SHdr.EntSize);
+ } else {
+ SHdrTab.emitWord32(SHdr.Flags);
+ SHdrTab.emitWord(SHdr.Addr);
+ SHdrTab.emitWord(SHdr.Offset);
+ SHdrTab.emitWord32(SHdr.Size);
+ SHdrTab.emitWord32(SHdr.Link);
+ SHdrTab.emitWord32(SHdr.Info);
+ SHdrTab.emitWord32(SHdr.Align);
+ SHdrTab.emitWord32(SHdr.EntSize);
+ }
+}
+
+/// EmitStringTable - If the current symbol table is non-empty, emit the string
+/// table for it
+void ELFWriter::EmitStringTable(const std::string &ModuleName) {
+ if (!SymbolList.size()) return; // Empty symbol table.
+ ELFSection &StrTab = getStringTableSection();
+
+ // Set the zeroth entry of the string table to a null byte, as required.
+ StrTab.emitByte(0);
+
+ // Walk the symbol list and write the symbol names into the string table.
+ unsigned Index = 1;
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ ELFSym &Sym = *(*I);
+
+ std::string Name;
+ if (Sym.isGlobalValue()) {
+ SmallString<40> NameStr;
+ Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
+ Name.append(NameStr.begin(), NameStr.end());
+ } else if (Sym.isExternalSym())
+ Name.append(Sym.getExternalSymbol());
+ else if (Sym.isFileType())
+ Name.append(ModuleName);
+
+ if (Name.empty()) {
+ Sym.NameIdx = 0;
+ } else {
+ Sym.NameIdx = Index;
+ StrTab.emitString(Name);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += Name.size()+1;
+ }
+ }
+ assert(Index == StrTab.size());
+ StrTab.Size = Index;
+}
+
+// SortSymbols - In the symbol table, local symbols must come before
+// all symbols with non-local bindings. The return value is
+// the position of the first non-local symbol.
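+// ELF requires this ordering; EmitSymbolTable stores the returned index in
+// the symbol table section's sh_info field.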
+unsigned ELFWriter::SortSymbols() {
+ unsigned FirstNonLocalSymbol;
+ std::vector<ELFSym*> LocalSyms, OtherSyms;
+
+ for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+ if ((*I)->isLocalBind())
+ LocalSyms.push_back(*I);
+ else
+ OtherSyms.push_back(*I);
+ }
+ SymbolList.clear();
+ FirstNonLocalSymbol = LocalSyms.size();
+
+ for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
+ SymbolList.push_back(LocalSyms[i]);
+
+ for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
+ SymbolList.push_back(*I);
+
+ LocalSyms.clear();
+ OtherSyms.clear();
+
+ return FirstNonLocalSymbol;
+}
+
+/// EmitSymbolTable - Emit the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+ if (!SymbolList.size()) return; // Empty symbol table.
+
+ // Now that we have emitted the string table and know the offset into the
+ // string table of each symbol, emit the symbol table itself.
+ ELFSection &SymTab = getSymbolTableSection();
+ SymTab.Align = TEW->getPrefELFAlignment();
+
+ // Section Index of .strtab.
+ SymTab.Link = getStringTableSection().SectionIdx;
+
+ // Size of each symtab entry.
+ SymTab.EntSize = TEW->getSymTabEntrySize();
+
+ // Reorder the symbol table with local symbols first!
+ unsigned FirstNonLocalSymbol = SortSymbols();
+
+ // Emit all the symbols to the symbol table.
+ for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
+ ELFSym &Sym = *SymbolList[i];
+
+ // Emit symbol to the symbol table
+ EmitSymbol(SymTab, Sym);
+
+ // Record the symbol table index for each symbol
+ if (Sym.isGlobalValue())
+ GblSymLookup[Sym.getGlobalValue()] = i;
+ else if (Sym.isExternalSym())
+ ExtSymLookup[Sym.getExternalSymbol()] = i;
+
+ // Keep track of the symbol index into the symbol table
+ Sym.SymTabIdx = i;
+ }
+
+ // One greater than the symbol table index of the last local symbol
+ SymTab.Info = FirstNonLocalSymbol;
+ SymTab.Size = SymTab.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+ // First step: add the section for the string table to the list of sections:
+ ELFSection &SHStrTab = getSectionHeaderStringTableSection();
+
+ // Now that we know which section number is the .shstrtab section, update the
+ // e_shstrndx entry in the ELF header.
+ ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
+
+ // Set the NameIdx of each section in the string table and emit the bytes for
+ // the string table.
+ unsigned Index = 0;
+
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ // Set the index into the table. Note if we have lots of entries with
+ // common suffixes, we could memoize them here if we cared.
+ S.NameIdx = Index;
+ SHStrTab.emitString(S.getName());
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += S.getName().size()+1;
+ }
+
+ // Set the size of .shstrtab now that we know what it is.
+ assert(Index == SHStrTab.size());
+ SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+ // Pass #1: Compute the file offset for each section.
+ size_t FileOff = ElfHdr.size(); // File header first.
+
+ // Adjust the alignment of all sections if needed, skipping the null section.
+ for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
+ ELFSection &ES = *SectionList[i];
+ if (!ES.size()) {
+ ES.Offset = FileOff;
+ continue;
+ }
+
+ // Update Section size
+ if (!ES.Size)
+ ES.Size = ES.size();
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (ES.Align)
+ FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
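+ // The round-up assumes ES.Align is a power of two; e.g. a FileOff of 13
+ // with an alignment of 8 becomes 16.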
+
+ ES.Offset = FileOff;
+ FileOff += ES.Size;
+ }
+
+ // Align Section Header.
+ unsigned TableAlign = TEW->getPrefELFAlignment();
+ FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+ // Now that we know where all of the sections will be emitted, set the e_shnum
+ // entry in the ELF header.
+ ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
+
+ // Now that we know the offset in the file of the section table, update the
+ // e_shoff address in the ELF header.
+ ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
+
+ // Now that we know all of the data in the file header, emit it and all of the
+ // sections!
+ O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
+ FileOff = ElfHdr.size();
+
+ // Section Header Table blob
+ BinaryObject SHdrTable(isLittleEndian, is64Bit);
+
+ // Emit all of the sections to the file and build the section header table.
+ for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+ ELFSection &S = *(*I);
+ DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+ << ", Size: " << S.Size << ", Offset: " << S.Offset
+ << ", SectionData Size: " << S.size() << "\n");
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (S.size()) {
+ if (S.Align) {
+ for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+ }
+ O.write((char *)&S.getData()[0], S.Size);
+ FileOff += S.Size;
+ }
+
+ EmitSectionHeader(SHdrTable, S);
+ }
+
+ // Align output for the section table.
+ for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+
+ // Emit the section table itself.
+ O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
+}
diff --git a/src/LLVM/lib/CodeGen/ELFWriter.h b/src/LLVM/lib/CodeGen/ELFWriter.h
new file mode 100644
index 0000000..6f7fbac
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,251 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <map>
+
+namespace llvm {
+ class BinaryObject;
+ class Constant;
+ class ConstantInt;
+ class ConstantStruct;
+ class ELFCodeEmitter;
+ class ELFRelocation;
+ class ELFSection;
+ struct ELFSym;
+ class GlobalVariable;
+ class JITDebugRegisterer;
+ class Mangler;
+ class MachineCodeEmitter;
+ class MachineConstantPoolEntry;
+ class ObjectCodeEmitter;
+ class MCAsmInfo;
+ class TargetELFWriterInfo;
+ class TargetLoweringObjectFile;
+ class raw_ostream;
+ class SectionKind;
+ class MCContext;
+ class TargetMachine;
+
+ typedef std::vector<ELFSym*>::iterator ELFSymIter;
+ typedef std::vector<ELFSection*>::iterator ELFSectionIter;
+ typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
+ typedef SetVector<const char *>::const_iterator PendingExtsIter;
+ typedef std::pair<const Constant *, int64_t> CstExprResTy;
+
+ /// ELFWriter - This class implements the common target-independent code for
+ /// writing ELF files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class ELFWriter : public MachineFunctionPass {
+ friend class ELFCodeEmitter;
+ friend class JITDebugRegisterer;
+ public:
+ static char ID;
+
+ /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
+ ObjectCodeEmitter *getObjectCodeEmitter() {
+ return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
+ }
+
+ ELFWriter(raw_ostream &O, TargetMachine &TM);
+ ~ELFWriter();
+
+ protected:
+ /// Output stream to send the resultant object file to.
+ raw_ostream &O;
+
+ /// Target machine description.
+ TargetMachine &TM;
+
+ /// Context object for machine code objects.
+ MCContext &OutContext;
+
+ /// Target Elf Writer description.
+ const TargetELFWriterInfo *TEW;
+
+ /// Mang - The object used to perform name mangling for this module.
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ ELFCodeEmitter *ElfCE;
+
+ /// TLOF - Target Lowering Object File, provides section names for globals
+ /// and other object-file-specific stuff
+ const TargetLoweringObjectFile &TLOF;
+
+ /// MAI - Target Asm Info, provides information about section names for
+ /// globals and other target-specific stuff.
+ const MCAsmInfo *MAI;
+
+ //===------------------------------------------------------------------===//
+ // Properties inferred automatically from the target machine.
+ //===------------------------------------------------------------------===//
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the ELF file.
+ bool doInitialization(Module &M);
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the ELF file to 'O'.
+ bool doFinalization(Module &M);
+
+ private:
+ /// Blob containing the Elf header
+ BinaryObject ElfHdr;
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the section header table
+ /// is constructed from this info.
+ std::vector<ELFSection*> SectionList;
+ unsigned NumSections; // Always = SectionList.size()
+
+ /// SectionLookup - This is a mapping from section name to section number in
+ /// the SectionList. Used to quickly find the section index for a given name
+ std::map<std::string, ELFSection*> SectionLookup;
+
+ /// PendingGlobals - Globals not processed as symbols yet.
+ SetVector<const GlobalValue*> PendingGlobals;
+
+ /// GblSymLookup - This is a mapping from global value to a symbol index
+ /// in the symbol table or private symbols list. This is useful since reloc
+ /// symbol references must be quickly mapped to their indices on the lists.
+ std::map<const GlobalValue*, uint32_t> GblSymLookup;
+
+ /// PendingExternals - Externals not processed as symbols yet.
+ SetVector<const char *> PendingExternals;
+
+ /// ExtSymLookup - This is a mapping from externals to a symbol index
+ /// in the symbol table list. This is useful since reloc symbol references
+ /// must be quickly mapped to their symbol table indices.
+ std::map<const char *, uint32_t> ExtSymLookup;
+
+ /// SymbolList - This is the list of symbols emitted to the symbol table.
+ /// When the SymbolList is finally built, local symbols must be placed in
+ /// the beginning while non-locals at the end.
+ std::vector<ELFSym*> SymbolList;
+
+ /// PrivateSyms - Record private symbols; symbols recorded here must never
+ /// be present in the SymbolList.
+ std::vector<ELFSym*> PrivateSyms;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ ELFSection &getSection(const std::string &Name, unsigned Type,
+ unsigned Flags = 0, unsigned Align = 0) {
+ ELFSection *&SN = SectionLookup[Name];
+ if (SN) return *SN;
+
+ SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
+ SN = SectionList.back();
+ SN->SectionIdx = NumSections++;
+ SN->Type = Type;
+ SN->Flags = Flags;
+ SN->Link = ELF::SHN_UNDEF;
+ SN->Align = Align;
+ return *SN;
+ }
+
+ ELFSection &getNonExecStackSection() {
+ return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
+ }
+
+ ELFSection &getSymbolTableSection() {
+ return getSection(".symtab", ELF::SHT_SYMTAB, 0);
+ }
+
+ ELFSection &getStringTableSection() {
+ return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getSectionHeaderStringTableSection() {
+ return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
+ }
+
+ ELFSection &getNullSection() {
+ return getSection("", ELF::SHT_NULL, 0);
+ }
+
+ ELFSection &getDataSection();
+ ELFSection &getBSSSection();
+ ELFSection &getCtorSection();
+ ELFSection &getDtorSection();
+ ELFSection &getJumpTableSection();
+ ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
+ ELFSection &getTextSection(const Function *F);
+ ELFSection &getRelocSection(ELFSection &S);
+
+ // Helpers for obtaining ELF specific info.
+ unsigned getGlobalELFBinding(const GlobalValue *GV);
+ unsigned getGlobalELFType(const GlobalValue *GV);
+ unsigned getGlobalELFVisibility(const GlobalValue *GV);
+
+ // AddPendingGlobalSymbol - Add a global to be processed and to
+ // the global symbol lookup; use a zero index because the table
+ // index will be determined later.
+ void AddPendingGlobalSymbol(const GlobalValue *GV,
+ bool AddToLookup = false);
+
+ // AddPendingExternalSymbol - Add the external to be processed
+ // and to the external symbol lookup; use a zero index because
+ // the symbol table index will be determined later.
+ void AddPendingExternalSymbol(const char *External);
+
+ // AddToSymbolList - Update the symbol lookup and, if the symbol is
+ // private, add it to the PrivateSyms list, otherwise to SymbolList.
+ void AddToSymbolList(ELFSym *GblSym);
+
+ // As we complete the ELF file, we need to update fields in the ELF header
+ // (e.g. the location of the section table). These members keep track of
+ // the offsets within the ELF header of the various fields to update,
+ // and of other locations in the file.
+ unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
+
+ private:
+ void EmitGlobal(const GlobalValue *GV);
+ void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
+ void EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ ELFSection &GblS);
+ void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
+ void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+ ELFSection &GblS, int64_t Offset = 0);
+ bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+ void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
+ void EmitRelocations();
+ void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
+ void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
+ void EmitSectionTableStringTable();
+ void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
+ void EmitSymbolTable();
+ void EmitStringTable(const std::string &ModuleName);
+ void OutputSectionsAndSectionTable();
+ void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
+ unsigned Size);
+ unsigned SortSymbols();
+ CstExprResTy ResolveConstantExpr(const Constant *CV);
+ };
+}
+
+#endif
diff --git a/src/LLVM/lib/CodeGen/EdgeBundles.cpp b/src/LLVM/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 0000000..a7aba89
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,97 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->getNumBlockIDs());
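+
+ // Block N's ingoing edge bundle is node 2*N and its outgoing bundle is
+ // node 2*N+1, hence the structure is grown to twice the number of blocks.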
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+ const MachineBasicBlock &MBB = *I;
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+
+ // Compute the reverse mapping.
+ Blocks.clear();
+ Blocks.resize(getNumBundles());
+
+ for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+ unsigned b0 = getBundle(i, 0);
+ unsigned b1 = getBundle(i, 1);
+ Blocks[b0].push_back(i);
+ if (b1 != b0)
+ Blocks[b1].push_back(i);
+ }
+
+ return false;
+}
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph; the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const std::string &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ unsigned BB = I->getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
diff --git a/src/LLVM/lib/CodeGen/ExecutionDepsFix.cpp b/src/LLVM/lib/CodeGen/ExecutionDepsFix.cpp
new file mode 100644
index 0000000..01dccdb
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ExecutionDepsFix.cpp
@@ -0,0 +1,523 @@
+//===- ExecutionDepsFix.cpp - Fix execution dependency issues ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the execution dependency fix pass.
+//
+// Some X86 SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer cpus there is extra latency
+// transferring data between integer and floating point domains. ARM cores
+// have similar issues when they are configured with both VFP and NEON
+// pipelines.
+//
+// This pass changes the variant instructions to minimize domain crossings.
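+//
+// For example, on x86 both ANDPS and PAND compute a bitwise AND, but ANDPS
+// executes in the floating-point domain and PAND in the integer domain;
+// picking the variant that matches the neighboring instructions avoids the
+// bypass-delay penalty.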
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "execution-fix"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
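+///
+/// For example, while a register holding the result of an XORPS is tracked
+/// by an open DomainValue, an equivalent PXOR encoding is still possible;
+/// once a fixed-domain instruction uses the value, the DomainValue is
+/// collapsed and every twiddleable instruction in it is rewritten to that
+/// domain.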
+namespace {
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+ // Bitmask of available domains. For an open DomainValue, it is the still
+ // possible domains for collapsing. For a collapsed DomainValue it is the
+ // domains where the register is available for free.
+ unsigned AvailableDomains;
+
+ // Position of the last defining instruction.
+ unsigned Dist;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool isCollapsed() const { return Instrs.empty(); }
+
+ // Is domain available?
+ bool hasDomain(unsigned domain) const {
+ return AvailableDomains & (1u << domain);
+ }
+
+ // Mark domain as available.
+ void addDomain(unsigned domain) {
+ AvailableDomains |= 1u << domain;
+ }
+
+ // Restrict to a single domain available.
+ void setSingleDomain(unsigned domain) {
+ AvailableDomains = 1u << domain;
+ }
+
+ // Return bitmask of domains that are available and in mask.
+ unsigned getCommonDomains(unsigned mask) const {
+ return AvailableDomains & mask;
+ }
+
+ // First domain available.
+ unsigned getFirstDomain() const {
+ return CountTrailingZeros_32(AvailableDomains);
+ }
+
+ DomainValue() { clear(); }
+
+ void clear() {
+ Refs = AvailableDomains = Dist = 0;
+ Instrs.clear();
+ }
+};
+}
+
+namespace {
+class ExeDepsFix : public MachineFunctionPass {
+ static char ID;
+ SpecificBumpPtrAllocator<DomainValue> Allocator;
+ SmallVector<DomainValue*,16> Avail;
+
+ const TargetRegisterClass *const RC;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineBasicBlock *MBB;
+ std::vector<int> AliasMap;
+ const unsigned NumRegs;
+ DomainValue **LiveRegs;
+ typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveOutMap LiveOuts;
+ unsigned Distance;
+
+public:
+ ExeDepsFix(const TargetRegisterClass *rc)
+ : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "SSE execution domain fixup";
+ }
+
+private:
+ // Register mapping.
+ int RegIndex(unsigned Reg);
+
+ // DomainValue allocation.
+ DomainValue *Alloc(int domain = -1);
+ void Recycle(DomainValue*);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock();
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char ExeDepsFix::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int ExeDepsFix::RegIndex(unsigned Reg) {
+ assert(Reg < AliasMap.size() && "Invalid register");
+ return AliasMap[Reg];
+}
+
+DomainValue *ExeDepsFix::Alloc(int domain) {
+ DomainValue *dv = Avail.empty() ?
+ new(Allocator.Allocate()) DomainValue :
+ Avail.pop_back_val();
+ dv->Dist = Distance;
+ if (domain >= 0)
+ dv->addDomain(domain);
+ return dv;
+}
+
+void ExeDepsFix::Recycle(DomainValue *dv) {
+ assert(dv && "Cannot recycle NULL");
+ dv->clear();
+ Avail.push_back(dv);
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs) {
+ LiveRegs = new DomainValue*[NumRegs];
+ std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
+ }
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx]) {
+ assert(LiveRegs[rx]->Refs && "Bad refcount");
+ if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
+ }
+ LiveRegs[rx] = dv;
+ if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void ExeDepsFix::Kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs || !LiveRegs[rx]) return;
+
+ // Before killing the last reference to an open DomainValue, collapse it to
+ // the first available domain.
+ if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
+ Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
+ else
+ SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void ExeDepsFix::Force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ DomainValue *dv;
+ if (LiveRegs && (dv = LiveRegs[rx])) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ Collapse(dv, domain);
+ else {
+ // This is an incompatible open DomainValue. Collapse it to whatever and
+ // force the new value into domain. This costs a domain crossing.
+ Collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx] && "Not live after collapse?");
+ LiveRegs[rx]->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ SetLiveReg(rx, Alloc(domain));
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv)
+ SetLiveReg(rx, Alloc(domain));
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Dist = std::max(A->Dist, B->Dist);
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == B)
+ SetLiveReg(rx, A);
+ return true;
+}
+
+void ExeDepsFix::enterBasicBlock() {
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = RegIndex(*i);
+ if (rx < 0) continue;
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) continue;
+ DomainValue *pdv = fi->second[rx];
+ if (!pdv) continue;
+ if (!LiveRegs || !LiveRegs[rx]) {
+ SetLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->isCollapsed()) {
+ // We are already collapsed, but the predecessor is not. Force it.
+ unsigned domain = LiveRegs[rx]->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(domain))
+ Collapse(pdv, domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ Merge(LiveRegs[rx], pdv);
+ else
+ Force(rx, pdv->getFirstDomain());
+ }
+ }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ Force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx]) {
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common) available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ Kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = CountTrailingZeros_32(available);
+ TII->setExecutionDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<DomainValue*,4> doms;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ DomainValue *dv = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!dv->getCommonDomains(available)) {
+ Kill(*i);
+ continue;
+ }
+ // Sorted, uniqued insert.
+ bool inserted = false;
+ for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+ i != e && !inserted; ++i) {
+ if (dv == *i)
+ inserted = true;
+ else if (dv->Dist < (*i)->Dist) {
+ inserted = true;
+ doms.insert(i, dv);
+ }
+ }
+ if (!inserted)
+ doms.push_back(dv);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!doms.empty()) {
+ if (!dv) {
+ dv = doms.pop_back_val();
+ continue;
+ }
+
+ DomainValue *latest = doms.pop_back_val();
+ if (Merge(dv, latest)) continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i] == latest)
+ Kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv)
+ dv = Alloc();
+ dv->Dist = Distance;
+ dv->AvailableDomains = available;
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv.
+ for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+ Kill(rx);
+ SetLiveReg(rx, dv);
+ }
+ }
+}
+
+void ExeDepsFix::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any relevant registers redefined.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ }
+}
+
+bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+ LiveRegs = 0;
+ Distance = 0;
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
+ // or -1.
+ AliasMap.resize(TRI->getNumRegs(), -1);
+ for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
+ for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
+ AliasMap[*AI] = i;
+ }
+
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*, 16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
+ DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+ DFI != DFE; ++DFI) {
+ MBB = *DFI;
+ enterBasicBlock();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ ++Distance;
+ std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi);
+ if (domp.first) {
+ if (domp.second)
+ visitSoftInstr(mi, domp.second);
+ else
+ visitHardInstr(mi, domp.first);
+ } else if (LiveRegs)
+ visitGenericInstr(mi);
+ }
+
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ if (LiveRegs)
+ LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ LiveRegs = 0;
+ }
+
+ // Clear the LiveOuts vectors. Should we also collapse any remaining
+ // DomainValues?
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+ i != e; ++i)
+ delete[] i->second;
+ LiveOuts.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
+
+FunctionPass *
+llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
+ return new ExeDepsFix(RC);
+}
diff --git a/src/LLVM/lib/CodeGen/ExpandISelPseudos.cpp b/src/LLVM/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 0000000..a67140e
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,82 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
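+//
+// For example, a select on a target without a native conditional move can
+// be expanded into a diamond: a conditional branch, two predecessor blocks,
+// and a phi in the join block choosing between the two values.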
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Expand ISel Pseudo-instructions";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand CodeGen Pseudo-instructions", false, false)
+
+FunctionPass *llvm::createExpandISelPseudosPass() {
+ return new ExpandISelPseudos();
+}
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr *MI = MBBI++;
+
+ // If MI is a pseudo, expand it.
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB =
+ TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB;
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/src/LLVM/lib/CodeGen/ExpandPostRAPseudos.cpp b/src/LLVM/lib/CodeGen/ExpandPostRAPseudos.cpp
new file mode 100644
index 0000000..e2a14a8
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -0,0 +1,237 @@
+//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo
+// instructions after register allocation.
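+//
+// For example, a "%XMM1<def> = COPY %XMM0<kill>" surviving register
+// allocation is rewritten via TII->copyPhysReg() into a real target move,
+// while an identity copy is erased or downgraded to a KILL marker.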
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postrapseudos"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+struct ExpandPostRA : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandPostRA() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const {
+ return "Post-RA pseudo instruction expansion pass";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
+
+ void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo *TRI);
+ void TransferImplicitDefs(MachineInstr *MI);
+};
+} // end anonymous namespace
+
+char ExpandPostRA::ID = 0;
+
+FunctionPass *llvm::createExpandPostRAPseudosPass() {
+ return new ExpandPostRA();
+}
+
+/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the dead flag.
+void
+ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterDead(DstReg, TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyPhysReg output doesn't reference destination register!");
+ }
+}
+
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+ }
+}
+
+bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ // Watch out for case like this:
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "subreg: " << *(--dMI);
+ });
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
+ }
+
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+ if (DstMO.isDead())
+ TransferDeadFlag(MI, DstMO.getReg(), TRI);
+ if (MI->getNumOperands() > 2)
+ TransferImplicitDefs(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
+ return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n"
+ << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n');
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineInstr *MI = mi;
+ // Advance iterator here because MI may be erased.
+ ++mi;
+
+ // Only expand pseudos.
+ if (!MI->getDesc().isPseudo())
+ continue;
+
+ // Give targets a chance to expand even standard pseudos.
+ if (TII->expandPostRAPseudo(MI)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Expand standard pseudos.
+ switch (MI->getOpcode()) {
+ case TargetOpcode::SUBREG_TO_REG:
+ MadeChange |= LowerSubregToReg(MI);
+ break;
+ case TargetOpcode::COPY:
+ MadeChange |= LowerCopy(MI);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ continue;
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::EXTRACT_SUBREG:
+ llvm_unreachable("Sub-register pseudos should have been eliminated.");
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/src/LLVM/lib/CodeGen/GCMetadata.cpp b/src/LLVM/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 0000000..d757cf4
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,213 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
+
+ public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ };
+
+ class Deleter : public FunctionPass {
+ static char ID;
+
+ public:
+ Deleter();
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false)
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ strategy_map_type::iterator NMI = StrategyMap.find(Name);
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (Name == I->getName()) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Name).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ dbgs() << "unsupported GC: " << Name << "\n";
+ llvm_unreachable(0);
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ default: llvm_unreachable("Unknown GC point kind");
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+}
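+
+// For a hypothetical function @f with one root and one safe point, the
+// printer output looks roughly like:
+//   GC roots for f:
+//       0       -8[sp]
+//   GC safe points for f:
+//       .Ltmp0: post-call, live = { 0 }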
+
+bool Printer::runOnFunction(Function &F) {
+ if (!F.hasGC()) return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": "
+ << DescKind(PI->Kind) << ", live = {";
+
+    for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                       RE = FD->live_end(PI); RI != RE;) {
+      OS << " " << RI->Num;
+      if (++RI == RE)
+        break;
+      OS << ",";
+    }
+
+ OS << " }\n";
+ }
+
+ return false;
+}
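+// For illustration only (hypothetical output, not from this patch): for a
+// function 'f' with two roots and one post-call safe point, runOnFunction
+// above prints something like
+//
+//   GC roots for f:
+//       0       -8[sp]
+//       1       -16[sp]
+//   GC safe points for f:
+//       tmp0: post-call, live = { 0, 1 }
+//
+// where the root numbers, stack offsets, and label name are made up.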
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+ return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(ID) {}
+
+const char *Deleter::getPassName() const {
+ return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+ return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/src/LLVM/lib/CodeGen/GCMetadataPrinter.cpp b/src/LLVM/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..f80e9ce
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
diff --git a/src/LLVM/lib/CodeGen/GCStrategy.cpp b/src/LLVM/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..766c6ee
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,416 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
+ class LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+ /// MachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class MachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ MachineCodeAnalysis();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable(0);
+ return 0;
+}
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(ID) {
+  initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTree>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
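+// Illustrative sketch (assumed IR, not taken from this patch): for a root
+// declared as
+//
+//   %root = alloca i8*
+//   call void @llvm.gcroot(i8** %root, i8* null)
+//
+// with no store to %root before the first potential safe point, the code
+// above inserts
+//
+//   store i8* null, i8** %root
+//
+// right after the alloca, so the collector never scans an uninitialized
+// stack slot.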
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+  // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
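+// Example of the conservatism above (hypothetical IR): on a 32-bit target,
+//
+//   %q = sdiv i64 %a, %b
+//
+// may be lowered to a call to a runtime routine such as __divdi3, turning a
+// seemingly innocuous arithmetic instruction into a call - and therefore a
+// potential safe point - so only the short list of instruction kinds above
+// is treated as safe.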
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
+ MadeChange |= S.performCustomLowering(F);
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->DT->recalculate(F);
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst*, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
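+// Sketch of the default lowering above (assumed IR): a write barrier
+//
+//   call void @llvm.gcwrite(i8* %val, i8* %obj, i8** %field)
+//
+// becomes the plain store 'store i8* %val, i8** %field', and a read barrier
+//
+//   %v = call i8* @llvm.gcread(i8* %obj, i8** %field)
+//
+// becomes '%v = load i8** %field'. Note the object operand is dropped; only
+// collectors with custom barriers ever see it.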
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+ return new MachineCodeAnalysis();
+}
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis()
+ : MachineFunctionPass(ID) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+ return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
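+// Schematically (hypothetical machine code), a call site ends up bracketed
+// as
+//
+//   GC_LABEL <tmp0>   ; PreCall safe point
+//   CALL @foo
+//   GC_LABEL <tmp1>   ; PostCall safe point
+//
+// so the runtime can map the return address, which lies between the two
+// labels, back to this function's live root set.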
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->getDesc().isCall())
+ VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+  assert(TFI && "TargetFrameLowering not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+ RE = FI->roots_end(); RI != RE; ++RI)
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/src/LLVM/lib/CodeGen/INSTALL.vcxproj b/src/LLVM/lib/CodeGen/INSTALL.vcxproj
new file mode 100644
index 0000000..3c0bbfd
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/INSTALL.vcxproj
@@ -0,0 +1,261 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="MinSizeRel|Win32">
+ <Configuration>MinSizeRel</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="RelWithDebInfo|Win32">
+ <Configuration>RelWithDebInfo</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>INSTALL</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\INSTALL_force.rule">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeLists.txt">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\ALL_BUILD.vcxproj">
+ <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/INSTALL.vcxproj.filters b/src/LLVM/lib/CodeGen/INSTALL.vcxproj.filters
new file mode 100644
index 0000000..251dd1d
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\INSTALL_force.rule">
+ <Filter>CMake Rules</Filter>
+ </CustomBuild>
+ <CustomBuild Include="CMakeLists.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="CMake Rules">
+ <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
diff --git a/src/LLVM/lib/CodeGen/IfConversion.cpp b/src/LLVM/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..ce7ed29
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1494 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options to help with debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+      ICSimple,        // BB is the entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+    /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+    /// of if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its
+ /// classification, and common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if AnalyzeBranch() returns false.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+    /// ExtraCost2      - Some instructions are slower when predicated.
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ const MachineBranchProbabilityInfo *MBPI;
+
+ bool MadeChange;
+ int FnNum;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "If Converter"; }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ const BranchProbability &Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ const BranchProbability &Prediction) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction);
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
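+    // Candidates are consumed from the back of the token vector (pop_back in
+    // runOnMachineFunction), so this comparator sorts in descending order of
+    // net duplicated instructions; diamonds score the negative of their
+    // duplication and therefore land at the back, where they are tried first.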
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ InstrItins = MF.getTarget().getInstrItineraryData();
+ if (!TII) return false;
+
+  // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ bool BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getFunction()->getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: assert(false && "Unexpected!");
+ break;
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ DEBUG(dbgs() << " false");
+ if (isRev)
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
+ } else {
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumDiamonds;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange && IfCvtBranchFold) {
+ BranchFolder BF(false, false);
+ BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ MadeChange |= BFChange;
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the branch at the end of
+/// the block. Swap the block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: this is nowhere
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function's block ordering.
+/// If BB is the last block, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// Dups, the number of instructions that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction))
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks whether the 'true' block's false
+/// branch branches to the 'false' block rather than the other way around.
+/// It also returns, in 'Dups', the number of instructions that the ifcvt
+/// would need to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ const BranchProbability &Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(FIB))
+ break;
+ ++Dups1;
+ ++TIB;
+ ++FIB;
+ }
+
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->getDesc().isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->getDesc().isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
+ break;
+ }
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
+ break;
+ }
+ if (!TIE->isIdenticalTo(FIE))
+ break;
+ ++Dups2;
+ --TIE;
+ --FIE;
+ }
+
+ return true;
+}
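+// Illustrative example (hypothetical blocks): if the true block is
+// { A; B; X; E } and the false block is { A; B; Y; E }, the loops above
+// compute Dups1 = 2 (the shared prefix A, B) and Dups2 = 1 (the shared
+// suffix E, after backing up over any trailing branches), so only X and Y
+// remain to be predicated when the diamond is converted.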
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless it's the last instruction.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // Predicate modification instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+      // The predicate may have been modified; the subsequent (currently)
+      // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(I)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+  // Unanalyzable, ends with a fallthrough or an unconditional branch, or is
+  // no longer considered for ifcvt.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+
+ BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
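+    // Same triangle shape as above, but TBB's own conditional branch must be
+    // reversed during conversion (ICTriangleRev).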
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
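+      // Same as the triangle above, with the 'true' and 'false' paths swapped:
+      // FBB is predicated on the reversed condition (ICTriangleFalse).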
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
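+      // As above with the paths swapped, and FBB's own branch reversed as
+      // well (ICTriangleFRev).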
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
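+      // Simple (split, no rejoin) on the 'false' path, predicated on the
+      // reversed condition (ICSimpleFalse).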
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ AnalyzeBlock(BB, Tokens);
+ }
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to its
+/// next block).
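+/// For example, with block order BB, B1, B2, ToBB this returns true only when
+/// B1 and B2 are empty and each is the fall-through successor of the block
+/// before it.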
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (I != TI) {
+ // Check isSuccessor to avoid case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
+ return false;
+ PI = I++;
+ }
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed
+/// to determine if it can be if-converted. If predecessor is already enqueued,
+/// dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
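+/// For example, predicating "R0 = ADD R1, R2" leaves R0 unchanged whenever the
+/// predicate is false, so the predicated instruction effectively reads R0 as
+/// well; the implicit use added below models that extra read.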
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Redefs.insert(*Subreg);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.erase(*SR);
+ }
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (Redefs.count(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/,false/*IsKill*/));
+ } else {
+ Redefs.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.insert(*SR);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+  // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, that will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI, false);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(BBI1->BB, Redefs, TRI);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+ MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+ // Skip dbg_value instructions
+ while (DI1 != DIE1 && DI1->isDebugValue())
+ ++DI1;
+ while (DI2 != DIE2 && DI2->isDebugValue())
+ ++DI2;
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ while (NumDups1 != 0) {
+ ++DI2;
+ if (!DI2->isDebugValue())
+ --NumDups1;
+ }
+
+ UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI1 != BBI1->BB->begin());
+ --DI1;
+ // skip dbg_value instructions
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI2 != BBI2->BB->begin());
+ --DI2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugValue())
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+
+  // Merge the 'true' and 'false' blocks into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1, TailBB == 0);
+ MergeBlocks(BBI, *BBI2, TailBB == 0);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+ if (TailBB) {
+ BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ bool CanMergeTail = !TailBBI.HasFallThrough;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ BBI.BB->addSuccessor(TailBB);
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB);
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (I->isDebugValue() || TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(I, Redefs, TRI, true);
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ ++NumIfConvBBs;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const MCInstrDesc &MCID = I->getDesc();
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && MCID.isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
+
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+ if (!TII->PredicateInstruction(MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+ llvm_unreachable(0);
+ }
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(MI, Redefs, TRI, true);
+ }
+
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ ++NumDupBBs;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ if (AddEdges)
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+ FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/src/LLVM/lib/CodeGen/InlineSpiller.cpp b/src/LLVM/lib/CodeGen/InlineSpiller.cpp
index 63a89aa..726af46 100644
--- a/src/LLVM/lib/CodeGen/InlineSpiller.cpp
+++ b/src/LLVM/lib/CodeGen/InlineSpiller.cpp
@@ -12,49 +12,123 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "spiller"
+#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "SplitKit.h"
+#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
+STATISTIC(NumSnippets, "Number of spilled snippets");
+STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumSpillsRemoved, "Number of spills removed");
+STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumReloadsRemoved, "Number of reloads removed");
+STATISTIC(NumFolded, "Number of folded stack accesses");
+STATISTIC(NumFoldedLoads, "Number of folded loads");
+STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
+STATISTIC(NumHoists, "Number of hoisted spills");
+
+static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
+ cl::desc("Disable inline spill hoisting"));
+
namespace {
class InlineSpiller : public Spiller {
- MachineFunctionPass &pass_;
- MachineFunction &mf_;
- LiveIntervals &lis_;
- MachineLoopInfo &loops_;
- VirtRegMap &vrm_;
- MachineFrameInfo &mfi_;
- MachineRegisterInfo &mri_;
- const TargetInstrInfo &tii_;
- const TargetRegisterInfo &tri_;
- const BitVector reserved_;
-
- SplitAnalysis splitAnalysis_;
+ MachineFunctionPass &Pass;
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
// Variables that are valid during spill(), but used by multiple methods.
- LiveInterval *li_;
- std::vector<LiveInterval*> *newIntervals_;
- const TargetRegisterClass *rc_;
- int stackSlot_;
- const SmallVectorImpl<LiveInterval*> *spillIs_;
+ LiveRangeEdit *Edit;
+ LiveInterval *StackInt;
+ int StackSlot;
+ unsigned Original;
- // Values of the current interval that can potentially remat.
- SmallPtrSet<VNInfo*, 8> reMattable_;
+ // All registers to spill to StackSlot, including the main register.
+ SmallVector<unsigned, 8> RegsToSpill;
- // Values in reMattable_ that failed to remat at some point.
- SmallPtrSet<VNInfo*, 8> usedValues_;
+ // All COPY instructions to/from snippets.
+ // They are ignored since both operands refer to the same stack slot.
+ SmallPtrSet<MachineInstr*, 8> SnippetCopies;
+
+ // Values that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> UsedValues;
+
+public:
+ // Information about a value that was defined by a copy from a sibling
+ // register.
+ struct SibValueInfo {
+ // True when all reaching defs were reloads: No spill is necessary.
+ bool AllDefsAreReloads;
+
+ // True when value is defined by an original PHI not from splitting.
+ bool DefByOrigPHI;
+
+ // True when the COPY defining this value killed its source.
+ bool KillsSource;
+
+ // The preferred register to spill.
+ unsigned SpillReg;
+
+ // The value of SpillReg that should be spilled.
+ VNInfo *SpillVNI;
+
+ // The block where SpillVNI should be spilled. Currently, this must be the
+ // block containing SpillVNI->def.
+ MachineBasicBlock *SpillMBB;
+
+ // A defining instruction that is not a sibling copy or a reload, or NULL.
+ // This can be used as a template for rematerialization.
+ MachineInstr *DefMI;
+
+ // List of values that depend on this one. These values are actually the
+ // same, but live range splitting has placed them in different registers,
+  // or the SSA updater needed to insert PHI-defs to preserve SSA form. These
+  // are copies of the current value and phi-kills. Usually only phi-kills cause
+ // more than one dependent value.
+ TinyPtrVector<VNInfo*> Deps;
+
+ SibValueInfo(unsigned Reg, VNInfo *VNI)
+ : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
+ SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {}
+
+ // Returns true when a def has been found.
+ bool hasDef() const { return DefByOrigPHI || DefMI; }
+ };
+
+private:
+ // Values in RegsToSpill defined by sibling copies.
+ typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+ SibValueMap SibValues;
+
+ // Dead defs generated during spilling.
+ SmallVector<MachineInstr*, 8> DeadDefs;
~InlineSpiller() {}
@@ -62,36 +136,53 @@
InlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm)
- : pass_(pass),
- mf_(mf),
- lis_(pass.getAnalysis<LiveIntervals>()),
- loops_(pass.getAnalysis<MachineLoopInfo>()),
- vrm_(vrm),
- mfi_(*mf.getFrameInfo()),
- mri_(mf.getRegInfo()),
- tii_(*mf.getTarget().getInstrInfo()),
- tri_(*mf.getTarget().getRegisterInfo()),
- reserved_(tri_.getReservedRegs(mf_)),
- splitAnalysis_(mf, lis_, loops_) {}
+ : Pass(pass),
+ MF(mf),
+ LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AliasAnalysis>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()),
+ VRM(vrm),
+ MFI(*mf.getFrameInfo()),
+ MRI(mf.getRegInfo()),
+ TII(*mf.getTarget().getInstrInfo()),
+ TRI(*mf.getTarget().getRegisterInfo()) {}
- void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex);
+ void spill(LiveRangeEdit &);
private:
- bool split();
+ bool isSnippet(const LiveInterval &SnipLI);
+ void collectRegsToSpill();
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx);
- bool reMaterializeFor(MachineBasicBlock::iterator MI);
+ bool isRegToSpill(unsigned Reg) {
+ return std::find(RegsToSpill.begin(),
+ RegsToSpill.end(), Reg) != RegsToSpill.end();
+ }
+
+ bool isSibling(unsigned Reg);
+ MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+ void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0);
+ void analyzeSiblingValues();
+
+ bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
+ void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+ void markValueUsed(LiveInterval*, VNInfo*);
+ bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI);
void reMaterializeAll();
- bool coalesceStackAccess(MachineInstr *MI);
+ bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
bool foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops);
- void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
- void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI = 0);
+ void insertReload(LiveInterval &NewLI, SlotIndex,
+ MachineBasicBlock::iterator MI);
+ void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex, MachineBasicBlock::iterator MI);
+
+ void spillAroundUses(unsigned Reg);
+ void spillAll();
};
}
@@ -103,220 +194,829 @@
}
}
-/// split - try splitting the current interval into pieces that may allocate
-/// separately. Return true if successful.
-bool InlineSpiller::split() {
- // FIXME: Add intra-MBB splitting.
- if (lis_.intervalIsInOneMBB(*li_))
- return false;
+//===----------------------------------------------------------------------===//
+// Snippets
+//===----------------------------------------------------------------------===//
- splitAnalysis_.analyze(li_);
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance for
+// spill slots, which can be important in tight loops.
- if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) {
- // We can split, but li_ may be left intact with fewer uses.
- if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
- .splitAroundLoop(loop))
- return true;
- }
- return false;
+/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
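+/// E.g. for "%vreg1 = COPY %vreg2", passing Reg = %vreg2 returns %vreg1, and
+/// passing an unrelated register returns 0 (register numbers illustrative).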
+static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
+ if (!MI->isFullCopy())
+ return 0;
+ if (MI->getOperand(0).getReg() == Reg)
+ return MI->getOperand(1).getReg();
+ if (MI->getOperand(1).getReg() == Reg)
+ return MI->getOperand(0).getReg();
+ return 0;
}
-/// allUsesAvailableAt - Return true if all registers used by OrigMI at
-/// OrigIdx are also available with the same value at UseIdx.
-bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI,
- SlotIndex OrigIdx,
- SlotIndex UseIdx) {
- OrigIdx = OrigIdx.getUseIndex();
- UseIdx = UseIdx.getUseIndex();
- for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OrigMI->getOperand(i);
- if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg)
- continue;
- // Reserved registers are OK.
- if (MO.isUndef() || !lis_.hasInterval(MO.getReg()))
- continue;
- // We don't want to move any defs.
- if (MO.isDef())
- return false;
- // We cannot depend on virtual registers in spillIs_. They will be spilled.
- for (unsigned si = 0, se = spillIs_->size(); si != se; ++si)
- if ((*spillIs_)[si]->reg == MO.getReg())
- return false;
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+ unsigned Reg = Edit->getReg();
- LiveInterval &LI = lis_.getInterval(MO.getReg());
- const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
- if (!OVNI)
+ // A snippet is a tiny live range with only a single instruction using it
+ // besides copies to/from Reg or spills/fills. We accept:
+ //
+ // %snip = COPY %Reg / FILL fi#
+ // %snip = USE %snip
+ // %Reg = COPY %snip / SPILL %snip, fi#
+ //
+ if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ MachineInstr *UseMI = 0;
+
+ // Check that all uses satisfy our criteria.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ RI = MRI.reg_nodbg_begin(SnipLI.reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Allow copies to/from Reg.
+ if (isFullCopyOf(MI, Reg))
continue;
- if (OVNI != LI.getVNInfoAt(UseIdx))
+
+ // Allow stack slot loads.
+ int FI;
+ if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow stack slot stores.
+ if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow a single additional instruction.
+ if (UseMI && MI != UseMI)
return false;
+ UseMI = MI;
}
return true;
}
-/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of
-/// reloading it.
-bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
- SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
- VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
- if (!OrigVNI) {
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+ unsigned Reg = Edit->getReg();
+
+ // Main register always spills.
+ RegsToSpill.assign(1, Reg);
+ SnippetCopies.clear();
+
+ // Snippets all have the same original, so there can't be any for an original
+ // register.
+ if (Original == Reg)
+ return;
+
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+ unsigned SnipReg = isFullCopyOf(MI, Reg);
+ if (!isSibling(SnipReg))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+ if (!isSnippet(SnipLI))
+ continue;
+ SnippetCopies.insert(MI);
+ if (isRegToSpill(SnipReg))
+ continue;
+ RegsToSpill.push_back(SnipReg);
+ DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ ++NumSnippets;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
+
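+// E.g. when %vreg5 and %vreg7 were both split off %vreg2, VRM.getOriginal
+// maps both back to %vreg2, so they are siblings (numbers illustrative).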
+bool InlineSpiller::isSibling(unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ VRM.getOriginal(Reg) == Original;
+}
+
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const InlineSpiller::SibValueInfo &SVI) {
+ OS << "spill " << PrintReg(SVI.SpillReg) << ':'
+ << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
+ if (SVI.SpillMBB)
+ OS << " in BB#" << SVI.SpillMBB->getNumber();
+ if (SVI.AllDefsAreReloads)
+ OS << " all-reloads";
+ if (SVI.DefByOrigPHI)
+ OS << " orig-phi";
+ if (SVI.KillsSource)
+ OS << " kill";
+ OS << " deps[";
+ for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i)
+ OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def;
+ OS << " ]";
+ if (SVI.DefMI)
+ OS << " def: " << *SVI.DefMI;
+ else
+ OS << '\n';
+ return OS;
+}
+#endif
+
+/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
+/// known. Otherwise remember the dependency for later.
+///
+/// @param SVI SibValues entry to propagate.
+/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
+void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI,
+ VNInfo *VNI) {
+ // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
+ TinyPtrVector<VNInfo*> FirstDeps;
+ if (VNI) {
+ FirstDeps.push_back(VNI);
+ SVI->second.Deps.push_back(VNI);
+ }
+
+ // Has the value been completely determined yet? If not, defer propagation.
+ if (!SVI->second.hasDef())
+ return;
+
+ // Work list of values to propagate. It would be nice to use a SetVector
+ // here, but then we would be forced to use a SmallSet.
+ SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI);
+ SmallPtrSet<VNInfo*, 8> WorkSet;
+
+ do {
+ SVI = WorkList.pop_back_val();
+ WorkSet.erase(SVI->first);
+ TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
+ VNI = 0;
+
+ SibValueInfo &SV = SVI->second;
+ if (!SV.SpillMBB)
+ SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
+
+ DEBUG(dbgs() << " prop to " << Deps->size() << ": "
+ << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
+
+ assert(SV.hasDef() && "Propagating undefined value");
+
+ // Should this value be propagated as a preferred spill candidate? We don't
+ // propagate values of registers that are about to spill.
+ bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
+ unsigned SpillDepth = ~0u;
+
+ for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(),
+ DepE = Deps->end(); DepI != DepE; ++DepI) {
+ SibValueMap::iterator DepSVI = SibValues.find(*DepI);
+ assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
+ SibValueInfo &DepSV = DepSVI->second;
+ if (!DepSV.SpillMBB)
+ DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
+
+ bool Changed = false;
+
+ // Propagate defining instruction.
+ if (!DepSV.hasDef()) {
+ Changed = true;
+ DepSV.DefMI = SV.DefMI;
+ DepSV.DefByOrigPHI = SV.DefByOrigPHI;
+ }
+
+ // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
+ // all predecessors.
+ if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
+ Changed = true;
+ DepSV.AllDefsAreReloads = false;
+ }
+
+ // Propagate best spill value.
+ if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
+ if (SV.SpillMBB == DepSV.SpillMBB) {
+ // DepSV is in the same block. Hoist when dominated.
+ if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ // This hoist only helps when the DepSV copy kills its source.
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ } else {
+ // DepSV is in a different block.
+ if (SpillDepth == ~0u)
+ SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
+
+ // Also hoist spills to blocks with smaller loop depth, but make sure
+ // that the new value dominates. Non-phi dependents are always
+ // dominated, phis need checking.
+ if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+ (!DepSVI->first->isPHIDef() ||
+ MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
+ Changed = true;
+ DepSV.SpillReg = SV.SpillReg;
+ DepSV.SpillVNI = SV.SpillVNI;
+ DepSV.SpillMBB = SV.SpillMBB;
+ }
+ }
+ }
+
+ if (!Changed)
+ continue;
+
+ // Something changed in DepSVI. Propagate to dependents.
+ if (WorkSet.insert(DepSVI->first))
+ WorkList.push_back(DepSVI);
+
+ DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
+ << DepSVI->first->def << " to:\t" << DepSV);
+ }
+ } while (!WorkList.empty());
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
+///
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
+///
+/// Return a defining instruction that may be a candidate for rematerialization.
+///
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+ VNInfo *OrigVNI) {
+ // Check if a cached value already exists.
+ SibValueMap::iterator SVI;
+ bool Inserted;
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
+ if (!Inserted) {
+ DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
+ return SVI->second.DefMI;
+ }
+
+ DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+ << UseVNI->id << '@' << UseVNI->def << '\n');
+
+ // List of (Reg, VNI) that have been inserted into SibValues, but need to be
+ // processed.
+ SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+ do {
+ unsigned Reg;
+ VNInfo *VNI;
+ tie(Reg, VNI) = WorkList.pop_back_val();
+ DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << ":\t");
+
+ // First check if this value has already been computed.
+ SVI = SibValues.find(VNI);
+ assert(SVI != SibValues.end() && "Missing SibValues entry");
+
+ // Trace through PHI-defs created by live range splitting.
+ if (VNI->isPHIDef()) {
+ // Stop at original PHIs. We don't know the value at the predecessors.
+ if (VNI->def == OrigVNI->def) {
+ DEBUG(dbgs() << "orig phi value\n");
+ SVI->second.DefByOrigPHI = true;
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ continue;
+ }
+
+ // This is a PHI inserted by live range splitting. We could trace the
+ // live-out value from predecessor blocks, but that search can be very
+ // expensive if there are many predecessors and many more PHIs as
+ // generated by tail-dup when it sees an indirectbr. Instead, look at
+ // all the non-PHI defs that have the same value as OrigVNI. They must
+ // jointly dominate VNI->def. This is not optimal since VNI may actually
+      // be jointly dominated by a smaller subset of defs, so there is a chance
+      // we will miss an AllDefsAreReloads optimization.
+
+ // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
+ SmallVector<VNInfo*, 8> PHIs, NonPHIs;
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+
+ for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI2 = *VI;
+ if (VNI2->isUnused())
+ continue;
+ if (!OrigLI.containsOneValue() &&
+ OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
+ continue;
+ if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
+ PHIs.push_back(VNI2);
+ else
+ NonPHIs.push_back(VNI2);
+ }
+ DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
+ << " phi-defs, and " << NonPHIs.size()
+ << " non-phi/orig defs\n");
+
+ // Create entries for all the PHIs. Don't add them to the worklist, we
+ // are processing all of them in one go here.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));
+
+ // Add every PHI as a dependent of all the non-PHIs.
+ for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
+ VNInfo *NonPHI = NonPHIs[i];
+ // Known value? Try an insertion.
+ tie(SVI, Inserted) =
+ SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
+ // Add all the PHIs as dependents of NonPHI.
+ for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
+ SVI->second.Deps.push_back(PHIs[pi]);
+ // This is the first time we see NonPHI, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(Reg, NonPHI));
+ else
+ // Propagate to all inserted PHIs, not just VNI.
+ propagateSiblingValue(SVI);
+ }
+
+ // Next work list item.
+ continue;
+ }
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+
+ // Trace through sibling copies.
+ if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(SrcReg)) {
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex());
+ assert(SrcLR && "Copy from non-existing value");
+ // Check if this COPY kills its source.
+ SVI->second.KillsSource = (SrcLR->end == VNI->def);
+ VNInfo *SrcVNI = SrcLR->valno;
+ DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
+ << SrcVNI->id << '@' << SrcVNI->def
+ << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
+ // Known sibling source value? Try an insertion.
+ tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI,
+ SibValueInfo(SrcReg, SrcVNI)));
+ // This is the first time we see Src, add it to the worklist.
+ if (Inserted)
+ WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+ propagateSiblingValue(SVI, VNI);
+ // Next work list item.
+ continue;
+ }
+ }
+
+ // Track reachable reloads.
+ SVI->second.DefMI = MI;
+ SVI->second.SpillMBB = MI->getParent();
+ int FI;
+ if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "reload\n");
+ propagateSiblingValue(SVI);
+ // Next work list item.
+ continue;
+ }
+
+ // Potential remat candidate.
+ DEBUG(dbgs() << "def " << *MI);
+ SVI->second.AllDefsAreReloads = false;
+ propagateSiblingValue(SVI);
+ } while (!WorkList.empty());
+
+  // Look up the value we were looking for. We already did this lookup at the
+ // top of the function, but SibValues may have been invalidated.
+ SVI = SibValues.find(UseVNI);
+ assert(SVI != SibValues.end() && "Didn't compute requested info");
+ DEBUG(dbgs() << " traced to:\t" << SVI->second);
+ return SVI->second.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
+///
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+ SibValues.clear();
+
+ // No siblings at all?
+ if (Edit->getReg() == Original)
+ return;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+ VE = LI.vni_end(); VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = 0;
+ // Check possible sibling copies.
+ if (VNI->isPHIDef() || VNI->getCopy()) {
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ assert(OrigVNI && "Def outside original live range");
+ if (OrigVNI->def != VNI->def)
+ DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+ }
+ if (!DefMI && !VNI->isPHIDef())
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) {
+ DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+ << VNI->def << " may remat from " << *DefMI);
+ }
+ }
+ }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
+ SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex());
+ assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy");
+ SibValueMap::iterator I = SibValues.find(VNI);
+ if (I == SibValues.end())
+ return false;
+
+ const SibValueInfo &SVI = I->second;
+
+ // Let the normal folding code deal with the boring case.
+ if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+ return false;
+
+ // SpillReg may have been deleted by remat and DCE.
+ if (!LIS.hasInterval(SVI.SpillReg)) {
+ DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+ if (!SibLI.containsValue(SVI.SpillVNI)) {
+ DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+ SibValues.erase(I);
+ return false;
+ }
+
+ // Conservatively extend the stack slot range to the range of the original
+ // value. We may be able to do better with stack slot coloring by being more
+ // careful here.
+ assert(StackInt && "No stack slot assigned yet.");
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
+
+ // Already spilled everywhere.
+ if (SVI.AllDefsAreReloads) {
+ DEBUG(dbgs() << "\tno spill needed: " << SVI);
+ ++NumOmitReloadSpill;
+ return true;
+ }
+ // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+ // any later spills of the same value.
+ eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+ MachineBasicBlock::iterator MII;
+ if (SVI.SpillVNI->isPHIDef())
+ MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ else {
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+ assert(DefMI && "Defining instruction disappeared");
+ MII = DefMI;
+ ++MII;
+ }
+ // Insert spill without kill flag immediately after def.
+ TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+ MRI.getRegClass(SVI.SpillReg), &TRI);
+ --MII; // Point to store instruction.
+ LIS.InsertMachineInstrInMaps(MII);
+ VRM.addSpillSlotUse(StackSlot, MII);
+ DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+
+ ++NumSpills;
+ ++NumHoists;
+ return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+ assert(VNI && "Missing value");
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(&SLI, VNI));
+ assert(StackInt && "No stack slot assigned yet.");
+
+ do {
+ LiveInterval *LI;
+ tie(LI, VNI) = WorkList.pop_back_val();
+ unsigned Reg = LI->reg;
+ DEBUG(dbgs() << "Checking redundant spills for "
+ << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+
+ // Regs to spill are taken care of.
+ if (isRegToSpill(Reg))
+ continue;
+
+ // Add all of VNI's live range to StackInt.
+ StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+ // Find all spills and copies of VNI.
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = UI.skipInstruction();) {
+ if (!MI->isCopy() && !MI->getDesc().mayStore())
+ continue;
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (LI->getVNInfoAt(Idx) != VNI)
+ continue;
+
+ // Follow sibling copies down the dominator tree.
+ if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(DstReg)) {
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot");
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ }
+ continue;
+ }
+
+ // Erase spills.
+ int FI;
+ if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI);
+ // eliminateDeadDefs won't normally remove stores, so switch opcode.
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(MI);
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. See through snippet copies.
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(LI, VNI));
+ do {
+ tie(LI, VNI) = WorkList.pop_back_val();
+ if (!UsedValues.insert(VNI))
+ continue;
+
+ if (VNI->isPHIDef()) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ if (PVNI)
+ WorkList.push_back(std::make_pair(LI, PVNI));
+ }
+ continue;
+ }
+
+ // Follow snippet copies.
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex());
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
+ MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
+
+ if (!ParentVNI) {
DEBUG(dbgs() << "\tadding <undef> flags: ");
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
MO.setIsUndef();
}
DEBUG(dbgs() << UseIdx << '\t' << *MI);
return true;
}
- if (!reMattable_.count(OrigVNI)) {
- DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
- << UseIdx << '\t' << *MI);
+
+ if (SnippetCopies.count(MI))
return false;
- }
- MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
- if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
- usedValues_.insert(OrigVNI);
+
+ // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) {
+ markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
return false;
}
- // If the instruction also writes li_->reg, it had better not require the same
- // register for uses and defs.
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops);
if (Writes) {
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
- usedValues_.insert(OrigVNI);
+ markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
return false;
}
}
}
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
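+  // (E.g. a rematerializable load feeding a single use may be folded into that
+  // use as a memory operand instead, where the target allows it.)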
+ if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+ foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+  // Allocate a new register for the remat.
- unsigned NewVReg = mri_.createVirtualRegister(rc_);
- vrm_.grow();
- LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM);
NewLI.markNotSpillable();
- newIntervals_->push_back(&NewLI);
// Finally we can rematerialize OrigMI before MI.
- MachineBasicBlock &MBB = *MI->getParent();
- tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
- MachineBasicBlock::iterator RematMI = MI;
- SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
- DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI);
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ LIS, TII, TRI);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
// Replace operands
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
- if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
- MO.setReg(NewVReg);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewLI.reg);
MO.setIsKill();
}
}
DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
- VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
- lis_.getVNInfoAllocator());
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ ++NumRemats;
return true;
}
-/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
- // Do a quick scan of the interval values to find if any are remattable.
- reMattable_.clear();
- usedValues_.clear();
- for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
- E = li_->vni_end(); I != E; ++I) {
- VNInfo *VNI = *I;
- if (VNI->isUnused() || !VNI->isDefAccurate())
- continue;
- MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
- if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
- continue;
- reMattable_.insert(VNI);
- }
-
- // Often, no defs are remattable.
- if (reMattable_.empty())
+ // analyzeSiblingValues has already tested all relevant defining instructions.
+ if (!Edit->anyRematerializable(LIS, TII, AA))
return;
- // Try to remat before all uses of li_->reg.
- bool anyRemat = false;
- for (MachineRegisterInfo::use_nodbg_iterator
- RI = mri_.use_nodbg_begin(li_->reg);
- MachineInstr *MI = RI.skipInstruction();)
- anyRemat |= reMaterializeFor(MI);
+ UsedValues.clear();
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction();)
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
if (!anyRemat)
return;
// Remove any values that were completely rematted.
- bool anyRemoved = false;
- for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
- E = reMattable_.end(); I != E; ++I) {
- VNInfo *VNI = *I;
- if (VNI->hasPHIKill() || usedValues_.count(VNI))
- continue;
- MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
- DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
- lis_.RemoveMachineInstrFromMaps(DefMI);
- vrm_.RemoveMachineInstrFromMaps(DefMI);
- DefMI->eraseFromParent();
- VNI->setIsDefAccurate(false);
- anyRemoved = true;
- }
-
- if (!anyRemoved)
- return;
-
- // Removing values may cause debug uses where li_ is not live.
- for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
- MachineInstr *MI = RI.skipInstruction();) {
- if (!MI->isDebugValue())
- continue;
- // Try to preserve the debug value if li_ is live immediately after it.
- MachineBasicBlock::iterator NextMI = MI;
- ++NextMI;
- if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
- VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
- if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
continue;
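+      // Every use of this value was rematerialized, so the defining
+      // instruction is no longer needed; mark the def dead and queue the
+      // instruction for deletion once all of its defs are dead.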
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
}
- DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
- MI->eraseFromParent();
}
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
+ if (DeadDefs.empty())
+ return;
+ DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+
+ // Get rid of deleted and empty intervals.
+ for (unsigned i = RegsToSpill.size(); i != 0; --i) {
+ unsigned Reg = RegsToSpill[i-1];
+ if (!LIS.hasInterval(Reg)) {
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (!LI.empty())
+ continue;
+ Edit->eraseVirtReg(Reg, LIS);
+ RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ }
+ DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
}
-/// If MI is a load or store of stackSlot_, it can be removed.
-bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// If MI is a load or store of StackSlot, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
int FI = 0;
- unsigned reg;
- if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) &&
- !(reg = tii_.isStoreToStackSlot(MI, FI)))
- return false;
+ unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI);
+ bool IsLoad = InstrReg;
+ if (!IsLoad)
+ InstrReg = TII.isStoreToStackSlot(MI, FI);
// We have a stack access. Is it the right register and slot?
- if (reg != li_->reg || FI != stackSlot_)
+ if (InstrReg != Reg || FI != StackSlot)
return false;
DEBUG(dbgs() << "Coalescing stack access: " << *MI);
- lis_.RemoveMachineInstrFromMaps(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
+
+ if (IsLoad) {
+ ++NumReloadsRemoved;
+ --NumReloads;
+ } else {
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+
return true;
}
/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
-/// Return true on success, and MI will be erased.
+/// @param MI Instruction using or defining the current register.
+/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success, and MI will be erased.
bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops) {
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) {
+ bool WasCopy = MI->isCopy();
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
@@ -328,79 +1028,72 @@
// FIXME: Teach targets to deal with subregs.
if (MO.getSubReg())
return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
// Tied use operands should not be passed to foldMemoryOperand.
if (!MI->isRegTiedToDefOperand(Idx))
FoldOps.push_back(Idx);
}
- MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
+ MachineInstr *FoldMI =
+ LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
+ : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
if (!FoldMI)
return false;
- lis_.ReplaceMachineInstrInMaps(MI, FoldMI);
- vrm_.addSpillSlotUse(stackSlot_, FoldMI);
+ LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
+ if (!LoadMI)
+ VRM.addSpillSlotUse(StackSlot, FoldMI);
MI->eraseFromParent();
DEBUG(dbgs() << "\tfolded: " << *FoldMI);
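+  // Update the statistics. A folded COPY has turned into a stack access:
+  // operand 0 is the def, so folding it produced a store (a spill), while
+  // folding a use produced a load (a reload).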
+ if (!WasCopy)
+ ++NumFolded;
+ else if (Ops.front() == 0)
+ ++NumSpills;
+ else
+ ++NumReloads;
return true;
}
/// insertReload - Insert a reload of NewLI.reg before MI.
void InlineSpiller::insertReload(LiveInterval &NewLI,
+ SlotIndex Idx,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
- tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_);
+ TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to load instruction.
- SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
- vrm_.addSpillSlotUse(stackSlot_, MI);
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+ VRM.addSpillSlotUse(StackSlot, MI);
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
- VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true,
- lis_.getVNInfoAllocator());
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
+ LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+ ++NumReloads;
}
/// insertSpill - Insert a spill of NewLI.reg after MI.
-void InlineSpiller::insertSpill(LiveInterval &NewLI,
- MachineBasicBlock::iterator MI) {
+void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+ SlotIndex Idx, MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
- tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_);
+ TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
+ MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to store instruction.
- SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
- vrm_.addSpillSlotUse(stackSlot_, MI);
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+ VRM.addSpillSlotUse(StackSlot, MI);
DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
- VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true,
- lis_.getVNInfoAllocator());
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+ ++NumSpills;
}
-void InlineSpiller::spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex) {
- DEBUG(dbgs() << "Inline spilling " << *li << "\n");
- assert(li->isSpillable() && "Attempting to spill already spilled value.");
- assert(!li->isStackSlot() && "Trying to spill a stack slot.");
+/// spillAroundUses - Insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(unsigned Reg) {
+ DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+ LiveInterval &OldLI = LIS.getInterval(Reg);
- li_ = li;
- newIntervals_ = &newIntervals;
- rc_ = mri_.getRegClass(li->reg);
- spillIs_ = &spillIs;
-
- if (split())
- return;
-
- reMaterializeAll();
-
- // Remat may handle everything.
- if (li_->empty())
- return;
-
- stackSlot_ = vrm_.getStackSlot(li->reg);
- if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
- stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
-
- // Iterate over instructions using register.
- for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
+ // Iterate over instructions using Reg.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
MachineInstr *MI = RI.skipInstruction();) {
// Debug values are not allowed to affect codegen.
@@ -409,7 +1102,7 @@
uint64_t Offset = MI->getOperand(1).getImm();
const MDNode *MDPtr = MI->getOperand(2).getMetadata();
DebugLoc DL = MI->getDebugLoc();
- if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_,
+ if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot,
Offset, MDPtr, DL)) {
DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
@@ -421,14 +1114,50 @@
continue;
}
+ // Ignore copies to/from snippets. We'll delete them.
+ if (SnippetCopies.count(MI))
+ continue;
+
// Stack slot accesses may coalesce away.
- if (coalesceStackAccess(MI))
+ if (coalesceStackAccess(MI, Reg))
continue;
// Analyze instruction.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops);
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops);
+
+ // Find the slot index where this instruction reads and writes OldLI.
+ // This is usually the def slot, except for tied early clobbers.
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex()))
+ if (SlotIndex::isSameInstr(Idx, VNI->def))
+ Idx = VNI->def;
+
+ // Check for a sibling copy.
+ unsigned SibReg = isFullCopyOf(MI, Reg);
+ if (SibReg && isSibling(SibReg)) {
+ // This may actually be a copy between snippets.
+ if (isRegToSpill(SibReg)) {
+ DEBUG(dbgs() << "Found new snippet copy: " << *MI);
+ SnippetCopies.insert(MI);
+ continue;
+ }
+ if (Writes) {
+ // Hoist the spill of a sib-reg copy.
+ if (hoistSpill(OldLI, MI)) {
+ // This COPY is now dead, the value is already in the stack slot.
+ MI->getOperand(0).setIsDead();
+ DeadDefs.push_back(MI);
+ continue;
+ }
+ } else {
+ // This is a reload for a sib-reg copy. Drop spills downstream.
+ LiveInterval &SibLI = LIS.getInterval(SibReg);
+ eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+ // The COPY will fold to a reload below.
+ }
+ }
// Attempt to fold memory ops.
if (foldMemoryOperand(MI, Ops))
@@ -436,19 +1165,17 @@
// Allocate interval around instruction.
// FIXME: Infer regclass from instruction alone.
- unsigned NewVReg = mri_.createVirtualRegister(rc_);
- vrm_.grow();
- LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM);
NewLI.markNotSpillable();
if (Reads)
- insertReload(NewLI, MI);
+ insertReload(NewLI, Idx, MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
- MO.setReg(NewVReg);
+ MO.setReg(NewLI.reg);
if (MO.isUse()) {
if (!MI->isRegTiedToDefOperand(Ops[i]))
MO.setIsKill();
@@ -457,12 +1184,95 @@
hasLiveDef = true;
}
}
+ DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
// FIXME: Use a second vreg if instruction has no tied ops.
- if (Writes && hasLiveDef)
- insertSpill(NewLI, MI);
+ if (Writes) {
+ if (hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+ else {
+ // This instruction defines a dead value. We don't need to spill it,
+ // but do create a live range for the dead value.
+ VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI));
+ }
+ }
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
- newIntervals.push_back(&NewLI);
}
}
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+ // Update LiveStacks now that we are committed to spilling.
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+ StackSlot = VRM.assignVirt2StackSlot(Original);
+ StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
+ StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator());
+ } else
+ StackInt = &LSS.getInterval(StackSlot);
+
+ if (Original != Edit->getReg())
+ VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+ assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]),
+ StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+ // Spill around uses of all RegsToSpill.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ spillAroundUses(RegsToSpill[i]);
+
+ // Hoisted spills may cause dead code.
+ if (!DeadDefs.empty()) {
+ DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ }
+
+ // Finally delete the SnippetCopies.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]);
+ MachineInstr *MI = RI.skipInstruction();) {
+ assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+ // FIXME: Do this with a LiveRangeEdit callback.
+ VRM.RemoveMachineInstrFromMaps(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+ }
+
+ // Delete all spilled registers.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+ Edit->eraseVirtReg(RegsToSpill[i], LIS);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ ++NumSpilledRanges;
+ Edit = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ // Share a stack slot among all descendants of Original.
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = 0;
+
+ DEBUG(dbgs() << "Inline spilling "
+ << MRI.getRegClass(edit.getReg())->getName()
+ << ':' << edit.getParent() << "\nFrom original "
+ << LIS.getInterval(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
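+  // Spilling proceeds in stages: collect the snippet registers that spill
+  // together, trace values copied between sibling registers, rematerialize
+  // whatever we can, and only commit what remains to the stack slot.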
+ collectRegsToSpill();
+ analyzeSiblingValues();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, LIS, Loops);
+}
diff --git a/src/LLVM/lib/CodeGen/InterferenceCache.cpp b/src/LLVM/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 0000000..29b47bd
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,169 @@
+//===-- InterferenceCache.cpp - Caching per-block interference -*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "InterferenceCache.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// Static member used for null interference cursors.
+InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ PhysRegEntries.assign(TRI->getNumRegs(), 0);
+ for (unsigned i = 0; i != CacheEntries; ++i)
+ Entries[i].clear(mf, indexes);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+ unsigned E = PhysRegEntries[PhysReg];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate();
+ return &Entries[E];
+ }
+ // No valid entry exists, pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
+}
+
+/// revalidate - LIU contents have changed; update the tags.
+void InterferenceCache::Entry::revalidate() {
+ // Invalidate all block entries.
+ ++Tag;
+ // Invalidate all iterators.
+ PrevPos = SlotIndex();
+ for (unsigned i = 0, e = Aliases.size(); i != e; ++i)
+ Aliases[i].second = Aliases[i].first->getTag();
+}
+
+void InterferenceCache::Entry::reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
+  // The LIUs have changed; invalidate the cache.
+ ++Tag;
+ PhysReg = physReg;
+ Blocks.resize(MF->getNumBlockIDs());
+ Aliases.clear();
+ for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
+ LiveIntervalUnion *LIU = LIUArray + *AS;
+ Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
+ }
+
+ // Reset iterators.
+ PrevPos = SlotIndex();
+ unsigned e = Aliases.size();
+ Iters.resize(e);
+ for (unsigned i = 0; i != e; ++i)
+ Iters[i].setMap(Aliases[i].first->getMap());
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
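+  // The entry is stale if its alias list no longer matches getOverlaps, or
+  // if any of the unions changed since their tags were recorded.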
+ unsigned i = 0, e = Aliases.size();
+ for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
+ LiveIntervalUnion *LIU = LIUArray + *AS;
+ if (i == e || Aliases[i].first != LIU)
+ return false;
+ if (LIU->changedSince(Aliases[i].second))
+ return false;
+ }
+ return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+ // Use advanceTo only when possible.
+ if (PrevPos != Start) {
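+    // advanceTo only moves iterators forward, so a backwards jump requires
+    // repositioning each iterator with a full find.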
+ if (!PrevPos.isValid() || Start < PrevPos)
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+ Iters[i].find(Start);
+ else
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+ Iters[i].advanceTo(Start);
+ PrevPos = Start;
+ }
+
+ MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+ BlockInterference *BI = &Blocks[MBBNum];
+ for (;;) {
+ BI->Tag = Tag;
+ BI->First = BI->Last = SlotIndex();
+
+ // Check for first interference.
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+ Iter &I = Iters[i];
+ if (!I.valid())
+ continue;
+ SlotIndex StartI = I.start();
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ PrevPos = Stop;
+ if (BI->First.isValid())
+ break;
+
+ // No interference in this block? Go ahead and precompute the next block.
+ if (++MFI == MF->end())
+ return;
+ MBBNum = MFI->getNumber();
+ BI = &Blocks[MBBNum];
+ if (BI->Tag == Tag)
+ return;
+ tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+ }
+
+ // Check for last interference in block.
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+ Iter &I = Iters[i];
+ if (!I.valid() || I.start() >= Stop)
+ continue;
+ I.advanceTo(Stop);
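+    // advanceTo may overshoot the block; back up one segment to inspect the
+    // last range that can still end inside it, then restore the iterator
+    // afterwards.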
+ bool Backup = !I.valid() || I.start() >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I.stop();
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+}
diff --git a/src/LLVM/lib/CodeGen/InterferenceCache.h b/src/LLVM/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 0000000..4df0a9e
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,205 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INTERFERENCECACHE
+#define LLVM_CODEGEN_INTERFERENCECACHE
+
+#include "LiveIntervalUnion.h"
+
+namespace llvm {
+
+class InterferenceCache {
+ const TargetRegisterInfo *TRI;
+ LiveIntervalUnion *LIUArray;
+ SlotIndexes *Indexes;
+ MachineFunction *MF;
+
+ /// BlockInterference - information about the interference in a single basic
+ /// block.
+ struct BlockInterference {
+ BlockInterference() : Tag(0) {}
+ unsigned Tag;
+ SlotIndex First;
+ SlotIndex Last;
+ };
+
+ /// Entry - A cache entry containing interference information for all aliases
+ /// of PhysReg in all basic blocks.
+ class Entry {
+ /// PhysReg - The register currently represented.
+ unsigned PhysReg;
+
+ /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+ /// change.
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+    /// Aliases - A LiveIntervalUnion pointer and tag for each alias of
+ /// PhysReg.
+ SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases;
+
+ typedef LiveIntervalUnion::SegmentIter Iter;
+
+    /// Iters - An iterator for each alias.
+ SmallVector<Iter, 8> Iters;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum]
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate();
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up to date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+  // We don't keep a cache entry for every physical register; that would use too
+ // much memory. Instead, a fixed number of cache entries are used in a round-
+ // robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ SmallVector<unsigned char, 2> PhysRegEntries;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {}
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
+ class Cursor {
+ Entry *CacheEntry;
+ BlockInterference *Current;
+ static BlockInterference NoInterference;
+
+ void setEntry(Entry *E) {
+ Current = 0;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(0), Current(0) {}
+ ~Cursor() { setEntry(0); }
+
+ Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(0);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+    /// moveToBlock - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/src/LLVM/lib/CodeGen/IntrinsicLowering.cpp b/src/LLVM/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..0f92c2d
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,570 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getFloatTy(M.getContext()));
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::getDoubleTy(M.getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ Constant* FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+// Visual Studio defines setjmp as _setjmp.
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::getInt32Ty(M.getContext()));
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ TD.getIntPtrType(Context), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
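+    // For reference, the scalar equivalent of the IR built below is:
+    //   (V << 24) | ((V << 8) & 0xFF0000) | ((V >> 8) & 0xFF00) | (V >> 24).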
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
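+  // Classic parallel bit count: each pass masks alternating fields of width
+  // 2^ct, adds neighbouring fields pairwise, and doubles the field width.
+  // Values wider than 64 bits are processed one 64-bit word at a time, with
+  // the per-word counts accumulated into Count.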
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+      Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+      Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
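+  // The shifts above smeared the highest set bit into every lower position,
+  // so the zero bits that remain are exactly the input's leading zeros:
+  // ctlz(x) == ctpop(~smear(x)).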
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ CallSite CS(CI);
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+  // Just replace __builtin_expect(exp, c) with its first argument 'exp'.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+  // never optimized (i.e., right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getInt32Ty(Context));
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
+ Value *Src = CI->getArgOperand(0);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_exception:
+ case Intrinsic::eh_selector:
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::eh_typeid_for:
+ // Return something different to eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ // Extend the amount to i32.
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+  // Okay, we can do this transform; do so now.
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
diff --git a/src/LLVM/lib/CodeGen/LLVMCodeGen.vcxproj b/src/LLVM/lib/CodeGen/LLVMCodeGen.vcxproj
new file mode 100644
index 0000000..74fd4c7
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LLVMCodeGen.vcxproj
@@ -0,0 +1,482 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Profile|Win32">
+ <Configuration>Profile</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Profile|x64">
+ <Configuration>Profile</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{7BE5F26F-0525-42BB-AAED-56C5B4582B99}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>LLVMCodeGen</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <InlineFunctionExpansion>Disabled</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Debug</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\Debug/LLVMCodeGen.pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <InlineFunctionExpansion>Disabled</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Debug</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\Debug/LLVMCodeGen.pdb</ProgramDataBaseFileName>
+ <OmitFramePointers>false</OmitFramePointers>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\Release/LLVMCodeGen.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\Release/LLVMCodeGen.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>false</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\Release/LLVMCodeGen.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\Release/LLVMCodeGen.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>false</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="AggressiveAntiDepBreaker.cpp" />
+ <ClCompile Include="AllocationOrder.cpp" />
+ <ClCompile Include="Analysis.cpp" />
+ <ClCompile Include="BranchFolding.cpp" />
+ <ClCompile Include="CalcSpillWeights.cpp" />
+ <ClCompile Include="CallingConvLower.cpp" />
+ <ClCompile Include="CodePlacementOpt.cpp" />
+ <ClCompile Include="CriticalAntiDepBreaker.cpp" />
+ <ClCompile Include="DeadMachineInstructionElim.cpp" />
+ <ClCompile Include="DwarfEHPrepare.cpp" />
+ <ClCompile Include="EdgeBundles.cpp" />
+ <ClCompile Include="ELFCodeEmitter.cpp" />
+ <ClCompile Include="ELFWriter.cpp" />
+ <ClCompile Include="ExecutionDepsFix.cpp" />
+ <ClCompile Include="ExpandISelPseudos.cpp" />
+ <ClCompile Include="ExpandPostRAPseudos.cpp" />
+ <ClCompile Include="GCMetadata.cpp" />
+ <ClCompile Include="GCStrategy.cpp" />
+ <ClCompile Include="InlineSpiller.cpp" />
+ <ClCompile Include="InterferenceCache.cpp" />
+ <ClCompile Include="IntrinsicLowering.cpp" />
+ <ClCompile Include="LLVMTargetMachine.cpp" />
+ <ClCompile Include="LatencyPriorityQueue.cpp" />
+ <ClCompile Include="LexicalScopes.cpp" />
+ <ClCompile Include="LiveDebugVariables.cpp" />
+ <ClCompile Include="LiveInterval.cpp" />
+ <ClCompile Include="LiveIntervalAnalysis.cpp" />
+ <ClCompile Include="LiveIntervalUnion.cpp" />
+ <ClCompile Include="LiveStackAnalysis.cpp" />
+ <ClCompile Include="LiveVariables.cpp" />
+ <ClCompile Include="LiveRangeCalc.cpp" />
+ <ClCompile Include="LiveRangeEdit.cpp" />
+ <ClCompile Include="LocalStackSlotAllocation.cpp" />
+ <ClCompile Include="MachineBasicBlock.cpp" />
+ <ClCompile Include="MachineCSE.cpp" />
+ <ClCompile Include="MachineDominators.cpp" />
+ <ClCompile Include="MachineFunction.cpp" />
+ <ClCompile Include="MachineFunctionAnalysis.cpp" />
+ <ClCompile Include="MachineFunctionPass.cpp" />
+ <ClCompile Include="MachineFunctionPrinterPass.cpp" />
+ <ClCompile Include="MachineInstr.cpp" />
+ <ClCompile Include="MachineLICM.cpp" />
+ <ClCompile Include="MachineLoopInfo.cpp" />
+ <ClCompile Include="MachineModuleInfo.cpp" />
+ <ClCompile Include="MachineModuleInfoImpls.cpp" />
+ <ClCompile Include="MachinePassRegistry.cpp" />
+ <ClCompile Include="MachineRegisterInfo.cpp" />
+ <ClCompile Include="MachineSSAUpdater.cpp" />
+ <ClCompile Include="MachineSink.cpp" />
+ <ClCompile Include="MachineVerifier.cpp" />
+ <ClCompile Include="ObjectCodeEmitter.cpp" />
+ <ClCompile Include="OptimizePHIs.cpp" />
+ <ClCompile Include="PHIElimination.cpp" />
+ <ClCompile Include="PHIEliminationUtils.cpp" />
+ <ClCompile Include="Passes.cpp" />
+ <ClCompile Include="PeepholeOptimizer.cpp" />
+ <ClCompile Include="PostRASchedulerList.cpp" />
+ <ClCompile Include="ProcessImplicitDefs.cpp" />
+ <ClCompile Include="PrologEpilogInserter.cpp" />
+ <ClCompile Include="PseudoSourceValue.cpp" />
+ <ClCompile Include="RegAllocBasic.cpp" />
+ <ClCompile Include="RegAllocFast.cpp" />
+ <ClCompile Include="RegAllocGreedy.cpp" />
+ <ClCompile Include="RegAllocLinearScan.cpp" />
+ <ClCompile Include="RegisterClassInfo.cpp" />
+ <ClCompile Include="RegisterCoalescer.cpp" />
+ <ClCompile Include="RegisterScavenging.cpp" />
+ <ClCompile Include="RenderMachineFunction.cpp" />
+ <ClCompile Include="ScheduleDAG.cpp" />
+ <ClCompile Include="ScheduleDAGEmit.cpp" />
+ <ClCompile Include="ScheduleDAGInstrs.cpp" />
+ <ClCompile Include="ScheduleDAGPrinter.cpp" />
+ <ClCompile Include="ScoreboardHazardRecognizer.cpp" />
+ <ClCompile Include="ShrinkWrapping.cpp" />
+ <ClCompile Include="SjLjEHPrepare.cpp" />
+ <ClCompile Include="SlotIndexes.cpp" />
+ <ClCompile Include="Spiller.cpp" />
+ <ClCompile Include="SpillPlacement.cpp" />
+ <ClCompile Include="SplitKit.cpp" />
+ <ClCompile Include="StackProtector.cpp" />
+ <ClCompile Include="StackSlotColoring.cpp" />
+ <ClCompile Include="StrongPHIElimination.cpp" />
+ <ClCompile Include="TailDuplication.cpp" />
+ <ClCompile Include="TargetInstrInfoImpl.cpp" />
+ <ClCompile Include="TargetLoweringObjectFileImpl.cpp" />
+ <ClCompile Include="TwoAddressInstructionPass.cpp" />
+ <ClCompile Include="UnreachableBlockElim.cpp" />
+ <ClCompile Include="VirtRegMap.cpp" />
+ <ClCompile Include="VirtRegRewriter.cpp" />
+ <ClInclude Include="AggressiveAntiDepBreaker.h" />
+ <ClInclude Include="AllocationOrder.h" />
+ <ClInclude Include="AntiDepBreaker.h" />
+ <ClInclude Include="BranchFolding.h" />
+ <ClInclude Include="CriticalAntiDepBreaker.h" />
+ <ClInclude Include="ELF.h" />
+ <ClInclude Include="ELFCodeEmitter.h" />
+ <ClInclude Include="ELFWriter.h" />
+ <ClInclude Include="InterferenceCache.h" />
+ <ClInclude Include="LiveDebugVariables.h" />
+ <ClInclude Include="LiveIntervalUnion.h" />
+ <ClInclude Include="LiveRangeCalc.h" />
+ <ClInclude Include="LiveRangeEdit.h" />
+ <ClInclude Include="PHIEliminationUtils.h" />
+ <ClInclude Include="PrologEpilogInserter.h" />
+ <ClInclude Include="RegAllocBase.h" />
+ <ClInclude Include="RegisterClassInfo.h" />
+ <ClInclude Include="RegisterCoalescer.h" />
+ <ClInclude Include="RenderMachineFunction.h" />
+ <ClInclude Include="ScheduleDAGInstrs.h" />
+ <ClInclude Include="Spiller.h" />
+ <ClInclude Include="SpillPlacement.h" />
+ <ClInclude Include="SplitKit.h" />
+ <ClInclude Include="Splitter.h" />
+ <ClInclude Include="VirtRegMap.h" />
+ <ClInclude Include="VirtRegRewriter.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Analysis/LLVMAnalysis.vcxproj">
+ <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\VMCore/LLVMCore.vcxproj">
+ <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\MC/LLVMMC.vcxproj">
+ <Project>8AB70E5D-2814-4682-AF9F-3062758BAEAB</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\Transforms/Scalar/LLVMScalarOpts.vcxproj">
+ <Project>A3C67D8F-E19A-46EF-91AB-C7840FE2B97C</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\Support/LLVMSupport.vcxproj">
+ <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\Target/LLVMTarget.vcxproj">
+ <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\Transforms/Utils/LLVMTransformUtils.vcxproj">
+ <Project>5C514254-58EE-4850-8743-F5D7BEAA3E66</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\include/llvm/intrinsics_gen.vcxproj">
+ <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/LLVMCodeGen.vcxproj.filters b/src/LLVM/lib/CodeGen/LLVMCodeGen.vcxproj.filters
new file mode 100644
index 0000000..05d627e
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LLVMCodeGen.vcxproj.filters
@@ -0,0 +1,366 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <ClCompile Include="AggressiveAntiDepBreaker.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="AllocationOrder.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="Analysis.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="BranchFolding.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="CalcSpillWeights.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="CallingConvLower.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="CodePlacementOpt.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="CriticalAntiDepBreaker.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="DeadMachineInstructionElim.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="DwarfEHPrepare.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="EdgeBundles.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ELFCodeEmitter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ELFWriter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ExecutionDepsFix.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ExpandISelPseudos.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ExpandPostRAPseudos.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="GCMetadata.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="GCStrategy.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="InlineSpiller.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="InterferenceCache.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="IntrinsicLowering.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LLVMTargetMachine.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LatencyPriorityQueue.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LexicalScopes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveDebugVariables.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveInterval.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveIntervalAnalysis.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveIntervalUnion.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveStackAnalysis.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveVariables.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveRangeCalc.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LiveRangeEdit.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LocalStackSlotAllocation.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineBasicBlock.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineCSE.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineDominators.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineFunction.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineFunctionAnalysis.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineFunctionPass.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineFunctionPrinterPass.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineInstr.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineLICM.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineLoopInfo.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineModuleInfo.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineModuleInfoImpls.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachinePassRegistry.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineRegisterInfo.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineSSAUpdater.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineSink.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="MachineVerifier.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ObjectCodeEmitter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="OptimizePHIs.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="PHIElimination.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="PHIEliminationUtils.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="Passes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="PeepholeOptimizer.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="PostRASchedulerList.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ProcessImplicitDefs.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="PrologEpilogInserter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="PseudoSourceValue.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RegAllocBasic.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RegAllocFast.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RegAllocGreedy.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RegAllocLinearScan.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RegisterClassInfo.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RegisterCoalescer.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RegisterScavenging.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="RenderMachineFunction.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScheduleDAG.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScheduleDAGEmit.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScheduleDAGInstrs.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScheduleDAGPrinter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScoreboardHazardRecognizer.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ShrinkWrapping.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SjLjEHPrepare.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SlotIndexes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="Spiller.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SpillPlacement.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SplitKit.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="StackProtector.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="StackSlotColoring.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="StrongPHIElimination.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="TailDuplication.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="TargetInstrInfoImpl.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="TargetLoweringObjectFileImpl.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="TwoAddressInstructionPass.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="UnreachableBlockElim.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="VirtRegMap.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="VirtRegRewriter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="AggressiveAntiDepBreaker.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="AllocationOrder.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="AntiDepBreaker.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="BranchFolding.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="CriticalAntiDepBreaker.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ELF.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ELFCodeEmitter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ELFWriter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="InterferenceCache.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="LiveDebugVariables.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="LiveIntervalUnion.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="LiveRangeCalc.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="LiveRangeEdit.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="PHIEliminationUtils.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="PrologEpilogInserter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="RegAllocBase.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="RegisterClassInfo.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="RegisterCoalescer.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="RenderMachineFunction.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ScheduleDAGInstrs.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="Spiller.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="SpillPlacement.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="SplitKit.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="Splitter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="VirtRegMap.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="VirtRegRewriter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{CFA0CD99-0550-4E94-A4D9-080C3F5D695C}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/LLVMTargetMachine.cpp b/src/LLVM/lib/CodeGen/LLVMTargetMachine.cpp
index 2f9ed2a..187147a 100644
--- a/src/LLVM/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/src/LLVM/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,6 +13,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -20,18 +21,30 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+namespace llvm {
+ bool EnableFastISel;
+}
+
static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -44,8 +57,12 @@
cl::desc("Disable code placement"));
static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
cl::Hidden,
cl::desc("Disable Machine LICM"));
@@ -55,12 +72,21 @@
cl::desc("Disable Loop Strength Reduction Pass"));
static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
cl::desc("Show encoding in .s output"));
static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
cl::desc("Show instruction structure in .s output"));
static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
cl::desc("Enable MC API logging"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
static cl::opt<cl::boolOrDefault>
AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
@@ -72,30 +98,112 @@
case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
case cl::BOU_TRUE: return true;
case cl::BOU_FALSE: return false;
- }
+ }
}
-// Enable or disable an experimental optimization to split GEPs
-// and run a special GVN pass which does not examine loads, in
-// an effort to factor out redundancy implicit in complex GEPs.
-static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
- cl::desc("Split GEPs and run no-load GVN"));
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
-LLVMTargetMachine::LLVMTargetMachine(const Target &T,
- const std::string &Triple)
- : TargetMachine(T), TargetTriple(Triple) {
- AsmInfo = T.createAsmInfo(TargetTriple);
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : TargetMachine(T, Triple, CPU, FS) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM);
+ AsmInfo = T.createMCAsmInfo(Triple);
+ // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
+ // and if the old one gets included then MCAsmInfo will be NULL and
+ // we'll crash later.
+ // Provide the user with a useful error message about what's wrong.
+ assert(AsmInfo && "MCAsmInfo not initialized. "
+ "Make sure you include the correct TargetSelect.h "
+ "and that InitializeAllTargetMCs() is being invoked!");
}
-// Set the default code model for the JIT for a generic target.
-// FIXME: Is small right here? or .is64Bit() ? Large : Small?
-void LLVMTargetMachine::setCodeModelForJIT() {
- setCodeModel(CodeModel::Small);
-}
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ MCContext *Context = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ return true;
+ assert(Context != 0 && "Failed to get MCContext");
-// Set the default code model for static compilation for a generic target.
-void LLVMTargetMachine::setCodeModelForStatic() {
- setCodeModel(CodeModel::Small);
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+ if (ShowMCEncoding) {
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, *Context);
+ MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ InstPrinter,
+ MCE, MAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI,
+ *Context);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ if (MCE == 0 || MAB == 0)
+ return true;
+
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(),
+ *Context, *MAB, Out,
+ MCE, hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+ break;
+ }
+ case CGFT_Null:
+ // The Null output is intended for performance analysis and testing,
+ // not for real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ if (EnableMCLogging)
+ AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ PM.add(createGCInfoDeleter());
+ return false;
}
/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
@@ -106,16 +214,15 @@
///
bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
- CodeGenOpt::Level OptLevel) {
- // Make sure the code model is set.
- setCodeModelForJIT();
-
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Add common CodeGen passes.
MCContext *Ctx = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, Ctx))
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
return true;
addCodeEmitter(PM, OptLevel, JCE);
+ PM.add(createGCInfoDeleter());
return false; // success!
}
@@ -127,38 +234,116 @@
///
bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
MCContext *&Ctx,
- CodeGenOpt::Level OptLevel) {
+ raw_ostream &Out,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel, Ctx))
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
return true;
- // Make sure the code model is set.
- setCodeModelForJIT();
+
+ if (hasMCSaveTempLabels())
+ Ctx->setAllowTemporaryLabels(false);
+
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
+ if (MCE == 0 || MAB == 0)
+ return true;
+
+ OwningPtr<MCStreamer> AsmStreamer;
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx,
+ *MAB, Out, MCE,
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
return false; // success!
}
+static void printNoVerify(PassManagerBase &PM, const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
+static void printAndVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(Banner));
+}
+
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
/// emitting to assembly files or machine code output.
///
bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
MCContext *&OutContext) {
// Standard LLVM-Level Passes.
- // Optionally, tun split-GEPs and no-load GVN.
- if (EnableSplitGEPGVN) {
- PM.add(createGEPSplitterPass());
- PM.add(createGVNPass(/*NoLoads=*/true));
- }
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
// Run loop strength reduction before anything else.
if (OptLevel != CodeGenOpt::None && !DisableLSR) {
PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
+ PM.add(createGCLoweringPass());
+
// Make sure that no unreachable blocks are instruction selected.
PM.add(createUnreachableBlockEliminationPass());
+ // Turn exception handling constructs into something the code generators can
+ // handle.
+ switch (getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both.
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPass(getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ PM.add(createDwarfEHPass(this));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+ break;
+ }
+
if (OptLevel != CodeGenOpt::None && !DisableCGP)
PM.add(createCodeGenPreparePass(getTargetLowering()));
@@ -166,52 +351,87 @@
addPreISel(PM, OptLevel);
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
// Standard Lower-Level Passes.
-
+
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getRegisterInfo(),
+ &getTargetLowering()->getObjFileLowering());
PM.add(MMI);
OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
// Set up a MachineFunction for the rest of CodeGen to work on.
PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
+ EnableFastISel = true;
+
// Ask the target for an isel.
if (addInstSelector(PM, OptLevel))
return true;
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
if (OptLevel != CodeGenOpt::None)
PM.add(createOptimizePHIsPass());
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
if (OptLevel != CodeGenOpt::None) {
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- PM.add(createDeadMachineInstructionElimPass());
+ if (!DisableMachineDCE)
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass");
- PM.add(createPeepholeOptimizerPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
- PM.add(createMachineCSEPass());
+ if (!DisableMachineCSE)
+ PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
- }
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
- // Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
- PM.add(createTailDuplicatePass(true));
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
}
// Run pre-ra passes.
- addPreRegAlloc(PM, OptLevel);
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes");
// Perform register allocation.
PM.add(createRegisterAllocator(OptLevel));
+ printAndVerify(PM, "After Register Allocation");
// Perform stack slot coloring and post-ra machine LICM.
if (OptLevel != CodeGenOpt::None) {
@@ -223,39 +443,55 @@
// Run post-ra machine LICM to hoist reloads / remats.
if (!DisablePostRAMachineLICM)
PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
}
// Run post-ra passes.
- addPostRegAlloc(PM, OptLevel);
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
- PM.add(createLowerSubregsPass());
+ PM.add(createExpandPostRAPseudosPass());
+ printAndVerify(PM, "After ExpandPostRAPseudos");
// Insert prolog/epilog code. Eliminate abstract frame index references...
PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
// Run pre-sched2 passes.
- addPreSched2(PM, OptLevel);
-
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
// Second pass scheduler.
if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
}
// Branch folding must be run after regalloc and prolog/epilog insertion.
if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printNoVerify(PM, "After BranchFolding");
}
// Tail duplication.
if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
PM.add(createTailDuplicatePass(false));
+ printNoVerify(PM, "After TailDuplicate");
}
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(dbgs()));
+
if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
PM.add(createCodePlacementOptPass());
+ printNoVerify(PM, "After CodePlacementOpt");
}
- addPreEmitPass(PM, OptLevel);
+ if (addPreEmitPass(PM, OptLevel))
+ printNoVerify(PM, "After PreEmit passes");
return false;
}
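
The reworked addPassesToEmitFile() above is the entry point a static-compilation client drives. A minimal caller sketch, assuming TheModule and TM were already created through the usual TargetRegistry lookup (the names here are illustrative, not part of the patch):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/FormattedStream.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Target/TargetMachine.h"

    // Returns true on success; note that addPassesToEmitFile() itself
    // returns true on *failure* (e.g. no AsmPrinter registered).
    bool emitAssembly(llvm::Module &TheModule, llvm::TargetMachine &TM,
                      llvm::raw_ostream &OS) {
      llvm::PassManager PM;
      llvm::formatted_raw_ostream FOS(OS);
      if (TM.addPassesToEmitFile(PM, FOS,
                                 llvm::TargetMachine::CGFT_AssemblyFile,
                                 llvm::CodeGenOpt::Default,
                                 /*DisableVerify=*/false))
        return false;
      PM.run(TheModule); // Runs the whole codegen pipeline assembled above.
      return true;
    }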
diff --git a/src/LLVM/lib/CodeGen/LatencyPriorityQueue.cpp b/src/LLVM/lib/CodeGen/LatencyPriorityQueue.cpp
index b9527fa..0eb009d 100644
--- a/src/LLVM/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/src/LLVM/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "scheduler"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
@@ -35,14 +36,14 @@
unsigned RHSLatency = PQ->getLatency(RHSNum);
if (LHSLatency < RHSLatency) return true;
if (LHSLatency > RHSLatency) return false;
-
+
// After that, if two nodes have identical latencies, look to see if one will
// unblock more other nodes than the other.
unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
if (LHSBlocked < RHSBlocked) return true;
if (LHSBlocked > RHSBlocked) return false;
-
+
// Finally, just to provide a stable ordering, use the node number as a
// deciding factor.
return LHSNum < RHSNum;
@@ -64,7 +65,7 @@
OnlyAvailablePred = &Pred;
}
}
-
+
return OnlyAvailablePred;
}
@@ -78,7 +79,7 @@
++NumNodesBlocking;
}
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
-
+
Queue.push_back(SU);
}
@@ -102,10 +103,10 @@
/// node of the same priority that will not make a node available.
void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
if (SU->isAvailable) return; // All preds scheduled.
-
+
SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
-
+
// Okay, we found a single predecessor that is available, but not scheduled.
// Since it is available, it must be in the priority queue. First remove it.
remove(OnlyAvailablePred);
@@ -136,3 +137,16 @@
std::swap(*I, Queue.back());
Queue.pop_back();
}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ LatencyPriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
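
Because the new dump() drains a copy of the queue (q = *this) rather than the queue itself, it is safe to call while scheduling is in flight. A hypothetical call site, assuming a live LatencyPriorityQueue PQ and ScheduleDAG *DAG in the surrounding scheduler code:

    DEBUG(PQ.dump(DAG)); // DEBUG() comes from llvm/Support/Debug.h, included above.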
diff --git a/src/LLVM/lib/CodeGen/LexicalScopes.cpp b/src/LLVM/lib/CodeGen/LexicalScopes.cpp
new file mode 100644
index 0000000..a12e1a3
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LexicalScopes.cpp
@@ -0,0 +1,335 @@
+//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LexicalScopes analysis.
+//
+// This pass collects lexical scope information and maps machine instructions
+// to respective lexical scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lexicalscopes"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+LexicalScopes::~LexicalScopes() {
+ releaseMemory();
+}
+
+/// releaseMemory - release memory.
+void LexicalScopes::releaseMemory() {
+ MF = NULL;
+ CurrentFnLexicalScope = NULL;
+ DeleteContainerSeconds(LexicalScopeMap);
+ DeleteContainerSeconds(AbstractScopeMap);
+ InlinedLexicalScopeMap.clear();
+ AbstractScopesList.clear();
+}
+
+/// initialize - Scan machine function and construct lexical scope nest.
+void LexicalScopes::initialize(const MachineFunction &Fn) {
+ releaseMemory();
+ MF = &Fn;
+ SmallVector<InsnRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
+ extractLexicalScopes(MIRanges, MI2ScopeMap);
+ if (CurrentFnLexicalScope) {
+ constructScopeNest(CurrentFnLexicalScope);
+ assignInstructionRanges(MIRanges, MI2ScopeMap);
+ }
+}
+
+/// extractLexicalScopes - Extract instruction ranges for each lexical scope
+/// for the given machine function.
+void LexicalScopes::
+extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+ // Scan each instruction and create scopes. First build working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ const MachineInstr *RangeBeginMI = NULL;
+ const MachineInstr *PrevMI = NULL;
+ DebugLoc PrevDL;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+
+ // Check if instruction has valid location information.
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = MInsn;
+ continue;
+ }
+
+ // Ignore DBG_VALUE. It does not contribute to any instruction in output.
+ if (MInsn->isDebugValue())
+ continue;
+
+ if (RangeBeginMI) {
+ // If we have already seen a beginning of an instruction range and
+ // current instruction scope does not match scope of first instruction
+ // in this range then create a new instruction range.
+ InsnRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is a beginning of a new instruction range.
+ RangeBeginMI = MInsn;
+
+ // Reset previous markers.
+ PrevMI = MInsn;
+ PrevDL = MIDL;
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ InsnRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ }
+ }
+}
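+
+// Illustration: if a block's non-DBG_VALUE instructions carry the (made-up)
+// debug locations L1 L1 L2 L2, the loop above produces one range covering the
+// two L1 instructions, mapped to L1's scope, and the closing step produces a
+// second range covering the two L2 instructions, mapped to L2's scope.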
+
+/// findLexicalScope - Find lexical scope, either regular or inlined, for the
+/// given DebugLoc. Return NULL if not found.
+LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *IA = NULL;
+ DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
+ if (!Scope) return NULL;
+
+ // The scope that we were created with could have an extra file - which
+ // isn't what we care about in this case.
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+
+ if (IA)
+ return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ return LexicalScopeMap.lookup(Scope);
+}
+
+/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
+/// not available then create new lexical scope.
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
+
+ if (InlinedAt) {
+ // Create an abstract scope for inlined function.
+ getOrCreateAbstractScope(Scope);
+ // Create an inlined scope for inlined function.
+ return getOrCreateInlinedScope(Scope, InlinedAt);
+ }
+
+ return getOrCreateRegularScope(Scope);
+}
+
+/// getOrCreateRegularScope - Find or create a regular lexical scope.
+LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
+ DIDescriptor D = DIDescriptor(Scope);
+ if (D.isLexicalBlockFile()) {
+ Scope = DILexicalBlockFile(Scope).getScope();
+ D = DIDescriptor(Scope);
+ }
+
+ LexicalScope *WScope = LexicalScopeMap.lookup(Scope);
+ if (WScope)
+ return WScope;
+
+ LexicalScope *Parent = NULL;
+ if (D.isLexicalBlock())
+ Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
+ WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false);
+ LexicalScopeMap.insert(std::make_pair(Scope, WScope));
+ if (!Parent && DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(MF->getFunction()))
+ CurrentFnLexicalScope = WScope;
+
+ return WScope;
+}
+
+/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
+LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope,
+ MDNode *InlinedAt) {
+ LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt);
+ if (InlinedScope)
+ return InlinedScope;
+
+ DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt);
+ InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc),
+ DIDescriptor(Scope), InlinedAt, false);
+ InlinedLexicalScopeMap[InlinedLoc] = InlinedScope;
+ LexicalScopeMap[InlinedAt] = InlinedScope;
+ return InlinedScope;
+}
+
+/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
+LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
+ assert(N && "Invalid Scope encoding!");
+
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlockFile())
+ Scope = DILexicalBlockFile(Scope).getScope();
+ LexicalScope *AScope = AbstractScopeMap.lookup(N);
+ if (AScope)
+ return AScope;
+
+ LexicalScope *Parent = NULL;
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ Parent = getOrCreateAbstractScope(ParentDesc);
+ }
+ AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true);
+ AbstractScopeMap[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+/// constructScopeNest - Assign DFS in/out numbers to every scope in the tree
+/// rooted at Scope, so that dominance queries reduce to interval checks.
+void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
+ assert (Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<LexicalScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ LexicalScope *WS = WorkStack.back();
+ const SmallVector<LexicalScope *, 4> &Children = WS->getChildren();
+ bool visitedChildren = false;
+ for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+ SE = Children.end(); SI != SE; ++SI) {
+ LexicalScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
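+
+// A sketch of the numbering on a hypothetical tree: for a root S0 with two
+// leaf children S1 and S2, the walk above produces
+//
+//   S1: DFSIn=1, DFSOut=2
+//   S2: DFSIn=3, DFSOut=4
+//   S0: DFSOut=5 (the root is pushed without renumbering its DFSIn)
+//
+// so one scope dominates another exactly when its [DFSIn;DFSOut] interval
+// encloses the other's, which is presumably the check LexicalScope::dominates
+// performs. Note that DFSOut == 0 doubles as the "not yet visited" marker.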
+
+/// assignInstructionRanges - Find ranges of instructions covered by each
+/// lexical scope.
+void LexicalScopes::
+assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap)
+{
+
+ LexicalScope *PrevLexicalScope = NULL;
+ for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end(); RI != RE; ++RI) {
+ const InsnRange &R = *RI;
+ LexicalScope *S = MI2ScopeMap.lookup(R.first);
+ assert (S && "Lost LexicalScope for a machine instruction!");
+ if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
+ PrevLexicalScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevLexicalScope = S;
+ }
+
+ if (PrevLexicalScope)
+ PrevLexicalScope->closeInsnRange();
+}
+
+/// getMachineBasicBlocks - Populate given set using machine basic blocks which
+/// have machine instructions that belong to lexical scope identified by
+/// DebugLoc.
+void LexicalScopes::
+getMachineBasicBlocks(DebugLoc DL,
+ SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) {
+ MBBs.clear();
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return;
+
+ if (Scope == CurrentFnLexicalScope) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ MBBs.insert(I);
+ return;
+ }
+
+ SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges();
+ for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(),
+ E = InsnRanges.end(); I != E; ++I) {
+ InsnRange &R = *I;
+ MBBs.insert(R.first->getParent());
+ }
+}
+
+/// dominates - Return true if DebugLoc's lexical scope dominates at least one
+/// machine instruction's lexical scope in a given machine basic block.
+bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return false;
+
+ // Current function scope covers all basic blocks in the function.
+ if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF)
+ return true;
+
+ bool Result = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (IDL.isUnknown())
+ continue;
+ if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
+ if (Scope->dominates(IScope))
+ return true;
+ }
+ return Result;
+}
+
+/// dump - Print data structures.
+void LexicalScope::dump() const {
+#ifndef NDEBUG
+ raw_ostream &err = dbgs();
+ err.indent(IndentLevel);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ N->dump();
+ if (AbstractScope)
+ err << "Abstract Scope\n";
+
+ IndentLevel += 2;
+ if (!Children.empty())
+ err << "Children ...\n";
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ if (Children[i] != this)
+ Children[i]->dump();
+
+ IndentLevel -= 2;
+#endif
+}
+
diff --git a/src/LLVM/lib/CodeGen/LiveDebugVariables.cpp b/src/LLVM/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 0000000..3dfe4c0
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,1011 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
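+
+// A hypothetical map for one user value might look like
+//
+//   [10r;30r) -> 0    // Loc0, say a virtual register
+//   [30r;50r) -> 1    // Loc1, say a frame index after a spill
+//
+// where the mapped unsigned is an index into UserValue::locations below and
+// ~0u marks an explicitly undefined location; gaps mean "location unknown".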
+
+namespace {
+/// UserValueScopes - Keeps track of lexical scopes associated with a
+/// user value's source location.
+class UserValueScopes {
+ DebugLoc DL;
+ LexicalScopes &LS;
+ SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+public:
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {}
+
+ /// dominates - Return true if current scope dominates at least one machine
+ /// instruction in a given machine basic block.
+ bool dominates(MachineBasicBlock *MBB) {
+ if (LBlocks.empty())
+ LS.getMachineBasicBlocks(DL, LBlocks);
+ if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB))
+ return true;
+ return false;
+ }
+};
+} // end anonymous namespace
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
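+///
+/// For example (hypothetical registers): if a part of variable "x" is held by
+/// %reg1 and a part of variable "y" is also held by %reg1, the UserValues of
+/// "x" and "y" share one class; if "y" additionally uses %reg2, any UserValue
+/// referencing %reg2 joins that same class.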
+namespace {
+class LDVImpl;
+class UserValue {
+ const MDNode *variable; ///< The debug info variable we are part of.
+ unsigned offset; ///< Byte offset into variable.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// is live. Returns true if any changes were made.
+ bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
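+
+ // For illustration: merging class {A} with class {B -> C} (B the leader)
+ // makes A the leader and splices the chain into A -> B -> C, pointing the
+ // leader fields of B and C at A. getLeader() compresses paths on lookup,
+ // so stale leader pointers are repaired lazily.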
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg()) {
+ if (LocMO.getReg() == 0)
+ return ~0u;
+ // For register locations we don't care about use/def and other flags.
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ locations[i].getReg() == LocMO.getReg() &&
+ locations[i].getSubReg() == LocMO.getSubReg())
+ return i;
+ } else
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ // Don't store def operands.
+ if (locations.back().isReg())
+ locations.back().setIsUse();
+ return locations.size() - 1;
+ }
+
+ /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ void mapVirtRegs(LDVImpl *LDV);
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ else
+ // A later DBG_VALUE at the same SlotIndex overrides the old location.
+ I.setValue(getLocationNo(LocMO));
+ }
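+
+ // For illustration (hypothetical index): a DBG_VALUE seen at SlotIndex 20
+ // records only a one-slot placeholder [20;nextSlot) here; extendDef() is
+ // what later stretches it to the real extent of the location's availability.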
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// End points where VNI is no longer live are added to Kills.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param Kills Append end points of VNI's live range to Kills.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+ /// addDefsFromCopies - The value in LI/LocNo may be copied to other
+ /// registers. Determine if any of the copies are available at the kill
+ /// points, and add defs if possible.
+ /// @param LI Scan for copies of the value in LI->reg.
+ /// @param LocNo Location number of LI->reg.
+ /// @param Kills Points where the range of LocNo could be extended.
+ /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+ void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(MachineRegisterInfo &MRI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+ /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI);
+
+ /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// live. Returns true if any changes were made.
+ bool splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction. A
+ /// variable may have more than one corresponding DBG_VALUE instruction.
+ /// Only the first one needs a DebugLoc to identify the variable's lexical
+ /// scope in the source file.
+ DebugLoc findDebugLoc();
+
+ /// getDebugLoc - Return DebugLoc of this UserValue.
+ DebugLoc getDebugLoc() { return dl; }
+ void print(raw_ostream&, const TargetMachine*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ LexicalScopes LS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+ /// @param Idx Last valid SlotIndex before the instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+ /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ }
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Replace all references to OldReg with NewRegs.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+ DIVariable DV(variable);
+ OS << "!\"";
+ DV.printExtendedName(OS);
+ OS << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+ OS << " Loc" << i << '=';
+ locations[i].print(OS, TM);
+ }
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, &MF->getTarget());
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
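+
+// For example (hypothetical locations): with locations = { Loc0:%reg1,
+// Loc1:FI#3, Loc2:%reg1 }, coalesceLocation(2) finds the match at Loc0,
+// erases Loc2, rewrites interval values 2 -> 0, and shifts every value above
+// 2 down by one so location numbers stay dense.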
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+ DebugLoc DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Offset))
+ return UV;
+ }
+
+ UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+ userValues.push_back(UV);
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI->getNumOperands() != 3 ||
+ !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << *MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ unsigned Offset = MI->getOperand(1).getImm();
+ const MDNode *Var = MI->getOperand(2).getMetadata();
+ UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+ UV->addDef(Idx, MI->getOperand(0));
+ return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index; use the previous instruction instead.
+ SlotIndex Idx = MBBI == MBB->begin() ?
+ LIS->getMBBStartIdx(MBB) :
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SmallVector<SlotIndex, 16> Todo;
+ Todo.push_back(Idx);
+ do {
+ SlotIndex Start = Todo.pop_back_val();
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LI && VNI) {
+ LiveRange *Range = LI->getLiveRangeContaining(Start);
+ if (!Range || Range->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ continue;
+ }
+ if (Range->end < Stop)
+ Stop = Range->end, ToEnd = false;
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ continue;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+
+ if (Start >= Stop)
+ continue;
+
+ I.insert(Start, Stop, LocNo);
+
+ // If we extended to the MBB end, propagate down the dominator tree.
+ if (!ToEnd)
+ continue;
+ const std::vector<MachineDomTreeNode*> &Children =
+ MDT.getNode(MBB)->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Children[i]->getBlock();
+ if (UVS.dominates(MBB))
+ Todo.push_back(LIS.getMBBStartIdx(MBB));
+ }
+ } while (!Todo.empty());
+}
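+
+// A sketch of the propagation (hypothetical blocks): a def of LocNo 0 at
+// index 20 in BB#1 that survives to the block end inserts [20;end(BB#1))
+// and is then pushed to each dominator-tree child of BB#1 that passes the
+// UVS scope check, extending from the child's start index in turn. The walk
+// stops at the next def of this value or where VNI's live range ends, and
+// those end points are recorded in Kills.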
+
+void
+UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+ if (Kills.empty())
+ return;
+ // Don't track copies from physregs, there are too many uses.
+ if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ return;
+
+ // Collect all the (vreg, valno) pairs that are copies of LI.
+ SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(LI->reg),
+ UE = MRI.use_nodbg_end(); UI != UE; ++UI) {
+ // Copies of the full value.
+ if (UI.getOperand().getSubReg() || !UI->isCopy())
+ continue;
+ MachineInstr *MI = &*UI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Don't follow copies to physregs. These are usually setting up call
+ // arguments, and the argument registers are always call clobbered. We are
+ // better off in the source register which could be a callee-saved register,
+ // or it could be spilled.
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ continue;
+
+ // Is LocNo extended to reach this copy? If not, another def may be blocking
+ // it, or we are looking at a wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ LocMap::iterator I = locInts.find(Idx.getUseIndex());
+ if (!I.valid() || I.value() != LocNo)
+ continue;
+
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex());
+ assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value");
+ CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+ }
+
+ if (CopyValues.empty())
+ return;
+
+ DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+
+ // Try to add defs of the copied values for each kill point.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ SlotIndex Idx = Kills[i];
+ for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
+ LiveInterval *DstLI = CopyValues[j].first;
+ const VNInfo *DstVNI = CopyValues[j].second;
+ if (DstLI->getVNInfoAt(Idx) != DstVNI)
+ continue;
+ // Check that there isn't already a def at Idx
+ LocMap::iterator I = locInts.find(Idx);
+ if (I.valid() && I.start() <= Idx)
+ continue;
+ DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
+ MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+ assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
+ unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
+ I.insert(Idx, Idx.getNextSlot(), LocNo);
+ NewDefs.push_back(std::make_pair(Idx, LocNo));
+ break;
+ }
+ }
+}
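+
+// For example (hypothetical registers): if %reg1 holds LocNo and dies at
+// index K, but "%reg2 = COPY %reg1" produced a value still live at K, a
+// one-slot def using %reg2's location is inserted at K. Since NewDefs
+// aliases the worklist in computeIntervals(), that new def is extended on a
+// later iteration.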
+
+void
+UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ LiveIntervals &LIS,
+ MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+ // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ // Extend all defs, and possibly add new ones along the way.
+ for (unsigned i = 0; i != Defs.size(); ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ // Register locations are constrained to where the register value is live.
+ if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
+ LiveInterval *LI = &LIS.getInterval(Loc.getReg());
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ SmallVector<SlotIndex, 16> Kills;
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ } else
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS);
+ userValues[i]->mapVirtRegs(this);
+ }
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getTarget().getRegisterInfo();
+ clear();
+ LS.initialize(mf);
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << ((Value*)mf.getFunction())->getName()
+ << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ LS.releaseMemory();
+ return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+void UserValue::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = locations.size(); i; --i) {
+ unsigned LocNo = i - 1;
+ MachineOperand &Loc = locations[LocNo];
+ if (!Loc.isReg() || Loc.getReg() != OldReg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ Loc.substPhysReg(NewReg, *TRI);
+ else
+ Loc.substVirtReg(NewReg, SubIdx, *TRI);
+ coalesceLocation(LocNo);
+ }
+}
+
+void LDVImpl::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ UserValue *UV = lookupVirtReg(OldReg);
+ if (!UV)
+ return;
+
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ mapVirtReg(NewReg, UV);
+ virtRegToEqClass.erase(OldReg);
+
+ do {
+ UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
+ UV = UV->getNext();
+ } while (UV);
+}
+
+void LiveDebugVariables::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+bool
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
+ DEBUG({
+ dbgs() << "Splitting Loc" << OldLocNo << '\t';
+ print(dbgs(), 0);
+ });
+ bool DidChange = false;
+ LocMap::iterator LocMapI;
+ LocMapI.setMap(locInts);
+ for (unsigned i = 0; i != NewRegs.size(); ++i) {
+ LiveInterval *LI = NewRegs[i];
+ if (LI->empty())
+ continue;
+
+ // Don't allocate the new LocNo until it is needed.
+ unsigned NewLocNo = ~0u;
+
+ // Iterate over the overlaps between locInts and LI.
+ LocMapI.find(LI->beginIndex());
+ if (!LocMapI.valid())
+ continue;
+ LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start());
+ LiveInterval::iterator LIE = LI->end();
+ while (LocMapI.valid() && LII != LIE) {
+ // At this point, we know that LocMapI.stop() > LII->start.
+ LII = LI->advanceTo(LII, LocMapI.start());
+ if (LII == LIE)
+ break;
+
+ // Now LII->end > LocMapI.start(). Do we have an overlap?
+ if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+ // Overlapping correct location. Allocate NewLocNo now.
+ if (NewLocNo == ~0u) {
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MO.setSubReg(locations[OldLocNo].getSubReg());
+ NewLocNo = getLocationNo(MO);
+ DidChange = true;
+ }
+
+ SlotIndex LStart = LocMapI.start();
+ SlotIndex LStop = LocMapI.stop();
+
+ // Trim LocMapI down to the LII overlap.
+ if (LStart < LII->start)
+ LocMapI.setStartUnchecked(LII->start);
+ if (LStop > LII->end)
+ LocMapI.setStopUnchecked(LII->end);
+
+ // Change the value in the overlap. This may trigger coalescing.
+ LocMapI.setValue(NewLocNo);
+
+ // Re-insert any removed OldLocNo ranges.
+ if (LStart < LocMapI.start()) {
+ LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+ ++LocMapI;
+ assert(LocMapI.valid() && "Unexpected coalescing");
+ }
+ if (LStop > LocMapI.stop()) {
+ ++LocMapI;
+ LocMapI.insert(LII->end, LStop, OldLocNo);
+ --LocMapI;
+ }
+ }
+
+ // Advance to the next overlap.
+ if (LII->end < LocMapI.stop()) {
+ if (++LII == LIE)
+ break;
+ LocMapI.advanceTo(LII->start);
+ } else {
+ ++LocMapI;
+ if (!LocMapI.valid())
+ break;
+ LII = LI->advanceTo(LII, LocMapI.start());
+ }
+ }
+ }
+
+ // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
+ locations.erase(locations.begin() + OldLocNo);
+ LocMapI.goToBegin();
+ while (LocMapI.valid()) {
+ unsigned v = LocMapI.value();
+ if (v == OldLocNo) {
+ DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+ << LocMapI.stop() << ")\n");
+ LocMapI.erase();
+ } else {
+ if (v > OldLocNo)
+ LocMapI.setValueUnchecked(v-1);
+ ++LocMapI;
+ }
+ }
+
+ DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);});
+ return DidChange;
+}
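+
+// A sketch with hypothetical indices: if Loc0 (%reg1) covered [A;D) and the
+// split created %reg5 live over [B;C), the loop above rewrites [B;C) to a
+// fresh location for %reg5 and re-inserts [A;B) and [C;D) as Loc0. The final
+// sweep then drops whatever Loc0 ranges no new register claimed (those
+// values become unavailable) and renumbers the remaining locations.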
+
+bool
+UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ // Split locations referring to OldReg. Iterate backwards so splitLocation can
+ // safely erase unused locations.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ const MachineOperand *Loc = &locations[LocNo];
+ if (!Loc->isReg() || Loc->getReg() != OldReg)
+ continue;
+ DidChange |= splitLocation(LocNo, NewRegs);
+ }
+ return DidChange;
+}
+
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ bool DidChange = false;
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs);
+
+ if (!DidChange)
+ return;
+
+ // Map all of the new virtual registers.
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i]->reg, UV);
+}
+
+void LiveDebugVariables::
+splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+ // Iterating over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
+ VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+ DebugLoc D = dl;
+ dl = DebugLoc();
+ return D;
+}
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+ ++NumInsertedDebugValues;
+
+ // Frame index locations may require a target callback.
+ if (Loc.isFI()) {
+ MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+ Loc.getIndex(), offset, variable,
+ findDebugLoc());
+ if (MI) {
+ MBB->insert(I, MI);
+ return;
+ }
+ }
+ // This is not a frame index, or the target is happy with a standard FI.
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+ while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/src/LLVM/lib/CodeGen/LiveDebugVariables.h b/src/LLVM/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 0000000..3ce3c39
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,70 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables();
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Move any user variables in OldReg to the live ranges in
+ /// NewRegs where they are live. Mark the values as unavailable where no new
+ /// register is live.
+ void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ virtual bool runOnMachineFunction(MachineFunction &);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/src/LLVM/lib/CodeGen/LiveInterval.cpp b/src/LLVM/lib/CodeGen/LiveInterval.cpp
index 6820a61..b69945a 100644
--- a/src/LLVM/lib/CodeGen/LiveInterval.cpp
+++ b/src/LLVM/lib/CodeGen/LiveInterval.cpp
@@ -30,58 +30,22 @@
#include <algorithm>
using namespace llvm;
-// An example for liveAt():
-//
-// this = [1,4), liveAt(0) will return false. The instruction defining this
-// spans slots [0,3]. The interval belongs to an spilled definition of the
-// variable it represents. This is because slot 1 is used (def slot) and spans
-// up to slot 3 (store slot).
-//
-bool LiveInterval::liveAt(SlotIndex I) const {
- Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
- if (r == ranges.begin())
- return false;
-
- --r;
- return r->contains(I);
-}
-
-// liveBeforeAndAt - Check if the interval is live at the index and the index
-// just before it. If index is liveAt, check if it starts a new live range.
-// If it does, then check if the previous live range ends at index-1.
-bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
- Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
- if (r == ranges.begin())
- return false;
-
- --r;
- if (!r->contains(I))
- return false;
- if (I != r->start)
- return true;
- // I is the start of a live range. Check if the previous live range ends
- // at I-1.
- if (r == ranges.begin())
- return false;
- return r->end == I;
-}
-
-/// killedAt - Return true if a live range ends at index. Note that the kill
-/// point is not contained in the half-open live range. It is usually the
-/// getDefIndex() slot following its last use.
-bool LiveInterval::killedAt(SlotIndex I) const {
- Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I);
-
- // Now r points to the first interval with start >= I, or ranges.end().
- if (r == ranges.begin())
- return false;
-
- --r;
- // Now r points to the last interval with end <= I.
- // r->end is the kill point.
- return r->end == I;
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+ // This algorithm is basically std::upper_bound.
+ // Unfortunately, std::upper_bound cannot be used with mixed types until we
+ // adopt C++0x. Many libraries can do it, but not all.
+ if (empty() || Pos >= endIndex())
+ return end();
+ iterator I = begin();
+ size_t Len = ranges.size();
+ do {
+ size_t Mid = Len >> 1;
+ if (Pos < I[Mid].end)
+ Len = Mid;
+ else
+ I += Mid + 1, Len -= Mid + 1;
+ } while (Len);
+ return I;
}
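+
+// For example, with ranges [4;10) and [12;16):
+//
+//   find(8)  -> [4;10)    // contains Pos
+//   find(10) -> [12;16)   // first range ending after Pos; need not contain it
+//   find(16) -> end()
+//
+// That is, find returns the first range whose end is strictly greater than
+// Pos, or end() when there is none.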
/// killedInRange - Return true if the interval has kills in [Start,End).
@@ -182,7 +146,7 @@
/// RenumberValues - Renumber all values in order of appearance and delete the
/// remaining unused values.
-void LiveInterval::RenumberValues() {
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
SmallPtrSet<VNInfo*, 8> Seen;
valnos.clear();
for (const_iterator I = begin(), E = end(); I != E; ++I) {
@@ -309,25 +273,30 @@
return ranges.insert(it, LR);
}
-/// isInOneLiveRange - Return true if the range specified is entirely in
-/// a single LiveRange of the live interval.
-bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
- Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
- if (I == ranges.begin())
- return false;
+/// extendInBlock - If this interval is live before Kill in the basic
+/// block that starts at StartIdx, extend it to be live up to Kill and return
+/// the value. If there is no live range before Kill, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
+ if (empty())
+ return 0;
+ iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
+ if (I == begin())
+ return 0;
--I;
- return I->containsRange(Start, End);
+ if (I->end <= StartIdx)
+ return 0;
+ if (I->end < Kill)
+ extendIntervalEndTo(I, Kill);
+ return I->valno;
}
-
/// removeRange - Remove the specified range from this interval. Note that
/// the range must be in a single LiveRange in its entirety.
void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
bool RemoveDeadValNo) {
// Find the LiveRange containing this span.
- Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
- assert(I != ranges.begin() && "Range is not in interval!");
- --I;
+ Ranges::iterator I = find(Start);
+ assert(I != ranges.end() && "Range is not in interval!");
assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
// If the span we are removing is at the start of the LiveRange, adjust it.
@@ -384,32 +353,6 @@
markValNoForDeletion(ValNo);
}
-/// getLiveRangeContaining - Return the live range that contains the
-/// specified index, or null if there is none.
-LiveInterval::const_iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
- const_iterator It = std::upper_bound(begin(), end(), Idx);
- if (It != ranges.begin()) {
- --It;
- if (It->contains(Idx))
- return It;
- }
-
- return end();
-}
-
-LiveInterval::iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
- iterator It = std::upper_bound(begin(), end(), Idx);
- if (It != begin()) {
- --It;
- if (It->contains(Idx))
- return It;
- }
-
- return end();
-}
-
/// findDefinedVNInfo - Find the VNInfo defined by the specified
/// index (register interval).
VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
@@ -422,27 +365,16 @@
return 0;
}
-/// findDefinedVNInfo - Find the VNInfo defined by the specified
-/// register (stack inteval).
-VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
- for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
- i != e; ++i) {
- if ((*i)->getReg() == reg)
- return *i;
- }
- return 0;
-}
-
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
void LiveInterval::join(LiveInterval &Other,
const int *LHSValNoAssignments,
- const int *RHSValNoAssignments,
+ const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI) {
// Determine if any of our live range values are mapped. This is uncommon, so
- // we want to avoid the interval scan if not.
+ // we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
unsigned NumVals = getNumValNums();
unsigned NumNewVals = NewVNInfo.size();
@@ -462,7 +394,7 @@
++OutIt;
for (iterator I = OutIt, E = end(); I != E; ++I) {
OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
-
+
// If this live range has the same value # as its immediate predecessor,
// and if they are neighbors, remove one LiveRange. This happens when we
// have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
@@ -473,12 +405,12 @@
OutIt->start = I->start;
OutIt->end = I->end;
}
-
+
// Didn't merge, on to the next one.
++OutIt;
}
}
-
+
// If we merge some live ranges, chop off the end.
ranges.erase(OutIt, end());
}
@@ -496,7 +428,7 @@
if (VNI) {
if (NumValNos >= NumVals)
valnos.push_back(VNI);
- else
+ else
valnos[NumValNos] = VNI;
VNI->id = NumValNos++; // Renumber val#.
}
@@ -521,7 +453,7 @@
/// interval as the specified value number. The LiveRanges in RHS are
/// allowed to overlap with LiveRanges in the current interval, but only if
/// the overlapping LiveRanges have the specified value number.
-void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
// TODO: Make this more efficient.
iterator InsertPos = begin();
@@ -542,157 +474,19 @@
void LiveInterval::MergeValueInAsValue(
const LiveInterval &RHS,
const VNInfo *RHSValNo, VNInfo *LHSValNo) {
- SmallVector<VNInfo*, 4> ReplacedValNos;
- iterator IP = begin();
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo");
if (I->valno != RHSValNo)
continue;
- SlotIndex Start = I->start, End = I->end;
- IP = std::upper_bound(IP, end(), Start);
- // If the start of this range overlaps with an existing liverange, trim it.
- if (IP != begin() && IP[-1].end > Start) {
- if (IP[-1].valno != LHSValNo) {
- ReplacedValNos.push_back(IP[-1].valno);
- IP[-1].valno = LHSValNo; // Update val#.
- }
- Start = IP[-1].end;
- // Trimmed away the whole range?
- if (Start >= End) continue;
- }
- // If the end of this range overlaps with an existing liverange, trim it.
- if (IP != end() && End > IP->start) {
- if (IP->valno != LHSValNo) {
- ReplacedValNos.push_back(IP->valno);
- IP->valno = LHSValNo; // Update val#.
- }
- End = IP->start;
- // If this trimmed away the whole range, ignore it.
- if (Start == End) continue;
- }
-
// Map the valno in the other live range to the current live range.
- IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
- }
-
-
- SmallSet<VNInfo*, 4> Seen;
- for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
- VNInfo *V1 = ReplacedValNos[i];
- if (Seen.insert(V1)) {
- bool isDead = true;
- for (const_iterator I = begin(), E = end(); I != E; ++I)
- if (I->valno == V1) {
- isDead = false;
- break;
- }
- if (isDead) {
- // Now that V1 is dead, remove it.
- markValNoForDeletion(V1);
- }
- }
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
}
}
-/// MergeInClobberRanges - For any live ranges that are not defined in the
-/// current interval, but are defined in the Clobbers interval, mark them
-/// used with an unknown definition value.
-void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
- const LiveInterval &Clobbers,
- VNInfo::Allocator &VNInfoAllocator) {
- if (Clobbers.empty()) return;
-
- DenseMap<VNInfo*, VNInfo*> ValNoMaps;
- VNInfo *UnusedValNo = 0;
- iterator IP = begin();
- for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
- // For every val# in the Clobbers interval, create a new "unknown" val#.
- VNInfo *ClobberValNo = 0;
- DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
- if (VI != ValNoMaps.end())
- ClobberValNo = VI->second;
- else if (UnusedValNo)
- ClobberValNo = UnusedValNo;
- else {
- UnusedValNo = ClobberValNo =
- getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
- ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
- }
-
- bool Done = false;
- SlotIndex Start = I->start, End = I->end;
- // If a clobber range starts before an existing range and ends after
- // it, the clobber range will need to be split into multiple ranges.
- // Loop until the entire clobber range is handled.
- while (!Done) {
- Done = true;
- IP = std::upper_bound(IP, end(), Start);
- SlotIndex SubRangeStart = Start;
- SlotIndex SubRangeEnd = End;
-
- // If the start of this range overlaps with an existing liverange, trim it.
- if (IP != begin() && IP[-1].end > SubRangeStart) {
- SubRangeStart = IP[-1].end;
- // Trimmed away the whole range?
- if (SubRangeStart >= SubRangeEnd) continue;
- }
- // If the end of this range overlaps with an existing liverange, trim it.
- if (IP != end() && SubRangeEnd > IP->start) {
- // If the clobber live range extends beyond the existing live range,
- // it'll need at least another live range, so set the flag to keep
- // iterating.
- if (SubRangeEnd > IP->end) {
- Start = IP->end;
- Done = false;
- }
- SubRangeEnd = IP->start;
- // If this trimmed away the whole range, ignore it.
- if (SubRangeStart == SubRangeEnd) continue;
- }
-
- // Insert the clobber interval.
- IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
- IP);
- UnusedValNo = 0;
- }
- }
-
- if (UnusedValNo) {
- // Delete the last unused val#.
- valnos.pop_back();
- }
-}
-
-void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
- SlotIndex Start,
- SlotIndex End,
- VNInfo::Allocator &VNInfoAllocator) {
- // Find a value # to use for the clobber ranges. If there is already a value#
- // for unknown values, use it.
- VNInfo *ClobberValNo =
- getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
-
- iterator IP = begin();
- IP = std::upper_bound(IP, end(), Start);
-
- // If the start of this range overlaps with an existing liverange, trim it.
- if (IP != begin() && IP[-1].end > Start) {
- Start = IP[-1].end;
- // Trimmed away the whole range?
- if (Start >= End) return;
- }
- // If the end of this range overlaps with an existing liverange, trim it.
- if (IP != end() && End > IP->start) {
- End = IP->start;
- // If this trimmed away the whole range, ignore it.
- if (Start == End) return;
- }
-
- // Insert the clobber interval.
- addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
-}
-
/// MergeValueNumberInto - This method is called when two value numbers
/// are found to be equivalent. This eliminates V1, replacing all
/// LiveRanges with the V1 value number with the V2 value number. This can
@@ -715,7 +509,7 @@
for (iterator I = begin(); I != end(); ) {
iterator LR = I++;
if (LR->valno != V1) continue; // Not a V1 LiveRange.
-
+
// Okay, we found a V1 live range. If it had a previous, touching, V2 live
// range, extend it.
if (LR != begin()) {
@@ -729,11 +523,11 @@
LR = Prev;
}
}
-
+
// Okay, now we have a V1 or V2 live range that is maximally merged forward.
// Ensure that it is a V2 live-range.
LR->valno = V2;
-
+
// If we can merge it into later V2 live ranges, do so now. We ignore any
// following V1 live ranges, as they will be merged in subsequent iterations
// of the loop.
@@ -745,10 +539,13 @@
}
}
}
-
+
+ // Merge the relevant flags.
+ V2->mergeFlags(V1);
+
// Now that V1 is dead, remove it.
markValNoForDeletion(V1);
-
+
return V2;
}
@@ -810,14 +607,9 @@
}
void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
- if (isStackSlot())
- OS << "SS#" << getStackSlotIndex();
- else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
- OS << TRI->getName(reg);
- else
- OS << "%reg" << reg;
-
- OS << ',' << weight;
+ OS << PrintReg(reg, TRI);
+ if (weight != 0)
+ OS << ',' << weight;
if (empty())
OS << " EMPTY";
@@ -842,10 +634,9 @@
if (vni->isUnused()) {
OS << "x";
} else {
- if (!vni->isDefAccurate() && !vni->isPHIDef())
- OS << "?";
- else
- OS << vni->def;
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phidef";
if (vni->hasPHIKill())
OS << "-phikill";
if (vni->hasRedefByEC())
@@ -863,3 +654,101 @@
void LiveRange::print(raw_ostream &os) const {
os << *this;
}
+
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+ // Create initial equivalence classes.
+ EqClass.clear();
+ EqClass.grow(LI->getNumValNums());
+
+ const VNInfo *used = 0, *unused = 0;
+
+ // Determine connections.
+ for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ EqClass.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI =
+ LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()))
+ EqClass.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ EqClass.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ EqClass.join(used->id, unused->id);
+
+ EqClass.compress();
+ return EqClass.getNumClasses();
+}
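+
+// For illustration: an interval with two values defined by unrelated
+// instructions in different blocks, never joined by a phi, classifies into
+// two equivalence classes, so Distribute() can peel them apart into separate
+// virtual registers. A phi-def, by contrast, is joined with the live-out
+// value of every predecessor, keeping connected components together.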
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
+ assert(LIV[0] && "LIV[0] must be set");
+ LiveInterval &LI = *LIV[0];
+
+ // Rewrite instructions.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ if (MO.isUse() && MO.isUndef())
+ continue;
+ // DBG_VALUE instructions should have been eliminated earlier.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ const VNInfo *VNI = LI.getVNInfoAt(Idx);
+ assert(VNI && "Interval not live at use.");
+ MO.setReg(LIV[getEqClass(VNI)]->reg);
+ }
+
+ // Move runs to new intervals.
+ LiveInterval::iterator J = LI.begin(), E = LI.end();
+ while (J != E && EqClass[J->valno->id] == 0)
+ ++J;
+ for (LiveInterval::iterator I = J; I != E; ++I) {
+ if (unsigned eq = EqClass[I->valno->id]) {
+ assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ LIV[eq]->ranges.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LI.ranges.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LI.getNumValNums();
+ while (j != e && EqClass[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LI.getValNumInfo(i);
+ if (unsigned eq = EqClass[i]) {
+ VNI->id = LIV[eq]->getNumValNums();
+ LIV[eq]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LI.valnos[j++] = VNI;
+ }
+ }
+ LI.valnos.resize(j);
+}
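
Editor's sketch (not part of the patch): how a client such as the register coalescer might use the new ConnectedVNInfoEqClasses API to split an interval whose values are no longer connected. The LIS, MRI and LI names are assumed to be the usual LiveIntervals, MachineRegisterInfo and LiveInterval references.

  ConnectedVNInfoEqClasses ConEQ(LIS);
  unsigned NumComp = ConEQ.Classify(&LI);
  if (NumComp > 1) {
    // One LiveInterval per connected component; LIV[0] remains LI itself.
    SmallVector<LiveInterval*, 8> LIV(1, &LI);
    for (unsigned i = 1; i != NumComp; ++i) {
      unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(LI.reg));
      LIV.push_back(&LIS.getOrCreateInterval(NewReg));
    }
    ConEQ.Distribute(&LIV[0], MRI);
  }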
diff --git a/src/LLVM/lib/CodeGen/LiveIntervalAnalysis.cpp b/src/LLVM/lib/CodeGen/LiveIntervalAnalysis.cpp
index 587c55f..b1e202a 100644
--- a/src/LLVM/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/src/LLVM/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -20,6 +20,7 @@
#include "VirtRegMap.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -47,7 +48,7 @@
using namespace llvm;
// Hidden options for help debugging.
-static cl::opt<bool> DisableReMat("disable-rematerialization",
+static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
STATISTIC(numIntervals , "Number of original intervals");
@@ -55,23 +56,33 @@
STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
-INITIALIZE_PASS(LiveIntervals, "liveintervals",
- "Live Interval Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<LiveVariables>();
AU.addRequired<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
-
+
if (!StrongPHIElim) {
AU.addPreservedID(PHIEliminationID);
AU.addRequiredID(PHIEliminationID);
}
-
+
AU.addRequiredID(TwoAddressInstructionPassID);
AU.addPreserved<ProcessImplicitDefs>();
AU.addRequired<ProcessImplicitDefs>();
@@ -85,7 +96,7 @@
for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
E = r2iMap_.end(); I != E; ++I)
delete I->second;
-
+
r2iMap_.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
@@ -131,19 +142,7 @@
void LiveIntervals::printInstrs(raw_ostream &OS) const {
OS << "********** MACHINEINSTRS **********\n";
-
- for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
- mbbi != mbbe; ++mbbi) {
- OS << "BB#" << mbbi->getNumber()
- << ":\t\t# derived from " << mbbi->getName() << "\n";
- for (MachineBasicBlock::iterator mii = mbbi->begin(),
- mie = mbbi->end(); mii != mie; ++mii) {
- if (mii->isDebugValue())
- OS << " \t" << *mii;
- else
- OS << getInstructionIndex(mii) << '\t' << *mii;
- }
- }
+ mf_->print(OS, indexes_);
}
void LiveIntervals::dumpInstrs() const {
@@ -247,15 +246,6 @@
return false;
}
-#ifndef NDEBUG
-static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
- if (TargetRegisterInfo::isPhysicalRegister(reg))
- dbgs() << tri_->getName(reg);
- else
- dbgs() << "%reg" << reg;
-}
-#endif
-
static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
unsigned Reg = MI.getOperand(MOIdx).getReg();
@@ -275,7 +265,7 @@
/// isPartialRedef - Return true if the specified def at the specific index is
/// partially re-defining the specified live interval. A common case of this is
-/// a definition of the sub-register.
+/// a definition of the sub-register.
bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
LiveInterval &interval) {
if (!MO.getSubReg() || MO.isEarlyClobber())
@@ -284,8 +274,8 @@
SlotIndex RedefIndex = MIIdx.getDefIndex();
const LiveRange *OldLR =
interval.getLiveRangeContaining(RedefIndex.getUseIndex());
- if (OldLR->valno->isDefAccurate()) {
- MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ if (DefMI != 0) {
return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
}
return false;
@@ -297,10 +287,7 @@
MachineOperand& MO,
unsigned MOIdx,
LiveInterval &interval) {
- DEBUG({
- dbgs() << "\t\tregister: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
// Virtual registers may be defined multiple times (due to phi
// elimination and 2-addr elimination). Much of what we do only has to be
@@ -317,16 +304,26 @@
// Make sure the first definition is not a partial redefinition. Add an
// <imp-def> of the full register.
- if (MO.getSubReg())
+ // FIXME: LiveIntervals shouldn't modify the code like this. Whoever
+ // created the machine instruction should annotate it with <undef> flags
+ // as needed. Then we can simply assert here. The REG_SEQUENCE lowering
+ // is the main suspect.
+ if (MO.getSubReg()) {
mi->addRegisterDefined(interval.reg);
+ // Mark all defs of interval.reg on this instruction as reading <undef>.
+ for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO2 = mi->getOperand(i);
+ if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg())
+ MO2.setIsUndef();
+ }
+ }
MachineInstr *CopyMI = NULL;
if (mi->isCopyLike()) {
CopyMI = mi;
}
- VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
- VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
// Loop over all of the blocks that the vreg is defined in. There are
@@ -392,8 +389,9 @@
// Create interval with one of a NEW value number. Note that this value
// number isn't actually defined by an instruction, weird huh? :)
if (PHIJoin) {
- ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false,
- VNInfoAllocator);
+ assert(getInstructionFromIndex(Start) == 0 &&
+ "PHI def index points at actual instruction.");
+ ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
@@ -415,8 +413,8 @@
// def-and-use register operand.
// It may also be partial redef like this:
- // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
- // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+ // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+ // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
bool PartReDef = isPartialRedef(MIIdx, MO, interval);
if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
// If this is a two-address definition, then we have already processed
@@ -439,10 +437,7 @@
// The new value number (#1) is defined by the instruction we claimed
// defined value #0.
- VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
- false, // update at *
- VNInfoAllocator);
- ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
+ VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
@@ -451,7 +446,7 @@
// A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
if (PartReDef && mi->isCopyLike())
OldValNo->setCopy(&*mi);
-
+
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
DEBUG(dbgs() << " replace range with " << LR);
@@ -480,8 +475,8 @@
MachineInstr *CopyMI = NULL;
if (mi->isCopyLike())
CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
-
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
@@ -503,10 +498,7 @@
MachineInstr *CopyMI) {
// A physical register cannot be live across basic block, so its
// lifetime must end somewhere in its defining basic block.
- DEBUG({
- dbgs() << "\t\tregister: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
SlotIndex baseIndex = MIIdx;
SlotIndex start = baseIndex.getDefIndex();
@@ -558,10 +550,10 @@
goto exit;
}
}
-
+
baseIndex = baseIndex.getNextIndex();
}
-
+
// The only case we should have a dead physreg here without a killing or
// instruction where we know it's dead is if it is live-in to the function
// and never used. Another possible case is the implicit use of the
@@ -572,11 +564,11 @@
assert(start < end && "did not find end of interval?");
// Already exists? Extend old live interval.
- LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
- bool Extend = OldLR != interval.end();
- VNInfo *ValNo = Extend
- ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
- if (MO.isEarlyClobber() && Extend)
+ VNInfo *ValNo = interval.getVNInfoAt(start);
+ bool Extend = ValNo != 0;
+ if (!Extend)
+ ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ if (Extend && MO.isEarlyClobber())
ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
@@ -591,29 +583,19 @@
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
getOrCreateInterval(MO.getReg()));
- else if (allocatableRegs_[MO.getReg()]) {
+ else {
MachineInstr *CopyMI = NULL;
if (MI->isCopyLike())
CopyMI = MI;
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
getOrCreateInterval(MO.getReg()), CopyMI);
- // Def of a register also defines its sub-registers.
- for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
- // If MI also modifies the sub-register explicitly, avoid processing it
- // more than once. Do not pass in TRI here so it checks for exact match.
- if (!MI->definesRegister(*AS))
- handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
- getOrCreateInterval(*AS), 0);
}
}
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex MIIdx,
LiveInterval &interval, bool isAlias) {
- DEBUG({
- dbgs() << "\t\tlivein register: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
// Look for kills, if it reaches a def before it's killed, then it shouldn't
// be considered a livein.
@@ -667,13 +649,15 @@
end = MIIdx.getStoreIndex();
} else {
DEBUG(dbgs() << " live through");
- end = baseIndex;
+ end = getMBBEndIdx(MBB);
}
}
+ SlotIndex defIdx = getMBBStartIdx(MBB);
+ assert(getInstructionFromIndex(defIdx) == 0 &&
+ "PHI def index points at actual instruction.");
VNInfo *vni =
- interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true),
- 0, false, VNInfoAllocator);
+ interval.getNextValue(defIdx, 0, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
@@ -685,7 +669,7 @@
/// registers. for some ordering of the machine instructions [1,N] a
/// live interval is an interval [i, j) where 1 <= i <= j < N for
/// which a variable is live
-void LiveIntervals::computeIntervals() {
+void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
@@ -712,11 +696,11 @@
handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
true);
}
-
+
// Skip over empty initial indices.
if (getInstructionFromIndex(MIIndex) == 0)
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
-
+
for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
MI != miEnd; ++MI) {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
@@ -735,7 +719,7 @@
else if (MO.isUndef())
UndefUses.push_back(MO.getReg());
}
-
+
// Move to the next instr slot.
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
}
@@ -763,10 +747,194 @@
return NewLI;
}
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can only shrink virtual registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Blocks that have already been added to WorkList as live-out.
+ SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+ VNInfo *VNI = li->getVNInfoAt(Idx);
+ if (!VNI) {
+ // This shouldn't happen: readsVirtualRegister returns true, but there is
+ // no live value. It is likely caused by a target getting <undef> flags
+ // wrong.
+ DEBUG(dbgs() << Idx << '\t' << *UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
+ continue;
+ }
+ if (VNI->def == Idx) {
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ Idx = Idx.getPrevSlot();
+ VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Early-clobber tied value not available");
+ }
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+
+ // A use tied to an early-clobber def ends at the load slot and isn't caught
+ // above. Catch it here instead. This probably only ever happens for inline
+ // assembly.
+ if (VNI->def.isUse())
+ if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
+ WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
+ }
+
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) {
+ (void)ExtVNI;
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (VNInfo *PVNI = li->getVNInfoAt(Stop))
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ if (!LiveOut.insert(*PI))
+ continue;
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ bool CanSeparate = false;
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getNextSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->setIsUnused(true);
+ NewLI.removeRange(*LII);
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ CanSeparate = true;
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, tri_);
+ if (dead && MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
+ dead->push_back(MI);
+ }
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
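
Editor's sketch (not part of the patch): a typical shrinkToUses call site, run after uses of LI.reg have been rewritten or deleted. LIS is assumed to be the enclosing LiveIntervals analysis.

  SmallVector<MachineInstr*, 8> Dead;
  LIS.shrinkToUses(&LI, &Dead);
  // Erase instructions whose defs all became dead, keeping the index maps
  // in sync.
  while (!Dead.empty()) {
    MachineInstr *MI = Dead.pop_back_val();
    LIS.RemoveMachineInstrFromMaps(MI);
    MI->eraseFromParent();
  }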
+
+
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
+MachineBasicBlock::iterator
+LiveIntervals::getLastSplitPoint(const LiveInterval &li,
+ MachineBasicBlock *mbb) const {
+ const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
+
+ // If li is not live into a landing pad, we can insert spill code before the
+ // first terminator.
+ if (!lpad || !isLiveInToMBB(li, lpad))
+ return mbb->getFirstTerminator();
+
+ // When there is a landing pad, spill code must go before the call instruction
+ // that can throw.
+ MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
+ while (I != B) {
+ --I;
+ if (I->getDesc().isCall())
+ return I;
+ }
+ // The block contains no calls that can throw, so use the first terminator.
+ return mbb->getFirstTerminator();
+}
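
Editor's sketch (not part of the patch): a spiller could use the split point to keep spill code from being inserted after a call that may throw into a landing pad. TII, Reg, Slot, RC and TRI are assumed to be in scope.

  MachineBasicBlock::iterator IP = LIS.getLastSplitPoint(LI, MBB);
  TII->storeRegToStackSlot(*MBB, IP, Reg, /*isKill=*/true, Slot, RC, TRI);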
+
+void LiveIntervals::addKillFlags() {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (mri_->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = I->second;
+
+ // Every instruction that kills Reg corresponds to a live range end point.
+ for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ ++RI) {
+ // A LOAD index indicates an MBB edge.
+ if (RI->end.isLoad())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+ MI->addRegisterKilled(Reg, NULL);
+ }
+ }
+}
+
/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
/// allow one) virtual register operand, then its uses are implicitly using
/// the register. Returns the virtual register.
@@ -780,7 +948,7 @@
unsigned Reg = MO.getReg();
if (Reg == 0 || Reg == li.reg)
continue;
-
+
if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
!allocatableRegs_[Reg])
continue;
@@ -799,18 +967,17 @@
/// which reaches the given instruction also reaches the specified use index.
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
SlotIndex UseIdx) const {
- SlotIndex Index = getInstructionIndex(MI);
- VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
- LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
- return UI != li.end() && UI->valno == ValNo;
+ VNInfo *UValNo = li.getVNInfoAt(UseIdx);
+ return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
}
/// isReMaterializable - Returns true if the definition MI of the specified
/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI,
- SmallVectorImpl<LiveInterval*> &SpillIs,
- bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ bool &isLoad) {
if (DisableReMat)
return false;
@@ -828,7 +995,7 @@
ri != re; ++ri) {
MachineInstr *UseMI = &*ri;
SlotIndex UseIdx = getInstructionIndex(UseMI);
- if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+ if (li.getVNInfoAt(UseIdx) != ValNo)
continue;
if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
return false;
@@ -836,9 +1003,10 @@
// If a register operand of the re-materialized instruction is going to
// be spilled next, then it's not legal to re-materialize this instruction.
- for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
- if (ImpUse == SpillIs[i]->reg)
- return false;
+ if (SpillIs)
+ for (unsigned i = 0, e = SpillIs->size(); i != e; ++i)
+ if (ImpUse == (*SpillIs)[i]->reg)
+ return false;
}
return true;
}
@@ -847,16 +1015,16 @@
/// val# of the specified interval is re-materializable.
bool LiveIntervals::isReMaterializable(const LiveInterval &li,
const VNInfo *ValNo, MachineInstr *MI) {
- SmallVector<LiveInterval*, 4> Dummy1;
bool Dummy2;
- return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+ return isReMaterializable(li, ValNo, MI, 0, Dummy2);
}
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- SmallVectorImpl<LiveInterval*> &SpillIs,
- bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
+ bool &isLoad) {
isLoad = false;
for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
i != e; ++i) {
@@ -864,9 +1032,9 @@
if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- if (!VNI->isDefAccurate())
- return false;
MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ if (!ReMatDefMI)
+ return false;
bool DefIsLoad = false;
if (!ReMatDefMI ||
!isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
@@ -904,7 +1072,7 @@
}
return false;
}
-
+
/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
/// slot / to reg or any rematerialized load into ith operand of specified
@@ -1009,7 +1177,7 @@
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (!vrm.isReMaterialized(Reg))
continue;
@@ -1024,7 +1192,7 @@
/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
bool LiveIntervals::
rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, SlotIndex index, SlotIndex end,
+ bool TrySplit, SlotIndex index, SlotIndex end,
MachineInstr *MI,
MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
unsigned Slot, int LdSlot,
@@ -1043,7 +1211,7 @@
if (!mop.isReg())
continue;
unsigned Reg = mop.getReg();
- if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (Reg != li.reg)
continue;
@@ -1083,7 +1251,7 @@
// keep the src/dst regs pinned.
//
// Keep track of whether we replace a use and/or def so that we can
- // create the spill interval with the appropriate range.
+ // create the spill interval with the appropriate range.
SmallVector<unsigned, 2> Ops;
tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
@@ -1139,13 +1307,16 @@
rewriteImplicitOps(li, MI, NewVReg, vrm);
// Reuse NewVReg for other reads.
+ bool HasEarlyClobber = false;
for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
MachineOperand &mopj = MI->getOperand(Ops[j]);
mopj.setReg(NewVReg);
if (mopj.isImplicit())
rewriteImplicitOps(li, MI, NewVReg, vrm);
+ if (mopj.isEarlyClobber())
+ HasEarlyClobber = true;
}
-
+
if (CreatedNewVReg) {
if (DefIsReMat) {
vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
@@ -1189,7 +1360,7 @@
if (HasUse) {
if (CreatedNewVReg) {
LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
} else {
@@ -1202,8 +1373,12 @@
}
}
if (HasDef) {
- LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ // An early clobber starts at the use slot, except for an early clobber
+ // tied to a use operand (yes, that is a thing).
+ LiveRange LR(HasEarlyClobber && !HasUse ?
+ index.getUseIndex() : index.getDefIndex(),
+ index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
}
@@ -1345,7 +1520,7 @@
// ...
// def = ...
// = use
- // It's better to start a new interval to avoid artifically
+ // It's better to start a new interval to avoid artificially
// extend the new interval.
if (MI->readsWritesVirtualRegister(li.reg) ==
std::make_pair(false,true)) {
@@ -1548,20 +1723,22 @@
// overflow a float. This expression behaves like 10^d for small d, but is
// more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
// headroom before overflow.
- float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+ // Note that powf() may be unavailable on some platforms, so use
+ // pow(double, double) instead for consistency.
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
return (isDef + isUse) * lc;
}
-void
-LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
- normalizeSpillWeight(*NewLIs[i]);
+ NewLIs[i]->weight =
+ normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
}
std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpills(const LiveInterval &li,
- SmallVectorImpl<LiveInterval*> &SpillIs,
+ const SmallVectorImpl<LiveInterval*> *SpillIs,
const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
assert(li.isSpillable() && "attempt to spill already spilled interval!");
@@ -1652,8 +1829,7 @@
if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = VNI->isDefAccurate()
- ? getInstructionFromIndex(VNI->def) : 0;
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
bool dummy;
if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
// Remember how to remat the def of this val#.
@@ -1685,7 +1861,7 @@
if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
Slot = vrm.assignVirt2StackSlot(li.reg);
-
+
// This case only occurs when the prealloc splitter has already assigned
// a stack slot to this vreg.
else
@@ -1742,7 +1918,7 @@
Ops.push_back(j);
if (MO.isDef())
continue;
- if (isReMat ||
+ if (isReMat ||
(!FoundUse && !alsoFoldARestore(Id, index, VReg,
RestoreMBBs, RestoreIdxes))) {
// MI has two-address uses of the same register. If the use
@@ -1887,7 +2063,7 @@
/// getRepresentativeReg - Find the largest super register of the specified
/// physical register.
unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
- // Find the largest super-register that is allocatable.
+ // Find the largest super-register that is allocatable.
unsigned BestReg = Reg;
for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
unsigned SuperReg = *AS;
@@ -1925,6 +2101,9 @@
unsigned PhysReg, VirtRegMap &vrm) {
unsigned SpillReg = getRepresentativeReg(PhysReg);
+ DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
+ << " represented by " << tri_->getName(SpillReg) << '\n');
+
for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
// If there are registers which alias PhysReg, but which are not a
// sub-register of the chosen representative super register. Assert
@@ -1936,15 +2115,16 @@
SmallVector<unsigned, 4> PRegs;
if (hasInterval(SpillReg))
PRegs.push_back(SpillReg);
- else {
- SmallSet<unsigned, 4> Added;
- for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS)
- if (Added.insert(*AS) && hasInterval(*AS)) {
- PRegs.push_back(*AS);
- for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS)
- Added.insert(*ASS);
- }
- }
+ for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
+ if (hasInterval(*SR))
+ PRegs.push_back(*SR);
+
+ DEBUG({
+ dbgs() << "Trying to spill:";
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
+ dbgs() << ' ' << tri_->getName(PRegs[i]);
+ dbgs() << '\n';
+ });
SmallPtrSet<MachineInstr*, 8> SeenMIs;
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
@@ -1955,32 +2135,34 @@
continue;
SeenMIs.insert(MI);
SlotIndex Index = getInstructionIndex(MI);
+ bool LiveReg = false;
for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
unsigned PReg = PRegs[i];
LiveInterval &pli = getInterval(PReg);
if (!pli.liveAt(Index))
continue;
- vrm.addEmergencySpill(PReg, MI);
+ LiveReg = true;
SlotIndex StartIdx = Index.getLoadIndex();
SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
- if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
- pli.removeRange(StartIdx, EndIdx);
- Cut = true;
- } else {
+ if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, tm_);
+ }
report_fatal_error(Msg.str());
}
- for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
- if (!hasInterval(*AS))
- continue;
- LiveInterval &spli = getInterval(*AS);
- if (spli.liveAt(Index))
- spli.removeRange(Index.getLoadIndex(),
- Index.getNextIndex().getBaseIndex());
- }
+ pli.removeRange(StartIdx, EndIdx);
+ LiveReg = true;
}
+ if (!LiveReg)
+ continue;
+ DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
+ vrm.addEmergencySpill(SpillReg, MI);
+ Cut = true;
}
return Cut;
}
@@ -1990,13 +2172,13 @@
LiveInterval& Interval = getOrCreateInterval(reg);
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- startInst, true, getVNInfoAllocator());
+ startInst, getVNInfoAllocator());
VN->setHasPHIKill(true);
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
getMBBEndIdx(startInst->getParent()), VN);
Interval.addRange(LR);
-
+
return LR;
}
diff --git a/src/LLVM/lib/CodeGen/LiveIntervalUnion.cpp b/src/LLVM/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 0000000..110fe1e
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,209 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (SegPos.valid()) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+
+ // We have reached the end of Segments, so it is no longer necessary to search
+ // for the insertion position.
+ // It is faster to insert the end first.
+ --RegEnd;
+ SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+ for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
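
Editor's sketch (not part of the patch): how an allocator might model the contents of one physical register with a LiveIntervalUnion; all names are illustrative.

  LiveIntervalUnion::Allocator UnionAlloc;          // shared by all unions
  LiveIntervalUnion PhysRegUnion(PhysReg, UnionAlloc);
  PhysRegUnion.unify(VirtRegLI);    // assign VirtRegLI to PhysReg
  PhysRegUnion.extract(VirtRegLI);  // later, undo the assignment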
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ OS << "LIU " << PrintReg(RepReg, TRI);
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Collect virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The query state is one of:
+//
+// 1. CheckedFirstInterference == false: Iterators are uninitialized.
+// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
+// 3. Iterators left at the last seen intersection.
+//
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ // Fast path return if we already have the desired information.
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+
+ // Set up iterators on the first call.
+ if (!CheckedFirstInterference) {
+ CheckedFirstInterference = true;
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ SeenAllInterferences = true;
+ return 0;
+ }
+
+ // In most cases, the union will start before VirtReg.
+ VirtRegI = VirtReg->begin();
+ LiveUnionI.setMap(LiveUnion->getMap());
+ LiveUnionI.find(VirtRegI->start);
+ }
+
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentReg = 0;
+ while (LiveUnionI.valid()) {
+ assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+
+ // Check for overlapping interference.
+ while (VirtRegI->start < LiveUnionI.stop() &&
+ VirtRegI->end > LiveUnionI.start()) {
+ // This is an overlap, record the interfering register.
+ LiveInterval *VReg = LiveUnionI.value();
+ if (VReg != RecentReg && !isSeenInterference(VReg)) {
+ RecentReg = VReg;
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+ }
+ // This LiveUnion segment is no longer interesting.
+ if (!(++LiveUnionI).valid()) {
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+ }
+ }
+
+ // The iterators are now not overlapping, LiveUnionI has been advanced
+ // beyond VirtRegI.
+ assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
+
+ // Advance the iterator that ends first.
+ VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
+ if (VirtRegI == VirtRegEnd)
+ break;
+
+ // Detect overlap; it is handled at the top of the loop.
+ if (VirtRegI->start < LiveUnionI.stop())
+ continue;
+
+ // Still not overlapping. Catch up LiveUnionI.
+ LiveUnionI.advanceTo(VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
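
Editor's sketch (not part of the patch): the intended query workflow, probing a candidate physical register before committing an assignment. UserTag stands in for the allocator's invalidation counter.

  LiveIntervalUnion::Query Q;
  Q.init(UserTag, &VirtRegLI, &PhysRegUnion);
  if (!Q.checkInterference()) {
    PhysRegUnion.unify(VirtRegLI);    // no conflict: assign
  } else {
    Q.collectInterferingVRegs(8);     // gather a few eviction candidates
    const SmallVectorImpl<LiveInterval*> &VRegs = Q.interferingVRegs();
    // ... rank VRegs by spill weight and consider evicting one of them.
  }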
+
+bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
+ // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
+ // overlaps.
+ IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
+ Overlaps(LiveUnion->getMap(), Loop->getMap());
+ if (!Overlaps.valid())
+ return false;
+
+ // The loop is overlapping an LIU assignment. Check VirtReg as well.
+ LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
+
+ for (;;) {
+ if (VRI == VirtReg->end())
+ return false;
+ if (VRI->start < Overlaps.stop())
+ return true;
+
+ Overlaps.advanceTo(VRI->start);
+ if (!Overlaps.valid())
+ return false;
+ if (Overlaps.start() < VRI->end)
+ return true;
+
+ VRI = VirtReg->advanceTo(VRI, Overlaps.start());
+ }
+}
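
Editor's sketch (not part of the patch): checking for interference anywhere inside a loop, assuming the MachineLoopRanges analysis (LoopRanges) and a MachineLoop (Loop) are available.

  MachineLoopRange *Range = LoopRanges->getLoopRange(Loop);
  if (Q.checkLoopInterference(Range)) {
    // The union already has an assignment overlapping VirtReg inside Loop.
  }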
diff --git a/src/LLVM/lib/CodeGen/LiveIntervalUnion.h b/src/LLVM/lib/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 0000000..5d64d28
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,192 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
+#define LLVM_CODEGEN_LIVEINTERVALUNION
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include <algorithm>
+
+namespace llvm {
+
+class MachineLoopRange;
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+ const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+ return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context). We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+ // A set of live virtual register segments that supports fast insertion,
+ // intersection, and removal.
+ // Mapping SlotIndex intervals to virtual register numbers.
+ typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+ // SegmentIter can advance to the next segment ordered by starting position
+ // which may belong to a different live virtual register. We also must be able
+ // to reach the current segment's containing virtual register.
+ typedef LiveSegments::iterator SegmentIter;
+
+ // LiveIntervalUnions share an external allocator.
+ typedef LiveSegments::Allocator Allocator;
+
+ class Query;
+
+private:
+ const unsigned RepReg; // representative register number
+ unsigned Tag; // unique tag for current contents.
+ LiveSegments Segments; // union of virtual reg segments
+
+public:
+ LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
+ {}
+
+ // Iterate over all segments in the union of live virtual registers ordered
+ // by their starting position.
+ SegmentIter begin() { return Segments.begin(); }
+ SegmentIter end() { return Segments.end(); }
+ SegmentIter find(SlotIndex x) { return Segments.find(x); }
+ bool empty() const { return Segments.empty(); }
+ SlotIndex startIndex() const { return Segments.start(); }
+
+ // Provide public access to the underlying map to allow overlap iteration.
+ typedef LiveSegments Map;
+ const Map &getMap() { return Segments; }
+
+ /// getTag - Return an opaque tag representing the current state of the union.
+ unsigned getTag() const { return Tag; }
+
+ /// changedSince - Return true if the union has changed since getTag returned tag.
+ bool changedSince(unsigned tag) const { return tag != Tag; }
+
+ // Add a live virtual register to this union and merge its segments.
+ void unify(LiveInterval &VirtReg);
+
+ // Remove a live virtual register's segments from this union.
+ void extract(LiveInterval &VirtReg);
+
+ // Remove all inserted virtual registers.
+ void clear() { Segments.clear(); ++Tag; }
+
+ // Print union, using TRI to translate register names
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+ // Verify the live intervals in this union and add them to the visited set.
+ void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+ /// Query interferences between a single live virtual register and a live
+ /// interval union.
+ class Query {
+ LiveIntervalUnion *LiveUnion;
+ LiveInterval *VirtReg;
+ LiveInterval::iterator VirtRegI; // current position in VirtReg
+ SegmentIter LiveUnionI; // current position in LiveUnion
+ SmallVector<LiveInterval*,4> InterferingVRegs;
+ bool CheckedFirstInterference;
+ bool SeenAllInterferences;
+ bool SeenUnspillableVReg;
+ unsigned Tag, UserTag;
+
+ public:
+ Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {}
+
+ Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+ LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+ SeenAllInterferences(false), SeenUnspillableVReg(false)
+ {}
+
+ void clear() {
+ LiveUnion = NULL;
+ VirtReg = NULL;
+ InterferingVRegs.clear();
+ CheckedFirstInterference = false;
+ SeenAllInterferences = false;
+ SeenUnspillableVReg = false;
+ Tag = 0;
+ UserTag = 0;
+ }
+
+ void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) {
+ assert(VReg && LIU && "Invalid arguments");
+ if (UserTag == UTag && VirtReg == VReg &&
+ LiveUnion == LIU && !LIU->changedSince(Tag)) {
+ // Retain cached results, e.g. firstInterference.
+ return;
+ }
+ clear();
+ LiveUnion = LIU;
+ VirtReg = VReg;
+ Tag = LIU->getTag();
+ UserTag = UTag;
+ }
+
+ LiveInterval &virtReg() const {
+ assert(VirtReg && "uninitialized");
+ return *VirtReg;
+ }
+
+ // Does this live virtual register interfere with the union?
+ bool checkInterference() { return collectInterferingVRegs(1); }
+
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VReg) const;
+
+ // Did collectInterferingVRegs collect all interferences?
+ bool seenAllInterferences() const { return SeenAllInterferences; }
+
+ // Did collectInterferingVRegs encounter an unspillable vreg?
+ bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+ // Vector generated by collectInterferingVRegs.
+ const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+ return InterferingVRegs;
+ }
+
+ /// checkLoopInterference - Return true if there is interference overlapping
+ /// Loop.
+ bool checkLoopInterference(MachineLoopRange*);
+
+ private:
+ Query(const Query&); // DO NOT IMPLEMENT
+ void operator=(const Query&); // DO NOT IMPLEMENT
+ };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
diff --git a/src/LLVM/lib/CodeGen/LiveRangeCalc.cpp b/src/LLVM/lib/CodeGen/LiveRangeCalc.cpp
new file mode 100644
index 0000000..a7d5af5
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveRangeCalc.cpp
@@ -0,0 +1,270 @@
+//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveRangeCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/MachineDominators.h"
+
+using namespace llvm;
+
+void LiveRangeCalc::reset(const MachineFunction *MF) {
+ unsigned N = MF->getNumBlockIDs();
+ Seen.clear();
+ Seen.resize(N);
+ LiveOut.resize(N);
+ LiveIn.clear();
+}
+
+
+// Transfer information from the LiveIn vector to the live ranges.
+void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) {
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ if (!I->DomNode)
+ continue;
+ MachineBasicBlock *MBB = I->DomNode->getBlock();
+
+ VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value;
+ assert(VNI && "No live-in value found");
+
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+
+ if (I->Kill.isValid())
+ I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ I->LI->addRange(LiveRange(Start, End, VNI));
+ // The value is live-through, update LiveOut as well. Defer the Domtree
+ // lookup until it is needed.
+ assert(Seen.test(MBB->getNumber()));
+ LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ }
+ }
+ LiveIn.clear();
+}
+
+
+void LiveRangeCalc::extend(LiveInterval *LI,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc) {
+ assert(LI && "Missing live range");
+ assert(Kill.isValid() && "Invalid SlotIndex");
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
+ assert(KillMBB && "No MBB at Kill");
+
+ // Is there a def in the same MBB we can extend?
+ if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+ return;
+
+ // Find the single reaching def, or determine if Kill is jointly dominated by
+ // multiple values, and we may need to create even more phi-defs to preserve
+ // VNInfo SSA form. Perform a search for all predecessor blocks where we
+ // know the dominating VNInfo.
+ VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, Indexes, DomTree);
+
+ // When there were multiple different values, we may need new PHIs.
+ if (!VNI)
+ updateSSA(Indexes, DomTree, Alloc);
+
+ updateLiveIns(VNI, Indexes);
+}
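
Editor's sketch (not part of the patch): extending a live range so it covers a use at UseMI. Since the kill slot is exclusive, the index just past the use is passed; LRCalc and the trailing analyses are assumed to be at hand.

  SlotIndex UseIdx = LIS.getInstructionIndex(UseMI).getUseIndex();
  LRCalc.extend(&LI, UseIdx.getNextSlot(), Indexes, DomTree, &VNIAlloc);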
+
+
+// This function is called by a client after using the low-level API to add
+// live-out and live-in blocks. The unique value optimization is not
+// available, SplitEditor::transferValues handles that case directly anyway.
+void LiveRangeCalc::calculateValues(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc) {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+ updateSSA(Indexes, DomTree, Alloc);
+ updateLiveIns(0, Indexes);
+}
+
+
+VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree) {
+ // Blocks where LI should be live-in.
+ SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = 0;
+
+ // Using Seen as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = WorkList[i];
+ assert(!MBB->pred_empty() && "Value live-in to entry block?");
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+
+ // Is this a known live-out block?
+ if (Seen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = LiveOut[Pred].first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(Pred);
+
+ // First time we see Pred. Try to determine the live-out value, but set
+ // it as null if Pred is live-through with an unknown value.
+ VNInfo *VNI = LI->extendInBlock(Start, End);
+ setLiveOutValue(Pred, VNI);
+ if (VNI) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ continue;
+ }
+
+ // No, we need a live-in value for Pred as well.
+ if (Pred != KillMBB)
+ WorkList.push_back(Pred);
+ else
+ // Loopback to KillMBB, so value is really live through.
+ Kill = SlotIndex();
+ }
+ }
+
+ // Transfer WorkList to LiveInBlocks in reverse order.
+ // This ordering works best with updateSSA().
+ LiveIn.clear();
+ LiveIn.reserve(WorkList.size());
+ while (!WorkList.empty())
+ addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val()));
+
+ // The kill block may not be live-through.
+ assert(LiveIn.back().DomNode->getBlock() == KillMBB);
+ LiveIn.back().Kill = Kill;
+
+ return UniqueVNI ? TheVNI : 0;
+}
+
+
+// This is essentially the same iterative algorithm that SSAUpdater uses,
+// except we already have a dominator tree, so we don't have to recompute it.
+void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc) {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ // Iterate until convergence.
+ unsigned Changes;
+ do {
+ Changes = 0;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+ E = LiveIn.end(); I != E; ++I) {
+ MachineDomTreeNode *Node = I->DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = LiveOut[IDom->getBlock()];
+
+ // Cache the DomTree node that defined the value.
+ if (IDomValue.first && !IDomValue.second)
+ LiveOut[IDom->getBlock()].second = IDomValue.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair &Value = LiveOut[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+
+ // Cache the DomTree node that defined the value.
+ if (!Value.second)
+ Value.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));
+
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (DomTree->dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extend(). In that case the Seen bit is set, and
+ // LiveOut indicates a foreign or missing value.
+ LiveOutPair &LOP = LiveOut[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ assert(Alloc && "Need VNInfo allocator to create PHI-defs");
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(MBB);
+ VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc);
+ VNI->setIsPHIDef(true);
+ I->Value = VNI;
+ // This block is done, we know the final value.
+ I->DomNode = 0;
+
+ // Add liveness since updateLiveIns now skips this node.
+ if (I->Kill.isValid())
+ I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ else {
+ I->LI->addRange(LiveRange(Start, End, VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value.
+ I->Value = IDomValue.first;
+
+ // If the IDomValue is killed in the block, don't propagate through.
+ if (I->Kill.isValid())
+ continue;
+
+ // Propagate IDomValue if it isn't killed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.first == IDomValue.first)
+ continue;
+ ++Changes;
+ LOP = IDomValue;
+ }
+ }
+ } while (Changes);
+}
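
Editor's note (not part of the patch): a worked example of the iteration. In a diamond CFG A -> {B, C} -> D, with the register defined in both B and C and used in D, findReachingDefs sees two distinct reaching values and returns null, so updateSSA runs. D's immediate dominator A carries no live-out value, so needPHI becomes true and a phi-def is created at the start of D, which then serves as D's live-in value.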
diff --git a/src/LLVM/lib/CodeGen/LiveRangeCalc.h b/src/LLVM/lib/CodeGen/LiveRangeCalc.h
new file mode 100644
index 0000000..b8c8585
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveRangeCalc.h
@@ -0,0 +1,226 @@
+//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeCalc class can be used to compute live ranges from scratch. It
+// caches information about values in the CFG to speed up repeated operations
+// on the same live range. The cache can be shared by non-overlapping live
+// ranges. SplitKit uses that when computing the live range of split products.
+//
+// A low-level interface is available to clients that know where a variable is
+// live, but don't know which value it has at every point. LiveRangeCalc will
+// propagate values down the dominator tree, and even insert PHI-defs where
+// needed. SplitKit uses this faster interface when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGECALC_H
+#define LLVM_CODEGEN_LIVERANGECALC_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Forward declarations for MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+class LiveRangeCalc {
+ /// Seen - Bit vector of active entries in LiveOut, also used as a visited
+ /// set by findReachingDefs. One entry per basic block, indexed by block
+ /// number. This is kept as a separate bit vector because it can be cleared
+ /// quickly when switching live ranges.
+ BitVector Seen;
+
+ /// LiveOutPair - A value and the block that defined it. The domtree node is
+ /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+
+ /// LiveOutMap - Map basic blocks to the value leaving the block.
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ /// LiveOut - Map each basic block where a live range is live out to the
+ /// live-out value and its defining block.
+ ///
+ /// For every basic block, MBB, one of these conditions shall be true:
+ ///
+ /// 1. !Seen.count(MBB->getNumber())
+ /// Blocks without a Seen bit are ignored.
+ /// 2. LiveOut[MBB].second.getNode() == MBB
+ /// The live-out value is defined in MBB.
+ /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
+ /// The live-out value passes through MBB. All predecessors must carry
+ /// the same value.
+ ///
+ /// The domtree node may be null, it can be computed.
+ ///
+ /// The map can be shared by multiple live ranges as long as no two are
+ /// live-out of the same block.
+ LiveOutMap LiveOut;
+
+ /// LiveInBlock - Information about a basic block where a live range is known
+ /// to be live-in, but the value has not yet been determined.
+ struct LiveInBlock {
+ // LI - The live range that is live-in to this block. The algorithms can
+ // handle multiple non-overlapping live ranges simultaneously.
+ LiveInterval *LI;
+
+ // DomNode - Dominator tree node for the block.
+ // Cleared when the final value has been determined and LI has been updated.
+ MachineDomTreeNode *DomNode;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block. When the final value has been
+ // determined, the range from the block start to Kill will be added to LI.
+ SlotIndex Kill;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill)
+ : LI(li), DomNode(node), Kill(kill), Value(0) {}
+ };
+
+ /// LiveIn - Work list of blocks where the live-in value has yet to be
+ /// determined. This list is typically computed by findReachingDefs() and
+ /// used as a work list by updateSSA(). The low-level interface may also be
+ /// used to add entries directly.
+ SmallVector<LiveInBlock, 16> LiveIn;
+
+ /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at
+ /// Kill, search for values that can reach KillMBB. All blocks that need LI
+ /// to be live-in are added to LiveIn. If a unique reaching def is found,
+  /// its value is returned; if Kill is jointly dominated by multiple values,
+ /// NULL is returned.
+ VNInfo *findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree);
+
+ /// updateSSA - Compute the values that will be live in to all requested
+ /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
+ ///
+ /// Every live-in block must be jointly dominated by the added live-out
+ /// blocks. No values are read from the live ranges.
+ void updateSSA(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+
+ /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI
+ /// as a wildcard value for LiveIn entries without a value.
+ void updateLiveIns(VNInfo *VNI, SlotIndexes*);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Calculate live ranges from scratch.
+ //
+
+ /// reset - Prepare caches for a new set of non-overlapping live ranges. The
+ /// caches must be reset before attempting calculations with a live range
+ /// that may overlap a previously computed live range, and before the first
+ /// live range in a function. If live ranges are not known to be
+ /// non-overlapping, call reset before each.
+ void reset(const MachineFunction *MF);
+
+ /// calculate - Calculate the live range of a virtual register from its defs
+ /// and uses. LI must be empty with no values.
+ void calculate(LiveInterval *LI,
+ MachineRegisterInfo *MRI,
+ SlotIndexes *Indexes,
+ VNInfo::Allocator *Alloc);
+
+ //===--------------------------------------------------------------------===//
+ // Mid-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Modify existing live ranges.
+ //
+
+ /// extend - Extend the live range of LI to reach Kill.
+ ///
+ /// The existing values in LI must be live so they jointly dominate Kill. If
+ /// Kill is not dominated by a single existing value, PHI-defs are inserted
+ /// as required to preserve SSA form. If Kill is known to be dominated by a
+ /// single existing value, Alloc may be null.
+ void extend(LiveInterval *LI,
+ SlotIndex Kill,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+
+ /// extendToUses - Extend the live range of LI to reach all uses.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveInterval *LI,
+ MachineRegisterInfo *MRI,
+ SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // These functions can be used to compute live ranges where the live-in and
+ // live-out blocks are already known, but the SSA value in each block is
+ // unknown.
+ //
+ // After calling reset(), add known live-out values and known live-in blocks.
+ // Then call calculateValues() to compute the actual value that is
+ // live-in to each block, and add liveness to the live ranges.
+ //
+
+ /// setLiveOutValue - Indicate that VNI is live out from MBB. The
+ /// calculateValues() function will not add liveness for MBB, the caller
+ /// should take care of that.
+ ///
+ /// VNI may be null only if MBB is a live-through block also passed to
+ /// addLiveInBlock().
+ void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
+ Seen.set(MBB->getNumber());
+ LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ }
+
+ /// addLiveInBlock - Add a block with an unknown live-in value. This
+ /// function can only be called once per basic block. Once the live-in value
+ /// has been determined, calculateValues() will add liveness to LI.
+ ///
+ /// @param LI The live range that is live-in to the block.
+ /// @param DomNode The domtree node for the block.
+ /// @param Kill Index in block where LI is killed. If the value is
+  ///               live-through, set Kill = SlotIndex() and also call
+ /// setLiveOutValue(MBB, 0).
+ void addLiveInBlock(LiveInterval *LI,
+ MachineDomTreeNode *DomNode,
+ SlotIndex Kill = SlotIndex()) {
+ LiveIn.push_back(LiveInBlock(LI, DomNode, Kill));
+ }
+
+ /// calculateValues - Calculate the value that will be live-in to each block
+ /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
+ /// form. Add liveness to all live-in blocks up to the Kill point, or the
+ /// whole block for live-through blocks.
+ ///
+ /// Every predecessor of a live-in block must have been given a value with
+  /// setLiveOutValue; the value may be null for live-through blocks.
+ void calculateValues(SlotIndexes *Indexes,
+ MachineDominatorTree *DomTree,
+ VNInfo::Allocator *Alloc);
+};
+
+} // end namespace llvm
+
+#endif
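Putting the header's low-level protocol together, a hedged sketch of how a client such as SplitKit might drive it; LRCalc, LI, the blocks, Indexes, DomTree and Alloc are placeholders, not code from this patch:

    LiveRangeCalc LRCalc;
    LRCalc.reset(MF);                    // MF: placeholder MachineFunction*

    // The defining block has a known live-out value.
    LRCalc.setLiveOutValue(DefMBB, VNI); // DefMBB, VNI: placeholders

    // A live-through block: Kill stays SlotIndex(), live-out value is null.
    LRCalc.addLiveInBlock(LI, DomTree->getNode(ThruMBB));
    LRCalc.setLiveOutValue(ThruMBB, 0);

    // A block where the range dies at index Kill.
    LRCalc.addLiveInBlock(LI, DomTree->getNode(KillMBB), Kill);

    // Resolve the live-in values, inserting PHI-defs where needed, and add
    // liveness to LI up to each Kill point.
    LRCalc.calculateValues(Indexes, DomTree, Alloc);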
diff --git a/src/LLVM/lib/CodeGen/LiveRangeEdit.cpp b/src/LLVM/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 0000000..b23f851
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,330 @@
+//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
+ LiveIntervals &LIS,
+ VirtRegMap &VRM) {
+ MachineRegisterInfo &MRI = VRM.getRegInfo();
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ VRM.grow();
+ VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg));
+ LiveInterval &LI = LIS.getOrCreateInterval(VReg);
+ newRegs_.push_back(&LI);
+ return LI;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+ const MachineInstr *DefMI,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ assert(DefMI && "Missing instruction");
+ scannedRemattable_ = true;
+ if (!tii.isTriviallyReMaterializable(DefMI, aa))
+ return false;
+ remattable_.insert(VNI);
+ return true;
+}
+
+void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ for (LiveInterval::vni_iterator I = parent_.vni_begin(),
+ E = parent_.vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ if (!DefMI)
+ continue;
+ checkRematerializable(VNI, DefMI, tii, aa);
+ }
+ scannedRemattable_ = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ if (!scannedRemattable_)
+ scanRemattable(lis, tii, aa);
+ return !remattable_.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx,
+ LiveIntervals &lis) {
+ OrigIdx = OrigIdx.getUseIndex();
+ UseIdx = UseIdx.getUseIndex();
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.isDef())
+ continue;
+ // Reserved registers are OK.
+ if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ continue;
+ // We cannot depend on virtual registers in uselessRegs_.
+ if (uselessRegs_)
+ for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
+ if ((*uselessRegs_)[ui]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &li = lis.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis) {
+ assert(scannedRemattable_ && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!remattable_.count(RM.ParentVNI))
+ return false;
+
+  // Find the defining instruction when the caller didn't provide one.
+ SlotIndex DefIdx;
+ if (RM.OrigMI)
+ DefIdx = lis.getInstructionIndex(RM.OrigMI);
+ else {
+ DefIdx = RM.ParentVNI->def;
+ RM.OrigMI = lis.getInstructionFromIndex(DefIdx);
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ }
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri,
+ bool Late) {
+ assert(RM.OrigMI && "Invalid remat");
+ tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ rematted_.insert(RM.ParentVNI);
+ return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getDefIndex();
+}
+
+void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) {
+ if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
+ LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr*> &Dead,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineInstr *DefMI = 0, *UseMI = 0;
+
+ // Check that there is a single def and a single use.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg),
+ E = MRI.reg_nodbg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MO.isDef()) {
+ if (DefMI && DefMI != MI)
+ return false;
+ if (!MI->getDesc().canFoldAsLoad())
+ return false;
+ DefMI = MI;
+ } else if (!MO.isUndef()) {
+ if (UseMI && UseMI != MI)
+ return false;
+ // FIXME: Targets don't know how to fold subreg uses.
+ if (MO.getSubReg())
+ return false;
+ UseMI = MI;
+ }
+ }
+ if (!DefMI || !UseMI)
+ return false;
+
+ DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
+
+ SmallVector<unsigned, 8> Ops;
+ if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ return false;
+
+ MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI);
+ if (!FoldMI)
+ return false;
+ DEBUG(dbgs() << " folded: " << *FoldMI);
+ LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
+ UseMI->eraseFromParent();
+ DefMI->addRegisterDead(LI->reg, 0);
+ Dead.push_back(DefMI);
+ ++NumDCEFoldedLoads;
+ return true;
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ LiveIntervals &LIS, VirtRegMap &VRM,
+ const TargetInstrInfo &TII) {
+ SetVector<LiveInterval*,
+ SmallVector<LiveInterval*, 8>,
+ SmallPtrSet<LiveInterval*, 8> > ToShrink;
+ MachineRegisterInfo &MRI = VRM.getRegInfo();
+
+ for (;;) {
+ // Erase all dead defs.
+ while (!Dead.empty()) {
+ MachineInstr *MI = Dead.pop_back_val();
+ assert(MI->allDefsAreDead() && "Def isn't really dead");
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+
+ // Never delete inline asm.
+ if (MI->isInlineAsm()) {
+ DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ // Use the same criteria as DeadMachineInstructionElim.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+ DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ continue;
+ }
+
+ DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+ // Check for live intervals that may shrink
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // Shrink read registers, unless it is likely to be expensive and
+ // unlikely to change anything. We typically don't want to shrink the
+ // PIC base register that has lots of uses everywhere.
+ // Always shrink COPY uses that probably come from live range splitting.
+ if (MI->readsVirtualRegister(Reg) &&
+ (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
+ LI.killedAt(Idx)))
+ ToShrink.insert(&LI);
+
+ // Remove defined value.
+ if (MOI->isDef()) {
+ if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
+ if (delegate_)
+ delegate_->LRE_WillShrinkVirtReg(LI.reg);
+ LI.removeValNo(VNI);
+ if (LI.empty()) {
+ ToShrink.remove(&LI);
+ eraseVirtReg(Reg, LIS);
+ }
+ }
+ }
+ }
+
+ if (delegate_)
+ delegate_->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+
+ if (ToShrink.empty())
+ break;
+
+ // Shrink just one live interval. Then delete new dead defs.
+ LiveInterval *LI = ToShrink.back();
+ ToShrink.pop_back();
+ if (foldAsLoad(LI, Dead, MRI, LIS, TII))
+ continue;
+ if (delegate_)
+ delegate_->LRE_WillShrinkVirtReg(LI->reg);
+ if (!LIS.shrinkToUses(LI, &Dead))
+ continue;
+
+ // LI may have been separated, create new intervals.
+ LI->RenumberValues(LIS);
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp <= 1)
+ continue;
+ ++NumFracRanges;
+ bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
+ DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
+ SmallVector<LiveInterval*, 8> Dups(1, LI);
+ for (unsigned i = 1; i != NumComp; ++i) {
+ Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (IsOriginal)
+ VRM.setIsSplitFromReg(Dups.back()->reg, 0);
+ if (delegate_)
+ delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ }
+ ConEQ.Distribute(&Dups[0], MRI);
+ }
+}
+
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ LiveIntervals &LIS,
+ const MachineLoopInfo &Loops) {
+ VirtRegAuxInfo VRAI(MF, LIS, Loops);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ LiveInterval &LI = **I;
+ if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+ DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ << MRI.getRegClass(LI.reg)->getName() << '\n');
+ VRAI.CalculateWeightAndHint(LI);
+ }
+}
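For orientation, a sketch of the intended eliminateDeadDefs() call pattern from a spiller's point of view; everything other than the LiveRangeEdit calls themselves is a placeholder:

    SmallVector<LiveInterval*, 4> NewRegs;
    LiveRangeEdit Edit(ParentLI, NewRegs); // ParentLI: placeholder interval

    // Instructions whose defs all became dead (allDefsAreDead() is true).
    SmallVector<MachineInstr*, 8> Dead;
    Dead.push_back(DeadCopy);              // DeadCopy: placeholder

    // Deletes the dead defs, shrinks ranges they kept alive, and splits any
    // interval that falls apart into connected components via createFrom().
    Edit.eliminateDeadDefs(Dead, LIS, VRM, TII);

    // Recompute classes and spill weights for whatever got created.
    Edit.calculateRegClassAndHint(MF, LIS, Loops);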
diff --git a/src/LLVM/lib/CodeGen/LiveRangeEdit.h b/src/LLVM/lib/CodeGen/LiveRangeEdit.h
new file mode 100644
index 0000000..9b0a671
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,206 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+public:
+ /// Callback methods for LiveRangeEdit owners.
+ struct Delegate {
+ /// Called immediately before erasing a dead machine instruction.
+ virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
+
+ /// Called when a virtual register is no longer used. Return false to defer
+ /// its deletion from LiveIntervals.
+ virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+
+ /// Called before shrinking the live range of a virtual register.
+ virtual void LRE_WillShrinkVirtReg(unsigned) {}
+
+ /// Called after cloning a virtual register.
+ /// This is used for new registers representing connected components of Old.
+ virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+
+ virtual ~Delegate() {}
+ };
+
+private:
+ LiveInterval &parent_;
+ SmallVectorImpl<LiveInterval*> &newRegs_;
+ Delegate *const delegate_;
+ const SmallVectorImpl<LiveInterval*> *uselessRegs_;
+
+ /// firstNew_ - Index of the first register added to newRegs_.
+ const unsigned firstNew_;
+
+ /// scannedRemattable_ - true when remattable values have been identified.
+ bool scannedRemattable_;
+
+ /// remattable_ - Values defined by remattable instructions as identified by
+ /// tii.isTriviallyReMaterializable().
+ SmallPtrSet<const VNInfo*,4> remattable_;
+
+ /// rematted_ - Values that were actually rematted, and so need to have their
+ /// live range trimmed or entirely removed.
+ SmallPtrSet<const VNInfo*,4> rematted_;
+
+ /// scanRemattable - Identify the parent_ values that may rematerialize.
+ void scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx, LiveIntervals &lis);
+
+ /// foldAsLoad - If LI has a single use and a single def that can be folded as
+ /// a load, eliminate the register by folding the def into the use.
+ bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead,
+ MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+  /// empty initially; any existing registers are ignored.
+ /// @param uselessRegs List of registers that can't be used when
+ /// rematerializing values because they are about to be removed.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ Delegate *delegate = 0,
+ const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
+ : parent_(parent), newRegs_(newRegs),
+ delegate_(delegate),
+ uselessRegs_(uselessRegs),
+ firstNew_(newRegs.size()),
+ scannedRemattable_(false) {}
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ ArrayRef<LiveInterval*> regs() const {
+ return makeArrayRef(newRegs_).slice(firstNew_);
+ }
+
+ /// FIXME: Temporary accessors until we can get rid of
+ /// LiveIntervals::AddIntervalsForSpills
+ SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; }
+ const SmallVectorImpl<LiveInterval*> *getUselessVRegs() {
+ return uselessRegs_;
+ }
+
+ /// createFrom - Create a new virtual register based on OldReg.
+ LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&);
+
+ /// create - Create a new register with the same class and original slot as
+ /// parent.
+ LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) {
+ return createFrom(getReg(), LIS, VRM);
+ }
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+ /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+ AliasAnalysis*);
+
+ /// checkRematerializable - Manually add VNI to the list of rematerializable
+ /// values if DefMI may be rematerializable.
+ bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
+ const TargetInstrInfo&, AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+  /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals&,
+ const TargetInstrInfo&,
+ const TargetRegisterInfo&,
+ bool Late = false);
+
+ /// markRematerialized - explicitly mark a value as rematerialized after doing
+ /// it manually.
+ void markRematerialized(const VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(const VNInfo *ParentVNI) const {
+ return rematted_.count(ParentVNI);
+ }
+
+ /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
+ /// to erase it from LIS.
+ void eraseVirtReg(unsigned Reg, LiveIntervals &LIS);
+
+ /// eliminateDeadDefs - Try to delete machine instructions that are now dead
+ /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
+  /// and further dead defs to be eliminated.
+ void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ LiveIntervals&, VirtRegMap&,
+ const TargetInstrInfo&);
+
+ /// calculateRegClassAndHint - Recompute register class and hint for each new
+ /// register.
+ void calculateRegClassAndHint(MachineFunction&, LiveIntervals&,
+ const MachineLoopInfo&);
+};
+
+}
+
+#endif
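And a sketch of the documented rematerialization protocol: scan once, query per use point, then insert. ParentVNI, UseIdx, MBB, MI and NewReg are placeholders:

    LiveRangeEdit Edit(ParentLI, NewRegs);
    if (Edit.anyRematerializable(LIS, TII, AA)) { // must be called first
      LiveRangeEdit::Remat RM(ParentVNI);
      if (Edit.canRematerializeAt(RM, UseIdx, /*cheapAsAMove=*/true, LIS)) {
        // Emits a copy of RM.OrigMI before MI; liveness is NOT updated here.
        SlotIndex DefIdx =
            Edit.rematerializeAt(MBB, MI, NewReg, RM, LIS, TII, TRI);
        (void)DefIdx; // the caller extends liveness to the new def
      }
    }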
diff --git a/src/LLVM/lib/CodeGen/LiveStackAnalysis.cpp b/src/LLVM/lib/CodeGen/LiveStackAnalysis.cpp
index b5c385f..939e795 100644
--- a/src/LLVM/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/src/LLVM/lib/CodeGen/LiveStackAnalysis.cpp
@@ -26,7 +26,9 @@
char LiveStacks::ID = 0;
INITIALIZE_PASS(LiveStacks, "livestacks",
- "Live Stack Slot Analysis", false, false);
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -42,12 +44,29 @@
S2RCMap.clear();
}
-bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
// FIXME: No analysis is being done right now. We are relying on the
// register allocators to provide the information.
return false;
}
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+ assert(Slot >= 0 && "Spill slot indice must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.insert(I, std::make_pair(Slot,
+ LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
/// print - Implement the dump method.
void LiveStacks::print(raw_ostream &OS, const Module*) const {
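The new getOrCreateInterval() narrows a slot's recorded register class each time the slot is reused: the first call records RC, later calls keep TRI->getCommonSubClass(OldRC, RC). A hedged illustration; the x86 classes are familiar examples, not taken from this patch:

    // First user of slot 3: record GR32.
    LiveInterval &SI = LS->getOrCreateInterval(3, &X86::GR32RegClass);
    (void)SI;
    // A second user needs the narrower GR32_ABCD; since it is a subclass of
    // GR32, the recorded class for slot 3 becomes GR32_ABCD.
    LS->getOrCreateInterval(3, &X86::GR32_ABCDRegClass);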
diff --git a/src/LLVM/lib/CodeGen/LiveVariables.cpp b/src/LLVM/lib/CodeGen/LiveVariables.cpp
index 1d935b2..2ca90f9 100644
--- a/src/LLVM/lib/CodeGen/LiveVariables.cpp
+++ b/src/LLVM/lib/CodeGen/LiveVariables.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -42,8 +41,11 @@
using namespace llvm;
char LiveVariables::ID = 0;
-INITIALIZE_PASS(LiveVariables, "livevars",
- "Live Variable Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -79,13 +81,7 @@
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
"getVarInfo: not a virtual register!");
- RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
- if (RegIdx >= VirtRegInfo.size()) {
- if (RegIdx >= 2*VirtRegInfo.size())
- VirtRegInfo.resize(RegIdx*2);
- else
- VirtRegInfo.resize(2*VirtRegInfo.size());
- }
+ VirtRegInfo.grow(RegIdx);
return VirtRegInfo[RegIdx];
}
@@ -111,9 +107,7 @@
// Mark the variable known alive in this bb
VRInfo.AliveBlocks.set(BBNum);
- for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
- E = MBB->pred_rend(); PI != E; ++PI)
- WorkList.push_back(*PI);
+ WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
}
void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
@@ -483,21 +477,6 @@
}
}
-namespace {
- struct RegSorter {
- const TargetRegisterInfo *TRI;
-
- RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { }
- bool operator()(unsigned A, unsigned B) {
- if (TRI->isSubRegister(A, B))
- return true;
- else if (TRI->isSubRegister(B, A))
- return false;
- return A < B;
- }
- };
-}
-
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MRI = &mf.getRegInfo();
@@ -513,9 +492,6 @@
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
PHIJoins.clear();
- /// Get some space for a respectable number of registers.
- VirtRegInfo.resize(64);
-
analyzePHINodes(mf);
// Calculate live variable information in depth first order on the CFG of the
@@ -643,19 +619,14 @@
// Convert and transfer the dead / killed information we have gathered into
// VirtRegInfo onto MI's.
- for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
- for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
- if (VirtRegInfo[i].Kills[j] ==
- MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
- VirtRegInfo[i]
- .Kills[j]->addRegisterDead(i +
- TargetRegisterInfo::FirstVirtualRegister,
- TRI);
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
else
- VirtRegInfo[i]
- .Kills[j]->addRegisterKilled(i +
- TargetRegisterInfo::FirstVirtualRegister,
- TRI);
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
// Check to make sure there are no unreachable blocks in the MC CFG for the
// function. If so, it is due to a bug in the instruction selector or some
@@ -691,7 +662,7 @@
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
bool removed = getVarInfo(Reg).removeKill(MI);
assert(removed && "kill not in register's VarInfo?");
- removed = true;
+ (void)removed;
}
}
}
@@ -734,7 +705,7 @@
// Loop over all of the successors of the basic block, checking to see if
// the value is either live in the block, or if it is killed in the block.
- std::vector<MachineBasicBlock*> OpSuccBlocks;
+ SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
E = MBB.succ_end(); SI != E; ++SI) {
MachineBasicBlock *SuccMBB = *SI;
@@ -790,8 +761,8 @@
getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
// Update info for all live variables
- for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
- E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
VarInfo &VI = getVarInfo(Reg);
if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
VI.AliveBlocks.set(NumNew);
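The recurring change in this file is indexing: virtual registers are no longer computed as index + FirstVirtualRegister but go through the index2VirtReg helper, and VirtRegInfo now grows on demand via grow(). A small sketch of the round-trip; virtReg2Index is assumed to be the inverse helper in the same header:

    unsigned Reg = TargetRegisterInfo::index2VirtReg(0); // first virtual reg
    assert(TargetRegisterInfo::isVirtualRegister(Reg));
    assert(TargetRegisterInfo::virtReg2Index(Reg) == 0); // round-trips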
diff --git a/src/LLVM/lib/CodeGen/LocalStackSlotAllocation.cpp b/src/LLVM/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 0000000..1318d62
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,359 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+ MI(I), LocalOffset(Offset) {}
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const {
+ return "Local Stack Slot Allocation";
+ }
+
+ private:
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+FunctionPass *llvm::createLocalStackSlotAllocationPass() {
+ return new LocalStackSlotPass();
+}
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a bit better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+ std::pair<unsigned, int64_t> &RegOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ unsigned e = Regs.size();
+ for (unsigned i = 0; i < e; ++i) {
+ RegOffset = Regs[i];
+    // Check if the relative offset from where the base register points to
+    // the target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+ if (TRI->isFrameOffsetLegal(MI, Offset))
+ return true;
+ }
+ return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Collect all of the instructions in the block that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+ // A base register definition is a register + offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+
+ // Debug value instructions can't be out of range, so they don't need
+ // any updates.
+ if (MI->isDebugValue())
+ continue;
+
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ break;
+ FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ break;
+ }
+ }
+ }
+ }
+
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+ MachineBasicBlock *Entry = Fn.begin();
+
+ // Loop through the frame references and allocate for them as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ MachineBasicBlock::iterator I =
+ FrameReferenceInsns[ref].getMachineInstr();
+ MachineInstr *MI = I;
+ for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(idx).isFI()) {
+ int FrameIdx = MI->getOperand(idx).getIndex();
+
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+ if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+ unsigned BaseReg = 0;
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust =
+ StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ std::pair<unsigned, int64_t> RegOffset;
+ if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+ FrameSizeAdjust,
+ LocalOffsets[FrameIdx],
+ MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " <<
+ RegOffset.first << "\n");
+ // We found a register to reuse.
+ BaseReg = RegOffset.first;
+ Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+ RegOffset.second;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ const TargetRegisterClass *RC = TRI->getPointerRegClass();
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " <<
+ LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+ InstrOffset;
+ BaseRegisters.push_back(
+ std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
+ }
+ }
+ }
+ }
+ return UsedBaseReg;
+}
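Two bits of arithmetic above are worth pinning down. AdjustStackOffset() rounds the running offset up to the object's alignment with (Offset + Align - 1) / Align * Align, and lookupCandidateBaseReg() reuses a base register when FrameSizeAdjust + LocalFrameOffset - RegOffset.second is legal for the instruction. A runnable check of the rounding, with made-up numbers for the offset math:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Round up to an alignment boundary, as AdjustStackOffset does.
      int64_t Offset = 13;
      unsigned Align = 8;
      Offset = (Offset + Align - 1) / Align * Align;
      assert(Offset == 16);

      // Offset a reused base register must cover (target decides legality).
      int64_t FrameSizeAdjust = 64, LocalFrameOffset = -24, BaseRegOff = -48;
      assert(FrameSizeAdjust + LocalFrameOffset - BaseRegOff == 88);
      return 0;
    }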
diff --git a/src/LLVM/lib/CodeGen/MachineBasicBlock.cpp b/src/LLVM/lib/CodeGen/MachineBasicBlock.cpp
index c6db0f7..4c5fe4c 100644
--- a/src/LLVM/lib/CodeGen/MachineBasicBlock.cpp
+++ b/src/LLVM/lib/CodeGen/MachineBasicBlock.cpp
@@ -17,11 +17,11 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Assembly/Writer.h"
@@ -34,7 +34,7 @@
using namespace llvm;
MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
- : BB(bb), Number(-1), xParent(&mf), Alignment(0),
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
AddressTaken(false) {
Insts.Parent = this;
}
@@ -60,7 +60,7 @@
return OS;
}
-/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
/// parent pointer of the MBB, the MBB numbering, and any instructions in the
/// MBB to be on the right operand list for registers.
///
@@ -92,7 +92,7 @@
void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
assert(N->getParent() == 0 && "machine instruction already in a basic block");
N->setParent(Parent);
-
+
// Add the instruction's register operands to their corresponding
// use/def lists.
MachineFunction *MF = Parent->getParent();
@@ -109,7 +109,7 @@
// Remove from the use/def lists.
N->RemoveRegOperandsFromUseLists();
-
+
N->setParent(0);
LeakDetector::addGarbageObject(N);
@@ -146,29 +146,48 @@
return I;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ ++I;
+ return I;
+}
+
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator I = end();
- while (I != begin() && (--I)->getDesc().isTerminator())
+ while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
; /*noop */
- if (I != end() && !I->getDesc().isTerminator()) ++I;
+ while (I != end() && !I->getDesc().isTerminator())
+ ++I;
return I;
}
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ iterator B = begin(), I = end();
+ while (I != B) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+ // A block with a landing pad successor only has one other successor.
+ if (succ_size() > 2)
+ return 0;
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isLandingPad())
+ return *I;
+ return 0;
+}
+
void MachineBasicBlock::dump() const {
print(dbgs());
}
-static inline void OutputReg(raw_ostream &os, unsigned RegNo,
- const TargetRegisterInfo *TRI = 0) {
- if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) {
- if (TRI)
- os << " %" << TRI->get(RegNo).Name;
- else
- os << " %physreg" << RegNo;
- } else
- os << " %reg" << RegNo;
-}
-
StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
@@ -176,7 +195,7 @@
return "(null)";
}
-void MachineBasicBlock::print(raw_ostream &OS) const {
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -186,6 +205,9 @@
if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
OS << "BB#" << getNumber() << ": ";
const char *Comma = "";
@@ -194,31 +216,40 @@
WriteAsOperand(OS, LBB, /*PrintType=*/false);
Comma = ", ";
}
+ if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
OS << '\n';
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (!livein_empty()) {
+ if (Indexes) OS << '\t';
OS << " Live Ins:";
for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
- OutputReg(OS, *I, TRI);
+ OS << ' ' << PrintReg(*I, TRI);
OS << '\n';
}
// Print the preds of this block according to the CFG.
if (!pred_empty()) {
+ if (Indexes) OS << '\t';
OS << " Predecessors according to CFG:";
for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
OS << " BB#" << (*PI)->getNumber();
OS << '\n';
}
-
+
for (const_iterator I = begin(); I != end(); ++I) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
OS << '\t';
I->print(OS, &getParent()->getTarget());
}
// Print the successors of this block according to the CFG.
if (!succ_empty()) {
+ if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
OS << " BB#" << (*SI)->getNumber();
@@ -307,32 +338,70 @@
}
}
-void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
- Successors.push_back(succ);
- succ->addPredecessor(this);
-}
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+
+  // If we see a non-zero weight for the first time, we are actually using the
+  // Weight list, so backfill Weights with 0's for the existing successors.
+ if (weight != 0 && Weights.empty())
+ Weights.resize(Successors.size());
+
+ if (weight != 0 || !Weights.empty())
+ Weights.push_back(weight);
+
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+}
void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
succ->removePredecessor(this);
succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
assert(I != Successors.end() && "Not a current successor!");
+
+  // If the Weight list is empty, we aren't using it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
Successors.erase(I);
}
-MachineBasicBlock::succ_iterator
+MachineBasicBlock::succ_iterator
MachineBasicBlock::removeSuccessor(succ_iterator I) {
assert(I != Successors.end() && "Not a current successor!");
+
+  // If the Weight list is empty, we aren't using it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
(*I)->removePredecessor(this);
return Successors.erase(I);
}
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ uint32_t weight = 0;
+ succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+
+  // If the Weight list is empty, we aren't using it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(SI);
+ weight = *WI;
+ }
+
+ // Update the successor information.
+ removeSuccessor(SI);
+ addSuccessor(New, weight);
+}
+
void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
Predecessors.push_back(pred);
}
void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
- std::vector<MachineBasicBlock *>::iterator I =
- std::find(Predecessors.begin(), Predecessors.end(), pred);
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
Predecessors.erase(I);
}
@@ -340,10 +409,17 @@
void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
if (this == fromMBB)
return;
-
+
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- addSuccessor(Succ);
+ uint32_t weight = 0;
+
+  // If the Weight list is empty, we aren't using it (disabled optimization).
+ if (!fromMBB->Weights.empty())
+ weight = *fromMBB->Weights.begin();
+
+ addSuccessor(Succ, weight);
fromMBB->removeSuccessor(Succ);
}
}
@@ -352,7 +428,7 @@
MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
if (this == fromMBB)
return;
-
+
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
addSuccessor(Succ);
@@ -370,8 +446,7 @@
}
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
- std::vector<MachineBasicBlock *>::const_iterator I =
- std::find(Successors.begin(), Successors.end(), MBB);
+ const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
return I != Successors.end();
}
@@ -430,21 +505,55 @@
MachineFunction *MF = getParent();
DebugLoc dl; // FIXME: this is nowhere
- // We may need to update this's terminator, but we can't do that if AnalyzeBranch
- // fails. If this uses a jump table, we won't touch it.
+ // We may need to update this's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
return NULL;
+  // Avoid bugpoint weirdness: A block may end with a conditional branch but
+  // jump to the same MBB in either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return NULL;
+ }
+
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
- DEBUG(dbgs() << "PHIElimination splitting critical edge:"
+ DEBUG(dbgs() << "Splitting critical edge:"
" BB#" << getNumber()
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << Succ->getNumber() << '\n');
+ // On some targets like Mips, branches may kill virtual registers. Make sure
+ // that LiveVariables is properly updated after updateTerminator replaces the
+ // terminators.
+ LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();
+
+ // Collect a list of virtual registers killed by the terminators.
+ SmallVector<unsigned, 4> KilledRegs;
+ if (LV)
+ for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
+ continue;
+ unsigned Reg = OI->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LV->getVarInfo(Reg).removeKill(MI)) {
+ KilledRegs.push_back(Reg);
+ DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ OI->setIsKill(false);
+ }
+ }
+ }
+
ReplaceUsesOfBlockWith(Succ, NMBB);
updateTerminator();
@@ -462,16 +571,56 @@
if (i->getOperand(ni+1).getMBB() == this)
i->getOperand(ni+1).setMBB(NMBB);
- if (LiveVariables *LV =
- P->getAnalysisIfAvailable<LiveVariables>())
+ // Inherit live-ins from the successor
+ for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
+ E = Succ->livein_end(); I != E; ++I)
+ NMBB->addLiveIn(*I);
+
+ // Update LiveVariables.
+ if (LV) {
+ // Restore kills of virtual registers that were killed by the terminators.
+ while (!KilledRegs.empty()) {
+ unsigned Reg = KilledRegs.pop_back_val();
+ for (iterator I = end(), E = begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
+ continue;
+ LV->getVarInfo(Reg).Kills.push_back(I);
+ DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ break;
+ }
+ }
+ // Update relevant live-through information.
LV->addNewBlock(NMBB, this, Succ);
+ }
if (MachineDominatorTree *MDT =
- P->getAnalysisIfAvailable<MachineDominatorTree>())
- MDT->addNewBlock(NMBB, this);
+ P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+ // Update dominator information.
+    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);
- if (MachineLoopInfo *MLI =
- P->getAnalysisIfAvailable<MachineLoopInfo>())
+ bool IsNewIDom = true;
+ for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+ PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (PredBB == NMBB)
+ continue;
+      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
+ IsNewIDom = false;
+ break;
+ }
+ }
+
+ // We know "this" dominates the newly created basic block.
+ MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom)
+ MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
+ }
+
+ if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
if (MachineLoop *TIL = MLI->getLoopFor(this)) {
// If one or the other blocks were not in a loop, the new block is not
// either, and thus LI doesn't need to be updated.
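The IsNewIDom computation in the hunk above can be read as a small predicate.
A sketch under the assumption that dominates(A, B) answers "does A dominate B":

#include <functional>
#include <vector>

// Succ's immediate dominator moves to the new block only if every other
// predecessor of Succ is itself dominated by Succ, i.e. reaches Succ only
// around a back edge.
bool newBlockBecomesIDomOfSucc(const std::vector<int> &SuccPreds, int NMBB,
                               int Succ,
                               const std::function<bool(int, int)> &dominates) {
  for (int Pred : SuccPreds) {
    if (Pred != NMBB && !dominates(Succ, Pred))
      return false; // some edge bypasses Succ; the new block dominates nothing
  }
  return true;
}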
@@ -538,15 +687,14 @@
}
// Update the successor information.
- removeSuccessor(Old);
- addSuccessor(New);
+ replaceSuccessor(Old, New);
}
/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
/// null.
-///
+///
/// Besides DestA and DestB, retain other edges leading to LandingPads
/// (currently there can be only one; we don't check or require that here).
/// Note it is possible that DestA and/or DestB are LandingPads.
@@ -592,7 +740,7 @@
while (SI != succ_end()) {
const MachineBasicBlock *MBB = *SI;
if (!SeenMBBs.insert(MBB) ||
- MBB != DestA && MBB != DestB) {
+ (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
// This is a superfluous edge, remove it.
SI = removeSuccessor(SI);
Changed = true;
@@ -621,6 +769,26 @@
return DL;
}
+/// getSuccWeight - Return the weight of the edge from this block to succ.
+///
+uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+ if (Weights.empty())
+ return 0;
+
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ return *getWeightIterator(I);
+}
+
+/// getWeightIterator - Return the weight iterator corresponding to the
+/// successor iterator I.
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
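The two functions above assume Weights runs parallel to Successors,
index-for-index. A self-contained model of that invariant (hypothetical Block
type, not the real MachineBasicBlock):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct Block {
  std::vector<Block *> Successors;
  std::vector<uint32_t> Weights; // empty, or one entry per successor

  uint32_t succWeight(Block *S) const {
    if (Weights.empty())
      return 0; // no profile data recorded for this block
    std::vector<Block *>::const_iterator I =
        std::find(Successors.begin(), Successors.end(), S);
    assert(I != Successors.end() && "not a successor");
    return Weights[I - Successors.begin()];
  }
};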
void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
bool t) {
OS << "BB#" << MBB->getNumber();
diff --git a/src/LLVM/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/src/LLVM/lib/CodeGen/MachineBlockFrequencyInfo.cpp
new file mode 100644
index 0000000..b92cda9
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -0,0 +1,61 @@
+//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequencyInfo::ID = 0;
+
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+ MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+ MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {
+ delete MBFI;
+}
+
+void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI->doFunction(&F, &MBPI);
+ return false;
+}
+
+/// getBlockFreq - Return block frequency. Return 0 if we don't have the
+/// information. Please note that the initial frequency is equal to 1024. It
+/// means that we should not rely on the value itself, but only on comparisons
+/// to the other block frequencies. We do this to avoid using floating point.
+///
+BlockFrequency MachineBlockFrequencyInfo::
+getBlockFreq(MachineBasicBlock *MBB) const {
+ return MBFI->getBlockFreq(MBB);
+}
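A worked example of the "relative to 1024" convention documented above; only
ratios between blocks carry meaning:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t EntryFreq = 1024;     // the fixed baseline
  const uint64_t BodyFreq = 10 * 1024; // a loop body ~10x hotter than entry
  // Compare blocks by ratio, never by the absolute number:
  assert(BodyFreq / EntryFreq == 10);
  return 0;
}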
diff --git a/src/LLVM/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/src/LLVM/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 0000000..c13fa6b
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,113 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
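The Sum > PrevSum assertion above doubles as an overflow check. A minimal
sketch of the pattern (getEdgeWeight never returns 0 here, since it falls back
to DEFAULT_WEIGHT, so the assert can only fire on wraparound):

#include <cassert>
#include <cstdint>

uint32_t addWeightChecked(uint32_t Sum, uint32_t Weight) {
  uint32_t PrevSum = Sum;
  Sum += Weight; // unsigned wraparound is well-defined, so it is detectable
  assert(Sum > PrevSum && "weight sum wrapped around (or Weight was 0)");
  return Sum;
}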
+uint32_t
+MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t Weight = Src->getSuccWeight(Dst);
+ if (!Weight)
+ return DEFAULT_WEIGHT;
+ return Weight;
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
+ uint32_t Weight = getEdgeWeight(Src, Dst);
+ uint32_t Sum = getSumForBlock(Src);
+
+ // FIXME: Implement BranchProbability::compare then change this code to
+ // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
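The cross-multiplication above evaluates Weight/Sum > 4/5 exactly, without
floating point; widening to 64 bits keeps Weight * 5 from overflowing. A
self-contained check:

#include <cassert>
#include <cstdint>

bool hot(uint32_t Weight, uint32_t Sum) {
  return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
}

int main() {
  assert(hot(81, 100));  // 81% exceeds the 80% threshold
  assert(!hot(80, 100)); // exactly 4/5 is not strictly greater
  return 0;
}

Note that getHotSucc below uses >= rather than >, so a successor at exactly
80% still qualifies there.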
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ MachineBasicBlock *MaxSucc = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // FIXME: Use BranchProbability::compare.
+ if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+ return MaxSucc;
+
+ return 0;
+}
+
+BranchProbability
+MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &MachineBranchProbabilityInfo::
+printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/src/LLVM/lib/CodeGen/MachineCSE.cpp b/src/LLVM/lib/CodeGen/MachineCSE.cpp
index c2f41ef..7eda8c1 100644
--- a/src/LLVM/lib/CodeGen/MachineCSE.cpp
+++ b/src/LLVM/lib/CodeGen/MachineCSE.cpp
@@ -22,15 +22,18 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
-STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
class MachineCSE : public MachineFunctionPass {
@@ -41,7 +44,9 @@
MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -49,16 +54,25 @@
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
+ AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
}
+ virtual void releaseMemory() {
+ ScopeMap.clear();
+ Exps.clear();
+ }
+
private:
const unsigned LookAheadLimit;
- typedef ScopedHashTableScope<MachineInstr*, unsigned,
- MachineInstrExpressionTrait> ScopeType;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
- ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
+ ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
@@ -66,11 +80,11 @@
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) const ;
- bool hasLivePhysRegDefUse(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- unsigned &PhysDef) const;
- bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
- unsigned PhysDef) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -85,8 +99,12 @@
} // end anonymous namespace
char MachineCSE::ID = 0;
-INITIALIZE_PASS(MachineCSE, "machine-cse",
- "Machine Common Subexpression Elimination", false, false);
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
@@ -98,9 +116,9 @@
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (!MRI->hasOneUse(Reg))
+ if (!MRI->hasOneNonDBGUse(Reg))
// Only coalesce single use copies. This ensures the copy will be
// deleted.
continue;
@@ -114,17 +132,12 @@
continue;
if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
continue;
- const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
- if (!NewRC)
+ if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
continue;
DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << *MI);
+ DEBUG(dbgs() << "*** to: " << *MI);
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
- if (NewRC != SRC)
- MRI->setRegClass(SrcReg, NewRC);
DefMI->eraseFromParent();
++NumCoalesces;
Changed = true;
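constrainRegClass above folds the old getCommonSubClass-then-setRegClass pair
into one call that narrows the register class in place and fails cleanly. A
toy model, treating a register class as a plain set of registers (an
illustrative assumption; real classes also carry spill size and alignment):

#include <set>

typedef std::set<unsigned> RegClass;

const RegClass *constrain(RegClass &RC, const RegClass &Required) {
  RegClass Common;
  for (std::set<unsigned>::const_iterator I = RC.begin(); I != RC.end(); ++I)
    if (Required.count(*I))
      Common.insert(*I);
  if (Common.empty())
    return 0; // incompatible classes; the caller skips this coalesce
  RC.swap(Common); // narrow in place, like MRI->constrainRegClass
  return &RC;
}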
@@ -170,14 +183,14 @@
return false;
}
-/// hasLivePhysRegDefUse - Return true if the specified instruction read / write
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
+/// writes physical registers (except for dead defs of physical registers).
/// It also collects the physical registers referenced by the instruction,
/// and their aliases, into PhysRefs.
-bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- unsigned &PhysDef) const {
- PhysDef = 0;
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const {
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
@@ -187,35 +200,22 @@
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (MO.isUse()) {
- // Can't touch anything to read a physical register.
- PhysDef = 0;
- return true;
- }
- if (MO.isDead())
- // If the def is dead, it's ok.
- continue;
- // Ok, this is a physical register def that's not marked "dead". That's
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
// common since this pass is run before livevariables. We can scan
// forward a few instructions and check if it is obviously dead.
- if (PhysDef) {
- // Multiple physical register defs. These are rare, forget about it.
- PhysDef = 0;
- return true;
- }
- PhysDef = Reg;
+ if (MO.isDef() &&
+ (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
+ continue;
+ PhysRefs.insert(Reg);
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ PhysRefs.insert(*Alias);
}
- if (PhysDef) {
- MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
- if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
- return true;
- }
- return false;
+ return !PhysRefs.empty();
}
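The alias expansion above is why PhysRefs is a set: each physical register
pulls in every register it overlaps. A sketch of the pattern, assuming the
0-terminated array that TRI->getAliasSet returns:

#include <set>

void collectWithAliases(unsigned Reg, const unsigned *Aliases,
                        std::set<unsigned> &PhysRefs) {
  PhysRefs.insert(Reg);
  for (const unsigned *A = Aliases; *A; ++A)
    PhysRefs.insert(*A); // e.g. on x86, EAX also pulls in AX, AH and AL
}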
-bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
- unsigned PhysDef) const {
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const {
// For now conservatively returns false if the common subexpression is
// not in the same basic block as the given instruction.
MachineBasicBlock *MBB = MI->getParent();
@@ -231,8 +231,17 @@
if (I == E)
return true;
- if (I->modifiesRegister(PhysDef, TRI))
- return false;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
--LookAheadLeft;
++I;
@@ -243,7 +252,7 @@
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
- MI->isKill() || MI->isDebugValue())
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
return false;
// Ignore copies.
@@ -251,12 +260,12 @@
return false;
// Ignore stuff that we obviously can't move.
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
- TID.hasUnmodeledSideEffects())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ MI->hasUnmodeledSideEffects())
return false;
- if (TID.mayLoad()) {
+ if (MCID.mayLoad()) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
@@ -275,14 +284,13 @@
MachineInstr *CSMI, MachineInstr *MI) {
// FIXME: Heuristics that work around the lack of live range splitting.
- // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an
- // immediate predecessor. We don't want to increase register pressure and end up
- // causing other computation to be spilled.
+ // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+ // an immediate predecessor. We don't want to increase register pressure and
+ // end up causing other computation to be spilled.
if (MI->getDesc().isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
- if (CSBB != BB &&
- find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
return false;
}
@@ -291,7 +299,7 @@
bool HasVRegUse = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() &&
+ if (MO.isReg() && MO.isUse() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
break;
@@ -353,35 +361,50 @@
if (!isCSECandidate(MI))
continue;
- bool DefPhys = false;
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
// Look for trivial copy coalescing opportunities.
if (PerformTrivialCoalescing(MI, MBB)) {
+ Changed = true;
+
// After coalescing MI itself may become a copy.
if (MI->isCopyLike())
continue;
FoundCSE = VNT.count(MI);
}
}
- // FIXME: commute commutable instructions?
- // If the instruction defines a physical register and the value *may* be
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI->getDesc().isCommutable()) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ Changed = true;
+ } else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression.
- unsigned PhysDef = 0;
- if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
+ // It's also not safe if the instruction uses physical registers.
+ SmallSet<unsigned,8> PhysRefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
FoundCSE = false;
// ... Unless the CS is local and it also defines the physical register
- // which is not clobbered in between.
- if (PhysDef) {
- unsigned CSVN = VNT.lookup(MI);
- MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
- FoundCSE = true;
- DefPhys = true;
- }
- }
+ // which is not clobbered in between and the physical register uses
+ // were not clobbered.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ FoundCSE = true;
}
if (!FoundCSE) {
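The commute handling a few lines up follows a try-then-undo shape: commute,
probe the value table, and commute back if nothing matched. A toy model where
"commute" is negation (self-inverse, like an in-place commuteInstruction):

#include <set>

bool tryCommutedLookup(const std::set<int> &Table, int &Key) {
  Key = -Key;    // commute in place
  if (Table.count(Key))
    return true; // the commuted form matches a known expression
  Key = -Key;    // no hit: commute back, leaving Key unchanged
  return false;
}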
@@ -407,13 +430,24 @@
unsigned NewReg = CSMI->getOperand(i).getReg();
if (OldReg == NewReg)
continue;
+
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
+
if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
DoCSE = false;
break;
}
+
+ // Don't perform CSE if the result of the old instruction cannot exist
+ // within the register class of the new instruction.
+ const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
+ if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DoCSE = false;
+ break;
+ }
+
CSEPairs.push_back(std::make_pair(OldReg, NewReg));
--NumDefs;
}
@@ -426,8 +460,11 @@
}
MI->eraseFromParent();
++NumCSEs;
- if (DefPhys)
+ if (!PhysRefs.empty())
++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
+ Changed = true;
} else {
DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
@@ -468,6 +505,8 @@
DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+ CurrVN = 0;
+
// Perform a DFS walk to determine the order of visit.
WorkList.push_back(Node);
do {
diff --git a/src/LLVM/lib/CodeGen/MachineDominators.cpp b/src/LLVM/lib/CodeGen/MachineDominators.cpp
index 36c9d66..04c8ecb 100644
--- a/src/LLVM/lib/CodeGen/MachineDominators.cpp
+++ b/src/LLVM/lib/CodeGen/MachineDominators.cpp
@@ -24,8 +24,8 @@
char MachineDominatorTree::ID = 0;
-static RegisterPass<MachineDominatorTree>
-E("machinedomtree", "MachineDominator Tree Construction", true);
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
@@ -42,6 +42,7 @@
MachineDominatorTree::MachineDominatorTree()
: MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
DT = new DominatorTreeBase<MachineBasicBlock>(false);
}
diff --git a/src/LLVM/lib/CodeGen/MachineFunction.cpp b/src/LLVM/lib/CodeGen/MachineFunction.cpp
index 0171700..20066a0 100644
--- a/src/LLVM/lib/CodeGen/MachineFunction.cpp
+++ b/src/LLVM/lib/CodeGen/MachineFunction.cpp
@@ -33,7 +33,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/GraphWriter.h"
@@ -52,19 +52,24 @@
}
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
- unsigned FunctionNum, MachineModuleInfo &mmi)
- : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) {
+ unsigned FunctionNum, MachineModuleInfo &mmi,
+ GCModuleInfo* gmi)
+ : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
if (TM.getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
else
RegInfo = 0;
MFInfo = 0;
- FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo());
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
if (Fn->hasFnAttr(Attribute::StackAlignment))
FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
Fn->getAttributes().getFnAttributes()));
ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
- Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
+ Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+ Alignment = std::max(Alignment,
+ TM.getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
JumpTableInfo = 0;
}
@@ -147,10 +152,10 @@
/// of `new MachineInstr'.
///
MachineInstr *
-MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
DebugLoc DL, bool NoImp) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(TID, DL, NoImp);
+ MachineInstr(MCID, DL, NoImp);
}
/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
@@ -190,20 +195,21 @@
}
MachineMemOperand *
-MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
- int64_t o, uint64_t s,
- unsigned base_alignment) {
- return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment);
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+ uint64_t s, unsigned base_alignment,
+ const MDNode *TBAAInfo) {
+ return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+ TBAAInfo);
}
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
return new (Allocator)
- MachineMemOperand(MMO->getValue(), MMO->getFlags(),
- int64_t(uint64_t(MMO->getOffset()) +
- uint64_t(Offset)),
- Size, MMO->getBaseAlignment());
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), 0);
}
MachineInstr::mmo_iterator
@@ -231,10 +237,10 @@
else {
// Clone the MMO and unset the store flag.
MachineMemOperand *JustLoad =
- getMachineMemOperand((*I)->getValue(),
+ getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOStore,
- (*I)->getOffset(), (*I)->getSize(),
- (*I)->getBaseAlignment());
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
Result[Index] = JustLoad;
}
++Index;
@@ -263,10 +269,10 @@
else {
// Clone the MMO and unset the load flag.
MachineMemOperand *JustStore =
- getMachineMemOperand((*I)->getValue(),
+ getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
- (*I)->getOffset(), (*I)->getSize(),
- (*I)->getBaseAlignment());
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
Result[Index] = JustStore;
}
++Index;
@@ -279,7 +285,7 @@
print(dbgs());
}
-void MachineFunction::print(raw_ostream &OS) const {
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << "# Machine code for function " << Fn->getName() << ":\n";
// Print Frame Information
@@ -298,37 +304,25 @@
OS << "Function Live Ins: ";
for (MachineRegisterInfo::livein_iterator
I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
- if (TRI)
- OS << "%" << TRI->getName(I->first);
- else
- OS << " %physreg" << I->first;
-
+ OS << PrintReg(I->first, TRI);
if (I->second)
- OS << " in reg%" << I->second;
-
+ OS << " in " << PrintReg(I->second, TRI);
if (llvm::next(I) != E)
OS << ", ";
}
OS << '\n';
}
if (RegInfo && !RegInfo->liveout_empty()) {
- OS << "Function Live Outs: ";
+ OS << "Function Live Outs:";
for (MachineRegisterInfo::liveout_iterator
- I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I){
- if (TRI)
- OS << '%' << TRI->getName(*I);
- else
- OS << "%physreg" << *I;
-
- if (llvm::next(I) != E)
- OS << " ";
- }
+ I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+ OS << ' ' << PrintReg(*I, TRI);
OS << '\n';
}
for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
OS << '\n';
- BB->print(OS);
+ BB->print(OS, Indexes);
}
OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
@@ -346,17 +340,15 @@
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineFunction *Graph) {
- if (isSimple () && Node->getBasicBlock() &&
- !Node->getBasicBlock()->getName().empty())
- return Node->getBasicBlock()->getNameStr() + ":";
-
std::string OutStr;
{
raw_string_ostream OSS(OutStr);
-
- if (isSimple())
- OSS << Node->getNumber() << ':';
- else
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
Node->print(OSS);
}
@@ -426,6 +418,13 @@
return Ctx.GetOrCreateSymbol(Name.str());
}
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(getFunctionNumber())+"$pb");
+}
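A worked example of the name built above, assuming the Darwin-style "L"
private-global prefix: function number 7 produces the local symbol "L7$pb".

#include <cassert>
#include <sstream>
#include <string>

std::string picBaseName(const std::string &PrivatePrefix, unsigned FnNum) {
  std::ostringstream OS;
  OS << PrivatePrefix << FnNum << "$pb";
  return OS.str();
}

int main() {
  assert(picBaseName("L", 7) == "L7$pb");
  return 0;
}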
//===----------------------------------------------------------------------===//
// MachineFrameInfo implementation
@@ -485,7 +484,7 @@
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
- const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+ const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
OS << "Frame Objects:\n";
@@ -620,7 +619,7 @@
// MachineConstantPool implementation
//===----------------------------------------------------------------------===//
-const Type *MachineConstantPoolEntry::getType() const {
+Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
@@ -637,6 +636,10 @@
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
if (Constants[i].isMachineConstantPoolEntry())
delete Constants[i].Val.MachineCPVal;
+ for (DenseSet<MachineConstantPoolValue*>::iterator I =
+ MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+ I != E; ++I)
+ delete *I;
}
/// CanShareConstantPoolEntry - Test whether the given two constants
@@ -714,8 +717,10 @@
//
// FIXME, this could be made much more efficient for large constant pools.
int Idx = V->getExistingMachineCPValue(this, Alignment);
- if (Idx != -1)
+ if (Idx != -1) {
+ MachineCPVsSharingEntries.insert(V);
return (unsigned)Idx;
+ }
Constants.push_back(MachineConstantPoolEntry(V, Alignment));
return Constants.size()-1;
diff --git a/src/LLVM/lib/CodeGen/MachineFunctionAnalysis.cpp b/src/LLVM/lib/CodeGen/MachineFunctionAnalysis.cpp
index 4f84b95..054c750 100644
--- a/src/LLVM/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/src/LLVM/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -12,22 +12,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
using namespace llvm;
-// Register this pass with PassInfo directly to avoid having to define
-// a default constructor.
-static PassInfo
-X("Machine Function Analysis", "machine-function-analysis",
- &MachineFunctionAnalysis::ID, 0,
- /*CFGOnly=*/false, /*is_analysis=*/true);
-
char MachineFunctionAnalysis::ID = 0;
MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
MachineFunctionAnalysis::~MachineFunctionAnalysis() {
@@ -52,7 +47,8 @@
bool MachineFunctionAnalysis::runOnFunction(Function &F) {
assert(!MF && "MachineFunctionAnalysis already initialized!");
MF = new MachineFunction(&F, TM, NextFnNum++,
- getAnalysis<MachineModuleInfo>());
+ getAnalysis<MachineModuleInfo>(),
+ getAnalysisIfAvailable<GCModuleInfo>());
return false;
}
diff --git a/src/LLVM/lib/CodeGen/MachineInstr.cpp b/src/LLVM/lib/CodeGen/MachineInstr.cpp
index 1602a7d..a240667 100644
--- a/src/LLVM/lib/CodeGen/MachineInstr.cpp
+++ b/src/LLVM/lib/CodeGen/MachineInstr.cpp
@@ -15,19 +15,22 @@
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Value.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
@@ -48,7 +51,7 @@
/// explicitly nulled out.
void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
assert(isReg() && "Can only add reg operand to use lists");
-
+
// If the reginfo pointer is null, just explicitly null out or next/prev
// pointers, to ensure they are not garbage.
if (RegInfo == 0) {
@@ -56,23 +59,23 @@
Contents.Reg.Next = 0;
return;
}
-
+
// Otherwise, add this operand to the head of the registers use/def list.
MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
-
+
// For SSA values, we prefer to keep the definition at the start of the list.
// We do this by skipping over the definition if it is at the head of the
// list.
if (*Head && (*Head)->isDef())
Head = &(*Head)->Contents.Reg.Next;
-
+
Contents.Reg.Next = *Head;
if (Contents.Reg.Next) {
assert(getReg() == Contents.Reg.Next->getReg() &&
"Different regs on the same list!");
Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
}
-
+
Contents.Reg.Prev = Head;
*Head = this;
}
@@ -83,7 +86,7 @@
assert(isOnRegUseList() && "Reg operand is not on a use list");
// Unlink this from the doubly linked list of operands.
MachineOperand *NextOp = Contents.Reg.Next;
- *Contents.Reg.Prev = NextOp;
+ *Contents.Reg.Prev = NextOp;
if (NextOp) {
assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
@@ -94,7 +97,7 @@
void MachineOperand::setReg(unsigned Reg) {
if (getReg() == Reg) return; // No change.
-
+
// Otherwise, we have to change the register. If this operand is embedded
// into a machine function, we need to update the old and new register's
// use/def lists.
@@ -102,13 +105,13 @@
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent()) {
RemoveRegOperandFromRegInfo();
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
AddRegOperandToRegInfo(&MF->getRegInfo());
return;
}
-
+
// Otherwise, just change the register, no problem. :)
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
}
void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
@@ -125,7 +128,8 @@
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
if (getSubReg()) {
Reg = TRI.getSubReg(Reg, getSubReg());
- assert(Reg && "Invalid SubReg for physical register");
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
setSubReg(0);
}
setReg(Reg);
@@ -140,7 +144,7 @@
if (isReg() && getParent() && getParent()->getParent() &&
getParent()->getParent()->getParent())
RemoveRegOperandFromRegInfo();
-
+
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
}
@@ -151,7 +155,7 @@
void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
- // If this operand is already a register operand, use setReg to update the
+ // If this operand is already a register operand, use setReg to update the
// register's use/def lists.
if (isReg()) {
assert(!isEarlyClobber());
@@ -159,7 +163,7 @@
} else {
// Otherwise, change this to a register and set the reg#.
OpKind = MO_Register;
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
// If this operand is embedded in a function, add the operand to the
// register's use/def list.
@@ -185,7 +189,7 @@
if (getType() != Other.getType() ||
getTargetFlags() != Other.getTargetFlags())
return false;
-
+
switch (getType()) {
default: llvm_unreachable("Unrecognized operand type");
case MachineOperand::MO_Register:
@@ -193,6 +197,8 @@
getSubReg() == Other.getSubReg();
case MachineOperand::MO_Immediate:
return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
case MachineOperand::MO_FPImmediate:
return getFPImm() == Other.getFPImm();
case MachineOperand::MO_MachineBasicBlock:
@@ -227,24 +233,11 @@
if (const MachineBasicBlock *MBB = MI->getParent())
if (const MachineFunction *MF = MBB->getParent())
TM = &MF->getTarget();
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
switch (getType()) {
case MachineOperand::MO_Register:
- if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
- OS << "%reg" << getReg();
- } else {
- if (TM)
- OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
- else
- OS << "%physreg" << getReg();
- }
-
- if (getSubReg() != 0) {
- if (TM)
- OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg());
- else
- OS << ':' << getSubReg();
- }
+ OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
isEarlyClobber()) {
@@ -279,6 +272,9 @@
case MachineOperand::MO_Immediate:
OS << getImm();
break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->getValue().print(OS, false);
+ break;
case MachineOperand::MO_FPImmediate:
if (getFPImm()->getType()->isFloatTy())
OS << getFPImm()->getValueAPF().convertToFloat();
@@ -326,7 +322,7 @@
default:
llvm_unreachable("Unrecognized operand type");
}
-
+
if (unsigned TF = getTargetFlags())
OS << "[TF=" << TF << ']';
}
@@ -335,10 +331,45 @@
// MachineMemOperand Implementation
//===----------------------------------------------------------------------===//
-MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
- int64_t o, uint64_t s, unsigned int a)
- : Offset(o), Size(s), V(v),
- Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) {
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V == 0) return 0;
+ return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+ return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+ return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+ return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+ return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+ return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+ uint64_t s, unsigned int a,
+ const MDNode *TBAAInfo)
+ : PtrInfo(ptrinfo), Size(s),
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+ TBAAInfo(TBAAInfo) {
+ assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ "invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
}
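The Flags expression above packs the memory flags into the low MOMaxBits bits
and log2(alignment) + 1 into the rest, so the base alignment needs no field of
its own. A self-contained round-trip, assuming MOMaxBits == 4 as in the
MachineMemOperand enum:

#include <cassert>
#include <cstdint>

enum { MOMaxBits = 4 };

uint32_t packFlags(uint32_t f, uint32_t a) { // a: power-of-two alignment
  uint32_t lg = 0;
  while ((1u << lg) < a)
    ++lg; // lg = log2(a)
  return (f & ((1u << MOMaxBits) - 1)) | ((lg + 1) << MOMaxBits);
}

uint32_t baseAlignment(uint32_t Flags) {
  return 1u << ((Flags >> MOMaxBits) - 1);
}

int main() {
  assert(baseAlignment(packFlags(3, 16)) == 16); // 3 = MOLoad | MOStore
  return 0;
}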
@@ -346,9 +377,9 @@
/// Profile - Gather unique data for the object.
///
void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(Offset);
+ ID.AddInteger(getOffset());
ID.AddInteger(Size);
- ID.AddPointer(V);
+ ID.AddPointer(getValue());
ID.AddInteger(Flags);
}
@@ -364,8 +395,7 @@
((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
// Also update the base and offset, because the new alignment may
// not be applicable with the old ones.
- V = MMO->getValue();
- Offset = MMO->getOffset();
+ PtrInfo = MMO->PtrInfo;
}
}
@@ -378,7 +408,7 @@
raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
assert((MMO.isLoad() || MMO.isStore()) &&
"SV has to be a load, store or both.");
-
+
if (MMO.isVolatile())
OS << "Volatile ";
@@ -387,7 +417,7 @@
if (MMO.isStore())
OS << "ST";
OS << MMO.getSize();
-
+
// Print the address information.
OS << "[";
if (!MMO.getValue())
@@ -410,6 +440,20 @@
MMO.getBaseAlignment() != MMO.getSize())
OS << "(align=" << MMO.getAlignment() << ")";
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print nontemporal info.
+ if (MMO.isNonTemporal())
+ OS << "(nontemporal)";
+
return OS;
}
@@ -418,32 +462,34 @@
//===----------------------------------------------------------------------===//
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-/// TID NULL and no operands.
+/// MCID NULL and no operands.
MachineInstr::MachineInstr()
- : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
+ : MCID(0), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0),
Parent(0) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
}
void MachineInstr::addImplicitDefUseOperands() {
- if (TID->ImplicitDefs)
- for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ if (MCID->ImplicitDefs)
+ for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
- if (TID->ImplicitUses)
- for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+ if (MCID->ImplicitUses)
+ for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
}
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified by
-/// the TargetInstrDesc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
- : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
+ unsigned NumImplicitOps = 0;
if (!NoImp)
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
if (!NoImp)
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -451,13 +497,14 @@
}
/// MachineInstr ctor - As above, but with a DebugLoc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
- : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
- Parent(0), debugLoc(dl) {
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ unsigned NumImplicitOps = 0;
if (!NoImp)
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
if (!NoImp)
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -465,14 +512,15 @@
}
/// MachineInstr ctor - Work exactly the same as the ctor two above, except
-/// that the MachineInstr is created and added to the end of the specified
+/// that the MachineInstr is created and added to the end of the specified
/// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ unsigned NumImplicitOps =
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -482,12 +530,13 @@
/// MachineInstr ctor - As above, but with a DebugLoc.
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
- const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
- Parent(0), debugLoc(dl) {
+ const MCInstrDesc &tid)
+ : MCID(&tid), Flags(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ unsigned NumImplicitOps =
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -497,7 +546,7 @@
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0),
+ : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -505,7 +554,9 @@
// Add operands
for (unsigned i = 0; i != MI.getNumOperands(); ++i)
addOperand(MI.getOperand(i));
- NumImplicitOps = MI.NumImplicitOps;
+
+ // Copy all the flags.
+ Flags = MI.Flags;
// Set parent to null.
Parent = 0;
@@ -557,102 +608,74 @@
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
-/// (before the first implicit operand).
+/// (before the first implicit operand).
void MachineInstr::addOperand(const MachineOperand &Op) {
+ assert(MCID && "Cannot add operands before providing an instr descriptor");
bool isImpReg = Op.isReg() && Op.isImplicit();
- assert((isImpReg || !OperandsComplete()) &&
- "Trying to add an operand to a machine instr that is already done!");
-
MachineRegisterInfo *RegInfo = getRegInfo();
- // If we are adding the operand to the end of the list, our job is simpler.
- // This is true most of the time, so this is a reasonable optimization.
- if (isImpReg || NumImplicitOps == 0) {
- // We can only do this optimization if we know that the operand list won't
- // reallocate.
- if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
- Operands.push_back(Op);
-
- // Set the parent of the operand.
- Operands.back().ParentMI = this;
-
- // If the operand is a register, update the operand's use list.
- if (Op.isReg()) {
- Operands.back().AddRegOperandToRegInfo(RegInfo);
- // If the register operand is flagged as early, mark the operand as such
- unsigned OpNo = Operands.size() - 1;
- if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
- return;
+ // If the Operands backing store is reallocated, all register operands must
+ // be removed and re-added to RegInfo, because RegInfo keeps pointers into
+ // the operand list.
+ bool Reallocate = RegInfo &&
+ !Operands.empty() && Operands.size() == Operands.capacity();
+
+ // Find the insert location for the new operand. Implicit registers go at
+ // the end; everything else goes before the implicit regs.
+ unsigned OpNo = Operands.size();
+
+ // Remove all the implicit operands from RegInfo if they need to be shifted.
+ // FIXME: Allow mixed explicit and implicit operands on inline asm.
+ // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
+ // implicit-defs, but they must not be moved around. See the FIXME in
+ // InstrEmitter.cpp.
+ if (!isImpReg && !isInlineAsm()) {
+ while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
+ --OpNo;
+ if (RegInfo)
+ Operands[OpNo].RemoveRegOperandFromRegInfo();
}
}
-
- // Otherwise, we have to insert a real operand before any implicit ones.
- unsigned OpNo = Operands.size()-NumImplicitOps;
- // If this instruction isn't embedded into a function, then we don't need to
- // update any operand lists.
- if (RegInfo == 0) {
- // Simple insertion, no reginfo update needed for other register operands.
- Operands.insert(Operands.begin()+OpNo, Op);
- Operands[OpNo].ParentMI = this;
+ // OpNo now points at the desired insertion point. Unless this is a variadic
+ // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
+ assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) &&
+ "Trying to add an operand to a machine instr that is already done!");
- // Do explicitly set the reginfo for this operand though, to ensure the
- // next/prev fields are properly nulled out.
- if (Operands[OpNo].isReg()) {
- Operands[OpNo].AddRegOperandToRegInfo(0);
- // If the register operand is flagged as early, mark the operand as such
- if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
+ // All operands from OpNo have been removed from RegInfo. If the Operands
+ // backing store needs to be reallocated, we also need to remove any other
+ // register operands.
+ if (Reallocate)
+ for (unsigned i = 0; i != OpNo; ++i)
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
- } else if (Operands.size()+1 <= Operands.capacity()) {
- // Otherwise, we have to remove register operands from their register use
- // list, add the operand, then add the register operands back to their use
- // list. This also must handle the case when the operand list reallocates
- // to somewhere else.
-
- // If insertion of this operand won't cause reallocation of the operand
- // list, just remove the implicit operands, add the operand, then re-add all
- // the rest of the operands.
- for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
- assert(Operands[i].isReg() && "Should only be an implicit reg!");
- Operands[i].RemoveRegOperandFromRegInfo();
- }
-
- // Add the operand. If it is a register, add it to the reg list.
- Operands.insert(Operands.begin()+OpNo, Op);
- Operands[OpNo].ParentMI = this;
+ // Insert the new operand at OpNo.
+ Operands.insert(Operands.begin() + OpNo, Op);
+ Operands[OpNo].ParentMI = this;
- if (Operands[OpNo].isReg()) {
- Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
- // If the register operand is flagged as early, mark the operand as such
- if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
-
- // Re-add all the implicit ops.
- for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) {
+ // The Operands backing store has now been reallocated, so we can re-add the
+ // operands before OpNo.
+ if (Reallocate)
+ for (unsigned i = 0; i != OpNo; ++i)
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+
+ // When adding a register operand, tell RegInfo about it.
+ if (Operands[OpNo].isReg()) {
+ // Add the new operand to RegInfo, even when RegInfo is NULL.
+ // This will initialize the linked list pointers.
+ Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // If the register operand is flagged as early, mark the operand as such.
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
+
+ // Re-add all the implicit ops.
+ if (RegInfo) {
+ for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
assert(Operands[i].isReg() && "Should only be an implicit reg!");
Operands[i].AddRegOperandToRegInfo(RegInfo);
}
- } else {
- // Otherwise, we will be reallocating the operand list. Remove all reg
- // operands from their list, then readd them after the operand list is
- // reallocated.
- RemoveRegOperandsFromUseLists();
-
- Operands.insert(Operands.begin()+OpNo, Op);
- Operands[OpNo].ParentMI = this;
-
- // Re-add all the operands.
- AddRegOperandsToUseLists(*RegInfo);
-
- // If the register operand is flagged as early, mark the operand as such
- if (Operands[OpNo].isReg()
- && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
}
}
@@ -661,13 +684,13 @@
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
assert(OpNo < Operands.size() && "Invalid operand number");
-
+
// Special case removing the last one.
if (OpNo == Operands.size()-1) {
// If needed, remove from the reg def/use list.
if (Operands.back().isReg() && Operands.back().isOnRegUseList())
Operands.back().RemoveRegOperandFromRegInfo();
-
+
Operands.pop_back();
return;
}
@@ -682,7 +705,7 @@
Operands[i].RemoveRegOperandFromRegInfo();
}
}
-
+
Operands.erase(Operands.begin()+OpNo);
if (RegInfo) {
@@ -724,20 +747,41 @@
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
const MachineOperand &OMO = Other->getOperand(i);
+ if (!MO.isReg()) {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ continue;
+ }
+
// Clients may or may not want to ignore defs when testing for equality.
// For example, machine CSE pass only cares about finding common
// subexpressions, so it's safe to ignore virtual register defs.
- if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
+ if (MO.isDef()) {
if (Check == IgnoreDefs)
continue;
- // Check == IgnoreVRegDefs
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
- TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
- if (MO.getReg() != OMO.getReg())
+ else if (Check == IgnoreVRegDefs) {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else {
+ if (!MO.isIdenticalTo(OMO))
return false;
- } else if (!MO.isIdenticalTo(OMO))
- return false;
+ if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+ return false;
+ }
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+ return false;
+ }
}
+ // If DebugLoc does not match then two dbg.values are not identical.
+ if (isDebugValue())
+ if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+ && getDebugLoc() != Other->getDebugLoc())
+ return false;
return true;
}
@@ -758,20 +802,11 @@
}
-/// OperandComplete - Return true if it's illegal to add a new operand
-///
-bool MachineInstr::OperandsComplete() const {
- unsigned short NumOperands = TID->getNumOperands();
- if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
- return true; // Broken: we have all the operands of this instruction!
- return false;
-}
-
/// getNumExplicitOperands - Returns the number of non-implicit operands.
///
unsigned MachineInstr::getNumExplicitOperands() const {
- unsigned NumOperands = TID->getNumOperands();
- if (!TID->isVariadic())
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
return NumOperands;
for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
@@ -782,6 +817,75 @@
return NumOperands;
}
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
+
+int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
+ unsigned *GroupNo) const {
+ assert(isInlineAsm() && "Expected an inline asm instruction");
+ assert(OpIdx < getNumOperands() && "OpIdx out of range");
+
+ // Ignore queries about the initial operands.
+ if (OpIdx < InlineAsm::MIOp_FirstOperand)
+ return -1;
+
+ unsigned Group = 0;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ // If we reach the implicit register operands, stop looking.
+ if (!FlagMO.isImm())
+ return -1;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ if (i + NumOps > OpIdx) {
+ if (GroupNo)
+ *GroupNo = Group;
+ return i;
+ }
+ ++Group;
+ }
+ return -1;
+}
+
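findInlineAsmFlagIdx above walks the operand groups of an inline asm node:
after the fixed leading operands come groups of one immediate flag word plus
the registers it covers, then the implicit operands. A sketch of that scan
over hypothetical group sizes:

#include <vector>

int flagIndexFor(const std::vector<unsigned> &RegsPerGroup, unsigned First,
                 unsigned OpIdx) {
  unsigned i = First;
  for (unsigned Regs : RegsPerGroup) {
    unsigned NumOps = 1 + Regs; // the flag word plus its register operands
    if (i + NumOps > OpIdx)
      return (int)i; // OpIdx falls inside this group; return its flag index
    i += NumOps;
  }
  return -1; // past all groups, e.g. an implicit register operand
}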
+const TargetRegisterClass*
+MachineInstr::getRegClassConstraint(unsigned OpIdx,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) const {
+ // Most opcodes have fixed constraints in their MCInstrDesc.
+ if (!isInlineAsm())
+ return TII->getRegClass(getDesc(), OpIdx, TRI);
+
+ if (!getOperand(OpIdx).isReg())
+ return NULL;
+
+ // For tied uses on inline asm, get the constraint from the def.
+ unsigned DefIdx;
+ if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx))
+ OpIdx = DefIdx;
+
+ // Inline asm stores register class constraints in the flag word.
+ int FlagIdx = findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0)
+ return NULL;
+
+ unsigned Flag = getOperand(FlagIdx).getImm();
+ unsigned RCID;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID))
+ return TRI->getRegClass(RCID);
+
+ // Assume that all registers in a memory operand are pointers.
+ if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+ return TRI->getPointerRegClass();
+
+ return NULL;
+}
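// Editor's annotation (not part of the patch): the INLINEASM operand layout
// that findInlineAsmFlagIdx() and getRegClassConstraint() rely on is:
//
//   0: MIOp_AsmString   - the asm template string
//   1: MIOp_ExtraInfo   - immediate carrying HasSideEffects/IsAlignStack bits
//   2: first flag word  - encodes the operand kind and register count of the
//      first operand group, followed by that group's operands, then further
//      <flag, operands...> groups, and finally any implicit register operands.
//
// findInlineAsmFlagIdx(OpIdx) walks these groups and returns the index of the
// flag word whose group contains OpIdx, or -1 for the leading/implicit ones.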
/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
/// the specified register, or -1 if it is not found. It further tightens
@@ -824,7 +928,8 @@
Ops->push_back(i);
if (MO.isUse())
Use |= !MO.isUndef();
- else if (MO.getSubReg())
+ else if (MO.getSubReg() && !MO.isUndef())
+ // A partial <def,undef> doesn't count as reading the register.
PartDef = true;
else
FullDef = true;
@@ -864,28 +969,64 @@
/// operand list that is used to represent the predicate. It returns -1 if
/// none is found.
int MachineInstr::findFirstPredOperandIdx() const {
- const TargetInstrDesc &TID = getDesc();
- if (TID.isPredicable()) {
+ // Don't call MCID.findFirstPredOperandIdx() because this variant
+ // is sometimes called on an instruction that's not yet complete, and
+ // so the number of operands is less than the MCID indicates. In
+ // particular, the PTX target does this.
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (TID.OpInfo[i].isPredicate())
+ if (MCID.OpInfo[i].isPredicate())
return i;
}
return -1;
}
-
+
/// isRegTiedToUseOperand - Given the index of a register def operand,
/// check if the register def is tied to a source operand, due to either
/// two-address elimination or inline assembly constraints. Returns the
/// first tied use operand index by reference if UseOpIdx is not null.
bool MachineInstr::
isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+ if (isInlineAsm()) {
+ assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
+ const MachineOperand &MO = getOperand(DefOpIdx);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ return false;
+ // Determine the actual operand index that corresponds to this index.
+ unsigned DefNo = 0;
+ int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo);
+ if (FlagIdx < 0)
+ return false;
+
+ // Which part of the group is DefOpIdx?
+ unsigned DefPart = DefOpIdx - (FlagIdx + 1);
+
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &FMO = getOperand(i);
+ if (!FMO.isImm())
+ continue;
+ if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+ continue;
+ unsigned Idx;
+ if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+ Idx == DefNo) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i + 1 + DefPart;
+ return true;
+ }
+ }
+ return false;
+ }
+
assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
- const TargetInstrDesc &TID = getDesc();
- for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ const MCInstrDesc &MCID = getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (MO.isReg() && MO.isUse() &&
- TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
if (UseOpIdx)
*UseOpIdx = (unsigned)i;
return true;
@@ -899,13 +1040,45 @@
/// operand index by reference.
bool MachineInstr::
isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
- const TargetInstrDesc &TID = getDesc();
- if (UseOpIdx >= TID.getNumOperands())
+ if (isInlineAsm()) {
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+ return false;
+
+ // Find the flag operand corresponding to UseOpIdx
+ int FlagIdx = findInlineAsmFlagIdx(UseOpIdx);
+ if (FlagIdx < 0)
+ return false;
+
+ const MachineOperand &UFMO = getOperand(FlagIdx);
+ unsigned DefNo;
+ if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+ if (!DefOpIdx)
+ return true;
+
+ unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
+ // Remember to adjust the index. First operand is asm string, second is
+ // the HasSideEffects and AlignStack bits, then there is a flag for each.
+ while (DefNo) {
+ const MachineOperand &FMO = getOperand(DefIdx);
+ assert(FMO.isImm());
+ // Skip over this def.
+ DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ --DefNo;
+ }
+ *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
+ return true;
+ }
+ return false;
+ }
+
+ const MCInstrDesc &MCID = getDesc();
+ if (UseOpIdx >= MCID.getNumOperands())
return false;
const MachineOperand &MO = getOperand(UseOpIdx);
if (!MO.isReg() || !MO.isUse())
return false;
- int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+ int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
if (DefIdx == -1)
return false;
if (DefOpIdx)
@@ -945,11 +1118,11 @@
/// copyPredicates - Copies predicate operand(s) from MI.
void MachineInstr::copyPredicates(const MachineInstr *MI) {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isPredicable())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
return;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (TID.OpInfo[i].isPredicate()) {
+ if (MCID.OpInfo[i].isPredicate()) {
// Predicated operands must be last operands.
addOperand(MI->getOperand(i));
}
@@ -986,11 +1159,13 @@
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (TID->mayStore() || TID->isCall()) {
+ if (MCID->mayStore() || MCID->isCall()) {
SawStore = true;
return false;
}
- if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+
+ if (isLabel() || isDebugValue() ||
+ MCID->isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -998,7 +1173,7 @@
// destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (TID->mayLoad() && !isInvariantLoad(AA))
+ if (MCID->mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
@@ -1038,17 +1213,17 @@
/// have no volatile memory references.
bool MachineInstr::hasVolatileMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
- if (!TID->mayStore() &&
- !TID->mayLoad() &&
- !TID->isCall() &&
- !TID->hasUnmodeledSideEffects())
+ if (!MCID->mayStore() &&
+ !MCID->mayLoad() &&
+ !MCID->isCall() &&
+ !hasUnmodeledSideEffects())
return false;
// Otherwise, if the instruction has no memory reference information,
// conservatively assume it wasn't preserved.
if (memoperands_empty())
return true;
-
+
// Check the memory reference information for volatile references.
for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
if ((*I)->isVolatile())
@@ -1064,7 +1239,7 @@
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
- if (!TID->mayLoad())
+ if (!MCID->mayLoad())
return false;
// If the instruction has lost its memoperands, conservatively assume that
@@ -1085,7 +1260,9 @@
if (PSV->isConstant(MFI))
continue;
// If we have an AliasAnalysis, ask it whether the memory is constant.
- if (AA && AA->pointsToConstantMemory(V))
+ if (AA && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(V, (*I)->getSize(),
+ (*I)->getTBAAInfo())))
continue;
}
@@ -1113,6 +1290,18 @@
return Reg;
}
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (getDesc().hasUnmodeledSideEffects())
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
///
bool MachineInstr::allDefsAreDead() const {
@@ -1126,11 +1315,22 @@
return true;
}
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ addOperand(MO);
+ }
+}
+
void MachineInstr::dump() const {
dbgs() << " " << *this;
}
-static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
raw_ostream &CommentOS) {
const LLVMContext &Ctx = MF->getFunction()->getContext();
if (!DL.isUnknown()) { // Print source line info.
@@ -1176,7 +1376,7 @@
if (StartOp != 0) OS << ", ";
getOperand(StartOp).print(OS, TM);
unsigned Reg = getOperand(StartOp).getReg();
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
VirtRegs.push_back(Reg);
}
@@ -1189,11 +1389,30 @@
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
+
+ if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+ // Print HasSideEffects, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
- if (MO.isReg() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegs.push_back(MO.getReg());
// Omit call-clobbered registers which aren't used anywhere. This makes
@@ -1203,7 +1422,7 @@
if (MF && getDesc().isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
@@ -1224,10 +1443,10 @@
if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
if (i < getDesc().NumOperands) {
- const TargetOperandInfo &TOI = getDesc().OpInfo[i];
- if (TOI.isPredicate())
+ const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+ if (MCOI.isPredicate())
OS << "pred:";
- if (TOI.isOptionalDef())
+ if (MCOI.isOptionalDef())
OS << "opt:";
}
if (isDebugValue() && MO.isMetadata()) {
@@ -1239,6 +1458,36 @@
MO.print(OS, TM);
} else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ switch (InlineAsm::getKind(Flag)) {
+ case InlineAsm::Kind_RegUse: OS << ":[reguse"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag); break;
+ }
+
+ unsigned RCID = 0;
+ if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TM)
+ OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName();
+ else
+ OS << ":RC" << RCID;
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " tiedto:$" << TiedTo;
+
+ OS << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
} else
MO.print(OS, TM);
}
@@ -1250,6 +1499,14 @@
}
bool HaveSemi = false;
+ if (Flags) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ OS << " flags: ";
+
+ if (Flags & FrameSetup)
+ OS << "FrameSetup";
+ }
+
if (!memoperands_empty()) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
@@ -1267,26 +1524,39 @@
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << RC->getName() << ":%reg" << VirtRegs[i];
+ OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
++j;
continue;
}
if (VirtRegs[i] != VirtRegs[j])
- OS << "," << VirtRegs[j];
+ OS << "," << PrintReg(VirtRegs[j]);
VirtRegs.erase(VirtRegs.begin()+j);
}
}
}
- if (!debugLoc.isUnknown() && MF) {
- if (!HaveSemi) OS << ";";
+ // Print debug location information.
+ if (isDebugValue() && getOperand(e - 1).isMetadata()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ DIVariable DV(getOperand(e - 1).getMetadata());
+ OS << " line no:" << DV.getLineNumber();
+ if (MDNode *InlinedAt = DV.getInlinedAt()) {
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " inlined @[ ";
+ printDebugLoc(InlinedAtDL, MF, OS);
+ OS << " ]";
+ }
+ }
+ } else if (!debugLoc.isUnknown() && MF) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
OS << " dbg:";
printDebugLoc(debugLoc, MF, OS);
}
- OS << "\n";
+ OS << '\n';
}
bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
@@ -1363,13 +1633,8 @@
continue;
if (Reg == IncomingReg) {
- if (!Found) {
- if (MO.isDead())
- // The register is already marked dead.
- return true;
- MO.setIsDead();
- Found = true;
- }
+ MO.setIsDead();
+ Found = true;
} else if (hasAliases && MO.isDead() &&
TargetRegisterInfo::isPhysicalRegister(Reg)) {
// There exists a super-register that's marked dead.
@@ -1396,7 +1661,7 @@
// new implicit operand if required.
if (Found || !AddIfNotFound)
return Found;
-
+
addOperand(MachineOperand::CreateReg(IncomingReg,
true /*IsDef*/,
true /*IsImp*/,
@@ -1452,8 +1717,7 @@
switch (MO.getType()) {
default: break;
case MachineOperand::MO_Register:
- if (MO.isDef() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
Key |= MO.getReg();
break;
@@ -1490,3 +1754,24 @@
}
return Hash;
}
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
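// Editor's annotation (not part of the patch): the metadata operand scanned
// above is the !srcloc node attached to inline asm calls; its first element
// is a ConstantInt cookie that LLVMContext::emitError() uses to report the
// diagnostic at the original source location of the asm statement.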
diff --git a/src/LLVM/lib/CodeGen/MachineLICM.cpp b/src/LLVM/lib/CodeGen/MachineLICM.cpp
index 1a74b74..a1f80d5 100644
--- a/src/LLVM/lib/CodeGen/MachineLICM.cpp
+++ b/src/LLVM/lib/CodeGen/MachineLICM.cpp
@@ -28,6 +28,8 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -35,13 +37,24 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
-STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
-STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+static cl::opt<bool>
+AvoidSpeculation("avoid-speculation",
+ cl::desc("MachineLICM should avoid speculation"),
+ cl::init(false), cl::Hidden);
+
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
@@ -51,9 +64,11 @@
const TargetMachine *TM;
const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
const TargetRegisterInfo *TRI;
const MachineFrameInfo *MFI;
- MachineRegisterInfo *RegInfo;
+ MachineRegisterInfo *MRI;
+ const InstrItineraryData *InstrItins;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -68,23 +83,48 @@
BitVector AllocatableSet;
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register class. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
// For each opcode, keep a list of potential CSE instructions.
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+ enum {
+ SpeculateFalse = 0,
+ SpeculateTrue = 1,
+ SpeculateUnknown = 2
+ };
+
+ // If an MBB does not dominate the loop's exiting blocks, then it may not be
+ // safe to hoist loads from this block.
+ // Tri-state: 0 - false, 1 - true, 2 - unknown
+ unsigned SpeculationState;
+
public:
static char ID; // Pass identification, replacement for typeid
MachineLICM() :
- MachineFunctionPass(ID), PreRegAlloc(true) {}
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
explicit MachineLICM(bool PreRA) :
- MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "Machine Instruction LICM"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AliasAnalysis>();
@@ -94,6 +134,13 @@
}
virtual void releaseMemory() {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+ CI->second.clear();
CSEMap.clear();
}
@@ -138,21 +185,59 @@
///
bool IsLoopInvariantInst(MachineInstr &I);
+ /// HasAnyPHIUse - Return true if the specified register is used by any
+ /// phi node.
+ bool HasAnyPHIUse(unsigned Reg) const;
+
+ /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and a use in the current loop; return true if the target considers
+ /// it 'high'.
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ /// CanCauseHighRegPressure - Visit BBs from header to current BB,
+ /// check if hoisting an instruction of the given cost matrix can cause high
+ /// register pressure.
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+
+ /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+ /// the current block and update their register pressures to reflect the
+ /// effect of hoisting MI from the current block to the preheader.
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
/// IsProfitableToHoist - Return true if it is potentially profitable to
/// hoist the given loop invariant.
bool IsProfitableToHoist(MachineInstr &MI);
+ /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+ /// If not then a load from this mbb may not be safe to hoist.
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+
/// HoistRegion - Walk the specified region of the CFG (defined by all
/// blocks dominated by the specified block, and that are in the current
/// loop) in depth first order w.r.t the DominatorTree. This allows us to
/// visit definitions before uses, allowing us to hoist a loop body in one
/// pass without iteration.
///
- void HoistRegion(MachineDomTreeNode *N);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
- /// isLoadFromConstantMemory - Return true if the given instruction is a
- /// load from constant memory.
- bool isLoadFromConstantMemory(MachineInstr *MI);
+ /// getRegisterClassIDAndCost - For a given MI, register, and the operand
+ /// index, return the ID and cost of its representative register class by
+ /// reference.
+ void getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const;
+
+ /// InitRegPressure - Find all virtual register references that are liveout
+ /// of the preheader to initialize the starting "register pressure". Note
+ /// this does not count live through (livein but not used) registers.
+ void InitRegPressure(MachineBasicBlock *BB);
+
+ /// UpdateRegPressure - Update estimate of register pressure after the
+ /// specified instruction.
+ void UpdateRegPressure(const MachineInstr *MI);
/// ExtractHoistableLoad - Unfold a load from the given machineinstr if
/// the load itself could be hoisted. Return the unfolded and hoistable
@@ -172,10 +257,14 @@
bool EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+ /// MayCSE - Return true if the given instruction will be CSE'd if it's
+ /// hoisted out of the loop.
+ bool MayCSE(MachineInstr *MI);
+
/// Hoist - When an instruction is found to only use loop invariant operands
/// that is safe to hoist, this instruction is called to do the dirty work.
- ///
- void Hoist(MachineInstr *MI);
+ /// It returns true if the instruction is hoisted.
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
/// InitCSEMap - Initialize the CSE map with instructions that are in the
/// current loop preheader that may become duplicates of instructions that
@@ -189,8 +278,13 @@
} // end anonymous namespace
char MachineLICM::ID = 0;
-INITIALIZE_PASS(MachineLICM, "machinelicm",
- "Machine Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
return new MachineLICM(PreRegAlloc);
@@ -212,18 +306,32 @@
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
if (PreRegAlloc)
- DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n");
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
else
- DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
+ TLI = TM->getTargetLowering();
TRI = TM->getRegisterInfo();
MFI = MF.getFrameInfo();
- RegInfo = &MF.getRegInfo();
+ MRI = &MF.getRegInfo();
+ InstrItins = TM->getInstrItineraryData();
AllocatableSet = TRI->getAllocatableSet(MF);
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegPressure.resize(NumRC);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRC);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+ }
+
// Get our Loop information...
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
@@ -248,7 +356,7 @@
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistRegion(N);
+ HoistRegion(N, true);
CSEMap.clear();
}
}
@@ -365,6 +473,12 @@
const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *BB = Blocks[i];
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad()) continue;
+
// Conservatively treat live-in's as an external def.
// FIXME: That means a reload that is reused in successor block(s) will not
// be LICM'ed.
@@ -376,6 +490,7 @@
++PhysRegDefs[*AS];
}
+ SpeculationState = SpeculateUnknown;
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
MachineInstr *MI = &*MII;
@@ -469,22 +584,65 @@
Changed = true;
}
+// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+// If not then a load from this mbb may not be safe to hoist.
+bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+ if (SpeculationState != SpeculateUnknown)
+ return SpeculationState == SpeculateFalse;
+
+ if (BB != CurLoop->getHeader()) {
+ // Check loop exiting blocks.
+ SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
+ CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
+ for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
+ SpeculationState = SpeculateTrue;
+ return false;
+ }
+ }
+
+ SpeculationState = SpeculateFalse;
+ return true;
+}
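// Editor's worked example (not part of the patch): in a loop shaped like
//
//   preheader -> header -> bb -> latch -> header
//                header -> exit            (header is the only exiting block)
//
// 'bb' does not dominate the exiting block 'header', so an iteration may exit
// without ever reaching 'bb'. A load in 'bb' is therefore not guaranteed to
// execute, and hoisting it to the preheader would speculate it.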
+
/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
///
-void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
assert(N != 0 && "Null dominator tree node?");
MachineBasicBlock *BB = N->getBlock();
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad()) return;
+
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ if (IsHeader) {
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+
+ SpeculationState = SpeculateUnknown;
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ) {
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- Hoist(&*MII);
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
MII = NextMII;
}
@@ -496,6 +654,112 @@
for (unsigned I = 0, E = Children.size(); I != E; ++I)
HoistRegion(Children[I]);
}
+
+ BackTrace.pop_back();
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// getRegisterClassIDAndCost - For a given MI, register, and the operand
+/// index, return the ID and cost of its representative register class.
+void
+MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ unsigned &RCId, unsigned &RCCost) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ if (VT == MVT::untyped) {
+ RCId = RC->getID();
+ RCCost = 1;
+ } else {
+ RCId = TLI->getRepRegClassFor(VT)->getID();
+ RCCost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// InitRegPressure - Find all virtual register references that are liveout of
+/// the preheader to initialize the starting "register pressure". Note this
+/// does not count live through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+ MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (MO.isDef())
+ RegPressure[RCId] += RCCost;
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill)
+ // Haven't seen this, it must be a livein.
+ RegPressure[RCId] += RCCost;
+ else if (!isNew && isKill)
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+ }
+}
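// Editor's worked example (not part of the patch), assuming each register
// costs 1:
//
//   %v1<def> = LOAD ...           ; new def           -> pressure += 1  (1)
//   %v2<def> = ADD %v1<kill>, ... ; def of %v2        -> pressure += 1  (2)
//                                 ; kill of seen %v1  -> pressure -= 1  (1)
//   ...      = USE %v3            ; new use, not kill -> livein, += 1   (2)
//
// The final estimate of 2 covers %v2 (defined here) and %v3 (a livein that
// is actually used); live-through registers are deliberately not counted.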
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (!isNew && isOperandKill(MO, MRI)) {
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (RCCost > RegPressure[RCId])
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+
+ unsigned Idx = 0;
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.pop_back_val();
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
+ RegPressure[RCId] += RCCost;
+ ++Idx;
+ }
}
/// IsLICMCandidate - Returns true if the instruction may be a suitable
@@ -506,7 +770,14 @@
bool DontMoveAcrossStore = true;
if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
return false;
-
+
+ // If it is a load, then check if it is guaranteed to execute by making sure
+ // it dominates all exiting blocks. If it doesn't, then there is a path out of
+ // the loop which does not execute this load, so we can't hoist it.
+ // Stores and side effects are already checked by isSafeToMove.
+ if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent()))
+ return false;
+
return true;
}
@@ -535,14 +806,14 @@
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!RegInfo->def_empty(Reg))
+ if (!MRI->def_empty(Reg))
return false;
if (AllocatableSet.test(Reg))
return false;
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
- if (!RegInfo->def_empty(AliasReg))
+ if (!MRI->def_empty(AliasReg))
return false;
if (AllocatableSet.test(AliasReg))
return false;
@@ -562,12 +833,12 @@
if (!MO.isUse())
continue;
- assert(RegInfo->getVRegDef(Reg) &&
+ assert(MRI->getVRegDef(Reg) &&
"Machine instr not mapped for this vreg?!");
// If the loop contains the definition of an operand, then the instruction
// isn't loop invariant.
- if (CurLoop->contains(RegInfo->getVRegDef(Reg)))
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
return false;
}
@@ -576,59 +847,238 @@
}
-/// HasPHIUses - Return true if the specified register has any PHI use.
-static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
- for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
- UE = RegInfo->use_end(); UI != UE; ++UI) {
+/// HasAnyPHIUse - Return true if the specified register is used by any
+/// phi node.
+bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
if (UseMI->isPHI())
return true;
+ // Look past copies as well.
+ if (UseMI->isCopy()) {
+ unsigned Def = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Def) &&
+ HasAnyPHIUse(Def))
+ return true;
+ }
}
return false;
}
-/// isLoadFromConstantMemory - Return true if the given instruction is a
-/// load from constant memory. Machine LICM will hoist these even if they are
-/// not re-materializable.
-bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
- if (!MI->getDesc().mayLoad()) return false;
- if (!MI->hasOneMemOperand()) return false;
- MachineMemOperand *MMO = *MI->memoperands_begin();
- if (MMO->isVolatile()) return false;
- if (!MMO->getValue()) return false;
- const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
- if (PSV) {
- MachineFunction &MF = *MI->getParent()->getParent();
- return PSV->isConstant(MF.getFrameInfo());
- } else {
- return AA->pointsToConstantMemory(MMO->getValue());
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and a use in the current loop; return true if the target considers
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI->getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in-loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ return true;
+ if (!InstrItins || InstrItins->isEmpty())
+ return false;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+/// if hoisting an instruction of the given cost matrix can cause high
+/// register pressure.
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ if (CI->second <= 0)
+ continue;
+
+ unsigned RCId = CI->first;
+ for (unsigned i = BackTrace.size(); i != 0; --i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+ if (RP[RCId] + CI->second >= RegLimit[RCId])
+ return true;
+ }
+ }
+
+ return false;
+}
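// Editor's worked example (not part of the patch): suppose the back trace
// records pressure 5 at the header and 3 at the current block for some class
// RC with RegLimit[RC] == 6, and hoisting MI adds Cost[RC] == +2. The header
// check fires (5 + 2 >= 6), so the hoist is reported as causing high register
// pressure even though the current block still has headroom.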
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Update register pressure of blocks from loop header to current block.
+ for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i];
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ unsigned RCId = CI->first;
+ RP[RCId] += CI->second;
+ }
}
}
/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
- // FIXME: For now, only hoist re-materilizable instructions. LICM will
- // increase register pressure. We want to make sure it doesn't increase
- // spilling.
+ if (MI.isImplicitDef())
+ return true;
+
+ // If the instruction is cheap, only hoist if it is re-materializable. LICM
+ // will increase register pressure. It's probably not worth it if the
+ // instruction is cheap.
// Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
// these tends to help performance in low register pressure situations. The
// trade-off is it may cause spills in high pressure situations. It will end up
// adding a store in the loop preheader. But the reload is no more expensive.
// The side benefit is these loads are frequently CSE'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA)) {
- if (!isLoadFromConstantMemory(&MI))
+ if (IsCheapInstruction(MI)) {
+ if (!TII->isTriviallyReMaterializable(&MI, AA))
+ return false;
+ } else {
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favors hoisting long latency instructions even in
+ // moderately high pressure situation.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ ++NumHighLatency;
+ return true;
+ }
+
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost)) {
+ ++NumLowRP;
+ return true;
+ }
+
+ // Do not "speculate" in a high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
+ return false;
+
+ // In a high register pressure situation, only hoist if the instruction is
+ // going to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA))
return false;
}
- // If result(s) of this instruction is used by PHIs, then don't hoist it.
- // The presence of joins makes it difficult for current register allocator
- // implementation to perform remat.
+ // If the result(s) of this instruction are used by PHIs outside of the
+ // loop, then don't hoist it, because doing so will introduce an extra copy.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- if (HasPHIUses(MO.getReg(), RegInfo))
+ if (HasAnyPHIUse(MO.getReg()))
return false;
}
@@ -636,10 +1086,14 @@
}
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->getDesc().canFoldAsLoad())
+ return 0;
+
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
- if (!isLoadFromConstantMemory(MI))
+ if (!MI->isInvariantLoad(AA))
return 0;
// Next determine the register class for a temporary register.
@@ -650,11 +1104,11 @@
/*UnfoldStore=*/false,
&LoadRegIndex);
if (NewOpc == 0) return 0;
- const TargetInstrDesc &TID = TII->get(NewOpc);
- if (TID.getNumDefs() != 1) return 0;
- const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ if (MID.getNumDefs() != 1) return 0;
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
// Ok, we're unfolding. Create a temporary register and do the unfold.
- unsigned Reg = RegInfo->createVirtualRegister(RC);
+ unsigned Reg = MRI->createVirtualRegister(RC);
MachineFunction &MF = *MI->getParent()->getParent();
SmallVector<MachineInstr *, 2> NewMIs;
@@ -678,6 +1132,10 @@
NewMIs[1]->eraseFromParent();
return 0;
}
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
// Otherwise we successfully unfolded a load that we can hoist.
MI->eraseFromParent();
return NewMIs[0];
@@ -686,20 +1144,15 @@
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
- // FIXME: For now, only hoist re-materilizable instructions. LICM will
- // increase register pressure. We want to make sure it doesn't increase
- // spilling.
- if (TII->isTriviallyReMaterializable(MI, AA)) {
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
- CI = CSEMap.find(Opcode);
- if (CI != CSEMap.end())
- CI->second.push_back(MI);
- else {
- std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(MI);
- CSEMap.insert(std::make_pair(Opcode, CSEMIs));
- }
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
}
}
}
@@ -709,7 +1162,7 @@
std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- if (TII->produceSameValue(MI, PrevMI))
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
return PrevMI;
}
return 0;
@@ -738,8 +1191,8 @@
if (MO.isReg() && MO.isDef() &&
!TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- RegInfo->clearKillFlags(Dup->getOperand(i).getReg());
+ MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ MRI->clearKillFlags(Dup->getOperand(i).getReg());
}
}
MI->eraseFromParent();
@@ -749,18 +1202,29 @@
return false;
}
+/// MayCSE - Return true if the given instruction will be CSE'd if it's
+/// hoisted out of the loop.
+bool MachineLICM::MayCSE(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ return LookForDuplicate(MI, CI->second) != 0;
+}
+
/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///
-void MachineLICM::Hoist(MachineInstr *MI) {
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader) return;
-
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
MI = ExtractHoistableLoad(MI);
- if (!MI) return;
+ if (!MI) return false;
}
// Now move the instructions to the predecessor, inserting it before any
@@ -791,13 +1255,16 @@
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
// rather than just live for part of it.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() && !MO.isDead())
- RegInfo->clearKillFlags(MO.getReg());
+ MRI->clearKillFlags(MO.getReg());
}
// Add to the CSE map.
@@ -812,6 +1279,8 @@
++NumHoisted;
Changed = true;
+
+ return true;
}
MachineBasicBlock *MachineLICM::getCurPreheader() {
diff --git a/src/LLVM/lib/CodeGen/MachineLoopInfo.cpp b/src/LLVM/lib/CodeGen/MachineLoopInfo.cpp
index 039edf2..189cb2b 100644
--- a/src/LLVM/lib/CodeGen/MachineLoopInfo.cpp
+++ b/src/LLVM/lib/CodeGen/MachineLoopInfo.cpp
@@ -30,8 +30,11 @@
}
char MachineLoopInfo::ID = 0;
-static RegisterPass<MachineLoopInfo>
-X("machine-loops", "Machine Natural Loop Construction", true);
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
diff --git a/src/LLVM/lib/CodeGen/MachineLoopRanges.cpp b/src/LLVM/lib/CodeGen/MachineLoopRanges.cpp
new file mode 100644
index 0000000..17fe67f
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/MachineLoopRanges.cpp
@@ -0,0 +1,116 @@
+//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MachineLoopRanges analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+char MachineLoopRanges::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+
+char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
+
+void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<SlotIndexes>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
+bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Indexes = &getAnalysis<SlotIndexes>();
+ return false;
+}
+
+void MachineLoopRanges::releaseMemory() {
+ DeleteContainerSeconds(Cache);
+ Cache.clear();
+}
+
+MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
+ MachineLoopRange *&Range = Cache[Loop];
+ if (!Range)
+ Range = new MachineLoopRange(Loop, Allocator, *Indexes);
+ return Range;
+}
+
+/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
+MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
+ MachineLoopRange::Allocator &alloc,
+ SlotIndexes &Indexes)
+ : Loop(loop), Intervals(alloc), Area(0) {
+ // Compute loop coverage.
+ for (MachineLoop::block_iterator I = Loop->block_begin(),
+ E = Loop->block_end(); I != E; ++I) {
+ const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
+ Intervals.insert(Range.first, Range.second, 1u);
+ Area += Range.first.distance(Range.second);
+ }
+}
+
+/// overlaps - Return true if this loop overlaps the given range of machine
+/// instructions.
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
+ Map::const_iterator I = Intervals.find(Start);
+ return I.valid() && Stop > I.start();
+}
+
+unsigned MachineLoopRange::getNumber() const {
+ return Loop->getHeader()->getNumber();
+}
+
+/// byNumber - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by number.
+int MachineLoopRange::byNumber(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ unsigned na = a->getNumber();
+ unsigned nb = b->getNumber();
+ if (na < nb)
+ return -1;
+ if (na > nb)
+ return 1;
+ return 0;
+}
+
+/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by:
+/// 1. Descending area.
+/// 2. Ascending number.
+int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ if (a->getArea() != b->getArea())
+ return a->getArea() > b->getArea() ? -1 : 1;
+ return byNumber(pa, pb);
+}
+
+void MachineLoopRange::print(raw_ostream &OS) const {
+ OS << "Loop#" << getNumber() << " =";
+ for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
+ OS << " [" << I.start() << ';' << I.stop() << ')';
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
+ MLR.print(OS);
+ return OS;
+}
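// Editor's usage sketch (not part of the patch; the enclosing pass and the
// live interval 'LI' are hypothetical):
//
//   MachineLoopRanges &MLR = getAnalysis<MachineLoopRanges>();
//   MachineLoopRange *Range = MLR.getLoopRange(ML); // computed and cached
//   if (Range->overlaps(LI.beginIndex(), LI.endIndex()))
//     ... // part of LI's live range executes inside loop ML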
diff --git a/src/LLVM/lib/CodeGen/MachineModuleInfo.cpp b/src/LLVM/lib/CodeGen/MachineModuleInfo.cpp
index 2d510b9..80c4854 100644
--- a/src/LLVM/lib/CodeGen/MachineModuleInfo.cpp
+++ b/src/LLVM/lib/CodeGen/MachineModuleInfo.cpp
@@ -17,9 +17,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Dwarf.h"
@@ -29,7 +27,7 @@
// Handle the Pass registration stuff necessary to use TargetData's.
INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false);
+ "Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
@@ -41,30 +39,30 @@
public:
MMIAddrLabelMapCallbackPtr() : Map(0) {}
MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
-
+
void setPtr(BasicBlock *BB) {
ValueHandleBase::operator=(BB);
}
-
+
void setMap(MMIAddrLabelMap *map) { Map = map; }
-
+
virtual void deleted();
virtual void allUsesReplacedWith(Value *V2);
};
-
+
class MMIAddrLabelMap {
MCContext &Context;
struct AddrLabelSymEntry {
/// Symbols - The symbols for the label. This is a pointer union that is
/// either one symbol (the common case) or a list of symbols.
PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
-
+
Function *Fn; // The containing function of the BasicBlock.
unsigned Index; // The index in BBCallbacks for the BasicBlock.
};
-
+
DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
-
+
/// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
/// use this so we get notified if a block is deleted or RAUWd.
std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
@@ -76,23 +74,23 @@
DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
DeletedAddrLabelsNeedingEmission;
public:
-
+
MMIAddrLabelMap(MCContext &context) : Context(context) {}
~MMIAddrLabelMap() {
assert(DeletedAddrLabelsNeedingEmission.empty() &&
"Some labels for deleted blocks never got emitted");
-
+
// Deallocate any of the 'list of symbols' case.
for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
}
-
+
MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
- void takeDeletedSymbolsForFunction(Function *F,
+ void takeDeletedSymbolsForFunction(Function *F,
std::vector<MCSymbol*> &Result);
void UpdateForDeletedBlock(BasicBlock *BB);
@@ -104,7 +102,7 @@
assert(BB->hasAddressTaken() &&
"Shouldn't get label for block without address taken");
AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
+
// If we already had an entry for this block, just return it.
if (!Entry.Symbols.isNull()) {
assert(BB->getParent() == Entry.Fn && "Parent changed");
@@ -112,7 +110,7 @@
return Entry.Symbols.get<MCSymbol*>();
return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
}
-
+
// Otherwise, this is a new entry, create a new symbol for it and add an
// entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
BBCallbacks.push_back(BB);
@@ -129,9 +127,9 @@
assert(BB->hasAddressTaken() &&
"Shouldn't get label for block without address taken");
AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
+
std::vector<MCSymbol*> Result;
-
+
// If we already had an entry for this block, just return it.
if (Entry.Symbols.isNull())
Result.push_back(getAddrLabelSymbol(BB));
@@ -152,7 +150,7 @@
// If there are no entries for the function, just return.
if (I == DeletedAddrLabelsNeedingEmission.end()) return;
-
+
// Otherwise, take the list.
std::swap(Result, I->second);
DeletedAddrLabelsNeedingEmission.erase(I);
@@ -175,7 +173,7 @@
if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
if (Sym->isDefined())
return;
-
+
// If the block is not yet defined, we need to emit it at the end of the
// function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
// for the containing Function. Since the block is being deleted, its
@@ -187,7 +185,7 @@
for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
MCSymbol *Sym = (*Syms)[i];
if (Sym->isDefined()) continue; // Ignore already emitted labels.
-
+
// If the block is not yet defined, we need to emit it at the end of the
// function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
// for the containing Function. Since the block is being deleted, its
@@ -195,7 +193,7 @@
// 'Entry'.
DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
}
-
+
// The entry is deleted, free the memory associated with the symbol list.
delete Syms;
}
@@ -225,7 +223,7 @@
SymList->push_back(PrevSym);
NewEntry.Symbols = SymList;
}
-
+
std::vector<MCSymbol*> *SymList =
NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
@@ -234,7 +232,7 @@
SymList->push_back(Sym);
return;
}
-
+
// Otherwise, concatenate the list.
std::vector<MCSymbol*> *Syms = OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
SymList->insert(SymList->end(), Syms->begin(), Syms->end());
@@ -253,16 +251,23 @@
//===----------------------------------------------------------------------===//
-MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
-: ImmutablePass(ID), Context(MAI),
- ObjFileMMI(0),
- CurCallSite(0), DbgInfoAvailable(false){
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const MCRegisterInfo &MRI,
+ const MCObjectFileInfo *MOFI)
+ : ImmutablePass(ID), Context(MAI, MRI, MOFI),
+ ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
+ CallsUnwindInit(0), DbgInfoAvailable(false),
+ CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
AddrLabelSymbols = 0;
TheModule = 0;
}
MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(ID), Context(*(MCAsmInfo*)0) {
+ : ImmutablePass(ID),
+ Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) {
assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
"should always be explicitly constructed by LLVMTargetMachine");
abort();
@@ -270,7 +275,7 @@
MachineModuleInfo::~MachineModuleInfo() {
delete ObjFileMMI;
-
+
// FIXME: Why isn't doFinalization being called??
//assert(AddrLabelSymbols == 0 && "doFinalization not called");
delete AddrLabelSymbols;
@@ -299,10 +304,14 @@
FrameMoves.clear();
// Clean up exception info.
+ LandingPads.clear();
CallSiteMap.clear();
TypeInfos.clear();
FilterIds.clear();
FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ CompactUnwindEncoding = 0;
VariableDbgInfo.clear();
}
@@ -362,6 +371,142 @@
takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
}
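The AddrLabelSymEntry machinery above keeps the common one-label case allocation-free: the PointerUnion holds a single MCSymbol* inline and only spills to a heap-allocated vector once a block acquires a second label, which the map's destructor then frees. A standalone sketch of that storage pattern, using std::variant as a stand-in for llvm::PointerUnion (simplified types, not LLVM's):

#include <cassert>
#include <variant>
#include <vector>

struct Symbol {};  // stand-in for MCSymbol

struct Entry {
  // Default-constructs to a null single-symbol, like PointerUnion's null state.
  std::variant<Symbol *, std::vector<Symbol *> *> Symbols;

  void add(Symbol *S) {
    if (auto *Single = std::get_if<Symbol *>(&Symbols)) {
      if (!*Single) { Symbols = S; return; }           // first label: stored inline
      auto *List = new std::vector<Symbol *>{*Single}; // second label: spill to list
      List->push_back(S);
      Symbols = List;
      return;
    }
    std::get<std::vector<Symbol *> *>(Symbols)->push_back(S);
  }

  // Mirrors ~MMIAddrLabelMap: only the list case owns heap memory.
  // (A real implementation would also delete the copy operations.)
  ~Entry() {
    if (auto *List = std::get_if<std::vector<Symbol *> *>(&Symbols))
      delete *List;
  }
};

int main() {
  Symbol A, B, C;
  Entry E;
  E.add(&A);
  assert(std::holds_alternative<Symbol *>(E.Symbols));  // still the inline case
  E.add(&B);
  E.add(&C);
  assert(std::get<std::vector<Symbol *> *>(E.Symbols)->size() == 3);
  return 0;
}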
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ const Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ // If this is the first personality we're adding, go
+ // ahead and add it at the beginning.
+ if (Personalities[0] == NULL)
+ Personalities[0] = Personality;
+ else
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalVariable *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = 0;
+
+ // Special case: we *should* emit LPs with null LP MBB. This indicates
+ // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j, --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
+
+/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
+/// indexes.
+void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ for (unsigned I = 0, E = Sites.size(); I != E; ++I)
+ LPadToCallSiteMap[Sym].push_back(Sites[I]);
+}
+
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
@@ -403,19 +548,31 @@
return FilterID;
}
-namespace {
- /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map
- /// by source location, to avoid depending on the arbitrary order that
- /// instruction selection visits variables in.
- struct VariableDebugSorter {
- bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
- const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
- const {
- if (A.second.second.getLine() != B.second.second.getLine())
- return A.second.second.getLine() < B.second.second.getLine();
- if (A.second.second.getCol() != B.second.second.getCol())
- return A.second.second.getCol() < B.second.second.getCol();
- return false;
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 is fixed, we're using one personality function per
+ // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return the unique index for the current personality
+/// function. The NULL/first personality function should always get index zero.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan the landing pads. If there is at least one non-NULL personality, use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
}
- };
+
+ for (unsigned i = 0; i < Personalities.size(); ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This will happen if the current personality function is
+ // at index zero.
+ return 0;
}
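getPersonalityIndex above depends on the constructor having pushed a NULL entry into slot 0 of the Personalities table, so functions whose landing pads carry no personality resolve to index zero. A standalone sketch of that lookup with simplified stand-in types (not LLVM's):

#include <cassert>
#include <vector>

struct Personality {};                 // stand-in for llvm::Function

struct Pad { const Personality *P; };  // stand-in for LandingPadInfo

static unsigned personalityIndex(const std::vector<Pad> &Pads,
                                 const std::vector<const Personality *> &Table) {
  const Personality *Active = nullptr;
  for (const Pad &LP : Pads)           // first non-null personality wins
    if (LP.P) { Active = LP.P; break; }
  for (unsigned i = 0; i < Table.size(); ++i)
    if (Table[i] == Active)
      return i;
  return 0;                            // not found: fall back to slot 0
}

int main() {
  Personality A, B;
  std::vector<const Personality *> Table = {nullptr, &A, &B}; // slot 0 = none
  assert(personalityIndex({{nullptr}, {&B}}, Table) == 2);    // B sits at index 2
  assert(personalityIndex({{nullptr}}, Table) == 0);          // no personality
  return 0;
}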
diff --git a/src/LLVM/lib/CodeGen/MachineRegisterInfo.cpp b/src/LLVM/lib/CodeGen/MachineRegisterInfo.cpp
index 5d852f2..266ebf6 100644
--- a/src/LLVM/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/src/LLVM/lib/CodeGen/MachineRegisterInfo.cpp
@@ -14,13 +14,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
+ : TRI(&TRI), IsSSA(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()];
UsedPhysRegs.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -30,34 +30,64 @@
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
- for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i)
- assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
#endif
delete [] PhysRegUseDefLists;
- delete [] RegClass2VRegMap;
}
/// setRegClass - Set the register class of the specified virtual register.
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
- unsigned VR = Reg;
- Reg -= TargetRegisterInfo::FirstVirtualRegister;
- assert(Reg < VRegInfo.size() && "Invalid vreg!");
- const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
VRegInfo[Reg].first = RC;
+}
- // Remove from old register class's vregs list. This may be slow but
- // fortunately this operation is rarely needed.
- std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
- std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR);
- VRegs.erase(I);
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC,
+ unsigned MinNumRegs) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC);
+ if (!NewRC || NewRC == OldRC)
+ return NewRC;
+ if (NewRC->getNumRegs() < MinNumRegs)
+ return 0;
+ setRegClass(Reg, NewRC);
+ return NewRC;
+}
- // Add to new register class's vregs list.
- RegClass2VRegMap[RC->getID()].push_back(VR);
+bool
+MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return false;
+
+ // Accumulate constraints from all uses.
+ for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
+ ++I) {
+ // TRI doesn't have accurate enough information to model this yet.
+ if (I.getOperand().getSubReg())
+ return false;
+ const TargetRegisterClass *OpRC =
+ I->getRegClassConstraint(I.getOperandNo(), TII, TRI);
+ if (OpRC)
+ NewRC = TRI->getCommonSubClass(NewRC, OpRC);
+ if (!NewRC || NewRC == OldRC)
+ return false;
+ }
+ setRegClass(Reg, NewRC);
+ return true;
}
/// createVirtualRegister - Create and return a new virtual register in the
@@ -66,17 +96,23 @@
unsigned
MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
assert(RegClass && "Cannot create register without RegClass!");
- // Add a reg, but keep track of whether the vector reallocated or not.
- void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
- VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
- RegAllocHints.push_back(std::make_pair(0, 0));
+ assert(RegClass->isAllocatable() &&
+ "Virtual register RegClass must be allocatable.");
- if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+
+ // Add a reg, but keep track of whether the vector reallocated or not.
+ const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
+ void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
+
+ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
// The vector reallocated, handle this now.
HandleVRegListReallocation();
- unsigned VR = getLastVirtReg();
- RegClass2VRegMap[RegClass->getID()].push_back(VR);
- return VR;
+ return Reg;
}
/// HandleVRegListReallocation - We just added a virtual register to the
@@ -85,11 +121,12 @@
void MachineRegisterInfo::HandleVRegListReallocation() {
// The back pointers for the vreg lists point into the previous vector.
// Update them to point to their correct slots.
- for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) {
- MachineOperand *List = VRegInfo[i].second;
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ MachineOperand *List = VRegInfo[Reg].second;
if (!List) continue;
// Update the back-pointer to be accurate once more.
- List->Contents.Reg.Prev = &VRegInfo[i].second;
+ List->Contents.Reg.Prev = &VRegInfo[Reg].second;
}
}
@@ -112,8 +149,6 @@
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
- assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
- "Invalid vreg!");
// Since we are in SSA form, we can use the first definition.
if (!def_empty(Reg))
return &*def_begin(Reg);
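constrainRegClass above narrows a virtual register's class to the common subclass of its current and required classes, backing out when fewer than MinNumRegs registers would remain. A self-contained sketch of that contract, with a toy single-chain hierarchy standing in for TargetRegisterClass (the real getCommonSubClass handles arbitrary class lattices):

#include <cassert>

struct RC {
  unsigned NumRegs;
  const RC *Super;  // single superclass chain, enough for the sketch
};

// The "common subclass" in this toy lattice is the smaller class when one
// contains the other, and null when they are unrelated.
static const RC *commonSubClass(const RC *A, const RC *B) {
  for (const RC *C = A; C; C = C->Super) if (C == B) return A;
  for (const RC *C = B; C; C = C->Super) if (C == A) return B;
  return nullptr;
}

static const RC *constrain(const RC *&Current, const RC *Required,
                           unsigned MinNumRegs) {
  if (Current == Required) return Required;
  const RC *NewRC = commonSubClass(Current, Required);
  if (!NewRC || NewRC == Current) return NewRC;
  if (NewRC->NumRegs < MinNumRegs) return nullptr;  // refuse: too constraining
  Current = NewRC;                                  // commit the narrowing
  return NewRC;
}

int main() {
  RC GPR = {16, nullptr};
  RC GPRLow = {8, &GPR};  // subclass of GPR with half the registers
  const RC *VReg = &GPR;
  assert(constrain(VReg, &GPRLow, 4) == &GPRLow && VReg == &GPRLow);
  VReg = &GPR;
  assert(constrain(VReg, &GPRLow, 9) == nullptr && VReg == &GPR);  // guard holds
  return 0;
}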
diff --git a/src/LLVM/lib/CodeGen/MachineSink.cpp b/src/LLVM/lib/CodeGen/MachineSink.cpp
index b6a9bc4..29cfb49 100644
--- a/src/LLVM/lib/CodeGen/MachineSink.cpp
+++ b/src/LLVM/lib/CodeGen/MachineSink.cpp
@@ -25,26 +25,41 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-STATISTIC(NumSunk, "Number of machine instructions sunk");
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
namespace {
class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineRegisterInfo *RegInfo; // Machine register information
+ MachineRegisterInfo *MRI; // Machine register information
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *LI;
AliasAnalysis *AA;
BitVector AllocatableSet; // Which physregs are allocatable?
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+
public:
static char ID; // Pass identification
- MachineSinking() : MachineFunctionPass(ID) {}
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -57,40 +72,132 @@
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
}
+
+ virtual void releaseMemory() {
+ CEBCandidates.clear();
+ }
+
private:
bool ProcessBlock(MachineBasicBlock &MBB);
+ bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
bool SinkInstruction(MachineInstr *MI, bool &SawStore);
- bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB);
};
} // end anonymous namespace
char MachineSinking::ID = 0;
-INITIALIZE_PASS(MachineSinking, "machine-sink",
- "Machine code sinking", false, false);
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (!MI->isCopy())
+ return false;
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI->eraseFromParent();
+ ++NumCoalesces;
+ return true;
+}
+
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
-/// occur in blocks dominated by the specified block.
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
- MachineBasicBlock *MBB) const {
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
// Ignoring debug uses is necessary so debug info doesn't affect the code.
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
// user might not get the right info at runtime.
+
+ // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
for (MachineRegisterInfo::use_nodbg_iterator
- I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end();
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
I != E; ++I) {
// Determine the block of the use.
MachineInstr *UseInst = &*I;
MachineBasicBlock *UseBlock = UseInst->getParent();
-
if (UseInst->isPHI()) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
}
// Check that it dominates.
@@ -107,7 +214,7 @@
const TargetMachine &TM = MF.getTarget();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
- RegInfo = &MF.getRegInfo();
+ MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
@@ -119,6 +226,7 @@
bool MadeChange = false;
// Process all basic blocks.
+ CEBCandidates.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
@@ -157,6 +265,12 @@
if (MI->isDebugValue())
continue;
+ bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+ if (Joined) {
+ MadeChange = true;
+ continue;
+ }
+
if (SinkInstruction(MI, SawStore))
++NumSunk, MadeChange = true;
@@ -166,9 +280,135 @@
return MadeChange;
}
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)))
+ return true;
+
+ if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ return true;
+
+ // MI is cheap, we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MRI->hasOneNonDBGUse(Reg))
+ return true;
+ }
+
+ return false;
+}
+
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return 0;
+
+ // Avoid breaking a back edge. From == To means a backedge in a single-BB loop.
+ if (!SplitEdges || FromBB == ToBB)
+ return 0;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
+ return 0;
+
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break the critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given the SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return 0;
+ }
+ }
+
+ return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// collectDebugValues - Scan instructions following MI and collect any
+/// matching DBG_VALUEs.
+static void collectDebugValues(MachineInstr *MI,
+ SmallVector<MachineInstr *, 2> &DbgValues) {
+ DbgValues.clear();
+ if (!MI->getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
+ DbgValues.push_back(DI);
+ }
+}
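A standalone sketch of the scan collectDebugValues performs, with stand-in types: gather the contiguous run of trailing DBG_VALUEs that reference the sunk instruction's defined register, stopping at the first non-debug instruction just as the code above does:

#include <cassert>
#include <cstddef>
#include <vector>

struct Instr {
  bool IsDebugValue;
  int Reg;  // register referenced: the def for normal instrs, the use for DBG_VALUE
};

static std::vector<const Instr *>
collectDebugUses(const std::vector<Instr> &Block, std::size_t MIIdx) {
  std::vector<const Instr *> DbgValues;
  int DefReg = Block[MIIdx].Reg;
  for (std::size_t i = MIIdx + 1; i < Block.size(); ++i) {
    if (!Block[i].IsDebugValue)
      break;                     // the trailing run of DBG_VALUEs is over
    if (Block[i].Reg == DefReg)
      DbgValues.push_back(&Block[i]);
  }
  return DbgValues;
}

int main() {
  std::vector<Instr> BB = {
      {false, 7},  // the instruction being sunk; defines r7
      {true, 7},   // DBG_VALUE for r7: collected
      {true, 3},   // DBG_VALUE for an unrelated reg: skipped
      {false, 9},  // first non-debug instruction ends the scan
      {true, 7},   // not reached
  };
  assert(collectDebugUses(BB, 0).size() == 1);
  return 0;
}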
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
+ return false;
+
// Check if it's safe to move the instruction.
if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
@@ -189,6 +429,7 @@
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
+ bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -201,7 +442,7 @@
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!RegInfo->def_empty(Reg))
+ if (!MRI->def_empty(Reg))
return false;
if (AllocatableSet.test(Reg))
@@ -210,7 +451,7 @@
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
- if (!RegInfo->def_empty(AliasReg))
+ if (!MRI->def_empty(AliasReg))
return false;
if (AllocatableSet.test(AliasReg))
@@ -225,7 +466,7 @@
if (MO.isUse()) continue;
// If it's not safe to move defs of the register class, then abort.
- if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
return false;
// FIXME: This picks a successor to sink into based on having one
@@ -246,7 +487,9 @@
if (SuccToSinkTo) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ BreakPHIEdge, LocalUse))
return false;
continue;
@@ -256,10 +499,15 @@
// we should sink to.
for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
E = ParentBlock->succ_end(); SI != E; ++SI) {
- if (AllUsesDominatedByBlock(Reg, *SI)) {
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ BreakPHIEdge, LocalUse)) {
SuccToSinkTo = *SI;
break;
}
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return false;
}
// If we couldn't find a block to sink to, ignore this instruction.
@@ -272,6 +520,11 @@
if (SuccToSinkTo == 0)
return false;
+ // It's not safe to sink instructions to EH landing pad. Control flow into
+ // landing pad is implicitly defined.
+ if (SuccToSinkTo->isLandingPad())
+ return false;
+
// It is not possible to sink an instruction into its own block. This can
// happen with loops.
if (MI->getParent() == SuccToSinkTo)
@@ -294,31 +547,69 @@
// If the block has multiple predecessors, this would introduce computation on
// a path where it doesn't already exist. We could split the critical edge,
// but for now we just punt.
- // FIXME: Split critical edges if not backedges.
if (SuccToSinkTo->pred_size() > 1) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
+ bool TryBreak = false;
bool store = true;
if (!MI->isSafeToMove(TII, AA, store)) {
- DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
- return false;
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
}
// We don't want to sink across a critical edge if we don't dominate the
// successor. We could be introducing calculations to new code paths.
- if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
- return false;
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
}
// Don't sink instructions into a loop.
- if (LI->isLoopHeader(SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
- return false;
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
}
// Otherwise we are OK with sinking along a critical edge.
- DEBUG(dbgs() << "Sinking along critical edge.\n");
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ MachineBasicBlock *NewSucc =
+ SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ BreakPHIEdge = false;
+ }
+ }
+ }
+
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+ // sunk into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
}
// Determine where to insert into. Skip phi nodes.
@@ -326,10 +617,22 @@
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
++InsertPos;
+ // collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValuesToSink;
+ collectDebugValues(MI, DbgValuesToSink);
+
// Move the instruction.
SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
+ // Move debug values.
+ for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(),
+ DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
+ MachineInstr *DbgMI = *DBI;
+ SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI,
+ ++MachineBasicBlock::iterator(DbgMI));
+ }
+
// Conservatively, clear any kill flags, since it's possible that they are no
// longer correct.
MI->clearKillInfo();
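The dominance test SplitCriticalEdge applies, i.e. every predecessor of "To" other than "From" must itself be dominated by "To", can be exercised in isolation. A minimal sketch with stand-in types, where the dominance oracle is assumed to come from a dominator tree:

#include <cassert>
#include <functional>
#include <vector>

struct Block { std::vector<Block *> Preds; };

// Splitting From->To and sinking into the new block is only safe when no
// other path reaches To without passing through the sunk computation.
static bool legalToSinkAlongEdge(
    Block *From, Block *To,
    const std::function<bool(Block *, Block *)> &dominates) {
  for (Block *P : To->Preds) {
    if (P == From)
      continue;
    if (!dominates(To, P))  // some path bypasses the new block
      return false;
  }
  return true;
}

int main() {
  // Diamond A -> {B, C}, B -> C: splitting A->C is illegal, because C's
  // other predecessor B is not dominated by C.
  Block A, B, C;
  C.Preds = {&A, &B};
  auto trivial = [](Block *X, Block *Y) { return X == Y; };  // toy oracle
  assert(!legalToSinkAlongEdge(&A, &C, trivial));

  // Loop header H with preds {entry E, latch L}, where H dominates L:
  // splitting E->H is acceptable.
  Block E, H, L;
  H.Preds = {&E, &L};
  auto dom = [&](Block *X, Block *Y) { return X == Y || (X == &H && Y == &L); };
  assert(legalToSinkAlongEdge(&E, &H, dom));
  return 0;
}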
diff --git a/src/LLVM/lib/CodeGen/MachineVerifier.cpp b/src/LLVM/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 0000000..26847d3
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,1242 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MachineVerifier {
+
+ MachineVerifier(Pass *pass, const char *b) :
+ PASS(pass),
+ Banner(b),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ Pass *const PASS;
+ const char *Banner;
+ const char *const OutFileName;
+ raw_ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ const MachineInstr *FirstTerminator;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegSet regsLiveInButUnused;
+
+ SlotIndex lastIndex;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ RV.push_back(*R);
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addRequired(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool changed = false;
+ for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+ if (addRequired(I->first))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ void verifyLiveIntervals();
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+ const char *const Banner;
+
+ MachineVerifierPass(const char *b = 0)
+ : MachineFunctionPass(ID), Banner(b) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ MF.verify(this, Banner);
+ return false;
+ }
+ };
+
+}
+
+char MachineVerifierPass::ID = 0;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+ return new MachineVerifierPass(Banner);
+}
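A hedged usage sketch of the factory function just defined: only createMachineVerifierPass and the -verify-machineinstrs / LLVM_VERIFY_MACHINEINSTRS entry points come from this file; the pass-manager wiring around it is illustrative.

#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"

void addPassesWithVerification(llvm::PassManagerBase &PM, bool Verify) {
  // ... add instruction selection / register allocation passes here ...
  if (Verify)
    // The banner is printed above the function dump on the first error,
    // identifying which stage produced the bad machine code.
    PM.add(llvm::createMachineVerifierPass("After register allocation"));
}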
+
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+ MachineVerifier(p, Banner)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+ raw_ostream *OutFile = 0;
+ if (OutFileName) {
+ std::string ErrorInfo;
+ OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ raw_fd_ostream::F_Append);
+ if (!ErrorInfo.empty()) {
+ errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ exit(1);
+ }
+
+ OS = OutFile;
+ } else {
+ OS = &errs();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LiveVars = NULL;
+ LiveInts = NULL;
+ LiveStks = NULL;
+ Indexes = NULL;
+ if (PASS) {
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ }
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+ MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != MFI) {
+ report("Bad instruction parent pointer", MFI);
+ *OS << "Instruction: " << *MBBI;
+ continue;
+ }
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+ }
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFile)
+ delete OutFile;
+ else if (foundErrors)
+ report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
+
+ return false; // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ *OS << '\n';
+ if (!foundErrors++) {
+ if (Banner)
+ *OS << "# " << Banner << '\n';
+ MF->print(*OS, Indexes);
+ }
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getFunction()->getNameStr() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: " << MBB->getName()
+ << " " << (void*)MBB
+ << " (BB#" << MBB->getNumber() << ")";
+ if (Indexes)
+ *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ *OS << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(MI))
+ *OS << Indexes->getInstructionIndex(MI) << '\t';
+ MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
+ regsReserved = TRI->getReservedRegs(*MF);
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
+ regsReserved.set(*Sub);
+ }
+ }
+ markReachable(&MF->front());
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ FirstTerminator = 0;
+
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isLandingPad())
+ LandingPadSuccs.insert(*I);
+ }
+
+ const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ if (LandingPadSuccs.size() > 1 &&
+ !(AsmInfo &&
+ AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+ BB && isa<SwitchInst>(BB->getTerminator())))
+ report("MBB has more than one landing pad successor", MBB);
+
+ // Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+ TBB, FBB, Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(MBBI)) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ !TII->isPredicated(&MBB->back())) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(TBB)) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB;
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!MBB->back().getDesc().isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().getDesc().isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
+ regsLive.clear();
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ regsLive.insert(*R);
+ }
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(MBB);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ regsLive.insert(I);
+ for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ regsLive.insert(*R);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !MCID.mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MCID.mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+ // Ensure non-terminators don't follow terminators.
+ if (MCID.isTerminator()) {
+ if (!FirstTerminator)
+ FirstTerminator = MI;
+ } else if (FirstTerminator) {
+ report("Non-terminator instruction after the first terminator", MI);
+ *OS << "First terminator was:\t" << *FirstTerminator;
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const MCInstrDesc &MCID = MI->getDesc();
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < MCID.getNumDefs()) {
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < MCID.getNumOperands()) {
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the arm back end.
+ if (MO->isReg() &&
+ !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+
+ // Check Live Variables.
+ if (MI->isDebugValue()) {
+ // Liveness checks are not valid for debug values.
+ } else if (MO->isUse() && !MO->isUndef()) {
+ regsLiveInButUnused.erase(Reg);
+
+ bool isKill = false;
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+ // A two-addr use counts as a kill if use and def are the same.
+ unsigned DefReg = MI->getOperand(defIdx).getReg();
+ if (Reg == DefReg)
+ isKill = true;
+ else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ report("Two-address instruction operands must be identical",
+ MO, MONum);
+ }
+ } else
+ isKill = MO->isKill();
+
+ if (isKill)
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(),
+ VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ } else if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+
+ // Check register classes.
+ if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (SubIdx) {
+ report("Illegal subregister index for physical register", MO, MONum);
+ return;
+ }
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (!DRC->contains(Reg)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(Reg) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ const TargetRegisterClass *SRC =
+ TRI->getSubClassWithSubReg(RC, SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
+ }
+ if (RC != SRC) {
+ report("Invalid register class for subregister index", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not fully support subreg index " << SubIdx << "\n";
+ return;
+ }
+ }
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (SubIdx) {
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(RC);
+ if (!SuperRC) {
+ report("No largest legal super class exists.", MO, MONum);
+ return;
+ }
+ DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx);
+ if (!DRC) {
+ report("No matching super-reg register class.", MO, MONum);
+ return;
+ }
+ }
+ if (!RC->hasSuperClassEq(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ *OS << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
+void MachineVerifier::calcRegsPassed() {
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+ PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PInfo = MBBInfoMap[*PrI];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(*PrI);
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (*PrI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*PrI];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(*PrI);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ DenseSet<const MachineBasicBlock*> seen;
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "BB#" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+ }
+
+ // Now check liveness info if available
+ if (LiveVars || LiveInts)
+ calcRegsRequired();
+ if (LiveVars)
+ verifyLiveVariables();
+ if (LiveInts)
+ verifyLiveIntervals();
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MFI->getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", MFI);
+ *OS << "Virtual register " << PrintReg(Reg)
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
+ LVE = LiveInts->end(); LVI != LVE; ++LVI) {
+ const LiveInterval &LI = *LVI->second;
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->use_empty(LI.reg))
+ continue;
+
+ // Physical registers have much weirdness going on, mostly from coalescing.
+ // We should probably fix it, but for now just ignore them.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+ continue;
+
+ assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I) {
+ VNInfo *VNI = *I;
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+
+ if (!DefVNI) {
+ if (!VNI->isUnused()) {
+ report("Valno not live at def and not marked unused", MF);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (VNI->isUnused())
+ continue;
+
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
+ continue;
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ continue;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber()
+ << " in " << LI << '\n';
+ }
+ } else {
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ } else if (!MI->modifiesRegister(LI.reg, TRI)) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ bool isEarlyClobber = false;
+ if (MI) {
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+ MOI->isEarlyClobber()) {
+ isEarlyClobber = true;
+ break;
+ }
+ }
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isUse()) {
+ report("Early clobber def must be at a USE slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ } else if (!VNI->def.isDef()) {
+ report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ }
+ }
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF);
+ I->print(*OS);
+ *OS << " has a valno not in " << LI << '\n';
+ }
+
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ !MI->readsVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ // FIXME: Should we check for each of these?
+ bool hasDeadDef = false;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+          if (MOI->isReg() && MOI->getReg() == LI.reg &&
+              MOI->isDef() && MOI->isDead()) {
+ hasDeadDef = true;
+ break;
+ }
+ }
+
+ if (!hasDeadDef) {
+ report("Instruction killing live segment neither defines nor reads "
+ "register", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ continue;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
+ const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+
+ if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI))
+ continue;
+
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << PEnd << " in " << LI << '\n';
+ continue;
+ }
+
+ if (PVNI != VNI) {
+ report("Different value live out of predecessor", *PI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+ }
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF);
+ *OS << NumComp << " components in " << LI << '\n';
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
+ }
+ }
+}
+
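The calcRegsPassed/calcRegsRequired routines above are plain worklist dataflow solvers, seeded from live-out (or live-in) sets and iterated to a fixed point. A minimal standalone sketch of the forward direction follows; Block, liveOut, and addPassed are hypothetical stand-ins for MachineBasicBlock, BBInfo::regsLiveOut, and BBInfo::addPassed, not the LLVM API:

  // Minimal sketch of the forward worklist propagation in calcRegsPassed.
  // Block and its fields are hypothetical stand-ins, not LLVM types.
  #include <set>
  #include <vector>

  struct Block {
    std::vector<Block*> succs;
    std::set<unsigned> liveOut;     // seeds the propagation
    std::set<unsigned> vregsPassed; // computed fixed point
  };

  // Insert Regs into B->vregsPassed; return true if anything was added.
  static bool addPassed(Block *B, const std::set<unsigned> &Regs) {
    bool Changed = false;
    for (std::set<unsigned>::const_iterator I = Regs.begin(),
         E = Regs.end(); I != E; ++I)
      Changed |= B->vregsPassed.insert(*I).second;
    return Changed;
  }

  static void calcRegsPassed(const std::vector<Block*> &Blocks) {
    std::set<Block*> Todo;
    // Seed: push each block's live-out regs to its successors.
    for (size_t i = 0; i != Blocks.size(); ++i)
      for (size_t s = 0; s != Blocks[i]->succs.size(); ++s)
        if (addPassed(Blocks[i]->succs[s], Blocks[i]->liveOut))
          Todo.insert(Blocks[i]->succs[s]);
    // Iterate to a fixed point; the result is independent of visit order.
    while (!Todo.empty()) {
      Block *B = *Todo.begin();
      Todo.erase(Todo.begin());
      for (size_t s = 0; s != B->succs.size(); ++s) {
        Block *S = B->succs[s];
        if (S != B && addPassed(S, B->vregsPassed))
          Todo.insert(S);
      }
    }
  }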
diff --git a/src/LLVM/lib/CodeGen/OcamlGC.cpp b/src/LLVM/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 0000000..48db200
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,37 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+ class OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
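OcamlGC.cpp is self-registering: the file-scope GCRegistry::Add<OcamlGC> object runs its constructor during static initialization, so merely linking the object file makes the strategy reachable by the name "ocaml" (used by IR functions carrying the corresponding gc attribute). A hedged sketch of that registration idiom with generic names; Registry, Strategy, and MyGC are illustrative, not LLVM types:

  // Sketch of the static-registration idiom behind GCRegistry::Add.
  // Registry, Strategy, and MyGC are illustrative, not LLVM types.
  #include <map>
  #include <string>

  struct Strategy { virtual ~Strategy() {} };

  struct Registry {
    typedef Strategy *(*Factory)();
    static std::map<std::string, Factory> &entries() {
      static std::map<std::string, Factory> M; // built on first use
      return M;
    }
    template <typename T> static Strategy *make() { return new T(); }
    // A file-scope Add<T> instance registers T during static
    // initialization, mirroring GCRegistry::Add<OcamlGC> above.
    template <typename T> struct Add {
      explicit Add(const std::string &Name) { entries()[Name] = &make<T>; }
    };
  };

  struct MyGC : Strategy {};
  static Registry::Add<MyGC> X("mygc"); // now reachable by name at runtime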
diff --git a/src/LLVM/lib/CodeGen/OptimizePHIs.cpp b/src/LLVM/lib/CodeGen/OptimizePHIs.cpp
index edb4eea..c05be13 100644
--- a/src/LLVM/lib/CodeGen/OptimizePHIs.cpp
+++ b/src/LLVM/lib/CodeGen/OptimizePHIs.cpp
@@ -33,7 +33,9 @@
public:
static char ID; // Pass identification
- OptimizePHIs() : MachineFunctionPass(ID) {}
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -55,7 +57,7 @@
char OptimizePHIs::ID = 0;
INITIALIZE_PASS(OptimizePHIs, "opt-phis",
- "Optimize machine instruction PHIs", false, false);
+ "Optimize machine instruction PHIs", false, false)
FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
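OptimizePHIs shows the pass-registration pattern this dump migrates to: the constructor registers eagerly with the PassRegistry, and INITIALIZE_PASS (whose trailing semicolon is now gone) defines the matching initializer. A sketch of the pattern for a hypothetical pass; MyPass and the inline initializeMyPassPass declaration are illustrative, and real passes get the declaration from llvm/InitializePasses.h:

  // Sketch only: MyPass is hypothetical. Real passes declare the
  // initializer in llvm/InitializePasses.h rather than inline.
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/PassRegistry.h"

  using namespace llvm;

  namespace llvm { void initializeMyPassPass(PassRegistry &); }

  namespace {
    class MyPass : public MachineFunctionPass {
    public:
      static char ID; // Pass identification
      MyPass() : MachineFunctionPass(ID) {
        // Eager registration replaces the old static RegisterPass<> object.
        initializeMyPassPass(*PassRegistry::getPassRegistry());
      }
      virtual bool runOnMachineFunction(MachineFunction &) { return false; }
    };
  }

  char MyPass::ID = 0;
  INITIALIZE_PASS(MyPass, "my-pass", "Example machine function pass",
                  false, false)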
diff --git a/src/LLVM/lib/CodeGen/PACKAGE.vcxproj b/src/LLVM/lib/CodeGen/PACKAGE.vcxproj
new file mode 100644
index 0000000..bd98632
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/PACKAGE.vcxproj
@@ -0,0 +1,277 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="MinSizeRel|Win32">
+ <Configuration>MinSizeRel</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="RelWithDebInfo|Win32">
+ <Configuration>RelWithDebInfo</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>PACKAGE</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\CodeGen;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\PACKAGE_force.rule">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeLists.txt">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\ALL_BUILD.vcxproj">
+ <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/PACKAGE.vcxproj.filters b/src/LLVM/lib/CodeGen/PACKAGE.vcxproj.filters
new file mode 100644
index 0000000..a570359
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\PACKAGE_force.rule">
+ <Filter>CMake Rules</Filter>
+ </CustomBuild>
+ <CustomBuild Include="CMakeLists.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="CMake Rules">
+ <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
diff --git a/src/LLVM/lib/CodeGen/PHIElimination.cpp b/src/LLVM/lib/CodeGen/PHIElimination.cpp
index 147135e..6994aa5 100644
--- a/src/LLVM/lib/CodeGen/PHIElimination.cpp
+++ b/src/LLVM/lib/CodeGen/PHIElimination.cpp
@@ -14,12 +14,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "phielim"
-#include "PHIElimination.h"
+#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Function.h"
@@ -27,38 +28,98 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
-#include <map>
using namespace llvm;
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden, cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
+
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in the
+    /// function. In particular, we want to map the number of uses of a
+    /// virtual register which is used in a PHI node to the BB the vreg is
+    /// coming from. This is used later to determine when the vreg is killed
+    /// in the BB.
+    ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ LiveVariables &LV, MachineLoopInfo *MLI);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
-static RegisterPass<PHIElimination>
-X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
+INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
-char &llvm::PHIEliminationID = PHIElimination::ID;
+char& llvm::PHIEliminationID = PHIElimination::ID;
-void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
- // rdar://7401784 This would be nice:
- // AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
bool Changed = false;
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
// Split critical edges to help the coalescer
- if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>())
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= SplitPHIEdges(MF, *I, *LV);
+ if (!DisableEdgeSplitting) {
+ if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+ }
+ }
// Populate VRegPHIUseCount
analyzePHINodes(MF);
@@ -91,14 +152,14 @@
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
///
-bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF,
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
return false; // Quick exit for basic blocks without PHIs.
// Get an iterator to the first instruction after the last PHI node (this may
// also be the end of the basic block).
- MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
while (MBB.front().isPHI())
LowerAtomicPHINode(MBB, AfterPHIsIt);
@@ -119,27 +180,14 @@
return true;
}
-// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
-// when following the CFG edge to SuccMBB. This needs to be after any def of
-// SrcReg, but before any subsequent point where control flow might jump out of
-// the basic block.
-MachineBasicBlock::iterator
-llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
- MachineBasicBlock &SuccMBB,
- unsigned SrcReg) {
- // Handle the trivial case trivially.
- if (MBB.empty())
- return MBB.begin();
- return MBB.getFirstTerminator();
-}
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct.
///
-void llvm::PHIElimination::LowerAtomicPHINode(
+void PHIElimination::LowerAtomicPHINode(
MachineBasicBlock &MBB,
MachineBasicBlock::iterator AfterPHIsIt) {
++NumAtomic;
@@ -148,6 +196,7 @@
unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
bool isDead = MPhi->getOperand(0).isDead();
// Create a new register for the incoming PHI arguments.
@@ -173,7 +222,7 @@
IncomingReg = entry;
reusedIncoming = true;
++NumReused;
- DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
} else {
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -234,6 +283,8 @@
SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
for (int i = NumSrcs - 1; i >= 0; --i) {
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
@@ -258,12 +309,12 @@
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
MachineBasicBlock::iterator InsertPos =
- FindCopyInsertPoint(opBlock, MBB, SrcReg);
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
if (!reusedIncoming && IncomingReg)
BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg);
+ TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
@@ -299,6 +350,8 @@
#ifndef NDEBUG
for (MachineBasicBlock::iterator TI = llvm::next(Term);
TI != opBlock.end(); ++TI) {
+ if (TI->isDebugValue())
+ continue;
assert(!TI->readsRegister(SrcReg) &&
"Terminator instructions cannot use virtual registers unless"
"they are the first terminator in a block!");
@@ -307,9 +360,13 @@
} else if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't insert a copy this time.
KillInst = Term;
- while (KillInst != opBlock.begin())
- if ((--KillInst)->readsRegister(SrcReg))
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
break;
+ }
} else {
// We just inserted this copy.
KillInst = prior(InsertPos);
@@ -335,7 +392,7 @@
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
///
-void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
I != E; ++I)
for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
@@ -345,12 +402,14 @@
BBI->getOperand(i).getReg())];
}
-bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
- MachineBasicBlock &MBB,
- LiveVariables &LV) {
- if (MBB.empty() || !MBB.front().isPHI())
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ LiveVariables &LV,
+ MachineLoopInfo *MLI) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
+ bool Changed = false;
for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
@@ -359,9 +418,20 @@
// We break edges when registers are live out from the predecessor block
// (not considering PHI nodes). If the register is live in to this block
// anyway, we would gain nothing from splitting.
- if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
- PreMBB->SplitCriticalEdge(&MBB, this);
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB != &MBB &&
+ !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+ if (!MLI ||
+ !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+ MLI->isLoopHeader(&MBB))) {
+ if (PreMBB->SplitCriticalEdge(&MBB, this)) {
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ }
}
}
- return true;
+ return Changed;
}
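SplitPHIEdges now folds three conditions together: never split a self edge, split only when the register is live out of the predecessor but not otherwise live into the block, and leave loop backedges alone so no small out-of-line block lands inside the loop. Restated as a hypothetical predicate (shouldSplitEdge is not an LLVM function):

  // Hedged sketch: shouldSplitEdge is a hypothetical helper, not an LLVM
  // function; it restates the policy of SplitPHIEdges above.
  #include "llvm/CodeGen/LiveVariables.h"
  #include "llvm/CodeGen/MachineLoopInfo.h"

  using namespace llvm;

  static bool shouldSplitEdge(MachineBasicBlock *PreMBB,
                              MachineBasicBlock *MBB, unsigned Reg,
                              LiveVariables &LV, MachineLoopInfo *MLI) {
    if (PreMBB == MBB)
      return false; // a self edge cannot be usefully split
    // Splitting only helps when the register is live out of the predecessor
    // but not otherwise live into this block.
    if (LV.isLiveIn(Reg, *MBB) || !LV.isLiveOut(Reg, *PreMBB))
      return false;
    // Avoid loop backedges: a predecessor inside the loop jumping to the
    // loop header would get a small out-of-line block inside the loop.
    if (MLI && MLI->getLoopFor(PreMBB) == MLI->getLoopFor(MBB) &&
        MLI->isLoopHeader(MBB))
      return false;
    return true;
  }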
diff --git a/src/LLVM/lib/CodeGen/PHIEliminationUtils.cpp b/src/LLVM/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 0000000..10bfdcc
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+  // Make sure the copy goes after any phi nodes, however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
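The multi-def/use branch above compresses its scan into `while (!DefUsesInMBB.count(&*--InsertPoint)) {}`. A behavior-equivalent expansion, with afterLastDefUse as a hypothetical name for the same computation:

  // Behavior-equivalent expansion of the multi-def/use branch above;
  // afterLastDefUse is a hypothetical helper, not part of LLVM.
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"

  using namespace llvm;

  static MachineBasicBlock::iterator
  afterLastDefUse(MachineBasicBlock *MBB,
                  const SmallPtrSet<MachineInstr*, 8> &DefUsesInMBB) {
    MachineBasicBlock::iterator I = MBB->end();
    do {
      --I; // safe: the caller guarantees at least one def/use in MBB
    } while (!DefUsesInMBB.count(&*I));
    return ++I; // insert immediately after the last def/use
  }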
diff --git a/src/LLVM/lib/CodeGen/PHIEliminationUtils.h b/src/LLVM/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 0000000..9ac47fb
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/src/LLVM/lib/CodeGen/Passes.cpp b/src/LLVM/lib/CodeGen/Passes.cpp
index 3489db2..315aedd 100644
--- a/src/LLVM/lib/CodeGen/Passes.cpp
+++ b/src/LLVM/lib/CodeGen/Passes.cpp
@@ -55,6 +55,11 @@
RegisterRegAlloc::setDefault(RegAlloc);
}
+ // This forces linking of the linear scan register allocator,
+ // so -regalloc=linearscan still works in clang.
+ if (Ctor == createLinearScanRegisterAllocator)
+ return createLinearScanRegisterAllocator();
+
if (Ctor != createDefaultRegisterAllocator)
return Ctor();
@@ -63,6 +68,6 @@
case CodeGenOpt::None:
return createFastRegisterAllocator();
default:
- return createLinearScanRegisterAllocator();
+ return createGreedyRegisterAllocator();
}
}
diff --git a/src/LLVM/lib/CodeGen/PeepholeOptimizer.cpp b/src/LLVM/lib/CodeGen/PeepholeOptimizer.cpp
index 96a2b6e..bbc7ce2 100644
--- a/src/LLVM/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/src/LLVM/lib/CodeGen/PeepholeOptimizer.cpp
@@ -30,6 +30,15 @@
// If the "sub" instruction all ready sets (or could be modified to set) the
// same flag that the "cmp" instruction sets and that "bz" uses, then we can
// eliminate the "cmp" instruction.
+//
+// - Optimize Bitcast pairs:
+//
+// v1 = bitcast v0
+// v2 = bitcast v1
+// = v2
+// =>
+// v1 = bitcast v0
+// = v0
//
//===----------------------------------------------------------------------===//
@@ -41,7 +50,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -50,13 +61,14 @@
Aggressive("aggressive-ext-opt", cl::Hidden,
cl::desc("Aggressive extension optimization"));
-STATISTIC(NumReuse, "Number of extension results reused");
-
-// Optimize Comparisons
static cl::opt<bool>
-EnableOptCmps("enable-optimize-cmps", cl::init(true), cl::Hidden);
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
-STATISTIC(NumEliminated, "Number of compares eliminated");
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
+STATISTIC(NumCmps, "Number of compares eliminated");
+STATISTIC(NumImmFold,  "Number of move immediates folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -67,7 +79,9 @@
public:
static char ID; // Pass identification
- PeepholeOptimizer() : MachineFunctionPass(ID) {}
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -81,15 +95,25 @@
}
private:
+ bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
};
}
char PeepholeOptimizer::ID = 0;
-INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
- "Peephole Optimizations", false, false);
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
FunctionPass *llvm::createPeepholeOptimizerPass() {
return new PeepholeOptimizer();
@@ -107,12 +131,10 @@
bool PeepholeOptimizer::
OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
- LocalMIs.insert(MI);
-
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
-
+
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
@@ -232,37 +254,148 @@
return Changed;
}
+/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts
+/// a value across register classes), and the source is defined by another
+/// bitcast instruction B, and the register class of the source of B matches
+/// the register class of the def of A, then it is legal to replace all uses
+/// of the def of A with the source of B. e.g.
+///   %vreg0<def> = VMOVSR %vreg1
+///   %vreg3<def> = VMOVRS %vreg0
+/// Replace all uses of vreg3 with vreg1.
+bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+
+ unsigned Def = 0;
+ unsigned Src = 0;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Def = Reg;
+ else if (Src)
+ // Multiple sources?
+ return false;
+ else
+ Src = Reg;
+ }
+
+ assert(Def && Src && "Malformed bitcast instruction!");
+
+ MachineInstr *DefMI = MRI->getVRegDef(Src);
+ if (!DefMI || !DefMI->getDesc().isBitcast())
+ return false;
+
+ unsigned SrcSrc = 0;
+ NumDefs = DefMI->getDesc().getNumDefs();
+ NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
+ if (NumDefs != 1)
+ return false;
+ for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+ const MachineOperand &MO = DefMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+    // Defs were already skipped above, so this operand is a source.
+    if (SrcSrc)
+      // Multiple sources?
+      return false;
+    SrcSrc = Reg;
+ }
+
+ if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
+ return false;
+
+ MRI->replaceRegWith(Def, SrcSrc);
+ MRI->clearKillFlags(SrcSrc);
+ MI->eraseFromParent();
+ ++NumBitcasts;
+ return true;
+}
+
/// OptimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against already sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and use
/// the flag from the previous instruction.
bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
- if (!EnableOptCmps) return false;
-
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
unsigned SrcReg;
- int CmpValue;
- if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
+ int CmpMask, CmpValue;
+ if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
- return false;
-
- // Attempt to convert the defining instruction to set the "zero" flag.
- if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
- ++NumEliminated;
+ // Attempt to optimize the comparison instruction.
+ if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
+ ++NumCmps;
return true;
}
return false;
}
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isMoveImmediate())
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// FoldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end());
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePeephole)
+ return false;
+
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -271,21 +404,61 @@
bool Changed = false;
SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
+ bool First = true;
+ MachineBasicBlock::iterator PMII;
for (MachineBasicBlock::iterator
- MII = I->begin(), ME = I->end(); MII != ME; ) {
+ MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
+ LocalMIs.insert(MI);
- if (MI->getDesc().isCompare()) {
- ++MII; // The iterator may become invalid if the compare is deleted.
- Changed |= OptimizeCmpInstr(MI, MBB);
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+ MI->hasUnmodeledSideEffects()) {
+ ++MII;
+ continue;
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isBitcast()) {
+ if (OptimizeBitcastInstr(MI, MBB)) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ } else if (MCID.isCompare()) {
+ if (OptimizeCmpInstr(MI, MBB)) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
} else {
Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
- ++MII;
+ if (SeenMoveImm)
+ Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
+
+ First = false;
+ PMII = MII;
+ ++MII;
}
}
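The rewritten loop in runOnMachineFunction carries a subtle invariant: OptimizeBitcastInstr and OptimizeCmpInstr erase MI, which invalidates MII, so the loop tracks First and PMII and resumes from `First ? I->begin() : llvm::next(PMII)`. The same erase-while-iterating pattern on a plain std::list, with all names hypothetical and tryRewrite erasing even elements purely for illustration:

  // Standalone illustration of the First/PMII resume pattern above.
  #include <list>

  // The rewrite step erases the element itself (as MI->eraseFromParent()
  // does in the pass), leaving the caller's iterator dangling.
  static bool tryRewrite(std::list<int> &L, std::list<int>::iterator I) {
    if (*I % 2 == 0) { L.erase(I); return true; }
    return false;
  }

  static void walk(std::list<int> &L) {
    bool First = true;
    std::list<int>::iterator Prev; // last element we kept
    for (std::list<int>::iterator I = L.begin(); I != L.end();) {
      if (tryRewrite(L, I)) {
        // I is dangling; resume right after the last kept element,
        // mirroring MII = First ? I->begin() : llvm::next(PMII) above.
        if (First) {
          I = L.begin();
        } else {
          I = Prev;
          ++I;
        }
        continue;
      }
      First = false;
      Prev = I;
      ++I;
    }
  }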
diff --git a/src/LLVM/lib/CodeGen/PostRASchedulerList.cpp b/src/LLVM/lib/CodeGen/PostRASchedulerList.cpp
index f0bd6d1..c73e877 100644
--- a/src/LLVM/lib/CodeGen/PostRASchedulerList.cpp
+++ b/src/LLVM/lib/CodeGen/PostRASchedulerList.cpp
@@ -22,6 +22,7 @@
#include "AntiDepBreaker.h"
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
@@ -37,7 +38,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -52,7 +53,7 @@
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
// Post-RA scheduling is enabled with
-// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
// override the target.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
@@ -80,6 +81,7 @@
class PostRAScheduler : public MachineFunctionPass {
AliasAnalysis *AA;
const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
CodeGenOpt::Level OptLevel;
public:
@@ -133,18 +135,13 @@
std::vector<unsigned> KillIndices;
public:
- SchedulePostRATDList(MachineFunction &MF,
- const MachineLoopInfo &MLI,
- const MachineDominatorTree &MDT,
- ScheduleHazardRecognizer *HR,
- AntiDepBreaker *ADB,
- AliasAnalysis *aa)
- : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
- HazardRec(HR), AntiDepBreak(ADB), AA(aa),
- KillIndices(TRI->getNumRegs()) {}
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo&,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
- ~SchedulePostRATDList() {
- }
+ ~SchedulePostRATDList();
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
@@ -183,46 +180,64 @@
};
}
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
+ KillIndices(TRI->getNumRegs())
+{
+ const TargetMachine &TM = MF.getTarget();
+ const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ HazardRec =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
- AA = &getAnalysis<AliasAnalysis>();
TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
- TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
- const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+ // This may upgrade the AntiDepMode.
+ const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
return false;
}
// Check for antidep breaking override...
if (EnableAntiDepBreaking.getPosition() > 0) {
- AntiDepMode = (EnableAntiDepBreaking == "all") ?
- TargetSubtarget::ANTIDEP_ALL :
- (EnableAntiDepBreaking == "critical")
- ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
}
DEBUG(dbgs() << "PostRAScheduler\n");
- const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
- const TargetMachine &TM = Fn.getTarget();
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- ScheduleHazardRecognizer *HR =
- TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
- AntiDepBreaker *ADB =
- ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
- ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
- (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
-
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ CriticalPathRCs);
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
@@ -270,9 +285,6 @@
Scheduler.FixupKills(MBB);
}
- delete HR;
- delete ADB;
-
return true;
}
@@ -298,7 +310,7 @@
if (AntiDepBreak != NULL) {
unsigned Broken =
AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
- InsertPosIndex);
+ InsertPosIndex, DbgValues);
if (Broken != 0) {
// We made changes. Update the dependency graph.
@@ -534,10 +546,16 @@
#endif
--SuccSU->NumPredsLeft;
- // Compute how many cycles it will be before this actually becomes
- // available. This is the max of the start time of all predecessors plus
- // their latencies.
- SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ // Standard scheduler algorithms will recompute the depth of the successor
+ // here as such:
+ // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ //
+ // However, we lazily compute node depth instead. Note that
+ // ScheduleNodeTopDown has already updated the depth of this node which causes
+ // all descendants to be marked dirty. Setting the successor depth explicitly
+ // here would cause depth to be recomputed for all its ancestors. If the
+ // successor is not yet ready (because of a transitively redundant edge) then
+ // this causes depth computation to be quadratic in the size of the DAG.
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
@@ -617,13 +635,7 @@
MinDepth = PendingQueue[i]->getDepth();
}
- DEBUG(dbgs() << "\n*** Examining Available\n";
- LatencyPriorityQueue q = AvailableQueue;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(this);
- });
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
SUnit *FoundSUnit = 0;
bool HasNoopHazards = false;
@@ -631,7 +643,7 @@
SUnit *CurSUnit = AvailableQueue.pop();
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit);
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
FoundSUnit = CurSUnit;
break;
@@ -655,6 +667,12 @@
ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
+ if (HazardRec->atIssueLimit()) {
+ DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
} else {
if (CycleHasInsts) {
DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
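The replacement comment in the successor update explains why depth is no longer set eagerly: ScheduleNodeTopDown already dirtied the descendants, and an eager setDepthToAtLeast would force recomputation for all ancestors, which goes quadratic on DAGs with transitively redundant edges. A minimal sketch (not part of the patch) of the dirty-bit scheme it relies on; names are illustrative, with one latency per node rather than per edge, not the real SUnit API:

#include <algorithm>
#include <vector>

struct Node {
  std::vector<Node*> Preds;
  unsigned Latency;     // simplification: one latency per node, not per edge
  unsigned Depth;
  bool DepthDirty;      // set when any ancestor's depth changes

  Node() : Latency(1), Depth(0), DepthDirty(true) {}

  // Computed on demand: max over predecessors, cached until marked dirty.
  unsigned getDepth() {
    if (DepthDirty) {
      unsigned D = 0;
      for (size_t i = 0; i != Preds.size(); ++i)
        D = std::max(D, Preds[i]->getDepth() + Preds[i]->Latency);
      Depth = D;
      DepthDirty = false;
    }
    return Depth;
  }
};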
diff --git a/src/LLVM/lib/CodeGen/ProcessImplicitDefs.cpp b/src/LLVM/lib/CodeGen/ProcessImplicitDefs.cpp
index b8831db..b1d8c97 100644
--- a/src/LLVM/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/src/LLVM/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,8 +26,11 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
-INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs",
- "Process Implicit Definitions.", false, false);
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -44,7 +47,6 @@
bool
ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
unsigned Reg, unsigned OpIdx,
- const TargetInstrInfo *tii_,
SmallSet<unsigned, 8> &ImpDefRegs) {
switch(OpIdx) {
case 1:
@@ -58,7 +60,6 @@
}
static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
- const TargetInstrInfo *tii_,
SmallSet<unsigned, 8> &ImpDefRegs) {
if (MI->isCopy()) {
MachineOperand &MO0 = MI->getOperand(0);
@@ -83,11 +84,10 @@
bool Changed = false;
- const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo();
- const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo();
- MachineRegisterInfo *mri_ = &fn.getRegInfo();
-
- LiveVariables *lv_ = &getAnalysis<LiveVariables>();
+ TII = fn.getTarget().getInstrInfo();
+ TRI = fn.getTarget().getRegisterInfo();
+ MRI = &fn.getRegInfo();
+ LV = &getAnalysis<LiveVariables>();
SmallSet<unsigned, 8> ImpDefRegs;
SmallVector<MachineInstr*, 8> ImpDefMIs;
@@ -110,7 +110,7 @@
unsigned Reg = MI->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS)
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
ImpDefRegs.insert(*SS);
}
ImpDefMIs.push_back(MI);
@@ -122,11 +122,17 @@
MachineOperand &MO = MI->getOperand(1);
if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
if (MO.isKill()) {
- LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
+ LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
vi.removeKill(MI);
}
+ unsigned Reg = MI->getOperand(0).getReg();
MI->eraseFromParent();
Changed = true;
+
+ // A REG_SEQUENCE may have been expanded into partial definitions.
+ // If this was the last one, mark Reg as implicitly defined.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
+ ImpDefRegs.insert(Reg);
continue;
}
}
@@ -142,14 +148,14 @@
if (!ImpDefRegs.count(Reg))
continue;
// Use is a copy, just turn it into an implicit_def.
- if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) {
+ if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
bool isKill = MO.isKill();
- MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
MI->RemoveOperand(j);
if (isKill) {
ImpDefRegs.erase(Reg);
- LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
vi.removeKill(MI);
}
ChangedToImpDef = true;
@@ -207,8 +213,8 @@
// uses.
bool Skip = false;
SmallVector<MachineInstr*, 4> DeadImpDefs;
- for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
- DE = mri_->def_end(); DI != DE; ++DI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
MachineInstr *DeadImpDef = &*DI;
if (!DeadImpDef->isImplicitDef()) {
Skip = true;
@@ -226,8 +232,8 @@
Changed = true;
// Process each use instruction once.
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
- UE = mri_->use_end(); UI != UE; ++UI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
if (UI.getOperand().isUndef())
continue;
MachineInstr *RMI = &*UI;
@@ -239,8 +245,8 @@
MachineInstr *RMI = RUses[i];
// Turn a copy use into an implicit_def.
- if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) {
- RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+ if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
+ RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
bool isKill = false;
SmallVector<unsigned, 4> Ops;
@@ -260,15 +266,15 @@
// Update LiveVariables varinfo if the instruction is a kill.
if (isKill) {
- LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
vi.removeKill(RMI);
}
continue;
}
// Replace Reg with a new vreg that's marked implicit.
- const TargetRegisterClass* RC = mri_->getRegClass(Reg);
- unsigned NewVReg = mri_->createVirtualRegister(RC);
+ const TargetRegisterClass* RC = MRI->getRegClass(Reg);
+ unsigned NewVReg = MRI->createVirtualRegister(RC);
bool isKill = true;
for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
MachineOperand &RRMO = RMI->getOperand(j);
diff --git a/src/LLVM/lib/CodeGen/PrologEpilogInserter.cpp b/src/LLVM/lib/CodeGen/PrologEpilogInserter.cpp
index 166cf96..32c9325 100644
--- a/src/LLVM/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/src/LLVM/lib/CodeGen/PrologEpilogInserter.cpp
@@ -19,7 +19,9 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -27,13 +29,16 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
@@ -41,8 +46,17 @@
char PEI::ID = 0;
-INITIALIZE_PASS(PEI, "prologepilog",
- "Prologue/Epilogue Insertion", false, false);
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+
+STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
/// createPrologEpilogCodeInserter - This function returns a pass that inserts
/// prolog and epilog code, and eliminates abstract frame references.
@@ -55,9 +69,10 @@
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const Function* F = Fn.getFunction();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
- FrameConstantRegMap.clear();
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
@@ -66,7 +81,7 @@
// Allow the target machine to make some adjustments to the function
// e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
- TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+ TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
// Scan the function for modified callee saved registers and insert spill code
// for any callee saved registers that are modified.
@@ -86,7 +101,7 @@
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TRI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
@@ -133,14 +148,16 @@
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
bool AdjustsStack = MFI->adjustsStack();
// Get the function call frame set-up and tear-down instruction opcode
- int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
- int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
// Early exit for targets which have no call frame setup/destroy pseudo
// instructions.
@@ -158,6 +175,11 @@
if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
AdjustsStack = true;
FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
}
MFI->setAdjustsStack(AdjustsStack);
@@ -171,7 +193,7 @@
// the target doesn't indicate otherwise, remove the call frame pseudos
// here. The sub/add sp instruction pairs are still inserted, but we don't
// need to track the SP adjustment for frame index elimination.
- if (RegInfo->canSimplifyCallFramePseudos(Fn))
+ if (TFI->canSimplifyCallFramePseudos(Fn))
RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
}
@@ -181,7 +203,7 @@
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
// Get the callee saved register list...
@@ -220,7 +242,7 @@
return; // Early exit if no callee saved registers are modified!
unsigned NumFixedSpillSlots;
- const TargetFrameInfo::SpillSlot *FixedSpillSlots =
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
// Now that we know which registers need to be saved and restored, allocate
@@ -238,7 +260,7 @@
// Check to see if this physreg must be spilled to a particular stack slot
// on this target.
- const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
FixedSlot->Reg != Reg)
++FixedSlot;
@@ -281,13 +303,14 @@
return;
const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
MachineBasicBlock::iterator I;
if (! ShrinkWrapThisFunction) {
// Spill using target interface.
I = EntryBlock->begin();
- if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in.
// It's killed at the spill.
@@ -318,8 +341,8 @@
--BeforeI;
// Restore all registers immediately before the return and any
- // terminators that preceed it.
- if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -418,7 +441,7 @@
--BeforeI;
// Restore all registers immediately before the return and any
- // terminators that preceed it.
+ // terminators that precede it.
for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
unsigned Reg = blockCSI[i].getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -458,8 +481,10 @@
Offset = (Offset + Align - 1) / Align * Align;
if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
MFI->setObjectOffset(FrameIdx, Offset);
Offset += MFI->getObjectSize(FrameIdx);
}
@@ -469,10 +494,10 @@
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
bool StackGrowsDown =
- TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -538,12 +563,39 @@
// Make sure the special register scavenging spill slot is closest to the
// frame pointer if a frame pointer is required.
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
+ if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn)) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
+ // FIXME: Once this is working, then the enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> LargeStackObjs;
@@ -553,6 +605,9 @@
// Assign large stack objects first.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && (int)i == RS->getScavengingFrameIndex())
@@ -572,6 +627,9 @@
// Then assign frame offsets to stack objects that are not used to spill
// callee saved registers.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && (int)i == RS->getScavengingFrameIndex())
@@ -588,17 +646,18 @@
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
- if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
+ if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
+ !RegInfo->useFPForScavengingIndex(Fn))) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
- if (!RegInfo->targetHandlesStackFrameRounding()) {
+ if (!TFI.targetHandlesStackFrameRounding()) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
Offset += MFI->getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
@@ -621,7 +680,9 @@
}
// Update frame info to pretend that this is part of the stack...
- MFI->setStackSize(Offset - LocalAreaOffset);
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
}
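Both AdjustStackOffset and the local-block code above round Offset up to an alignment boundary with (Offset + Align - 1) / Align * Align; for Offset = 20 and Align = 16 that is 35 / 16 * 16 = 32 under integer division. As a standalone helper (a sketch, not part of the patch):

// Integer round-up to a multiple of Align, as used for stack offsets above.
// e.g. alignTo(20, 16) == 32, alignTo(32, 16) == 32.
static long long alignTo(long long Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}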
/// insertPrologEpilogCode - Scan the function for modified callee saved
@@ -629,17 +690,24 @@
/// prolog and epilog code to the function.
///
void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
// Add prologue to the function...
- TRI->emitPrologue(Fn);
+ TFI.emitPrologue(Fn);
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
if (!I->empty() && I->back().getDesc().isReturn())
- TRI->emitEpilogue(Fn, *I);
+ TFI.emitEpilogue(Fn, *I);
}
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (EnableSegmentedStacks)
+ TFI.adjustForSegmentedStacks(Fn);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
@@ -650,12 +718,13 @@
const TargetMachine &TM = Fn.getTarget();
assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- const TargetFrameInfo *TFI = TM.getFrameInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
bool StackGrowsDown =
- TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
- int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
- int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
@@ -712,16 +781,8 @@
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
- TargetRegisterInfo::FrameIndexValue Value;
- unsigned VReg =
- TRI.eliminateFrameIndex(MI, SPAdj, &Value,
- FrameIndexVirtualScavenging ? NULL : RS);
- if (VReg) {
- assert (FrameIndexVirtualScavenging &&
- "Not scavenging, but virtual returned from "
- "eliminateFrameIndex()!");
- FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj);
- }
+ TRI.eliminateFrameIndex(MI, SPAdj,
+ FrameIndexVirtualScavenging ? NULL : RS);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -749,38 +810,6 @@
}
}
-/// findLastUseReg - find the killing use of the specified register within
-/// the instruciton range. Return the operand number of the kill in Operand.
-static MachineBasicBlock::iterator
-findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME,
- unsigned Reg) {
- // Scan forward to find the last use of this virtual register
- for (++I; I != ME; ++I) {
- MachineInstr *MI = I;
- bool isDefInsn = false;
- bool isKillInsn = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).isReg()) {
- unsigned OpReg = MI->getOperand(i).getReg();
- if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg))
- continue;
- assert (OpReg == Reg
- && "overlapping use of scavenged index register!");
- // If this is the killing use, we have a candidate.
- if (MI->getOperand(i).isKill())
- isKillInsn = true;
- else if (MI->getOperand(i).isDef())
- isDefInsn = true;
- }
- if (isKillInsn && !isDefInsn)
- return I;
- }
- // If we hit the end of the basic block, there was no kill of
- // the virtual register, which is wrong.
- assert (0 && "scavenged index register never killed!");
- return ME;
-}
-
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
@@ -790,153 +819,46 @@
E = Fn.end(); BB != E; ++BB) {
RS->enterBasicBlock(BB);
- // FIXME: The logic flow in this function is still too convoluted.
- // It needs a cleanup refactoring. Do that in preparation for tracking
- // more than one scratch register value and using ranges to find
- // available scratch registers.
- unsigned CurrentVirtReg = 0;
- unsigned CurrentScratchReg = 0;
- bool havePrevValue = false;
- TargetRegisterInfo::FrameIndexValue PrevValue(0,0);
- TargetRegisterInfo::FrameIndexValue Value(0,0);
- MachineInstr *PrevLastUseMI = NULL;
- unsigned PrevLastUseOp = 0;
- bool trackingCurrentValue = false;
+ unsigned VirtReg = 0;
+ unsigned ScratchReg = 0;
int SPAdj = 0;
// The instruction stream may change in the loop, so check BB->end()
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
MachineInstr *MI = I;
- bool isDefInsn = false;
- bool isKillInsn = false;
- bool clobbersScratchReg = false;
- bool DoIncr = true;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isReg()) {
MachineOperand &MO = MI->getOperand(i);
unsigned Reg = MO.getReg();
if (Reg == 0)
continue;
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- // If we have a previous scratch reg, check and see if anything
- // here kills whatever value is in there.
- if (Reg == CurrentScratchReg) {
- if (MO.isUse()) {
- // Two-address operands implicitly kill
- if (MO.isKill() || MI->isRegTiedToDefOperand(i))
- clobbersScratchReg = true;
- } else {
- assert (MO.isDef());
- clobbersScratchReg = true;
- }
- }
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- }
- // If this is a def, remember that this insn defines the value.
- // This lets us properly consider insns which re-use the scratch
- // register, such as r2 = sub r2, #imm, in the middle of the
- // scratch range.
- if (MO.isDef())
- isDefInsn = true;
+
+ ++NumVirtualFrameRegs;
// Have we already allocated a scratch register for this virtual?
- if (Reg != CurrentVirtReg) {
+ if (Reg != VirtReg) {
// When we first encounter a new virtual register, it
// must be a definition.
assert(MI->getOperand(i).isDef() &&
"frame index virtual missing def!");
- // We can't have nested virtual register live ranges because
- // there's only a guarantee of one scavenged register at a time.
- assert (CurrentVirtReg == 0 &&
- "overlapping frame index virtual registers!");
-
- // If the target gave us information about what's in the register,
- // we can use that to re-use scratch regs.
- DenseMap<unsigned, FrameConstantEntry>::iterator Entry =
- FrameConstantRegMap.find(Reg);
- trackingCurrentValue = Entry != FrameConstantRegMap.end();
- if (trackingCurrentValue) {
- SPAdj = (*Entry).second.second;
- Value = (*Entry).second.first;
- } else {
- SPAdj = 0;
- Value.first = 0;
- Value.second = 0;
- }
-
- // If the scratch register from the last allocation is still
- // available, see if the value matches. If it does, just re-use it.
- if (trackingCurrentValue && havePrevValue && PrevValue == Value) {
- // FIXME: This assumes that the instructions in the live range
- // for the virtual register are exclusively for the purpose
- // of populating the value in the register. That's reasonable
- // for these frame index registers, but it's still a very, very
- // strong assumption. rdar://7322732. Better would be to
- // explicitly check each instruction in the range for references
- // to the virtual register. Only delete those insns that
- // touch the virtual register.
-
- // Find the last use of the new virtual register. Remove all
- // instruction between here and there, and update the current
- // instruction to reference the last use insn instead.
- MachineBasicBlock::iterator LastUseMI =
- findLastUseReg(I, BB->end(), Reg);
-
- // Remove all instructions up 'til the last use, since they're
- // just calculating the value we already have.
- BB->erase(I, LastUseMI);
- I = LastUseMI;
-
- // Extend the live range of the scratch register
- PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false);
- RS->setUsed(CurrentScratchReg);
- CurrentVirtReg = Reg;
-
- // We deleted the instruction we were scanning the operands of.
- // Jump back to the instruction iterator loop. Don't increment
- // past this instruction since we updated the iterator already.
- DoIncr = false;
- break;
- }
-
// Scavenge a new scratch register
- CurrentVirtReg = Reg;
+ VirtReg = Reg;
const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
- PrevValue = Value;
+ ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
+ ++NumScavengedRegs;
}
- // replace this reference to the virtual register with the
+ // Replace this reference to the virtual register with the
// scratch register.
- assert (CurrentScratchReg && "Missing scratch register!");
- MI->getOperand(i).setReg(CurrentScratchReg);
+ assert (ScratchReg && "Missing scratch register!");
+ MI->getOperand(i).setReg(ScratchReg);
- if (MI->getOperand(i).isKill()) {
- isKillInsn = true;
- PrevLastUseOp = i;
- PrevLastUseMI = MI;
- }
}
}
- // If this is the last use of the scratch, stop tracking it. The
- // last use will be a kill operand in an instruction that does
- // not also define the scratch register.
- if (isKillInsn && !isDefInsn) {
- CurrentVirtReg = 0;
- havePrevValue = trackingCurrentValue;
- }
- // Similarly, notice if instruction clobbered the value in the
- // register we're tracking for possible later reuse. This is noted
- // above, but enforced here since the value is still live while we
- // process the rest of the operands of the instruction.
- if (clobbersScratchReg) {
- havePrevValue = false;
- CurrentScratchReg = 0;
- }
- if (DoIncr) {
- RS->forward(I);
- ++I;
- }
+ RS->forward(I);
+ ++I;
}
}
}
diff --git a/src/LLVM/lib/CodeGen/PrologEpilogInserter.h b/src/LLVM/lib/CodeGen/PrologEpilogInserter.h
index e56c36f..e239159 100644
--- a/src/LLVM/lib/CodeGen/PrologEpilogInserter.h
+++ b/src/LLVM/lib/CodeGen/PrologEpilogInserter.h
@@ -36,7 +36,9 @@
class PEI : public MachineFunctionPass {
public:
static char ID;
- PEI() : MachineFunctionPass(ID) {}
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
const char *getPassName() const {
return "Prolog/Epilog Insertion & Frame Finalization";
@@ -99,13 +101,6 @@
// TRI->requiresFrameIndexScavenging() for the current function.
bool FrameIndexVirtualScavenging;
- // When using the scavenger post-pass to resolve frame reference
- // materialization registers, maintain a map of the registers to
- // the constant value and SP adjustment associated with it.
- typedef std::pair<TargetRegisterInfo::FrameIndexValue, int>
- FrameConstantEntry;
- DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap;
-
#ifndef NDEBUG
// Machine function handle.
MachineFunction* MF;
diff --git a/src/LLVM/lib/CodeGen/PseudoSourceValue.cpp b/src/LLVM/lib/CodeGen/PseudoSourceValue.cpp
index 5e86e5a..73b66d8 100644
--- a/src/LLVM/lib/CodeGen/PseudoSourceValue.cpp
+++ b/src/LLVM/lib/CodeGen/PseudoSourceValue.cpp
@@ -18,7 +18,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <map>
using namespace llvm;
diff --git a/src/LLVM/lib/CodeGen/README.txt b/src/LLVM/lib/CodeGen/README.txt
index b655dda..7f75f65 100644
--- a/src/LLVM/lib/CodeGen/README.txt
+++ b/src/LLVM/lib/CodeGen/README.txt
@@ -26,7 +26,7 @@
sxth r3, r3
mla r4, r3, lr, r4
-It also increase the likelyhood the store may become dead.
+It also increases the likelihood the store may become dead.
//===---------------------------------------------------------------------===//
@@ -162,7 +162,7 @@
//===---------------------------------------------------------------------===//
-Stack coloring improvments:
+Stack coloring improvements:
1. Do proper LiveStackAnalysis on all stack objects including those which are
not spill slots.
diff --git a/src/LLVM/lib/CodeGen/RegAllocBase.h b/src/LLVM/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 0000000..0316421
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,193 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity, and generated code performance is
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "LiveIntervalUnion.h"
+#include "RegisterClassInfo.h"
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class Spiller;
+
+// Forward declare a priority queue of live virtual registers. If an
+// implementation needs to prioritize by anything other than spill weight, then
+// this will become an abstract base class with virtual calls to push/get.
+class LiveVirtRegQueue;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+ LiveIntervalUnion::Allocator UnionAllocator;
+
+ // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual
+ // registers may have changed.
+ unsigned UserTag;
+
+protected:
+ // Array of LiveIntervalUnions indexed by physical register.
+ class LiveUnionArray {
+ unsigned NumRegs;
+ LiveIntervalUnion *Array;
+ public:
+ LiveUnionArray(): NumRegs(0), Array(0) {}
+ ~LiveUnionArray() { clear(); }
+
+ unsigned numRegs() const { return NumRegs; }
+
+ void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned PhysReg) {
+ assert(PhysReg < NumRegs && "physReg out of bounds");
+ return Array[PhysReg];
+ }
+ };
+
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ RegisterClassInfo RegClassInfo;
+ LiveUnionArray PhysReg2LiveUnion;
+
+ // Current queries, one per physreg. They must be reinitialized each time we
+ // query on a new live virtual register.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis);
+
+ // Get an initialized query to check interferences between lvr and preg. Note
+ // that Query::init must be called at least once for each physical register
+ // before querying a new live virtual register. This ties Queries and
+ // PhysReg2LiveUnion together.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
+ Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]);
+ return Queries[PhysReg];
+ }
+
+ // Invalidate all cached information about virtual registers - live ranges may
+ // have changed.
+ void invalidateVirtRegs() { ++UserTag; }
+
+ // The top-level driver. The output is a VirtRegMap that is updated with
+ // physical register assignments.
+ //
+ // If an implementation wants to override the LiveInterval comparator, we
+ // should modify this interface to allow passing in an instance derived from
+ // LiveVirtRegQueue.
+ void allocatePhysRegs();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ virtual void enqueue(LiveInterval *LI) = 0;
+
+ /// dequeue - Return the next unassigned register, or NULL.
+ virtual LiveInterval *dequeue() = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+ // Each call must guarantee forward progress by returning an available PhysReg
+ // or new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+ // A RegAlloc pass should call this when PassManager releases its memory.
+ virtual void releaseMemory();
+
+ // Helper for checking interference between a live virtual register and a
+ // physical register, including all its register aliases. If an interference
+ // exists, return the interfering register, which may be preg or an alias.
+ unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
+
+ /// assign - Assign VirtReg to PhysReg.
+ /// This should not be called from selectOrSplit for the current register.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// unassign - Undo a previous assignment of VirtReg to PhysReg.
+ /// This can be invoked from selectOrSplit, but be careful to guarantee that
+ /// allocation is making progress.
+ void unassign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ // Helper for spilling all live virtual registers currently unified under preg
+ // that interfere with the most recently queried lvr. Return true if spilling
+ // was successful, and append any new spilled/split intervals to splitLVRs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// addMBBLiveIns - Add physreg liveins to basic blocks.
+ void addMBBLiveIns(MachineFunction *);
+
+#ifndef NDEBUG
+ // Verify each LiveIntervalUnion.
+ void verify();
+#endif
+
+ // Use this group name for NamedRegionTimer.
+ static const char *TimerGroupName;
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveRegs();
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
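UserTag and invalidateVirtRegs() in this header implement generation-tag caching: rather than walking every cached Query when live ranges change, the allocator bumps one counter, and a query whose recorded tag no longer matches is treated as a cache miss. A minimal sketch (not part of the patch) of the idiom with a single cached entry; names are illustrative, not the real LiveIntervalUnion::Query API:

struct CachedQuery {
  unsigned Tag;    // generation at which Result was computed
  bool Result;
  CachedQuery() : Tag(0), Result(false) {}
};

struct InterferenceCache {
  unsigned UserTag;  // bumped by invalidateAll()
  CachedQuery Q;     // one entry shown; the patch keeps one per physreg

  InterferenceCache() : UserTag(1) {}

  void invalidateAll() { ++UserTag; }  // O(1), like invalidateVirtRegs()

  bool check(bool (*Compute)()) {
    if (Q.Tag != UserTag) {            // stale or never computed
      Q.Result = Compute();
      Q.Tag = UserTag;
    }
    return Q.Result;
  }
};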
diff --git a/src/LLVM/lib/CodeGen/RegAllocBasic.cpp b/src/LLVM/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 0000000..5496d69
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,591 @@
+//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "LiveDebugVariables.h"
+#include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
+#include "RenderMachineFunction.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <cstdlib>
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+namespace {
+ struct CompSpillWeight {
+ bool operator()(LiveInterval *A, LiveInterval *B) const {
+ return A->weight < B->weight;
+ }
+ };
+}
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ LiveStacks *LS;
+ RenderMachineFunction *RMF;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+ CompSpillWeight> Queue;
+public:
+ RABasic();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+ virtual void enqueue(LiveInterval *LI) {
+ Queue.push(LI);
+ }
+
+ virtual LiveInterval *dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = Queue.top();
+ Queue.pop();
+ return LI;
+ }
+
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitiveID(RegisterCoalescerPassID);
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ DEBUG(AU.addRequired<RenderMachineFunction>());
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset(0);
+ RegAllocBase::releaseMemory();
+}
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ if (NumRegs != PhysReg2LiveUnion.numRegs()) {
+ PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
+ // Cache an interference query for each physical reg
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+ }
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+ for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
+ PhysReg2LiveUnion[r].clear();
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ enqueue(&VirtReg);
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
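+
+// A minimal selectOrSplit() conforming to the contract above, as a sketch
+// (TrivialRA is a hypothetical subclass; RABasic below is the real one):
+//
+//   unsigned TrivialRA::selectOrSplit(LiveInterval &VirtReg,
+//                                     SmallVectorImpl<LiveInterval*> &Split) {
+//     ArrayRef<unsigned> Order =
+//       RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
+//     for (unsigned i = 0, e = Order.size(); i != e; ++i)
+//       if (!checkPhysRegInterference(VirtReg, Order[i]))
+//         return Order[i];            // assign directly
+//     if (!VirtReg.isSpillable())
+//       return ~0u;                   // caller reports "out of registers"
+//     LiveRangeEdit LRE(VirtReg, Split);
+//     spiller().spill(LRE);           // spill products are requeued
+//     return 0;                       // nothing to assign this round
+//   }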
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
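+
+// getOverlaps() returns a 0-terminated list that begins with the register
+// itself, so the loop above tests PhysReg and then each alias. For example
+// (illustrative, x86):
+//
+//   for (const unsigned *AI = TRI->getOverlaps(X86::AX); *AI; ++AI)
+//     ...; // visits AX, then overlapping registers such as AL, AH, EAX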
+
+// Helper for spillInterferences() that spills all interfering vregs currently
+// assigned to this physical register.
+void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
+ assert(Q.seenAllInterferences() && "need collectInterferences()");
+ const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
+
+ for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
+ E = PendingSpills.end(); I != E; ++I) {
+ LiveInterval &SpilledVReg = **I;
+ DEBUG(dbgs() << "extracting from " <<
+ TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ unassign(SpilledVReg, PhysReg);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills);
+ spiller().spill(LRE);
+ }
+ // After extracting segments, the query's results are invalid. But keep the
+ // contents valid until we're done accessing pendingSpills.
+ Q.clear();
+}
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool
+RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ unsigned NumInterferences = 0;
+ // Collect interferences assigned to any alias of the physical register.
+ for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
+ NumInterferences += QAlias.collectInterferingVRegs();
+ if (QAlias.seenUnspillableVReg()) {
+ return false;
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(NumInterferences > 0 && "expect interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ spillReg(VirtReg, *AliasI, SplitVRegs);
+ return true;
+}
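+
+// Caller sketch (mirrors RABasic::selectOrSplit below): the queries must
+// already be primed by checkPhysRegInterference(), and success means the
+// register is now free for VirtReg:
+//
+//   if (spillInterferences(VirtReg, PhysReg, SplitVRegs)) {
+//     assert(checkPhysRegInterference(VirtReg, PhysReg) == 0);
+//     return PhysReg; // tell the driver to assign it
+//   }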
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ if (MF->size() <= 1)
+ return;
+
+ LiveIntervalUnion::SegmentIter SI;
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
+ MachineFunction::iterator MBB = llvm::next(MF->begin());
+ MachineFunction::iterator MFE = MF->end();
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.setMap(LiveUnion.getMap());
+ SI.find(Start);
+ while (SI.valid()) {
+ if (SI.start() <= Start) {
+ if (!MBB->isLiveIn(PhysReg))
+ MBB->addLiveIn(PhysReg);
+ DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
+ << PrintReg(SI.value()->reg, TRI));
+ } else if (SI.start() > Stop)
+ MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+ if (++MBB == MFE)
+ break;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.advanceTo(Start);
+ }
+ DEBUG(dbgs() << '\n');
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// RABasic Implementation
+//===----------------------------------------------------------------------===//
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we find an
+// available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ ArrayRef<unsigned> Order =
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
+ for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E;
+ ++I) {
+ unsigned PhysReg = *I;
+
+    // Check interference and, as a side effect, initialize queries for this
+ // VirtReg and its aliases.
+ unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
+ if (interfReg == 0) {
+ // Found an available register.
+ return PhysReg;
+ }
+ Queries[interfReg].collectInterferingVRegs(1);
+ LiveInterval *interferingVirtReg =
+ Queries[interfReg].interferingVRegs().front();
+
+ // The current VirtReg must either be spillable, or one of its interferences
+ // must have less spill weight.
+    if (interferingVirtReg->weight < VirtReg.weight) {
+ PhysRegSpillCands.push_back(PhysReg);
+ }
+ }
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+
+ assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ if (!VirtReg.isSpillable())
+ return ~0u;
+ LiveRangeEdit LRE(VirtReg, SplitVRegs);
+ spiller().spill(LRE);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
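+
+// Worked example (illustrative): let VirtReg have weight 4.0 and allocation
+// order {R0, R1}. If R0's interference has weight 2.0 and R1's has weight
+// 8.0, the first loop records only R0 as a spill candidate (2.0 < 4.0); the
+// second loop evicts the weight-2.0 range and returns R0 for assignment.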
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+
+ addMBBLiveIns(MF);
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ // optional HTML output
+ DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
+
+ // FIXME: Verification currently must run before VirtRegRewriter. We should
+ // make the rewriter a separate pass and override verifyAnalysis instead. When
+ // that happens, verification naturally falls under VerifyMachineCode.
+#ifndef NDEBUG
+ if (VerifyEnabled) {
+ // Verify accuracy of LiveIntervals. The standard machine code verifier
+ // ensures that each LiveIntervals covers all uses of the virtual reg.
+
+ // FIXME: MachineVerifier is badly broken when using the standard
+ // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
+ // inline spiller, some tests fail to verify because the coalescer does not
+ // always generate verifiable code.
+ MF->verify(this, "In RABasic::verify");
+
+ // Verify that LiveIntervals are partitioned into unions and disjoint within
+ // the unions.
+ verify();
+ }
+#endif // !NDEBUG
+
+ // Run rewriter
+ VRM->rewrite(LIS->getSlotIndexes());
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
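+
+// Usage sketch (assuming this allocator is registered under the name "basic"
+// via RegisterRegAlloc earlier in this file, like the greedy allocator's
+// "greedy" registration below):
+//
+//   llc -regalloc=basic input.ll -o output.s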
diff --git a/src/LLVM/lib/CodeGen/RegAllocFast.cpp b/src/LLVM/lib/CodeGen/RegAllocFast.cpp
index 5a6187b..b36a445 100644
--- a/src/LLVM/lib/CodeGen/RegAllocFast.cpp
+++ b/src/LLVM/lib/CodeGen/RegAllocFast.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -48,13 +49,17 @@
public:
static char ID;
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
- isBulkSpilling(false) {}
+ isBulkSpilling(false) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
private:
const TargetMachine *TM;
MachineFunction *MF;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
// Basic block currently being allocated.
MachineBasicBlock *MBB;
@@ -81,7 +86,7 @@
// that is currently available in a physical register.
LiveRegMap LiveVirtRegs;
- DenseMap<unsigned, MachineInstr *> LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
// RegState - Track the state of a physical register.
enum RegState {
@@ -94,7 +99,7 @@
// immediately without checking aliases.
regFree,
- // A reserved register has been assigned expolicitly (e.g., setting up a
+ // A reserved register has been assigned explicitly (e.g., setting up a
// call parameter), and it remains reserved until it is used.
regReserved
@@ -110,13 +115,10 @@
// instruction, and so cannot be allocated.
BitVector UsedInInstr;
- // Allocatable - vector of allocatable physical registers.
- BitVector Allocatable;
-
- // SkippedInstrs - Descriptors of instructions whose clobber list was ignored
- // because all registers were spilled. It is still necessary to mark all the
- // clobbered registers as used by the function.
- SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
// isBulkSpilling - This flag is set when LiveRegMap will be cleared
// completely after spilling all live registers. LiveRegMap entries should
@@ -239,8 +241,7 @@
}
/// spillVirtReg - This method spills the value specified by VirtReg into the
-/// corresponding stack slot if needed. If isKill is set, the register is also
-/// killed.
+/// corresponding stack slot if needed.
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
@@ -260,19 +261,21 @@
// instruction, not on the spill.
bool SpillKill = LR.LastUse != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling %reg" << LRI->first
- << " in " << TRI->getName(LR.PhysReg));
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
int FI = getStackSpaceFor(LRI->first, RC);
DEBUG(dbgs() << " to stack slot #" << FI << "\n");
TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
++NumStores; // Update statistics
- // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
// identify spilled location as the place to find corresponding variable's
// value.
- if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) {
- const MDNode *MDPtr =
+ SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
+ const MDNode *MDPtr =
DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
int64_t Offset = 0;
if (DBG->getOperand(1).isImm())
@@ -285,14 +288,16 @@
}
else
DL = MI->getDebugLoc();
- if (MachineInstr *NewDV =
+ if (MachineInstr *NewDV =
TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
MachineBasicBlock *MBB = DBG->getParent();
MBB->insert(MI, NewDV);
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
- LiveDbgValueMap[LRI->first] = NewDV;
}
}
+  // Now that this register is spilled, there should not be any DBG_VALUE
+  // pointing to this register; they all point to the spilled value now.
+ LRIDbgValues.clear();
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
}
@@ -332,7 +337,7 @@
MO.setIsKill();
return;
default:
- // The physreg was allocated to a virtual register. That means to value we
+ // The physreg was allocated to a virtual register. That means the value we
// wanted has been clobbered.
llvm_unreachable("Instruction uses an allocated register");
}
@@ -394,7 +399,6 @@
PhysRegState[PhysReg] = NewState;
for (const unsigned *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
- UsedInInstr.set(Alias);
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
@@ -418,20 +422,25 @@
// can be allocated directly.
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
- if (UsedInInstr.test(PhysReg))
+ if (UsedInInstr.test(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
+ }
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
case regFree:
return 0;
case regReserved:
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default:
return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
}
- // This is a disabled register, add up const of aliases.
+ // This is a disabled register, add up cost of aliases.
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
for (const unsigned *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
@@ -459,8 +468,8 @@
/// register must not be used for anything else when this is called.
///
void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
- DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to "
- << TRI->getName(PhysReg) << "\n");
+ DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
PhysRegState[PhysReg] = LRE.first;
assert(!LRE.second.PhysReg && "Already assigned a physreg");
LRE.second.PhysReg = PhysReg;
@@ -477,38 +486,38 @@
// Ignore invalid hints.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || !Allocatable.test(Hint)))
+ !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
Hint = 0;
// Take hint when possible.
if (Hint) {
- switch(calcSpillCost(Hint)) {
- default:
- definePhysReg(MI, Hint, regFree);
- // Fall through.
- case 0:
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint);
+ if (Cost < spillDirty) {
+ if (Cost)
+ definePhysReg(MI, Hint, regFree);
return assignVirtToPhysReg(LRE, Hint);
- case spillImpossible:
- break;
}
}
- TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF);
- TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF);
+ ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC);
// First try to find a completely free register.
- for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+ for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned PhysReg = *I;
if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
return assignVirtToPhysReg(LRE, PhysReg);
}
- DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName()
- << "\n");
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << RC->getName() << "\n");
unsigned BestReg = 0, BestCost = spillImpossible;
- for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+ for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned Cost = calcSpillCost(*I);
+ DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+ DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+ DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
// Cost is 0 when all aliases are already disabled.
if (Cost == 0)
return assignVirtToPhysReg(LRE, *I);
@@ -521,11 +530,10 @@
return assignVirtToPhysReg(LRE, BestReg);
}
- // Nothing we can do.
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- report_fatal_error(Msg.str());
+ // Nothing we can do. Report an error and keep going with a bad allocation.
+ MI->emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AO.begin(), regFree);
+ assignVirtToPhysReg(LRE, *AO.begin());
}
/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
@@ -577,8 +585,8 @@
allocVirtReg(MI, *LRI, Hint);
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
- << TRI->getName(LR.PhysReg) << "\n");
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LR.PhysReg, TRI) << "\n");
TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
++NumLoads;
} else if (LR.Dirty) {
@@ -646,11 +654,12 @@
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
(MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
if (ThroughRegs.insert(Reg))
- DEBUG(dbgs() << " %reg" << Reg);
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
}
}
@@ -678,7 +687,7 @@
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
unsigned DefIdx = 0;
if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
@@ -711,9 +720,9 @@
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ << " as used in instr\n");
UsedInInstr.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- UsedInInstr.set(*AS);
}
// Also mark PartialDefs as used to avoid reallocation.
@@ -724,6 +733,27 @@
void RAFast::AllocateBasicBlock() {
DEBUG(dbgs() << "\nAllocating " << *MBB);
+ // FIXME: This should probably be added by instruction selection instead?
+ // If the last instruction in the block is a return, make sure to mark it as
+ // using all of the live-out values in the function. Things marked both call
+ // and return are tail calls; do not do this for them. The tail callee need
+ // not take the same registers as input that it produces as output, and there
+ // are dependencies for its input registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
+ !MBB->back().getDesc().isCall()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register.");
+
+ // Add live-out registers as implicit uses.
+ Ret->addRegisterKilled(*I, TRI, true);
+ }
+ }
+
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
   assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
@@ -732,7 +762,8 @@
// Add live-in registers as live.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- definePhysReg(MII, *I, regReserved);
+ if (RegClassInfo.isAllocatable(*I))
+ definePhysReg(MII, *I, regReserved);
SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
@@ -740,7 +771,7 @@
// Otherwise, sequentially allocate each instruction in the MBB.
while (MII != MBB->end()) {
MachineInstr *MI = MII++;
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
DEBUG({
dbgs() << "\n>> " << *MI << "Regs:";
for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
@@ -753,7 +784,7 @@
dbgs() << "*";
break;
default:
- dbgs() << "=%reg" << PhysRegState[Reg];
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
dbgs() << "*";
assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
@@ -783,32 +814,39 @@
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- LiveDbgValueMap[Reg] = MI;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ LiveDbgValueMap[Reg].push_back(MI);
LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
if (LRI != LiveVirtRegs.end())
setPhysReg(MI, i, LRI->second.PhysReg);
else {
int SS = StackSlotForVirtReg[Reg];
- if (SS == -1)
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ if (SS == -1) {
+ // We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
else {
// Modify DBG_VALUE now that the value is in a spill slot.
int64_t Offset = MI->getOperand(1).getImm();
- const MDNode *MDPtr =
+ const MDNode *MDPtr =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
DebugLoc DL = MI->getDebugLoc();
- if (MachineInstr *NewDV =
+ if (MachineInstr *NewDV =
TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ DEBUG(dbgs() << "Modifying debug info due to spill:" <<
+ "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
MBB->insert(MBB->erase(MI), NewDV);
// Scan NewDV operands from the beginning.
MI = NewDV;
ScanDbgValue = true;
break;
- } else
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ } else {
+ // We can't allocate a physreg for a DebugValue; sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
}
}
}
@@ -846,7 +884,7 @@
VirtOpEnd = i+1;
if (MO.isUse()) {
hasTiedOps = hasTiedOps ||
- TID.getOperandConstraint(i, TOI::TIED_TO) != -1;
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
} else {
if (MO.isEarlyClobber())
hasEarlyClobbers = true;
@@ -855,7 +893,7 @@
}
continue;
}
- if (!Allocatable.test(Reg)) continue;
+ if (!RegClassInfo.isAllocatable(Reg)) continue;
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
@@ -871,12 +909,12 @@
// operands. If there are also physical defs, these registers must avoid
// both physical defs and uses, making them more constrained than normal
// operands.
- // Similarly, if there are multiple defs and tied operands, we must make sure
- // the same register is allocated to uses and defs.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
// We didn't detect inline asm tied operands above, so just make this extra
// pass for all inline asm.
- if (hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) {
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
handleThroughOperands(MI, VirtDead);
// Don't attempt coalescing when we have funny stuff going on.
CopyDst = 0;
@@ -891,7 +929,7 @@
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
unsigned PhysReg = LRI->second.PhysReg;
@@ -921,18 +959,18 @@
}
unsigned DefOpEnd = MI->getNumOperands();
- if (TID.isCall()) {
+ if (MCID.isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
- // exception is thrown, the landing pad is going to expect to find registers
- // in their spill slots, and 2. we don't have to wade through all the
- // <imp-def> operands on the call instruction.
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots, and 2. we don't have to wade through
+ // all the <imp-def> operands on the call instruction.
DefOpEnd = VirtOpEnd;
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);
// The imp-defs are skipped below, but we still need to mark those
// registers as used by the function.
- SkippedInstrs.insert(&TID);
+ SkippedInstrs.insert(&MCID);
}
// Third scan.
@@ -944,7 +982,7 @@
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!Allocatable.test(Reg)) continue;
+ if (!RegClassInfo.isAllocatable(Reg)) continue;
definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
regFree : regReserved);
continue;
@@ -1000,14 +1038,12 @@
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
-
+ RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.resize(TRI->getNumRegs());
- Allocatable = TRI->getAllocatableSet(*MF);
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
- unsigned LastVirtReg = MRI->getLastVirtReg();
- StackSlotForVirtReg.grow(LastVirtReg);
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
@@ -1020,7 +1056,7 @@
MRI->closePhysRegsUsed(*TRI);
// Add the clobber lists for all the instructions we skipped earlier.
- for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator
+ for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
if (const unsigned *Defs = (*I)->getImplicitDefs())
while (*Defs)
diff --git a/src/LLVM/lib/CodeGen/RegAllocGreedy.cpp b/src/LLVM/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 0000000..f54a2c8
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1654 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "LiveRangeEdit.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static cl::opt<SplitEditor::ComplementSpillMode>
+SplitSpillMode("split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Partition));
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+
+ // context
+ MachineFunction *MF;
+
+ // analyses
+ SlotIndexes *Indexes;
+ LiveStacks *LS;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+ unsigned NextCascade;
+
+ // Live ranges pass through a number of stages as we try to allocate them.
+ // Some of the stages may also create new live ranges:
+ //
+ // - Region splitting.
+ // - Per-block splitting.
+ // - Local splitting.
+ // - Spilling.
+ //
+ // Ranges produced by one of the stages skip the previous stages when they are
+ // dequeued. This improves performance because we can skip interference checks
+ // that are unlikely to give any results. It also guarantees that the live
+ // range splitting algorithm terminates, something that is otherwise hard to
+ // ensure.
+ enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+ };
+
+ static const char *const StageName[];
+
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage;
+
+ // Cascade - Eviction loop prevention. See canEvictInterference().
+ unsigned Cascade;
+
+ RegInfo() : Stage(RS_New), Cascade(0) {}
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return ExtraRegInfo[VirtReg.reg].Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ }
+
+ template<typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ for (;Begin != End; ++Begin) {
+ unsigned Reg = (*Begin)->reg;
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = NewStage;
+ }
+ }
+
+ /// Cost of evicting interference.
+ struct EvictionCost {
+ unsigned BrokenHints; ///< Total number of broken hints.
+ float MaxWeight; ///< Maximum spill weight evicted.
+
+ EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+ bool operator<(const EvictionCost &O) const {
+ if (BrokenHints != O.BrokenHints)
+ return BrokenHints < O.BrokenHints;
+ return MaxWeight < O.MaxWeight;
+ }
+ };
+
+ // splitting state.
+ std::auto_ptr<SplitAnalysis> SA;
+ std::auto_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ // Register intended for assignment, or 0.
+ unsigned PhysReg;
+
+ // SplitKit interval index for this candidate.
+ unsigned IntvIdx;
+
+ // Interference for PhysReg.
+ InterferenceCache::Cursor Intf;
+
+ // Bundles where this candidate should be live.
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, unsigned Reg) {
+ PhysReg = Reg;
+ IntvIdx = 0;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+
+ // Set B[i] = C for every live bundle where B[i] was NoCand.
+ unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+ unsigned Count = 0;
+ for (int i = LiveBundles.find_first(); i >= 0;
+ i = LiveBundles.find_next(i))
+ if (B[i] == NoCand) {
+ B[i] = C;
+ Count++;
+ }
+ return Count;
+ }
+ };
+
+  /// Candidate info for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+ enum { NoCand = ~0u };
+
+ /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+ /// NoCand which indicates the stack interval.
+ SmallVector<unsigned, 32> BundleCand;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+ virtual Spiller &spiller() { return *SpillerInstance; }
+ virtual void enqueue(LiveInterval *LI);
+ virtual LiveInterval *dequeue();
+ virtual unsigned selectOrSplit(LiveInterval&,
+ SmallVectorImpl<LiveInterval*>&);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+
+private:
+ void LRE_WillEraseInstruction(MachineInstr*);
+ bool LRE_CanEraseVirtReg(unsigned);
+ void LRE_WillShrinkVirtReg(unsigned);
+ void LRE_DidCloneVirtReg(unsigned, unsigned);
+
+ float calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, float&);
+ void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ void growRegion(GlobalSplitCandidate &Cand);
+ float calcGlobalSplitCost(GlobalSplitCandidate&);
+ bool calcCompactRegion(GlobalSplitCandidate&);
+ void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ void evictInterference(LiveInterval&, unsigned,
+ SmallVectorImpl<LiveInterval*>&);
+
+ unsigned tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_Assign",
+ "RS_Split",
+ "RS_Split2",
+ "RS_Spill",
+ "RS_Done"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitiveID(RegisterCoalescerPassID);
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // LRE itself will remove from SlotIndexes and parent basic block.
+ VRM->RemoveMachineInstrFromMaps(MI);
+}
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
+ unassign(LIS->getInterval(VirtReg), PhysReg);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ if (!PhysReg)
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ unassign(LI, PhysReg);
+ enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ // Cloning a register we haven't even heard about yet? Just ignore it.
+ if (!ExtraRegInfo.inBounds(Old))
+ return;
+
+ // LRE may clone a virtual register because dead code elimination causes it to
+ // be split into connected components. The new components are much smaller
+ // than the original, so they should get a new chance at being assigned.
+  // The clone inherits the same stage and cascade as the parent.
+ ExtraRegInfo[Old].Stage = RS_Assign;
+ ExtraRegInfo.grow(New);
+ ExtraRegInfo[New] = ExtraRegInfo[Old];
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset(0);
+ ExtraRegInfo.clear();
+ GlobalCand.clear();
+ RegAllocBase::releaseMemory();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) {
+ // Prioritize live ranges by size, assigning larger ranges first.
+  // The queue holds (priority, reg) pairs.
+ const unsigned Size = LI->getSize();
+ const unsigned Reg = LI->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only enqueue virtual registers");
+ unsigned Prio;
+
+ ExtraRegInfo.grow(Reg);
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = RS_Assign;
+
+ if (ExtraRegInfo[Reg].Stage == RS_Split) {
+ // Unsplit ranges that couldn't be allocated immediately are deferred until
+ // everything else has been allocated.
+ Prio = Size;
+ } else {
+ // Everything is allocated in long->short order. Long ranges that don't fit
+ // should be spilled (or split) ASAP so they don't create interference.
+ Prio = (1u << 31) + Size;
+
+ // Boost ranges that have a physical register hint.
+ if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+ Prio |= (1u << 30);
+ }
+
+ Queue.push(std::make_pair(Prio, Reg));
+}
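+
+// Worked example (illustrative): a 100-slot range in RS_Assign with a
+// physreg hint gets
+//   Prio = ((1u << 31) + 100) | (1u << 30)   // == 0xC0000064
+// Bit 31 orders it before deferred RS_Split ranges, bit 30 boosts hinted
+// ranges, and the low bits keep long->short order within each group.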
+
+LiveInterval *RAGreedy::dequeue() {
+ if (Queue.empty())
+ return 0;
+ LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+ Queue.pop();
+ return LI;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ Order.rewind();
+ unsigned PhysReg;
+ while ((PhysReg = Order.next()))
+ if (!checkPhysRegInterference(VirtReg, PhysReg))
+ break;
+ if (!PhysReg || Order.isHint(PhysReg))
+ return PhysReg;
+
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Order.isHint(Hint)) {
+ DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ EvictionCost MaxCost(1);
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ evictInterference(VirtReg, Hint, NewVRegs);
+ return Hint;
+ }
+ }
+
+ // Try to evict interference from a cheaper alternative.
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ << '\n');
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Interference eviction
+//===----------------------------------------------------------------------===//
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) < RS_Spill;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ return A.weight > B.weight;
+}
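+
+// Example (illustrative): A with weight 3.0 evicts B with weight 2.0. A
+// lighter A can still evict B when A is about to take its hinted register,
+// B is not sitting on its own preferred register, and B can still be split
+// (getStage(B) < RS_Spill).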
+
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted for less than MaxCost.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ bool IsHint, EvictionCost &MaxCost) {
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
+
+ EvictionCost Cost;
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    // If there are 10 or more interferences, chances are one is heavier.
+ if (Q.collectInterferingVRegs(10) >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
+ return false;
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Done)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ // Finally, apply the eviction policy for non-urgent evictions.
+ if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
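+
+// Cascade example (illustrative): a VirtReg carrying cascade 5 may evict
+// ranges with an older cascade or none at all, but not cascade >= 5 (unless
+// the eviction is urgent). evictInterference() below stamps evictees with
+// cascade 5, so they can never evict this VirtReg back, which breaks
+// eviction cycles.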
+
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+  // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+      assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i];
+ unassign(*Intf, VRM->getPhys(Intf->reg));
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
+ }
+ }
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs,
+ unsigned CostPerUseLimit) {
+ NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost(~0u);
+ unsigned BestPhys = 0;
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ continue;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1)
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (!MRI->isPhysRegUsed(CSR)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(CSR, TRI) << '\n');
+ continue;
+ }
+
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (Order.isHint(PhysReg))
+ break;
+ }
+
+ if (!BestPhys)
+ return 0;
+
+ evictInterference(VirtReg, BestPhys, NewVRegs);
+ return BestPhys;
+}
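+
+// Example (illustrative): tryAssign() calls tryEvict() with CostPerUseLimit
+// equal to the cost of an already-available register. With a limit of 1, the
+// CSR check above refuses to start using a previously unused callee-saved
+// register just to save one unit of cost-per-use, since the CSR would cost a
+// save/restore in the prologue and epilogue instead.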
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Physreg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ float &Cost) {
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
+ // Reset interference dependent info.
+ SplitConstraints.resize(UseBlocks.size());
+ float StaticCost = 0;
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+
+ BC.Number = BI.MBB->getNumber();
+ Intf.moveToBlock(BC.Number);
+ BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.ChangesValue = BI.FirstDef;
+
+ if (!Intf.hasInterference())
+ continue;
+
+ // Number of spill code instructions to insert.
+ unsigned Ins = 0;
+
+ // Interference for the live-in value.
+ if (BI.LiveIn) {
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
+ BC.Entry = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.first() < BI.FirstInstr)
+ BC.Entry = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.first() < BI.LastInstr)
+ ++Ins;
+ }
+
+ // Interference for the live-out value.
+ if (BI.LiveOut) {
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
+ BC.Exit = SpillPlacement::MustSpill, ++Ins;
+ else if (Intf.last() > BI.LastInstr)
+ BC.Exit = SpillPlacement::PrefSpill, ++Ins;
+ else if (Intf.last() > BI.FirstInstr)
+ ++Ins;
+ }
+
+ // Accumulate the total frequency of inserted spill code.
+ if (Ins)
+ StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+ Cost = StaticCost;
+
+ // Add constraints for use-blocks. Note that these are the only constraints
+  // that may add a positive bias; it is downhill from here.
+ SpillPlacer->addConstraints(SplitConstraints);
+ return SpillPlacer->scanActiveBundles();
+}
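+
+// Example (illustrative): a live-in block whose interference starts after
+// the block entry but before the first use gets Entry = PrefSpill plus one
+// reload; if the interference already covers the entry itself, the entry
+// becomes MustSpill, since the value cannot enter the block in a register.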
+
+
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
+ const unsigned GroupSize = 8;
+ SpillPlacement::BlockConstraint BCS[GroupSize];
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
+
+ for (unsigned i = 0; i != Blocks.size(); ++i) {
+ unsigned Number = Blocks[i];
+ Intf.moveToBlock(Number);
+
+ if (!Intf.hasInterference()) {
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ T = 0;
+ }
+ continue;
+ }
+
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
+ // Interference for the live-in value.
+ if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+ BCS[B].Entry = SpillPlacement::MustSpill;
+ else
+ BCS[B].Entry = SpillPlacement::PrefSpill;
+
+ // Interference for the live-out value.
+ if (Intf.last() >= SA->getLastSplitPoint(Number))
+ BCS[B].Exit = SpillPlacement::MustSpill;
+ else
+ BCS[B].Exit = SpillPlacement::PrefSpill;
+
+ if (++B == GroupSize) {
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ B = 0;
+ }
+ }
+
+ ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+ SpillPlacer->addConstraints(Array);
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+}
+
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
+ for (;;) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ // Find new through blocks in the periphery of PrefRegBundles.
+ for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+ unsigned Bundle = NewBundles[i];
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ unsigned Block = *I;
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+
+ // Compute through constraints from the interference, or assume that all
+ // through blocks prefer spilling when forming compact regions.
+ ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ if (Cand.PhysReg)
+ addThroughConstraints(Cand.Intf, NewBlocks);
+ else
+ // Provide a strong negative bias on through blocks to prevent unwanted
+ // liveness on loop backedges.
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
+ AddedTo = ActiveBlocks.size();
+
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ DEBUG(dbgs() << ", v=" << Visited);
+}
+
+/// calcCompactRegion - Compute the set of edge bundles that should be live
+/// when splitting the current live range into compact regions. Compact
+/// regions can be computed without looking at interference. They are the
+/// regions formed by removing all the live-through blocks from the live range.
+///
+/// Returns false if the current live range is already compact, or if the
+/// compact regions would form single block regions anyway.
+bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
+ // Without any through blocks, the live range is already compact.
+ if (!SA->getNumThroughBlocks())
+ return false;
+
+ // Compact regions don't correspond to any physreg.
+ Cand.reset(IntfCache, 0);
+
+ DEBUG(dbgs() << "Compact region bundles");
+
+ // Use the spill placer to determine the live bundles. GrowRegion pretends
+ // that all the through blocks have interference when PhysReg is unset.
+ SpillPlacer->prepare(Cand.LiveBundles);
+
+ // The static split cost will be zero since Cand.Intf reports no interference.
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ growRegion(Cand);
+ SpillPlacer->finish();
+
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ DEBUG({
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ return true;
+}
+
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+float RAGreedy::calcSpillCost() {
+ float Cost = 0;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return Cost;
+}
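+
+// Worked example (illustrative only): three use blocks with frequencies
+// 4, 2 and 1, where only the first is live-in, live-out and redefines the
+// value, give a cost of (4 + 4) + 2 + 1 = 11; the first block is counted
+// twice because it needs both a reload and a store.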
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+ float GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+ unsigned Ins = 0;
+
+ if (BI.LiveIn)
+ Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+ if (BI.LiveOut)
+ Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+ if (Ins)
+ GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+ }
+
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ if (RegIn && RegOut) {
+ // We need double spill code if this block has interference.
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference())
+ GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
+ continue;
+ }
+ // live-in / stack-out or stack-in live-out.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return GlobalCost;
+}
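+
+// Reading the terms above (illustrative): each use-block boundary where the
+// bundle assignment disagrees with the block's preference (e.g. the entry
+// bundle went to the stack but the block prefers the register) costs one
+// copy or spill weighted by block frequency; a live-through block that is
+// in-register on both sides pays twice, but only when interference forces
+// a spill and reload inside it.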
+
+/// splitAroundRegion - Split the current live range around the regions
+/// determined by BundleCand and GlobalCand.
+///
+/// Before calling this function, GlobalCand and BundleCand must be initialized
+/// so each bundle is assigned to a valid candidate, or NoCand for the
+/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
+///
+/// @param LREdit The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+/// must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+ ArrayRef<unsigned> UsedCands) {
+ // These are the intervals created for new global ranges. We may create more
+ // intervals for local ranges.
+ const unsigned NumGlobalIntvs = LREdit.size();
+ DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+ assert(NumGlobalIntvs && "No global intervals configured");
+
+ // Isolate even single instructions when dealing with a proper sub-class.
+ // That guarantees register class inflation for the stack interval because it
+ // is all copies.
+ unsigned Reg = SA->getParent().reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+
+ // First handle all the blocks with uses.
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+ if (BI.LiveIn) {
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+ }
+ if (BI.LiveOut) {
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ }
+
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!IntvIn && !IntvOut) {
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ continue;
+ }
+
+ if (IntvIn && IntvOut)
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ else if (IntvIn)
+ SE->splitRegInBlock(BI, IntvIn, IntfIn);
+ else
+ SE->splitRegOutBlock(BI, IntvOut, IntfOut);
+ }
+
+ // Handle live-through blocks. The relevant live-through blocks are stored in
+ // the ActiveBlocks list with each candidate. We need to filter out
+ // duplicates.
+ BitVector Todo = SA->getThroughBlocks();
+ for (unsigned c = 0; c != UsedCands.size(); ++c) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ unsigned Number = Blocks[i];
+ if (!Todo.test(Number))
+ continue;
+ Todo.reset(Number);
+
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ if (!IntvIn && !IntvOut)
+ continue;
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ }
+ }
+
+ ++NumGlobalSplits;
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(Reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &Reg = *LREdit.get(i);
+
+ // Ignore old intervals from DCE.
+ if (getStage(Reg) != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again, spill if it doesn't
+ // allocate.
+ if (IntvMap[i] == 0) {
+ setStage(Reg, RS_Spill);
+ continue;
+ }
+
+ // Global intervals. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[i] < NumGlobalIntvs) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
+ DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safeguard against looping.
+ setStage(Reg, RS_Split2);
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around region");
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ unsigned NumCands = 0;
+ unsigned BestCand = NoCand;
+ float BestCost;
+ SmallVector<unsigned, 8> UsedCands;
+
+ // Check if we can split this live range around a compact region.
+ bool HasCompact = calcCompactRegion(GlobalCand.front());
+ if (HasCompact) {
+ // Yes, keep GlobalCand[0] as the compact region candidate.
+ NumCands = 1;
+ BestCost = HUGE_VALF;
+ } else {
+ // No benefit from the compact region; our fallback will be per-block
+ // splitting. Make sure we find a solution that is cheaper than spilling.
+ BestCost = Hysteresis * calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned i = 0; i != NumCands; ++i) {
+ if (i == BestCand || !GlobalCand[i].PhysReg)
+ continue;
+ unsigned Count = GlobalCand[i].LiveBundles.count();
+ if (Count < WorstCount)
+ Worst = i, WorstCount = Count;
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
+
+ SpillPlacer->prepare(Cand.LiveBundles);
+ float Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+ continue;
+ }
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+ if (Cost >= BestCost) {
+ DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ continue;
+ }
+ growRegion(Cand);
+
+ SpillPlacer->finish();
+
+ // No live bundles, defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << " no bundles.\n");
+ continue;
+ }
+
+ Cost += calcGlobalSplitCost(Cand);
+ DEBUG({
+ dbgs() << ", total = " << Cost << " with bundles";
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Hysteresis * Cost; // Prevent rounding effects.
+ }
+ ++NumCands;
+ }
+
+ // No solutions found, fall back to single block splitting.
+ if (!HasCompact && BestCand == NoCand)
+ return 0;
+
+ // Prepare split editor.
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit, SplitSpillMode);
+
+ // Assign all edge bundles to the preferred candidate, or NoCand.
+ BundleCand.assign(Bundles->getNumBundles(), NoCand);
+
+ // Assign bundles for the best candidate region.
+ if (BestCand != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[BestCand];
+ if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
+ UsedCands.push_back(BestCand);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+ << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ // Assign bundles for the compact region.
+ if (HasCompact) {
+ GlobalSplitCandidate &Cand = GlobalCand.front();
+ assert(!Cand.PhysReg && "Compact region has no physreg");
+ if (unsigned B = Cand.getBundles(BundleCand, 0)) {
+ UsedCands.push_back(0);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
+ << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ splitAroundRegion(LREdit, UsedCands);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses. This
+/// creates a lot of local live ranges that will be split by tryLocalSplit if
+/// they don't allocate.
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+ unsigned Reg = VirtReg.reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit, SplitSpillMode);
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ }
+ // No blocks were split.
+ if (LREdit.empty())
+ return 0;
+
+ // We did split for some blocks.
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+
+ // Tell LiveDebugVariables about the new ranges.
+ DebugVars->splitRegister(Reg, LREdit.regs());
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Sort out the new intervals created by splitting. The remainder interval
+ // goes straight to spilling, the new local ranges get to stay RS_New.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &LI = *LREdit.get(i);
+ if (getStage(LI) == RS_New && IntvMap[i] == 0)
+ setStage(LI, RS_Spill);
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx =
+ BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+ SlotIndex StopIdx =
+ BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstInstr to
+ // LastInstr, so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
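+
+// Illustrative example: with UseSlots = {A, B, C} there are two gaps, A-B
+// and B-C. An interfering segment of weight 3.0 that overlaps the
+// instruction at B raises both GapWeight[0] and GapWeight[1] to at least
+// 3.0, since interference overlapping an instruction is counted in both
+// surrounding gaps.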
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - A phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case; we simply assume
+ // that the interval is continuous from FirstInstr to LastInstr. We should
+ // make sure that we don't do anything illegal to such an interval, though.
+
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << '\n';
+ });
+
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have fewer instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() == RS_Split2. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Split2,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
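+ // For example (illustrative): a 3-instruction range below RS_Split2 may
+ // split into 2+3 pieces; the piece that still has 3 instructions is tagged
+ // RS_Split2 by rule 3, so any further split of it must strictly shrink
+ // per rule 2.
+ //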
+ bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
+
+ // Best split candidate.
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
+ //
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
+ Shrink = false;
+ float Diff = EstWeight - MaxGap;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Hysteresis * Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ if (++SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
+ SE->useIntv(SegStart, SegStop);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Split2 so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
+ if (IntvMap[i] == 1) {
+ setStage(*LREdit.get(i), RS_Split2);
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ ++NumLocalSplits;
+
+ return 0;
+}
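+
+// Note on Hysteresis (illustrative): candidate comparisons are biased by a
+// constant factor close to one so that near-ties, which could be decided by
+// floating-point rounding alone, are resolved consistently rather than
+// oscillating between candidates.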
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*>&NewVRegs) {
+ // Ranges must be RS_Split2 or less.
+ if (getStage(VirtReg) >= RS_Spill)
+ return 0;
+
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ SA->analyze(&VirtReg);
+ return tryLocalSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ SA->analyze(&VirtReg);
+
+ // FIXME: SplitAnalysis may repair broken live ranges coming from the
+ // coalescer. That may cause the range to become allocatable which means that
+ // tryRegionSplit won't be making progress. This check should be replaced with
+ // an assertion when the coalescer is fixed.
+ if (SA->didRepairRange()) {
+ // VirtReg has changed, so all cached queries are invalid.
+ invalidateVirtRegs();
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+ }
+
+ // First try to split around a region spanning multiple blocks. RS_Split2
+ // ranges already made dubious progress with region splitting, so they go
+ // straight to single block splitting.
+ if (getStage(VirtReg) < RS_Split2) {
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ }
+
+ // Then isolate blocks.
+ return tryBlockSplit(VirtReg, Order, NewVRegs);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ LiveRangeStage Stage = getStage(VirtReg);
+ DEBUG(dbgs() << StageName[Stage]
+ << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Split ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Split)
+ if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // The first time we see a live range, don't try to split or spill.
+ // Wait until the second time, when all smaller ranges have been allocated.
+ // This gives a better picture of the interference to split around.
+ if (Stage < RS_Split) {
+ setStage(VirtReg, RS_Split);
+ DEBUG(dbgs() << "wait for second round\n");
+ NewVRegs.push_back(&VirtReg);
+ return 0;
+ }
+
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class will report it.
+ if (Stage >= RS_Done || !VirtReg.isSpillable())
+ return ~0u;
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(VirtReg, NewVRegs, this);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
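+
+// The decision sequence above, summarized (a sketch using this file's stage
+// names):
+//
+//   tryAssign -> tryEvict (skipped for RS_Split) -> defer once (< RS_Split)
+//     -> trySplit (region, block, or local) -> spill (RS_Done)
+//
+// Each later step runs only when the earlier ones produced neither a
+// physreg nor new virtual registers.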
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+ SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
+ IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+ GlobalCand.resize(32); // This will grow as needed.
+
+ allocatePhysRegs();
+ addMBBLiveIns(MF);
+ LIS->addKillFlags();
+
+ // Run rewriter
+ {
+ NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
+ VRM->rewrite(Indexes);
+ }
+
+ // Write out new DBG_VALUE instructions.
+ {
+ NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled);
+ DebugVars->emitDebugValues(VRM);
+ }
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
diff --git a/src/LLVM/lib/CodeGen/RegAllocLinearScan.cpp b/src/LLVM/lib/CodeGen/RegAllocLinearScan.cpp
index c9c51b3..ce3fb90 100644
--- a/src/LLVM/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/src/LLVM/lib/CodeGen/RegAllocLinearScan.cpp
@@ -12,20 +12,22 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
+#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
+#include "RegisterClassInfo.h"
#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -38,7 +40,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <set>
#include <queue>
#include <memory>
#include <cmath>
@@ -56,15 +57,15 @@
cl::init(false), cl::Hidden);
static cl::opt<bool>
-PreSplitIntervals("pre-alloc-split",
- cl::desc("Pre-register allocation live interval splitting"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
TrivCoalesceEnds("trivial-coalesce-ends",
cl::desc("Attempt trivial coalescing of interval ends"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+AvoidWAWHazard("avoid-waw-hazard",
+ cl::desc("Avoid write-write hazards for some register classes"),
+ cl::init(false), cl::Hidden);
+
static RegisterRegAlloc
linearscanRegAlloc("linearscan", "linear scan register allocator",
createLinearScanRegisterAllocator);
@@ -87,14 +88,27 @@
"to skip."),
cl::init(0),
cl::Hidden);
-
+
struct RALinScan : public MachineFunctionPass {
static char ID;
RALinScan() : MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(
+ *PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+
// Initialize the queue to record recently-used registers.
if (NumRecentlyUsedRegs > 0)
RecentRegs.resize(NumRecentlyUsedRegs, 0);
RecentNext = RecentRegs.begin();
+ avoidWAW_ = 0;
}
typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
@@ -125,9 +139,10 @@
const TargetRegisterInfo* tri_;
const TargetInstrInfo* tii_;
BitVector allocatableRegs_;
+ BitVector reservedRegs_;
LiveIntervals* li_;
- LiveStacks* ls_;
MachineLoopInfo *loopInfo;
+ RegisterClassInfo RegClassInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
/// start value. This is used for backtracking.
@@ -165,6 +180,9 @@
SmallVector<unsigned, 4> RecentRegs;
SmallVector<unsigned, 4>::iterator RecentNext;
+ // Last write-after-write register written.
+ unsigned avoidWAW_;
+
// Record that we just picked this register.
void recordRecentlyUsed(unsigned reg) {
assert(reg != 0 && "Recently used register is NOREG!");
@@ -182,22 +200,25 @@
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
if (StrongPHIElim)
AU.addRequiredID(StrongPHIEliminationID);
// Make sure PassManager knows which analyses to make available
// to coalescing and which analyses coalescing invalidates.
- AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
- if (PreSplitIntervals)
- AU.addRequiredID(PreAllocSplittingID);
- AU.addRequired<LiveStacks>();
- AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(LiveStacksID);
+ AU.addPreservedID(LiveStacksID);
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequiredID(MachineDominatorsID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -207,8 +228,8 @@
// Determine if we skip this register due to its being recently used.
bool isRecentlyUsed(unsigned reg) const {
- return std::find(RecentRegs.begin(), RecentRegs.end(), reg) !=
- RecentRegs.end();
+ return reg == avoidWAW_ ||
+ std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
}
private:
@@ -335,6 +356,14 @@
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs);
+ /// getFirstNonReservedPhysReg - return the first non-reserved physical
+ /// register in the register class.
+ unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
+ ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
+ assert(!O.empty() && "All registers reserved?!");
+ return O.front();
+ }
+
void ComputeRelatedRegClasses();
template <typename ItTy>
@@ -344,7 +373,7 @@
dbgs() << str << " intervals:\n";
for (; i != e; ++i) {
- dbgs() << "\t" << *i->first << " -> ";
+ dbgs() << '\t' << *i->first << " -> ";
unsigned reg = i->first->reg;
if (TargetRegisterInfo::isVirtualRegister(reg))
@@ -358,8 +387,18 @@
char RALinScan::ID = 0;
}
-INITIALIZE_PASS(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false);
+INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
void RALinScan::ComputeRelatedRegClasses() {
// First pass, add all reg classes to the union, and determine at least one
@@ -371,7 +410,7 @@
for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
I != E; ++I) {
HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
-
+
const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
if (PRC) {
// Already processed this register. Just make sure we know that
@@ -382,7 +421,7 @@
}
}
}
-
+
// Second pass, now that we know conservatively what register classes each reg
// belongs to, add info about aliases. We don't need to do this for targets
// without register aliases.
@@ -390,8 +429,12 @@
for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
I != E; ++I)
- for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
- RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
+ const TargetRegisterClass *AliasClass =
+ OneClassForEachPhysReg.lookup(*AS);
+ if (AliasClass)
+ RelatedRegClasses.unionSets(I->second, AliasClass);
+ }
}
/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
@@ -413,14 +456,13 @@
const LiveRange &range = cur.ranges.front();
VNInfo *vni = range.valno;
- if (vni->isUnused())
+ if (vni->isUnused() || !vni->def.isValid())
return Reg;
unsigned CandReg;
{
MachineInstr *CopyMI;
- if (vni->def != SlotIndex() && vni->isDefAccurate() &&
- (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+ if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
// Defined by a copy, try to extend SrcReg forward
CandReg = CopyMI->getOperand(1).getReg();
else if (TrivCoalesceEnds &&
@@ -430,6 +472,10 @@
CandReg = CopyMI->getOperand(0).getReg();
else
return Reg;
+
+ // If the target of the copy is a sub-register then don't coalesce.
+ if(CopyMI->getOperand(0).getSubReg())
+ return Reg;
}
if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
@@ -464,9 +510,10 @@
tri_ = tm_->getRegisterInfo();
tii_ = tm_->getInstrInfo();
allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
li_ = &getAnalysis<LiveIntervals>();
- ls_ = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
+ RegClassInfo.runOnMachineFunction(fn);
// We don't run the coalescer here because we have no reason to
// interact with it. If the coalescer requires interaction, it
@@ -482,9 +529,9 @@
vrm_ = &getAnalysis<VirtRegMap>();
if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
-
+
spiller_.reset(createSpiller(*this, *mf_, *vrm_));
-
+
initIntervalSets();
linearScan();
@@ -492,6 +539,9 @@
// Rewrite spill code and update the PhysRegsUsed set.
rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
+
assert(unhandled_.empty() && "Unhandled live intervals remain!");
finalizeRegUses();
@@ -520,7 +570,7 @@
for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
- if (!i->second->empty()) {
+ if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) {
mri_->setPhysRegUsed(i->second->reg);
fixed_.push_back(std::make_pair(i->second, i->second->begin()));
}
@@ -538,7 +588,7 @@
// linear scan algorithm
DEBUG({
dbgs() << "********** LINEAR SCAN **********\n"
- << "********** Function: "
+ << "********** Function: "
<< mf_->getFunction()->getName() << '\n';
printIntervals("fixed", fixed_.begin(), fixed_.end());
});
@@ -625,8 +675,6 @@
// Look for physical registers that end up not being allocated even though
// register allocator had to spill other registers in its register class.
- if (ls_->getNumIntervals() == 0)
- return;
if (!vrm_->FindUnusedRegisters(li_))
return;
}
@@ -742,7 +790,7 @@
// register class we are trying to allocate. Then add the weight to all
// sub-registers of the super-register even if they are not aliases.
// e.g. allocating for GR32, bh is not used, updating bl spill weight.
- // bl should get the same spill weight otherwise it will be choosen
+ // bl should get the same spill weight otherwise it will be chosen
// as a spill candidate since spilling bh doesn't make ebx available.
for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
@@ -760,7 +808,8 @@
return IP.end();
}
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, SlotIndex Point){
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
+ SlotIndex Point){
for (unsigned i = 0, e = V.size(); i != e; ++i) {
RALinScan::IntervalPtr &IP = V[i];
LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
@@ -770,30 +819,6 @@
}
}
-/// addStackInterval - Create a LiveInterval for stack if the specified live
-/// interval has been spilled.
-static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
- LiveIntervals *li_,
- MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
- int SS = vrm_.getStackSlot(cur->reg);
- if (SS == VirtRegMap::NO_STACK_SLOT)
- return;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
-
- VNInfo *VNI;
- if (SI.hasAtLeastOneValue())
- VNI = SI.getValNumInfo(0);
- else
- VNI = SI.getNextValue(SlotIndex(), 0, false,
- ls_->getVNInfoAllocator());
-
- LiveInterval &RI = li_->getInterval(cur->reg);
- // FIXME: This may be overly conservative.
- SI.MergeRangesInAsValue(RI, VNI);
-}
-
/// getConflictWeight - Return the number of conflicts between cur
/// live interval and defs and uses of Reg weighted by loop depths.
static
@@ -832,7 +857,7 @@
dbgs() << tri_->getName(Candidates[i].first) << " ";
dbgs() << "\n";
});
-
+
// Calculate the number of conflicts of each candidate.
for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
unsigned Reg = i->first->reg;
@@ -911,13 +936,9 @@
}
void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
- bool isNew = DowngradedRegs.insert(Reg);
- isNew = isNew; // Silence compiler warning.
- assert(isNew && "Multiple reloads holding the same register?");
- DowngradeMap.insert(std::make_pair(li->reg, Reg));
- for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
- isNew = DowngradedRegs.insert(*AS);
- isNew = isNew; // Silence compiler warning.
+ for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
+ bool isNew = DowngradedRegs.insert(*AS);
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Multiple reloads holding the same register?");
DowngradeMap.insert(std::make_pair(li->reg, *AS));
}
@@ -943,14 +964,15 @@
/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
/// spill.
void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
- DEBUG(dbgs() << "\tallocating current interval: ");
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ DEBUG(dbgs() << "\tallocating current interval from "
+ << RC->getName() << ": ");
// This is an implicitly defined live interval, just assign any register.
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
if (cur->empty()) {
unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
- physReg = *RC->allocation_order_begin(*mf_);
+ physReg = getFirstNonReservedPhysReg(RC);
DEBUG(dbgs() << tri_->getName(physReg) << '\n');
// Note the register is not really in use.
vrm_->assignVirt2Phys(cur->reg, physReg);
@@ -970,8 +992,7 @@
// one, e.g. X86::mov32to32_. These move instructions are not coalescable.
if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
VNInfo *vni = cur->begin()->valno;
- if ((vni->def != SlotIndex()) && !vni->isUnused() &&
- vni->isDefAccurate()) {
+ if (!vni->isUnused() && vni->def.isValid()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
if (CopyMI && CopyMI->isCopy()) {
unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
@@ -1002,7 +1023,7 @@
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Can only allocate virtual registers!");
const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
- // If this is not in a related reg class to the register we're allocating,
+ // If this is not in a related reg class to the register we're allocating,
// don't check it.
if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
cur->overlapsFrom(*i->first, i->second-1)) {
@@ -1011,7 +1032,7 @@
SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
}
}
-
+
// Speculatively check to see if we can get a register right now. If not,
// we know we won't be able to by adding more constraints. If so, we can
// check to see if it is valid. Doing an exhaustive search of the fixed_ list
@@ -1026,7 +1047,7 @@
SmallSet<unsigned, 8> RegAliases;
for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
RegAliases.insert(*AS);
-
+
bool ConflictsWithFixed = false;
for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
IntervalPtr &IP = fixed_[i];
@@ -1046,7 +1067,7 @@
}
}
}
-
+
// Okay, the register picked by our speculative getFreePhysReg call turned
// out to be in use. Actually add all of the conflicting fixed registers to
// regUse_ so we can do an accurate query.
@@ -1058,7 +1079,7 @@
LiveInterval *I = IP.first;
const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
I->endIndex() > StartPosition) {
LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
IP.second = II;
@@ -1077,26 +1098,33 @@
physReg = getFreePhysReg(cur);
}
}
-
+
// Restore the physical register tracker, removing information about the
// future.
restoreRegUses();
-
+
// If we find a free register, we are done: assign this virtual to
// the free physical register and add this interval to the active
// list.
if (physReg) {
DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+ assert(RC->contains(physReg) && "Invalid candidate");
vrm_->assignVirt2Phys(cur->reg, physReg);
addRegUse(physReg);
active_.push_back(std::make_pair(cur, cur->begin()));
handled_.push_back(cur);
+ // Remember physReg for avoiding a write-after-write hazard in the next
+ // instruction.
+ if (AvoidWAWHazard &&
+ tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
+ avoidWAW_ = physReg;
+
// "Upgrade" the physical register since it has been allocated.
UpgradeRegister(physReg);
if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
// "Downgrade" physReg to try to keep physReg from being allocated until
- // the next reload from the same SS is allocated.
+ // the next reload from the same SS is allocated.
mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
DowngradeRegister(cur, physReg);
}
@@ -1109,7 +1137,7 @@
for (std::vector<std::pair<unsigned, float> >::iterator
I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
updateSpillWeights(SpillWeights, I->first, I->second, RC);
-
+
// for each interval in active, update spill weights.
for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
i != e; ++i) {
@@ -1119,7 +1147,7 @@
reg = vrm_->getPhys(reg);
updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
}
-
+
DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
// Find a register to spill.
@@ -1128,22 +1156,21 @@
bool Found = false;
std::vector<std::pair<unsigned,float> > RegsWeights;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
if (!minReg || SpillWeights[minReg] == HUGE_VALF)
- for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
- e = RC->allocation_order_end(*mf_); i != e; ++i) {
- unsigned reg = *i;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
float regWeight = SpillWeights[reg];
- // Skip recently allocated registers.
+ // Skip recently allocated registers and reserved registers.
if (minWeight > regWeight && !isRecentlyUsed(reg))
Found = true;
RegsWeights.push_back(std::make_pair(reg, regWeight));
}
-
+
// If we didn't find a register that is spillable, try aliases?
if (!Found) {
- for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
- e = RC->allocation_order_end(*mf_); i != e; ++i) {
- unsigned reg = *i;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
// No need to worry about whether the alias register size < regsize of RC.
// We are going to spill all registers that alias it anyway.
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
@@ -1157,7 +1184,7 @@
minWeight = RegsWeights[0].second;
if (minWeight == HUGE_VALF) {
// All registers must have inf weight. Just grab one!
- minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_);
+ minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
if (cur->weight == HUGE_VALF ||
li_->getApproximateInstructionCount(*cur) == 0) {
// Spill a physical register around defs and uses.
@@ -1202,12 +1229,11 @@
// linearscan.
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
- SmallVector<LiveInterval*, 8> spillIs;
- std::vector<LiveInterval*> added;
- spiller_->spill(cur, added, spillIs);
+ SmallVector<LiveInterval*, 8> added;
+ LiveRangeEdit LRE(*cur, added);
+ spiller_->spill(LRE);
std::sort(added.begin(), added.end(), LISorter());
- addStackInterval(cur, ls_, li_, mri_, *vrm_);
if (added.empty())
return; // Early exit if all spills were folded.
@@ -1266,27 +1292,33 @@
// The earliest start of a Spilled interval indicates up to where
// in handled we need to roll back
- assert(!spillIs.empty() && "No spill intervals?");
+ assert(!spillIs.empty() && "No spill intervals?");
SlotIndex earliestStart = spillIs[0]->beginIndex();
-
+
// Spill live intervals of virtual regs mapped to the physical register we
// want to clear (and its aliases). We only spill those that overlap with the
// current interval as the rest do not affect its allocation. we also keep
// track of the earliest start of all spilled live intervals since this will
// mark our rollback point.
- std::vector<LiveInterval*> added;
+ SmallVector<LiveInterval*, 8> added;
while (!spillIs.empty()) {
LiveInterval *sli = spillIs.back();
spillIs.pop_back();
DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
-
- spiller_->spill(sli, added, spillIs, &earliestStart);
- addStackInterval(sli, ls_, li_, mri_, *vrm_);
+ LiveRangeEdit LRE(*sli, added, 0, &spillIs);
+ spiller_->spill(LRE);
spilled.insert(sli->reg);
}
+ // Include any added intervals in earliestStart.
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ SlotIndex SI = added[i]->beginIndex();
+ if (SI < earliestStart)
+ earliestStart = SI;
+ }
+
DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
// Scan handled in reverse order up to the earliest start of a
@@ -1395,22 +1427,28 @@
std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
// Resolve second part of the hint (if possible) given the current allocation.
unsigned physReg = Hint.second;
- if (physReg &&
- TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
physReg = vrm_->getPhys(physReg);
- TargetRegisterClass::iterator I, E;
- tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
- assert(I != E && "No allocatable register in this register class!");
+ ArrayRef<unsigned> Order;
+ if (Hint.first)
+ Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
+ else
+ Order = RegClassInfo.getOrder(RC);
+
+ assert(!Order.empty() && "No allocatable register in this register class!");
// Scan for the first available register.
- for (; I != E; ++I) {
- unsigned Reg = *I;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
// Skip recently allocated registers.
- if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
+ if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
FreeReg = Reg;
if (FreeReg < inactiveCounts.size())
FreeRegInactiveCount = inactiveCounts[FreeReg];
@@ -1432,13 +1470,17 @@
// inactive count. Alkis found that this reduced register pressure very
// slightly on X86 (in rev 1.94 of this file), though this should probably be
// reevaluated now.
- for (; I != E; ++I) {
- unsigned Reg = *I;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
- FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
+ FreeRegInactiveCount < inactiveCounts[Reg] &&
+ (!SkipDGRegs || !isRecentlyUsed(Reg))) {
FreeReg = Reg;
FreeRegInactiveCount = inactiveCounts[Reg];
if (FreeRegInactiveCount == MaxInactiveCount)
@@ -1457,17 +1499,17 @@
unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
SmallVector<unsigned, 256> inactiveCounts;
unsigned MaxInactiveCount = 0;
-
+
const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
+
for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
i != e; ++i) {
unsigned reg = i->first->reg;
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
- // If this is not in a related reg class to the register we're allocating,
+ // If this is not in a related reg class to the register we're allocating,
// don't check it.
const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
@@ -1484,17 +1526,15 @@
unsigned Preference = vrm_->getRegAllocPref(cur->reg);
if (Preference) {
DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
- if (isRegAvail(Preference) &&
+ if (isRegAvail(Preference) &&
RC->contains(Preference))
return Preference;
}
- if (!DowngradedRegs.empty()) {
- unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
- true);
- if (FreeReg)
- return FreeReg;
- }
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
}
diff --git a/src/LLVM/lib/CodeGen/RegAllocPBQP.cpp b/src/LLVM/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 0000000..0d2cf2d
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,723 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
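+// A rough sketch of the build/solve/spill loop described above (an
+// assumption based on the builder and mapping helpers declared later in
+// this file, not a verbatim copy of the driver):
+//
+//   bool done = false;
+//   while (!done) {
+//     std::auto_ptr<PBQPRAProblem> problem =
+//       builder->build(mf, lis, loopInfo, vregsToAlloc);
+//     PBQP::Solution solution =
+//       PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+//         problem->getGraph());
+//     done = mapPBQPToRegAlloc(*problem, solution); // false => spilled, retry
+//   }
+//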
+
+#define DEBUG_TYPE "regalloc"
+
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "RegisterCoalescer.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+ cl::desc("Pre-split before PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
+
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+ typedef std::set<unsigned> RegSet;
+
+
+ std::auto_ptr<PBQPBuilder> builder;
+
+ char *customPassID;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+ RenderMachineFunction *rmf;
+
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ RegSet vregsToAlloc, emptyIntervalVRegs;
+
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+ /// \brief Adds a stack interval if the given live interval has been
+ /// spilled. Used to support stack slot coloring.
+ void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
+
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution);
+
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
+
+};
+
+char RegAllocPBQP::ID = 0;
+
+} // End anonymous namespace.
+
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+ Node2VReg::const_iterator vregItr = node2VReg.find(node);
+ assert(vregItr != node2VReg.end() && "No vreg for node.");
+ return vregItr->second;
+}
+
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+ VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+ assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+ return nodeItr->second;
+}
+
+const PBQPRAProblem::AllowedSet&
+ PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+ AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+ assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+ const AllowedSet &allowedSet = allowedSetItr->second;
+ return allowedSet;
+}
+
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+ assert(isPRegOption(vreg, option) && "Not a preg option.");
+
+ const AllowedSet& allowedSet = getAllowedSet(vreg);
+ assert(option <= allowedSet.size() && "Option outside allowed set.");
+ return allowedSet[option - 1];
+}
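+
+// Option numbering, by way of example: for a vreg whose allowed set is
+// { EAX, EBX }, option 0 denotes spilling, option 1 maps to EAX and option 2
+// to EBX -- hence the allowedSet[option - 1] lookup above.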
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ typedef std::vector<const LiveInterval*> LIVector;
+
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+
+ std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ PBQP::Graph &g = p->getGraph();
+ RegSet pregs;
+
+ // Collect the set of preg intervals, record that they're used in the MF.
+ for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ pregs.insert(itr->first);
+ mri->setPhysRegUsed(itr->first);
+ }
+ }
+
+ BitVector reservedRegs = tri->getReservedRegs(*mf);
+
+ // Iterate over vregs.
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+ vregItr != vregEnd; ++vregItr) {
+ unsigned vreg = *vregItr;
+ const TargetRegisterClass *trc = mri->getRegClass(vreg);
+ const LiveInterval *vregLI = &lis->getInterval(vreg);
+
+ // Compute an initial allowed set for the current vreg.
+ typedef std::vector<unsigned> VRAllowed;
+ VRAllowed vrAllowed;
+ ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+ for (unsigned i = 0; i != rawOrder.size(); ++i) {
+ unsigned preg = rawOrder[i];
+ if (!reservedRegs.test(preg)) {
+ vrAllowed.push_back(preg);
+ }
+ }
+
+ // Remove any physical registers which overlap.
+ for (RegSet::const_iterator pregItr = pregs.begin(),
+ pregEnd = pregs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
+ const LiveInterval *pregLI = &lis->getInterval(preg);
+
+ if (pregLI->empty()) {
+ continue;
+ }
+
+ if (!vregLI->overlaps(*pregLI)) {
+ continue;
+ }
+
+ // Remove the register from the allowed set.
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), preg);
+
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
+
+ // Also remove any aliases.
+ const unsigned *aliasItr = tri->getAliasSet(preg);
+ if (aliasItr != 0) {
+ for (; *aliasItr != 0; ++aliasItr) {
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
+
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
+ }
+ }
+ }
+
+ // Construct the node.
+ PBQP::Graph::NodeItr node =
+ g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
+
+ // Record the mapping and allowed set in the problem.
+ p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
+
+ PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+ vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+
+ addSpillCosts(g.getNodeCosts(node), spillCost);
+ }
+
+ for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+ vr1Itr != vrEnd; ++vr1Itr) {
+ unsigned vr1 = *vr1Itr;
+ const LiveInterval &l1 = lis->getInterval(vr1);
+ const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+ for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+ vr2Itr != vrEnd; ++vr2Itr) {
+ unsigned vr2 = *vr2Itr;
+ const LiveInterval &l2 = lis->getInterval(vr2);
+ const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+ assert(!l2.empty() && "Empty interval in vreg set?");
+ if (l1.overlaps(l2)) {
+ PBQP::Graph::EdgeItr edge =
+ g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+ PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+ addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+ PBQP::PBQPNum spillCost) {
+ costVec[0] = spillCost;
+}
+
+void PBQPBuilder::addInterferenceCosts(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ const TargetRegisterInfo *tri) {
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (tri->regsOverlap(preg1, preg2)) {
+ costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ }
+ }
+ }
+}
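+
+// Worked example: if vr1 may take { EAX } and vr2 may take { EAX, EBX }, the
+// matrix built above is 2x3 and only the (EAX, EAX) entry becomes infinite;
+// row and column 0 represent the spill option and never carry interference
+// cost.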
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+ MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
+
+ std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ PBQP::Graph &g = p->getGraph();
+
+ const TargetMachine &tm = mf->getTarget();
+ CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
+
+ // Scan the machine function and add a coalescing cost whenever
+ // CoalescerPair deems a copy coalescable.
+ for (MachineFunction::const_iterator mbbItr = mf->begin(),
+ mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *mbb = &*mbbItr;
+
+ for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+ miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *mi = &*miItr;
+
+ if (!cp.setRegisters(mi)) {
+ continue; // Not coalescable.
+ }
+
+ if (cp.getSrcReg() == cp.getDstReg()) {
+ continue; // Already coalesced.
+ }
+
+ unsigned dst = cp.getDstReg(),
+ src = cp.getSrcReg();
+
+ const float copyFactor = 0.5; // Cost of copy relative to load. Current
+ // value plucked randomly out of the air.
+
+ PBQP::PBQPNum cBenefit =
+ copyFactor * LiveIntervals::getSpillWeight(false, true,
+ loopInfo->getLoopDepth(mbb));
+
+ if (cp.isPhys()) {
+ if (!lis->isAllocatable(dst)) {
+ continue;
+ }
+
+ const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+ unsigned pregOpt = 0;
+ while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+ ++pregOpt;
+ }
+ if (pregOpt < allowed.size()) {
+ ++pregOpt; // +1 to account for spill option.
+ PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+ }
+ } else {
+ const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+ const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+ PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+ if (edge == g.edgesEnd()) {
+ edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+ allowed2->size() + 1,
+ 0));
+ } else {
+ if (g.getEdgeNode1(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(allowed1, allowed2);
+ }
+ }
+
+ addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+ cBenefit);
+ }
+ }
+ }
+
+ return p;
+}
+
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+ unsigned pregOption,
+ PBQP::PBQPNum benefit) {
+ costVec[pregOption] += -benefit;
+}
+
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ PBQP::PBQPNum benefit) {
+
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (preg1 == preg2) {
+ costMat[i + 1][j + 1] += -benefit;
+ }
+ }
+ }
+}
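+
+// Note the sign convention used above: coalescing is encoded as a negative
+// cost (a benefit), so with benefit 0.5 every matrix entry where both vregs
+// pick the same physical register drops by 0.5, nudging the solver toward
+// giving them the same assignment.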
+
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ au.addRequiredID(RegisterCoalescerPassID);
+ if (customPassID)
+ au.addRequiredID(*customPassID);
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ if (pbqpPreSplitting)
+ au.addRequired<LoopSplitter>();
+ au.addRequired<VirtRegMap>();
+ au.addRequired<RenderMachineFunction>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
+
+ LiveInterval *li = itr->second;
+
+ // If this live interval is non-empty, we will use PBQP to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregsToAlloc.insert(li->reg);
+ } else {
+ emptyIntervalVRegs.insert(li->reg);
+ }
+ }
+}
+
+void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
+ MachineRegisterInfo* mri) {
+ int stackSlot = vrm->getStackSlot(spilled->reg);
+
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
+ return;
+ }
+
+ const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+ LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+ VNInfo *vni;
+ if (stackInterval.getNumValNums() != 0) {
+ vni = stackInterval.getValNumInfo(0);
+ } else {
+ vni = stackInterval.getNextValue(
+ SlotIndex(), 0, lss->getVNInfoAllocator());
+ }
+
+ LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+ stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
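+
+// In effect, every spill of the same vreg funnels its ranges into a single
+// stack interval under one value number; the stack slot coloring pass later
+// relies on these intervals to share slots between non-overlapping spills.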
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution) {
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ const PBQP::Graph &g = problem.getGraph();
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ node != nodeEnd; ++node) {
+ unsigned vreg = problem.getVRegForNode(node);
+ unsigned alloc = solution.getSelection(node);
+
+ if (problem.isPRegOption(vreg, alloc)) {
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
+ DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+ assert(preg != 0 && "Invalid preg selected.");
+ vrm->assignVirt2Phys(vreg, preg);
+ } else if (problem.isSpillOption(vreg, alloc)) {
+ vregsToAlloc.erase(vreg);
+ const LiveInterval* spillInterval = &lis->getInterval(vreg);
+ double oldWeight = spillInterval->weight;
+ rmf->rememberUseDefs(spillInterval);
+ std::vector<LiveInterval*> newSpills =
+ lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm);
+ addStackInterval(spillInterval, mri);
+ rmf->rememberSpills(spillInterval, newSpills);
+
+ (void) oldWeight;
+ DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+ << oldWeight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (std::vector<LiveInterval*>::const_iterator
+ itr = newSpills.begin(), end = newSpills.end();
+ itr != end; ++itr) {
+ assert(!(*itr)->empty() && "Empty spill range.");
+ DEBUG(dbgs() << (*itr)->reg << " ");
+ vregsToAlloc.insert((*itr)->reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !newSpills.empty();
+ } else {
+ assert(false && "Unknown allocation option.");
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
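+
+// Reading the solution back, in short: getSelection(node) == 0 means the
+// spill option won and the freshly created intervals are queued for the next
+// round, while any selection k >= 1 denotes allowedSet[k - 1] and becomes a
+// vreg-to-preg assignment in the VirtRegMap.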
+
+
+void RegAllocPBQP::finalizeAlloc() const {
+ typedef LiveIntervals::iterator LIIterator;
+ typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+ // First allocate registers for the empty intervals.
+ for (RegSet::const_iterator
+ itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
+ itr != end; ++itr) {
+ LiveInterval *li = &lis->getInterval(*itr);
+
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = liRC->getRawAllocationOrder(*mf).front();
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+
+ // Finally iterate over the basic blocks to compute and set the live-in sets.
+ SmallVector<MachineBasicBlock*, 8> liveInMBBs;
+ MachineBasicBlock *entryMBB = &*mf->begin();
+
+ for (LIIterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = 0;
+
+ // Get the physical register for this interval
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ reg = li->reg;
+ } else if (vrm->isAssignedReg(li->reg)) {
+ reg = vrm->getPhys(li->reg);
+ } else {
+ // Ranges that are assigned only a stack slot are ignored.
+ continue;
+ }
+
+ if (reg == 0) {
+ // Filter out zero regs - they're for intervals that were spilled.
+ continue;
+ }
+
+ // Iterate over the ranges of the current interval...
+ for (LRIterator lrItr = li->begin(), lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+
+ // Find the set of basic blocks which this range is live into...
+ if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
+ // And add the physreg for this interval to their live-in sets.
+ for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
+ if (liveInMBBs[i] != entryMBB) {
+ if (!liveInMBBs[i]->isLiveIn(reg)) {
+ liveInMBBs[i]->addLiveIn(reg);
+ }
+ }
+ }
+ liveInMBBs.clear();
+ }
+ }
+ }
+
+}
+
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+ rmf = &getAnalysis<RenderMachineFunction>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+
+
+ DEBUG(dbgs() << "PBQP Register Allocating for "
+              << mf->getFunction()->getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is repeated until no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+ // If there are non-empty intervals, allocate them using PBQP.
+ if (!vregsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+
+ std::auto_ptr<PBQPRAProblem> problem =
+ builder->build(mf, lis, loopInfo, vregsToAlloc);
+ PBQP::Solution solution =
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+ problem->getGraph());
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
+
+ ++round;
+ }
+ }
+
+ // Finalize the allocation and allocate empty ranges.
+ finalizeAlloc();
+
+ rmf->renderMachineFunction("After PBQP register allocation.", vrm);
+
+ vregsToAlloc.clear();
+ emptyIntervalVRegs.clear();
+
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+
+ // Run rewriter
+ std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+
+ rewriter->runOnMachineFunction(*mf, *vrm, lis);
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder> builder,
+ char *customPassID) {
+ return new RegAllocPBQP(builder, customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ if (pbqpCoalescing) {
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+ } // else
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
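+
+// A target wanting extra constraints can subclass PBQPBuilder and hand it to
+// the factory. A minimal sketch (MyBuilder is hypothetical; the build
+// signature follows the one defined above):
+//
+//   struct MyBuilder : public PBQPBuilder {
+//     virtual std::auto_ptr<PBQPRAProblem> build(MachineFunction *mf,
+//                                                const LiveIntervals *lis,
+//                                                const MachineLoopInfo *loops,
+//                                                const RegSet &vregs) {
+//       std::auto_ptr<PBQPRAProblem> p =
+//         PBQPBuilder::build(mf, lis, loops, vregs);
+//       // ... adjust node/edge costs in p->getGraph() here ...
+//       return p;
+//     }
+//   };
+//
+//   FunctionPass *ra = createPBQPRegisterAllocator(
+//     std::auto_ptr<PBQPBuilder>(new MyBuilder()));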
+
+#undef DEBUG_TYPE
diff --git a/src/LLVM/lib/CodeGen/RegisterClassInfo.cpp b/src/LLVM/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 0000000..786d279
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,117 @@
+//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee-saved and reserved
+// registers depend on the calling convention and other dynamic information,
+// so some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
+{}
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+ bool Update = false;
+ MF = &mf;
+
+ // Allocate new array the first time we see a new target.
+ if (MF->getTarget().getRegisterInfo() != TRI) {
+ TRI = MF->getTarget().getRegisterInfo();
+ RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+ Update = true;
+ }
+
+ // Does this MF have different CSRs?
+ const unsigned *CSR = TRI->getCalleeSavedRegs(MF);
+ if (Update || CSR != CalleeSaved) {
+ // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+ // overlapping CSR.
+ CSRNum.clear();
+ CSRNum.resize(TRI->getNumRegs(), 0);
+ for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+ for (const unsigned *AS = TRI->getOverlaps(Reg);
+ unsigned Alias = *AS; ++AS)
+ CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ Update = true;
+ }
+ CalleeSaved = CSR;
+
+ // Different reserved registers?
+ BitVector RR = TRI->getReservedRegs(*MF);
+ if (RR != Reserved)
+ Update = true;
+ Reserved = RR;
+
+ // Invalidate cached information from previous function.
+ if (Update)
+ ++Tag;
+}
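+
+// Net effect of the tag scheme: queries stay cheap as long as the target,
+// the CSR list and the reserved-register set are unchanged between
+// functions; when any of them changes, Tag is bumped and each RCInfo entry
+// is lazily recomputed on its next use (see get() in the header).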
+
+/// compute - Compute the preferred allocation order for RC with reserved
+/// registers filtered out. Volatile registers come first, followed by CSR
+/// aliases ordered according to the CSR order specified by the target.
+void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ RCInfo &RCI = RegClass[RC->getID()];
+
+ // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs();
+
+ if (!RCI.Order)
+ RCI.Order.reset(new unsigned[NumRegs]);
+
+ unsigned N = 0;
+ SmallVector<unsigned, 16> CSRAlias;
+
+ // FIXME: Once targets reserve registers instead of removing them from the
+ // allocation order, we can simply use begin/end here.
+ ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned i = 0; i != RawOrder.size(); ++i) {
+ unsigned PhysReg = RawOrder[i];
+ // Remove reserved registers from the allocation order.
+ if (Reserved.test(PhysReg))
+ continue;
+ if (CSRNum[PhysReg])
+ // PhysReg aliases a CSR, save it for later.
+ CSRAlias.push_back(PhysReg);
+ else
+ RCI.Order[N++] = PhysReg;
+ }
+ RCI.NumRegs = N + CSRAlias.size();
+ assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+
+ // CSR aliases go after the volatile registers, preserve the target's order.
+ std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+
+ // Check if RC is a proper sub-class.
+ if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+ if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
+ RCI.ProperSubClass = true;
+
+ DEBUG({
+ dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
+ dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+ dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
+ });
+
+ // RCI is now up-to-date.
+ RCI.Tag = Tag;
+}
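+
+// Concretely (an illustrative 32-bit x86 ordering, assuming EBX, ESI, EDI
+// and EBP are the CSRs and ESP is reserved): a raw GR32 order of EAX, ECX,
+// EDX, EBX, ESI, EDI, EBP, ESP would come out as EAX, ECX, EDX first, then
+// EBX, ESI, EDI, EBP in target CSR-alias order, with ESP dropped entirely.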
+
diff --git a/src/LLVM/lib/CodeGen/RegisterClassInfo.h b/src/LLVM/lib/CodeGen/RegisterClassInfo.h
new file mode 100644
index 0000000..2c14070
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RegisterClassInfo.h
@@ -0,0 +1,132 @@
+//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee-saved and reserved
+// registers depend on the calling convention and other dynamic information,
+// so some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H
+#define LLVM_CODEGEN_REGISTERCLASSINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+class RegisterClassInfo {
+ struct RCInfo {
+ unsigned Tag;
+ unsigned NumRegs;
+ bool ProperSubClass;
+ OwningArrayPtr<unsigned> Order;
+
+ RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {}
+ operator ArrayRef<unsigned>() const {
+ return makeArrayRef(Order.get(), NumRegs);
+ }
+ };
+
+ // Cached information for each register class.
+ OwningArrayPtr<RCInfo> RegClass;
+
+ // Tag changes whenever cached information needs to be recomputed. An RCInfo
+ // entry is valid when its tag matches.
+ unsigned Tag;
+
+ const MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+
+ // Callee saved registers of last MF. Assumed to be valid until the next
+ // runOnMachineFunction() call.
+ const unsigned *CalleeSaved;
+
+ // Map register number to CalleeSaved index + 1.
+ SmallVector<uint8_t, 4> CSRNum;
+
+ // Reserved registers in the current MF.
+ BitVector Reserved;
+
+ // Compute all information about RC.
+ void compute(const TargetRegisterClass *RC) const;
+
+ // Return an up-to-date RCInfo for RC.
+ const RCInfo &get(const TargetRegisterClass *RC) const {
+ const RCInfo &RCI = RegClass[RC->getID()];
+ if (Tag != RCI.Tag)
+ compute(RC);
+ return RCI;
+ }
+
+public:
+ RegisterClassInfo();
+
+ /// runOnMachineFunction - Prepare to answer questions about MF. This must
+ /// be called before any other methods are used.
+ void runOnMachineFunction(const MachineFunction &MF);
+
+ /// getNumAllocatableRegs - Returns the number of actually allocatable
+ /// registers in RC in the current function.
+ unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const {
+ return get(RC).NumRegs;
+ }
+
+ /// getOrder - Returns the preferred allocation order for RC. The order
+ /// contains no reserved registers, and registers that alias callee saved
+ /// registers come last.
+ ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const {
+ return get(RC);
+ }
+
+ /// isProperSubClass - Returns true if RC has a legal super-class with more
+ /// allocatable registers.
+ ///
+ /// Register classes like GR32_NOSP are not proper sub-classes because %esp
+ /// is not allocatable. Similarly, tGPR is not a proper sub-class in Thumb
+ /// mode because the GPR super-class is not legal.
+ bool isProperSubClass(const TargetRegisterClass *RC) const {
+ return get(RC).ProperSubClass;
+ }
+
+ /// getLastCalleeSavedAlias - Returns the last callee saved register that
+ /// overlaps PhysReg, or 0 if PhysReg doesn't overlap a CSR.
+ unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ if (unsigned N = CSRNum[PhysReg])
+ return CalleeSaved[N-1];
+ return 0;
+ }
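+
+ // For example, on x86-64 with RBX in the CSR list, queries for BL, BX, EBX
+ // or RBX would each return RBX: CSRNum records, for every physical
+ // register, the last CSR overlapping it (illustrative, target-dependent).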
+
+ /// isReserved - Returns true when PhysReg is a reserved register.
+ ///
+ /// Reserved registers may belong to an allocatable register class, but the
+ /// target has explicitly requested that they are not used.
+ ///
+ bool isReserved(unsigned PhysReg) const {
+ return Reserved.test(PhysReg);
+ }
+
+ /// isAllocatable - Returns true when PhysReg belongs to an allocatable
+ /// register class and it hasn't been reserved.
+ ///
+ /// Allocatable registers may show up in the allocation order of some virtual
+ /// register, so a register allocator needs to track its liveness and
+ /// availability.
+ bool isAllocatable(unsigned PhysReg) const {
+ return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
+ }
+};
+} // end namespace llvm
+
+#endif
+
diff --git a/src/LLVM/lib/CodeGen/RegisterCoalescer.cpp b/src/LLVM/lib/CodeGen/RegisterCoalescer.cpp
index 02b5539..9b414d6 100644
--- a/src/LLVM/lib/CodeGen/RegisterCoalescer.cpp
+++ b/src/LLVM/lib/CodeGen/RegisterCoalescer.cpp
@@ -13,37 +13,216 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegisterCoalescer.h"
+#define DEBUG_TYPE "regcoalescing"
+#include "RegisterCoalescer.h"
+#include "LiveDebugVariables.h"
+#include "RegisterClassInfo.h"
+#include "VirtRegMap.h"
+
+#include "llvm/Pass.h"
+#include "llvm/Value.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Pass.h"
-
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
using namespace llvm;
-// Register the RegisterCoalescer interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+STATISTIC(NumInflated , "Number of register classes inflated");
+
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableCrossClassJoin("disable-cross-class-join",
+ cl::desc("Avoid coalescing cross register class copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnablePhysicalJoin("join-physregs",
+ cl::desc("Join physical register copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+namespace {
+ class RegisterCoalescer : public MachineFunctionPass {
+ MachineFunction* MF;
+ MachineRegisterInfo* MRI;
+ const TargetMachine* TM;
+ const TargetRegisterInfo* TRI;
+ const TargetInstrInfo* TII;
+ LiveIntervals *LIS;
+ LiveDebugVariables *LDV;
+ const MachineLoopInfo* Loops;
+ AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
+
+ /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+ ///
+ SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+ /// ReMatCopies - Keep track of copies eliminated due to remat.
+ ///
+ SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+ /// ReMatDefs - Keep track of definition instructions which have
+ /// been remat'ed.
+ SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+ /// joinIntervals - join compatible live intervals
+ void joinIntervals();
+
+ /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into the "TryAgain" list.
+ void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain);
+
+ /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns
+ /// true if the copy was successfully coalesced away. If it is not
+ /// currently possible to coalesce this interval, but it may be possible if
+ /// other things get coalesced, then it returns true by reference in
+ /// 'Again'.
+ bool JoinCopy(MachineInstr *TheCopy, bool &Again);
+
+ /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we
+ /// can use this information below to update aliases.
+ bool JoinIntervals(CoalescerPair &CP);
+
+ /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// the source value number is defined by a copy from the destination reg
+ /// see if we can merge these two destination reg valno# into a single
+ /// value number, eliminating a copy.
+ bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+ /// trivial computation, replace the copy by rematerializing the definition.
+ /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+ unsigned DstReg, MachineInstr *CopyMI);
+
+ /// shouldJoinPhys - Return true if a physreg copy should be joined.
+ bool shouldJoinPhys(CoalescerPair &CP);
+
+ /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+ /// two virtual registers from different register classes.
+ bool isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC);
+
+ /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void UpdateRegDefsUses(const CoalescerPair &CP);
+
+ /// RemoveDeadDef - If a def of a live interval is now determined dead,
+ /// remove the val# it defines. If the live interval becomes empty, remove
+ /// it as well.
+ bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+ /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+ /// VNInfo copy flag for DstReg and all aliases.
+ void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
+ /// markAsJoined - Remember that CopyMI has already been joined.
+ void markAsJoined(MachineInstr *CopyMI);
+
+ /// eliminateUndefCopy - Handle copies of undef values.
+ bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+ };
+} // end anonymous namespace
+
+char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID;
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+
char RegisterCoalescer::ID = 0;
-// RegisterCoalescer destructor: DO NOT move this to the header file
-// for RegisterCoalescer or else clients of the RegisterCoalescer
-// class may not depend on the RegisterCoalescer.o file in the current
-// .a file, causing alias analysis support to not be included in the
-// tool correctly!
-//
-RegisterCoalescer::~RegisterCoalescer() {}
-
-unsigned CoalescerPair::compose(unsigned a, unsigned b) const {
+static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
if (!a) return b;
if (!b) return a;
- return tri_.composeSubRegIndices(a, b);
+ return tri.composeSubRegIndices(a, b);
}
-bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
- unsigned &Src, unsigned &Dst,
- unsigned &SrcSub, unsigned &DstSub) const {
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) {
if (MI->isCopy()) {
Dst = MI->getOperand(0).getReg();
DstSub = MI->getOperand(0).getSubReg();
@@ -51,7 +230,8 @@
SrcSub = MI->getOperand(1).getSubReg();
} else if (MI->isSubregToReg()) {
Dst = MI->getOperand(0).getReg();
- DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm());
+ DstSub = compose(tri, MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
Src = MI->getOperand(2).getReg();
SrcSub = MI->getOperand(2).getSubReg();
} else
@@ -60,14 +240,14 @@
}
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
- srcReg_ = dstReg_ = subIdx_ = 0;
- newRC_ = 0;
- flipped_ = crossClass_ = false;
+ SrcReg = DstReg = SubIdx = 0;
+ NewRC = 0;
+ Flipped = CrossClass = false;
unsigned Src, Dst, SrcSub, DstSub;
- if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
- partial_ = SrcSub || DstSub;
+ Partial = SrcSub || DstSub;
// If one register is a physreg, it must be Dst.
if (TargetRegisterInfo::isPhysicalRegister(Src)) {
@@ -75,7 +255,7 @@
return false;
std::swap(Src, Dst);
std::swap(SrcSub, DstSub);
- flipped_ = true;
+ Flipped = true;
}
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -83,14 +263,14 @@
if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
// Eliminate DstSub on a physreg.
if (DstSub) {
- Dst = tri_.getSubReg(Dst, DstSub);
+ Dst = TRI.getSubReg(Dst, DstSub);
if (!Dst) return false;
DstSub = 0;
}
// Eliminate SrcSub by picking a corresponding Dst superregister.
if (SrcSub) {
- Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
if (!Dst) return false;
SrcSub = 0;
} else if (!MRI.getRegClass(Src)->contains(Dst)) {
@@ -108,7 +288,7 @@
return false;
const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- if (!getCommonSubClass(DstRC, SrcRC))
+ if (!TRI.getCommonSubClass(DstRC, SrcRC))
return false;
SrcSub = DstSub = 0;
}
@@ -118,36 +298,36 @@
std::swap(Src, Dst);
DstSub = SrcSub;
SrcSub = 0;
- assert(!flipped_ && "Unexpected flip");
- flipped_ = true;
+ assert(!Flipped && "Unexpected flip");
+ Flipped = true;
}
// Find the new register class.
const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
if (DstSub)
- newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
else
- newRC_ = getCommonSubClass(DstRC, SrcRC);
- if (!newRC_)
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
+ if (!NewRC)
return false;
- crossClass_ = newRC_ != DstRC || newRC_ != SrcRC;
+ CrossClass = NewRC != DstRC || NewRC != SrcRC;
}
// Check our invariants
assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
"Cannot have a physical SubIdx");
- srcReg_ = Src;
- dstReg_ = Dst;
- subIdx_ = DstSub;
+ SrcReg = Src;
+ DstReg = Dst;
+ SubIdx = DstSub;
return true;
}
bool CoalescerPair::flip() {
- if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_))
+ if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg))
return false;
- std::swap(srcReg_, dstReg_);
- flipped_ = !flipped_;
+ std::swap(SrcReg, DstReg);
+ Flipped = !Flipped;
return true;
}
@@ -155,42 +335,1655 @@
if (!MI)
return false;
unsigned Src, Dst, SrcSub, DstSub;
- if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
- // Find the virtual register that is srcReg_.
- if (Dst == srcReg_) {
+ // Find the virtual register that is SrcReg.
+ if (Dst == SrcReg) {
std::swap(Src, Dst);
std::swap(SrcSub, DstSub);
- } else if (Src != srcReg_) {
+ } else if (Src != SrcReg) {
return false;
}
- // Now check that Dst matches dstReg_.
- if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) {
+ // Now check that Dst matches DstReg.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
if (!TargetRegisterInfo::isPhysicalRegister(Dst))
return false;
- assert(!subIdx_ && "Inconsistent CoalescerPair state.");
+ assert(!SubIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
if (DstSub)
- Dst = tri_.getSubReg(Dst, DstSub);
+ Dst = TRI.getSubReg(Dst, DstSub);
// Full copy of Src.
if (!SrcSub)
- return dstReg_ == Dst;
+ return DstReg == Dst;
// This is a partial register copy. Check that the parts match.
- return tri_.getSubReg(dstReg_, SrcSub) == Dst;
+ return TRI.getSubReg(DstReg, SrcSub) == Dst;
} else {
- // dstReg_ is virtual.
- if (dstReg_ != Dst)
+ // DstReg is virtual.
+ if (DstReg != Dst)
return false;
// Registers match, do the subregisters line up?
- return compose(subIdx_, SrcSub) == DstSub;
+ return compose(TRI, SubIdx, SrcSub) == DstSub;
}
}
-// Because of the way .a files work, we must force the SimpleRC
-// implementation to be pulled in if the RegisterCoalescer classes are
-// pulled in. Otherwise we run the risk of RegisterCoalescer being
-// used, but the default implementation not being linked into the tool
-// that uses it.
-DEFINING_FILE_FOR(RegisterCoalescer)
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
+ // Joined copies are not deleted immediately, but kept in JoinedCopies.
+ JoinedCopies.insert(CopyMI);
+
+ // Mark all register operands of CopyMI as <undef> so they won't affect dead
+ // code elimination.
+ for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
+ E = CopyMI->operands_end(); I != E; ++I)
+ if (I->isReg())
+ I->setIsUndef(true);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // Bail if there is no dst interval - can happen when merging physical subreg
+ // operations.
+ if (!LIS->hasInterval(CP.getDstReg()))
+ return false;
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ if (BLR == IntB.end()) return false;
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->isDefByCopy()) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
+ VNInfo *AValNo = ALR->valno;
+ // If it's re-defined by an early clobber somewhere in the live range, then
+ // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149:
+ // 172 %ECX<def> = MOV32rr %reg1039<kill>
+ // 180 INLINEASM <es:subl $5,$1
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+ // %EAX<kill>,
+ // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+ // 188 %EAX<def> = MOV32rr %EAX<kill>
+ // 196 %ECX<def> = MOV32rr %ECX<kill>
+ // 204 %ECX<def> = MOV32rr %ECX<kill>
+ // 212 %EAX<def> = MOV32rr %EAX<kill>
+ // 220 %EAX<def> = MOV32rr %EAX
+ // 228 %reg1039<def> = MOV32rr %ECX<kill>
+ // The early clobber operand ties ECX input to the ECX def.
+ //
+ // The live interval of ECX is represented as this:
+ // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
+ // The coalescer has no idea there was a def in the middle of [174,230].
+ if (AValNo->hasRedefByEC())
+ return false;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ if (!CP.isCoalescable(AValNo->getCopy()))
+ return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+ if (ValLR == IntB.end())
+ return false;
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst =
+ LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its aliases is overlapping the live interval of the virtual register.
+ // If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) {
+ DEBUG({
+ dbgs() << "\t\tInterfere with alias ";
+ LIS->getInterval(*AS).print(dbgs(), TRI);
+ });
+ return false;
+ }
+ }
+
+ DEBUG({
+ dbgs() << "Extending: ";
+ IntB.print(dbgs(), TRI);
+ });
+
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+ BValNo->setCopy(0);
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!LIS->hasInterval(*SR))
+ continue;
+ LiveInterval &SRLI = LIS->getInterval(*SR);
+ SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+ SRLI.getNextValue(FillerStart, 0,
+ LIS->getVNInfoAllocator())));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno) {
+ // If B1 is killed by a PHI, then the merged live range must also be killed
+ // by the same PHI, as B0 and B1 can not overlap.
+ bool HasPHIKill = BValNo->hasPHIKill();
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ if (HasPHIKill)
+ ValLR->valno->setHasPHIKill(true);
+ }
+ DEBUG({
+ dbgs() << " result = ";
+ IntB.print(dbgs(), TRI);
+ dbgs() << "\n";
+ });
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // If the copy instruction was killing the destination register before the
+ // merge, find the last use and trim the live range. That will also add the
+ // isKill marker.
+ if (ALR->end == CopyIdx)
+ LIS->shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValNo val# of IntA.
+bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // FIXME: For now, only eliminate the copy by commuting its def when the
+ // source register is a virtual register. We want to guard against cases
+ // where the copy is a back edge copy and commuting the def lengthens the
+ // live interval of the source register to the entire loop.
+ if (CP.isPhys() && CP.isFlipped())
+ return false;
+
+ // Bail if there is no dst interval.
+ if (!LIS->hasInterval(CP.getDstReg()))
+ return false;
+
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || !BValNo->isDefByCopy())
+ return false;
+
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ assert(AValNo && "COPY source not live");
+
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !NewDstMO.isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+ // Abort if the aliases of IntB.reg have values that are not simply the
+ // clobbers from the superreg.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ if (LIS->hasInterval(*AS) &&
+ HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0))
+ return false;
+
+ // If some of the uses of IntA.reg have already been coalesced away, return
+ // false. It's not possible to determine whether it's safe to perform the
+ // coalescing.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(IntA.reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end())
+ continue;
+ if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = TII->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+ TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
+ return false;
+ if (NewMI != DefMI) {
+ LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MBB->insert(DefMI, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ // If ALR and BLR overlap and the end of BLR extends beyond that of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+ UE = MRI->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex();
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *TRI);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getDefIndex();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ markAsJoined(UseMI);
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ ValNo->setCopy(0);
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+ }
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ IntA.removeValNo(AValNo);
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return true;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ bool preserveSrcInt,
+ unsigned DstReg,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex();
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ if (ValNo->isPHIDef() || ValNo->isUnused())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ assert(DefMI && "Defining instruction disappeared");
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isAsCheapAsAMove())
+ return false;
+ if (!TII->isTriviallyReMaterializable(DefMI, AA))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(TII, AA, SawStore))
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ if (!DefMI->isImplicitDef()) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (MRI->getRegClass(DstReg) != RC)
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
+
+ RemoveCopyFlag(DstReg, CopyMI);
+
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
+ TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
+ MachineInstr *NewMI = prior(MII);
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction. And update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ if (MO.isDef())
+ RemoveCopyFlag(MO.getReg(), CopyMI);
+ }
+
+ NewMI->copyImplicitOps(CopyMI);
+ LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ CopyMI->eraseFromParent();
+ ReMatCopies.insert(CopyMI);
+ ReMatDefs.insert(DefMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
+ ++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ if (preserveSrcInt)
+ LIS->shrinkToUses(&SrcInt);
+
+ return true;
+}
+
+/// eliminateUndefCopy - ProcessImplicitDefs may leave some copies of <undef>
+/// values; it only removes local variables. When we have a copy like:
+///
+/// %vreg1 = COPY %vreg2<undef>
+///
+/// We delete the copy and remove the corresponding value number from %vreg1.
+/// Any uses of that value number are marked as <undef>.
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
+ const CoalescerPair &CP) {
+ SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
+ LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
+ if (SrcInt->liveAt(Idx))
+ return false;
+ LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
+ if (DstInt->liveAt(Idx))
+ return false;
+
+ // No intervals are live-in to CopyMI - it is undef.
+ if (CP.isFlipped())
+ DstInt = SrcInt;
+ SrcInt = 0;
+
+ VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex());
+ assert(DeadVNI && "No value defined in DstInt");
+ DstInt->removeValNo(DeadVNI);
+
+ // Find new undef uses.
+ for (MachineRegisterInfo::reg_nodbg_iterator
+ I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
+ I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ if (MO.isDef() || MO.isUndef())
+ continue;
+ MachineInstr *MI = MO.getParent();
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ if (DstInt->liveAt(Idx))
+ continue;
+ MO.setIsUndef(true);
+ DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
+ }
+ return true;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
+ bool DstIsPhys = CP.isPhys();
+ unsigned SrcReg = CP.getSrcReg();
+ unsigned DstReg = CP.getDstReg();
+ unsigned SubIdx = CP.getSubIdx();
+
+ // Update LiveDebugVariables.
+ LDV->renameRegister(SrcReg, DstReg, SubIdx);
+
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // A PhysReg copy that won't be coalesced can perhaps be rematerialized
+ // instead.
+ if (DstIsPhys) {
+ if (UseMI->isFullCopy() &&
+ UseMI->getOperand(1).getReg() == SrcReg &&
+ UseMI->getOperand(0).getReg() != SrcReg &&
+ UseMI->getOperand(0).getReg() != DstReg &&
+ !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false,
+ UseMI->getOperand(0).getReg(), UseMI))
+ continue;
+ }
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ bool Kills = false, Deads = false;
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+ Kills |= MO.isKill();
+ Deads |= MO.isDead();
+
+ // Make sure we don't create read-modify-write defs accidentally. We
+ // assume here that a SrcReg def cannot be joined into a live DstReg. If
+ // RegisterCoalescer starts tracking partially live registers, we will
+ // need to check the actual LiveInterval to determine if DstReg is live
+ // here.
+ if (SubIdx && !Reads)
+ MO.setIsUndef();
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *TRI);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+
+ // This instruction is a copy that will be removed.
+ if (JoinedCopies.count(UseMI))
+ continue;
+
+ if (SubIdx) {
+ // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+ // read-modify-write of DstReg.
+ if (Deads)
+ UseMI->addRegisterDead(DstReg, TRI);
+ else if (!Reads && Writes)
+ UseMI->addRegisterDefined(DstReg, TRI);
+
+ // Kill flags apply to the whole physical register.
+ if (DstIsPhys && Kills)
+ UseMI->addRegisterKilled(DstReg, TRI);
+ }
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty; if so, remove it, along with the empty intervals of its
+/// sub-registers. Return true if the live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
+ const TargetRegisterInfo *TRI) {
+ if (li.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+ for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!LIS->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = LIS->getInterval(*SR);
+ if (sli.empty())
+ LIS->removeInterval(*SR);
+ }
+ LIS->removeInterval(li.reg);
+ return true;
+ }
+ return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
+ MachineInstr *DefMI) {
+ SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex();
+ LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+ if (DefIdx != MLR->valno->def)
+ return false;
+ li.removeValNo(MLR->valno);
+ return removeIntervalIfEmpty(li, LIS, TRI);
+}
+
+void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
+ const MachineInstr *CopyMI) {
+ SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ if (LIS->hasInterval(DstReg)) {
+ LiveInterval &LI = LIS->getInterval(DstReg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+ if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return;
+ for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) {
+ if (!LIS->hasInterval(*AS))
+ continue;
+ LiveInterval &LI = LIS->getInterval(*AS);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+}
+
+/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
+/// We need to be careful about coalescing a source physical register with a
+/// virtual register. Once the coalescing is done, it cannot be broken and these
+/// are not spillable! If the destination interval uses are far away, think
+/// twice about coalescing them!
+bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
+ bool Allocatable = LIS->isAllocatable(CP.getDstReg());
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
+
+ /// Always join simple intervals that are defined by a single copy from a
+ /// reserved register. This doesn't increase register pressure, so it is
+ /// always beneficial.
+ if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
+ return true;
+
+ if (!EnablePhysicalJoin) {
+ DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
+ return false;
+ }
+
+  // Only coalesce to an allocatable physreg; we don't want to risk modifying
+  // reserved registers.
+ if (!Allocatable) {
+ DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+ return false; // Not coalescable.
+ }
+
+ // Don't join with physregs that have a ridiculous number of live
+ // ranges. The data structure performance is really bad when that
+ // happens.
+ if (LIS->hasInterval(CP.getDstReg()) &&
+ LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+ ++numAborts;
+ DEBUG(dbgs()
+ << "\tPhysical register live interval too complicated, abort!\n");
+ return false;
+ }
+
+ // FIXME: Why are we skipping this test for partial copies?
+ // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+ if (!CP.isPartial()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg());
+ unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
+ unsigned Length = LIS->getApproximateInstructionCount(JoinVInt);
+ if (Length > Threshold) {
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC) {
+ unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
+  // This heuristic is good enough in practice, but it's obviously not *right*.
+  // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+  // out all but the most restrictive register classes.
+ if (NewRCCount > 4 ||
+      // Early exit if the function is fairly small; coalesce aggressively in
+      // that case. For really special register classes with 3 or fewer
+      // registers, be a bit more careful.
+ (LIS->getFuncInstructionCount() / NewRCCount) < 8)
+ return true;
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+ LiveInterval &DstInt = LIS->getInterval(DstReg);
+ unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt);
+ unsigned DstSize = LIS->getApproximateInstructionCount(DstInt);
+
+ // Coalesce aggressively if the intervals are small compared to the number of
+ // registers in the new class. The number 4 is fairly arbitrary, chosen to be
+ // less aggressive than the 8 used for the whole function size.
+ const unsigned ThresSize = 4 * NewRCCount;
+ if (SrcSize <= ThresSize && DstSize <= ThresSize)
+ return true;
+
+ // Estimate *register use density*. If it doubles or more, abort.
+ unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg),
+ MRI->use_nodbg_end());
+ unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg),
+ MRI->use_nodbg_end());
+ unsigned NewUses = SrcUses + DstUses;
+ unsigned NewSize = SrcSize + DstSize;
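+  // For illustration, the tests below are equivalent to comparing use
+  // densities per allocatable register: abort when
+  //   NewUses/(NewSize*NewRCCount) > 2 * SrcUses/(SrcSize*SrcRCCount)
+  // i.e. when joining would more than double the density on the source side
+  // (and similarly for the destination side).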
+ if (SrcRC != NewRC && SrcSize > ThresSize) {
+ unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
+ if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
+ return false;
+ }
+ if (DstRC != NewRC && DstSize > ThresSize) {
+ unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
+ if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
+ return false;
+ }
+ return true;
+}
+
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may become possible once other
+/// copies are coalesced, then it returns true through the 'Again' reference.
+bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
+
+ Again = false;
+ if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+ return false; // Already done.
+
+ DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*TII, *TRI);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ // If they are already joined we continue.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tCopy already coalesced.\n");
+ return false; // Not coalescable.
+ }
+
+ // Eliminate undefs.
+ if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
+ return false; // Not coalescable.
+ }
+
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
+ << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx())
+ << "\n");
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ if (!shouldJoinPhys(CP)) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), CopyMI))
+ return true;
+ return false;
+ }
+ } else {
+ // Avoid constraining virtual register regclass too much.
+ if (CP.isCrossClass()) {
+ DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
+ if (DisableCrossClassJoin) {
+ DEBUG(dbgs() << "\tCross-class joins disabled.\n");
+ return false;
+ }
+ if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
+ MRI->getRegClass(CP.getSrcReg()),
+ MRI->getRegClass(CP.getDstReg()),
+ CP.getNewRC())) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+
+ // When possible, let DstReg be the larger interval.
+ if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
+ LIS->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!JoinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!CP.isPartial()) {
+ if (AdjustCopiesBackFrom(CP, CopyMI) ||
+ RemoveCopyByCommutingDef(CP, CopyMI)) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Remember to delete the copy instruction.
+ markAsJoined(CopyMI);
+
+ UpdateRegDefsUses(CP);
+
+ // If we have extended the live range of a physical register, make sure we
+ // update live-in lists as well.
+ if (CP.isPhys()) {
+ SmallVector<MachineBasicBlock*, 16> BlockSeq;
+ // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+ // ranges for this, and they are preserved.
+ LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg());
+ for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+ I != E; ++I ) {
+ LIS->findLiveInMBBs(I->start, I->end, BlockSeq);
+ for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+ MachineBasicBlock &block = *BlockSeq[idx];
+ if (!block.isLiveIn(CP.getDstReg()))
+ block.addLiveIn(CP.getDstReg());
+ }
+ BlockSeq.clear();
+ }
+ }
+
+  // SrcReg is guaranteed to be the register whose live interval is being
+  // merged.
+ LIS->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+
+ DEBUG({
+ LiveInterval &DstInt = LIS->getInterval(CP.getDstReg());
+ dbgs() << "\tJoined. Result = ";
+ DstInt.print(dbgs(), TRI);
+ dbgs() << "\n";
+ });
+
+ ++numJoins;
+ return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VNI is the value number that we're trying to resolve. NewVNInfo keeps
+/// track of the value numbers assigned to the result
+/// LiveInterval. ThisFromOther/OtherFromThis are maps that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
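+/// A worked example with hypothetical value numbers: if LHS value #0 is a
+/// copy of RHS value #1, and RHS value #1 is not itself a copy from the LHS,
+/// then resolving LHS #0 first marks ThisValNoAssignments[0] = -2 (the cycle
+/// guard), recurses into RHS #1, which receives a fresh slot in NewVNInfo,
+/// and finally maps LHS #0 to that same slot - merging the two values.
+///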
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+ DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments) {
+ unsigned VN = VNI->id;
+
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+ assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+ if (I == ThisFromOther.end()) {
+ NewVNInfo.push_back(VNI);
+ return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+ }
+ VNInfo *OtherValNo = I->second;
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo->id] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+
+// Find out if we have something like
+// A = X
+// B = X
+// if so, we can pretend this is actually
+// A = X
+// B = A
+// which allows us to coalesce A and B.
+// VNI is the definition of B. LR is the live range of A that includes
+// the slot just before B. If we return true, we add "B = X" to DupCopies.
+// This implies that A dominates B.
+static bool RegistersDefinedFromSameValue(LiveIntervals &li,
+ const TargetRegisterInfo &tri,
+ CoalescerPair &CP,
+ VNInfo *VNI,
+ LiveRange *LR,
+ SmallVector<MachineInstr*, 8> &DupCopies) {
+ // FIXME: This is very conservative. For example, we don't handle
+ // physical registers.
+
+ MachineInstr *MI = VNI->getCopy();
+
+ if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ return false;
+
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(Src) ||
+ !TargetRegisterInfo::isVirtualRegister(Dst))
+ return false;
+
+ unsigned A = CP.getDstReg();
+ unsigned B = CP.getSrcReg();
+
+ if (B == Dst)
+ std::swap(A, B);
+ assert(Dst == A);
+
+ VNInfo *Other = LR->valno;
+ if (!Other->isDefByCopy())
+ return false;
+ const MachineInstr *OtherMI = Other->getCopy();
+
+ if (!OtherMI->isFullCopy())
+ return false;
+
+ unsigned OtherDst = OtherMI->getOperand(0).getReg();
+ unsigned OtherSrc = OtherMI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) ||
+ !TargetRegisterInfo::isVirtualRegister(OtherDst))
+ return false;
+
+ assert(OtherDst == B);
+
+ if (Src != OtherSrc)
+ return false;
+
+ // If the copies use two different value numbers of X, we cannot merge
+ // A and B.
+ LiveInterval &SrcInt = li.getInterval(Src);
+ // getVNInfoBefore returns NULL for undef copies. In this case, the
+ // optimization is still safe.
+ if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def))
+ return false;
+
+ DupCopies.push_back(MI);
+
+ return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; });
+
+ // If a live interval is a physical register, check for interference with any
+ // aliases. The interference check implemented here is a bit more conservative
+  // than the full interference check below. We allow overlapping live ranges
+ // only when one is a copy of the other.
+ if (CP.isPhys()) {
+ for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ if (!LIS->hasInterval(*AS))
+ continue;
+ const LiveInterval &LHS = LIS->getInterval(*AS);
+ LiveInterval::const_iterator LI = LHS.begin();
+ for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+ RI != RE; ++RI) {
+ LI = std::lower_bound(LI, LHS.end(), RI->start);
+ // Does LHS have an overlapping live range starting before RI?
+ if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+ (RI->start != RI->valno->def ||
+ !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), TRI);
+ dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+ });
+ return false;
+ }
+
+ // Check that LHS ranges beginning in this range are copies.
+ for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+ if (LI->start != LI->valno->def ||
+ !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), TRI);
+ dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+ });
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+ DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+ SmallVector<VNInfo*, 16> NewVNInfo;
+
+ SmallVector<MachineInstr*, 8> DupCopies;
+
+ LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
+ DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; });
+
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the RHS.
+ LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
+ continue;
+
+ LHSValsDefinedFromRHS[VNI] = lr->valno;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the LHS.
+ LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
+ continue;
+
+ RHSValsDefinedFromLHS[VNI] = lr->valno;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I != IE && J != JE) {
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+ }
+
+ while (I != IE && J != JE) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
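+    // (This is the standard half-open range test: the earlier-starting range
+    // overlaps the other iff it ends strictly after the other one starts.)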
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result liverange, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->valno->id] !=
+ RHSValNoAssignments[J->valno->id])
+ return false;
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+ return false;
+ }
+
+ if (I->end < J->end)
+ ++I;
+ else
+ ++J;
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+ E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned LHSValID = LHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[LHSValID]->setHasPHIKill(true);
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+ E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned RHSValID = RHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[RHSValID]->setHasPHIKill(true);
+ }
+
+ if (LHSValNoAssignments.empty())
+ LHSValNoAssignments.push_back(-1);
+ if (RHSValNoAssignments.empty())
+ RHSValNoAssignments.push_back(-1);
+
+ SmallVector<unsigned, 8> SourceRegisters;
+ for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
+ E = DupCopies.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+
+ // We have pretended that the assignment to B in
+ // A = X
+ // B = X
+ // was actually a copy from A. Now that we decided to coalesce A and B,
+ // transform the code into
+ // A = X
+ // X = X
+ // and mark the X as coalesced to keep the illusion.
+ unsigned Src = MI->getOperand(1).getReg();
+ SourceRegisters.push_back(Src);
+ MI->getOperand(0).substVirtReg(Src, 0, *TRI);
+
+ markAsJoined(MI);
+ }
+
+  // If B = X was the last use of X in a live range, we have to shrink it now
+ // that B = X is gone.
+ for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
+ E = SourceRegisters.end(); I != E; ++I) {
+ LIS->shrinkToUses(&LIS->getInterval(*I));
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ MRI);
+ return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first on the loop depth
+  // of the basic block (the unsigned), then on CFG connectedness, and finally
+  // on the MBB number.
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ // Deeper loops first
+ if (LHS.first != RHS.first)
+ return LHS.first > RHS.first;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+ unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+ if (cl != cr)
+ return cl > cr;
+
+ // As a last resort, sort by block number.
+ return LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
+
+void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ SmallVector<MachineInstr*, 8> VirtCopies;
+ SmallVector<MachineInstr*, 8> PhysCopies;
+ SmallVector<MachineInstr*, 8> ImpDefCopies;
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+    // If this isn't a copy or a subreg_to_reg, we can't join intervals.
+ unsigned SrcReg, DstReg;
+ if (Inst->isCopy()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->isSubregToReg()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ } else
+ continue;
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty())
+ ImpDefCopies.push_back(Inst);
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(Inst);
+ else
+ VirtCopies.push_back(Inst);
+ }
+
+ // Try coalescing implicit copies and insert_subreg <undef> first,
+ // followed by copies to / from physical registers, then finally copies
+ // from virtual registers to virtual registers.
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = ImpDefCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = PhysCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = VirtCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+}
+
+void RegisterCoalescer::joinIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+ std::vector<MachineInstr*> TryAgainList;
+ if (Loops->empty()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+ // Unfortunately we can't just iterate over loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+    // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I));
+ }
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ MachineInstr *&TheCopy = TryAgainList[i];
+ if (!TheCopy)
+ continue;
+
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+        TheCopy = 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+}
+
+void RegisterCoalescer::releaseMemory() {
+ JoinedCopies.clear();
+ ReMatCopies.clear();
+ ReMatDefs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &fn.getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ LDV = &getAnalysis<LiveDebugVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
+ Loops = &getAnalysis<MachineLoopInfo>();
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)MF->getFunction())->getName() << '\n');
+
+ if (VerifyCoalescing)
+ MF->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DEBUG({
+ dbgs() << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end();
+ I != E; ++I){
+ I->second->print(dbgs(), TRI);
+ dbgs() << "\n";
+ }
+ });
+ }
+
+ // Perform a final pass over the instructions and compute spill weights
+ // and remove identity moves.
+ SmallVector<unsigned, 4> DeadDefs, InflateRegs;
+ for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+ MachineInstr *MI = mii;
+ if (JoinedCopies.count(MI)) {
+ // Delete all coalesced copies.
+ bool DoDelete = true;
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
+ unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Collect candidates for register class inflation.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
+ InflateRegs.push_back(SrcReg);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
+ InflateRegs.push_back(DstReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ MI->getNumOperands() > 2)
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead. e.g.
+        // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // delete them later.
+ DoDelete = false;
+
+ if (MI->allDefsAreDead()) {
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ LIS->hasInterval(SrcReg))
+ LIS->shrinkToUses(&LIS->getInterval(SrcReg));
+ DoDelete = true;
+ }
+ if (!DoDelete) {
+ // We need the instruction to adjust liveness, so make it a KILL.
+ if (MI->isSubregToReg()) {
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(1);
+ }
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ mii = llvm::next(mii);
+ } else {
+ LIS->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ }
+ continue;
+ }
+
+ // Now check if this is a remat'ed def instruction which is now dead.
+ if (ReMatDefs.count(MI)) {
+ bool isDead = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DeadDefs.push_back(Reg);
+ // Remat may also enable register class inflation.
+ if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
+ InflateRegs.push_back(Reg);
+ }
+ if (MO.isDead())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->use_nodbg_empty(Reg)) {
+ isDead = false;
+ break;
+ }
+ }
+ if (isDead) {
+ while (!DeadDefs.empty()) {
+ unsigned DeadDef = DeadDefs.back();
+ DeadDefs.pop_back();
+ RemoveDeadDef(LIS->getInterval(DeadDef), MI);
+ }
+ LIS->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ continue;
+ } else
+ DeadDefs.clear();
+ }
+
+ ++mii;
+
+      // Check for kill flags that are now unnecessary.
+ if (LIS->isNotInMIMap(MI)) continue;
+ SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isKill()) continue;
+ unsigned reg = MO.getReg();
+ if (!reg || !LIS->hasInterval(reg)) continue;
+ if (!LIS->getInterval(reg).killedAt(DefIdx)) {
+ MO.setIsKill(false);
+ continue;
+ }
+ // When leaving a kill flag on a physreg, check if any subregs should
+ // remain alive.
+ if (!TargetRegisterInfo::isPhysicalRegister(reg))
+ continue;
+ for (const unsigned *SR = TRI->getSubRegisters(reg);
+ unsigned S = *SR; ++SR)
+ if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx))
+ MI->addRegisterDefined(S, TRI);
+ }
+ }
+ }
+
+ // After deleting a lot of copies, register classes may be less constrained.
+  // Removing sub-register operands may allow GR32_ABCD -> GR32 and
+  // DPR_VFP2 -> DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ unsigned Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg, *TM)) {
+ DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ << MRI->getRegClass(Reg)->getName() << '\n');
+ ++NumInflated;
+ }
+ }
+
+ DEBUG(dump());
+ DEBUG(LDV->dump());
+ if (VerifyCoalescing)
+ MF->verify(this, "After register coalescing");
+ return true;
+}
+
+/// print - Implement the dump method.
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ LIS->print(O, m);
+}
diff --git a/src/LLVM/lib/CodeGen/RegisterCoalescer.h b/src/LLVM/lib/CodeGen/RegisterCoalescer.h
new file mode 100644
index 0000000..472c483
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RegisterCoalescer.h
@@ -0,0 +1,106 @@
+//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
+#define LLVM_CODEGEN_REGISTER_COALESCER_H
+
+namespace llvm {
+
+ class MachineInstr;
+ class TargetRegisterInfo;
+ class TargetRegisterClass;
+ class TargetInstrInfo;
+
+ /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ /// DstReg - The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned DstReg;
+
+    /// SrcReg - The virtual register that will be coalesced into DstReg.
+ unsigned SrcReg;
+
+    /// SubIdx - The subregister index of SrcReg in DstReg. It is possible to
+    /// coalesce SrcReg into a subreg of the larger DstReg when DstReg is a
+    /// virtual register.
+ unsigned SubIdx;
+
+ /// Partial - True when the original copy was a partial subregister copy.
+ bool Partial;
+
+    /// CrossClass - True when both regs are virtual and NewRC is constrained.
+ bool CrossClass;
+
+    /// Flipped - True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool Flipped;
+
+ /// NewRC - The register class of the coalesced register, or NULL if DstReg
+ /// is a physreg.
+ const TargetRegisterClass *NewRC;
+
+ public:
+ CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+ : TII(tii), TRI(tri), DstReg(0), SrcReg(0), SubIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+
+ /// setRegisters - set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// because DstReg is a physical register, or SubIdx is set.
+ bool flip();
+
+ /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// isPhys - Return true if DstReg is a physical register.
+ bool isPhys() const { return !NewRC; }
+
+ /// isPartial - Return true if the original copy instruction did not copy
+ /// the full register, but was a subreg operation.
+ bool isPartial() const { return Partial; }
+
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return CrossClass; }
+
+ /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return Flipped; }
+
+ /// getDstReg - Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return DstReg; }
+
+ /// getSrcReg - Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return SrcReg; }
+
+ /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+ /// coalesced into, or 0.
+ unsigned getSubIdx() const { return SubIdx; }
+
+ /// getNewRC - Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return NewRC; }
+ };
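+
+  // Typical usage, sketched after RegisterCoalescer::JoinCopy in
+  // RegisterCoalescer.cpp:
+  //   CoalescerPair CP(*TII, *TRI);
+  //   if (CP.setRegisters(CopyMI))
+  //     ... query CP.getSrcReg(), CP.getDstReg(), CP.getNewRC() ...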
+} // End llvm namespace
+
+#endif
diff --git a/src/LLVM/lib/CodeGen/RegisterScavenging.cpp b/src/LLVM/lib/CodeGen/RegisterScavenging.cpp
index 43b3fb6..ca02aa1 100644
--- a/src/LLVM/lib/CodeGen/RegisterScavenging.cpp
+++ b/src/LLVM/lib/CodeGen/RegisterScavenging.cpp
@@ -21,7 +21,9 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -124,9 +126,10 @@
MBBI = MBB->begin();
Tracking = true;
} else {
- assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
MBBI = llvm::next(MBBI);
}
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
MachineInstr *MI = MBBI;
@@ -151,13 +154,16 @@
BitVector DeadRegs(NumPhysRegs);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isUndef())
+ if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg || isReserved(Reg))
continue;
if (MO.isUse()) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ continue;
// Two-address operands implicitly kill.
if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
addRegWithSubRegs(KillRegs, Reg);
@@ -175,12 +181,14 @@
// Verify uses and defs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isUndef())
+ if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg || isReserved(Reg))
continue;
if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
if (!isUsed(Reg)) {
// Check if it's partial live: e.g.
// D0 = insert_subreg D0<undef>, S0
@@ -198,6 +206,7 @@
break;
}
assert(SubUsed && "Using an undefined register!");
+ (void)SubUsed;
}
assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
"Using an early clobbered register!");
@@ -226,30 +235,26 @@
used = ~RegsAvailable & ~ReservedRegs;
}
-/// CreateRegClassMask - Set the bits that represent the registers in the
-/// TargetRegisterClass.
-static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
- ++I)
- Mask.set(*I);
-}
-
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (!isAliasUsed(*I))
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
return *I;
+ }
return 0;
}
/// getRegsAvailable - Return all available registers in the register class
/// in Mask.
-void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC,
- BitVector &Mask) {
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
if (!isAliasUsed(*I))
Mask.set(*I);
+ return Mask;
}
/// findSurvivorReg - Return the candidate register that is unused for the
@@ -325,11 +330,9 @@
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
- // Mask off the registers which are not in the TargetRegisterClass.
- BitVector Candidates(NumPhysRegs, false);
- CreateRegClassMask(RC, Candidates);
- // Do not include reserved registers.
- Candidates ^= ReservedRegs & Candidates;
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
// Exclude all the registers being used by the instruction.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
@@ -340,17 +343,23 @@
}
// Try to find a register that's unused if there is one, as then we won't
- // have to spill.
- if ((Candidates & RegsAvailable).any())
- Candidates &= RegsAvailable;
+ // have to spill. Search explicitly rather than masking out based on
+ // RegsAvailable, as RegsAvailable does not take aliases into account.
+ // That's what getRegsAvailable() is for.
+ BitVector Available = getRegsAvailable(RC);
+
+ if ((Candidates & Available).any())
+ Candidates &= Available;
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
// If we found an unused register there is no reason to spill it.
- if (!isAliasUsed(SReg))
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
return SReg;
+ }
assert(ScavengedReg == 0 &&
"Scavenger slot is live, unable to scavenge another register!");
@@ -366,12 +375,12 @@
"Cannot scavenge register without an emergency spill slot!");
TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
MachineBasicBlock::iterator II = prior(I);
- TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
// Restore the scavenged register before its use (or first terminator).
TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
II = prior(UseMI);
- TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
}
ScavengeRestore = prior(UseMI);
@@ -380,5 +389,8 @@
// ScavengedReg = SReg;
ScavengedRC = RC;
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
return SReg;
}
diff --git a/src/LLVM/lib/CodeGen/RenderMachineFunction.cpp b/src/LLVM/lib/CodeGen/RenderMachineFunction.cpp
new file mode 100644
index 0000000..8b02ec4
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/RenderMachineFunction.cpp
@@ -0,0 +1,1012 @@
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+ cl::desc("Appended to function name to get output file name "
+ "(default: \".html\")"),
+ cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+ cl::desc("Comma separated list of functions to render"
+ ", or \"*\"."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+ cl::desc("Register classes to render pressure for."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+ cl::desc("Live intervals to show alongside code."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+ cl::desc("Don't display empty intervals."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+ cl::desc("Render indexes not associated with instructions or "
+ "MBB starts."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+useFancyVerticals("rmf-fancy-verts",
+ cl::desc("Use SVG for vertical text."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+prettyHTML("rmf-pretty-html",
+ cl::desc("Pretty print HTML. For debugging the renderer only.."),
+ cl::init(false), cl::Hidden);
+
+
+namespace llvm {
+
+ bool MFRenderingOptions::renderingOptionsProcessed;
+ std::set<std::string> MFRenderingOptions::mfNamesToRender;
+ bool MFRenderingOptions::renderAllMFs = false;
+
+ std::set<std::string> MFRenderingOptions::classNamesToRender;
+ bool MFRenderingOptions::renderAllClasses = false;
+
+ std::set<std::pair<unsigned, unsigned> >
+ MFRenderingOptions::intervalNumsToRender;
+ unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
+
+ template <typename OutputItr>
+ void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
+ OutputItr outItr) {
+ std::string::const_iterator curPos = s.begin();
+ std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
+ while (nextComa != s.end()) {
+ std::string elem;
+ std::copy(curPos, nextComa, std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ curPos = llvm::next(nextComa);
+ nextComa = std::find(curPos, s.end(), ',');
+ }
+
+ if (curPos != s.end()) {
+ std::string elem;
+ std::copy(curPos, s.end(), std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ }
+ }
+
+ void MFRenderingOptions::processOptions() {
+ if (!renderingOptionsProcessed) {
+ processFuncNames();
+ processRegClassNames();
+ processIntervalNumbers();
+ renderingOptionsProcessed = true;
+ }
+ }
+
+ void MFRenderingOptions::processFuncNames() {
+ if (machineFuncsToRender == "*") {
+ renderAllMFs = true;
+ } else {
+ splitComaSeperatedList(machineFuncsToRender,
+ std::inserter(mfNamesToRender,
+ mfNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processRegClassNames() {
+ if (pressureClasses == "*") {
+ renderAllClasses = true;
+ } else {
+ splitComaSeperatedList(pressureClasses,
+ std::inserter(classNamesToRender,
+ classNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processIntervalNumbers() {
+ std::set<std::string> intervalRanges;
+ splitComaSeperatedList(showIntervals,
+ std::inserter(intervalRanges,
+ intervalRanges.begin()));
+ std::for_each(intervalRanges.begin(), intervalRanges.end(),
+ processIntervalRange);
+ }
+
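+  // Accepted forms for each comma-separated element, as handled below:
+  // "*", "virt-nospills*", "spills*", "virt*", "phys*", a single interval
+  // number "N", or a range "N-M".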
+ void MFRenderingOptions::processIntervalRange(
+ const std::string &intervalRangeStr) {
+ if (intervalRangeStr == "*") {
+ intervalTypesToRender |= All;
+ } else if (intervalRangeStr == "virt-nospills*") {
+ intervalTypesToRender |= VirtNoSpills;
+ } else if (intervalRangeStr == "spills*") {
+ intervalTypesToRender |= VirtSpills;
+ } else if (intervalRangeStr == "virt*") {
+ intervalTypesToRender |= AllVirt;
+ } else if (intervalRangeStr == "phys*") {
+ intervalTypesToRender |= AllPhys;
+ } else {
+ std::istringstream iss(intervalRangeStr);
+ unsigned reg1, reg2;
+ if ((iss >> reg1 >> std::ws)) {
+ if (iss.eof()) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
+ } else {
+ char c;
+ iss >> c;
+ if (c == '-' && (iss >> reg2)) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
+ } else {
+ dbgs() << "Warning: Invalid interval range \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ } else {
+ dbgs() << "Warning: Invalid interval number \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ }
+
+ void MFRenderingOptions::setup(MachineFunction *mf,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis,
+ const RenderMachineFunction *rmf) {
+ this->mf = mf;
+ this->tri = tri;
+ this->lis = lis;
+ this->rmf = rmf;
+
+ clear();
+ }
+
+ void MFRenderingOptions::clear() {
+ regClassesTranslatedToCurrentFunction = false;
+ regClassSet.clear();
+
+ intervalsTranslatedToCurrentFunction = false;
+ intervalSet.clear();
+ }
+
+ void MFRenderingOptions::resetRenderSpecificOptions() {
+ intervalSet.clear();
+ intervalsTranslatedToCurrentFunction = false;
+ }
+
+ bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
+ processOptions();
+
+ return (renderAllMFs ||
+ mfNamesToRender.find(mf->getFunction()->getName()) !=
+ mfNamesToRender.end());
+ }
+
+ const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
+ translateRegClassNamesToCurrentFunction();
+ return regClassSet;
+ }
+
+ const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
+ translateIntervalNumbersToCurrentFunction();
+ return intervalSet;
+ }
+
+ bool MFRenderingOptions::renderEmptyIndexes() const {
+ return showEmptyIndexes;
+ }
+
+ bool MFRenderingOptions::fancyVerticals() const {
+ return useFancyVerticals;
+ }
+
+ void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
+ if (!regClassesTranslatedToCurrentFunction) {
+ processOptions();
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ if (renderAllClasses ||
+ classNamesToRender.find(trc->getName()) !=
+ classNamesToRender.end()) {
+ regClassSet.insert(trc);
+ }
+ }
+ regClassesTranslatedToCurrentFunction = true;
+ }
+ }
+
+ void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
+ if (!intervalsTranslatedToCurrentFunction) {
+ processOptions();
+
+ // If we're not just doing explicit then do a copy over all matching
+ // types.
+ if (intervalTypesToRender != ExplicitOnly) {
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (filterEmpty && li->empty())
+ continue;
+
+ if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
+ (intervalTypesToRender & AllPhys))) {
+ intervalSet.insert(li);
+ } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
+ if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
+ ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
+ intervalSet.insert(li);
+ }
+ }
+ }
+ }
+
+ // If we need to process the explicit list...
+ if (intervalTypesToRender != All) {
+ for (std::set<std::pair<unsigned, unsigned> >::const_iterator
+ regRangeItr = intervalNumsToRender.begin(),
+ regRangeEnd = intervalNumsToRender.end();
+ regRangeItr != regRangeEnd; ++regRangeItr) {
+ const std::pair<unsigned, unsigned> &range = *regRangeItr;
+ for (unsigned reg = range.first; reg != range.second; ++reg) {
+ if (lis->hasInterval(reg)) {
+ intervalSet.insert(&lis->getInterval(reg));
+ }
+ }
+ }
+ }
+
+ intervalsTranslatedToCurrentFunction = true;
+ }
+ }
+
+ // ---------- TargetRegisterExtraInformation implementation ----------
+
+ TargetRegisterExtraInfo::TargetRegisterExtraInfo()
+ : mapsPopulated(false) {
+ }
+
+ void TargetRegisterExtraInfo::setup(MachineFunction *mf,
+ MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis) {
+ this->mf = mf;
+ this->mri = mri;
+ this->tri = tri;
+ this->lis = lis;
+ }
+
+ void TargetRegisterExtraInfo::reset() {
+ if (!mapsPopulated) {
+ initWorst();
+ //initBounds();
+ initCapacity();
+ mapsPopulated = true;
+ }
+
+ resetPressureAndLiveStates();
+ }
+
+ void TargetRegisterExtraInfo::clear() {
+ prWorst.clear();
+ vrWorst.clear();
+ capacityMap.clear();
+ pressureMap.clear();
+ //liveStatesMap.clear();
+ mapsPopulated = false;
+ }
+
+ void TargetRegisterExtraInfo::initWorst() {
+ assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() &&
+ "Worst map already initialised?");
+
+ // Start with the physical registers.
+ for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) {
+ WorstMapLine &pregLine = prWorst[preg];
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ unsigned numOverlaps = 0;
+ for (TargetRegisterClass::iterator rItr = trc->begin(),
+ rEnd = trc->end();
+ rItr != rEnd; ++rItr) {
+ unsigned trcPReg = *rItr;
+ if (tri->regsOverlap(preg, trcPReg))
+ ++numOverlaps;
+ }
+
+ pregLine[trc] = numOverlaps;
+ }
+ }
+
+ // Now the register classes.
+ for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rc1Itr != rcEnd; ++rc1Itr) {
+ const TargetRegisterClass *trc1 = *rc1Itr;
+ WorstMapLine &classLine = vrWorst[trc1];
+
+ for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
+ rc2Itr != rcEnd; ++rc2Itr) {
+ const TargetRegisterClass *trc2 = *rc2Itr;
+
+ unsigned worst = 0;
+
+ for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
+ trc1End = trc1->end();
+ trc1Itr != trc1End; ++trc1Itr) {
+ unsigned trc1Reg = *trc1Itr;
+ unsigned trc1RegWorst = 0;
+
+ for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
+ trc2End = trc2->end();
+ trc2Itr != trc2End; ++trc2Itr) {
+ unsigned trc2Reg = *trc2Itr;
+ if (tri->regsOverlap(trc1Reg, trc2Reg))
+ ++trc1RegWorst;
+ }
+ if (trc1RegWorst > worst) {
+ worst = trc1RegWorst;
+ }
+ }
+
+ if (worst != 0) {
+ classLine[trc2] = worst;
+ }
+ }
+ }
+ }
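+ // prWorst[preg][trc] counts how many registers of trc overlap preg, i.e.
+ // how many trc registers a single assignment to preg can block;
+ // vrWorst[rc1][rc2] is the worst such count over all members of rc1.
+ // getWorst() reads these as per-interval pressure contributions.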
+
+ unsigned TargetRegisterExtraInfo::getWorst(
+ unsigned reg,
+ const TargetRegisterClass *trc) const {
+ const WorstMapLine *wml = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ PRWorstMap::const_iterator prwItr = prWorst.find(reg);
+ assert(prwItr != prWorst.end() && "Missing prWorst entry.");
+ wml = &prwItr->second;
+ } else {
+ const TargetRegisterClass *regTRC = mri->getRegClass(reg);
+ VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
+ assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
+ wml = &vrwItr->second;
+ }
+
+ WorstMapLine::const_iterator wmlItr = wml->find(trc);
+ if (wmlItr == wml->end())
+ return 0;
+
+ return wmlItr->second;
+ }
+
+ void TargetRegisterExtraInfo::initCapacity() {
+ assert(!mapsPopulated && capacityMap.empty() &&
+ "Capacity map already initialised?");
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ unsigned capacity = trc->getRawAllocationOrder(*mf).size();
+
+ if (capacity != 0)
+ capacityMap[trc] = capacity;
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getCapacity(
+ const TargetRegisterClass *trc) const {
+ CapacityMap::const_iterator cmItr = capacityMap.find(trc);
+ assert(cmItr != capacityMap.end() &&
+ "vreg with unallocable register class");
+ return cmItr->second;
+ }
+
+ void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
+ pressureMap.clear();
+ //liveStatesMap.clear();
+
+ // Iterate over all live intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg))
+ continue;
+
+ // For all ranges in the current interval.
+ for (LiveInterval::iterator lrItr = li->begin(),
+ lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+ LiveRange *lr = &*lrItr;
+
+ // For all slots in the current range.
+ for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
+
+ // Record increased pressure at index for all overlapping classes.
+ for (TargetRegisterInfo::regclass_iterator
+ rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ if (trc->getRawAllocationOrder(*mf).empty())
+ continue;
+
+ unsigned worstAtI = getWorst(li->reg, trc);
+
+ if (worstAtI != 0) {
+ pressureMap[i][trc] += worstAtI;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ PressureMap::const_iterator pmItr = pressureMap.find(i);
+ if (pmItr == pressureMap.end())
+ return 0;
+ const PressureMapLine &pmLine = pmItr->second;
+ PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+ if (pmlItr == pmLine.end())
+ return 0;
+ return pmlItr->second;
+ }
+
+ bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ return (getPressureAtSlot(trc, i) > getCapacity(trc));
+ }
+
+ // ---------- MachineFunctionRenderer implementation ----------
+
+ void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+ if (!prettyHTML)
+ return;
+ for (unsigned i = 0; i < ns; ++i) {
+ os << " ";
+ }
+ }
+
+ RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+ return Spacer(ns);
+ }
+
+ raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
+ s.print(os);
+ return os;
+ }
+
+ template <typename Iterator>
+ std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
+ std::string r;
+
+ for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+ char c = *sItr;
+
+ switch (c) {
+ case '<': r.append("&lt;"); break;
+ case '>': r.append("&gt;"); break;
+ case '&': r.append("&amp;"); break;
+ case ' ': r.append("&nbsp;"); break;
+ case '\"': r.append("&quot;"); break;
+ default: r.push_back(c); break;
+ }
+ }
+
+ return r;
+ }
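+ // e.g. escapeChars("a < b") returns "a&nbsp;&lt;&nbsp;b", which is safe
+ // to embed in the generated HTML.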
+
+ RenderMachineFunction::LiveState
+ RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+ SlotIndex i) const {
+ const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+ // For uses/defs, recorded use/def indexes override current liveness and
+ // instruction operands (only for the interval which records the indexes).
+ if (i.isUse() || i.isDef()) {
+ UseDefs::const_iterator udItr = useDefs.find(li);
+ if (udItr != useDefs.end()) {
+ const SlotSet &slotSet = udItr->second;
+ if (slotSet.count(i)) {
+ if (i.isUse()) {
+ return Used;
+ }
+ // else
+ return Defined;
+ }
+ }
+ }
+
+ // If the slot is a load/store, or there's no info in the use/def set,
+ // then use liveness and instruction operand info.
+ if (li->liveAt(i)) {
+
+ if (mi == 0) {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ } else {
+ if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ return Defined;
+ } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ return Used;
+ } else {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ }
+ }
+ }
+ return Dead;
+ }
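+ // Precedence: recorded use/def slots for this interval win, then the
+ // operands of the instruction at i, then plain liveness (AliveReg vs.
+ // AliveStack depending on whether the vreg has a stack slot), else Dead.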
+
+ RenderMachineFunction::PressureState
+ RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ if (trei.getPressureAtSlot(trc, i) == 0) {
+ return Zero;
+ } else if (trei.classOverCapacityAtSlot(trc, i)){
+ return High;
+ }
+ return Low;
+ }
+
+ /// \brief Render a machine instruction.
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ oss << *mi;
+
+ os << escapeChars(oss.str());
+ }
+
+ template <typename T>
+ void RenderMachineFunction::renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const {
+ if (ro.fancyVerticals()) {
+ os << indent << "<object\n"
+ << indent + s(2) << "class=\"obj\"\n"
+ << indent + s(2) << "type=\"image/svg+xml\"\n"
+ << indent + s(2) << "width=\"14px\"\n"
+ << indent + s(2) << "height=\"55px\"\n"
+ << indent + s(2) << "data=\"data:image/svg+xml,\n"
+ << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n"
+ << indent + s(6) << "<text x='-55' y='10' "
+ "font-family='Courier' font-size='12' "
+ "transform='rotate(-90)' "
+ "text-rendering='optimizeSpeed' "
+ "fill='#000'>" << t << "</text>\n"
+ << indent + s(4) << "</svg>\">\n"
+ << indent << "</object>\n";
+ } else {
+ std::ostringstream oss;
+ oss << t;
+ std::string tStr(oss.str());
+
+ os << indent;
+ for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end();
+ tStrItr != tStrEnd; ++tStrItr) {
+ os << *tStrItr << "<br/>";
+ }
+ os << "\n";
+ }
+ }
+
+ void RenderMachineFunction::insertCSS(const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<style type=\"text/css\">\n"
+ << indent + s(2) << "body { color: black; }\n"
+ << indent + s(2) << "table.code td { font-family: monospace; "
+ "border-width: 0px; border-style: solid; "
+ "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n"
+ << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n"
+ << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n"
+ << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n"
+ << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n"
+ << indent + s(2) << "table.code th { border-width: 0px; "
+ "border-style: solid; }\n"
+ << indent << "</style>\n";
+ }
+
+ void RenderMachineFunction::renderFunctionSummary(
+ const Spacer &indent, raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << indent << "<h1>Function: " << mf->getFunction()->getName()
+ << "</h1>\n"
+ << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
+ }
+
+
+ void RenderMachineFunction::renderPressureTableLegend(
+ const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<h2>Rendering Pressure Legend:</h2>\n"
+ << indent << "<table class=\"code\">\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<th>Pressure</th><th>Description</th>"
+ "<th>Appearance</th>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>No Pressure</td>"
+ "<td>No physical registers of this class requested.</td>"
+ "<td class=\"p-z\">&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>Low Pressure</td>"
+ "<td>Sufficient physical registers to meet demand.</td>"
+ "<td class=\"p-l\">&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>High Pressure</td>"
+ "<td>Potentially insufficient physical registers to meet demand.</td>"
+ "<td class=\"p-h\">&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent << "</table>\n";
+ }
+
+ template <typename CellType>
+ void RenderMachineFunction::renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const {
+
+ if (rleAccumulator.second == 0)
+ return;
+
+ typename std::map<CellType, std::string>::const_iterator ctsItr =
+ cellTypeStrs.find(rleAccumulator.first);
+
+ assert(ctsItr != cellTypeStrs.end() && "No string for given cell type.");
+
+ os << indent + s(4) << "<td class=\"" << ctsItr->second << "\"";
+ if (rleAccumulator.second > 1)
+ os << " colspan=" << rleAccumulator.second;
+ os << "></td>\n";
+ }
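+ // e.g. a run of five consecutive Low-pressure cells is emitted as a single
+ // <td class="p-l" colspan=5></td> rather than five separate cells.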
+
+
+ void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const {
+
+ std::map<LiveState, std::string> lsStrs;
+ lsStrs[Dead] = "l-n";
+ lsStrs[Defined] = "l-d";
+ lsStrs[Used] = "l-u";
+ lsStrs[AliveReg] = "l-r";
+ lsStrs[AliveStack] = "l-s";
+
+ std::map<PressureState, std::string> psStrs;
+ psStrs[Zero] = "p-z";
+ psStrs[Low] = "p-l";
+ psStrs[High] = "p-h";
+
+ // Open the table...
+
+ os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n"
+ << indent + s(2) << "<tr>\n";
+
+ // Render the header row...
+
+ os << indent + s(4) << "<th>index</th>\n"
+ << indent + s(4) << "<th>instr</th>\n";
+
+ // Render class names if necessary...
+ if (!ro.regClasses().empty()) {
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, trc->getName());
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<th>&nbsp;</th>\n";
+
+ // Render interval numbers if necessary...
+ if (!ro.intervals().empty()) {
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = *liItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, li->reg);
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ os << indent + s(2) << "</tr>\n";
+
+ // End header row, start with the data rows...
+
+ MachineInstr *mi = 0;
+
+ // Data rows:
+ for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex();
+ i = i.getNextSlot()) {
+
+ // Render the slot column.
+ os << indent + s(2) << "<tr height=6ex>\n";
+
+ // Render the code column.
+ if (i.isLoad()) {
+ MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
+ mi = sis->getInstructionFromIndex(i);
+
+ if (i == sis->getMBBStartIdx(mbb) || mi != 0 ||
+ ro.renderEmptyIndexes()) {
+ os << indent + s(4) << "<td rowspan=4>" << i << "&nbsp;</td>\n"
+ << indent + s(4) << "<td rowspan=4>\n";
+
+ if (i == sis->getMBBStartIdx(mbb)) {
+ os << indent + s(6) << "BB#" << mbb->getNumber() << ":&nbsp;\n";
+ } else if (mi != 0) {
+ os << indent + s(6) << "&nbsp;&nbsp;";
+ renderMachineInstr(os, mi);
+ } else {
+ // Empty interval - leave blank.
+ }
+ os << indent + s(4) << "</td>\n";
+ } else {
+ i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ continue;
+ }
+ }
+
+ // Render the class columns.
+ if (!ro.regClasses().empty()) {
+ std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ PressureState newPressure = getPressureStateAt(trc, i);
+
+ if (newPressure == psRLEAccumulator.first) {
+ ++psRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ psRLEAccumulator.first = newPressure;
+ psRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<td width=2em></td>\n";
+
+ if (!ro.intervals().empty()) {
+ std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+ const LiveInterval *li = *liItr;
+ LiveState newLiveness = getLiveStateAt(li, i);
+
+ if (newLiveness == lsRLEAccumulator.first) {
+ ++lsRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ lsRLEAccumulator.first = newLiveness;
+ lsRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ }
+ os << indent + s(2) << "</tr>\n";
+ }
+
+ os << indent << "</table>\n";
+
+ if (!ro.regClasses().empty())
+ renderPressureTableLegend(indent, os);
+ }
+
+ void RenderMachineFunction::renderFunctionPage(
+ raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << "<html>\n"
+ << s(2) << "<head>\n"
+ << s(4) << "<title>" << fqn << "</title>\n";
+
+ insertCSS(s(4), os);
+
+ os << s(2) << "</head>\n"
+ << s(2) << "<body >\n";
+
+ renderFunctionSummary(s(4), os, renderContextStr);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ //renderLiveIntervalInfoTable(" ", os);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ renderCodeTablePlusPI(s(4), os);
+
+ os << s(2) << "</body>\n"
+ << "</html>\n";
+ }
+
+ void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ lis = &getAnalysis<LiveIntervals>();
+ sis = &getAnalysis<SlotIndexes>();
+
+ trei.setup(mf, mri, tri, lis);
+ ro.setup(mf, tri, lis, this);
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ return false;
+ }
+
+ void RenderMachineFunction::releaseMemory() {
+ trei.clear();
+ ro.clear();
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+ }
+
+ void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ const MachineInstr *mi = &*rItr;
+ if (mi->readsRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ }
+ if (mi->definesRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ }
+ }
+ }
+
+ void RenderMachineFunction::rememberSpills(
+ const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(),
+ siEnd = spills.end();
+ siItr != siEnd; ++siItr) {
+ const LiveInterval *spill = *siItr;
+ spillIntervals[li].insert(spill);
+ spillFor[spill] = li;
+ }
+ }
+
+ bool RenderMachineFunction::isSpill(const LiveInterval *li) const {
+ SpillForMap::const_iterator sfItr = spillFor.find(li);
+ if (sfItr == spillFor.end())
+ return false;
+ return true;
+ }
+
+ void RenderMachineFunction::renderMachineFunction(
+ const char *renderContextStr,
+ const VirtRegMap *vrm,
+ const char *renderSuffix) {
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ this->vrm = vrm;
+ trei.reset();
+
+ std::string rpFileName(mf->getFunction()->getName().str() +
+ (renderSuffix ? renderSuffix : "") +
+ outputFileSuffix);
+
+ std::string errMsg;
+ raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary);
+
+ renderFunctionPage(outFile, renderContextStr);
+
+ ro.resetRenderSpecificOptions();
+ }
+
+ std::string RenderMachineFunction::escapeChars(const std::string &s) const {
+ return escapeChars(s.begin(), s.end());
+ }
+
+}
diff --git a/src/LLVM/lib/CodeGen/RenderMachineFunction.h b/src/LLVM/lib/CodeGen/RenderMachineFunction.h
index 71a613b..8571992 100644
--- a/src/LLVM/lib/CodeGen/RenderMachineFunction.h
+++ b/src/LLVM/lib/CodeGen/RenderMachineFunction.h
@@ -28,11 +28,108 @@
class LiveIntervals;
class MachineInstr;
class MachineRegisterInfo;
+ class RenderMachineFunction;
class TargetRegisterClass;
class TargetRegisterInfo;
class VirtRegMap;
class raw_ostream;
+ /// \brief Helper class to process rendering options. Tries to be as lazy as
+ /// possible.
+ class MFRenderingOptions {
+ public:
+
+ struct RegClassComp {
+ bool operator()(const TargetRegisterClass *trc1,
+ const TargetRegisterClass *trc2) const {
+ std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
+ return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
+ trc2Name.begin(), trc2Name.end());
+ }
+ };
+
+ typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
+
+ struct IntervalComp {
+ bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+ return li1->reg < li2->reg;
+ }
+ };
+
+ typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
+
+ /// Initialise the rendering options.
+ void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
+ LiveIntervals *lis, const RenderMachineFunction *rmf);
+
+ /// Clear translations of options to the current function.
+ void clear();
+
+ /// Reset any options computed for this specific rendering.
+ void resetRenderSpecificOptions();
+
+ /// Should we render the current function.
+ bool shouldRenderCurrentMachineFunction() const;
+
+ /// Return the set of register classes to render pressure for.
+ const RegClassSet& regClasses() const;
+
+ /// Return the set of live intervals to render liveness for.
+ const IntervalSet& intervals() const;
+
+ /// Render indexes which are not associated with instructions / MBB starts.
+ bool renderEmptyIndexes() const;
+
+ /// Return whether or not to render using SVG for fancy vertical text.
+ bool fancyVerticals() const;
+
+ private:
+
+ static bool renderingOptionsProcessed;
+ static std::set<std::string> mfNamesToRender;
+ static bool renderAllMFs;
+
+ static std::set<std::string> classNamesToRender;
+ static bool renderAllClasses;
+
+
+ static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
+ typedef enum { ExplicitOnly = 0,
+ AllPhys = 1,
+ VirtNoSpills = 2,
+ VirtSpills = 4,
+ AllVirt = 6,
+ All = 7 }
+ IntervalTypesToRender;
+ static unsigned intervalTypesToRender;
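+ // The category values compose as bitmask unions: AllVirt is
+ // VirtNoSpills|VirtSpills, and All is AllPhys|AllVirt.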
+
+ template <typename OutputItr>
+ static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
+
+ static void processOptions();
+
+ static void processFuncNames();
+ static void processRegClassNames();
+ static void processIntervalNumbers();
+
+ static void processIntervalRange(const std::string &intervalRangeStr);
+
+ MachineFunction *mf;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ const RenderMachineFunction *rmf;
+
+ mutable bool regClassesTranslatedToCurrentFunction;
+ mutable RegClassSet regClassSet;
+
+ mutable bool intervalsTranslatedToCurrentFunction;
+ mutable IntervalSet intervalSet;
+
+ void translateRegClassNamesToCurrentFunction() const;
+
+ void translateIntervalNumbersToCurrentFunction() const;
+ };
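+ // MFRenderingOptions is typically driven by RenderMachineFunction:
+ // setup() once per function, shouldRenderCurrentMachineFunction() to gate
+ // rendering, regClasses() and intervals() to drive the table columns, and
+ // resetRenderSpecificOptions() between renderings of the same function.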
+
/// \brief Provide extra information about the physical and virtual registers
/// in the function being compiled.
class TargetRegisterExtraInfo {
@@ -99,106 +196,15 @@
void resetPressureAndLiveStates();
};
- /// \brief Helper class to process rendering options. Tries to be as lazy as
- /// possible.
- class MFRenderingOptions {
- public:
-
- struct RegClassComp {
- bool operator()(const TargetRegisterClass *trc1,
- const TargetRegisterClass *trc2) const {
- std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
- return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
- trc2Name.begin(), trc2Name.end());
- }
- };
-
- typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
-
- struct IntervalComp {
- bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
- return li1->reg < li2->reg;
- }
- };
-
- typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
-
- /// Initialise the rendering options.
- void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
- LiveIntervals *lis);
-
- /// Clear translations of options to the current function.
- void clear();
-
- /// Reset any options computed for this specific rendering.
- void resetRenderSpecificOptions();
-
- /// Should we render the current function.
- bool shouldRenderCurrentMachineFunction() const;
-
- /// Return the set of register classes to render pressure for.
- const RegClassSet& regClasses() const;
-
- /// Return the set of live intervals to render liveness for.
- const IntervalSet& intervals() const;
-
- /// Render indexes which are not associated with instructions / MBB starts.
- bool renderEmptyIndexes() const;
-
- /// Return whether or not to render using SVG for fancy vertical text.
- bool fancyVerticals() const;
-
- private:
-
- static bool renderingOptionsProcessed;
- static std::set<std::string> mfNamesToRender;
- static bool renderAllMFs;
-
- static std::set<std::string> classNamesToRender;
- static bool renderAllClasses;
-
-
- static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
- typedef enum { ExplicitOnly = 0,
- VirtPlusExplicit = 1,
- PhysPlusExplicit = 2,
- All = 3 }
- IntervalTypesToRender;
- static unsigned intervalTypesToRender;
-
- template <typename OutputItr>
- static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
-
- static void processOptions();
-
- static void processFuncNames();
- static void processRegClassNames();
- static void processIntervalNumbers();
-
- static void processIntervalRange(const std::string &intervalRangeStr);
-
- MachineFunction *mf;
- const TargetRegisterInfo *tri;
- LiveIntervals *lis;
-
- mutable bool regClassesTranslatedToCurrentFunction;
- mutable RegClassSet regClassSet;
-
- mutable bool intervalsTranslatedToCurrentFunction;
- mutable IntervalSet intervalSet;
-
- void translateRegClassNamesToCurrentFunction() const;
-
- void translateIntervalNumbersToCurrentFunction() const;
- };
-
/// \brief Render MachineFunction objects and related information to a HTML
/// page.
class RenderMachineFunction : public MachineFunctionPass {
public:
static char ID;
- RenderMachineFunction() : MachineFunctionPass(ID) {}
+ RenderMachineFunction() : MachineFunctionPass(ID) {
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &au) const;
@@ -206,6 +212,13 @@
virtual void releaseMemory();
+ void rememberUseDefs(const LiveInterval *li);
+
+ void rememberSpills(const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills);
+
+ bool isSpill(const LiveInterval *li) const;
+
/// \brief Render this machine function to HTML.
///
/// @param renderContextStr This parameter will be included in the top of
@@ -225,10 +238,8 @@
private:
class Spacer;
-
friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s);
-
std::string fqn;
MachineFunction *mf;
@@ -241,6 +252,8 @@
TargetRegisterExtraInfo trei;
MFRenderingOptions ro;
+
+
// Utilities.
typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState;
LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const;
@@ -249,6 +262,17 @@
PressureState getPressureStateAt(const TargetRegisterClass *trc,
SlotIndex i) const;
+ typedef std::map<const LiveInterval*, std::set<const LiveInterval*> >
+ SpillIntervals;
+ SpillIntervals spillIntervals;
+
+ typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap;
+ SpillForMap spillFor;
+
+ typedef std::set<SlotIndex> SlotSet;
+ typedef std::map<const LiveInterval*, SlotSet> UseDefs;
+ UseDefs useDefs;
+
// ---------- Rendering methods ----------
/// For inserting spaces when pretty printing.
diff --git a/src/LLVM/lib/CodeGen/ScheduleDAG.cpp b/src/LLVM/lib/CodeGen/ScheduleDAG.cpp
index 7d39dc4..1e9b5c8 100644
--- a/src/LLVM/lib/CodeGen/ScheduleDAG.cpp
+++ b/src/LLVM/lib/CodeGen/ScheduleDAG.cpp
@@ -15,24 +15,41 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
+#ifndef NDEBUG
+static cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()),
TII(TM.getInstrInfo()),
TRI(TM.getRegisterInfo()),
MF(mf), MRI(mf.getRegInfo()),
EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
}
ScheduleDAG::~ScheduleDAG() {}
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
/// dump - dump the schedule.
void ScheduleDAG::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -68,12 +85,12 @@
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
-void SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D) {
// If this node already has this dependence, don't add a redundant one.
for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I)
if (*I == D)
- return;
+ return false;
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
@@ -99,6 +116,7 @@
this->setDepthDirty();
N->setHeightDirty();
}
+ return true;
}
/// removePred - This removes the specified edge as a pred of the current
@@ -122,6 +140,7 @@
break;
}
assert(FoundSucc && "Mismatching preds / succs lists!");
+ (void)FoundSucc;
Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
@@ -278,6 +297,7 @@
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
dbgs() << " Height : " << Height << "\n";
@@ -298,6 +318,8 @@
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << G->TRI->getName(I->getReg());
dbgs() << "\n";
}
}
@@ -463,7 +485,7 @@
#endif
}
-/// AddPred - Updates the topological ordering to accomodate an edge
+/// AddPred - Updates the topological ordering to accommodate an edge
/// to be added from SUnit X to SUnit Y.
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
int UpperBound, LowerBound;
@@ -481,7 +503,7 @@
}
}
-/// RemovePred - Updates the topological ordering to accomodate an
+/// RemovePred - Updates the topological ordering to accommodate an
/// an edge to be removed from the specified node N from the predecessors
/// of the current node M.
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
@@ -492,7 +514,7 @@
/// all nodes affected by the edge insertion. These nodes will later get new
/// topological indexes by means of the Shift method.
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
- bool& HasLoop) {
+ bool &HasLoop) {
std::vector<const SUnit*> WorkList;
WorkList.reserve(SUnits.size());
diff --git a/src/LLVM/lib/CodeGen/ScheduleDAGEmit.cpp b/src/LLVM/lib/CodeGen/ScheduleDAGEmit.cpp
index 0a2fb37..f8b1bc7 100644
--- a/src/LLVM/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/src/LLVM/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -45,6 +45,7 @@
unsigned Reg = 0;
for (SUnit::const_succ_iterator II = SU->Succs.begin(),
EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain preds
if (II->getReg()) {
Reg = II->getReg();
break;
@@ -57,7 +58,7 @@
assert(I->getReg() && "Unknown physical register!");
unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
.addReg(I->getReg());
diff --git a/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.cpp b/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.cpp
index ea93dd5..34b8ab0 100644
--- a/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -16,14 +16,16 @@
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
@@ -32,10 +34,11 @@
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
- Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
- MFI = mf.getFrameInfo();
- DbgValueVec.clear();
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
+ InstrItins(mf.getTarget().getInstrItineraryData()),
+ Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
+ LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ DbgValues.clear();
}
/// Run - perform scheduling.
@@ -78,12 +81,12 @@
} while (1);
}
-/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
// First just call Value::getUnderlyingObject to let it do what it does.
do {
- V = V->getUnderlyingObject();
+ V = GetUnderlyingObject(V);
// If it found an inttoptr, use special code to continue climbing.
if (Operator::getOpcode(V) != Instruction::IntToPtr)
break;
@@ -119,7 +122,7 @@
// such aliases.
if (PSV->isAliased(MFI))
return 0;
-
+
MayAlias = PSV->mayAlias(MFI);
return V;
}
@@ -131,6 +134,7 @@
}
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+ LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch()) {
MachineBasicBlock *Header = ML->getHeader();
@@ -141,6 +145,46 @@
}
}
+/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register defs and use list. This is because we want to
+/// make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is
+/// especially important when the definition latency of the return value(s)
+/// is too high to be hidden by the branch or when the live-out registers
+/// are used by instructions in the fallthrough block.
+void ScheduleDAGInstrs::AddSchedBarrierDeps() {
+ MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ Uses[Reg].push_back(&ExitSU);
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ SmallSet<unsigned, 8> Seen;
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ if (Seen.insert(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ }
+ }
+}
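+// e.g. for a block ending in a conditional branch, every register live into
+// a successor gets ExitSU appended to its use list, so defs of that register
+// inside the block gain an edge to the block exit instead of looking dead.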
+
void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// We'll be allocating one SUnit for each instruction, plus one for
// the region exit node.
@@ -159,41 +203,48 @@
std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
- // Keep track of dangling debug references to registers.
- std::vector<std::pair<MachineInstr*, unsigned> >
- DanglingDebugValue(TRI->getNumRegs(),
- std::make_pair(static_cast<MachineInstr*>(0), 0));
-
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
// Ask the target if address-backscheduling is desirable, and if so how much.
- const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
- DbgValueVec.clear();
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ AddSchedBarrierDeps();
+
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
+ }
// Walk the list of instructions, from bottom moving up.
+ MachineInstr *PrevMI = NULL;
for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
- // DBG_VALUE does not have SUnit's built, so just remember these for later
- // reinsertion.
+ if (MI && PrevMI) {
+ DbgValues.push_back(std::make_pair(PrevMI, MI));
+ PrevMI = NULL;
+ }
+
if (MI->isDebugValue()) {
- if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() &&
- MI->getOperand(0).getReg())
- DanglingDebugValue[MI->getOperand(0).getReg()] =
- std::make_pair(MI, DbgValueVec.size());
- DbgValueVec.push_back(MI);
+ PrevMI = MI;
continue;
}
- const TargetInstrDesc &TID = MI->getDesc();
- assert(!TID.isTerminator() && !MI->isLabel() &&
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ assert(!MCID.isTerminator() && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
// Create the SUnit for this MI.
SUnit *SU = NewSUnit(MI);
+ SU->isCall = MCID.isCall();
+ SU->isCommutable = MCID.isCommutable();
// Assign the Latency field of SU using target-provided information.
if (UnitLatencies)
@@ -210,13 +261,8 @@
assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
- if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
- SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first);
- DbgValueVec[DanglingDebugValue[Reg].second] = 0;
- DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
- }
-
std::vector<SUnit *> &UseList = Uses[Reg];
+ // Defs are pushed in the order they are visited and never reordered.
std::vector<SUnit *> &DefList = Defs[Reg];
// Optionally add output and anti dependencies. For anti
// dependencies we use a latency of 0 because for a multi-issue
@@ -228,15 +274,19 @@
unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- std::vector<SUnit *> &DefList = Defs[*Alias];
- for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
- SUnit *DefSU = DefList[i];
+ std::vector<SUnit *> &MemDefList = Defs[*Alias];
+ for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) {
+ SUnit *DefSU = MemDefList[i];
+ if (DefSU == &ExitSU)
+ continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(*Alias)))
@@ -258,14 +308,16 @@
// TODO: Perhaps we should get rid of
// SpecialAddressLatency and just move this into
// adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
MachineInstr *UseMI = UseSU->getInstr();
- const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
- if ((UseTID.mayLoad() || UseTID.mayStore()) &&
- (unsigned)RegUseIndex < UseTID.getNumOperands() &&
- UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ if (RegUseIndex >= 0 &&
+ (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
}
// Adjust the dependence latency using operand def/use
@@ -302,29 +354,29 @@
unsigned Count = I->second.second;
const MachineInstr *UseMI = UseMO->getParent();
unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
// TODO: If we knew the total depth of the region here, we could
// handle the case where the whole loop is inside the region but
// is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseTID.getNumOperands()) {
+ if (UseMOIdx < UseMCID.getNumOperands()) {
// Currently, we only support scheduling regions consisting of
// single basic blocks. Check to see if the instruction is in
// the same region by checking to see if it has the same parent.
if (UseMI->getParent() != MI->getParent()) {
unsigned Latency = SU->Latency;
- if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
Latency += SpecialAddressLatency;
// This is a wild guess as to the portion of the latency which
// will be overlapped by work done outside the current
// scheduling region.
Latency -= std::min(Latency, Count);
- // Add the artifical edge.
+ // Add the artificial edge.
ExitSU.addPred(SDep(SU, SDep::Order, Latency,
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
/*isArtificial=*/true));
} else if (SpecialAddressLatency > 0 &&
- UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
// The entire loop body is within the current scheduling region
// and the latency of this operation is assumed to be greater
// than the latency of the loop.
@@ -340,6 +392,16 @@
UseList.clear();
if (!MO.isDead())
DefList.clear();
+
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ if (SU->isCall) {
+ while (!DefList.empty() && DefList.back()->isCall)
+ DefList.pop_back();
+ }
DefList.push_back(SU);
} else {
UseList.push_back(SU);
@@ -357,12 +419,12 @@
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
- (MI->hasVolatileMemoryRef() &&
- (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (std::map<const Value *, SUnit *>::iterator I =
NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
}
@@ -398,16 +460,16 @@
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
- } else if (TID.mayStore()) {
+ } else if (MCID.mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
// an existing def.
- std::map<const Value *, SUnit *>::iterator I =
+ std::map<const Value *, SUnit *>::iterator I =
((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
+ std::map<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
@@ -446,45 +508,55 @@
// Treat all other stores conservatively.
goto new_alias_chain;
}
- } else if (TID.mayLoad()) {
+
+ if (!ExitSU.isPred(SU))
+ // Push stores up a bit to avoid them getting in between cmp
+ // and branches.
+ ExitSU.addPred(SDep(SU, SDep::Order, 0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (MCID.mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else {
- if (const Value *V =
+ if (const Value *V =
getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A load from a specific PseudoSourceValue. Add precise dependencies.
- std::map<const Value *, SUnit *>::iterator I =
+ std::map<const Value *, SUnit *>::iterator I =
((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
+ std::map<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
/*isNormalMemory=*/true));
if (MayAlias)
AliasMemUses[V].push_back(SU);
- else
+ else
NonAliasMemUses[V].push_back(SU);
} else {
// A load with no underlying object. Depend on all
// potentially aliasing stores.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (std::map<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
-
+
PendingLoads.push_back(SU);
MayAlias = true;
}
-
+
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- }
+ }
}
}
+ if (PrevMI)
+ FirstDbgValue = PrevMI;
for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
Defs[i].clear();
@@ -498,25 +570,24 @@
}
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-
// Compute the latency for the node.
- SU->Latency =
- InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+ if (!InstrItins || InstrItins->isEmpty()) {
+ SU->Latency = 1;
- // Simplistic target-independent heuristic: assume that loads take
- // extra time.
- if (InstrItins.isEmpty())
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
if (SU->getInstr()->getDesc().mayLoad())
SU->Latency += 2;
+ } else {
+ SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+ }
}
-void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const {
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty())
+ if (!InstrItins || InstrItins->isEmpty())
return;
-
+
// For a data dependency with a known register...
if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
return;
@@ -528,14 +599,21 @@
MachineInstr *DefMI = Def->getInstr();
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1) {
- int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
- DefIdx);
- if (DefCycle >= 0) {
- MachineInstr *UseMI = Use->getInstr();
- const unsigned UseClass = UseMI->getDesc().getSchedClass();
-
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ }
+ MachineInstr *UseMI = Use->getInstr();
+ // For all uses of the register, calculate the maximum latency
+ int Latency = -1;
+ if (UseMI) {
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = UseMI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -544,15 +622,21 @@
if (MOReg != Reg)
continue;
- int UseCycle = InstrItins.getOperandCycle(UseClass, i);
- if (UseCycle >= 0)
- Latency = std::max(Latency, DefCycle - UseCycle + 1);
+ int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
+ UseMI, i);
+ Latency = std::max(Latency, UseCycle);
}
-
- // If we found a latency, then replace the existing dependence latency.
- if (Latency >= 0)
- dep.setLatency(Latency);
+ } else {
+ // If UseMI is null, it must be a scheduling barrier.
+ if (!InstrItins || InstrItins->isEmpty())
+ return;
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
}
+
+ // If we found a latency, then replace the existing dependence latency.
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
}
@@ -582,39 +666,33 @@
BB->remove(I);
}
- // First reinsert any remaining debug_values; these are either constants,
- // or refer to live-in registers. The beginning of the block is the right
- // place for the latter. The former might reasonably be placed elsewhere
- // using some kind of ordering algorithm, but right now it doesn't matter.
- for (int i = DbgValueVec.size()-1; i>=0; --i)
- if (DbgValueVec[i])
- BB->insert(InsertPos, DbgValueVec[i]);
+ // If the first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->insert(InsertPos, FirstDbgValue);
// Then re-insert them according to the given schedule.
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- SUnit *SU = Sequence[i];
- if (!SU) {
+ if (SUnit *SU = Sequence[i])
+ BB->insert(InsertPos, SU->getInstr());
+ else
// Null SUnit* is a noop.
EmitNoop();
- continue;
- }
-
- BB->insert(InsertPos, SU->getInstr());
- for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i)
- BB->insert(InsertPos, SU->DbgInstrList[i]);
}
// Update the Begin iterator, as the first instruction in the block
// may have been scheduled later.
- if (!DbgValueVec.empty()) {
- for (int i = DbgValueVec.size()-1; i>=0; --i)
- if (DbgValueVec[i]!=0) {
- Begin = DbgValueVec[DbgValueVec.size()-1];
- break;
- }
- } else if (!Sequence.empty())
+ if (!Sequence.empty())
Begin = Sequence[0]->getInstr();
- DbgValueVec.clear();
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineInstr *OrigPrivMI = P.second;
+ BB->insertAfter(OrigPrivMI, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
return BB;
}
diff --git a/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.h b/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.h
index c8f543f..666bdf5 100644
--- a/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/src/LLVM/lib/CodeGen/ScheduleDAGInstrs.h
@@ -48,7 +48,8 @@
/// VisitLoop - Clear out any previous state and analyze the given loop.
///
void VisitLoop(const MachineLoop *Loop) {
- Deps.clear();
+ assert(Deps.empty() && "stale loop dependencies");
+
MachineBasicBlock *Header = Loop->getHeader();
SmallSet<unsigned, 8> LoopLiveIns;
for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
@@ -101,6 +102,7 @@
const MachineLoopInfo &MLI;
const MachineDominatorTree &MDT;
const MachineFrameInfo *MFI;
+ const InstrItineraryData *InstrItins;
/// Defs, Uses - Remember where defs and uses of each physical register
/// are as we iterate upward through the instructions. This is allocated
@@ -108,10 +110,6 @@
/// initialized and destructed for each block.
std::vector<std::vector<SUnit *> > Defs;
std::vector<std::vector<SUnit *> > Uses;
-
- /// DbgValueVec - Remember DBG_VALUEs that refer to a particular
- /// register.
- std::vector<MachineInstr *>DbgValueVec;
/// PendingLoads - Remember where unknown loads are after the most recent
/// unknown store, as we iterate. As with Defs and Uses, this is here
@@ -127,6 +125,14 @@
///
SmallSet<unsigned, 8> LoopLiveInRegs;
+ protected:
+
+ /// DbgValues - Remember the instruction that precedes each DBG_VALUE.
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+ DbgValueVector DbgValues;
+ MachineInstr *FirstDbgValue;
+
public:
MachineBasicBlock::iterator Begin; // The beginning of the range to
// be scheduled. The range extends
@@ -163,6 +169,15 @@
/// input.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// AddSchedBarrierDeps - Add dependencies from instructions in the current
+ /// list of instructions being scheduled to the scheduling barrier. We want
+ /// to make sure instructions which define registers that are either used by
+ /// the terminator or are live-out are properly scheduled. This is
+ /// especially important when the definition latency of the return value(s)
+ /// is too high to be hidden by the branch or when the live-out registers
+ /// are used by instructions in the fallthrough block.
+ void AddSchedBarrierDeps();
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
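
DbgValues pairs every DBG_VALUE with the real instruction in front of it, and FirstDbgValue covers a DBG_VALUE sitting at the very top of the block. A sketch of how such pairs can be collected in one forward pass (hypothetical types; the actual bookkeeping lives in ScheduleDAGInstrs.cpp):

#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct Instr {
  std::string name;
  bool isDbgValue;
};

int main() {
  std::vector<Instr> block = {
      {"dbg_top", true}, {"i0", false}, {"dbg_a", true}, {"i1", false}};
  const Instr *firstDbgValue = nullptr;     // DBG_VALUE heading the block
  const Instr *prevReal = nullptr;          // last non-debug instruction seen
  std::vector<std::pair<const Instr *, const Instr *> > dbgValues;
  for (const Instr &mi : block) {
    if (!mi.isDbgValue) {
      prevReal = &mi;
      continue;
    }
    if (prevReal)
      dbgValues.push_back({&mi, prevReal}); // (DBG_VALUE, its predecessor)
    else
      firstDbgValue = &mi;                  // nothing real precedes it yet
  }
  assert(firstDbgValue && firstDbgValue->name == "dbg_top");
  assert(dbgValues.size() == 1 && dbgValues[0].second->name == "i0");
}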
diff --git a/src/LLVM/lib/CodeGen/ScheduleDAGPrinter.cpp b/src/LLVM/lib/CodeGen/ScheduleDAGPrinter.cpp
index 027f615..4b55a22 100644
--- a/src/LLVM/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/src/LLVM/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -51,7 +51,8 @@
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
static std::string getEdgeAttributes(const SUnit *Node,
- SUnitIterator EI) {
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
if (EI.isArtificialDep())
return "color=cyan,style=dashed";
if (EI.isCtrlDep())
diff --git a/src/LLVM/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/src/LLVM/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 0000000..b80c01e
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,242 @@
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType) :
+ ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+ IssueCount(0) {
+
+#ifndef NDEBUG
+ DebugType = ParentDebugType;
+#endif
+
+ // Determine the maximum depth of any itinerary. This determines the
+ // depth of the scoreboard. We always make the scoreboard at least 1
+ // cycle deep to avoid dealing with the boundary condition.
+ unsigned ScoreboardDepth = 1;
+ if (ItinData && !ItinData->isEmpty()) {
+ IssueWidth = ItinData->IssueWidth;
+
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData->isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
+
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ }
+ }
+ MaxLookAhead = ScoreboardDepth;
+ }
+
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
+
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+}
+
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
+}
+
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = (*this)[i];
+ dbgs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
+ return NoHazard;
+
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID == NULL) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[StageCycle];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[StageCycle];
+ break;
+ }
+
+ if (!freeUnits) {
+ DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
+ return;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME it would be more accurate to reserve
+ // the same unit in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ default:
+ assert(0 && "Invalid FU reservation");
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
+ // reduce to a single unit
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
+
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(ReservedScoreboard.dump());
+ DEBUG(RequiredScoreboard.dump());
+}
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
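
Both scoreboards are cyclic arrays of per-cycle FU bitmasks, sized up to a power of two so the rotating index wraps with a cheap mask. A compact standalone model of that structure (hypothetical names; the real class is declared in llvm/CodeGen/ScoreboardHazardRecognizer.h):

#include <cassert>
#include <vector>

// Cyclic per-cycle bitmask of busy functional units.
class Scoreboard {
  std::vector<unsigned> Data; // size is always a power of 2
  unsigned Head = 0;          // slot representing the current cycle
public:
  explicit Scoreboard(unsigned MinDepth) {
    unsigned Depth = 1;
    while (Depth < MinDepth)  // round up to the next power of 2
      Depth *= 2;
    Data.assign(Depth, 0);
  }
  // FU mask for a cycle in the future; wraparound is a mask, not a modulo.
  unsigned &operator[](unsigned Cycle) {
    return Data[(Head + Cycle) & (Data.size() - 1)];
  }
  void advance() {
    (*this)[0] = 0;           // the vacated slot becomes the farthest future
    Head = (Head + 1) & (Data.size() - 1);
  }
};

int main() {
  Scoreboard SB(5);           // itinerary depth 5 rounds up to 8
  SB[3] |= 0x4;               // reserve FU #2 three cycles out
  SB.advance();               // one cycle elapses
  assert(SB[2] == 0x4);       // the reservation is now two cycles out
}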
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7ffbf8d..7b87868 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -43,6 +42,7 @@
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
namespace {
static cl::opt<bool>
@@ -138,6 +138,10 @@
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
+ void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType);
+
/// combine - call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
@@ -165,6 +169,8 @@
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSMULO(SDNode *N);
+ SDValue visitUMULO(SDNode *N);
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
@@ -185,7 +191,7 @@
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
- SDValue visitBIT_CONVERT(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
@@ -210,6 +216,7 @@
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitMEMBARRIER(SDNode *N);
@@ -229,12 +236,16 @@
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
- SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -248,16 +259,19 @@
bool isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const;
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
const Value *&SrcValue, int &SrcValueOffset,
- unsigned &SrcValueAlignment) const;
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
@@ -270,15 +284,15 @@
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
-
+
SelectionDAG &getDAG() const { return DAG; }
-
+
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
- EVT getShiftAmountTy() {
- return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+ EVT getShiftAmountTy(EVT LHSTy) {
+ return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
}
-
+
/// isTypeLegal - This method returns true if we are running before type
/// legalization or if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
@@ -315,6 +329,10 @@
((DAGCombiner*)DC)->AddToWorkList(N);
}
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->removeFromWorkList(N);
+}
+
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
@@ -521,7 +539,8 @@
cast<ConstantSDNode>(N0.getOperand(1)),
cast<ConstantSDNode>(N1));
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
- } else if (N0.hasOneUse()) {
+ }
+ if (N0.hasOneUse()) {
// reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
N0.getOperand(0), N1);
@@ -538,7 +557,8 @@
cast<ConstantSDNode>(N1.getOperand(1)),
cast<ConstantSDNode>(N0));
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
- } else if (N1.hasOneUse()) {
+ }
+ if (N1.hasOneUse()) {
// reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
N1.getOperand(0), N0);
@@ -631,7 +651,7 @@
// Replace the old value with the new one.
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.2 ";
+ DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
@@ -666,12 +686,13 @@
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, PVT, dl,
+ return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
}
@@ -691,7 +712,7 @@
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, PVT, Op);
- }
+ }
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
@@ -889,11 +910,12 @@
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl,
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
@@ -975,11 +997,14 @@
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << "\nReplacing.3 ";
+ DEBUG(dbgs() << "\nReplacing.3 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
dbgs() << '\n');
+
+ // Transfer debug value.
+ DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -1035,6 +1060,8 @@
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO: return visitSMULO(N);
+ case ISD::UMULO: return visitUMULO(N);
case ISD::SDIVREM: return visitSDIVREM(N);
case ISD::UDIVREM: return visitUDIVREM(N);
case ISD::AND: return visitAND(N);
@@ -1054,7 +1081,7 @@
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
- case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
@@ -1079,6 +1106,7 @@
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::MEMBARRIER: return visitMEMBARRIER(N);
}
@@ -1225,7 +1253,7 @@
}
}
}
-
+
SDValue Result;
// If we've change things around then replace token factor.
@@ -1424,6 +1452,29 @@
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
return SDValue();
}
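
The last fold uses a small identity: a sign-extended i1 is 0 or -1, so adding it to X is the same as subtracting the zero-extended i1. A quick standalone check (plain C++, illustration only):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {0, 1, -5, 1000000}) {
    for (bool b : {false, true}) {
      int32_t sext = b ? -1 : 0; // sign-extended i1
      int32_t zext = b ? 1 : 0;  // zero-extended i1
      // add (sext i1), X  ==  sub X, (zext i1)
      assert(x + sext == x - zext);
    }
  }
}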
@@ -1438,7 +1489,7 @@
if (N->hasNUsesOfValue(0, 1))
return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// canonicalize constant to RHS.
if (N0C && !N1C)
@@ -1447,7 +1498,7 @@
// fold (addc x, 0) -> x + no carry out
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
@@ -1464,7 +1515,7 @@
(LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
}
return SDValue();
@@ -1489,11 +1540,30 @@
return SDValue();
}
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check if we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
@@ -1503,8 +1573,9 @@
}
// fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return DAG.getConstant(0, N->getValueType(0));
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c1-c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
@@ -1515,12 +1586,21 @@
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+ N1.getOperand(0));
+ }
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
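
The new sub folds are plain integer identities; a spot check with concrete values (illustration only):

#include <cassert>

int main() {
  int A = 7, B = 19, C1 = 4, C2 = 10;
  assert(A - (A - B) == B);               // fold A-(A-B) -> B
  assert(C2 - (A + C1) == (C2 - C1) - A); // fold C2-(A+C1) -> (C2-C1)-A
}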
@@ -1598,7 +1678,7 @@
if (N1C && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(),
- getShiftAmountTy()));
+ getShiftAmountTy(N0.getValueType())));
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
@@ -1607,7 +1687,8 @@
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
DAG.getConstant(0, VT),
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
- DAG.getConstant(Log2Val, getShiftAmountTy())));
+ DAG.getConstant(Log2Val,
+ getShiftAmountTy(N0.getValueType()))));
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N1C && N0.getOpcode() == ISD::SHL &&
@@ -1704,18 +1785,18 @@
// Splat the sign bit into the register
SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getConstant(VT.getSizeInBits()-1,
- getShiftAmountTy()));
+ getShiftAmountTy(N0.getValueType())));
AddToWorkList(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
DAG.getConstant(VT.getSizeInBits() - lg2,
- getShiftAmountTy()));
+ getShiftAmountTy(SGN.getValueType())));
SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
AddToWorkList(SRL.getNode());
AddToWorkList(ADD.getNode()); // Divide by pow2
SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
- DAG.getConstant(lg2, getShiftAmountTy()));
+ DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
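
The SGN/SRL/ADD/SRA sequence above is the classic round-toward-zero signed division by a power of two: bias the dividend by 2^lg2 - 1 exactly when it is negative, then shift. A standalone rendering of the same steps (assumes arithmetic >> on signed values, which mainstream compilers provide):

#include <cassert>
#include <cstdint>

// Signed divide by 1 << lg2 (0 < lg2 < 32) without a divide instruction.
static int32_t sdiv_pow2(int32_t x, unsigned lg2) {
  int32_t sgn = x >> 31;                       // SGN: splat the sign bit
  uint32_t bias = (uint32_t)sgn >> (32 - lg2); // (x < 0) ? 2^lg2 - 1 : 0
  int32_t add = x + (int32_t)bias;             // ADD: bias negative inputs
  return add >> lg2;                           // SRA: now rounds toward zero
}

int main() {
  assert(sdiv_pow2(-7, 2) == -7 / 4); // -1, not the -2 a bare SRA would give
  assert(sdiv_pow2(7, 2) == 7 / 4);   // 1
  assert(sdiv_pow2(-8, 2) == -8 / 4); // exact case: -2
}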
@@ -1765,7 +1846,7 @@
if (N1C && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(),
- getShiftAmountTy()));
+ getShiftAmountTy(N0.getValueType())));
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
@@ -1897,6 +1978,7 @@
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1905,11 +1987,27 @@
if (N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
- getShiftAmountTy()));
+ getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
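
The widened form of mulhs is easy to sanity-check in plain C++: sign-extend both operands to the double-width type, multiply, and keep the high half (illustration; assumes arithmetic >> on int64_t):

#include <cassert>
#include <cstdint>

// mulhs: the high 32 bits of the signed 64-bit product.
static int32_t mulhs(int32_t a, int32_t b) {
  int64_t wide = (int64_t)a * (int64_t)b; // SIGN_EXTEND both, MUL in NewVT
  return (int32_t)(wide >> 32);           // shift by SimpleSize, TRUNCATE
}

int main() {
  assert(mulhs(1 << 30, 4) == 1); // product is 2^32, so the high part is 1
  assert(mulhs(-1, 1) == -1);     // high half is all sign bits
  assert(mulhs(3, 5) == 0);       // small products leave the high half empty
}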
@@ -1918,6 +2016,7 @@
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1929,6 +2028,22 @@
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
@@ -1992,6 +2107,29 @@
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the smul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
return SDValue();
}
@@ -1999,6 +2137,49 @@
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the umul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+ // (smulo x, 2) -> (saddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+ // (umulo x, 2) -> (uaddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
return SDValue();
}
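
Both rewrites rest on the fact that x * 2 and x + x overflow in exactly the same cases and produce the same bits. Checking the equivalence with GCC/Clang overflow builtins (compiler-specific, illustration only):

#include <cassert>
#include <climits>

int main() {
  for (int x : {0, 1, -1, INT_MAX, INT_MIN / 2}) {
    int r1, r2;
    bool o1 = __builtin_smul_overflow(x, 2, &r1); // (smulo x, 2)
    bool o2 = __builtin_sadd_overflow(x, x, &r2); // (saddo x, x)
    assert(o1 == o2 && r1 == r2);
  }
}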
@@ -2116,7 +2297,7 @@
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
- Mask.trunc(N0Op0.getValueSizeInBits());
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
N0.getValueType(), N0Op0);
@@ -2198,10 +2379,9 @@
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2221,10 +2401,10 @@
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2253,18 +2433,18 @@
if (ExtVT == LoadedVT &&
(!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-
+
// Do not change the width of a volatile load.
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
@@ -2288,12 +2468,12 @@
}
AddToWorkList(NewPtr.getNode());
-
+
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
Alignment);
AddToWorkList(N);
@@ -2307,6 +2487,244 @@
return SDValue();
}
+/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword is zero since the SRL 16
+ // will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16 &&
+ (!LookPassAnd0 || !LookPassAnd1) &&
+ !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+ if (OpSizeInBits > 16)
+ Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+ DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ return Res;
+}
+
+/// isBSwapHWordElement - Return true if the specified node is an element
+/// that makes up a 32-bit packed halfword byteswap. i.e.
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned Num;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: Num = 0; break;
+ case 0xFF00: Num = 1; break;
+ case 0xFF0000: Num = 2; break;
+ case 0xFF000000: Num = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ SDValue N0 = N.getOperand(0);
+ if (Opc == ISD::AND) {
+ if (Num == 0 || Num == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (N0.getOpcode() != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (N0.getOpcode() != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (Num != 0 && Num != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (Num != 1 && Num != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[Num])
+ return false;
+
+ Parts[Num] = N0.getOperand(0).getNode();
+ return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (N1.getOpcode() == ISD::OR) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+ SDValue(Parts[0],0));
+
+ // Result of the bswap should be rotated by 16. If it's not legal, then
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+ else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
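
The packed-halfword byteswap that MatchBSwapHWord recognizes is the same value as a full bswap rotated by 16; spelling out both sides makes the match concrete (plain C++ illustration):

#include <cassert>
#include <cstdint>

// The four and/shift terms the combiner matches, written out directly.
static uint32_t hwbswap(uint32_t x) {
  return ((x & 0x000000FFu) << 8) | ((x & 0x0000FF00u) >> 8) |
         ((x & 0x00FF0000u) << 8) | ((x & 0xFF000000u) >> 8);
}

static uint32_t bswap32(uint32_t x) { // full byte reverse
  return (x << 24) | ((x & 0xFF00u) << 8) | ((x >> 8) & 0xFF00u) | (x >> 24);
}

static uint32_t rotl16(uint32_t x) { return (x << 16) | (x >> 16); }

int main() {
  for (uint32_t x : {0xAABBCCDDu, 0x01020304u, 0u})
    assert(hwbswap(x) == rotl16(bswap32(x))); // => (rotl (bswap x), 16)
}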
@@ -2342,6 +2760,15 @@
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+ SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+ BSwap = MatchBSwapHWordLow(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+
// reassociate or
SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
if (ROR.getNode() != 0)
@@ -2722,17 +3149,8 @@
N01C->getAPIntValue(), VT));
}
// fold (xor x, x) -> 0
- if (N0 == N1) {
- if (!VT.isVector()) {
- return DAG.getConstant(0, VT);
- } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
- // Produce a vector of zeros.
- SDValue El = DAG.getConstant(0, VT.getVectorElementType());
- std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
- }
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
@@ -2810,7 +3228,8 @@
LHS->getOperand(1), N->getOperand(1));
// Create the new shift.
- SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
VT, LHS->getOperand(0), N->getOperand(1));
// Create the new binop.
@@ -2837,6 +3256,9 @@
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -2850,7 +3272,7 @@
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
DAG.getNode(ISD::TRUNCATE,
@@ -2868,31 +3290,58 @@
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
- // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
- // (srl (and x, (shl -1, c1)), (sub c1, c2))
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
if (N1C && N0.getOpcode() == ISD::SRL &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
uint64_t c2 = N1C->getZExtValue();
- SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() - c1),
- VT);
- SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
- N0.getOperand(0),
- HiBitsMask);
- if (c2 > c1)
- return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c2-c1, N1.getValueType()));
- else
- return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c1-c2, N1.getValueType()));
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2-c1);
+ Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1-c2);
+ Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+ DAG.getConstant(Mask, VT));
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
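
The rewritten fold shifts first and masks afterwards; the two forms agree because the mask just tracks the bits the inner srl discarded, shifted along with the data. One concrete case for c2 > c1 (illustrative values):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xDEADBEEFu, c1 = 4, c2 = 8;
  uint32_t old_form = (x >> c1) << c2;              // (shl (srl x, c1), c2)
  uint32_t mask = (0xFFFFFFFFu << c1) << (c2 - c1); // high-bits mask, shifted
  uint32_t new_form = (x << (c2 - c1)) & mask;      // (and (shl x, c2-c1), MASK)
  assert(old_form == new_form);
}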
@@ -2973,7 +3422,8 @@
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -2986,7 +3436,8 @@
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
- SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+ SDValue Amt = DAG.getConstant(ShiftAmt,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
N0.getOperand(0), Amt);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
@@ -3006,7 +3457,7 @@
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3017,6 +3468,29 @@
}
}
+ // fold (sra (trunc (srl/sra x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
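
The sra-of-trunc fold composes the two shifts because the truncate discards exactly as many bits as the inner shift brought down. One worked case (assumes two's-complement narrowing and arithmetic >> on signed types, as mainstream compilers provide):

#include <cassert>
#include <cstdint>

int main() {
  int64_t x = (int64_t)0x8000000012345678ULL; // a negative i64
  unsigned c2 = 8;
  // (sra (trunc (srl x, 32)), c2)
  int32_t lhs = (int32_t)((uint64_t)x >> 32) >> c2;
  // (trunc (sra x, 32 + c2)): legal because trunc drops exactly 32 bits
  int32_t rhs = (int32_t)(x >> (32 + c2));
  assert(lhs == rhs);
}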
@@ -3065,12 +3539,33 @@
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
-
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
// fold (srl (shl x, c), c) -> (and x, cst2)
if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
N0.getValueSizeInBits() <= 64) {
@@ -3078,7 +3573,7 @@
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(~0ULL >> ShAmt, VT));
}
-
+
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -3088,8 +3583,10 @@
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
- N0.getOperand(0), N1);
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
}
@@ -3129,7 +3626,7 @@
if (ShAmt) {
Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
- DAG.getConstant(ShAmt, getShiftAmountTy()));
+ DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
AddToWorkList(Op.getNode());
}
@@ -3147,7 +3644,7 @@
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3182,7 +3679,7 @@
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and %a, 2
// %c = setcc eq %b, 0
@@ -3262,7 +3759,7 @@
if (VT.isInteger() &&
(VT0 == MVT::i1 ||
(VT0.isInteger() &&
- TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
@@ -3422,12 +3919,34 @@
}
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
- // good a reason for the transformation.
+ // a good reason for the transformation.
return ExtendNodes.size();
}
return true;
}
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+}
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3494,7 +4013,10 @@
}
// fold (sext (load x)) -> (sext (truncate (sextload x)))
- if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ // None of the supported targets knows how to perform load and sign extend
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
@@ -3503,10 +4025,9 @@
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3514,27 +4035,8 @@
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
-
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
- }
-
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3547,10 +4049,10 @@
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3562,6 +4064,45 @@
}
}
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
if (N0.getOpcode() == ISD::SETCC) {
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
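
The sext-through-and/or/xor fold works because sign extension distributes over bitwise operations: the new high bits are the bitwise op of the two sign bits either way. An exhaustive i8-to-i32 check (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  const int8_t c = 0x5C; // stands in for the constant the fold sign-extends
  for (int v = -128; v < 128; ++v) {
    int8_t a = (int8_t)v;
    assert((int32_t)(int8_t)(a & c) == ((int32_t)a & (int32_t)c));
    assert((int32_t)(int8_t)(a | c) == ((int32_t)a | (int32_t)c));
    assert((int32_t)(int8_t)(a ^ c) == ((int32_t)a ^ (int32_t)c));
  }
}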
@@ -3573,7 +4114,7 @@
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
@@ -3587,7 +4128,7 @@
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
@@ -3611,7 +4152,7 @@
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
- }
+ }
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3645,13 +4186,27 @@
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
- return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3677,13 +4232,16 @@
X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
- if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ // None of the supported targets knows how to perform load and vector_zext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
@@ -3692,10 +4250,9 @@
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3704,30 +4261,51 @@
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
-
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
- }
-
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
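+ // As with the sext case above, a sketch (assuming an i8 zextload is
+ // legal): (zext (or (load i8 %p), 1)) becomes
+ // (or (zextload i8 %p to i32), 1), with the constant zero-extended.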
// fold (zext (zextload x)) -> (zext (truncate (zextload x)))
// fold (zext ( extload x)) -> (zext (truncate (zextload x)))
if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
@@ -3736,10 +4314,10 @@
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3759,37 +4337,36 @@
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
DAG.getConstant(1, EltVT));
- if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
- DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
&OneOps[0], OneOps.size()));
- } else {
- // If the desired elements are smaller or larger than the source
- // elements we can use a matching integer vector type and then
- // truncate/sign extend
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
- DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
- DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &OneOps[0], OneOps.size()));
- }
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -3805,21 +4382,27 @@
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
- N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
- if (ShAmt > KnownZeroBits)
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
return SDValue();
}
- DebugLoc dl = N->getDebugLoc();
- return DAG.getNode(N0.getOpcode(), dl, VT,
- DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
- DAG.getNode(ISD::ZERO_EXTEND, dl,
- N0.getOperand(1).getValueType(),
- N0.getOperand(1)));
+
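+ // Illustrative case: (zext (shl (zext i8 %x to i16), 3) to i32)
+ // -> (shl (zext i8 %x to i32), 3); the check above guarantees the
+ // inner zext provides enough known-zero bits for the shift amount.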
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
}
return SDValue();
@@ -3851,7 +4434,7 @@
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
- return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3879,13 +4462,16 @@
X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
- if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
bool DoXform = true;
@@ -3894,10 +4480,9 @@
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3905,27 +4490,8 @@
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
-
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
- }
-
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ANY_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3938,11 +4504,9 @@
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
- N->getDebugLoc(),
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3964,7 +4528,7 @@
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
@@ -3978,7 +4542,7 @@
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
@@ -4053,11 +4617,8 @@
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
- return SDValue();
} else if (Opc == ISD::SRL) {
- // Annother special-case: SRL is basically zero-extending a narrower
- // value.
+ // Another special-case: SRL is basically zero-extending a narrower value.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -4065,10 +4626,18 @@
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
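+ // ("Round" means a power-of-two number of bytes: i8, i16, i32, ...;
+ // a type like i13 has no matching memory access width.)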
+ if (!ExtVT.isRound())
+ return SDValue();
+
unsigned ShAmt = 0;
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
// Is the shift amount a multiple of size of VT?
@@ -4078,52 +4647,88 @@
if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ // If the load was a sextload then the result is a splat of the sign bit
+ // of the extended byte. This is not worth optimizing for.
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
}
}
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
- cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT PtrType = N0.getOperand(1).getValueType();
-
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (TLI.isBigEndian()) {
- unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
}
-
- uint64_t PtrOff = ShAmt / 8;
- unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
- PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
- AddToWorkList(NewPtr.getNode());
-
- SDValue Load = (ExtType == ISD::NON_EXTLOAD)
- ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
- : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign);
-
- // Replace the old load's chain with the new load's chain.
- WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
- &DeadNodes);
-
- // Return the new loaded value.
- return Load;
}
- return SDValue();
+ // If we haven't found a load, we can't narrow it. Don't transform one
+ // with multiple uses; this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+ // Don't change the width of a volatile load.
+ cast<LoadSDNode>(N0)->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
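+ // Worked example on a little-endian target: narrowing
+ // (srl (load i32 %p), 16) has ShAmt == 16, so PtrOff below is 2 and
+ // the result is an i16 zextload from %p + 2.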
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ }
+
+ // Return the new loaded value.
+ return Result;
}
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
@@ -4196,10 +4801,10 @@
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4213,16 +4818,26 @@
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode() != 0)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ BSwap, N1);
+ }
+
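+ // A sketch of the matched pattern:
+ // (or (shl (and x, 0xff), 8), (and (srl x, 8), 0xff)) byte-swaps the
+ // low 16 bits, so the sext_inreg is simply re-applied to the bswap form.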
return SDValue();
}
@@ -4257,14 +4872,17 @@
}
// See if we can simplify the input to this truncate through knowledge that
- // only the low bits are being used. For example "trunc (or (shl x, 8), y)"
- // -> trunc y
- SDValue Shorter =
- GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
- VT.getSizeInBits()));
- if (Shorter.getNode())
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
-
+ // only the low bits are being used.
+ // For example "trunc (or (shl x, 8), y)" -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+ }
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -4295,7 +4913,9 @@
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
@@ -4313,14 +4933,14 @@
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
- LD1->getBasePtr(), LD1->getSrcValue(),
- LD1->getSrcValueOffset(), false, false, Align);
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, Align);
}
return SDValue();
}
-SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4344,12 +4964,12 @@
assert(!DestEltVT.isVector() &&
"Element type of vector ValueType must not be vector!");
if (isSimple)
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
}
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
if (Res.getNode() != N) {
if (!LegalOperations ||
TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
@@ -4365,8 +4985,8 @@
}
// (conv (conv x, t1), t2) -> (conv x, t2)
- if (N0.getOpcode() == ISD::BIT_CONVERT)
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
@@ -4382,13 +5002,12 @@
if (Align <= OrigAlign) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
- LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
- DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
N0.getValueType(), Load),
Load.getValue(1));
return Load;
@@ -4400,7 +5019,7 @@
// This often reduces constant pool loads.
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
- SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
AddToWorkList(NewConv.getNode());
@@ -4423,7 +5042,7 @@
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
- SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
IntXVT, N0.getOperand(1));
AddToWorkList(X.getNode());
@@ -4448,7 +5067,7 @@
X, DAG.getConstant(SignBit, VT));
AddToWorkList(X.getNode());
- SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
Cst, DAG.getConstant(~SignBit, VT));
@@ -4473,11 +5092,11 @@
return CombineConsecutiveLoads(N, VT);
}
-/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
@@ -4495,10 +5114,10 @@
// Due to the FP element handling below calling this routine recursively,
// we can end up with a scalar-to-vector node here.
if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
- DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, BV->getOperand(0)));
-
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -4506,7 +5125,7 @@
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
@@ -4522,7 +5141,7 @@
// same sizes.
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
- BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
@@ -4531,10 +5150,10 @@
if (DstEltVT.isFloatingPoint()) {
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
- SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
// Okay, we know the src/dst types are both integers of differing types.
@@ -4556,7 +5175,7 @@
if (Op.getOpcode() == ISD::UNDEF) continue;
EltIsUndef = false;
- NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
@@ -4586,13 +5205,13 @@
continue;
}
- APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
- getAPIntValue()).zextOrTrunc(SrcBitSize);
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+ APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
// Simply turn this into a SCALAR_TO_VECTOR of the new type.
return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
Ops[0]);
@@ -4842,7 +5461,10 @@
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128)
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -4864,7 +5486,10 @@
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128)
+ if (N0C && OpVT != MVT::ppcf128 &&
+ // ...but only if the target supports immediate floating-point values
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -4984,10 +5609,9 @@
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -5011,7 +5635,7 @@
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT &&
+ if (N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5021,7 +5645,7 @@
Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
VT, Int);
}
}
@@ -5047,7 +5671,7 @@
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5056,7 +5680,7 @@
Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Int);
}
}
@@ -5084,14 +5708,17 @@
N1.getOperand(0), N1.getOperand(1), N2);
}
- SDNode *Trunc = 0;
- if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
- // Look past truncate.
- Trunc = N1.getNode();
- N1 = N1.getOperand(0);
- }
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+ // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
- if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
// Match this pattern so that we can generate simpler code:
//
// %a = ...
@@ -5100,7 +5727,7 @@
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and i32 %a, 2
// %c = setcc eq %b, 0
@@ -5146,8 +5773,12 @@
}
}
}
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
}
-
+
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
@@ -5181,9 +5812,7 @@
Equal = true;
}
- SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
-
- EVT SetCCVT = NodeToReplace.getValueType();
+ EVT SetCCVT = N1.getValueType();
if (LegalTypes)
SetCCVT = TLI.getSetCCResultType(SetCCVT);
SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -5192,9 +5821,9 @@
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
- removeFromWorkList(NodeToReplace.getNode());
- DAG.DeleteNode(NodeToReplace.getNode());
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
MVT::Other, Chain, SetCC, N2);
}
@@ -5304,12 +5933,17 @@
// Now check for #3 and #4.
bool RealUse = false;
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
E = Ptr.getNode()->use_end(); I != E; ++I) {
SDNode *Use = *I;
if (Use == N)
continue;
- if (Use->isPredecessorOf(N))
+ if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
if (!((Use->getOpcode() == ISD::LOAD &&
@@ -5554,8 +6188,7 @@
// value.
// TODO: Handle store large -> read small portion.
// TODO: Handle TRUNCSTORE/LOADEXT
- if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
- !LD->isVolatile()) {
+ if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
if (ISD::isNON_TRUNCStore(Chain.getNode())) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
@@ -5568,10 +6201,10 @@
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- N->getDebugLoc(),
- Chain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
}
}
@@ -5587,15 +6220,13 @@
// Replace the chain to void dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
- BetterChain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
} else {
- ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- LD->getDebugLoc(),
- BetterChain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
LD->isVolatile(),
LD->isNonTemporal(),
@@ -5605,10 +6236,10 @@
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplLoad.getValue(1));
-
+
// Make sure the new and old chains are cleaned up.
AddToWorkList(Token.getNode());
-
+
// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
@@ -5628,17 +6259,17 @@
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
-
+
// Check for the structure we're looking for.
if (V->getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(V->getOperand(1)) ||
!ISD::isNormalLoad(V->getOperand(0).getNode()))
return Result;
-
+
// Check the chain and pointer.
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
-
+
// The store should be chained directly to the load or be an operand of a
// tokenfactor.
if (LD == Chain.getNode())
@@ -5654,7 +6285,7 @@
}
if (!isOk) return Result;
}
-
+
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
@@ -5670,7 +6301,7 @@
unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
-
+
// See if we have a continuous run of bits. If so, we have 0*1+0*
if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
return Result;
@@ -5678,19 +6309,19 @@
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
if (V.getValueType() != MVT::i64 && NotMaskLZ)
NotMaskLZ -= 64-V.getValueSizeInBits();
-
+
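+ // e.g. an i32 store masked with 0xFFFF0000 gives NotMask == 0xFFFF, so
+ // NotMaskTZ == 0 and NotMaskLZ == 16 after the adjustment above;
+ // MaskedBytes below is (32-16-0)/8 == 2, i.e. the 'or' supplies the low
+ // two bytes (Result = {2, 0}).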
unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
switch (MaskedBytes) {
- case 1:
- case 2:
+ case 1:
+ case 2:
case 4: break;
default: return Result; // All one mask, or 5-byte mask.
}
-
+
// Verify that the first bit starts at a multiple of mask so that the access
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
-
+
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
@@ -5707,25 +6338,26 @@
unsigned NumBytes = MaskInfo.first;
unsigned ByteShift = MaskInfo.second;
SelectionDAG &DAG = DC->getDAG();
-
+
// Check to see if IVal is all zeros in the part being masked in by the 'or'
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
-
+
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8);
if (!DC->isTypeLegal(VT))
return 0;
-
+
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift)
IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
- DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
+ DAG.getConstant(ByteShift*8,
+ DC->getShiftAmountTy(IVal.getValueType())));
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
@@ -5735,20 +6367,20 @@
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
-
+
SDValue Ptr = St->getBasePtr();
if (StOffset) {
Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
-
+
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
-
+
++OpsNarrowed;
- return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
- St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
false, false, NewAlign).getNode();
}
@@ -5771,7 +6403,7 @@
return SDValue();
unsigned Opc = Value.getOpcode();
-
+
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
@@ -5784,7 +6416,7 @@
if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
return SDValue(NewST, 0);
-
+
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
@@ -5792,15 +6424,18 @@
Value.getOperand(0), ST,this))
return SDValue(NewST, 0);
}
-
+
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N0 = Value.getOperand(0);
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
- if (LD->getBasePtr() != Ptr)
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
return SDValue();
// Find the type to narrow it the load / op / store to.
@@ -5840,7 +6475,7 @@
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
- const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
return SDValue();
@@ -5849,14 +6484,14 @@
DAG.getConstant(PtrOff, Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
LD->getChain(), NewPtr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
NewVal, NewPtr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo().getWithOffset(PtrOff),
false, false, NewAlign);
AddToWorkList(NewPtr.getNode());
@@ -5873,6 +6508,63 @@
return SDValue();
}
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
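+// For instance, an f32 copy "%v = load f32 %p; store f32 %v, %q" may
+// become an i32 load / store pair when the target marks the integer ops
+// legal and desirable (a sketch; profitability is target-specific).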
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -5881,7 +6573,7 @@
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
- if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
@@ -5891,11 +6583,14 @@
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), OrigAlign);
}
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ return Chain;
+
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
// NOTE: If the original store is volatile, this transform must not increase
@@ -5916,8 +6611,7 @@
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
break;
@@ -5928,11 +6622,12 @@
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), MVT::i64);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
- } else if (!ST->isVolatile() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
@@ -5941,23 +6636,20 @@
SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(),
+ Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment());
Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
- SVOffset += 4;
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
- Ptr, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
Alignment);
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
St0, St1);
@@ -5973,12 +6665,17 @@
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align);
}
}
+ // Try transforming a pair of floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5990,12 +6687,12 @@
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(),ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
} else {
ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6024,22 +6721,22 @@
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
SDValue Shorter =
GetDemandedBits(Value,
- APInt::getLowBitsSet(Value.getValueSizeInBits(),
- ST->getMemoryVT().getSizeInBits()));
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits())))
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
return SDValue(N, 0);
}
@@ -6063,8 +6760,7 @@
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
ST->getMemoryVT())) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6076,56 +6772,70 @@
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
// If the inserted element is an UNDEF, just use the input vector.
if (InVal.getOpcode() == ISD::UNDEF)
return InVec;
- // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
- // vector with the inserted element.
- if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
- InVec.getNode()->op_end());
- if (Elt < Ops.size())
- Ops[Elt] = InVal;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
- }
- // If the invec is an UNDEF and if EltNo is a constant, create a new
- // BUILD_VECTOR with undef elements and the inserted element.
- if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
- isa<ConstantSDNode>(EltNo)) {
- EVT VT = InVec.getValueType();
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+ EVT VT = InVec.getValueType();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- if (Elt < Ops.size())
- Ops[Elt] = InVal;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // Check that we know which element is being inserted
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.getOpcode() == ISD::UNDEF) {
+ unsigned NElts = VT.getVectorNumElements();
+ Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
}
- return SDValue();
+
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ if (InVal.getValueType() != OpVT)
+ InVal = OpVT.bitsGT(InVal.getValueType()) ?
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ Ops[Elt] = InVal;
+ }
+
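+ // e.g. inserting %v at index 1 of (build_vector a, b, c, d) yields
+ // (build_vector a, %v, c, d); with an UNDEF input vector all other
+ // lanes remain undef.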
+ // Return the new vector
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
+ VT, &Ops[0], Ops.size());
}
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- // Check if the result type doesn't match the inserted element type. A
- // SCALAR_TO_VECTOR may truncate the inserted element and the
- // EXTRACT_VECTOR_ELT may widen the extracted vector.
- SDValue InOp = InVec.getOperand(0);
- EVT NVT = N->getValueType(0);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
- }
- return InOp;
- }
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ SDValue InOp = InVec.getOperand(0);
+ EVT NVT = N->getValueType(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
+ return InOp;
+ }
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
@@ -6137,14 +6847,14 @@
SDValue EltNo = N->getOperand(1);
if (isa<ConstantSDNode>(EltNo)) {
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
- if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+ if (InVec.getOpcode() == ISD::BITCAST) {
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6175,10 +6885,10 @@
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = VT.getVectorNumElements();
- int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BIT_CONVERT)
+ if (InVec.getOpcode() == ISD::BITCAST)
InVec = InVec.getOperand(0);
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
@@ -6186,15 +6896,20 @@
}
}
- if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
+
unsigned Align = LN0->getAlignment();
if (NewLoad) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+ TLI.getTargetData()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
return SDValue();
@@ -6203,8 +6918,10 @@
}
SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
if (Elt) {
- unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
EVT PtrType = NewPtr.getValueType();
if (TLI.isBigEndian())
PtrOff = VT.getSizeInBits() / 8 - PtrOff;
@@ -6213,7 +6930,7 @@
}
return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(), Align);
}
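+ // e.g. (extract_elt (load <4 x i32> %p), 2) is rewritten above into an
+ // i32 load from %p + 8 on a little-endian target, reusing the original
+ // load's chain.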
@@ -6279,7 +6996,7 @@
unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (ExtIndex > VT.getVectorNumElements())
return SDValue();
-
+
Mask.push_back(ExtIndex);
continue;
}
@@ -6316,6 +7033,36 @@
return SDValue();
}
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+
+ if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ // Handle only the simple case where the vector being inserted and the
+ // vector being extracted are of the same type, each half the size of the
+ // larger vector.
+ EVT BigVT = V->getOperand(0).getValueType();
+ EVT SmallVT = V->getOperand(1).getValueType();
+ if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ return SDValue();
+
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V2
+ // otherwise => (extract_subvec V1, ExtIdx)
+ //
+ SDValue InsIdx = N->getOperand(1);
+ SDValue ExtIdx = V->getOperand(2);
+
+ if (InsIdx == ExtIdx)
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
+
+ return SDValue();
+}
+
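+// With v4i32 halves of a v8i32 this gives, for example:
+//   (extract_subvec (insert_subvec V1, V2, 4), 4) -> V2
+//   (extract_subvec (insert_subvec V1, V2, 0), 4) -> (extract_subvec V1, 4)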
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -6327,15 +7074,16 @@
// FIXME: implement canonicalizations from DAG.getVectorShuffle()
- // If it is a splat, check if the argument vector is a build_vector with
- // all scalar elements the same.
- if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
 // look through conversions that change things like v4f32 to v2f64.
- if (V->getOpcode() == ISD::BIT_CONVERT) {
+ if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
ConvInput.getValueType().getVectorNumElements() == NumElts)
@@ -6343,30 +7091,28 @@
}
if (V->getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElems = V->getNumOperands();
- unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
- if (NumElems > BaseIdx) {
- SDValue Base;
- bool AllSame = true;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
- Base = V->getOperand(i);
- break;
- }
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
}
- // Splat of <u, u, u, u>, return <u, u, u, u>
- if (!Base.getNode())
- return N0;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i) != Base) {
- AllSame = false;
- break;
- }
- }
- // Splat of <x, x, x, x>, return <x, x, x, x>
- if (AllSame)
- return N0;
}
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
}
}
return SDValue();
@@ -6435,7 +7181,7 @@
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<int, 8> Indices;
@@ -6463,9 +7209,9 @@
DAG.getConstant(0, EltVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
RVT, &ZeroOps[0], ZeroOps.size());
- LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
}
@@ -6479,10 +7225,9 @@
// things. Simplifying them may result in a loss of legality.
if (LegalOperations) return SDValue();
- EVT VT = N->getValueType(0);
- assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
- EVT EltType = VT.getVectorElementType();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Shuffle = XformToShuffleWithZero(N);
@@ -6515,14 +7260,10 @@
break;
}
- // If the vector element type is not legal, the BUILD_VECTOR operands
- // are promoted and implicitly truncated. Make that explicit here.
- if (LHSOp.getValueType() != EltType)
- LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
- if (RHSOp.getValueType() != EltType)
- RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
-
- SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+ EVT VT = LHSOp.getValueType();
+ assert(RHSOp.getValueType() == VT &&
+ "SimplifyVBinOp with different BUILD_VECTOR element types");
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
FoldOp.getOpcode() != ISD::Constant &&
@@ -6532,11 +7273,9 @@
AddToWorkList(FoldOp.getNode());
}
- if (Ops.size() == LHS.getNumOperands()) {
- EVT VT = LHS.getValueType();
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
}
return SDValue();
@@ -6579,103 +7318,101 @@
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // Cannot simplify a select with a vector condition.
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
// If this is a select from two identical things, try to pull the operation
// through the select.
- if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
- // If this is a load and the token chain is identical, replace the select
- // of two loads with a load through a select of the address to load from.
- // This triggers in things like "select bool X, 10.0, 123.0" after the FP
- // constants have been dropped into the constant pool.
- if (LHS.getOpcode() == ISD::LOAD &&
- // Do not let this transformation reduce the number of volatile loads.
- !cast<LoadSDNode>(LHS)->isVolatile() &&
- !cast<LoadSDNode>(RHS)->isVolatile() &&
- // Token chains must be identical.
- LHS.getOperand(0) == RHS.getOperand(0)) {
- LoadSDNode *LLD = cast<LoadSDNode>(LHS);
- LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
- // If this is an EXTLOAD, the VT's must match.
- if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
- unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
- if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
- LLDAddrSpace = PT->getAddressSpace();
- }
- if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
- RLDAddrSpace = PT->getAddressSpace();
- }
- SDValue Addr;
- if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
- if (TheSelect->getOpcode() == ISD::SELECT) {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
- (!RLD->hasAnyUseOfValue(1) ||
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
- Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0), LLD->getBasePtr(),
- RLD->getBasePtr());
- }
- } else {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
- (!RLD->hasAnyUseOfValue(1) ||
- (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
- Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0),
- TheSelect->getOperand(1),
- LLD->getBasePtr(), RLD->getBasePtr(),
- TheSelect->getOperand(4));
- }
- }
- }
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0)
+ return false;
- if (Addr.getNode()) {
- SDValue Load;
- if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
- Load = DAG.getLoad(TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(),
- Addr, 0, 0,
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- } else {
- Load = DAG.getExtLoad(LLD->getExtensionType(),
- TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(), Addr, 0, 0,
- LLD->getMemoryVT(),
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- }
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
- // Users of the select now use the result of the load.
- CombineTo(TheSelect, Load);
+ if ((LLD->hasAnyUseOfValue(1) &&
+ (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
- // Users of the old loads now use the new load's chain. We know the
- // old-load value is dead now.
- CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
- CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
- return true;
- }
- }
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
}
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
}
return false;
@@ -6688,7 +7425,7 @@
ISD::CondCode CC, bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
-
+
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -6724,7 +7461,7 @@
return DAG.getNode(ISD::FABS, DL, VT, N3);
}
}
-
+
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
// in it. This is a win when the constant is not otherwise available because
@@ -6745,11 +7482,11 @@
const_cast<ConstantFP*>(FV->getConstantFPValue()),
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
- const Type *FPTy = Elts[0]->getType();
+ Type *FPTy = Elts[0]->getType();
const TargetData &TD = *TLI.getTargetData();
-
+
// Create a ConstantArray of the two constants.
- Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
TD.getPrefTypeAlignment(FPTy));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
@@ -6759,20 +7496,23 @@
SDValue Zero = DAG.getIntPtrConstant(0);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
SDValue One = DAG.getIntPtrConstant(EltSize);
-
+
SDValue Cond = DAG.getSetCC(DL,
TLI.getSetCCResultType(N0.getValueType()),
N0, N1, CC);
+ AddToWorkList(Cond.getNode());
SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
Cond, One, Zero);
+ AddToWorkList(CstOffset.getNode());
CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
CstOffset);
+ AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false,
+ MachinePointerInfo::getConstantPool(), false,
false, Alignment);
}
- }
+ }
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
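// For instance, with i32 X this becomes (and (sra X, 31), A): the shift
// yields all-ones exactly when X < 0, so the AND selects A or 0.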
@@ -6789,7 +7529,8 @@
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
unsigned ShCtV = N2C->getAPIntValue().logBase2();
ShCtV = XType.getSizeInBits()-ShCtV-1;
- SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+ SDValue ShCt = DAG.getConstant(ShCtV,
+ getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
XType, N0, ShCt);
AddToWorkList(Shift.getNode());
@@ -6805,7 +7546,7 @@
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
+ getShiftAmountTy(N0.getValueType())));
AddToWorkList(Shift.getNode());
if (XType.bitsGT(AType)) {
@@ -6817,9 +7558,41 @@
}
}
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+ // where y has a single bit set.
+ // Put plainly: we can turn the SELECT_CC into an AND
+ // when the condition can be materialized as an all-ones register. Any
+ // single bit-test can be materialized as an all-ones register with
+ // shift-left and shift-right-arith.
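+ // For example, testing bit 4 of an i32 x (y = 0x10): shl by 27 moves the
+ // bit into the sign position, and sra by 31 then gives all-ones when the
+ // bit was set and zero otherwise.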
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic-shift it right all the way over, so the result is
+ // either all-ones or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1,
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+ TLI.getBooleanContents(N0.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
@@ -6854,7 +7627,7 @@
// shl setcc result by log2 n2c
return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
DAG.getConstant(N2C->getAPIntValue().logBase2(),
- getShiftAmountTy()));
+ getShiftAmountTy(Temp.getValueType())));
}
// Check to see if this is the equivalent of setcc
@@ -6877,7 +7650,7 @@
SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
DAG.getConstant(Log2_32(XType.getSizeInBits()),
- getShiftAmountTy()));
+ getShiftAmountTy(Ctlz.getValueType())));
}
// fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
@@ -6887,13 +7660,13 @@
return DAG.getNode(ISD::SRL, DL, XType,
DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
+ getShiftAmountTy(XType)));
}
// fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
+ getShiftAmountTy(N0.getValueType())));
return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
}
}
@@ -6920,7 +7693,7 @@
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
N0,
DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
+ getShiftAmountTy(N0.getValueType())));
SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
XType, N0, Shift);
AddToWorkList(Shift.getNode());
@@ -6970,7 +7743,8 @@
}
/// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself. Provides base object and offset as results.
+// to alias with anything but itself. Provides base object and offset as
+// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, void *&CV) {
// Assume it is a primitive operation.
@@ -6983,7 +7757,7 @@
Offset += C->getZExtValue();
}
}
-
+
// Return the underlying GlobalValue, and update the Offset. Return false
// for GlobalAddressSDNode since the same GlobalAddress may be represented
// by multiple nodes with different offsets.
@@ -7011,9 +7785,11 @@
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const {
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
@@ -7029,8 +7805,19 @@
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
- // If we know what the bases are, and they aren't identical, then we know they
- // cannot alias.
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
return false;
@@ -7043,20 +7830,21 @@
(Size1 == Size2) && (SrcValueAlign1 > Size1)) {
int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
-
+
// There is no overlap between these relatively aligned accesses of similar
// size, return no alias.
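// (For instance, two 4-byte accesses at offsets 0 and 4 of 8-byte-aligned
// values can never overlap.)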
if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
return false;
}
-
+
if (CombinerGlobalAA) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
@@ -7069,27 +7857,29 @@
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
+ const Value *&SrcValue,
int &SrcValueOffset,
- unsigned &SrcValueAlign) const {
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
Size = LD->getMemoryVT().getSizeInBits() >> 3;
SrcValue = LD->getSrcValue();
SrcValueOffset = LD->getSrcValueOffset();
SrcValueAlign = LD->getOriginalAlignment();
+ TBAAInfo = LD->getTBAAInfo();
return true;
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ }
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
Size = ST->getMemoryVT().getSizeInBits() >> 3;
SrcValue = ST->getSrcValue();
SrcValueOffset = ST->getSrcValueOffset();
SrcValueAlign = ST->getOriginalAlignment();
- } else {
- llvm_unreachable("FindAliasInfo expected a memory operand");
+ TBAAInfo = ST->getTBAAInfo();
+ return false;
}
-
- return false;
+ llvm_unreachable("FindAliasInfo expected a memory operand");
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
@@ -7105,26 +7895,27 @@
const Value *SrcValue;
int SrcValueOffset;
unsigned SrcValueAlign;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
- SrcValueAlign);
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
-
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.back();
Chains.pop_back();
-
- // For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
// find more and revert to original chain since the xform is unlikely to be
// profitable.
- //
- // FIXME: The depth check could be made to return the last non-aliasing
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
if (Depth > 6 || Aliases.size() == 2) {
@@ -7150,15 +7941,18 @@
const Value *OpSrcValue;
int OpSrcValueOffset;
unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign);
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign)) {
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
@@ -7198,16 +7992,16 @@
// Accumulate all the aliases to this node.
GatherAllAliases(N, OldChain, Aliases);
- if (Aliases.size() == 0) {
- // If no operands then chain to entry token.
+ // If no operands then chain to entry token.
+ if (Aliases.size() == 0)
return DAG.getEntryNode();
- } else if (Aliases.size() == 1) {
- // If a single operand then chain to it. We don't need to revisit it.
+
+ // If a single operand then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
return Aliases[0];
- }
-
+
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
&Aliases[0], Aliases.size());
}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/FastISel.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000..e8f8c73
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1385 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+ EmitStartPt = 0;
+
+ // Advance the emit start point past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ EmitStartPt = I;
+ ++I;
+ }
+ LastLocalValue = EmitStartPt;
+}
+
+void FastISel::flushLocalValueMap() {
+ LocalValueMap.clear();
+ LastLocalValue = EmitStartPt;
+ recomputeInsertPt();
+}
+
+bool FastISel::hasTrivialKill(const Value *V) const {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+
+ unsigned Reg = LocalValueMap[V];
+ if (Reg != 0)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+/// materializeRegForValue - Helper for getRegForValue. This function is
+/// called when the value isn't already available in a register and must
+/// be materialized with new instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg =
+ getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue()) {
+ Reg = TargetMaterializeFloatZero(CF);
+ } else {
+ // Try to emit the constant directly.
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ }
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
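+ // (For example, 2.0 converts exactly to the integer 2; the 2 is
+ // materialized and converted back with SINT_TO_FP.)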
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, x);
+
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg, /*Kill=*/false);
+ }
+ }
+ } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
+ if (AssignedReg == 0)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++)
+ FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+
+ AssignedReg = Reg;
+ }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ } else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DL;
+ recomputeInsertPt();
+ DL = DebugLoc();
+ SavePoint SP = { OldInsertPt, OldDL };
+ return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DL = OldInsertPt.DL;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
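+ // (For example, "add i32 1, %x" is selected as if it were "add %x, 1".)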
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0) return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+ Op1IsKill, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getZExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() &&
+ isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode,
+ Op0, Op0IsKill,
+ Op1, Op1IsKill);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(const User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+ E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+ // FIXME: This can be optimized by combining the add with a
+ // subsequent one.
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero()) continue;
+ uint64_t Offs =
+ TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
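+ // (For an array of i32, for example, ElementSize is 4, so the address
+ // advances by Idx * 4.)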
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::SelectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ const Function *F = Call->getCalledFunction();
+ if (!F) return false;
+
+ // Handle selected intrinsic function calls.
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
+ if (!DIVariable(DI->getVariable()).Verify() ||
+ !FuncInfo.MF->getMMI().hasDebugInfo())
+ return true;
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ return true;
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ if (!Reg)
+ Reg = getRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(Call);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const Value *V = DI->getValue();
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addCImm(CI).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addFPImm(CF).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ return true;
+ }
+ case Intrinsic::eh_exception: {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+ break;
+
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ case Intrinsic::eh_selector: {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+ break;
+ if (FuncInfo.MBB->isLandingPad())
+ AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(Call);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
+
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ EVT SrcVT = TLI.getPointerTy();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+
+ bool ResultRegIsKill = hasTrivialKill(Call);
+
+ // Cast the register to the type of the selector.
+ if (SrcVT.bitsGT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+ ResultReg, ResultRegIsKill);
+ else if (SrcVT.bitsLT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+ ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+ if (ResultReg == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ UpdateValueMap(Call, ResultReg);
+
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
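+ // For an unknown object this conservatively yields -1 for the "max" form
+ // (second operand 0) and 0 for the "min" form.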
+ unsigned long long Res = CI->isZero() ? -1ULL : 0;
+ Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
+ }
+
+ // Usually, it does not make sense to initialize a value, make an
+ // unrelated function call, and then use the value, because the value
+ // tends to be spilled on the stack. So we move the pointer to the last
+ // local value to the beginning of the block, so that all the values
+ // which have already been materialized appear after the call. It also
+ // makes sense to skip intrinsics, since they tend to be inlined.
+ if (!isa<IntrinsicInst>(F))
+ flushLocalValueMap();
+
+ // An arbitrary call. Bail.
+ return false;
+}
+
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg, InputRegIsKill);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(const User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple() ||
+ !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0);
+ }
+ }
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ ISD::BITCAST, Op0, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(const Instruction *I) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(I))
+ if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ return false;
+
+ DL = I->getDebugLoc();
+
+ // First, try doing target-independent selection.
+ if (SelectOperator(I, I->getOpcode())) {
+ DL = DebugLoc();
+ return true;
+ }
+
+ // Next, try calling the target to attempt to handle the instruction.
+ if (TargetSelectInstruction(I)) {
+ DL = DebugLoc();
+ return true;
+ }
+
+ DL = DebugLoc();
+ return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+ if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ SmallVector<MachineOperand, 0>(), DL);
+ }
+ FuncInfo.MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
+///
+bool
+FastISel::SelectFNeg(const User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (OpReg == 0) return false;
+
+ bool OpRegIsKill = hasTrivialKill(I);
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(I->getType());
+ unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::FNEG, OpReg, OpRegIsKill);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
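+ // (For f32 this XORs bit 31, i.e. the IEEE sign bit; for f64, bit 63.)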
+ if (VT.getSizeInBits() > 64) return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg, OpRegIsKill);
+ if (IntReg == 0)
+ return false;
+
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+ IntReg, /*Kill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits()-1),
+ IntVT.getSimpleVT());
+ if (IntResultReg == 0)
+ return false;
+
+ ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ UpdateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SelectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return SelectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return SelectFNeg(I);
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return SelectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return SelectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+ : FuncInfo(funcInfo),
+ MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()),
+ MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()),
+ TD(*TM.getTargetData()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()),
+ TRI(*TM.getRegisterInfo()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+ unsigned) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+ unsigned, const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper around FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
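+    // mul x, 8 -> shl x, 3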
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed), check to make sure shift amounts are
+ // in-range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ }
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+
+ return ResultReg;
+}
+
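+// Note: each FastEmitInst_* overload below repeats this pattern: when the
+// MCInstrDesc declares no explicit defs, the result is assumed to land in the
+// instruction's first implicit def, so a COPY into the fresh vreg is emitted
+// after it rather than defining ResultReg directly.
+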
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+}
+
+/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBBs for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
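+/// (For example, lowering a switch through a jump table can split one IR
+/// block into several MBBs, so the block's end may feed PHIs from a different
+/// MBB than its start.)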
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+ else {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DL = PN->getDebugLoc();
+ if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ DL = Inst->getDebugLoc();
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (Reg == 0) {
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ DL = DebugLoc();
+ }
+ }
+
+ return true;
+}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index c097f11..b052740 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -55,20 +54,6 @@
return false;
}
-/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
-/// entry block, return true. This includes arguments used by switches, since
-/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
- const BasicBlock *Entry = A->getParent()->begin();
- for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
- if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
- return false; // Use not in entry block.
- }
- return true;
-}
-
FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
: TLI(tli) {
}
@@ -82,16 +67,10 @@
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(Fn->getReturnType(),
Fn->getAttributes().getRetAttributes(), Outs, TLI);
- CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(),
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
+ Fn->isVarArg(),
Outs, Fn->getContext());
- // Create a vreg for each argument register that is not dead and is used
- // outside of the entry block for the function.
- for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
- AI != E; ++AI)
- if (!isOnlyUsedInEntryBlock(AI))
- InitializeRegForValue(AI);
-
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
@@ -99,7 +78,7 @@
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
- const Type *Ty = AI->getAllocatedType();
+ Type *Ty = AI->getAllocatedType();
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
@@ -127,6 +106,34 @@
if (!isa<AllocaInst>(I) ||
!StaticAllocaMap.count(cast<AllocaInst>(I)))
InitializeRegForValue(I);
+
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ if (MMI.hasDebugInfo() &&
+ DIVariable(DI->getVariable()).Verify() &&
+ !DI->getDebugLoc().isUnknown()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
@@ -149,6 +156,10 @@
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (PN->use_empty()) continue;
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
DebugLoc DL = PN->getDebugLoc();
unsigned PHIReg = ValueMap[PN];
assert(PHIReg && "PHI node does not have an assigned virtual register!");
@@ -165,6 +176,11 @@
}
}
}
+
+ // Mark landing pad blocks.
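+  // (An invoke's successor 1 is its unwind destination, i.e. the landing pad.)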
+ for (BB = Fn->begin(); BB != EB; ++BB)
+ if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
}
/// clear - Clear out all the function-specific state. This returns this
@@ -182,7 +198,9 @@
CatchInfoFound.clear();
#endif
LiveOutRegInfo.clear();
+ VisitedBBs.clear();
ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
RegFixups.clear();
}
@@ -198,7 +216,7 @@
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, Ty, ValueVTs);
@@ -216,3 +234,252 @@
return FirstReg;
}
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return NULL;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return NULL;
+
+ if (BitWidth > LOI->KnownZero.getBitWidth()) {
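+    // Conservative widening: the sign-bit count resets to 1 and the new high
+    // bits are treated as unknown in both masks.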
+ LOI->NumSignBits = 1;
+ LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+ LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+ }
+
+ return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.KnownZero = ~Val;
+ DestLOI.KnownOne = Val;
+ } else {
+    assert(ValueMap.count(V) && "V should have been placed in ValueMap when its "
+ "CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+ DestLOI.KnownOne.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.KnownZero &= ~Val;
+ DestLOI.KnownOne &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.KnownZero &= SrcLOI->KnownZero;
+ DestLOI.KnownOne &= SrcLOI->KnownOne;
+ }
+}
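+
+// Worked example (sketch, assuming the PHI type promotes to i32): for
+// `%p = phi i8 [ 12, %a ], [ 4, %b ]`, the first incoming value seeds
+// KnownOne = 12, KnownZero = ~12, NumSignBits = 28; intersecting with the
+// second leaves KnownOne = 4, KnownZero = ~12, NumSignBits = min(28, 29) = 28.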
+
+/// setArgumentFrameIndex - Record the frame index for the byval argument.
+/// This overrides any previous frame index entry for this argument.
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
+ int FI) {
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getArgumentFrameIndex - Get the frame index for the byval argument.
+/// If the argument does not have an assigned frame index, 0 is
+/// returned.
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+  DEBUG(dbgs() << "Argument does not have an assigned frame index!");
+ return 0;
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<const GlobalVariable *> TyInfo;
+ unsigned N = I.getNumArgOperands();
+
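+  // Operand layout (sketch): arg 0 is the exception value, arg 1 the
+  // personality, and args 2..N-1 interleave catch typeinfos with
+  // length-prefixed filter lists; a ConstantInt marks a filter of that many
+  // entries (0 meaning cleanup), so the loop below scans the operands
+  // backwards, splitting filters from the trailing catch entries.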
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert(FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 2) {
+ TyInfo.reserve(N - 2);
+ for (unsigned j = 2; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
+
+void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ SmallPtrSet<const BasicBlock*, 4> Visited;
+
+ // The 'eh.selector' call may not be in the direct successor of a basic block,
+ // but could be several successors deeper. If we don't find it, try going one
+ // level further. <rdar://problem/8824861>
+ while (Visited.insert(SuccBB)) {
+ for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
+ I != E; ++I)
+ if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to LPad.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
+#ifndef NDEBUG
+ if (!FLI.MBBMap[SuccBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
+#endif
+ return;
+ }
+
+ const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
+ if (Br && Br->isUnconditional())
+ SuccBB = Br->getSuccessor(0);
+ else
+ break;
+ }
+}
+
+/// AddLandingPadInfo - Extract the exception handling information from the
+/// landingpad instruction and add it to the specified machine module info.
+void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
+ MachineBasicBlock *MBB) {
+ MMI.addPersonality(MBB,
+ cast<Function>(I.getPersonalityFn()->stripPointerCasts()));
+
+ if (I.isCleanup())
+ MMI.addCleanup(MBB);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
+ // but we need to do it this way because of how the DWARF EH emitter
+ // processes the clauses.
+ for (unsigned i = I.getNumClauses(); i != 0; --i) {
+ Value *Val = I.getClause(i - 1);
+ if (I.isCatch(i - 1)) {
+ MMI.addCatchTypeInfo(MBB,
+ dyn_cast<GlobalVariable>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ Constant *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalVariable*, 4> FilterList;
+ for (User::op_iterator
+ II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
+ FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts()));
+
+ MMI.addFilterTypeInfo(MBB, FilterList);
+ }
+ }
+}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/INSTALL.vcxproj b/src/LLVM/lib/CodeGen/SelectionDAG/INSTALL.vcxproj
new file mode 100644
index 0000000..c99ae94
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/INSTALL.vcxproj
@@ -0,0 +1,261 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="MinSizeRel|Win32">
+ <Configuration>MinSizeRel</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="RelWithDebInfo|Win32">
+ <Configuration>RelWithDebInfo</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>INSTALL</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\INSTALL_force.rule">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeLists.txt">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj">
+ <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/INSTALL.vcxproj.filters b/src/LLVM/lib/CodeGen/SelectionDAG/INSTALL.vcxproj.filters
new file mode 100644
index 0000000..251dd1d
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\INSTALL_force.rule">
+ <Filter>CMake Rules</Filter>
+ </CustomBuild>
+ <CustomBuild Include="CMakeLists.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="CMake Rules">
+ <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 6246f8e..2ff66f8 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -30,12 +30,18 @@
#include "llvm/Support/MathExtras.h"
using namespace llvm;
+/// MinRCSize - Smallest register class we allow when constraining virtual
+/// registers. If satisfying all register class constraints would require
+/// using a smaller register class, emit a COPY to a new virtual register
+/// instead.
+const unsigned MinRCSize = 4;
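+// (Sketch of the intent: on x86, constraining a GR32 vreg to GR32_ABCD, which
+// still has four allocatable registers, is acceptable, while a single-register
+// class would be rejected; the COPY fallback then kicks in.)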
+
/// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
+/// operands first, then an optional chain, and optional glue operands (which do
/// not go into the resulting MachineInstr).
unsigned InstrEmitter::CountResults(SDNode *Node) {
unsigned N = Node->getNumValues();
- while (N && Node->getValueType(N - 1) == MVT::Flag)
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
--N;
if (N && Node->getValueType(N - 1) == MVT::Other)
--N; // Skip over chain result.
@@ -43,12 +49,12 @@
}
/// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by an optional chain operand, then an optional flag operand.
+/// followed by an optional chain operand, then an optional glue operand.
/// Compute the number of actual operands that will go into the resulting
/// MachineInstr.
unsigned InstrEmitter::CountOperands(SDNode *Node) {
unsigned N = Node->getNumOperands();
- while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
@@ -67,7 +73,7 @@
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
return;
}
@@ -76,12 +82,18 @@
// the CopyToReg'd destination register instead of creating a new vreg.
bool MatchReg = true;
const TargetRegisterClass *UseRC = NULL;
+ EVT VT = Node->getValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
if (!IsClone && !IsCloned)
for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
UI != E; ++UI) {
SDNode *User = *UI;
bool Match = true;
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
@@ -96,18 +108,19 @@
if (Op.getNode() != Node || Op.getResNo() != ResNo)
continue;
EVT VT = Node->getValueType(Op.getResNo());
- if (VT == MVT::Other || VT == MVT::Flag)
+ if (VT == MVT::Other || VT == MVT::Glue)
continue;
Match = false;
if (User->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
const TargetRegisterClass *RC = 0;
if (i+II.getNumDefs() < II.getNumOperands())
- RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI);
+ RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
if (!UseRC)
UseRC = RC;
else if (RC) {
- const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
@@ -121,10 +134,9 @@
break;
}
- EVT VT = Node->getValueType(ResNo);
const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
-
+
// Figure out the register class to create for the destreg.
if (VRBase) {
DstRC = MRI->getRegClass(VRBase);
@@ -134,7 +146,7 @@
} else {
DstRC = TLI->getRegClassFor(VT);
}
-
+
// If all uses are reading from the src physical register and copying the
// register is either impossible or very expensive, then don't create a copy.
if (MatchReg && SrcRC->getCopyCost() < 0) {
@@ -150,7 +162,7 @@
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -162,7 +174,7 @@
return 0;
SDNode *User = *Node->use_begin();
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
@@ -173,7 +185,7 @@
}
void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
- const TargetInstrDesc &II,
+ const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap) {
assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
@@ -184,7 +196,7 @@
// is a vreg in the same register class, use the CopyToReg'd destination
// register instead of creating a new vreg.
unsigned VRBase = 0;
- const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI);
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
unsigned NumResults = CountResults(Node);
@@ -197,7 +209,7 @@
for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
UI != E; ++UI) {
SDNode *User = *UI;
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
@@ -224,7 +236,7 @@
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
}
@@ -237,7 +249,7 @@
Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Add an IMPLICIT_DEF instruction before every use.
unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
- // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
if (!VReg) {
const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
@@ -260,30 +272,31 @@
void
InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
// Get/emit the operand.
unsigned VReg = getVR(Op, VRBaseMap);
assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
- const TargetInstrDesc &TID = MI->getDesc();
- bool isOptDef = IIOpNum < TID.getNumOperands() &&
- TID.OpInfo[IIOpNum].isOptionalDef();
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
// If the instruction requires a register in a different class, create
- // a new virtual register and copy the value into it.
+ // a new virtual register and copy the value into it, but first attempt to
+ // shrink VReg's register class within reason. For example, if VReg == GR32
+ // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
if (II) {
- const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
- DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
- assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+ DstRC = TII->getRegClass(*II, IIOpNum, TRI);
+ assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
- if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+ if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
@@ -307,7 +320,7 @@
while (Idx > 0 &&
MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
--Idx;
- bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+ bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
if (isTied)
isKill = false;
}
@@ -321,11 +334,11 @@
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
-/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
/// assertions only.
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
@@ -351,7 +364,7 @@
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
int Offset = CP->getOffset();
unsigned Align = CP->getAlignment();
- const Type *Type = CP->getType();
+ Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
Align = TM->getTargetData()->getPrefTypeAlignment(Type);
@@ -360,7 +373,7 @@
Align = TM->getTargetData()->getTypeAllocSize(Type);
}
}
-
+
unsigned Idx;
MachineConstantPool *MCP = MF->getConstantPool();
if (CP->isMachineConstantPoolEntry())
@@ -377,42 +390,51 @@
BA->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
}
}
-/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
-/// "SubIdx"'th sub-register class is the specified register class and whose
-/// type matches the specified type.
-static const TargetRegisterClass*
-getSuperRegisterRegClass(const TargetRegisterClass *TRC,
- unsigned SubIdx, EVT VT) {
- // Pick the register class of the superegister for this type
- for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
- E = TRC->superregclasses_end(); I != E; ++I)
- if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
- return *I;
- assert(false && "Couldn't find the register class");
- return 0;
+unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ EVT VT, DebugLoc DL) {
+ const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
+
+ // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg
+ // within reason.
+ if (RC && RC != VRC)
+ RC = MRI->constrainRegClass(VReg, RC, MinRCSize);
+
+ // VReg has been adjusted. It can be used with SubIdx operands now.
+ if (RC)
+ return VReg;
+
+ // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
+ // register instead.
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+ assert(RC && "No legal register class for VT supports that SubIdx");
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VReg);
+ return NewReg;
}
/// EmitSubregNode - Generate machine code for subreg nodes.
///
-void InstrEmitter::EmitSubregNode(SDNode *Node,
+void InstrEmitter::EmitSubregNode(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsClone, bool IsCloned) {
unsigned VRBase = 0;
unsigned Opc = Node->getMachineOpcode();
-
+
// If the node is only used by a CopyToReg and the dest reg is a vreg, use
// the CopyToReg'd destination register instead of creating a new vreg.
for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
UI != E; ++UI) {
SDNode *User = *UI;
- if (User->getOpcode() == ISD::CopyToReg &&
+ if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
@@ -421,63 +443,76 @@
}
}
}
-
+
if (Opc == TargetOpcode::EXTRACT_SUBREG) {
- // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
+ // constraints on the %dst register, COPY can target all legal register
+ // classes.
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0));
- // Figure out the register class to create for the destreg.
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
- const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
- assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ } else {
+ // VReg may not support a SubIdx sub-register, and we may need to
+ // constrain its register class or issue a COPY to a compatible register
+ // class.
+ VReg = ConstrainForSubReg(VReg, SubIdx,
+ Node->getOperand(0).getValueType(),
+ Node->getDebugLoc());
- // Figure out the register class to create for the destreg.
- // Note that if we're going to directly use an existing register,
- // it must be precisely the required class, and not a subclass
- // thereof.
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
- // Create the reg
- assert(SRC && "Couldn't find source register class");
- VRBase = MRI->createVirtualRegister(SRC);
+ // Create the destreg if it is missing.
+ if (VRBase == 0)
+ VRBase = MRI->createVirtualRegister(TRC);
+
+ // Create the extract_subreg machine instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
}
-
- // Create the extract_subreg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase);
-
- // Add source, and subreg index
- AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
- IsClone, IsCloned);
- assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
- "Cannot yet extract from physregs");
- MI->getOperand(1).setSubReg(SubIdx);
- MBB->insert(InsertPos, MI);
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
SDValue N2 = Node->getOperand(2);
- unsigned SubReg = getVR(N1, VRBaseMap);
unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
- getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
- // Figure out the register class to create for the destreg.
- // Note that if we're going to directly use an existing register,
- // it must be precisely the required class, and not a subclass
- // thereof.
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
- // Create the reg
- assert(SRC && "Couldn't find source register class");
+ // Figure out the register class to create for the destreg. It should be
+ // the largest legal register class supporting SubIdx sub-registers.
+ // RegisterCoalescer will constrain it further if it decides to eliminate
+ // the INSERT_SUBREG instruction.
+ //
+ // %dst = INSERT_SUBREG %src, %sub, SubIdx
+ //
+ // is lowered by TwoAddressInstructionPass to:
+ //
+ // %dst = COPY %src
+ // %dst:SubIdx = COPY %sub
+ //
+ // There is no constraint on the %src register class.
+ //
+ const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0));
+ SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
+ assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
+
+ if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
VRBase = MRI->createVirtualRegister(SRC);
- }
// Create the insert_subreg or subreg_to_reg machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
MI->addOperand(MachineOperand::CreateReg(VRBase, true));
-
+
// If creating a subreg_to_reg, then the first input operand
// is an implicit value immediate, otherwise it's a register
if (Opc == TargetOpcode::SUBREG_TO_REG) {
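Both subreg paths above now reduce to plain COPY instructions carrying a sub-register index, rather than surviving as dedicated subreg machine opcodes. A minimal sketch of the emitted forms, reusing names from the hunk (DL stands in for Node->getDebugLoc()):

    // EXTRACT_SUBREG: a sub-register read of VReg into VRBase.
    BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), VRBase)
        .addReg(VReg, 0, SubIdx);
    // INSERT_SUBREG keeps its own opcode at this point; as the comment in
    // the hunk says, TwoAddressInstructionPass later splits it into:
    //   %dst = COPY %src
    //   %dst:SubIdx = COPY %sub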
@@ -493,10 +528,10 @@
MBB->insert(InsertPos, MI);
} else
llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
-
+
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
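The (void)isNew rewrites in this and the following hunks matter because assert() expands to nothing under NDEBUG, leaving isNew otherwise unused, and the old self-assignment idiom trips -Wself-assign on newer compilers. In isolation:

    bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
    (void)isNew; // counts as a use even when NDEBUG strips the assert
    assert(isNew && "Node emitted out of order - early");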
@@ -518,7 +553,7 @@
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -527,25 +562,24 @@
void InstrEmitter::EmitRegSequence(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsClone, bool IsCloned) {
- const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
unsigned NewVReg = MRI->createVirtualRegister(RC);
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
unsigned NumOps = Node->getNumOperands();
- assert((NumOps & 1) == 0 &&
- "REG_SEQUENCE must have an even number of operands!");
- const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
- for (unsigned i = 0; i != NumOps; ++i) {
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
- if (i & 1) {
+ if ((i & 1) == 0) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (!SRC)
- llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
- if (SRC != RC) {
+ if (SRC && SRC != RC) {
MRI->setRegClass(NewVReg, SRC);
RC = SRC;
}
@@ -557,7 +591,7 @@
MBB->insert(InsertPos, MI);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
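REG_SEQUENCE nodes now carry the destination register class index as operand 0, giving 1 + 2*N operands in total, which is why the count assert flips to odd and the loop starts at i = 1 over (value, subindex) pairs. A hedged sketch of the node shape this code consumes, assuming the contemporaneous SelectionDAG builder API (Val0/SubIdx0, VT, and dl are placeholders):

    SDValue Ops[] = {
      DAG.getTargetConstant(DstRCIdx, MVT::i32), // operand 0: dest reg class
      Val0, DAG.getTargetConstant(SubIdx0, MVT::i32),
      Val1, DAG.getTargetConstant(SubIdx1, MVT::i32),
    };
    SDNode *Seq = DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT,
                                     Ops, 5);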
@@ -577,7 +611,7 @@
return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
}
// Otherwise, we're going to create an instruction here.
- const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
if (SD->getKind() == SDDbgValue::SDNODE) {
SDNode *Node = SD->getSDNode();
@@ -596,12 +630,8 @@
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // FIXME: SDDbgValue constants aren't updated with legalization, so it's
- // possible to have i128 constants in them at this point. Dwarf writer
- // does not handle i128 constants at the moment so, as a crude workaround,
- // just drop the debug info if this happens.
- if (!CI->getValue().isSignedIntN(64))
- MIB.addReg(0U);
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
else
MIB.addImm(CI->getSExtValue());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
@@ -627,9 +657,9 @@
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned Opc = Node->getMachineOpcode();
-
+
// Handle subreg insert/extract specially
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
@@ -651,8 +681,8 @@
if (Opc == TargetOpcode::IMPLICIT_DEF)
// We want a unique VR for each IMPLICIT_DEF use.
return;
-
- const TargetInstrDesc &II = TII->get(Opc);
+
+ const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
unsigned NodeOperands = CountOperands(Node);
bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
@@ -673,35 +703,35 @@
// The MachineInstr constructor adds implicit-def operands. Scan through
// these to determine which are dead.
if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+ Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
// First, collect all used registers.
SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
if (F->getOpcode() == ISD::CopyFromReg)
UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
else {
// Collect declared implicit uses.
- const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
- UsedRegs.append(TID.getImplicitUses(),
- TID.getImplicitUses() + TID.getNumImplicitUses());
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
// In addition to declared implicit uses, we must also check for
// direct RegisterSDNode operands.
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
unsigned Reg = R->getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
UsedRegs.push_back(Reg);
}
}
// Then mark unused registers as dead.
MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
}
-
+
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
-
+
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
bool HasOptPRefs = II.getNumDefs() > NumResults;
@@ -721,20 +751,7 @@
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
- if (II.usesCustomInsertionHook()) {
- // Insert this instruction into the basic block using a target
- // specific inserter which may returns a new basic block.
- bool AtEnd = InsertPos == MBB->end();
- MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
- if (NewMBB != MBB) {
- if (AtEnd)
- InsertPos = NewMBB->end();
- MBB = NewMBB;
- }
- return;
- }
-
- // Additional results must be an physical register def.
+ // Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
@@ -742,22 +759,28 @@
EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
// If there are no uses, mark the register as dead now, so that
// MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Flag value, for the benefit of targets still using
- // Flag for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // node has a Glue value, for the benefit of targets still using
+ // Glue for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
MI->addRegisterDead(Reg, TRI);
}
}
-
+
// If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any flag outputs, we don't do this
- // because we don't know what implicit defs are being used by flagged nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // implicit def as dead. If the node has any glue outputs, we don't do this
+ // because we don't know what implicit defs are being used by glued nodes.
+ if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
if (const unsigned *IDList = II.getImplicitDefs()) {
for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
i != e; ++i)
MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
}
+
+ // Run post-isel target hook to adjust this instruction if needed.
+#ifdef NDEBUG
+ if (II.hasPostISelHook())
+#endif
+ TLI->AdjustInstrPostInstrSelection(MI, Node);
}
/// EmitSpecialNode - Generate machine code for a target-independent node and
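The #ifdef NDEBUG guard above is deliberate: release builds invoke the post-isel hook only for instructions whose MCInstrDesc sets hasPostISelHook, while debug builds call it unconditionally so the default TargetLowering implementation can assert on targets that set the flag but never override the hook. A hypothetical override, with all MyTarget names as placeholders:

    void MyTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                         SDNode *Node) const {
      // e.g. attach an implicit status-register use that isel cannot encode.
      if (MI->getOpcode() == MyTarget::SPECIAL_OP)
        MI->addOperand(MachineOperand::CreateReg(MyTarget::FLAGS,
                                                 /*isDef=*/false,
                                                 /*isImp=*/true));
    }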
@@ -785,7 +808,7 @@
SrcReg = R->getReg();
else
SrcReg = getVR(SrcVal, VRBaseMap);
-
+
unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break;
@@ -799,6 +822,87 @@
EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
break;
}
+ case ISD::EH_LABEL: {
+ MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ break;
+ }
+
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+ // Add the HasSideEffect and isAlignStack bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ MI->addOperand(MachineOperand::CreateImm(Flags));
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MI->addOperand(MachineOperand::CreateReg(Reg, true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
+ /*isKill=*/ false,
+ /*isDead=*/ false,
+ /*isUndef=*/false,
+ /*isEarlyClobber=*/ true));
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+ break;
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MI->addOperand(MachineOperand::CreateMetadata(MD));
+
+ MBB->insert(InsertPos, MI);
+ break;
+ }
}
}
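In the INLINEASM case above, every operand group starts with a flag word packing the operand kind together with the number of values that follow. For orientation, a hedged sketch of the producer side, per the contemporaneous InlineAsm.h helpers (AsmOps, Reg0/Reg1, and VT are placeholders):

    unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse,
                                           /*NumOps=*/2);
    AsmOps.push_back(DAG.getTargetConstant(Flag, MVT::i32)); // group header
    AsmOps.push_back(DAG.getRegister(Reg0, VT));             // value 1 of 2
    AsmOps.push_back(DAG.getRegister(Reg1, VT));             // value 2 of 2
    // The emitter loop then reads Flag, consumes NumVals = 2 operands, and
    // continues at the next group's flag word.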
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.h b/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 02c044c..c081f38 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -22,7 +22,7 @@
namespace llvm {
-class TargetInstrDesc;
+class MCInstrDesc;
class SDDbgValue;
class InstrEmitter {
@@ -49,7 +49,7 @@
unsigned ResNo) const;
void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
- const TargetInstrDesc &II,
+ const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap);
@@ -63,7 +63,7 @@
/// not in the required register class.
void AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
@@ -73,10 +73,16 @@
/// assertions only.
void AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
+ /// ConstrainForSubReg - Try to constrain VReg to a register class that
+ /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
+ /// Return the virtual register to use.
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ EVT VT, DebugLoc DL);
+
/// EmitSubregNode - Generate machine code for subreg nodes.
///
void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
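ConstrainForSubReg is only declared in this hunk; its definition lives in InstrEmitter.cpp. A plausible shape of the body, assuming it leans on getSubClassWithSubReg and constrainRegClass with a COPY fallback (a sketch under those assumptions, not the patch's verbatim code):

    unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
                                              EVT VT, DebugLoc DL) {
      const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
      const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
      // If a subclass of VReg's class supports SubIdx, constrain in place.
      if (RC && MRI->constrainRegClass(VReg, RC))
        return VReg;
      // Otherwise copy into a register class that does support SubIdx.
      RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
      unsigned NewVReg = MRI->createVirtualRegister(RC);
      BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewVReg)
          .addReg(VReg);
      return NewVReg;
    }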
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LLVMSelectionDAG.vcxproj b/src/LLVM/lib/CodeGen/SelectionDAG/LLVMSelectionDAG.vcxproj
new file mode 100644
index 0000000..eaec317
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LLVMSelectionDAG.vcxproj
@@ -0,0 +1,393 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Profile|Win32">
+ <Configuration>Profile</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Profile|x64">
+ <Configuration>Profile</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{CDC80E5C-954B-4BCB-8C81-58645F5103A5}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>LLVMSelectionDAG</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>NotSet</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName>
+ <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt>
+ <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <InlineFunctionExpansion>Disabled</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Debug</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Debug/LLVMSelectionDAG.pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <InlineFunctionExpansion>Disabled</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Disabled</Optimization>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Debug</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Debug/LLVMSelectionDAG.pdb</ProgramDataBaseFileName>
+ <OmitFramePointers>false</OmitFramePointers>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMSelectionDAG.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMSelectionDAG.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>false</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMSelectionDAG.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>true</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">
+ <ClCompile>
+ <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <CompileAs>CompileAsCpp</CompileAs>
+ <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings>
+ <ExceptionHandling>false</ExceptionHandling>
+ <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <Optimization>Full</Optimization>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <RuntimeTypeInfo>false</RuntimeTypeInfo>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>
+ </DebugInformationFormat>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AssemblerListingLocation>Release</AssemblerListingLocation>
+ <ObjectFileName>$(IntDir)</ObjectFileName>
+ <ProgramDataBaseFileName>..\..\Release/LLVMSelectionDAG.pdb</ProgramDataBaseFileName>
+ <FavorSizeOrSpeed>Size</FavorSizeOrSpeed>
+ <OmitFramePointers>false</OmitFramePointers>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <StringPooling>true</StringPooling>
+ <BufferSecurityCheck>false</BufferSecurityCheck>
+ <FunctionLevelLinking>false</FunctionLevelLinking>
+ <FloatingPointExceptions>false</FloatingPointExceptions>
+ <CreateHotpatchableImage>false</CreateHotpatchableImage>
+ </ClCompile>
+ <ResourceCompile>
+ <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ResourceCompile>
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="DAGCombiner.cpp" />
+ <ClCompile Include="FastISel.cpp" />
+ <ClCompile Include="FunctionLoweringInfo.cpp" />
+ <ClCompile Include="InstrEmitter.cpp" />
+ <ClCompile Include="LegalizeDAG.cpp" />
+ <ClCompile Include="LegalizeFloatTypes.cpp" />
+ <ClCompile Include="LegalizeIntegerTypes.cpp" />
+ <ClCompile Include="LegalizeTypes.cpp" />
+ <ClCompile Include="LegalizeTypesGeneric.cpp" />
+ <ClCompile Include="LegalizeVectorOps.cpp" />
+ <ClCompile Include="LegalizeVectorTypes.cpp" />
+ <ClCompile Include="ScheduleDAGList.cpp" />
+ <ClCompile Include="ScheduleDAGRRList.cpp" />
+ <ClCompile Include="ScheduleDAGSDNodes.cpp" />
+ <ClCompile Include="SelectionDAG.cpp" />
+ <ClCompile Include="SelectionDAGBuilder.cpp" />
+ <ClCompile Include="SelectionDAGISel.cpp" />
+ <ClCompile Include="SelectionDAGPrinter.cpp" />
+ <ClCompile Include="TargetLowering.cpp" />
+ <ClCompile Include="TargetSelectionDAGInfo.cpp" />
+ <ClInclude Include="InstrEmitter.h" />
+ <ClInclude Include="LegalizeTypes.h" />
+ <ClInclude Include="ScheduleDAGSDNodes.h" />
+ <ClInclude Include="SDNodeDbgValue.h" />
+ <ClInclude Include="SDNodeOrdering.h" />
+ <ClInclude Include="SelectionDAGBuilder.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj">
+ <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\LLVMCodeGen.vcxproj">
+ <Project>7BE5F26F-0525-42BB-AAED-56C5B4582B99</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj">
+ <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\MC/LLVMMC.vcxproj">
+ <Project>8AB70E5D-2814-4682-AF9F-3062758BAEAB</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\Support/LLVMSupport.vcxproj">
+ <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\Target/LLVMTarget.vcxproj">
+ <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\Transforms/Utils/LLVMTransformUtils.vcxproj">
+ <Project>5C514254-58EE-4850-8743-F5D7BEAA3E66</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\..\..\include/llvm/intrinsics_gen.vcxproj">
+ <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LLVMSelectionDAG.vcxproj.filters b/src/LLVM/lib/CodeGen/SelectionDAG/LLVMSelectionDAG.vcxproj.filters
new file mode 100644
index 0000000..70f28e8
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LLVMSelectionDAG.vcxproj.filters
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <ClCompile Include="DAGCombiner.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="FastISel.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="FunctionLoweringInfo.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="InstrEmitter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LegalizeDAG.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LegalizeFloatTypes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LegalizeIntegerTypes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LegalizeTypes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LegalizeTypesGeneric.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LegalizeVectorOps.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="LegalizeVectorTypes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScheduleDAGList.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScheduleDAGRRList.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ScheduleDAGSDNodes.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SelectionDAG.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SelectionDAGBuilder.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SelectionDAGISel.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="SelectionDAGPrinter.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="TargetLowering.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="TargetSelectionDAGInfo.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="InstrEmitter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="LegalizeTypes.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ScheduleDAGSDNodes.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="SDNodeDbgValue.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="SDNodeOrdering.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="SelectionDAGBuilder.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{CFA0CD99-0550-4E94-A4D9-080C3F5D695C}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 4628561..63255ae 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,25 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -56,7 +50,6 @@
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- CodeGenOpt::Level OptLevel;
// Libcall insertion helpers.
@@ -70,17 +63,6 @@
/// being legalized (which could lead to non-serialized call sequences).
bool IsLegalizingCall;
- enum LegalizeAction {
- Legal, // The target natively supports this operation.
- Promote, // This operation should be executed in a larger type.
- Expand // Try to expand this to other ops, otherwise use a libcall.
- };
-
- /// ValueTypeActions - This is a bitvector that contains two bits for each
- /// value type, where the two bits correspond to the LegalizeAction enum.
- /// This can be queried with "getTypeAction(VT)".
- TargetLowering::ValueTypeActionImpl ValueTypeActions;
-
/// LegalizedNodes - For nodes that are of legal width, and that have more
/// than one use, this map indicates what regularized operand to use. This
/// allows us to avoid legalizing the same thing more than once.
@@ -91,31 +73,19 @@
// If someone requests legalization of the new node, return itself.
if (From != To)
LegalizedNodes.insert(std::make_pair(To, To));
+
+ // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To);
}
public:
- SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
-
- /// getTypeAction - Return how we should legalize values of this type, either
- /// it is already legal or we need to expand it into multiple registers of
- /// smaller integer type, or we need to promote it to a larger type.
- LegalizeAction getTypeAction(EVT VT) const {
- return
- (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT);
- }
-
- /// isTypeLegal - Return true if this type is legal on this target.
- ///
- bool isTypeLegal(EVT VT) const {
- return getTypeAction(VT) == Legal;
- }
+ explicit SelectionDAGLegalize(SelectionDAG &DAG);
void LegalizeDAG();
private:
- /// LegalizeOp - We know that the specified value has a legal type.
- /// Recursively ensure that the operands have legal types, then return the
- /// result.
+ /// LegalizeOp - Return a legal replacement for the given operation, with
+ /// all legal operands.
SDValue LegalizeOp(SDValue O);
SDValue OptimizeFloatStore(StoreSDNode *ST);
@@ -144,6 +114,9 @@
DebugLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -155,6 +128,7 @@
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -173,6 +147,7 @@
SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
@@ -214,13 +189,9 @@
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
-SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
- CodeGenOpt::Level ol)
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
: TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
- DAG(dag), OptLevel(ol),
- ValueTypeActions(TLI.getValueTypeActions()) {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
+ DAG(dag) {
}
void SelectionDAGLegalize::LegalizeDAG() {
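With the legalizer's private LegalizeAction enum and ValueTypeActions cache gone, type-action queries presumably go straight to TargetLowering. The assumed replacement idiom, with enumerator names per the contemporaneous TargetLowering::LegalizeTypeAction:

    EVT VT = Node->getValueType(0);
    if (TLI.getTypeAction(*DAG.getContext(), VT) ==
        TargetLowering::TypeLegal) {
      // Result type needs no legalization; only operands may need work.
    }
    bool Legal = TLI.isTypeLegal(VT); // equivalent check for the common case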
@@ -252,9 +223,16 @@
/// FindCallEndFromCallStart - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_END)
- return Node;
+static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
+ // Nested CALLSEQ_START/END constructs aren't yet legal,
+ // but we can do the right thing and handle them correctly here.
+ if (Node->getOpcode() == ISD::CALLSEQ_START)
+ depth++;
+ else if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ depth--;
+ if (depth == 0)
+ return Node;
+ }
if (Node->use_empty())
return 0; // No CallSeqEnd
@@ -284,7 +262,7 @@
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User))
+ if (SDNode *Result = FindCallEndFromCallStart(User, depth))
return Result;
}
return 0;
@@ -293,12 +271,26 @@
/// FindCallStartFromCallEnd - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_START node that initiates the call sequence.
static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ int nested = 0;
assert(Node && "Didn't find callseq_start for a call??");
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
-
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+ while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+ Node = Node->getOperand(0).getNode();
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ case ISD::CALLSEQ_START:
+ if (!nested)
+ return Node;
+ nested--;
+ break;
+ case ISD::CALLSEQ_END:
+ nested++;
+ break;
+ }
+ }
+ return 0;
}
/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
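Both call-sequence helpers now bracket-match CALLSEQ_START/CALLSEQ_END the way a parser matches parentheses. A toy, self-contained illustration of the depth logic, with '(' standing in for CALLSEQ_START and ')' for CALLSEQ_END:

    #include <cstddef>
    static std::size_t findCallEnd(const char *Seq, std::size_t Start) {
      int Depth = 0;
      for (std::size_t i = Start; Seq[i]; ++i) {
        if (Seq[i] == '(') ++Depth;
        else if (Seq[i] == ')' && --Depth == 0) return i;
      }
      return std::size_t(-1); // no matching CALLSEQ_END
    }
    // findCallEnd("(()())", 0) == 5: the nested pairs at 1..2 and 3..4 are
    // skipped rather than mistaken for the outer call's end.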
@@ -368,7 +360,7 @@
// smaller type.
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
- const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
VT = SVT;
Extend = true;
@@ -378,12 +370,12 @@
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
DAG.getEntryNode(),
- CPIdx, PseudoSourceValue::getConstantPool(),
- 0, VT, false, false, Alignment);
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, false,
+ MachinePointerInfo::getConstantPool(), false, false,
Alignment);
}
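This hunk shows the pattern for a migration repeated throughout the patch: the old (SrcValue, SVOffset) argument pair becomes a single MachinePointerInfo that bundles the IR value and byte offset. The new idiom, echoing the calls above:

    SDValue L = DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
                            MachinePointerInfo::getConstantPool(),
                            false /*isVolatile*/, false /*isNonTemporal*/,
                            Alignment);
    // Offsets fold into the info object instead of a separate parameter:
    MachinePointerInfo PI =
        MachinePointerInfo::getConstantPool().getWithOffset(8);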
@@ -396,7 +388,6 @@
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
- int SVOffset = ST->getSrcValueOffset();
DebugLoc dl = ST->getDebugLoc();
if (ST->getMemoryVT().isFloatingPoint() ||
ST->getMemoryVT().isVector()) {
@@ -405,69 +396,70 @@
// Expand to a bitconvert of the value to the integer type of the
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
- SVOffset, ST->isVolatile(), ST->isNonTemporal(),
- Alignment);
- } else {
- // Do a (aligned) store to a stack slot, then copy from the stack slot
- // to the final destination using (unaligned) integer loads and stores.
- EVT StoredVT = ST->getMemoryVT();
- EVT RegVT =
- TLI.getRegisterType(*DAG.getContext(),
- EVT::getIntegerVT(*DAG.getContext(),
- StoredVT.getSizeInBits()));
- unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
-
- // Perform the original store, only redirected to the stack slot.
- SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, NULL, 0, StoredVT,
- false, false, 0);
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
- SmallVector<SDValue, 8> Stores;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
- false, false, 0);
- // Store it to the final location. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
- ST->isVolatile(), ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
- // Increment the pointers.
- Offset += RegBytes;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- }
-
- // The last store may be partial. Do a truncating store. On big-endian
- // machines this requires an extending load from the stack slot to ensure
- // that the bits are in the right place.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (StoredBytes - Offset));
-
- // Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
- NULL, 0, MemVT, false, false, 0);
-
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
- MemVT, ST->isVolatile(),
- ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
- // The order of the stores doesn't matter - say it with a TokenFactor.
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
}
+ // Do a (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ EVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but the last copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->isVolatile(), ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
+ MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
}
assert(ST->getMemoryVT().isInteger() &&
!ST->getMemoryVT().isVector() &&
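A worked instance of the chunking arithmetic above, for a hypothetical 10-byte (f80) store on a target with 4-byte integer registers:

    unsigned StoredBytes = 10, RegBytes = 4;
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // == 3
    // The loop issues NumRegs - 1 == 2 full-register copies at offsets 0
    // and 4; the tail is a truncating store of 8 * (10 - 8) == 16 bits
    // (an i16) at offset 8.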
@@ -478,20 +470,21 @@
int IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
- SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Val.getValueType()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
// Store the two parts
SDValue Store1, Store2;
Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
- ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->getPointerInfo(), NewStoredVT,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
- ST->getSrcValue(), SVOffset + IncrementSize,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
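Store1 and Store2 above split one wide store into two halves, swapping the half order on big-endian targets. A hedged C++ rendition of the same splitting on a raw byte buffer; memcpy stands in for the narrower unaligned-capable stores, and every name is illustrative:

#include <cassert>
#include <cstdint>
#include <cstring>

static bool hostIsLittleEndian() {
  uint16_t Probe = 1;
  uint8_t FirstByte;
  std::memcpy(&FirstByte, &Probe, 1);
  return FirstByte == 1;
}

// Store a 32-bit value as two 16-bit halves at a possibly unaligned address.
void storeU32AsTwoHalves(uint8_t *Ptr, uint32_t Val) {
  uint16_t Lo = uint16_t(Val);        // bottom NumBits bits
  uint16_t Hi = uint16_t(Val >> 16);  // srl Val, NumBits
  uint16_t First = hostIsLittleEndian() ? Lo : Hi;   // the Lo:Hi select above
  uint16_t Second = hostIsLittleEndian() ? Hi : Lo;
  std::memcpy(Ptr, &First, 2);        // Store1
  std::memcpy(Ptr + 2, &Second, 2);   // Store2, at +IncrementSize
}

int main() {
  uint8_t Buf[5] = {};
  storeU32AsTwoHalves(Buf + 1, 0xAABBCCDDu);  // deliberately unaligned
  uint32_t RoundTrip;
  std::memcpy(&RoundTrip, Buf + 1, sizeof RoundTrip);
  assert(RoundTrip == 0xAABBCCDDu);
  return 0;
}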
@@ -502,7 +495,6 @@
static
SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
const TargetLowering &TLI) {
- int SVOffset = LD->getSrcValueOffset();
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -513,74 +505,75 @@
if (TLI.isTypeLegal(intVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, LD->isVolatile(),
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, dl);
- } else {
- // Copy the value to a (aligned) stack slot using (unaligned) integer
- // loads and stores, then do a (aligned) load from the stack slot.
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
- unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
- SmallVector<SDValue, 8> Stores;
- SDValue StackPtr = StackBase;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the original location.
- SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + Offset, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
- // Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, false, false, 0));
- // Increment the pointers.
- Offset += RegBytes;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- }
-
- // The last copy may be partial. Do an extending load.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
- LD->getSrcValue(), SVOffset + Offset,
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
- // Follow the load with a store to the stack slot. Remember the store.
- // On big-endian machines this requires a truncating store to ensure
- // that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, MemVT, false, false, 0));
-
- // The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
-
- // Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
- NULL, 0, LoadedVT, false, false, 0);
-
- // Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
}
+
+  // Copy the value to an (aligned) stack slot using (unaligned) integer
+  // loads and stores, then do an (aligned) load from the stack slot.
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+  // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -603,27 +596,30 @@
// Load the value in two parts
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
} else {
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
}
// aggregate the two parts
- SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue ShiftAmount = DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(Hi.getValueType()));
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
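The join above computes (Hi << NumBits) | Lo, with the low half zero-extended so the OR cannot mix bits. As a concrete illustration, an unaligned 32-bit little-endian load assembled from two half-width loads; the function name and byte handling are illustrative:

#include <cassert>
#include <cstdint>

uint32_t loadU32FromTwoHalves(const uint8_t *Ptr) {
  uint32_t Lo = Ptr[0] | (uint32_t(Ptr[1]) << 8);  // ZEXTLOAD of the low half
  uint32_t Hi = Ptr[2] | (uint32_t(Ptr[3]) << 8);  // load at +IncrementSize
  return (Hi << 16) | Lo;                          // shl by NumBits, then or
}

int main() {
  uint8_t Buf[5] = {0, 0xDD, 0xCC, 0xBB, 0xAA};    // value starts at offset 1
  assert(loadU32FromTwoHalves(Buf + 1) == 0xAABBCCDDu);
  return 0;
}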
@@ -661,7 +657,7 @@
// Store the vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
// Truncate or zero extend offset to target pointer type.
@@ -672,13 +668,11 @@
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
// Store the scalar value.
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
- PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
}
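This lowering performs INSERT_VECTOR_ELT entirely through memory: spill the vector, overwrite one element at index * EltSize, reload the whole vector. The same idea on plain arrays, as a hedged sketch rather than DAG code:

#include <cassert>
#include <cstring>

// Memory-based insert: Slot stands in for the aligned stack temporary.
void insertElement(float Vec[4], float Val, unsigned Idx) {
  float Slot[4];
  std::memcpy(Slot, Vec, sizeof Slot);  // store the vector
  Slot[Idx] = Val;                      // scalar store at the element offset
  std::memcpy(Vec, Slot, sizeof Slot);  // load the updated vector
}

int main() {
  float V[4] = {1, 2, 3, 4};
  insertElement(V, 9.0f, 2);
  assert(V[0] == 1.0f && V[2] == 9.0f && V[3] == 4.0f);
  return 0;
}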
@@ -720,53 +714,55 @@
SDValue Tmp1 = ST->getChain();
SDValue Tmp2 = ST->getBasePtr();
SDValue Tmp3;
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
DebugLoc dl = ST->getDebugLoc();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
- getTypeAction(MVT::i32) == Legal) {
+ TLI.isTypeLegal(MVT::i32)) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (CFP->getValueType(0) == MVT::f64) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
- if (getTypeAction(MVT::i64) == Legal) {
+ if (TLI.isTypeLegal(MVT::i64)) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
// xform is certainly not worth it.
const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
- SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
}
}
- return SDValue();
+ return SDValue(0, 0);
}
-/// LegalizeOp - We know that the specified value has a legal type, and
-/// that its operands are legal. Now ensure that the operation itself
-/// is legal, recursively ensuring that the operands' operations remain
-/// legal.
+/// LegalizeOp - Return a legal replacement for the given operation, with
+/// all legal operands.
SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return Op;
@@ -775,11 +771,14 @@
DebugLoc dl = Node->getDebugLoc();
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert(getTypeAction(Node->getValueType(i)) == Legal &&
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
"Unexpected illegal type!");
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ assert((TLI.getTypeAction(*DAG.getContext(),
+ Node->getOperand(i).getValueType()) ==
+ TargetLowering::TypeLegal ||
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
@@ -793,7 +792,7 @@
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
- TargetLowering::LegalizeAction Action;
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
switch (Node->getOpcode()) {
case ISD::INTRINSIC_W_CHAIN:
@@ -815,6 +814,11 @@
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
break;
}
+ case ISD::ATOMIC_STORE: {
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ }
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::BR_CC: {
@@ -857,14 +861,19 @@
case ISD::UMULO:
case ISD::FPOWI:
case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
if (Action == TargetLowering::Legal)
Action = TargetLowering::Expand;
break;
- case ISD::TRAMPOLINE:
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
// These operations lie about being legal: when they claim to be legal,
@@ -906,7 +915,7 @@
case ISD::BRCOND:
// Branches tweak the chain to include LastCALLSEQ_END
Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
+ LastCALLSEQ_END);
Ops[0] = LegalizeOp(Ops[0]);
LastCALLSEQ_END = DAG.getEntryNode();
break;
@@ -918,7 +927,8 @@
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
if (!Ops[1].getValueType().isVector())
- Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[1]));
break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
@@ -926,7 +936,8 @@
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
if (!Ops[2].getValueType().isVector())
- Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+ Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+ Ops[2]));
break;
}
@@ -978,6 +989,31 @@
#endif
assert(0 && "Do not know how to legalize this operator!");
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+    assert(TLI.isTypeLegal(VT.getScalarType()) && "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ break;
+ }
+
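The loop above turns one vector shift into NumElem scalar shifts followed by a BUILD_VECTOR. A minimal scalar rendition for a four-lane shift-left; names are illustrative:

#include <cassert>

// Scalarized vector SHL: extract each (value, amount) pair, shift, rebuild.
void shiftLeftV4(unsigned Out[4], const unsigned Val[4], const unsigned Amt[4]) {
  for (unsigned Idx = 0; Idx < 4; ++Idx)  // NumElem iterations
    Out[Idx] = Val[Idx] << Amt[Idx];      // EXTRACT, scalar SHL, then rebuild
}

int main() {
  unsigned V[4] = {1, 2, 3, 4}, A[4] = {0, 1, 2, 3}, R[4];
  shiftLeftV4(R, V, A);
  assert(R[0] == 1 && R[1] == 4 && R[2] == 12 && R[3] == 32);
  return 0;
}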
case ISD::BUILD_VECTOR:
switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
default: assert(0 && "This action is not supported yet!");
@@ -1060,7 +1096,7 @@
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
// Do not try to legalize the target-specific arguments (#1+), except for
// an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
@@ -1109,7 +1145,7 @@
// If this is an unaligned load and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
@@ -1134,11 +1170,10 @@
// Change base type to a different vector type.
EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
- Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
Tmp4 = LegalizeOp(Tmp1.getValue(1));
break;
}
@@ -1148,218 +1183,311 @@
AddLegalizedOperand(SDValue(Node, 0), Tmp3);
AddLegalizedOperand(SDValue(Node, 1), Tmp4);
return Op.getResNo() ? Tmp4 : Tmp3;
- } else {
- EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
- int SVOffset = LD->getSrcValueOffset();
- unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
+ }
- if (SrcWidth != SrcVT.getStoreSizeInBits() &&
- // Some targets pretend to have an i1 loading operation, and actually
- // load an i8. This trick is correct for ZEXTLOAD because the top 7
- // bits are guaranteed to be zero; it helps the optimizers understand
- // that these bits are zero. It is also useful for EXTLOAD, since it
- // tells the optimizers that those bits are undefined. It would be
- // nice to have an effective generic way of getting these benefits...
- // Until such a way is found, don't insist on promoting i1 here.
- (SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- unsigned NewWidth = SrcVT.getStoreSizeInBits();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
- SDValue Ch;
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from NVT thus automatically gives zext from SrcVT.
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
- ISD::LoadExtType NewExtType =
- ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
- Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
- NVT, isVolatile, isNonTemporal, Alignment);
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Ch = Result.getValue(1); // The chain.
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
- if (ExtType == ISD::SEXTLOAD)
- // Having the top bits zero doesn't help when sign extending.
- Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
- // All the top bits are guaranteed to be zero - inform the optimizers.
- Result = DAG.getNode(ISD::AssertZext, dl,
- Result.getValueType(), Result,
- DAG.getValueType(SrcVT));
+ Ch = Result.getValue(1); // The chain.
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
- } else if (SrcWidth & (SrcWidth - 1)) {
- // If not loading a power-of-2 number of bits, expand as two loads.
- assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Load size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi, Ch;
- unsigned IncrementSize;
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
- if (TLI.isLittleEndian()) {
- // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
- // Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- } else {
- // Big endian - avoid unaligned loads.
- // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
- // Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
-
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- }
-
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
} else {
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
- if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
- }
- }
- }
- break;
- case TargetLowering::Expand:
- // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
- // f128 = EXTLOAD {f32,f64} too
- if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 ||
- Node->getValueType(0) == MVT::f128)) ||
- (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- Result = DAG.getNode(ISD::FP_EXTEND, dl,
- Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
- break;
- }
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
- break;
- }
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
}
- // Since loads produce two values, make sure to remember that we legalized
- // both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ }
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // legal, then scalarize it.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+ TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0).getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // illegal, create the promoted vector from bitcasted segments.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+ EVT MemElemTy = Node->getValueType(0).getScalarType();
+ EVT SrcSclrTy = SrcVT.getScalarType();
+ unsigned SizeRatio =
+ (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ SrcVT.getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ if (TLI.isBigEndian()) {
+              // MSB (which is garbage) comes first
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ } else {
+              // LSB (which is data) comes first
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ }
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NumElem*SizeRatio);
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ TempWideVector, &LoadVals[0], LoadVals.size());
+
+ // Cast to the correct type
+ ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+
+ }
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
}
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
}
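The little-endian branch above documents the non-power-of-2 expansion as EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16). The same composition in plain C++, assuming a little-endian byte order; the name is illustrative:

#include <cassert>
#include <cstdint>

uint32_t extloadI24LittleEndian(const uint8_t *Ptr) {
  uint32_t Lo = Ptr[0] | (uint32_t(Ptr[1]) << 8);  // ZEXTLOAD:i16 (RoundWidth)
  uint32_t Hi = Ptr[2];                            // EXTLOAD:i8 at +2 (ExtraWidth)
  return (Hi << 16) | Lo;                          // shl by RoundWidth, then or
}

int main() {
  uint8_t Mem[3] = {0xCD, 0xAB, 0x12};             // the i24 value 0x12ABCD
  assert(extloadI24LittleEndian(Mem) == 0x12ABCDu);
  return 0;
}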
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -1384,7 +1512,7 @@
// If this is an unaligned store and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
@@ -1397,10 +1525,10 @@
break;
case TargetLowering::Promote:
assert(VT.isVector() && "Unknown legal promote case!");
- Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getSrcValue(), SVOffset, isVolatile,
+ ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
break;
}
@@ -1419,9 +1547,8 @@
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, NVT, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1439,8 +1566,8 @@
if (TLI.isLittleEndian()) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ RoundVT,
isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
@@ -1448,28 +1575,29 @@
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
// Store the top RoundWidth bits.
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT, isVolatile, isNonTemporal,
- Alignment);
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Tmp3.getValueType())));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
}
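The little-endian branch above is the store-side mirror: TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16). A byte-level sketch with illustrative names:

#include <cassert>
#include <cstdint>

void truncstoreI24LittleEndian(uint8_t *Ptr, uint32_t X) {
  Ptr[0] = uint8_t(X);        // bottom RoundWidth (16) bits...
  Ptr[1] = uint8_t(X >> 8);   // ...the TRUNCSTORE:i16 part
  Ptr[2] = uint8_t(X >> 16);  // ExtraWidth bits: (srl X, 16) stored at +2
}

int main() {
  uint8_t Mem[3] = {};
  truncstoreI24LittleEndian(Mem, 0x12ABCDu);
  assert(Mem[0] == 0xCD && Mem[1] == 0xAB && Mem[2] == 0x12);
  return 0;
}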
@@ -1489,7 +1617,7 @@
// If this is an unaligned store and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
@@ -1499,13 +1627,113 @@
case TargetLowering::Custom:
Result = TLI.LowerOperation(Result, DAG);
break;
- case Expand:
+ case TargetLowering::Expand:
+
+ EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+ EVT NarrowScalarVT = StVT.getScalarType();
+
+ if (StVT.isVector()) {
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Tmp3.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ bool RegScalarLegal = TLI.isTypeLegal(RegSclVT);
+ bool MemScalarLegal = TLI.isTypeLegal(MemSclVT);
+
+ // We need to expand this store. If the register element type
+ // is legal then we can scalarize the vector and use
+ // truncating stores.
+ if (RegScalarLegal) {
+ // Cast floats into integers
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+ EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+            // Round odd types to the next power of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+ // Store Stride in bytes
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+              // This scalar TruncStore may be illegal, but we legalize it
+ // later.
+ SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ Stores.push_back(Store);
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+ // The scalar register type is illegal.
+          // For example saving <2 x i64> -> <2 x i32> on x86.
+ // In here we bitcast the value into a vector of smaller parts and
+ // save it using smaller scalars.
+ if (!RegScalarLegal && MemScalarLegal) {
+ // Store Stride in bytes
+ unsigned Stride = MemSclVT.getSizeInBits()/8;
+
+ unsigned SizeRatio =
+ (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits());
+
+ EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(),
+ MemSclVT,
+ SizeRatio * NumElem);
+
+          // Cast the wide-element vector to a wider vector of smaller elements.
+ // Example <2 x i64> -> <4 x i32>
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) {
+ // Extract the Ith element.
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+ // Bump pointer.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+            // Store if this element is:
+ // - First element on big endian, or
+ // - Last element on little endian
+ if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) ||
+ ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) {
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+ assert(false && "Unable to legalize the vector trunc store!");
+          } // is vector
+
+
// TRUNCSTORE:i16 i32 -> STORE i16
- assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+ assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
break;
}
}
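When the register scalar type is illegal, the value is bitcast to a vector of memory-width lanes and only the data-carrying lane of each group reaches memory. A hedged sketch of the <2 x i64> -> <2 x i32> case from the comment, assuming a little-endian host where the low half of each i64 is the even lane (the asserts rely on that assumption):

#include <cassert>
#include <cstdint>
#include <cstring>

// SizeRatio is 64/32 = 2: two i32 lanes per i64 element.
void truncStoreV2I64AsV2I32(uint32_t Out[2], const uint64_t In[2]) {
  uint32_t Lanes[4];
  std::memcpy(Lanes, In, sizeof Lanes);  // the BITCAST to <4 x i32>
  for (unsigned Idx = 0; Idx < 2; ++Idx)
    Out[Idx] = Lanes[2 * Idx];           // keep the lane holding the low bits
}

int main() {
  uint64_t In[2] = {0x1111222233334444ull, 0x5555666677778888ull};
  uint32_t Out[2] = {};
  truncStoreV2I64AsV2I32(Out, In);
  assert(Out[0] == 0x33334444u && Out[1] == 0x77778888u);
  return 0;
}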
@@ -1532,8 +1760,8 @@
DebugLoc dl = Op.getDebugLoc();
// Store the value to a temporary stack slot, then LOAD the returned part.
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Add the offset to the index.
unsigned EltSize =
@@ -1549,12 +1777,56 @@
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // Spill the vector to a stack slot, store the new part into it, then reload.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
else
- return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
- NULL, 0, Vec.getValueType().getVectorElementType(),
- false, false, 0);
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector.
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1567,7 +1839,7 @@
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
@@ -1586,11 +1858,13 @@
// element type, only store the bits necessary.
if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
EltVT, false, false, 0));
} else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
false, false, 0));
}
@@ -1602,7 +1876,7 @@
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1615,9 +1889,9 @@
SDValue SignBit;
EVT FloatVT = Tmp2.getValueType();
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
- if (isTypeLegal(IVT)) {
+ if (TLI.isTypeLegal(IVT)) {
// Convert to an integer with the same sign bit.
- SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
} else {
// Store the float to memory, then load the sign part out as an integer.
MVT LoadTy = TLI.getPointerTy();
@@ -1625,12 +1899,13 @@
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
// Then store the float to it.
SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0,
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
false, false, 0);
if (TLI.isBigEndian()) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1640,14 +1915,16 @@
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
if (BitShift)
SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
- DAG.getConstant(BitShift,TLI.getShiftAmountTy()));
+ DAG.getConstant(BitShift,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
}
}
// Now get the sign bit proper, by seeing whether the value is negative.
@@ -1683,7 +1960,7 @@
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align > StackAlign)
SP = DAG.getNode(ISD::AND, dl, VT, SP,
DAG.getConstant(-(uint64_t)Align, VT));
@@ -1757,12 +2034,12 @@
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
- const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
// Emit a store to the stack slot. Use a truncstore if the input value is
@@ -1771,21 +2048,21 @@
if (SrcSize > SlotSize)
Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, SlotVT, false, false, SrcAlign);
+ PtrInfo, SlotVT, false, false, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, false, false, SrcAlign);
+ PtrInfo, false, false, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
- return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false,
- DestAlign);
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT,
- false, false, DestAlign);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
}
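EmitStackConvert is a bit reinterpretation through memory: store the value as SlotVT, reload the same bytes as DestVT. The f32 -> i32 case in portable C++, as an illustrative sketch:

#include <cassert>
#include <cstdint>
#include <cstring>

uint32_t bitsOfFloat(float F) {
  uint32_t Slot;                        // stands in for the stack temporary
  std::memcpy(&Slot, &F, sizeof Slot);  // the store to FIPtr
  return Slot;                          // the load back from FIPtr
}

int main() {
  assert(bitsOfFloat(1.0f) == 0x3F800000u);  // IEEE-754 bits of 1.0f
  return 0;
}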
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1799,11 +2076,11 @@
SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
Node->getValueType(0).getVectorElementType(),
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
}
@@ -1869,7 +2146,7 @@
}
} else {
assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
- const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
}
@@ -1877,7 +2154,7 @@
SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
}
@@ -1923,7 +2200,7 @@
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
@@ -1933,13 +2210,21 @@
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+  // isTailCall may be true since the callee does not reference the caller's
+  // stack frame. Check that the call is actually in a tail call position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
@@ -1947,6 +2232,40 @@
return CallInfo.first;
}
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ return CallInfo.first;
+}
+
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
// ExpandLibCall except that the first operand is the in-chain.
std::pair<SDValue, SDValue>
@@ -1960,7 +2279,7 @@
TargetLowering::ArgListEntry Entry;
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
EVT ArgVT = Node->getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
@@ -1971,10 +2290,10 @@
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
@@ -2019,6 +2338,113 @@
return ExpandLibCall(LC, Node, isSigned);
}
+/// isDivRemLibcallAvailable - Return true if a divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// UseDivRem - Only issue a divrem libcall when both the quotient and the
+/// remainder are needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ unsigned OtherOpcode = 0;
+ if (isSigned)
+ OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+ else
+ OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ if (User->getOpcode() == OtherOpcode &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the stack slot that receives the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ DebugLoc dl = Node->getDebugLoc();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
+ MachinePointerInfo(), false, false, 0);
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
+
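
A note on the shape of this expansion: the divrem runtime routine returns the quotient directly and writes the remainder through the extra stack-slot pointer passed as the last argument, which is why one call can feed both an SDIV and an SREM user. A minimal C++ sketch of that calling convention, with a hypothetical divmod32 standing in for the RTLIB entry point (compiler-rt's __divmoddi4 has the same shape):

    #include <cstdint>

    // Hypothetical runtime routine: returns the quotient and stores the
    // remainder through the out-pointer (the FIPtr argument added above).
    extern "C" int32_t divmod32(int32_t a, int32_t b, int32_t *rem);

    void div_and_rem(int32_t a, int32_t b, int32_t &q, int32_t &r) {
      int32_t rem;
      q = divmod32(a, b, &rem);  // CallInfo.first in the expansion
      r = rem;                   // the load back from FIPtr
    }
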
/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
@@ -2053,16 +2479,17 @@
}
// store the lo of the constructed double - based on integer input
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
- Op0Mapped, Lo, NULL, 0,
+ Op0Mapped, Lo, MachinePointerInfo(),
false, false, 0);
// initial hi portion of constructed double
SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
// store the hi of the constructed double - biased exponent
- SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0,
- false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
// load the constructed double
- SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0,
- false, false, 0);
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -2105,18 +2532,41 @@
DAG.getConstant(32, MVT::i64));
SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
- SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
- SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
TwoP84PlusTwoP52);
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
- // Implementation of unsigned i64 to f32. This implementation has the
- // advantage of performing rounding correctly.
+ // Implementation of unsigned i64 to f32.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
- EVT SHVT = TLI.getShiftAmountTy();
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst =
+ DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
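
The new unsigned path above mirrors the compiler-rt algorithm cited in the comment: when the top bit is set, halve the value with the dropped low bit folded back in so rounding still sees it, convert signed, and double the result. As a standalone sketch (function name mine):

    #include <cstdint>

    float u64_to_f32(uint64_t x) {
      if ((int64_t)x >= 0)                 // Fast: fits in a signed convert.
        return (float)(int64_t)x;
      // Slow: shift right by one, OR-ing the dropped bit back in so the
      // final rounding is unaffected (round-to-odd), convert, then double.
      uint64_t half = (x >> 1) | (x & 1);
      float f = (float)(int64_t)half;
      return f + f;
    }
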
@@ -2129,8 +2579,9 @@
SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
- ISD::SETUGE);
+ ISD::SETUGE);
SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
DAG.getConstant(32, SHVT));
@@ -2144,7 +2595,6 @@
SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
DAG.getIntPtrConstant(0));
-
}
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
@@ -2178,13 +2628,13 @@
SDValue FudgeInReg;
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
else {
FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, Alignment));
}
@@ -2278,7 +2728,7 @@
///
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT VT = Op.getValueType();
- EVT SHVT = TLI.getShiftAmountTy();
+ EVT SHVT = TLI.getShiftAmountTy(VT);
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(0 && "Unhandled Expand type in BSWAP!");
@@ -2321,6 +2771,18 @@
}
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
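
SplatByte is the usual replicate-by-doubling loop. A scalar equivalent for widths up to 64 bits (the APInt version also covers 128), sketch only:

    #include <cstdint>

    uint64_t SplatByte64(unsigned NumBits, uint8_t ByteVal) {
      uint64_t Val = ByteVal;           // one byte populated
      unsigned Shift = 8;
      for (unsigned i = NumBits; i > 8; i >>= 1) {
        Val = (Val << Shift) | Val;     // double the populated width
        Shift <<= 1;
      }
      return Val;                       // SplatByte64(32, 0x55) == 0x55555555
    }
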
/// ExpandBitCount - Expand the specified bitcount instruction into operations.
///
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2328,26 +2790,45 @@
switch (Opc) {
default: assert(0 && "Cannot expand this yet!");
case ISD::CTPOP: {
- static const uint64_t mask[6] = {
- 0x5555555555555555ULL, 0x3333333333333333ULL,
- 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
- 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
- };
EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy();
- unsigned len = VT.getSizeInBits();
- for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
- //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
- unsigned EltSize = VT.isVector() ?
- VT.getVectorElementType().getSizeInBits() : len;
- SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
- SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
- Op = DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
- Tmp2));
- }
+ EVT ShVT = TLI.getShiftAmountTy(VT);
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+ SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+ SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+ SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
+
return Op;
}
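
Instantiated at i32, the new CTPOP expansion is the classic parallel bit count from the cited page; the four masks are exactly the SplatByte constants:

    #include <cstdint>

    uint32_t popcount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555u);                  // 2-bit partial sums
      v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);  // 4-bit partial sums
      v = (v + (v >> 4)) & 0x0F0F0F0Fu;                  // 8-bit partial sums
      return (v * 0x01010101u) >> 24;                    // sum the bytes; Len - 8 == 24
    }
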
case ISD::CTLZ: {
@@ -2361,7 +2842,7 @@
//
// but see also: http://www.hackersdelight.org/HDcode/nlz.cc
EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy();
+ EVT ShVT = TLI.getShiftAmountTy(VT);
unsigned len = VT.getSizeInBits();
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
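
The CTLZ loop smears the highest set bit into every lower position, after which the leading-zero count equals the population count of the complement. In full, for i32 (reusing popcount32 from the sketch above):

    uint32_t ctlz32(uint32_t x) {
      x |= x >> 1;               // after the five ORs, every bit at or
      x |= x >> 2;               // below the highest set bit is set
      x |= x >> 4;
      x |= x >> 8;
      x |= x >> 16;
      return popcount32(~x);     // leading zeros = set bits of the complement
    }
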
@@ -2500,16 +2981,31 @@
case ISD::FLT_ROUNDS_:
Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
case ISD::PREFETCH:
case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
Results.push_back(Node->getOperand(0));
break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::ATOMIC_FENCE:
case ISD::MEMBARRIER: {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
+ // FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
@@ -2517,10 +3013,35 @@
Results.push_back(CallResult.second);
break;
}
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
+ SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
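
In C terms these two expansions amount to the following; this is only a sketch using the GCC __sync builtins (note __sync_lock_test_and_set is merely an acquire barrier, so the store half is illustrative rather than a faithful fence-for-fence translation):

    #include <cstdint>

    // ATOMIC_LOAD faked with a compare-and-swap that never changes the
    // value: swap 0 for 0 and keep the returned old value.
    int32_t atomic_load32(int32_t *p) {
      return __sync_val_compare_and_swap(p, 0, 0);
    }

    // ATOMIC_STORE faked with an exchange whose old value is discarded.
    void atomic_store32(int32_t *p, int32_t v) {
      (void)__sync_lock_test_and_set(p, v);
    }
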
// By default, atomic intrinsics are marked Legal and lowered. Targets
// which don't support them directly, however, may want libcalls, in which
// case they mark them Expand, and we get here.
- // FIXME: Unimplemented for now. Add libcalls.
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -2560,7 +3081,8 @@
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
@@ -2568,7 +3090,7 @@
break;
}
case ISD::FP_ROUND:
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -2584,7 +3106,7 @@
// SAR. However, it is doubtful that any exist.
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
- EVT ShiftAmountTy = TLI.getShiftAmountTy();
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
if (VT.isVector())
ShiftAmountTy = VT;
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
@@ -2598,7 +3120,7 @@
}
case ISD::FP_ROUND_INREG: {
// The only way we can lower this is to turn it into a TRUNCSTORE,
- // EXTLOAD pair, targetting a temporary location (a stack slot).
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
// NOTE: there is a choice here between constantly creating new stack
// slots and always reusing the same one. We currently always create
@@ -2619,8 +3141,7 @@
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
EVT NVT = Node->getValueType(0);
- const uint64_t zero[] = {0, 0};
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, VT);
@@ -2644,8 +3165,8 @@
Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
- SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
- false, false, 0);
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -2656,7 +3177,7 @@
TLI.getPointerTy()));
VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
- DAG.getConstant(-Align,
+ DAG.getConstant(-(int64_t)Align,
TLI.getPointerTy()));
}
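
The -(int64_t)Align cast matters here: Align is unsigned, and negating it before widening to a 64-bit pointer type would zero-extend and clobber the mask's high bits. The add-then-mask pair itself is the standard round-up idiom for power-of-two alignments:

    #include <cstdint>

    // Round p up to a power-of-two Align. For such Align,
    // (uint64_t)-(int64_t)Align == ~(Align - 1).
    uint64_t align_up(uint64_t p, uint64_t Align) {
      return (p + Align - 1) & (uint64_t)-(int64_t)Align;
    }
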
@@ -2666,10 +3187,10 @@
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0,
- false, false, 0);
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
@@ -2680,16 +3201,17 @@
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(2), VS, 0, false, false, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0,
- false, false, 0);
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
break;
}
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
Node->getOperand(0));
else
Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
@@ -2698,6 +3220,9 @@
case ISD::EXTRACT_SUBVECTOR:
Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
case ISD::CONCAT_VECTORS: {
Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
@@ -2716,7 +3241,7 @@
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (getTypeAction(EltVT) == Promote)
+ if (!TLI.isTypeLegal(EltVT))
EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
@@ -2745,7 +3270,7 @@
// 1 -> Hi
Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
DAG.getConstant(OpTy.getSizeInBits()/2,
- TLI.getShiftAmountTy()));
+ TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
} else {
// 0 -> Lo
@@ -2869,6 +3394,10 @@
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
RTLIB::REM_F80, RTLIB::REM_PPCF128));
break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+ break;
case ISD::FP16_TO_FP32:
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
break;
@@ -2885,6 +3414,22 @@
Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
break;
}
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
case ISD::SUB: {
EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
@@ -2905,24 +3450,25 @@
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
Tmp2 = Node->getOperand(0);
Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
- } else if (isSigned) {
+ } else if (isSigned)
Tmp1 = ExpandIntLibCall(Node, true,
RTLIB::SREM_I8,
RTLIB::SREM_I16, RTLIB::SREM_I32,
RTLIB::SREM_I64, RTLIB::SREM_I128);
- } else {
+ else
Tmp1 = ExpandIntLibCall(Node, false,
RTLIB::UREM_I8,
RTLIB::UREM_I16, RTLIB::UREM_I32,
RTLIB::UREM_I64, RTLIB::UREM_I128);
- }
Results.push_back(Tmp1);
break;
}
@@ -2932,7 +3478,9 @@
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, true)))
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
@@ -2961,6 +3509,11 @@
Results.push_back(Tmp1.getValue(1));
break;
}
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3045,6 +3598,7 @@
case ISD::UMULO:
case ISD::SMULO: {
EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue BottomHalf;
@@ -3060,15 +3614,8 @@
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
- } else {
- // FIXME: We should be able to fall back to a libcall with an illegal
- // type in some cases.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() * 2)) &&
- "Don't know how to expand this operation yet!");
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
@@ -3076,9 +3623,45 @@
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
DAG.getIntPtrConstant(1));
+ } else {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // low part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture and thus having to be split
+ // into two arguments.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
}
+
if (isSigned) {
- Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+ TLI.getShiftAmountTy(BottomHalf.getValueType()));
Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
ISD::SETNE);
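
The overflow tests in scalar form, assuming the double-width type exists (the new libcall branch above covers the case where it does not):

    #include <cstdint>

    bool umulo32(uint32_t a, uint32_t b, uint32_t &res) {
      uint64_t wide = (uint64_t)a * b;
      res = (uint32_t)wide;
      return (wide >> 32) != 0;      // unsigned: overflow iff the top half is non-zero
    }

    bool smulo32(int32_t a, int32_t b, int32_t &res) {
      int64_t wide = (int64_t)a * b;
      res = (int32_t)wide;
      return wide != (int64_t)res;   // signed: overflow iff the top half is not
    }                                // the sign-replication of the bottom half
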
@@ -3096,7 +3679,7 @@
Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
DAG.getConstant(PairTy.getSizeInBits()/2,
- TLI.getShiftAmountTy()));
+ TLI.getShiftAmountTy(PairTy)));
Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
break;
}
@@ -3131,8 +3714,8 @@
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0, MemVT,
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
@@ -3157,9 +3740,13 @@
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
+ // We test only the i1 bit. Skip the AND if UNDEF.
+ Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, Tmp2.getValueType()));
Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
- DAG.getCondCode(ISD::SETNE), Tmp2,
- DAG.getConstant(0, Tmp2.getValueType()),
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, Tmp3.getValueType()),
Node->getOperand(2));
}
Results.push_back(Tmp1);
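
The AND inserted here exists because a promoted i1 may carry garbage above bit 0, so the branch has to test (cond & 1) != 0 rather than cond != 0; in scalar form:

    // Only bit 0 of a promoted boolean is meaningful.
    bool brcond_taken(uint32_t promoted_i1) {
      return (promoted_i1 & 1) != 0;
    }
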
@@ -3274,7 +3861,7 @@
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
- DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
Results.push_back(Tmp1);
break;
}
@@ -3295,8 +3882,8 @@
case ISD::XOR: {
unsigned ExtOp, TruncOp;
if (OVT.isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else {
assert(OVT.isInteger() && "Cannot promote logic operation");
ExtOp = ISD::ANY_EXTEND;
@@ -3313,8 +3900,8 @@
case ISD::SELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else if (Node->getValueType(0).isInteger()) {
ExtOp = ISD::ANY_EXTEND;
TruncOp = ISD::TRUNCATE;
@@ -3341,12 +3928,12 @@
cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
// Cast the two input vectors.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
- Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
// Convert the shuffle mask to the right # elements.
Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
Results.push_back(Tmp1);
break;
}
@@ -3368,9 +3955,8 @@
// SelectionDAG::Legalize - This is the entry point for the file.
//
-void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
+void SelectionDAG::Legalize() {
/// run - This is the main entry point to this class.
///
- SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+ SelectionDAGLegalize(*this).LegalizeDAG();
}
-
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 650ee5a..7c1cc69 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -55,7 +55,8 @@
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
- case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+ case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
case ISD::ConstantFP:
R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
@@ -74,6 +75,7 @@
case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
@@ -102,10 +104,16 @@
SetSoftenedFloat(SDValue(N, ResNo), R);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return BitConvertToInteger(Op);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
// Convert the inputs to integers, and build a new pair out of them.
return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
@@ -133,8 +141,9 @@
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
- NVT);
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
}
@@ -176,25 +185,27 @@
// First get the sign bit of second operand.
SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
DAG.getConstant(RSize - 1,
- TLI.getShiftAmountTy()));
+ TLI.getShiftAmountTy(RVT)));
SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
// Shift right or sign-extend it if the two operands have different types.
int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
if (SizeDiff > 0) {
SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
- DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+ DAG.getConstant(SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
} else if (SizeDiff < 0) {
SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
- DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy()));
+ DAG.getConstant(-SizeDiff,
+ TLI.getShiftAmountTy(SignBit.getValueType())));
}
// Clear the sign bit of the first operand.
SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
DAG.getConstant(LSize - 1,
- TLI.getShiftAmountTy()));
+ TLI.getShiftAmountTy(LVT)));
Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
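
With both operands softened to their integer images, FCOPYSIGN reduces to bit surgery; for the equal-width case the whole sequence above is (sketch, helper name mine):

    #include <cstdint>
    #include <cstring>

    float copysign_bits(float mag, float sgn) {
      uint32_t m, s;
      std::memcpy(&m, &mag, sizeof m);
      std::memcpy(&s, &sgn, sizeof s);
      m = (m & 0x7FFFFFFFu)        // clear the sign bit of the first operand
        | (s & 0x80000000u);       // insert the sign bit of the second
      std::memcpy(&mag, &m, sizeof m);
      return mag;
    }

The shift-amount plumbing above handles the mixed-width case, where the extracted sign bit has to be moved across the size difference before it is inserted.
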
@@ -291,6 +302,19 @@
NVT, &Op, 1, false, N->getDebugLoc());
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
@@ -455,7 +479,7 @@
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(), NVT,
+ L->getPointerInfo(), NVT,
L->isVolatile(), L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -466,8 +490,7 @@
// Do a non-extending load followed by FP_EXTEND.
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
L->getMemoryVT(), dl, L->getChain(),
- L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
@@ -558,7 +581,7 @@
#endif
llvm_unreachable("Do not know how to soften this operator's operand!");
- case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
@@ -670,8 +693,8 @@
}
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
}
@@ -780,7 +803,7 @@
Val = GetSoftenedFloat(Val);
return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -811,12 +834,12 @@
#endif
llvm_unreachable("Do not know how to expand the result of this operator!");
- case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -835,6 +858,7 @@
case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
@@ -862,10 +886,10 @@
assert(NVT.getSizeInBits() == integerPartWidth &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
- Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
- &C.getRawData()[1])), NVT);
- Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
- &C.getRawData()[0])), NVT);
+ Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])),
+ NVT);
+ Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])),
+ NVT);
}
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
@@ -987,6 +1011,19 @@
GetPairElements(Call, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
@@ -1110,9 +1147,8 @@
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
- Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->getMemoryVT(), LD->isVolatile(),
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
// Remember the chain.
@@ -1172,7 +1208,7 @@
static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
- const uint64_t *Parts = 0;
+ ArrayRef<uint64_t> Parts;
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
@@ -1189,7 +1225,7 @@
}
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+ DAG.getConstantFP(APFloat(APInt(128, Parts)),
MVT::ppcf128));
Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
Lo, Hi, DAG.getCondCode(ISD::SETLT));
@@ -1222,7 +1258,7 @@
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -1262,8 +1298,7 @@
GetExpandedFloat(NewLHS, LHSLo, LHSHi);
GetExpandedFloat(NewRHS, RHSLo, RHSHi);
- EVT VT = NewLHS.getValueType();
- assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+ assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!");
// FIXME: This generated code sucks. We want to generate
// FCMPU crN, hi1, hi2
@@ -1344,7 +1379,7 @@
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APInt(128, 2, TwoE31));
+ APFloat APF = APFloat(APInt(128, TwoE31));
SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
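
The TwoE31 constant implements the usual unsigned-via-signed conversion split described by the inline comment; the same computation with double standing in for ppcf128:

    #include <cstdint>

    uint32_t f_to_u32(double x) {
      if (x >= 2147483648.0)   // X >= 2^31: convert X - 2^31, then add the
                               // top bit back as an integer
        return (uint32_t)(int32_t)(x - 2147483648.0) + 0x80000000u;
      return (uint32_t)(int32_t)x;   // otherwise a plain signed convert
    }
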
@@ -1416,12 +1451,13 @@
ST->getValue().getValueType());
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+ (void)NVT;
SDValue Lo, Hi;
GetExpandedOp(ST->getValue(), Lo, Hi);
return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f8c5890..a5c4c2d 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -47,9 +48,10 @@
N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator!");
+ case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
@@ -62,6 +64,7 @@
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
@@ -73,6 +76,19 @@
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS:
+ Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
@@ -102,6 +118,9 @@
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+ case ISD::ATOMIC_LOAD:
+ Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
+
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
@@ -124,6 +143,12 @@
SetPromotedInteger(SDValue(N, ResNo), Res);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetPromotedInteger(Op);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
// Sign-extend the new bits, and continue the assertion.
SDValue Op = SExtPromotedInteger(N->getOperand(0));
@@ -138,12 +163,26 @@
Op.getValueType(), Op, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
+ EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), ResVT,
+ N->getChain(), N->getBasePtr(),
+ N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
- Op2, N->getSrcValue(), N->getAlignment());
+ Op2, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -155,14 +194,15 @@
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(), N->getChain(), N->getBasePtr(),
- Op2, Op3, N->getSrcValue(), N->getAlignment());
+ Op2, Op3, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
@@ -174,26 +214,27 @@
default:
assert(false && "Unknown type action!");
break;
- case Legal:
+ case TargetLowering::TypeLegal:
break;
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
if (NOutVT.bitsEq(NInVT))
// The input promotes to the same size. Convert the promoted value.
- return DAG.getNode(ISD::BIT_CONVERT, dl,
- NOutVT, GetPromotedInteger(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
- case ExpandInteger:
- case ExpandFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
break;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
// Convert the element to an integer and promote it by hand.
- return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
- BitConvertToInteger(GetScalarizedVector(InOp)));
- case SplitVector: {
- // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
@@ -207,12 +248,12 @@
EVT::getIntegerVT(*DAG.getContext(),
NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
- return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
if (OutVT.bitsEq(NInVT))
// The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -293,7 +334,7 @@
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.set(OVT.getSizeInBits());
+ TopBit.setBit(OVT.getSizeInBits());
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
}
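
The setBit trick in scalar terms: plant a set bit just past the original width so that a zero input yields the original type's bit width rather than an undefined count. For an i16 cttz promoted to i32 (assuming a GCC-style __builtin_ctz):

    #include <cstdint>

    unsigned cttz16_via_i32(uint16_t v) {
      uint32_t x = (uint32_t)v | (1u << 16);  // bit 16 guarantees a nonzero input
      return __builtin_ctz(x);                // v == 0 now gives 16
    }
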
@@ -325,8 +366,8 @@
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
- ISD::AssertZext : ISD::AssertSext, dl,
- NVT, Res, DAG.getValueType(N->getValueType(0)));
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
@@ -343,7 +384,8 @@
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
- if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
SDValue Res = GetPromotedInteger(N->getOperand(0));
assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
@@ -355,7 +397,8 @@
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(N->getOperand(0).getValueType()));
if (N->getOpcode() == ISD::ZERO_EXTEND)
- return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
return Res;
}
@@ -371,8 +414,8 @@
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
DebugLoc dl = N->getDebugLoc();
- SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
N->getMemoryVT(), N->isVolatile(),
N->isNonTemporal(), N->getAlignment());
@@ -442,6 +485,14 @@
LHS.getValueType(), N->getOperand(0),LHS,RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
+ SDValue Mask = GetPromotedInteger(N->getOperand(0));
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
+ LHS.getValueType(), Mask, LHS, RHS);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(2));
SDValue RHS = GetPromotedInteger(N->getOperand(3));
@@ -452,16 +503,24 @@
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
- assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
DebugLoc dl = N->getDebugLoc();
+ assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ "Vector compare must return a vector result!");
// Get the SETCC result using the canonical SETCC type.
- SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
N->getOperand(1), N->getOperand(2));
- // Convert to the expected type.
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ // Convert to the expected type.
return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
}
@@ -505,20 +564,44 @@
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
- switch (getTypeAction(N->getOperand(0).getValueType())) {
+ switch (getTypeAction(InOp.getValueType())) {
default: llvm_unreachable("Unknown type action!");
- case Legal:
- case ExpandInteger:
- Res = N->getOperand(0);
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
break;
- case PromoteInteger:
- Res = GetPromotedInteger(N->getOperand(0));
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
break;
+ case TargetLowering::TypeSplitVector:
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ EVT EltVT = InVT.getScalarType();
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(0));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(NumElts/2));
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
// Truncate to NVT instead of VT
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
@@ -549,6 +632,48 @@
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed, we extend the inputs to a larger
+ // type, do the multiply at that width, and then check the high bits of
+ // the result to see if the overflow happened.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+ // Overflow occurred iff the high part of the result does not
+ // zero/sign-extend the low part.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred iff the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred iff the high part does not sign-extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
@@ -602,11 +727,6 @@
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
- assert(ResNo == 1 && "Only boolean result promotion currently supported!");
- return PromoteIntRes_Overflow(N);
-}
-
//===----------------------------------------------------------------------===//
// Integer Operand Promotion
//===----------------------------------------------------------------------===//
@@ -631,11 +751,16 @@
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
case ISD::INSERT_VECTOR_ELT:
@@ -643,6 +768,7 @@
case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
@@ -713,7 +839,14 @@
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
+ N->getOrdering(), N->getSynchScope());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
@@ -835,14 +968,17 @@
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
- assert(OpNo == 0 && "Only know how to promote condition");
+ assert(OpNo == 0 && "Only know how to promote the condition!");
+ SDValue Cond = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
// Promote all the way up to the canonical SetCC type.
- EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
- SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+ EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ?
+ OpTy.getScalarType() : OpTy);
+ Cond = PromoteTargetBoolean(Cond, SVT);
- return SDValue(DAG.UpdateNodeOperands(N, Cond,
- N->getOperand(1), N->getOperand(2)), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
+ N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
@@ -889,7 +1025,6 @@
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -898,8 +1033,8 @@
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
- SVOffset, N->getMemoryVT(),
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
isVolatile, isNonTemporal, Alignment);
}
@@ -917,7 +1052,8 @@
DebugLoc dl = N->getDebugLoc();
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
- return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
}
@@ -946,12 +1082,12 @@
#endif
llvm_unreachable("Do not know how to expand the result of this operator!");
- case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -977,6 +1113,24 @@
case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
case ISD::AND:
case ISD::OR:
@@ -999,6 +1153,8 @@
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1006,11 +1162,98 @@
SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
}
+/// Lower an atomic node to the appropriate builtin call.
+std::pair<SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
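
The switch above only selects a runtime-library entry; the call itself is built by ExpandChainLibCall, added later in this patch. For orientation, the RTLIB::SYNC_* entries name the GCC-compatible __sync_* functions suffixed with the access size. A short sketch of the targeted operations through the equivalent builtins (illustrative, assumes a GCC/Clang toolchain):

#include <cassert>
#include <cstdint>

uint32_t fetch_add_u32(uint32_t *p, uint32_t v) {
  return __sync_fetch_and_add(p, v);  // RTLIB::SYNC_FETCH_AND_ADD_4
}

uint32_t cas_u32(uint32_t *p, uint32_t expected, uint32_t desired) {
  // RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4: returns the old value.
  return __sync_val_compare_and_swap(p, expected, desired);
}

int main() {
  uint32_t x = 5;
  assert(fetch_add_u32(&x, 3) == 5 && x == 8);
  assert(cas_u32(&x, 8, 42) == 8 && x == 42);
  return 0;
}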
+
/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
@@ -1025,8 +1268,8 @@
Lo = Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
Lo = DAG.getConstant(0, NVT);
- Hi = DAG.getNode(ISD::SHL, dl,
- NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, NVT);
Hi = InL;
@@ -1034,17 +1277,17 @@
TLI.isOperationLegalOrCustom(ISD::ADDC,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
// Emit this X << 1 as X+X.
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
SDValue LoOps[2] = { InL, InL };
- Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
- Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
} else {
- Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
- Hi = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(NVTBits-Amt, ShTy)));
}
return;
@@ -1055,43 +1298,43 @@
Lo = DAG.getConstant(0, NVT);
Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRL, dl,
+ Lo = DAG.getNode(ISD::SRL, DL,
NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
Hi = DAG.getConstant(0, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getConstant(0, NVT);
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
return;
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt > VTBits) {
- Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(Amt-NVTBits, ShTy));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt == NVTBits) {
Lo = InH;
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
}
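
The three SHL cases above (shift amount past, exactly at, or within the low half) have a direct scalar analogue. A self-contained sketch for a 64-bit shift split into 32-bit halves, assuming 0 < Amt < 64 (plain C++, hypothetical helper name):

#include <cassert>
#include <cstdint>

// Split SHL: compute (Hi:Lo) << Amt using only 32-bit shifts, mirroring the
// Amt > NVTBits / Amt == NVTBits / Amt < NVTBits branches above.
void shl64(uint32_t InL, uint32_t InH, unsigned Amt,
           uint32_t &Lo, uint32_t &Hi) {
  const unsigned NVTBits = 32;
  if (Amt > NVTBits) {
    Lo = 0;
    Hi = InL << (Amt - NVTBits);
  } else if (Amt == NVTBits) {
    Lo = 0;
    Hi = InL;
  } else {
    Lo = InL << Amt;
    Hi = (InH << Amt) | (InL >> (NVTBits - Amt));
  }
}

int main() {
  uint32_t lo, hi;
  uint64_t x = 0x0123456789abcdefULL;
  for (unsigned amt = 1; amt < 64; ++amt) {
    shl64(uint32_t(x), uint32_t(x >> 32), amt, lo, hi);
    uint64_t ref = x << amt;
    assert(lo == uint32_t(ref) && hi == uint32_t(ref >> 32));
  }
  return 0;
}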
@@ -1269,7 +1512,7 @@
// Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
// them. TODO: Teach operation legalization how to expand unsupported
// ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
- // a carry of type MVT::Flag, but there doesn't seem to be any way to
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
// generate a value of this type in the expanded code sequence.
bool hasCarry =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
@@ -1277,7 +1520,7 @@
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (hasCarry) {
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
HiOps[2] = Lo.getValue(1);
@@ -1287,31 +1530,32 @@
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
}
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
- if (N->getOpcode() == ISD::ADD) {
- Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
- SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
- SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
- ISD::SETULT);
- SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
- DAG.getConstant(1, NVT), Carry1);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
- } else {
- Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
- SDValue Cmp =
- DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
- LoOps[0], LoOps[1], ISD::SETULT);
- SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
- }
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
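
The fallback path above reconstructs the carry with unsigned compares: after a wrapping add, the low sum is smaller than either addend exactly when a carry occurred. A scalar sketch of both expansions (illustrative names, 128-bit values as 64-bit halves):

#include <cassert>
#include <cstdint>

// Expanded 128-bit ADD without carry flags, following the Cmp1/Carry1 and
// Cmp2/Carry2 selects above.
void add128(uint64_t aL, uint64_t aH, uint64_t bL, uint64_t bH,
            uint64_t &loOut, uint64_t &hiOut) {
  uint64_t lo = aL + bL;
  uint64_t hi = aH + bH;
  uint64_t carry1 = (lo < aL) ? 1 : 0;       // Cmp1 / Carry1
  uint64_t carry2 = (lo < bL) ? 1 : carry1;  // Cmp2 / Carry2
  loOut = lo;
  hiOut = hi + carry2;
}

// Expanded SUB: one compare of the original operands yields the borrow.
void sub128(uint64_t aL, uint64_t aH, uint64_t bL, uint64_t bH,
            uint64_t &loOut, uint64_t &hiOut) {
  loOut = aL - bL;
  hiOut = aH - bH - ((aL < bL) ? 1 : 0);     // Cmp / Borrow
}

int main() {
  uint64_t lo, hi;
  add128(~0ULL, 0, 1, 0, lo, hi);            // carries into the high half
  assert(lo == 0 && hi == 1);
  sub128(0, 1, 1, 0, lo, hi);                // borrows from the high half
  assert(lo == ~0ULL && hi == 0);
  return 0;
}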
@@ -1322,7 +1566,7 @@
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1348,7 +1592,7 @@
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1361,6 +1605,12 @@
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
+void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Res = DisintegrateMERGE_VALUES(N, ResNo);
+ SplitInteger(Res, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -1373,7 +1623,8 @@
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
- assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
@@ -1437,7 +1688,7 @@
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
- Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
}
@@ -1524,7 +1775,6 @@
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -1535,7 +1785,7 @@
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
- Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
MemVT, isVolatile, isNonTemporal, Alignment);
// Remember the chain.
@@ -1557,7 +1807,7 @@
}
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
- Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -1568,8 +1818,8 @@
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1586,7 +1836,7 @@
unsigned ExcessBits = (EBytes - IncrementSize)*8;
// Load both the high bits and maybe some of the low bits.
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
isVolatile, isNonTemporal, Alignment);
@@ -1595,8 +1845,8 @@
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
// Load the rest of the low bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1891,7 +2141,8 @@
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
- assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
@@ -1987,6 +2238,86 @@
ReplaceValueWith(SDValue(N, 1), Ofl);
}
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+ DebugLoc dl = N->getDebugLoc();
+
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
+    // The divide will be faster than the libcall: MUL / RHS recovers LHS
+    // exactly unless the multiplication wrapped. Select 1 in place of a
+    // zero RHS so the division is always well-defined.
+    SDValue isZero = DAG.getSetCC(DL, TLI.getSetCCResultType(VT),
+                                  RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+    SDValue NotZero = DAG.getNode(ISD::SELECT, DL, VT, isZero,
+                                  DAG.getConstant(1, VT), RHS);
+    SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
+    SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS,
+                                    ISD::SETNE);
+    // Multiplying by zero never overflows; force the flag to false then.
+    Overflow = DAG.getNode(ISD::SELECT, DL, N->getValueType(1), isZero,
+                           DAG.getConstant(0, N->getValueType(1)), Overflow);
+    ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = N->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ true, Func, Args, DAG, dl);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
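
A scalar restatement of the divide-based fast path may help: after a wrapping multiply, dividing the product by a non-zero RHS must recover LHS unless overflow occurred, and a zero RHS can never overflow. Sketch (plain C++, hypothetical helper):

#include <cassert>
#include <cstdint>

bool umulo64(uint64_t lhs, uint64_t rhs, uint64_t &product) {
  product = lhs * rhs;          // wrapping full-width multiply
  if (rhs == 0)
    return false;               // 0 * x can never overflow
  return product / rhs != lhs;  // division must invert the multiply
}

int main() {
  uint64_t p;
  assert(!umulo64(1u << 20, 1u << 20, p));     // 2^40 fits in 64 bits
  assert(umulo64(1ULL << 40, 1ULL << 40, p));  // 2^80 does not
  assert(!umulo64(~0ULL, 0, p) && p == 0);     // zero RHS: no overflow
  return 0;
}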
+
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -2039,7 +2370,8 @@
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
- assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
@@ -2054,6 +2386,20 @@
}
}
+void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
+ N->getOperand(0),
+ N->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
+}
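
Expanding an atomic load as a compare-and-swap against zero is a standard trick: the exchange only ever stores a 0 over an existing 0, so memory is unchanged, yet the old value comes back atomically. In builtin form (a sketch, not what the legalizer literally emits):

#include <cassert>
#include <cstdint>

// Atomic 64-bit load via cmpxchg(p, 0, 0), as the ATOMIC_CMP_SWAP node
// built above computes.
uint64_t atomic_load_u64(uint64_t *p) {
  return __sync_val_compare_and_swap(p, uint64_t(0), uint64_t(0));
}

int main() {
  uint64_t v = 0xdeadbeefULL;
  assert(atomic_load_u64(&v) == 0xdeadbeefULL && v == 0xdeadbeefULL);
  return 0;
}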
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
@@ -2078,7 +2424,7 @@
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -2098,6 +2444,8 @@
case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
case ISD::RETURNADDR:
case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+
+ case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2308,7 +2656,6 @@
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -2319,14 +2666,16 @@
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
- return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
Alignment);
- } else if (TLI.isLittleEndian()) {
+ }
+
+ if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -2337,50 +2686,49 @@
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- } else {
- // Big-endian - high bits are at low addresses. Favor aligned stores at
- // the cost of some bit-fiddling.
- GetExpandedInteger(N->getValue(), Lo, Hi);
-
- EVT ExtVT = N->getMemoryVT();
- unsigned EBytes = ExtVT.getStoreSize();
- unsigned IncrementSize = NVT.getSizeInBits()/8;
- unsigned ExcessBits = (EBytes - IncrementSize)*8;
- EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
- ExtVT.getSizeInBits() - ExcessBits);
-
- if (ExcessBits < NVT.getSizeInBits()) {
- // Transfer high bits from the top of Lo to the bottom of Hi.
- Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
- TLI.getPointerTy()));
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SRL, dl, NVT, Lo,
- DAG.getConstant(ExcessBits,
- TLI.getPointerTy())));
- }
-
- // Store both the high bits and maybe some of the low bits.
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset, HiVT, isVolatile, isNonTemporal,
- Alignment);
-
- // Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
- // Store the lowest ExcessBits bits in the second half.
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
- EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal,
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
@@ -2460,8 +2808,10 @@
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(),
- FudgePtr, NULL, 0, MVT::f32,
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
@@ -2472,3 +2822,200 @@
"Don't know how to expand this UINT_TO_FP!");
return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ return Swap.getValue(1);
+}
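
Likewise, the atomic store becomes an atomic swap whose data result is discarded; only the chain (value 1 of the swap) is kept. A sketch using the __atomic builtin, since __sync_lock_test_and_set is only an acquire-barrier exchange on some targets:

#include <cassert>
#include <cstdint>

// Atomic 64-bit store emulated by an exchange whose old value is ignored.
void atomic_store_u64(uint64_t *p, uint64_t v) {
  (void)__atomic_exchange_n(p, v, __ATOMIC_SEQ_CST);
}

int main() {
  uint64_t x = 0;
  atomic_store_u64(&x, 7);
  assert(x == 7);
  return 0;
}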
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue BaseIdx = N->getOperand(1);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getIntPtrConstant(i));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ // Insert the converted element to the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ NewMask.push_back(SV->getMaskElt(i));
+ }
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+  SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType() &&
+ "Invalid input vector types");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT OutElemTy = NOutVT.getVectorElementType();
+
+ unsigned NumElem0 = Op0.getValueType().getVectorNumElements();
+ unsigned NumElem1 = Op1.getValueType().getVectorNumElements();
+ unsigned NumOutElem = NOutVT.getVectorNumElements();
+ assert(NumElem0 + NumElem1 == NumOutElem &&
+ "Invalid number of incoming elements");
+
+ // Take the elements from the first vector.
+ SmallVector<SDValue, 8> Ops(NumOutElem);
+ for (unsigned i = 0; i < NumElem0; ++i) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ Op0.getValueType().getScalarType(), Op0,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
+
+ // Take the elements from the second vector
+ for (unsigned i = 0; i < NumElem1; ++i) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ Op1.getValueType().getScalarType(), Op1,
+ DAG.getIntPtrConstant(i));
+ Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ V0, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = N->getOperand(1);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+  // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+  // element types. If this is the case then we need to extend the outgoing
+  // value rather than truncate it.
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElems = N->getNumOperands();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+ NewOps.reserve(NumElems);
+
+ // For each incoming vector
+ for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+    for (unsigned i = 0; i < NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
+ Incoming, DAG.getIntPtrConstant(i));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+ &NewOps[0], NewOps.size());
+}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6e56c98..a4bb577 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -224,38 +224,38 @@
switch (getTypeAction(ResultVT)) {
default:
assert(false && "Unknown action!");
- case Legal:
+ case TargetLowering::TypeLegal:
break;
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
// with a legal type). Results can be remapped using ReplaceValueWith,
// or their promoted/expanded/etc values registered in PromotedIntegers,
// ExpandedIntegers etc.
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
PromoteIntegerResult(N, i);
Changed = true;
goto NodeDone;
- case ExpandInteger:
+ case TargetLowering::TypeExpandInteger:
ExpandIntegerResult(N, i);
Changed = true;
goto NodeDone;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
SoftenFloatResult(N, i);
Changed = true;
goto NodeDone;
- case ExpandFloat:
+ case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
goto NodeDone;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
ScalarizeVectorResult(N, i);
Changed = true;
goto NodeDone;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
SplitVectorResult(N, i);
Changed = true;
goto NodeDone;
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
WidenVectorResult(N, i);
Changed = true;
goto NodeDone;
@@ -277,36 +277,36 @@
switch (getTypeAction(OpVT)) {
default:
assert(false && "Unknown action!");
- case Legal:
+ case TargetLowering::TypeLegal:
continue;
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
// operands in place, and return "true".
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
NeedsReanalyzing = PromoteIntegerOperand(N, i);
Changed = true;
break;
- case ExpandInteger:
+ case TargetLowering::TypeExpandInteger:
NeedsReanalyzing = ExpandIntegerOperand(N, i);
Changed = true;
break;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
NeedsReanalyzing = SoftenFloatOperand(N, i);
Changed = true;
break;
- case ExpandFloat:
+ case TargetLowering::TypeExpandFloat:
NeedsReanalyzing = ExpandFloatOperand(N, i);
Changed = true;
break;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
NeedsReanalyzing = ScalarizeVectorOperand(N, i);
Changed = true;
break;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
NeedsReanalyzing = SplitVectorOperand(N, i);
Changed = true;
break;
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
NeedsReanalyzing = WidenVectorOperand(N, i);
Changed = true;
break;
@@ -714,6 +714,11 @@
if (M->getNodeId() == Processed)
RemapValue(NewVal);
DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
}
// The original node continues to exist in the DAG, marked NewNode.
}
@@ -858,7 +863,7 @@
/// BitConvertToInteger - Convert to an integer of the same size.
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
unsigned BitWidth = Op.getValueType().getSizeInBits();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
@@ -869,7 +874,7 @@
unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = Op.getValueType().getVectorNumElements();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
}
@@ -880,10 +885,11 @@
// the source and destination types.
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0);
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -940,6 +946,13 @@
return true;
}
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ if (i != ResNo)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ return SDValue(N, ResNo);
+}
+
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split into two not necessarily identical pieces.
void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
@@ -1040,7 +1053,7 @@
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC), false,
@@ -1049,29 +1062,46 @@
return CallInfo.first;
}
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ return CallInfo;
+}
+
/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
/// of the given type. A target boolean is an integer value, not necessarily of
/// type i1, the bits of which conform to getBooleanContents.
SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
DebugLoc dl = Bool.getDebugLoc();
- ISD::NodeType ExtendCode;
- switch (TLI.getBooleanContents()) {
- default:
- assert(false && "Unknown BooleanContent!");
- case TargetLowering::UndefinedBooleanContent:
- // Extend to VT by adding rubbish bits.
- ExtendCode = ISD::ANY_EXTEND;
- break;
- case TargetLowering::ZeroOrOneBooleanContent:
- // Extend to VT by adding zero bits.
- ExtendCode = ISD::ZERO_EXTEND;
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent: {
- // Extend to VT by copying the sign bit.
- ExtendCode = ISD::SIGN_EXTEND;
- break;
- }
- }
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector()));
return DAG.getNode(ExtendCode, dl, VT, Bool);
}
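
getExtendForContent centralizes the mapping that the deleted switch spelled out: each boolean convention pairs with the extension that preserves its bit pattern. A reconstruction in standalone form (enum and function names local to this sketch):

#include <cassert>

enum BooleanContent {
  UndefinedBooleanContent,         // only bit 0 is meaningful
  ZeroOrOneBooleanContent,         // booleans are 0 or 1
  ZeroOrNegativeOneBooleanContent  // booleans are 0 or all-ones
};

enum ExtendKind { AnyExtend, ZeroExtend, SignExtend };

// Pick the extension whose fill bits preserve the target's boolean
// convention, as the removed switch did inline.
ExtendKind extendForContent(BooleanContent C) {
  switch (C) {
  case UndefinedBooleanContent:         return AnyExtend;  // rubbish bits
  case ZeroOrOneBooleanContent:         return ZeroExtend; // zero bits
  case ZeroOrNegativeOneBooleanContent: return SignExtend; // copy sign bit
  }
  return AnyExtend;
}

int main() {
  assert(extendForContent(ZeroOrOneBooleanContent) == ZeroExtend);
  assert(extendForContent(ZeroOrNegativeOneBooleanContent) == SignExtend);
  return 0;
}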
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bd86694..abacdac 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -57,16 +57,6 @@
// 1+ - This is a node which has this many unprocessed operands.
};
private:
- enum LegalizeAction {
- Legal, // The target natively supports this type.
- PromoteInteger, // Replace this integer type with a larger one.
- ExpandInteger, // Split this integer type into two of half the size.
- SoftenFloat, // Convert this float type to a same size integer type.
- ExpandFloat, // Split this float type into two of half the size.
- ScalarizeVector, // Replace this one-element vector with its element type.
- SplitVector, // Split this vector type into two of half the size.
- WidenVector // This vector type should be widened into a larger vector.
- };
/// ValueTypeActions - This is a bitvector that contains two bits for each
/// simple value type, where the two bits correspond to the LegalizeAction
@@ -74,44 +64,13 @@
TargetLowering::ValueTypeActionImpl ValueTypeActions;
/// getTypeAction - Return how we should legalize values of this type.
- LegalizeAction getTypeAction(EVT VT) const {
- switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) {
- default:
- assert(false && "Unknown legalize action!");
- case TargetLowering::Legal:
- return Legal;
- case TargetLowering::Promote:
- // Promote can mean
- // 1) For integers, use a larger integer type (e.g. i8 -> i32).
- // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
- if (!VT.isVector())
- return PromoteInteger;
- else
- return WidenVector;
- case TargetLowering::Expand:
- // Expand can mean
- // 1) split scalar in half, 2) convert a float to an integer,
- // 3) scalarize a single-element vector, 4) split a vector in two.
- if (!VT.isVector()) {
- if (VT.isInteger())
- return ExpandInteger;
- else if (VT.getSizeInBits() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
- return SoftenFloat;
- else
- return ExpandFloat;
- } else if (VT.getVectorNumElements() == 1) {
- return ScalarizeVector;
- } else {
- return SplitVector;
- }
- }
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
}
/// isTypeLegal - Return true if this type is legal on this target.
bool isTypeLegal(EVT VT) const {
- return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) ==
- TargetLowering::Legal);
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
/// IgnoreNodeResults - Pretend all of this node's results are legal.
@@ -189,12 +148,23 @@
SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+ /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
+ /// node with the corresponding input operand, except for the result 'ResNo',
+ /// which is returned.
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
+
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
DebugLoc dl);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
@@ -238,16 +208,24 @@
EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
Op = GetPromotedInteger(Op);
- return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_AssertSext(SDNode *N);
SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
- SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
@@ -264,6 +242,7 @@
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SDIV(SDNode *N);
SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
SDValue PromoteIntRes_SHL(SDNode *N);
@@ -281,18 +260,23 @@
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
- SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
@@ -318,6 +302,8 @@
// Integer Result Expansion.
void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -347,6 +333,9 @@
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
@@ -355,7 +344,7 @@
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
- SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
@@ -367,6 +356,7 @@
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+ SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, DebugLoc dl);
@@ -390,7 +380,8 @@
// Result Float to Integer Conversion.
void SoftenFloatResult(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
@@ -406,6 +397,7 @@
SDValue SoftenFloatRes_FLOG(SDNode *N);
SDValue SoftenFloatRes_FLOG2(SDNode *N);
SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N);
@@ -429,7 +421,7 @@
// Operand Float to Integer Conversion.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
@@ -470,6 +462,7 @@
void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -514,13 +507,15 @@
// Vector Result Scalarization: <1 x ty> -> ty.
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
- SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -535,7 +530,7 @@
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
- SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -560,11 +555,10 @@
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -580,10 +574,13 @@
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
- SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_VSETCC(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -605,7 +602,8 @@
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
- SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
@@ -630,7 +628,7 @@
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned ResNo);
- SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -701,7 +699,8 @@
void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
// Generic Result Splitting.
- void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -723,7 +722,9 @@
}
// Generic Result Expansion.
- void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -731,7 +732,7 @@
void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
// Generic Operand Expansion.
- SDValue ExpandOp_BIT_CONVERT (SDNode *N);
+ SDValue ExpandOp_BITCAST (SDNode *N);
SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 9c2b1d9..8e7e498 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -31,9 +31,13 @@
// These routines assume that the Lo/Hi part is stored first in memory on
// little/big-endian machines, followed by the Hi/Lo part. This means that
// they cannot be used as is on vectors, for which Lo is always stored first.
+void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetExpandedOp(Op, Lo, Hi);
+}
-void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
SDValue InOp = N->getOperand(0);
@@ -44,37 +48,37 @@
switch (getTypeAction(InVT)) {
default:
assert(false && "Unknown type action!");
- case Legal:
- case PromoteInteger:
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
break;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
// Convert the integer operand instead.
SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case ExpandInteger:
- case ExpandFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
GetSplitVector(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
// Convert the element instead.
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case WidenVector: {
- assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
InVT.getVectorNumElements()/2);
@@ -84,19 +88,19 @@
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
}
}
if (InVT.isVector() && OutVT.isInteger()) {
- // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
// is legal but the result is not.
EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
if (isTypeLegal(NVT)) {
- SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
@@ -119,14 +123,14 @@
getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0,
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -134,7 +138,8 @@
DAG.getIntPtrConstant(IncrementSize));
// Load the second half from the stack slot.
- Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
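For reference, a minimal standalone C++ model of this stack-slot fallback (illustrative only; expandBitcast is a hypothetical name, and the recombination check assumes a little-endian host):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Model of the fallback above: to "bitcast" a value whose destination
    // type is illegal, store it to a stack slot, then load the two halves
    // back at offsets 0 and sizeof(half).
    static void expandBitcast(double In, uint32_t &Lo, uint32_t &Hi) {
      unsigned char Slot[sizeof(double)];
      std::memcpy(Slot, &In, sizeof In);             // store to the stack slot
      std::memcpy(&Lo, Slot, sizeof Lo);             // load the half at offset 0
      std::memcpy(&Hi, Slot + sizeof Lo, sizeof Hi); // load the half at offset 4
    }

    int main() {
      uint32_t Lo, Hi;
      expandBitcast(1.0, Lo, Hi);
      // On a little-endian host the halves recombine to the bits of 1.0.
      assert(((uint64_t(Hi) << 32) | Lo) == 0x3FF0000000000000ull);
      return 0;
    }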
@@ -172,7 +177,7 @@
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, 2*OldElts),
OldVec);
@@ -204,22 +209,21 @@
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
- Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset+IncrementSize,
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -262,14 +266,14 @@
// Generic Operand Expansion.
//===--------------------------------------------------------------------===//
-SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
if (N->getValueType(0).isVector()) {
// An illegal expanding type is being converted to a legal vector type.
// Make a two element vector out of the expanded parts and convert that
// instead, but only if the new vector type is legal (otherwise there
// is no point, and it might create expansion loops). For example, on
- // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = EVT::getVectorVT(*DAG.getContext(),
TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
@@ -283,7 +287,7 @@
std::swap(Parts[0], Parts[1]);
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
}
@@ -322,7 +326,7 @@
&NewElts[0], NewElts.size());
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
@@ -347,7 +351,7 @@
// Bitconvert to a vector of twice the length with elements of the expanded
// type, insert the expanded vector elements, and then convert back.
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
NewVecVT, N->getOperand(0));
SDValue Lo, Hi;
@@ -363,7 +367,7 @@
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
@@ -390,7 +394,6 @@
St->getValue().getValueType());
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
- int SVOffset = St->getSrcValueOffset();
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
@@ -404,14 +407,14 @@
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
- Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
- SVOffset + IncrementSize,
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -428,37 +431,34 @@
// bytes; for integers and floats it is Lo first if and only if the machine is
// little-endian).
-void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi) {
- // A MERGE_VALUES node can produce any number of values. We know that the
- // first illegal one needs to be expanded into Lo/Hi.
- unsigned i;
-
- // The string of legal results gets turned into input operands, which have
- // the same type.
- for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
- ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
-
- // The first illegal result must be the one that needs to be expanded.
- GetSplitOp(N->getOperand(i), Lo, Hi);
-
- // Legalize the rest of the results into the input operands whether they are
- // legal or not.
- unsigned e = N->getNumValues();
- for (++i; i != e; ++i)
- ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetSplitOp(Op, Lo, Hi);
}
void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue LL, LH, RL, RH;
+ SDValue LL, LH, RL, RH, CL, CH;
DebugLoc dl = N->getDebugLoc();
GetSplitOp(N->getOperand(1), LL, LH);
GetSplitOp(N->getOperand(2), RL, RH);
SDValue Cond = N->getOperand(0);
- Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
- Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+ CL = CH = Cond;
+ if (Cond.getValueType().isVector()) {
+ assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ "Condition legalized before result?");
+ unsigned NumElements = Cond.getValueType().getVectorNumElements();
+ EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
+ CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(0));
+ CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+ DAG.getIntPtrConstant(NumElements / 2));
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
}
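As a sanity check on the transform above, a small standalone C++ model (hypothetical helpers, not part of this patch): splitting the condition vector at the same midpoint as the data operands yields the same lanes as the unsplit select.

    #include <cassert>
    #include <vector>

    // Per-lane select over a whole vector.
    static std::vector<int> vselect(const std::vector<bool> &C,
                                    const std::vector<int> &A,
                                    const std::vector<int> &B) {
      std::vector<int> R(A.size());
      for (size_t i = 0; i != A.size(); ++i)
        R[i] = C[i] ? A[i] : B[i];
      return R;
    }

    int main() {
      std::vector<bool> C = {true, false, true, false};
      std::vector<int> A = {1, 2, 3, 4}, B = {5, 6, 7, 8};

      // Split all operands (including the condition) at the midpoint,
      // select each half, then concatenate, as SplitRes_SELECT does above.
      std::vector<bool> CL(C.begin(), C.begin() + 2), CH(C.begin() + 2, C.end());
      std::vector<int> AL(A.begin(), A.begin() + 2), AH(A.begin() + 2, A.end());
      std::vector<int> BL(B.begin(), B.begin() + 2), BH(B.begin() + 2, B.end());

      std::vector<int> Lo = vselect(CL, AL, BL), Hi = vselect(CH, AH, BH);
      Lo.insert(Lo.end(), Hi.begin(), Hi.end());
      assert(Lo == vselect(C, A, B));
      return 0;
    }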
void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 621c087..f815b00 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -58,6 +58,12 @@
SDValue UnrollVSETCC(SDValue Op);
// Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
// isn't legal.
SDValue ExpandFNEG(SDValue Op);
+ // Implements expansion for UINT_TO_FP; falls back to UnrollVectorOp if
+ // SINT_TO_FP and SRL on vectors aren't legal.
+ SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ // Implements VSELECT in terms of XOR, AND and OR when blend is not
+ // supported by the target.
+ SDValue ExpandVSELECT(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
@@ -154,8 +160,9 @@
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::SELECT:
+ case ISD::VSELECT:
case ISD::SELECT_CC:
- case ISD::VSETCC:
+ case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
@@ -179,9 +186,9 @@
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
- case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG:
QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
break;
@@ -207,9 +214,13 @@
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::FNEG)
+ if (Node->getOpcode() == ISD::VSELECT)
+ Result = ExpandVSELECT(Op);
+ else if (Node->getOpcode() == ISD::UINT_TO_FP)
+ Result = ExpandUINT_TO_FLOAT(Op);
+ else if (Node->getOpcode() == ISD::FNEG)
Result = ExpandFNEG(Op);
- else if (Node->getOpcode() == ISD::VSETCC)
+ else if (Node->getOpcode() == ISD::SETCC)
Result = UnrollVSETCC(Op);
else
Result = DAG.UnrollVectorOp(Op.getNode());
@@ -241,16 +252,90 @@
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+ // Implement VSELECT in terms of XOR, AND and OR on platforms that do not
+ // support blend natively.
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ // If we can't even use the basic vector operations of AND, OR and XOR,
+ // we will have to scalarize the op.
+ if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
+ && "Invalid mask size");
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+ return DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+}
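The identity this expansion relies on, checked as standalone C++ (illustrative; it assumes each mask lane is all ones or all zeros, which the sign-extended setcc lanes here are):

    #include <cassert>
    #include <cstdint>

    // select(mask, a, b) == (a & mask) | (b & ~mask), provided each mask
    // lane is either all ones or all zeros.
    static uint32_t selectLane(uint32_t Mask, uint32_t A, uint32_t B) {
      return (A & Mask) | (B & ~Mask);
    }

    int main() {
      assert(selectLane(0xFFFFFFFFu, 10, 20) == 10); // true lane picks A
      assert(selectLane(0x00000000u, 10, 20) == 20); // false lane picks B
      return 0;
    }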
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+ EVT VT = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+ assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+ unsigned BW = SVT.getSizeInBits();
+ SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+ uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+ // Two to the power of half-word-size.
+ SDValue TWOHW = DAG.getConstantFP(double(1ULL << (BW/2)), Op.getValueType());
+
+ // HI gets the upper half shifted down; LO gets the lower half masked off.
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+ // Convert HI and LO to floats, then scale the HI result back up by
+ // 2^(BW/2) to account for its original position.
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
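The arithmetic behind this expansion, checked as standalone C++ (uintToDouble is a hypothetical name): each half fits in the non-negative range of a signed conversion, and the high half is scaled back up by 2^(BW/2).

    #include <cassert>
    #include <cstdint>

    // double(u) == double(u >> 32) * 2^32 + double(u & 0xFFFFFFFF); both
    // halves are at most 2^32-1, so the signed conversions never go negative.
    static double uintToDouble(uint64_t U) {
      double Hi = static_cast<double>(static_cast<int64_t>(U >> 32));
      double Lo = static_cast<double>(static_cast<int64_t>(U & 0xFFFFFFFFull));
      return Hi * 4294967296.0 + Lo; // 2^32
    }

    int main() {
      assert(uintToDouble(0) == 0.0);
      // 2^63 has the sign bit set, so a single signed conversion would go
      // negative; the split path handles it correctly.
      assert(uintToDouble(0x8000000000000000ull) == 9223372036854775808.0);
      return 0;
    }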
+
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93aeff5..107a42b 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -44,12 +44,15 @@
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to scalarize the result of this operator!");
+ report_fatal_error("Do not know how to scalarize the result of this "
+ "operator!\n");
- case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
+ case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
@@ -61,29 +64,33 @@
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
- case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
-
+ case ISD::ANY_EXTEND:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::FABS:
+ case ISD::FCEIL:
case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
case ISD::FNEG:
+ case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FRINT:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
- case ISD::FFLOOR:
- case ISD::FCEIL:
- case ISD::FRINT:
- case ISD::FNEARBYINT:
- case ISD::UINT_TO_FP:
+ case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::SIGN_EXTEND:
+ case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
R = ScalarizeVecRes_UnaryOp(N);
break;
@@ -122,9 +129,15 @@
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetScalarizedVector(Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
NewVT, N->getOperand(0));
}
@@ -145,6 +158,13 @@
N->getOperand(0), N->getOperand(1));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ NewVT, Op, N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
@@ -171,7 +191,7 @@
N->getDebugLoc(),
N->getChain(), N->getBasePtr(),
DAG.getUNDEF(N->getBasePtr().getValueType()),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
@@ -223,6 +243,12 @@
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+
+ if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
+
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
DebugLoc DL = N->getDebugLoc();
@@ -245,35 +271,23 @@
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
EVT NVT = N->getValueType(0).getVectorElementType();
- EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
DebugLoc DL = N->getDebugLoc();
// Turn it into a scalar SETCC.
- SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
-
- // VSETCC always returns a sign-extended value, while SETCC may not. The
- // SETCC result type may not match the vector element type. Correct these.
- if (NVT.bitsLE(SVT)) {
- // The SETCC result type is bigger than the vector element type.
- // Ensure the SETCC result is sign-extended.
- if (TLI.getBooleanContents() !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
- Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
- DAG.getValueType(MVT::i1));
- // Truncate to the final type.
- return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
- }
-
- // The SETCC result type is smaller than the vector element type.
- // If the SetCC result is not sign-extended, chop it down to MVT::i1.
- if (TLI.getBooleanContents() !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
- Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
- // Sign extend to the final type.
- return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+ // Vectors may have different boolean contents from scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+ return DAG.getNode(ExtendCode, DL, NVT, Res);
}
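A compact standalone C++ model of what getExtendForContent selects here (illustrative; the boolean-contents flag is modeled as a plain bool): 0/1 booleans widen by zero extension, 0/-1 booleans by sign extension.

    #include <cassert>
    #include <cstdint>

    // Widen an i1 setcc result to a full lane according to boolean contents.
    static int32_t promoteBoolLane(bool B, bool ZeroOrNegativeOne) {
      int32_t Bit = B ? 1 : 0;        // the scalar SETCC result (i1)
      return ZeroOrNegativeOne ? -Bit // SIGN_EXTEND: 0 or -1
                               : Bit; // ZERO_EXTEND: 0 or 1
    }

    int main() {
      assert(promoteBoolLane(true, /*ZeroOrNegativeOne=*/true) == -1);
      assert(promoteBoolLane(true, /*ZeroOrNegativeOne=*/false) == 1);
      assert(promoteBoolLane(false, true) == 0);
      return 0;
    }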
@@ -296,8 +310,8 @@
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize this operator's operand!");
- case ISD::BIT_CONVERT:
- Res = ScalarizeVecOp_BIT_CONVERT(N);
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -326,11 +340,11 @@
return false;
}
-/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
/// to be scalarized, it must be <1 x ty>. Convert the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Elt);
}
@@ -365,14 +379,13 @@
if (N->isTruncatingStore())
return DAG.getTruncStore(N->getChain(), dl,
GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getAlignment());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
}
@@ -402,15 +415,14 @@
#endif
llvm_unreachable("Do not know how to split the result of this operator!");
- case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::VSELECT:
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
-
- case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
- case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
@@ -421,39 +433,41 @@
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
case ISD::SETCC:
- case ISD::VSETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
- case ISD::CTTZ:
+ case ISD::ANY_EXTEND:
+ case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
case ISD::CTPOP:
- case ISD::FNEG:
+ case ISD::CTTZ:
case ISD::FABS:
- case ISD::FSQRT:
- case ISD::FSIN:
- case ISD::FCOS:
- case ISD::FTRUNC:
- case ISD::FFLOOR:
case ISD::FCEIL:
- case ISD::FRINT:
- case ISD::FNEARBYINT:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- case ISD::TRUNCATE:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
+ case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FFLOOR:
case ISD::FLOG:
- case ISD::FLOG2:
case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -497,8 +511,8 @@
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
}
-void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
@@ -510,15 +524,14 @@
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- case Legal:
- case PromoteInteger:
- case SoftenFloat:
- case ScalarizeVector:
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
break;
- case ExpandInteger:
- case ExpandFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
// A scalar to vector conversion, where the scalar needs expansion.
// If the vector is being split in two then we can just convert the
// expanded pieces.
@@ -526,17 +539,17 @@
GetExpandedOp(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
break;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
// If the input is a vector that needs to be split, convert each split
// piece of the input now.
GetSplitVector(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
@@ -550,8 +563,8 @@
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
@@ -588,74 +601,19 @@
Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
}
-void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- EVT LoVT, HiVT;
- DebugLoc dl = N->getDebugLoc();
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
-
- SDValue DTyOpLo = DAG.getValueType(LoVT);
- SDValue DTyOpHi = DAG.getValueType(HiVT);
-
- SDValue RndOp = N->getOperand(3);
- SDValue SatOp = N->getOperand(4);
- ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
-
- // Split the input.
- SDValue VLo, VHi;
- EVT InVT = N->getOperand(0).getValueType();
- switch (getTypeAction(InVT)) {
- default: llvm_unreachable("Unexpected type action!");
- case Legal: {
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getIntPtrConstant(0));
- VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case SplitVector:
- GetSplitVector(N->getOperand(0), VLo, VHi);
- break;
- case WidenVector: {
- // If the result needs to be split and the input needs to be widened,
- // the two types must have different lengths. Use the widened result
- // and extract from it to do the split.
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- }
-
- SDValue STyOpLo = DAG.getValueType(VLo.getValueType());
- SDValue STyOpHi = DAG.getValueType(VHi.getValueType());
-
- Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
- CvtCode);
- Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
- CvtCode);
-}
-
void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
- EVT IdxVT = Idx.getValueType();
DebugLoc dl = N->getDebugLoc();
EVT LoVT, HiVT;
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
- Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
- DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
}
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -705,20 +663,20 @@
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
TLI.getTargetData()->getPrefTypeAlignment(VecType);
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0,
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
false, false, 0);
// Increment the pointer to the other part.
@@ -727,8 +685,8 @@
DAG.getIntPtrConstant(IncrementSize));
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
- false, MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -751,8 +709,6 @@
SDValue Ch = LD->getChain();
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
- const Value *SV = LD->getSrcValue();
- int SVOffset = LD->getSrcValueOffset();
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
@@ -762,14 +718,15 @@
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -782,6 +739,10 @@
}
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
EVT LoVT, HiVT;
DebugLoc DL = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
@@ -816,7 +777,7 @@
EVT InVT = N->getOperand(0).getValueType();
switch (getTypeAction(InVT)) {
default: llvm_unreachable("Unexpected type action!");
- case Legal: {
+ case TargetLowering::TypeLegal: {
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
@@ -825,10 +786,21 @@
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
break;
}
- case SplitVector:
+ case TargetLowering::TypePromoteInteger: {
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
+ InOp.getValueType().getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case TargetLowering::TypeSplitVector:
GetSplitVector(N->getOperand(0), Lo, Hi);
break;
- case WidenVector: {
+ case TargetLowering::TypeWidenVector: {
// If the result needs to be split and the input needs to be widened,
// the two types must have different lengths. Use the widened result
// and extract from it to do the split.
@@ -843,8 +815,25 @@
}
}
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+ SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
}
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
@@ -979,10 +968,12 @@
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to split this operator's operand!");
-
- case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -990,10 +981,12 @@
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
+ case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::FTRUNC:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -1035,8 +1028,8 @@
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
-SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
- // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
// end up being split all the way down to individual components. Convert the
// split pieces into integers and reassemble.
SDValue Lo, Hi;
@@ -1047,13 +1040,12 @@
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
JoinIntegers(Lo, Hi));
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
- // We know that the extracted result type is legal. For now, assume the index
- // is a constant.
+ // We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
@@ -1091,34 +1083,30 @@
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts,
- Idx.getValueType())),
- 0);
+ Idx.getValueType())), 0);
}
// Store the vector to the stack.
EVT EltVT = VecVT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
- SV, 0, EltVT, false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
@@ -1132,27 +1120,90 @@
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
LoMemVT, isVol, isNT, Alignment);
else
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
isVol, isNT, Alignment);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, isVol, isNT, Alignment);
else
- Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
isVol, isNT, Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands all must have the same type, and we know the result
+ // type is valid. Convert this to a BUILD_VECTOR which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-of-two vectors, we could convert
+ // this to a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
+}
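A standalone C++ sketch of the same rewrite (hypothetical helper name): concatenation becomes one BUILD_VECTOR fed by every element extracted from every operand, in order.

    #include <cassert>
    #include <vector>

    // Concatenation modeled as one BUILD_VECTOR fed by every element
    // extracted from every operand, in operand order.
    static std::vector<int> concatAsBuildVector(
        const std::vector<std::vector<int>> &Ops) {
      std::vector<int> Elts;
      for (const auto &Op : Ops)
        for (int E : Op)
          Elts.push_back(E); // one EXTRACT_VECTOR_ELT per element
      return Elts;           // the BUILD_VECTOR of all collected elements
    }

    int main() {
      assert((concatAsBuildVector({{1, 2}, {3, 4}}) ==
              std::vector<int>{1, 2, 3, 4}));
      return 0;
    }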
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ // The result has a legal vector type, but the input needs splitting.
+ SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ unsigned PartElements = Lo0.getValueType().getVectorNumElements();
+ EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
+ EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
+ return PromoteTargetBoolean(Con, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
//===----------------------------------------------------------------------===//
// Result Vector Widening
@@ -1177,7 +1228,8 @@
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
- case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
@@ -1194,10 +1246,6 @@
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
- case ISD::VSETCC:
- Res = WidenVecRes_VSETCC(N);
- break;
-
case ISD::ADD:
case ISD::AND:
case ISD::BSWAP:
@@ -1231,15 +1279,16 @@
Res = WidenVecRes_Shift(N);
break;
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
Res = WidenVecRes_Convert(N);
break;
@@ -1247,15 +1296,20 @@
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::FABS:
+ case ISD::FCEIL:
case ISD::FCOS:
- case ISD::FNEG:
- case ISD::FSIN:
- case ISD::FSQRT:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FFLOOR:
case ISD::FLOG:
- case ISD::FLOG2:
case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
Res = WidenVecRes_Unary(N);
break;
}
@@ -1273,9 +1327,9 @@
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
@@ -1283,129 +1337,128 @@
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
- } else if (NumElts == 1) {
- // No legal vector version so unroll the vector operation and then widen.
- return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
- } else {
- // Since the operation can trap, apply operation on the original vector.
- EVT MaxVT = VT;
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
-
- SmallVector<SDValue, 16> ConcatOps(CurNumElts);
- unsigned ConcatEnd = 0; // Current ConcatOps index.
- int Idx = 0; // Current Idx into input vectors.
-
- // NumElts := greatest synthesizable vector size (at most WidenVT)
- // while (orig. vector has unhandled elements) {
- // take munches of size NumElts from the beginning and add to ConcatOps
- // NumElts := next smaller supported vector size or 1
- // }
- while (CurNumElts != 0) {
- while (CurNumElts >= NumElts) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getIntPtrConstant(Idx));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
- Idx += NumElts;
- CurNumElts -= NumElts;
- }
- do {
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
-
- if (NumElts == 1) {
- for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp1, DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp2, DAG.getIntPtrConstant(Idx));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
- }
- CurNumElts = 0;
- }
- }
-
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
-
- // while (Some element of ConcatOps is not of type MaxVT) {
- // From the end of ConcatOps, collect elements of the same type and put
- // them into an op of the next larger supported type
- // }
- while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
- VT = ConcatOps[Idx--].getValueType();
- while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
- Idx--;
-
- int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
- EVT NextVT;
- do {
- NextSize *= 2;
- NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeSynthesizable(NextVT));
-
- if (!VT.isVector()) {
- // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
- SDValue VecOp = DAG.getUNDEF(NextVT);
- unsigned NumToInsert = ConcatEnd - Idx - 1;
- for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
- ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
- }
- ConcatOps[Idx+1] = VecOp;
- ConcatEnd = Idx + 2;
- }
- else {
- // Vector type, create a CONCAT_VECTORS of type NextVT
- SDValue undefVec = DAG.getUNDEF(VT);
- unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
- SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
- unsigned RealVals = ConcatEnd - Idx - 1;
- unsigned SubConcatEnd = 0;
- unsigned SubConcatIdx = Idx + 1;
- while (SubConcatEnd < RealVals)
- SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
- while (SubConcatEnd < OpsToConcat)
- SubConcatOps[SubConcatEnd++] = undefVec;
- ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, &SubConcatOps[0],
- OpsToConcat);
- ConcatEnd = SubConcatIdx + 1;
- }
- }
-
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
-
- // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
- unsigned NumOps =
- WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
- if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(MaxVT);
- for (unsigned j = ConcatEnd; j < NumOps; ++j)
- ConcatOps[j] = UndefVal;
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
}
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
}
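The chunking order the loops above produce can be sketched as standalone C++ (illustrative; this models only the halving of the chunk size, not the per-target legality query):

    #include <cassert>
    #include <vector>

    // Consume NumLanes in chunks of the largest size available, halving the
    // chunk size (down to single lanes) until everything is covered.
    // MaxLegal is assumed to be a power of two >= 1.
    static std::vector<int> chunkSizes(unsigned NumLanes, unsigned MaxLegal) {
      std::vector<int> Chunks;
      unsigned Sz = MaxLegal;
      while (NumLanes != 0) {
        while (NumLanes >= Sz) {
          Chunks.push_back(Sz);
          NumLanes -= Sz;
        }
        if (Sz > 1)
          Sz /= 2;
      }
      return Chunks;
    }

    int main() {
      // 7 lanes with a largest legal width of 4: one 4-chunk, one 2-chunk,
      // then a single scalar lane.
      assert((chunkSizes(7, 4) == std::vector<int>{4, 2, 1}));
      return 0;
    }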
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1417,15 +1470,18 @@
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
- if (getTypeAction(InVT) == WidenVector) {
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts)
- return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1439,16 +1495,20 @@
SDValue UndefVal = DAG.getUNDEF(InVT);
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
- &Ops[0], NumConcat));
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
}
if (InVTNumElts % WidenNumElts == 0) {
- // Extract the input and convert the shorten input vector.
+ // Extract the input and convert the shortened input vector.
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
- InOp, DAG.getIntPtrConstant(0)));
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
}
}
@@ -1457,16 +1517,20 @@
EVT EltVT = WidenVT.getVectorElementType();
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
- for (i=0; i < MinElts; ++i)
- Ops[i] = DAG.getNode(Opcode, dl, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getIntPtrConstant(i)));
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
}
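
When neither concatenation nor subvector extraction applies, the code above falls back to scalarization: it converts MinElts elements one at a time (passing the extra operand through when the node has one) and pads the tail with undef before rebuilding the vector. A hedged sketch of that fallback over plain arrays (illustrative names; an int-to-float cast plays the role of the per-element Opcode):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Convert the first minElts lanes element-wise and pad the rest of the
    // widened result, mirroring the Ops[]/UndefVal loop above. 0.0f stands
    // in for the UNDEF padding value.
    std::vector<float> convertAndWiden(const std::vector<int> &in,
                                       unsigned widenNumElts) {
        unsigned minElts = std::min<unsigned>(in.size(), widenNumElts);
        std::vector<float> out(widenNumElts, 0.0f);
        for (unsigned i = 0; i < minElts; ++i)
            out[i] = static_cast<float>(in[i]);
        return out;
    }

    int main() {
        for (float f : convertAndWiden({1, 2, 3}, 4))
            std::printf("%g ", f);             // prints: 1 2 3 0
        return 0;
    }
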
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
@@ -1482,7 +1546,7 @@
SDValue ShOp = N->getOperand(1);
EVT ShVT = ShOp.getValueType();
- if (getTypeAction(ShVT) == WidenVector) {
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
ShOp = GetWidenedVector(ShOp);
ShVT = ShOp.getValueType();
}
@@ -1513,7 +1577,12 @@
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
-SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+ SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetWidenedVector(WidenVec);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT VT = N->getValueType(0);
@@ -1524,36 +1593,37 @@
default:
assert(false && "Unknown type action!");
break;
- case Legal:
+ case TargetLowering::TypeLegal:
break;
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
- case SoftenFloat:
- case ExpandInteger:
- case ExpandFloat:
- case ScalarizeVector:
- case SplitVector:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
break;
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
// If the InOp is widened to the same size, convert it. Otherwise, fall
// out of the switch and widen the widened input.
InOp = GetWidenedVector(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
// The input widens to the same size. Convert to the widen value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
}
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
- if (WidenSize % InSize == 0) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
// Determine new input vector type. The new input vector type will use
// the same element type (if it's a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
@@ -1561,13 +1631,13 @@
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
- NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT,
- WidenSize / InEltVT.getSizeInBits());
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
} else {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
- if (TLI.isTypeSynthesizable(NewInVT)) {
+ if (TLI.isTypeLegal(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1586,7 +1656,7 @@
else
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
NewInVT, &Ops[0], NewNumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -1616,10 +1686,11 @@
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
bool InputWidened = false; // Indicates we need to widen the input.
- if (getTypeAction(InVT) != WidenVector) {
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenVT.getVectorNumElements() /
@@ -1641,17 +1712,17 @@
if (N->getOperand(i).getOpcode() != ISD::UNDEF)
break;
- if (i > NumOperands)
+ if (i == NumOperands)
// Everything but the first operand is an UNDEF so just return the
// widened first operand.
return GetWidenedVector(N->getOperand(0));
if (NumOperands == 2) {
// Replace concat of two operands with a shuffle.
- SmallVector<int, 16> MaskOps(WidenNumElts);
- for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
MaskOps[i] = i;
- MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
}
return DAG.getVectorShuffle(WidenVT, dl,
GetWidenedVector(N->getOperand(0)),
@@ -1663,7 +1734,6 @@
// Fall back to use extracts and build vector.
EVT EltVT = WidenVT.getVectorElementType();
- unsigned NumInElts = InVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Idx = 0;
for (unsigned i=0; i < NumOperands; ++i) {
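
The two-operand concat above becomes a single shuffle whose mask pulls NumInElts lanes from each input; using NumInElts rather than WidenNumElts/2 matters because a widened input carries real data only in its low NumInElts lanes, and every unfilled mask slot stays -1 (undefined). A small sketch of the mask construction (assumes 2*numInElts <= widenNumElts, as in the code above):

    #include <cstdio>
    #include <vector>

    // Result lane i takes input-0 lane i; lane numInElts+i takes input-1
    // lane i (indices >= widenNumElts select from the second vector).
    std::vector<int> concatMask(unsigned numInElts, unsigned widenNumElts) {
        std::vector<int> mask(widenNumElts, -1);   // MaskOps(WidenNumElts, -1)
        for (unsigned i = 0; i < numInElts; ++i) {
            mask[i] = static_cast<int>(i);
            mask[i + numInElts] = static_cast<int>(i + widenNumElts);
        }
        return mask;
    }

    int main() {
        // Two <2 x float> inputs widened to <4 x float>: mask is 0 1 4 5.
        for (int m : concatMask(2, 4))
            std::printf("%d ", m);
        return 0;
    }
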
@@ -1686,8 +1756,7 @@
SDValue RndOp = N->getOperand(3);
SDValue SatOp = N->getOperand(4);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
- N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
@@ -1699,7 +1768,7 @@
ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
unsigned InVTNumElts = InVT.getVectorNumElements();
- if (getTypeAction(InVT) == WidenVector) {
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(InOp);
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
@@ -1708,7 +1777,7 @@
SatOp, CvtCode);
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1720,9 +1789,9 @@
SmallVector<SDValue, 16> Ops(NumConcat);
Ops[0] = InOp;
SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i) {
+ for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- }
+
InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
@@ -1767,44 +1836,30 @@
SDValue Idx = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
- if (getTypeAction(InOp.getValueType()) == WidenVector)
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
- ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- // Check if we can just return the input vector after widening.
- if (IdxVal == 0 && InVT == WidenVT)
- return InOp;
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
- // Check if we can extract from the vector.
- unsigned InNumElts = InVT.getVectorNumElements();
- if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
- }
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = Idx.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- for (i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal+i, IdxVT));
- } else {
- Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
- for (i=1; i < NumElts; ++i) {
- SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
- DAG.getConstant(i, IdxVT));
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
- }
- }
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -1863,7 +1918,7 @@
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
CondEltVT, WidenNumElts);
- if (getTypeAction(CondVT) == WidenVector)
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
if (Cond1.getValueType() != CondWidenVT)
@@ -1886,6 +1941,11 @@
}
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+ if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
+
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
@@ -1924,6 +1984,9 @@
}
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1940,7 +2003,8 @@
assert(InOp1.getValueType() == WidenInVT &&
InOp2.getValueType() == WidenInVT &&
"Input not widened to expected type!");
- return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+ (void)WidenInVT;
+ return DAG.getNode(ISD::SETCC, N->getDebugLoc(),
WidenVT, InOp1, InOp2, N->getOperand(2));
}
@@ -1963,13 +2027,13 @@
#endif
llvm_unreachable("Do not know how to widen this operator's operand!");
- case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
- case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
@@ -2007,7 +2071,7 @@
DebugLoc dl = N->getDebugLoc();
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
- if (getTypeAction(InOp.getValueType()) == WidenVector)
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
@@ -2022,7 +2086,7 @@
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
-SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
EVT InWidenVT = InOp.getValueType();
@@ -2031,11 +2095,12 @@
// Check if we can convert between two legal vector types and extract.
unsigned InWidenSize = InWidenVT.getSizeInBits();
unsigned Size = VT.getSizeInBits();
- if (InWidenSize % Size == 0 && !VT.isVector()) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
- if (TLI.isTypeSynthesizable(NewVT)) {
- SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getIntPtrConstant(0));
}
@@ -2061,7 +2126,7 @@
unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
- if (getTypeAction(InOp.getValueType()) == WidenVector)
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
@@ -2124,7 +2189,7 @@
if (Width == WidenEltWidth)
return RetVT;
- // See if there is larger legal integer than the element type to load/store
+ // See if there is a larger legal integer than the element type to load/store
unsigned VT;
for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
@@ -2132,7 +2197,8 @@
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2146,8 +2212,9 @@
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
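
Both search loops above walk the legal types from widest to narrowest, and the new isPowerOf2_32(WidenWidth / MemVTWidth) guard rejects chunk widths that would not tile the widened type a power-of-two number of times. A hedged sketch of that search over bare bit-widths (candidate widths are illustrative; the real code iterates MVT::SimpleValueType values and also honors alignment):

    #include <cstdio>

    // What llvm::isPowerOf2_32 checks: nonzero and a single set bit.
    static bool isPow2(unsigned x) { return x && (x & (x - 1)) == 0; }

    // Pick the widest width that divides widenWidth a power-of-two number
    // of times and does not exceed the bits still to be transferred.
    unsigned findChunkWidth(unsigned width, unsigned widenWidth) {
        static const unsigned candidates[] = {128, 64, 32, 16, 8};
        for (unsigned w : candidates)
            if (widenWidth % w == 0 && isPow2(widenWidth / w) && w <= width)
                return w;
        return 8;   // narrowest fallback
    }

    int main() {
        // 96 bits left of a 128-bit widened type: 64 wins, since
        // 128/64 == 2 is a power of two and 64 <= 96.
        std::printf("%u\n", findChunkWidth(96, 128));
        return 0;
    }
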
@@ -2179,7 +2246,7 @@
if (NewLdTy != LdTy) {
NumElts = Width / NewLdTy.getSizeInBits();
NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
// Readjust position and vector position based on new load type
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
@@ -2187,11 +2254,11 @@
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
DAG.getIntPtrConstant(Idx++));
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
-SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
- LoadSDNode * LD) {
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
// The strategy assumes that we can efficiently load power-of-two widths.
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines them into the widened vector
@@ -2206,11 +2273,9 @@
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2219,31 +2284,30 @@
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Align);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
if (LdWidth <= NewVTWidth) {
- if (NewVT.isVector()) {
- if (NewVT != WidenVT) {
- assert(WidenWidth % NewVTWidth == 0);
- unsigned NumConcat = WidenWidth / NewVTWidth;
- SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(NewVT);
- ConcatOps[0] = LdOp;
- for (unsigned i = 1; i != NumConcat; ++i)
- ConcatOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
- NumConcat);
- } else
- return LdOp;
- } else {
+ if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
}
// Load vector by using multiple loads from largest vector to scalar
@@ -2265,8 +2329,9 @@
NewVTWidth = NewVT.getSizeInBits();
}
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
- SVOffset+Offset, isVolatile,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ isVolatile,
isNonTemporal, MinAlign(Align, Increment));
LdChain.push_back(LdOp.getValue(1));
LdOps.push_back(LdOp);
@@ -2276,52 +2341,55 @@
// Build the vector from the loads operations
unsigned End = LdOps.size();
- if (LdOps[0].getValueType().isVector()) {
- // If the load contains vectors, build the vector using concat vector.
- // All of the vectors used to loads are power of 2 and the scalars load
- // can be combined to make a power of 2 vector.
- SmallVector<SDValue, 16> ConcatOps(End);
- int i = End - 1;
- int Idx = End;
- EVT LdTy = LdOps[i].getValueType();
- // First combine the scalar loads to a vector
- if (!LdTy.isVector()) {
- for (--i; i >= 0; --i) {
- LdTy = LdOps[i].getValueType();
- if (LdTy.isVector())
- break;
- }
- ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the load contains vectors, build the vector using concat vector.
+ // All of the vectors used in the loads are powers of 2, and the scalar loads
+ // can be combined to make a power of 2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads into a vector
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
}
ConcatOps[--Idx] = LdOps[i];
- for (--i; i >= 0; --i) {
- EVT NewLdTy = LdOps[i].getValueType();
- if (NewLdTy != LdTy) {
- // Create a larger vector
- ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
- &ConcatOps[Idx], End - Idx);
- Idx = End - 1;
- LdTy = NewLdTy;
- }
- ConcatOps[--Idx] = LdOps[i];
- }
+ }
- if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
- // We need to fill the rest with undefs to build the vector
- unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
- SmallVector<SDValue, 16> WidenOps(NumOps);
- SDValue UndefVal = DAG.getUNDEF(LdTy);
- unsigned i = 0;
- for (; i != End-Idx; ++i)
- WidenOps[i] = ConcatOps[Idx+i];
- for (; i != NumOps; ++i)
- WidenOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
- } else
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- &ConcatOps[Idx], End - Idx);
- } else // All the loads are scalar loads.
- return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0], NumOps);
}
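
The restructured function above rebuilds the widened value from mixed-width chunk loads: scalar loads are packed into a vector first, then each run of equal-typed chunks is folded into one CONCAT_VECTORS whenever a wider chunk type appears. A loose sketch of that folding order over bare widths (it tracks the nesting as a string rather than building DAG nodes):

    #include <cstdio>
    #include <string>
    #include <vector>

    // widths lists the loaded chunks widest-first, e.g. {32, 16, 8, 8}.
    // Walking from the narrow end, fold the accumulated run each time the
    // width changes, mirroring the ConcatOps/Idx loop above.
    std::string recombine(const std::vector<unsigned> &widths) {
        std::string acc = std::to_string(widths.back());
        unsigned curWidth = widths.back();
        for (int i = static_cast<int>(widths.size()) - 2; i >= 0; --i) {
            if (widths[i] != curWidth) {
                acc = "concat(" + acc + ")";     // fold the run so far
                curWidth = widths[i];
            }
            acc = std::to_string(widths[i]) + "," + acc;
        }
        return "concat(" + acc + ")";
    }

    int main() {
        // Prints: concat(32,concat(16,concat(8,8)))
        std::printf("%s\n", recombine({32, 16, 8, 8}).c_str());
        return 0;
    }
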
SDValue
@@ -2338,11 +2406,9 @@
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2352,16 +2418,17 @@
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
LdEltVT, isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(Offset));
- Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
- SVOffset + Offset, LdEltVT, isVolatile,
- isNonTemporal, Align);
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[i].getValue(1));
}
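
The loop above issues one extending load per element, advancing the address by Increment bytes each iteration and collecting every load's chain output in LdChain; the signature change replaces the old SrcValue/SVOffset pair with MachinePointerInfo::getWithOffset. A scalar sketch of the stride pattern (plain memory; zero-extension of 16-bit elements to 32 bits stands in for getExtLoad):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // offset += increment plays the role of the NewBasePtr arithmetic above.
    std::vector<uint32_t> extLoadElements(const uint8_t *base, unsigned numElts) {
        const unsigned increment = sizeof(uint16_t);   // LdEltVT size in bytes
        std::vector<uint32_t> out;
        unsigned offset = 0;
        for (unsigned i = 0; i < numElts; ++i, offset += increment) {
            uint16_t narrow;
            std::memcpy(&narrow, base + offset, sizeof(narrow));
            out.push_back(narrow);                     // zero-extending "load"
        }
        return out;
    }

    int main() {
        uint16_t data[] = {1, 2, 3};
        for (uint32_t x : extLoadElements(reinterpret_cast<const uint8_t *>(data), 3))
            std::printf("%u ", x);                     // prints: 1 2 3
        return 0;
    }
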
@@ -2381,8 +2448,6 @@
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -2409,9 +2474,9 @@
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getIntPtrConstant(Idx));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal,
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
@@ -2423,15 +2488,16 @@
// Cast the vector to the scalar type we can store
unsigned NumElts = ValWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
- SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getIntPtrConstant(Idx++));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal, MinAlign(Align, Offset)));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -2450,14 +2516,12 @@
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue ValOp = GetWidenedVector(ST->getValue());
DebugLoc dl = ST->getDebugLoc();
-
+
EVT StVT = ST->getMemoryVT();
EVT ValVT = ValOp.getValueType();
@@ -2475,8 +2539,8 @@
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset, StEltVT,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
@@ -2484,9 +2548,9 @@
BasePtr, DAG.getIntPtrConstant(Offset));
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
- SVOffset + Offset, StEltVT,
- isVolatile, isNonTemporal,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
}
}
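
One mechanical change repeats through every load/store hunk in this file: the raw (const Value *SV, int SVOffset) pair is folded into a MachinePointerInfo, so offsets travel with the pointer description instead of as a separate integer. Distilled from the hunks above (both forms appear verbatim in this diff):

    // Old: source value and byte offset threaded as separate arguments.
    DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset + Offset,
                 isVolatile, isNonTemporal, MinAlign(Align, Offset));
    // New: MachinePointerInfo carries the base and the offset together.
    DAG.getStore(Chain, dl, EOp, BasePtr,
                 ST->getPointerInfo().getWithOffset(Offset),
                 isVolatile, isNonTemporal, MinAlign(Align, Offset));
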
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/PACKAGE.vcxproj b/src/LLVM/lib/CodeGen/SelectionDAG/PACKAGE.vcxproj
new file mode 100644
index 0000000..3fdc75e
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/PACKAGE.vcxproj
@@ -0,0 +1,277 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="MinSizeRel|Win32">
+ <Configuration>MinSizeRel</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="RelWithDebInfo|Win32">
+ <Configuration>RelWithDebInfo</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID>
+ <Keyword>Win32Proj</Keyword>
+ <Platform>Win32</Platform>
+ <ProjectName>PACKAGE</ProjectName>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration">
+ <ConfigurationType></ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">
+ <Midl>
+ <AdditionalIncludeDirectories>..\SelectionDAG;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <OutputDirectory>$(IntDir)</OutputDirectory>
+ <HeaderFileName>%(Filename).h</HeaderFileName>
+ <TypeLibraryName>%(Filename).tlb</TypeLibraryName>
+ <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName>
+ <ProxyFileName>%(Filename)_p.c</ProxyFileName>
+ </Midl>
+ <PostBuildEvent>
+ <Message></Message>
+ <Command>setlocal
+cd ..\..\..\..\LLVM
+if %errorlevel% neq 0 goto :cmEnd
+D:
+if %errorlevel% neq 0 goto :cmEnd
+"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ </PostBuildEvent>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\PACKAGE_force.rule">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+cd .
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeLists.txt">
+ <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs>
+ <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message>
+ <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal
+"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp
+if %errorlevel% neq 0 goto :cmEnd
+:cmEnd
+endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone
+:cmErrorLevel
+exit /b %1
+:cmDone
+if %errorlevel% neq 0 goto :VCEnd</Command>
+ <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs>
+ <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs>
+ </CustomBuild>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj">
+ <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/PACKAGE.vcxproj.filters b/src/LLVM/lib/CodeGen/SelectionDAG/PACKAGE.vcxproj.filters
new file mode 100644
index 0000000..a570359
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+ <ItemGroup>
+ <CustomBuild Include="CMakeFiles\PACKAGE_force.rule">
+ <Filter>CMake Rules</Filter>
+ </CustomBuild>
+ <CustomBuild Include="CMakeLists.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <Filter Include="CMake Rules">
+ <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ </ItemGroup>
+</Project>
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/src/LLVM/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index ac2d338..2dcb229 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3b86c32..b275c63 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -204,7 +205,7 @@
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
+ if (SU->getNode()->getGluedNode())
return NULL;
SDNode *N = SU->getNode();
@@ -215,7 +216,7 @@
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -223,7 +224,7 @@
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -248,14 +249,14 @@
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
- if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
NewSU->isTwoAddress = true;
break;
}
}
- if (TID.isCommutable())
+ if (MCID.isCommutable())
NewSU->isCommutable = true;
// LoadNode may already exist. This can happen when there is another
@@ -421,10 +422,10 @@
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = TID.getNumDefs();
- for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -432,6 +433,30 @@
return N->getValueType(NumRes);
}
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
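
CheckForLiveRegDef above factors out a check the old code repeated inline twice: a def interferes if the register itself, or any alias of it, has a pending live def owned by a different scheduling unit. A hedged standalone sketch with a toy alias table (illustrative; real alias sets come from TargetRegisterInfo):

    #include <cstdio>
    #include <set>
    #include <vector>

    // Toy alias table: reg -> overlapping regs (think AX aliasing EAX).
    static const std::vector<std::vector<unsigned>> kAliases = {
        {}, {2}, {1},
    };

    // Report `reg` and any alias whose live def belongs to a different unit.
    bool checkLiveRegDef(int su, unsigned reg,
                         const std::vector<int> &liveRegDefs,  // -1 == not live
                         std::set<unsigned> &regAdded,
                         std::vector<unsigned> &lregs) {
        bool added = false;
        auto consider = [&](unsigned r) {
            if (liveRegDefs[r] != -1 && liveRegDefs[r] != su &&
                regAdded.insert(r).second) {     // RegAdded.insert(Reg)
                lregs.push_back(r);
                added = true;
            }
        };
        consider(reg);
        for (unsigned a : kAliases[reg])
            consider(a);
        return added;
    }

    int main() {
        std::vector<int> live = {-1, 7, -1};     // reg 1 live, owned by unit 7
        std::set<unsigned> seen;
        std::vector<unsigned> lregs;
        checkLiveRegDef(3, 2, live, seen, lregs);
        std::printf("%zu interference(s)\n", lregs.size());  // 1, via alias reg 1
        return 0;
    }
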
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -446,37 +471,45 @@
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
- unsigned Reg = I->getReg();
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
- if (RegAdded.insert(Reg))
- LRegs.push_back(Reg);
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg);
- *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
- if (RegAdded.insert(*Alias))
- LRegs.push_back(*Alias);
- }
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
}
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
if (!Node->isMachineOpcode())
continue;
- const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
- if (!TID.ImplicitDefs)
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
- if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
- if (RegAdded.insert(*Reg))
- LRegs.push_back(*Reg);
- }
- for (const unsigned *Alias = TRI->getAliasSet(*Reg);
- *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias))
- LRegs.push_back(*Alias);
- }
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
return !LRegs.empty();
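
The INLINEASM handling above walks the operand list as groups: a flag word (a ConstantSDNode) announces the group kind and how many register operands follow, and only def, early-clobber, and clobber groups feed CheckForLiveRegDef. A toy sketch of that walk; the header encoding below is made up for illustration (the real layout lives behind InlineAsm::getNumOperandRegisters and the isRegDefKind/isClobberKind predicates):

    #include <cstdio>
    #include <vector>

    enum Kind { Use = 0, Def = 1, Clobber = 2 };
    // Hypothetical header word: payload count in the high bits, kind in the low two.
    static unsigned header(Kind k, unsigned n) { return (n << 2) | k; }

    int main() {
        std::vector<unsigned> ops = {header(Def, 2), 10, 11,
                                     header(Use, 1), 12,
                                     header(Clobber, 1), 13};
        for (size_t i = 0; i < ops.size();) {
            unsigned flags = ops[i++];        // ++i; skip the ID value
            unsigned numVals = flags >> 2;
            Kind k = static_cast<Kind>(flags & 3);
            if (k == Def || k == Clobber)     // defs/clobbers may interfere
                for (; numVals; --numVals, ++i)
                    std::printf("check reg %u\n", ops[i]);   // 10, 11, 13
            else
                i += numVals;                 // skip plain uses entirely
        }
        return 0;
    }
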
@@ -538,13 +571,20 @@
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
- // If cross copy register class is null, then it must be possible copy
- // the value directly. Do not try duplicate the def.
+ // If the cross copy register class is the same as RC, then it must be
+ // possible to copy the value directly. Do not try to duplicate the def.
+ // If the cross copy register class is not the same as RC, then it's
+ // possible to copy the value, but it requires cross register class copies
+ // and it is expensive.
+ // If the cross copy register class is null, then it's not possible to copy
+ // the value at all.
SUnit *NewDef = 0;
- if (DestRC)
+ if (DestRC != RC) {
NewDef = CopyAndMoveSuccessors(LRDef);
- else
- DestRC = RC;
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
if (!NewDef) {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index 56f5ded..430283d 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -40,7 +40,7 @@
static RegisterScheduler
tdListDAGScheduler("list-td", "Top-down list scheduler",
createTDListDAGScheduler);
-
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGList - The actual list scheduler implementation. This supports
@@ -51,7 +51,7 @@
/// AvailableQueue - The priority queue to use for the available SUnits.
///
SchedulingPriorityQueue *AvailableQueue;
-
+
/// PendingQueue - This contains all of the instructions whose operands have
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands become available, the instruction is
@@ -63,11 +63,12 @@
public:
ScheduleDAGList(MachineFunction &mf,
- SchedulingPriorityQueue *availqueue,
- ScheduleHazardRecognizer *HR)
- : ScheduleDAGSDNodes(mf),
- AvailableQueue(availqueue), HazardRec(HR) {
- }
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGList() {
delete HazardRec;
@@ -87,14 +88,14 @@
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
-
+
// Build the scheduling graph.
BuildSchedGraph(NULL);
AvailableQueue->initNodes(SUnits);
-
+
ListScheduleTopDown();
-
+
AvailableQueue->releaseState();
}
@@ -118,7 +119,7 @@
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
-
+
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
@@ -142,7 +143,7 @@
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
-
+
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
@@ -168,7 +169,7 @@
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
@@ -187,7 +188,7 @@
assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
-
+
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
@@ -196,24 +197,24 @@
}
SUnit *FoundSUnit = 0;
-
+
bool HasNoopHazards = false;
while (!AvailableQueue->empty()) {
SUnit *CurSUnit = AvailableQueue->pop();
-
+
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit);
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
FoundSUnit = CurSUnit;
break;
}
-
+
// Remember if this is a noop hazard.
HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
-
+
NotReady.push_back(CurSUnit);
}
-
+
// Add the nodes that aren't ready back onto the available list.
if (!NotReady.empty()) {
AvailableQueue->push_all(NotReady);
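
The selection loop above pops candidates until one clears the hazard recognizer, remembers whether any rejection was merely a noop hazard, and requeues the rejects so nothing is lost; the updated getHazardType call passes 0 stall cycles since this scheduler never looks ahead. A hedged sketch of the pop/filter/requeue pattern with a toy hazard rule:

    #include <cstdio>
    #include <queue>
    #include <vector>

    enum Hazard { NoHazard, Stall };

    // Toy rule: even-numbered units stall this cycle.
    static Hazard hazardType(int unit) { return (unit % 2 == 0) ? Stall : NoHazard; }

    int main() {
        std::priority_queue<int> available;
        for (int u : {3, 8, 5, 2}) available.push(u);

        std::vector<int> notReady;
        int found = -1;
        while (!available.empty()) {
            int cur = available.top();
            available.pop();
            if (hazardType(cur) == NoHazard) { found = cur; break; }
            notReady.push_back(cur);              // remember, don't discard
        }
        for (int u : notReady) available.push(u); // push_all(NotReady)

        std::printf("scheduled %d, %zu deferred\n", found, notReady.size());
        return 0;                                 // scheduled 5, 1 deferred
    }
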
@@ -228,7 +229,7 @@
// If this is a pseudo-op node, we don't want to increment the current
// cycle.
if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
+ ++CurCycle;
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
@@ -257,12 +258,8 @@
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-/// createTDListDAGScheduler - This creates a top-down list scheduler with a
-/// new hazard recognizer. This scheduler takes ownership of the hazard
-/// recognizer and deletes it when done.
+/// createTDListDAGScheduler - This creates a top-down list scheduler.
ScheduleDAGSDNodes *
llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF,
- new LatencyPriorityQueue(),
- IS->CreateTargetHazardRecognizer());
+ return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3a5c616..e757def 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -20,6 +20,7 @@
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,6 +66,54 @@
"which tries to balance ILP and register pressure",
createILPListDAGScheduler);
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+#ifndef NDEBUG
+namespace {
+ // For sched=list-ilp, count the number of times each factor comes into play.
+ enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
+ FactStatic, FactOther, NumFactors };
+}
+static const char *FactorName[NumFactors] =
+{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
+static int FactorCount[NumFactors];
+#endif //!NDEBUG
+
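
These hidden cl::opt flags are experimentation knobs while the list-ilp heuristics settle; they can be toggled from the llc command line, e.g. something like `llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure test.ll` to drop the register-pressure term while keeping the rest (flag names as declared above; the exact driver invocation is an assumption).
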
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -83,31 +132,60 @@
/// AvailableQueue - The priority queue to use for the available SUnits.
SchedulingPriorityQueue *AvailableQueue;
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
+ /// IssueCount - Count instructions issued in this cycle
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
/// LiveRegDefs - A set of physical registers and their definitions
/// that are "live". These nodes must be scheduled before any other nodes that
/// modify the registers can be scheduled.
unsigned NumLiveRegs;
std::vector<SUnit*> LiveRegDefs;
- std::vector<unsigned> LiveRegCycles;
+ std::vector<SUnit*> LiveRegGens;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
public:
- ScheduleDAGRRList(MachineFunction &mf,
- bool isbottomup, bool needlatency,
- SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
- AvailableQueue(availqueue), Topo(SUnits) {
- }
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits) {
+
+ const TargetMachine &tm = mf.getTarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGRRList() {
+ delete HazardRec;
delete AvailableQueue;
}
void Schedule();
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
/// IsReachable - Checks if SU is reachable from TargetSU.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
return Topo.IsReachable(SU, TargetSU);
@@ -136,24 +214,37 @@
}
private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
void ReleasePred(SUnit *SU, const SDep *PredEdge);
- void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ReleasePredecessors(SUnit *SU);
void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
void CapturePred(SDep *PredEdge);
- void ScheduleNodeBottomUp(SUnit*, unsigned);
- void ScheduleNodeTopDown(SUnit*, unsigned);
void UnscheduleNodeBottomUp(SUnit*);
- void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
const TargetRegisterClass*,
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
- void ListScheduleTopDown();
+
+ SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
+ void ScheduleNodeTopDown(SUnit*);
+ void ListScheduleTopDown();
+
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
@@ -185,16 +276,61 @@
};
} // end anonymous namespace
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost) {
+ EVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+ unsigned Opcode = Node->getMachineOpcode();
+
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
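For orientation, a pressure tracker consumes this helper one defined value at a time. The sketch below is illustrative only and not part of the patch; it assumes this file's TLI/TII/TRI and a RegPressure vector indexed by register class ID, and shows the intended call pattern:

  // Charge the cost of every value an SUnit defines to its register class.
  static void addPressureForDefs(const SUnit *SU,
                                 const ScheduleDAGSDNodes *DAG,
                                 const TargetLowering *TLI,
                                 const TargetInstrInfo *TII,
                                 const TargetRegisterInfo *TRI,
                                 std::vector<unsigned> &RegPressure) {
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, DAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
      RegPressure[RCId] += Cost;
    }
  }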
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
- << " **********\n");
+ << " '" << BB->getName() << "' **********\n");
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ FactorCount[i] = 0;
+ }
+#endif //!NDEBUG
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegCycles.resize(TRI->getNumRegs(), 0);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegGens.resize(TRI->getNumRegs(), NULL);
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -204,13 +340,20 @@
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
-
+
+ HazardRec->Reset();
+
// Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
if (isBottomUp)
ListScheduleBottomUp();
else
ListScheduleTopDown();
-
+
+#ifndef NDEBUG
+ for (int i = 0; i < NumFactors; ++i) {
+ DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
+ }
+#endif // !NDEBUG
AvailableQueue->releaseState();
}
@@ -243,71 +386,276 @@
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
PredSU->isAvailable = true;
- AvailableQueue->push(PredSU);
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+ // CapturePred and others may have left the node in the pending queue;
+ // avoid adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
}
}
-void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
// Bottom up: release predecessors
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
- if (!LiveRegDefs[I->getReg()]) {
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
++NumLiveRegs;
- LiveRegDefs[I->getReg()] = I->getSUnit();
- LiveRegCycles[I->getReg()] = CurCycle;
+ LiveRegGens[I->getReg()] = SU;
}
}
}
}
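The def/gen pairing described above admits a simple consistency check. The following debug-only sketch is hypothetical (it is not in the tree); it relies only on pointer nullness, so a forward declaration of SUnit suffices:

  #include <cassert>
  #include <vector>
  class SUnit; // only pointer identity is needed here

  // A register is "live" iff it has both a recorded def (the farthest def
  // of a two-address chain) and a gen (the first scheduled user keeping it
  // live), and NumLiveRegs counts exactly those registers.
  static void verifyLiveRegs(unsigned NumLiveRegs,
                             const std::vector<SUnit*> &LiveRegDefs,
                             const std::vector<SUnit*> &LiveRegGens) {
    unsigned Count = 0;
    for (unsigned Reg = 0, E = (unsigned)LiveRegDefs.size(); Reg != E; ++Reg) {
      assert(!LiveRegDefs[Reg] == !LiveRegGens[Reg] &&
             "def and gen must be set together");
      if (LiveRegDefs[Reg])
        ++Count;
    }
    assert(Count == NumLiveRegs && "NumLiveRegs out of sync");
  }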
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle =
+ isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
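The removal loop above uses the unordered-erase idiom: overwrite the element with the back, pop, and re-examine the slot. A standalone, generic sketch of the same idiom (illustrative only):

  #include <vector>

  // Remove every element matching P in O(n), without preserving order.
  template <class T, class Pred>
  void eraseUnorderedIf(std::vector<T> &V, Pred P) {
    for (unsigned i = 0, e = (unsigned)V.size(); i != e; ++i) {
      if (P(V[i])) {
        V[i] = V.back(); // overwrite with the last element
        V.pop_back();
        --i; --e;        // re-examine the element moved into slot i
      }
    }
  }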
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ if (isBottomUp)
+ HazardRec->RecedeCycle();
+ else
+ HazardRec->AdvanceCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency through such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (isBottomUp && SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
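The probe loop above is a linear search for the smallest stall distance the scoreboard accepts. A toy standalone version, with a plain callback standing in for the hazard recognizer (hypothetical, not LLVM API):

  #include <functional>

  // Query a hazard oracle at increasing distances (negated for bottom-up
  // scheduling) until it reports no hazard; the result is how many cycles
  // AdvanceToCycle must skip.
  static int countStalls(const std::function<bool(int)> &HasHazardAt,
                         bool BottomUp) {
    int Stalls = 0;
    while (HasHazardAt(BottomUp ? -Stalls : Stalls))
      ++Stalls;
    return Stalls;
  }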
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (isBottomUp && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+
+ if (!isBottomUp && SU->isCall) {
+ HazardRec->Reset();
+ }
+}
+
+static void resetVRegCycle(SUnit *SU);
+
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
- DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
#endif
- // FIXME: Handle noop hazard.
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node, its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
SU->setHeightToAtLeast(CurCycle);
+
+ // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
Sequence.push_back(SU);
AvailableQueue->ScheduledNode(SU);
- ReleasePredecessors(SU, CurCycle);
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
// Release all the implicit physical register defs that are live.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (I->isAssignedRegDep()) {
- if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
- assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
- assert(LiveRegDefs[I->getReg()] == SU &&
- "Physical register dependency violated?");
- --NumLiveRegs;
- LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
- }
+ // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
}
}
+ resetVRegCycle(SU);
+
SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
}
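To make the issue-count rule concrete, here is a toy model (illustrative only) of the case where the hazard recognizer is disabled and -sched-avg-ipc=2: the cycle advances after every second machine instruction issued.

  #include <cassert>

  struct ToyIssueModel {
    unsigned CurCycle, IssueCount, AvgIPC;
    ToyIssueModel() : CurCycle(0), IssueCount(0), AvgIPC(2) {}
    void issueMachineNode() {
      if (++IssueCount == AvgIPC) {
        ++CurCycle;     // models AdvanceToCycle(CurCycle + 1)
        IssueCount = 0; // AdvanceToCycle resets the per-cycle issue count
      }
    }
  };

  int main() {
    ToyIssueModel S;
    S.issueMachineNode(); assert(S.CurCycle == 0); // first issue: same cycle
    S.issueMachineNode(); assert(S.CurCycle == 1); // second issue: advance
    return 0;
  }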
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
-void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
if (PredSU->isAvailable) {
PredSU->isAvailable = false;
@@ -328,59 +676,98 @@
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
- if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
+ LiveRegGens[I->getReg()] = NULL;
}
}
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
if (!LiveRegDefs[I->getReg()]) {
- LiveRegDefs[I->getReg()] = SU;
++NumLiveRegs;
}
+ LiveRegDefs[I->getReg()] = SU;
- if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
- LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
}
}
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
SU->setHeightDirty();
SU->isScheduled = false;
SU->isAvailable = true;
- AvailableQueue->push(SU);
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
AvailableQueue->UnscheduledNode(SU);
}
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
/// BacktrackBottomUp - Backtrack scheduling to the cycle in which BtSU was
/// scheduled, in order to schedule a specific node.
-void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
- unsigned &CurCycle) {
- SUnit *OldSU = NULL;
- while (CurCycle > BtCycle) {
- OldSU = Sequence.back();
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
Sequence.pop_back();
if (SU->isSucc(OldSU))
// Don't try to remove SU from AvailableQueue.
SU->isAvailable = false;
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
UnscheduleNodeBottomUp(OldSU);
- --CurCycle;
AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
}
assert(!SU->isSucc(OldSU) && "Something is wrong!");
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (SUNode->isOperandOf(N))
return true;
}
@@ -390,18 +777,18 @@
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
- return NULL;
-
SDNode *N = SU->getNode();
if (!N)
return NULL;
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -409,7 +796,7 @@
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -441,22 +828,26 @@
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
ComputeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
- if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
NewSU->isTwoAddress = true;
break;
}
}
- if (TID.isCommutable())
+ if (MCID.isCommutable())
NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
ComputeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
@@ -507,6 +898,10 @@
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
}
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep D = ChainSuccs[i];
@@ -517,7 +912,7 @@
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
// Add a data dependency to reflect that NewSU reads the value defined
// by LoadSU.
@@ -599,6 +994,15 @@
AddPred(SuccSU, D);
DelDeps.push_back(std::make_pair(SuccSU, *I));
}
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true);
+ AddPred(SuccSU, D);
+ }
}
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
@@ -620,10 +1024,10 @@
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = TID.getNumDefs();
- for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -633,66 +1037,234 @@
/// CheckForLiveRegDef - Update the live register vector if the specified
/// register def of the specified SUnit clobbers any "live" registers.
-static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
std::vector<SUnit*> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- bool Added = false;
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
- if (RegAdded.insert(Reg)) {
- LRegs.push_back(Reg);
- Added = true;
+ for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+ // Check if Reg (or an alias) is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI)) {
+ LRegs.push_back(*AliasI);
}
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias)) {
- LRegs.push_back(*Alias);
- Added = true;
- }
- }
- return Added;
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
-bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
- SmallVector<unsigned, 4> &LRegs){
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (NumLiveRegs == 0)
return false;
SmallSet<unsigned, 4> RegAdded;
// If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (I->isAssignedRegDep())
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
RegAdded, LRegs, TRI);
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
if (!Node->isMachineOpcode())
continue;
- const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
- if (!TID.ImplicitDefs)
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
+
return !LRegs.empty();
}
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SmallVector<SUnit*, 4> Interferences;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ assert(Interferences[i]->isAvailable && "must still be available");
+ AvailableQueue->push(Interferences[i]);
+ }
+ return CurSU;
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+
+ // If one or more successors has been unscheduled, then the current
+ // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable) {
+ CurSU = AvailableQueue->pop();
+ }
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ Interferences.erase(Interferences.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try to
+ // duplicate the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If cross copy register class is the same as RC, then it must be possible
+ // to copy the value directly. Do not try to duplicate the def.
+ // If cross copy register class is not the same as RC, then it's possible to
+ // copy the value but it requires cross register class copies and it is
+ // expensive.
+ // If cross copy register class is null, then it's not possible to copy
+ // the value at all.
+ SUnit *NewDef = 0;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (Interferences[i]->isAvailable) {
+ AvailableQueue->push(Interferences[i]);
+ }
+ }
+ return CurSU;
+}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
- unsigned CurCycle = 0;
-
// Release any predecessors of the special Exit node.
- ReleasePredecessors(&ExitSU, CurCycle);
+ ReleasePredecessors(&ExitSU);
// Add root to Available queue.
if (!SUnits.empty()) {
@@ -704,135 +1276,29 @@
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
- SmallVector<SUnit*, 4> NotReady;
- DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
- bool Delayed = false;
- LRegsMap.clear();
- SUnit *CurSU = AvailableQueue->pop();
- while (CurSU) {
- SmallVector<unsigned, 4> LRegs;
- if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
- break;
- Delayed = true;
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- NotReady.push_back(CurSU);
- CurSU = AvailableQueue->pop();
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
}
-
- // All candidates are delayed due to live physical reg dependencies.
- // Try backtracking, code duplication, or inserting cross class copies
- // to resolve it.
- if (Delayed && !CurSU) {
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- SUnit *TrySU = NotReady[i];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
-
- // Try unscheduling up to the point where it's safe to schedule
- // this node.
- unsigned LiveCycle = CurCycle;
- for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
- unsigned Reg = LRegs[j];
- unsigned LCycle = LiveRegCycles[Reg];
- LiveCycle = std::min(LiveCycle, LCycle);
- }
- SUnit *OldSU = Sequence[LiveCycle];
- if (!WillCreateCycle(TrySU, OldSU)) {
- BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
- // Force the current node to be scheduled before the node that
- // requires the physical reg dep.
- if (OldSU->isAvailable) {
- OldSU->isAvailable = false;
- AvailableQueue->remove(OldSU);
- }
- AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
- // If one or more successors has been unscheduled, then the current
- // node is no longer avaialable. Schedule a successor that's now
- // available instead.
- if (!TrySU->isAvailable)
- CurSU = AvailableQueue->pop();
- else {
- CurSU = TrySU;
- TrySU->isPending = false;
- NotReady.erase(NotReady.begin()+i);
- }
- break;
- }
- }
-
- if (!CurSU) {
- // Can't backtrack. If it's too expensive to copy the value, then try
- // duplicate the nodes that produces these "too expensive to copy"
- // values to break the dependency. In case even that doesn't work,
- // insert cross class copies.
- // If it's not too expensive, i.e. cost != -1, issue copies.
- SUnit *TrySU = NotReady[0];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
- assert(LRegs.size() == 1 && "Can't handle this yet!");
- unsigned Reg = LRegs[0];
- SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, VT);
- const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-
- // If cross copy register class is null, then it must be possible copy
- // the value directly. Do not try duplicate the def.
- SUnit *NewDef = 0;
- if (DestRC)
- NewDef = CopyAndMoveSuccessors(LRDef);
- else
- DestRC = RC;
- if (!NewDef) {
- // Issue copies, these can be expensive cross register class copies.
- SmallVector<SUnit*, 2> Copies;
- InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- NewDef = Copies.back();
- }
-
- DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
- LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- TrySU->isAvailable = false;
- CurSU = NewDef;
- }
-
- assert(CurSU && "Unable to resolve live physical register dependencies!");
- }
-
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- NotReady[i]->isPending = false;
- // May no longer be available due to backtracking.
- if (NotReady[i]->isAvailable)
- AvailableQueue->push(NotReady[i]);
- }
- NotReady.clear();
-
- if (CurSU)
- ScheduleNodeBottomUp(CurSU, CurCycle);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
}
// Reverse the order if it is bottom up.
std::reverse(Sequence.begin(), Sequence.end());
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -879,7 +1345,7 @@
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -895,7 +1361,6 @@
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
- unsigned CurCycle = 0;
AvailableQueue->setCurCycle(CurCycle);
// Release any successors of the special Entry node.
@@ -909,19 +1374,19 @@
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
SUnit *CurSU = AvailableQueue->pop();
-
+
if (CurSU)
- ScheduleNodeTopDown(CurSU, CurCycle);
+ ScheduleNodeTopDown(CurSU);
++CurCycle;
AvailableQueue->setCurCycle(CurCycle);
}
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -929,70 +1394,334 @@
//===----------------------------------------------------------------------===//
-// RegReductionPriorityQueue Implementation
+// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
-//
+//
namespace {
- template<class SF>
- class RegReductionPriorityQueue;
-
- /// bu_ls_rr_sort - Priority function for bottom up register pressure
- // reduction scheduler.
- struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
- bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
- bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // td_ls_rr_sort - Priority function for top down register pressure reduction
- // scheduler.
- struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
- td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// td_ls_rr_sort - Priority function for top down register pressure reduction
+// scheduler.
+struct td_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = false,
+ HasReadyFilter = false
};
- // src_ls_rr_sort - Priority function for source order scheduler.
- struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
- src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
- : SPQ(spq) {}
- src_ls_rr_sort(const src_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
- struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
- hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
- : SPQ(spq) {}
- hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
- bool operator()(const SUnit* left, const SUnit* right) const;
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
- // scheduler.
- struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
- ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
- : SPQ(spq) {}
- ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
- bool operator()(const SUnit* left, const SUnit* right) const;
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
-} // end anonymous namespace
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void ScheduledNode(SUnit *SU);
+
+ void UnscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
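popFromQueueImpl is a linear max-selection followed by a swap-with-back erase. A standalone equivalent (illustrative; NodeQueueId bookkeeping omitted, and the queue is assumed non-empty, as pop() guarantees):

  #include <algorithm>
  #include <vector>

  // Picker(A, B) returning true means B is preferred, so the scan is
  // exactly std::max_element under the same comparator.
  template <class T, class Compare>
  T popBest(std::vector<T> &Q, Compare Picker) {
    typename std::vector<T>::iterator Best =
        std::max_element(Q.begin(), Q.end(), Picker);
    T V = *Best;
    *Best = Q.back(); // O(1) unordered erase
    Q.pop_back();
    return V;
  }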
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ if (isBottomUp())
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ else
+ dbgs() << "Depth " << SU->getDepth() << ": ";
+ SU->dump(DAG);
+ }
+ }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
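A worked example of the bias, with plain bools standing in for the isScheduleLow flags (illustrative): a schedule-low node always loses the comparison, so nodes such as TokenFactor sink toward the bottom of the final schedule.

  #include <cassert>

  static int bias(bool LSchedLow, bool RSchedLow) {
    if (LSchedLow != RSchedLow)
      return LSchedLow < RSchedLow ? 1 : -1;
    return 0;
  }

  int main() {
    assert(bias(true, false) == -1); // schedule right above left
    assert(bias(false, true) == 1);  // schedule left above right
    assert(bias(false, false) == 0); // no bias
    return 0;
  }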
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
@@ -1019,413 +1748,331 @@
if (SethiUllmanNumber == 0)
SethiUllmanNumber = 1;
-
+
return SethiUllmanNumber;
}
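For background, the recurrence this routine generalizes is the textbook Sethi-Ullman numbering: a leaf needs one register; an interior node needs the larger of its children's numbers when they differ, and one extra when they tie. A minimal sketch on a toy binary expression tree (illustrative; the routine above extends this to DAG nodes with any number of operands plus the special cases handled before the recurrence):

  #include <algorithm>

  struct Expr { Expr *L, *R; Expr() : L(0), R(0) {} };

  static unsigned sethiUllman(const Expr *E) {
    if (!E->L && !E->R)
      return 1; // leaf
    unsigned l = E->L ? sethiUllman(E->L) : 0;
    unsigned r = E->R ? sethiUllman(E->R) : 0;
    return (l == r) ? l + 1 : std::max(l, r);
  }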
-namespace {
- template<class SF>
- class RegReductionPriorityQueue : public SchedulingPriorityQueue {
- std::vector<SUnit*> Queue;
- SF Picker;
- unsigned CurQueueId;
- bool TracksRegPressure;
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
- protected:
- // SUnits - The SUnits for the current graph.
- std::vector<SUnit> *SUnits;
-
- MachineFunction &MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const TargetLowering *TLI;
- ScheduleDAGRRList *scheduleDAG;
-
- // SethiUllmanNumbers - The SethiUllman number for each node.
- std::vector<unsigned> SethiUllmanNumbers;
-
- /// RegPressure - Tracking current reg pressure per register class.
- ///
- std::vector<unsigned> RegPressure;
-
- /// RegLimit - Tracking the number of allocatable registers per register
- /// class.
- std::vector<unsigned> RegLimit;
-
- public:
- RegReductionPriorityQueue(MachineFunction &mf,
- bool tracksrp,
- const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri,
- const TargetLowering *tli)
- : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
- MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
- if (TracksRegPressure) {
- unsigned NumRC = TRI->getNumRegClasses();
- RegLimit.resize(NumRC);
- RegPressure.resize(NumRC);
- std::fill(RegLimit.begin(), RegLimit.end(), 0);
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
- }
- }
-
- void initNodes(std::vector<SUnit> &sunits) {
- SUnits = &sunits;
- // Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
- // Reroute edges to nodes with multiple uses.
- PrescheduleNodesWithMultipleUses();
- // Calculate node priorities.
- CalculateSethiUllmanNumbers();
- }
-
- void addNode(const SUnit *SU) {
- unsigned SUSize = SethiUllmanNumbers.size();
- if (SUnits->size() > SUSize)
- SethiUllmanNumbers.resize(SUSize*2, 0);
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
-
- void updateNode(const SUnit *SU) {
- SethiUllmanNumbers[SU->NodeNum] = 0;
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
-
- void releaseState() {
- SUnits = 0;
- SethiUllmanNumbers.clear();
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- }
-
- unsigned getNodePriority(const SUnit *SU) const {
- assert(SU->NodeNum < SethiUllmanNumbers.size());
- unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
- if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
- // CopyToReg should be close to its uses to facilitate coalescing and
- // avoid spilling.
- return 0;
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::INSERT_SUBREG)
- // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
- // close to their uses to facilitate coalescing.
- return 0;
- if (SU->NumSuccs == 0 && SU->NumPreds != 0)
- // If SU does not have a register use, i.e. it doesn't produce a value
- // that would be consumed (e.g. store), then it terminates a chain of
- // computation. Give it a large SethiUllman number so it will be
- // scheduled right before its predecessors that it doesn't lengthen
- // their live ranges.
- return 0xffff;
- if (SU->NumPreds == 0 && SU->NumSuccs != 0)
- // If SU does not have a register def, schedule it close to its uses
- // because it does not lengthen any live ranges.
- return 0;
- return SethiUllmanNumbers[SU->NodeNum];
- }
-
- unsigned getNodeOrdering(const SUnit *SU) const {
- return scheduleDAG->DAG->GetOrdering(SU->getNode());
- }
-
- bool empty() const { return Queue.empty(); }
-
- void push(SUnit *U) {
- assert(!U->NodeQueueId && "Node in the queue already");
- U->NodeQueueId = ++CurQueueId;
- Queue.push_back(U);
- }
-
- SUnit *pop() {
- if (empty()) return NULL;
- std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
- E = Queue.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != prior(Queue.end()))
- std::swap(*Best, Queue.back());
- Queue.pop_back();
- V->NodeQueueId = 0;
- return V;
- }
-
- void remove(SUnit *SU) {
- assert(!Queue.empty() && "Queue is empty!");
- assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
- SU);
- if (I != prior(Queue.end()))
- std::swap(*I, Queue.back());
- Queue.pop_back();
- SU->NodeQueueId = 0;
- }
-
- bool HighRegPressure(const SUnit *SU) const {
- if (!TLI)
- return false;
-
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] >= RegLimit[RCId])
- return true; // Reg pressure already high.
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- }
-
- return false;
- }
-
- void ScheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
-
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- }
-
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
-
- dumpRegPressure();
- }
-
- void UnscheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
-
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
-
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
- continue;
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- }
-
- dumpRegPressure();
- }
-
- void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
- scheduleDAG = scheduleDag;
- }
-
- void dumpRegPressure() const {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
- unsigned Id = RC->getID();
- unsigned RP = RegPressure[Id];
- if (!RP) continue;
- DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
- << '\n');
- }
- }
-
- protected:
- bool canClobber(const SUnit *SU, const SUnit *Op);
- void AddPseudoTwoAddrDeps();
- void PrescheduleNodesWithMultipleUses();
- void CalculateSethiUllmanNumbers();
- };
-
- typedef RegReductionPriorityQueue<bu_ls_rr_sort>
- BURegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<td_ls_rr_sort>
- TDRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<src_ls_rr_sort>
- SrcRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
- HybridBURRPriorityQueue;
-
- typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
- ILPBURRPriorityQueue;
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors, so that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
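
The priority above leans on the classic Sethi-Ullman labeling: a node's number estimates how many registers are needed to evaluate the subtree rooted at it, so evaluating the higher-numbered operand first minimizes peak register usage. A minimal standalone C++ sketch of that labeling follows (hypothetical Node type; the patch's CalcNodeSethiUllmanNumber works over SUnits, not a plain tree):

  #include <algorithm>

  // Hypothetical binary expression node, for illustration only.
  struct Node {
    const Node *Left = nullptr;
    const Node *Right = nullptr;
  };

  // Classic Sethi-Ullman labeling: a leaf needs one register; an inner
  // node can share registers between its children unless their labels
  // tie, in which case one extra register is required.
  int sethiUllman(const Node *N) {
    if (!N->Left)                  // leaf
      return 1;
    if (!N->Right)                 // unary node
      return sethiUllman(N->Left);
    int L = sethiUllman(N->Left);
    int R = sethiUllman(N->Right);
    return L == R ? L + 1 : std::max(L, R);
  }
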
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
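
The check above reduces to a per-register-class capacity test. A minimal sketch, with hypothetical fixed-size arrays standing in for the RegPressure and RegLimit members:

  // Hypothetical per-class state; the patch indexes the real arrays by
  // TargetRegisterClass ID.
  constexpr unsigned NumRCs = 4;
  unsigned Pressure[NumRCs] = {0, 0, 0, 0};
  unsigned Limit[NumRCs] = {8, 8, 16, 16};

  // True if issuing a def of Cost registers in class RCId would reach
  // or exceed the class limit -- the condition HighRegPressure tests
  // for every register def of every unscheduled predecessor.
  bool wouldExceedLimit(unsigned RCId, unsigned Cost) {
    return Pressure[RCId] + Cost >= Limit[RCId];
  }
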
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+// Compute the register pressure contribution of this instruction by counting up
+// for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
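
So the function returns a signed score: positive for a node whose scheduling mostly exposes uses in already-saturated classes, negative for one whose defs would relieve such a class. A simplified standalone sketch over hypothetical use/def class lists:

  #include <vector>

  // Hypothetical summary of a candidate node: register classes of the
  // values it uses (not yet live) and of the values it defines.
  struct Candidate {
    std::vector<unsigned> UseRCs;
    std::vector<unsigned> DefRCs;
  };

  // Count up for uses and down for defs, but only in register classes
  // that are already at or over their limit.
  int regPressureDiff(const Candidate &C,
                      const unsigned *Pressure, const unsigned *Limit) {
    int PDiff = 0;
    for (unsigned RC : C.UseRCs)
      if (Pressure[RC] >= Limit[RC]) ++PDiff;
    for (unsigned RC : C.DefRCs)
      if (Pressure[RC] >= Limit[RC]) --PDiff;
    return PDiff;
  }
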
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ dumpRegPressure();
+}
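
The clamp at zero above is worth calling out: the pressure counters are unsigned, and since tracking is imprecise an unchecked subtraction could wrap around and poison every later comparison. A sketch of the defensive update:

  // Release Cost registers of class RCId when a node's defs die.
  // Clamping mirrors the "tracking is imprecise" comment above.
  void releaseDef(unsigned *Pressure, unsigned RCId, unsigned Cost) {
    if (Pressure[RCId] < Cost)
      Pressure[RCId] = 0; // don't wrap an unsigned counter
    else
      Pressure[RCId] -= Cost;
  }
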
+
+void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
@@ -1457,13 +2104,242 @@
return Scratches;
}
-template <typename RRSort>
-static bool BURRSort(const SUnit *left, const SUnit *right,
- const RegReductionPriorityQueue<RRSort> *SPQ) {
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *PredSU = I->getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live in opers and live out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ I->getSUnit()->isVRegCycle = true;
+ }
+}
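
The shape being matched is "every operand is a live-in copy, every user is a live-out copy", i.e. a single-block induction-variable update. A reduced sketch with a hypothetical MiniSU type, not the patch's SUnit:

  #include <vector>

  // Hypothetical mini scheduling unit, for illustration only.
  struct MiniSU {
    std::vector<MiniSU *> Preds, Succs;
    bool IsLiveInCopy = false;  // stands in for CopyFromReg of a vreg
    bool IsLiveOutCopy = false; // stands in for CopyToReg of a vreg
    bool IsVRegCycle = false;
  };

  // Mark SU when every operand comes from a live-in copy and every user
  // is a live-out copy. Marking the operands too penalizes other uses
  // of the same vreg so that SU becomes the register's "kill".
  void markVRegCycle(MiniSU *SU) {
    if (SU->Preds.empty() || SU->Succs.empty())
      return;
    for (MiniSU *P : SU->Preds)
      if (!P->IsLiveInCopy) return;
    for (MiniSU *S : SU->Succs)
      if (!S->IsLiveOutCopy) return;
    SU->IsVRegCycle = true;
    for (MiniSU *P : SU->Preds)
      P->IsVRegCycle = true;
  }
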
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ I->getSUnit()->isVRegCycle = 0;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->isVRegCycle &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ BUHasStall(right, RHeight, SPQ);
+
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling either one will cause a pipeline stall, sort them
+ // according to their height.
+ if (LStall) {
+ if (!RStall) {
+ DEBUG(++FactorCount[FactStall]);
+ return 1;
+ }
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactStall]);
+ return LHeight > RHeight ? 1 : -1;
+ }
+ } else if (RStall) {
+ DEBUG(++FactorCount[FactStall]);
+ return -1;
+ }
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency)) {
+ if (DisableSchedCycles) {
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactHeight]);
+ return LHeight > RHeight ? 1 : -1;
+ }
+ }
+ else {
+ // If neither instruction stalls (!LStall && !RStall) then height is
+ // already covered, so only depth matters. We also reach this point if
+ // both stall but have the same height.
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(++FactorCount[FactDepth]);
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ }
+ if (left->Latency != right->Latency) {
+ DEBUG(++FactorCount[FactOther]);
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ }
+ return 0;
+}
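
The helper is a three-way comparison so callers can fall through to further tie-breakers: -1 means left has higher priority, 1 means right does, 0 defers. A reduced model of the ordering it encodes (stall status first, then height among stallers, then depth, then latency; the real code also consults DisableSchedCycles and the VRegCycle penalty):

  // Reduced model of BUCompareLatency's result convention.
  struct LatencyInfo {
    bool Stall;  // would stall if issued now
    int Height;  // longest path to the exit
    int Depth;   // longest path from the entry
    int Latency; // own instruction latency
  };

  int compareLatency(const LatencyInfo &L, const LatencyInfo &R) {
    if (L.Stall != R.Stall)
      return L.Stall ? 1 : -1;           // delay the stalling node
    if (L.Stall && L.Height != R.Height) // both stall: taller waits
      return L.Height > R.Height ? 1 : -1;
    if (L.Depth != R.Depth)              // deeper node goes first
      return L.Depth < R.Depth ? 1 : -1;
    if (L.Latency != R.Latency)
      return L.Latency > R.Latency ? 1 : -1;
    return 0;                            // defer to other tie-breakers
  }
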
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ DEBUG(++FactorCount[FactRegUses]);
+ #ifndef NDEBUG
+ const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+ // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
- if (LPriority != RPriority)
+
+ // Be really careful about hoisting call operands above previous calls.
+ // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority) {
+ DEBUG(++FactorCount[FactStatic]);
return LPriority > RPriority;
+ }
+
+ // If one or both of the nodes are calls and their Sethi-Ullman numbers
+ // are the same, keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer the node with the lower non-zero order number; an order
+ // number of zero means it has no source-order preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
// e.g.
@@ -1484,33 +2360,62 @@
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist)
+ if (LDist != RDist) {
+ DEBUG(++FactorCount[FactOther]);
return LDist < RDist;
+ }
// How many registers become live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch)
+ if (LScratch != RScratch) {
+ DEBUG(++FactorCount[FactOther]);
return LScratch > RScratch;
+ }
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
- assert(left->NodeQueueId && right->NodeQueueId &&
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight()) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (left->getDepth() != right->getDepth()) {
+ DEBUG(++FactorCount[FactDepth]);
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
+ DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
// Bottom up
-bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1522,79 +2427,190 @@
return BURRSort(left, right, SPQ);
}
-bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
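
In other words, availability is gated on three things: pressure relief is always admitted, a node whose height is still more than ReadyDelay cycles away is held back, and the hazard recognizer gets a veto. A condensed sketch, with the hazard result passed in as a plain flag rather than queried from a recognizer:

  // Keep a node out of the available queue while its remaining stall
  // can still hide work such as spill code. ReadyDelay mirrors the
  // constant in hybrid_ls_rr_sort::isReady.
  bool hybridIsReady(unsigned Height, unsigned CurCycle,
                     bool MayReducePressure, bool HasHazard) {
    const unsigned ReadyDelay = 3;
    if (MayReducePressure) return true; // always admit pressure relief
    if (Height > CurCycle + ReadyDelay) return false;
    return !HasHazard;
  }
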
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
- if (LHigh && !RHigh)
+ if (LHigh && !RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
return true;
- else if (!LHigh && RHigh)
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
return false;
- else if (!LHigh && !RHigh) {
- // Low register pressure situation, schedule for latency if possible.
- bool LStall = left->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < left->getHeight();
- bool RStall = right->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < right->getHeight();
- // If scheduling one of the node will cause a pipeline stall, delay it.
- // If scheduling either one of the node will cause a pipeline stall, sort
- // them according to their height.
- // If neither will cause a pipeline stall, try to reduce register pressure.
- if (LStall) {
- if (!RStall)
- return true;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- } else if (RStall)
- return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
- // If either node is scheduling for latency, sort them by height and latency
- // first.
- if (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency) {
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- if (left->Latency != right->Latency)
- return left->Latency > right->Latency;
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ DEBUG(++FactorCount[FactPressureDiff]);
+ DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+ << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ DEBUG(++FactorCount[FactRegUses]);
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
+ }
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ DEBUG(++FactorCount[FactDepth]);
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(++FactorCount[FactHeight]);
+ return left->getHeight() > right->getHeight();
}
}
return BURRSort(left, right, SPQ);
}
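
MaxReorderWindow acts as a dead zone in the comparisons above: critical-path depth or height may only override the register-reduction order when the spread is large enough to matter, which keeps the heuristic from thrashing on near-ties. A one-function sketch:

  #include <cstdlib>

  // Only honor a depth (or height) difference when it exceeds the
  // reorder window; MaxReorderWindow is a tuning knob in the patch.
  bool depthDecides(int LDepth, int RDepth, int MaxReorderWindow) {
    return std::abs(LDepth - RDepth) > MaxReorderWindow;
  }
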
-bool ilp_ls_rr_sort::operator()(const SUnit *left,
- const SUnit *right) const {
- bool LHigh = SPQ->HighRegPressure(left);
- bool RHigh = SPQ->HighRegPressure(right);
- // Avoid causing spills. If register pressure is high, schedule for
- // register pressure reduction.
- if (LHigh && !RHigh)
- return true;
- else if (!LHigh && RHigh)
- return false;
- else if (!LHigh && !RHigh) {
- // Low register pressure situation, schedule to maximize instruction level
- // parallelism.
- if (left->NumPreds > right->NumPreds)
- return false;
- else if (left->NumPreds < right->NumPreds)
- return false;
- }
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
- return BURRSort(left, right, SPQ);
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
+ for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+ initVRegCycle(&sunits[i]);
+ }
+ }
}
-template<class SF>
-bool
-RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
if (SU->isTwoAddress) {
unsigned Opc = SU->getNode()->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- unsigned NumRes = TID.getNumDefs();
- unsigned NumOps = TID.getNumOperands() - NumRes;
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
for (unsigned i = 0; i != NumOps; ++i) {
- if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
SDNode *DU = SU->getNode()->getOperand(i).getNode();
if (DU->getNodeId() != -1 &&
Op->OrigNode == &(*SUnits)[DU->getNodeId()])
@@ -1605,15 +2621,35 @@
return false;
}
-/// hasCopyToRegUse - Return true if SU has a value successor that is a
-/// CopyToReg node.
-static bool hasCopyToRegUse(const SUnit *SU) {
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- const SUnit *SuccSU = I->getSUnit();
- if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
- return true;
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successors' explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+ ScheduleDAGRRList *scheduleDAG,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ const unsigned *ImpDefs
+ = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ if (!ImpDefs)
+ return false;
+
+ for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
+ SI != SE; ++SI) {
+ SUnit *SuccSU = SI->getSUnit();
+ for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
+ PE = SuccSU->Preds.end(); PI != PE; ++PI) {
+ if (!PI->isAssignedRegDep())
+ continue;
+
+ for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries a
+ // topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+ }
+ }
}
return false;
}
@@ -1628,7 +2664,7 @@
const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
const unsigned *SUImpDefs =
@@ -1637,7 +2673,7 @@
return false;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
+ if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
continue;
@@ -1683,8 +2719,7 @@
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
-template<class SF>
-void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Visit all the nodes in topological order, working top-down.
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
@@ -1722,7 +2757,7 @@
if (PredSU->NumSuccs == 1)
continue;
// Avoid prescheduling to copies from virtual registers, which don't behave
- // like other nodes from the perspective of scheduling // heuristics.
+ // like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister
@@ -1776,23 +2811,23 @@
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
-template<class SF>
-void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
if (!SU->isTwoAddress)
continue;
SDNode *Node = SU->getNode();
- if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
continue;
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
unsigned Opc = Node->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- unsigned NumRes = TID.getNumDefs();
- unsigned NumOps = TID.getNumOperands() - NumRes;
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
for (unsigned j = 0; j != NumOps; ++j) {
- if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
continue;
SDNode *DU = SU->getNode()->getOperand(j).getNode();
if (DU->getNodeId() == -1)
@@ -1835,8 +2870,9 @@
SuccOpc == TargetOpcode::INSERT_SUBREG ||
SuccOpc == TargetOpcode::SUBREG_TO_REG)
continue;
- if ((!canClobber(SuccSU, DUSU) ||
- (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
+ (!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
@@ -1851,20 +2887,10 @@
}
}
-/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
-/// scheduling units.
-template<class SF>
-void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
- SethiUllmanNumbers.assign(SUnits->size(), 0);
-
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
- CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
-}
-
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
unsigned Limit) {
unsigned Sum = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -1884,6 +2910,9 @@
// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res < 0;
+
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
@@ -1916,7 +2945,7 @@
if (left->NumSuccsLeft != right->NumSuccsLeft)
return left->NumSuccsLeft > right->NumSuccsLeft;
- assert(left->NodeQueueId && right->NodeQueueId &&
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -1926,68 +2955,74 @@
//===----------------------------------------------------------------------===//
llvm::ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
BURegReductionPriorityQueue *PQ =
new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
- PQ->setScheduleDAG(SD);
- return SD;
-}
-
-llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
- TDRegReductionPriorityQueue *PQ =
- new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
+ TDRegReductionPriorityQueue *PQ =
+ new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
SrcRegReductionPriorityQueue *PQ =
new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
HybridBURRPriorityQueue *PQ =
new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
ILPBURRPriorityQueue *PQ =
new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index f1bf82a..71f07d6 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,25 +17,35 @@
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
+// This allows the latency-based scheduler to notice high-latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency' "
+ "instructions take for targets with no itinerary"));
+
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf) {
-}
+ : ScheduleDAG(mf),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -72,10 +82,15 @@
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
SU->SchedulingPref = Old->SchedulingPref;
Old->isCloned = true;
return SU;
@@ -85,7 +100,7 @@
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
+ const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
@@ -97,7 +112,7 @@
unsigned ResNo = User->getOperand(2).getResNo();
if (Def->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
if (ResNo >= II.getNumDefs() &&
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg;
@@ -108,29 +123,28 @@
}
}
-static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
- SelectionDAG *DAG) {
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
- SDNode *FlagDestNode = Flag.getNode();
+ SDNode *GlueDestNode = Glue.getNode();
- // Don't add a flag from a node to itself.
- if (FlagDestNode == N) return;
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return;
- // Don't add a flag to something which already has a flag.
- if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+ // Don't add glue to something which already has glue.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
VTs.push_back(N->getValueType(I));
- if (AddFlag)
- VTs.push_back(MVT::Flag);
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
SmallVector<SDValue, 4> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
- if (FlagDestNode)
- Ops.push_back(Flag);
+ if (GlueDestNode)
+ Ops.push_back(Glue);
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
MachineSDNode::mmo_iterator Begin = 0, End = 0;
@@ -149,9 +163,9 @@
MN->setMemRefs(Begin, End);
}
-/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
-/// offsets are not far apart (target specific), it add MVT::Flag inputs and
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
@@ -213,20 +227,20 @@
if (NumLoads == 0)
return;
- // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensures they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0, 0), true, DAG);
+ AddGlue(Lead, SDValue(0, 0), true, DAG);
- SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
- bool OutFlag = I < E - 1;
+ bool OutGlue = I < E - 1;
SDNode *Load = Loads[I];
- AddFlags(Load, InFlag, OutFlag, DAG);
+ AddGlue(Load, InGlue, OutGlue, DAG);
- if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues() - 1);
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
++LoadsClustered;
}
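
The net effect of the loop is to thread a glue value through the cluster so each load must be emitted immediately after its predecessor, in increasing address order. A standalone sketch of the chaining pattern (hypothetical GluedNode type, not SDNodes):

  #include <vector>

  // Hypothetical node whose GluePred must be scheduled immediately
  // before it -- the constraint MVT::Glue imposes on the scheduler.
  struct GluedNode {
    GluedNode *GluePred = nullptr;
  };

  // Thread each load to its predecessor so the whole cluster is
  // emitted back-to-back in increasing-address order.
  void glueTogether(std::vector<GluedNode *> &Loads) {
    for (size_t I = 1; I < Loads.size(); ++I)
      Loads[I]->GluePred = Loads[I - 1];
  }
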
@@ -242,8 +256,8 @@
continue;
unsigned Opc = Node->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- if (TID.mayLoad())
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
// Cluster loads from "near" addresses into combined SUnits.
ClusterNeighboringLoads(Node);
}
@@ -266,75 +280,106 @@
// FIXME: Multiply by 2 because we may clone nodes during scheduling.
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
-
+
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
-
+
+ SmallVector<SUnit*, 8> CallSUnits;
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
-
+
// Add all operands to the worklist unless they've already been added.
for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
if (Visited.insert(NI->getOperand(i).getNode()))
Worklist.push_back(NI->getOperand(i).getNode());
-
+
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
continue;
-
+
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
-
+
SUnit *NodeSUnit = NewSUnit(NI);
-
- // See if anything is flagged to this node, if so, add them to flagged
- // nodes. Nodes can have at most one flag input and one flag output. Flags
- // are required to be the last operand and result of a node.
-
- // Scan up to find flagged preds.
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
SDNode *N = NI;
while (N->getNumOperands() &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
N = N->getOperand(N->getNumOperands()-1).getNode();
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
}
-
- // Scan down to find any flagged succs.
+
+ // Scan down to find any glued succs.
N = NI;
- while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
- SDValue FlagVal(N, N->getNumValues()-1);
-
- // There are either zero or one users of the Flag result.
- bool HasFlagUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
UI != E; ++UI)
- if (FlagVal.isOperandOf(*UI)) {
- HasFlagUse = true;
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
break;
}
- if (!HasFlagUse) break;
+ if (!HasGlueUse) break;
}
-
- // If there are flag operands involved, N is now the bottom-most node
- // of the sequence of nodes that are flagged together.
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
// Update the SUnit.
NodeSUnit->setNode(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
}
void ScheduleDAGSDNodes::AddSchedEdges() {
- const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
@@ -343,22 +388,22 @@
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
SUnit *SU = &SUnits[su];
SDNode *MainNode = SU->getNode();
-
+
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
- if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
SU->isTwoAddress = true;
break;
}
}
- if (TID.isCommutable())
+ if (MCID.isCommutable())
SU->isCommutable = true;
}
-
+
// Find all predecessors and successors of the group.
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
SU->hasPhysRegClobbers = true;
@@ -368,7 +413,7 @@
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
SU->hasPhysRegDefs = true;
}
-
+
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *OpN = N->getOperand(i).getNode();
if (isPassiveNode(OpN)) continue; // Not scheduled.
@@ -377,7 +422,7 @@
if (OpSU == SU) continue; // In the same group.
EVT OpVT = N->getOperand(i).getValueType();
- assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
bool isChain = OpVT == MVT::Other;
unsigned PhysReg = 0;
@@ -391,11 +436,15 @@
// it requires a cross class copy (cost < 0). That means we are only
// treating "expensive to copy" register dependency as physical register
// dependency. This may change in the future though.
- if (Cost >= 0)
+ if (Cost >= 0 && !StressSched)
PhysReg = 0;
// If this is a ctrl dep, latency is 1.
unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+      if (isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
@@ -403,7 +452,17 @@
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
- SU->addPred(dep);
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
}
}
}
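
The addPred branch above leans on a subtlety: SUnit::addPred returns false when the edge already exists, which happens when one SUnit consumes several values of another (glued groups or duplicate operands). A toy standalone model of that bookkeeping, with hypothetical names rather than the LLVM API:

#include <cassert>
#include <set>
#include <utility>

int main() {
  // Edges are (consumer, producer) pairs; std::set::insert plays the role of
  // addPred and reports a duplicate edge via its bool result.
  std::set<std::pair<int, int> > Edges;
  unsigned NumRegDefsLeft = 2;            // the producer defines two values

  // The consumer uses the same producer twice (e.g. x = add v, v).
  for (int Use = 0; Use < 2; ++Use) {
    bool Added = Edges.insert(std::make_pair(1, 0)).second;
    if (!Added && NumRegDefsLeft > 1)
      --NumRegDefsLeft;                   // duplicate use: reduce the defs
  }
  assert(Edges.size() == 1 && NumRegDefsLeft == 1);
  return 0;
}

As in the patch, the counter is never driven to zero, so over-counting duplicates cannot make the producer look fully consumed.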
@@ -412,7 +471,7 @@
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
-/// flagged together nodes with a single SUnit.
+/// glued together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
@@ -422,27 +481,98 @@
AddSchedEdges();
}
+// Initialize NodeNumDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getValueType(DefIdx);
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
SU->Latency = 1;
return;
}
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty()) {
- SU->Latency = 1;
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
return;
}
-
+
// Compute the latency for the node. We use the sum of the latencies for
- // all nodes flagged together into this SUnit.
+ // all nodes glued together into this SUnit.
SU->Latency = 0;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- if (N->isMachineOpcode()) {
- SU->Latency += InstrItins.
- getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
- }
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
}
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
@@ -451,32 +581,25 @@
if (ForceUnitLatencies())
return;
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty())
- return;
-
if (dep.getKind() != SDep::Data)
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (Def->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (DefIdx >= II.getNumDefs())
- return;
- int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
- if (DefCycle < 0)
- return;
- int UseCycle = 1;
- if (Use->isMachineOpcode()) {
- const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
- }
- if (UseCycle >= 0) {
- int Latency = DefCycle - UseCycle + 1;
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ if (Use->isMachineOpcode())
+    // Adjust the use operand index by the number of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+      // latency so as not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
}
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
@@ -487,14 +610,14 @@
SU->getNode()->dump(DAG);
dbgs() << "\n";
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
dbgs() << " ";
- FlaggedNodes.back()->dump(DAG);
+ GluedNodes.back()->dump(DAG);
dbgs() << "\n";
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
}
@@ -507,6 +630,35 @@
};
}
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with source
+ // order number right after the N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
+ for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+ if (DVs[i]->isInvalidated())
+ continue;
+ unsigned DVOrder = DVs[i]->getOrder();
+ if (!Order || DVOrder == ++Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
+ DVs[i]->setIsInvalidated();
+ }
+ }
+}
+
// ProcessSourceNode - Process nodes with source order numbers. These are added
// to a vector which EmitSchedule uses to determine how to insert dbg_value
// instructions in the right order.
@@ -516,8 +668,12 @@
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
SmallSet<unsigned, 8> &Seen) {
unsigned Order = DAG->GetOrdering(N);
- if (!Order || !Seen.insert(Order))
+ if (!Order || !Seen.insert(Order)) {
+ // Process any valid SDDbgValues even if node does not have any order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
return;
+ }
MachineBasicBlock *BB = Emitter.getBlock();
if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
@@ -527,25 +683,7 @@
}
Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
- if (!N->getHasDebugValue())
- return;
- // Opportunistically insert immediate dbg_value uses, i.e. those with source
- // order number right after the N.
- MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
- SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
- for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
- if (DVs[i]->isInvalidated())
- continue;
- unsigned DVOrder = DVs[i]->getOrder();
- if (DVOrder == ++Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
- if (DbgMI) {
- Orders.push_back(std::make_pair(DVOrder, DbgMI));
- BB->insert(InsertPos, DbgMI);
- }
- DVs[i]->setIsInvalidated();
- }
- }
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
@@ -578,25 +716,25 @@
}
// For pre-regalloc scheduling, create instructions corresponding to the
- // SDNode and any flagged SDNodes and append them to the block.
+ // SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
EmitPhysRegCopy(SU, CopyVRBaseMap);
continue;
}
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
- N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
- SDNode *N = FlaggedNodes.back();
- Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N;
+ N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
@@ -625,16 +763,8 @@
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
-#ifndef NDEBUG
- unsigned LastDIOrder = 0;
-#endif
for (; DI != DE &&
(*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
-#ifndef NDEBUG
- assert((*DI)->getOrder() >= LastDIOrder &&
- "SDDbgValue nodes must be in source order!");
- LastDIOrder = (*DI)->getOrder();
-#endif
if ((*DI)->isInvalidated())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 842fc8c..9c27b2e 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -20,13 +20,13 @@
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
- ///
+ ///
/// Edges between SUnits are initially based on edges in the SelectionDAG,
/// and additional edges can be added by the schedulers as heuristics.
/// SDNodes such as Constants, Registers, and a few others that are not
/// interesting to schedulers are not allocated SUnits.
///
- /// SDNodes with MVT::Flag operands are grouped along with the flagged
+  /// SDNodes with MVT::Glue operands are grouped along with the glued
/// nodes into a single SUnit so that they are scheduled together.
///
/// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
@@ -36,6 +36,7 @@
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
explicit ScheduleDAGSDNodes(MachineFunction &mf);
@@ -72,13 +73,23 @@
/// predecessors / successors info nor the temporary scheduling states.
///
SUnit *Clone(SUnit *N);
-
+
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
+ /// CopyToReg and its only active data operands are CopyFromReg within a
+ /// single block loop.
+ ///
+ void InitVRegCycleFlag(SUnit *SU);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
@@ -105,6 +116,38 @@
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ EVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ EVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
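
RegDefIter above is deliberately self-advancing rather than STL-shaped: the constructor lands on the first valid value, and Advance() folds the skip logic an STL iterator would hide in operator++. A minimal standalone analogue of the pattern (hypothetical names, not LLVM code):

#include <cassert>

// Skips entries that don't qualify (here: zeros), the way RegDefIter skips
// dead defs via hasAnyUseOfValue.
class NonZeroIter {
  const int *Cur, *End;
public:
  NonZeroIter(const int *Begin, const int *EndP) : Cur(Begin), End(EndP) {
    if (Cur != End && *Cur == 0)
      Advance();                          // land on the first valid value
  }
  bool IsValid() const { return Cur != End; }
  int GetValue() const { assert(IsValid() && "bad iterator"); return *Cur; }
  void Advance() {
    do ++Cur; while (Cur != End && *Cur == 0);
  }
};

int main() {
  const int Vals[] = { 0, 3, 0, 0, 7 };
  int Sum = 0;
  for (NonZeroIter I(Vals, Vals + 5); I.IsValid(); I.Advance())
    Sum += I.GetValue();
  assert(Sum == 10);
  return 0;
}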
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8b77d7b..20bea8e 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,7 +43,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -111,7 +110,7 @@
/// BUILD_VECTOR where all of the elements are ~0 or undef.
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -152,7 +151,7 @@
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -199,6 +198,8 @@
if (N->getOperand(0).getOpcode() == ISD::UNDEF)
return false;
unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
for (unsigned i = 1; i < NumElems; ++i) {
SDValue V = N->getOperand(i);
if (V.getOpcode() != ISD::UNDEF)
@@ -402,7 +403,7 @@
ID.AddInteger(CP->getAlignment());
ID.AddInteger(CP->getOffset());
if (CP->isMachineConstantPoolEntry())
- CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
else
ID.AddPointer(CP->getConstVal());
ID.AddInteger(CP->getTargetFlags());
@@ -431,7 +432,9 @@
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX: {
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
@@ -489,18 +492,19 @@
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
- if (N->getValueType(0) == MVT::Flag)
+ if (N->getValueType(0) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
switch (N->getOpcode()) {
default: break;
case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
return true; // Never CSE these nodes.
}
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
- if (N->getValueType(i) == MVT::Flag)
+ if (N->getValueType(i) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
return false;
@@ -596,7 +600,7 @@
Ordering->remove(N);
// If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
- SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N);
+ ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
DbgVals[i]->setIsInvalidated();
}
@@ -608,9 +612,6 @@
bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
bool Erased = false;
switch (N->getOpcode()) {
- case ISD::EntryToken:
- llvm_unreachable("EntryToken should not be in CSEMaps!");
- return false;
case ISD::HANDLENODE: return false; // noop.
case ISD::CONDCODE:
assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
@@ -640,6 +641,8 @@
}
default:
// Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
Erased = CSEMap.RemoveNode(N);
break;
}
@@ -647,7 +650,7 @@
// Verify that the node was actually in one of the CSE maps, unless it has a
// flag result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
- if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
dbgs() << "\n";
@@ -742,8 +745,9 @@
return Node;
}
-/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
-void SelectionDAG::VerifyNode(SDNode *N) {
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
switch (N->getOpcode()) {
default:
break;
@@ -767,21 +771,62 @@
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
"Wrong number of operands!");
EVT EltVT = N->getValueType(0).getVectorElementType();
- for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
assert((I->getValueType() == EltVT ||
(EltVT.isInteger() && I->getValueType().isInteger() &&
EltVT.bitsLE(I->getValueType()))) &&
"Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
break;
}
}
}
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
/// getEVTAlignment - Compute the default alignment value for the
/// given type.
///
unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
- const Type *Ty = VT == MVT::iPTR ?
+ Type *Ty = VT == MVT::iPTR ?
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
@@ -836,6 +881,12 @@
DbgInfo->clear();
}
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ANY_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
@@ -885,13 +936,25 @@
assert(VT.isInteger() && "Cannot create FP integer constant!");
EVT EltVT = VT.getScalarType();
- assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
- "APInt size does not match type size!");
+ const ConstantInt *Elt = &Val;
+ // In some cases the vector type is legal but the element type is illegal and
+ // needs to be promoted, for example v8i8 on ARM. In this case, promote the
+ // inserted value (the type does not need to match the vector element type).
+ // Any extra bits introduced will be truncated away.
+ if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+
+ assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
- ID.AddPointer(&Val);
+ ID.AddPointer(Elt);
void *IP = 0;
SDNode *N = NULL;
if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
@@ -899,7 +962,7 @@
return SDValue(N, 0);
if (!N) {
- N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT);
+ N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
}
@@ -1091,7 +1154,7 @@
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
- C->AddSelectionDAGCSEId(ID);
+ C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
@@ -1306,6 +1369,22 @@
return SDValue(N, 0);
}
+SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddPointer(Label);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
bool isTarget,
unsigned char TargetFlags) {
@@ -1348,11 +1427,11 @@
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
ID.AddPointer(MD);
-
+
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1362,9 +1441,9 @@
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
-SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- MVT ShTy = TLI.getShiftAmountTy();
+ MVT ShTy = TLI.getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1376,7 +1455,7 @@
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
- const Type *Ty = VT.getTypeForEVT(*getContext());
+ Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
@@ -1389,8 +1468,8 @@
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
VT2.getStoreSizeInBits())/8;
- const Type *Ty1 = VT1.getTypeForEVT(*getContext());
- const Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ Type *Ty2 = VT2.getTypeForEVT(*getContext());
const TargetData *TD = TLI.getTargetData();
unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
TD->getPrefTypeAlignment(Ty2));
@@ -1596,7 +1675,7 @@
// Also compute a conserative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clear();
+ KnownOne.clearAllBits();
unsigned TrailZ = KnownZero.countTrailingOnes() +
KnownZero2.countTrailingOnes();
unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
@@ -1619,8 +1698,8 @@
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
- KnownOne2.clear();
- KnownZero2.clear();
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
ComputeMaskedBits(Op.getOperand(1),
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -1662,8 +1741,8 @@
// The boolean result conforms to getBooleanContents. Fall through.
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
- if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
- BitWidth > 1)
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
+ TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
return;
case ISD::SHL:
@@ -1748,7 +1827,7 @@
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InSignBit.zext(BitWidth);
+ InSignBit = InSignBit.zext(BitWidth);
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
@@ -1775,7 +1854,7 @@
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- KnownOne.clear();
+ KnownOne.clearAllBits();
return;
}
case ISD::LOAD: {
@@ -1791,13 +1870,12 @@
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
return;
}
@@ -1806,16 +1884,15 @@
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded. Temporarily set this bit in the mask for our callee.
if (NewBits.getBoolValue())
InMask |= InSignBit;
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
@@ -1827,13 +1904,12 @@
// If the sign bit wasn't actually demanded by our caller, we don't
// want it set in the KnownZero and KnownOne result values. Reset the
// mask and reapply it to the result values.
- InMask = Mask;
- InMask.trunc(InBits);
+ InMask = Mask.trunc(InBits);
KnownZero &= InMask;
KnownOne &= InMask;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
// If the sign bit is known zero or one, the top bits match.
if (SignBitKnownZero)
@@ -1845,26 +1921,24 @@
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
return;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.zext(InBits);
- KnownZero.zext(InBits);
- KnownOne.zext(InBits);
+ APInt InMask = Mask.zext(InBits);
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
@@ -1904,7 +1978,8 @@
}
}
// fall through
- case ISD::ADD: {
+ case ISD::ADD:
+ case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
@@ -1919,7 +1994,17 @@
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then return to the caller that the low bit is unknown but that other bits
+ // are known zero.
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
return;
}
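
A worked check of the ADDE reasoning above, under the stated assumption that both addends have their low two bits known zero: the incoming carry can only disturb bit 0, so the bits above it remain known zero.

#include <cassert>
#include <cstdint>

int main() {
  uint8_t A = 0xC8;                       // 0b11001000, low two bits clear
  uint8_t B = 0x44;                       // 0b01000100, low two bits clear
  for (unsigned Carry = 0; Carry <= 1; ++Carry) {
    uint8_t Sum = static_cast<uint8_t>(A + B + Carry);
    assert(((Sum >> 1) & 1) == 0 && "bit 1 must stay known zero");
  }
  return 0;
}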
case ISD::SREM:
@@ -1974,19 +2059,29 @@
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
return;
}
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
default:
- // Allow the target to implement this method for its nodes.
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ break;
+ // Fallthrough
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
- Depth);
- }
+ // Allow the target to implement this method for its nodes.
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+ Depth);
return;
}
}
@@ -2017,12 +2112,7 @@
case ISD::Constant: {
const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
- // If negative, return # leading ones.
- if (Val.isNegative())
- return Val.countLeadingOnes();
-
- // Return # leading zeros.
- return Val.countLeadingZeros();
+ return Val.getNumSignBits();
}
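
The old leading-ones/leading-zeros branch and the new Val.getNumSignBits() agree because the sign-bit count is exactly the run of leading bits equal to the sign bit: leading ones for a negative constant, leading zeros for a non-negative one. A standalone sketch with a hand-rolled helper (not the APInt API):

#include <cassert>
#include <cstdint>

static unsigned NumSignBits(int8_t V) {
  uint8_t U = static_cast<uint8_t>(V);
  unsigned Sign = (U >> 7) & 1, N = 0;
  for (int Bit = 7; Bit >= 0 && ((U >> Bit) & 1u) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  assert(NumSignBits(-6) == 5);           // 0b11111010: five leading ones
  assert(NumSignBits(5) == 5);            // 0b00000101: five leading zeros
  return 0;
}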
case ISD::SIGN_EXTEND:
@@ -2086,7 +2176,7 @@
// The boolean result conforms to getBooleanContents. Fall through.
case ISD::SETCC:
// If setcc returns 0/-1, all bits are sign bits.
- if (TLI.getBooleanContents() ==
+ if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
@@ -2217,6 +2307,25 @@
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+
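
A quick standalone check of the identity this helper relies on: when X & Cst == 0, the OR writes Cst's bits into positions that are zero in X, which is exactly what a carry-free addition does.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xABCD0000u, Cst = 0x00000123u;
  assert((X & Cst) == 0);                 // the precondition MaskedValueIsZero checks
  assert((X | Cst) == X + Cst);           // so OR and ADD coincide
  return 0;
}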
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (NoNaNsFPMath)
@@ -2237,6 +2346,13 @@
return !C->isZero();
// TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
return false;
}
@@ -2254,45 +2370,6 @@
return false;
}
-bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
- GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
- if (!GA) return false;
- if (GA->getOffset() != 0) return false;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
- if (!GV) return false;
- return MF->getMMI().hasDebugInfo();
-}
-
-
-/// getShuffleScalarElt - Returns the scalar element that will make up the ith
-/// element of the result of the vector shuffle.
-SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
- unsigned i) {
- EVT VT = N->getValueType(0);
- if (N->getMaskElt(i) < 0)
- return getUNDEF(VT.getVectorElementType());
- unsigned Index = N->getMaskElt(i);
- unsigned NumElems = VT.getVectorNumElements();
- SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
- Index %= NumElems;
-
- if (V.getOpcode() == ISD::BIT_CONVERT) {
- V = V.getOperand(0);
- EVT VVT = V.getValueType();
- if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
- return SDValue();
- }
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return (Index == 0) ? V.getOperand(0)
- : getUNDEF(VT.getVectorElementType());
- if (V.getOpcode() == ISD::BUILD_VECTOR)
- return V.getOperand(Index);
- if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
- return getShuffleScalarElt(SVN, Index);
- return SDValue();
-}
-
-
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
@@ -2307,7 +2384,7 @@
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2320,23 +2397,22 @@
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
- return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
- return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- const uint64_t zero[] = {0, 0};
// No compile time operations on ppcf128.
if (VT == MVT::ppcf128) break;
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
return getConstantFP(apf, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(Val.bitsToFloat(), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2384,10 +2460,10 @@
APFloat::rmTowardZero, &ignored);
if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
- APInt api(VT.getSizeInBits(), 2, x);
+ APInt api(VT.getSizeInBits(), x);
return getConstant(api, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
@@ -2427,6 +2503,9 @@
"Vector element count mismatch!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, VT);
break;
case ISD::ZERO_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2441,6 +2520,9 @@
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT,
Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, VT);
break;
case ISD::ANY_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2457,6 +2539,8 @@
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
// (ext (trunx x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
@@ -2489,13 +2573,13 @@
return Operand.getNode()->getOperand(0);
}
break;
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
- && "Cannot BIT_CONVERT between types of different sizes!");
+ && "Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
- if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
- return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -2531,7 +2615,7 @@
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+  if (VT != MVT::Glue) { // Don't CSE glue producing nodes
FoldingSetNodeID ID;
SDValue Ops[1] = { Operand };
AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
@@ -2547,7 +2631,7 @@
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2688,6 +2772,13 @@
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+      // Verify that the shift amount VT is big enough to hold valid shift
+      // amounts. This catches things like trying to shift an i1024 value by an
+      // i8, which is easy to fall into in generic code that uses
+      // TLI.getShiftAmountTy().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
// handle them. Since we know the size of the shift has to be less than the
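
A worked instance of the new assertion, using a hand-rolled stand-in for LLVM's Log2_32_Ceil: an i1024 value admits shift amounts 0..1023, which need ceil(log2(1024)) = 10 bits, so an i8 shift-amount type is too narrow and would trip the assert.

#include <cassert>

static unsigned Log2Ceil(unsigned V) {    // stand-in helper; assumes V > 0
  unsigned Bits = 0;
  while ((1u << Bits) < V)
    ++Bits;
  return Bits;
}

int main() {
  assert(Log2Ceil(1024) == 10);           // valid shift amounts are 0..1023
  assert(8 < Log2Ceil(1024) && "an i8 shift amount cannot cover i1024");
  return 0;
}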
@@ -2709,6 +2800,7 @@
EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
"Vector element counts must match in FP_ROUND_INREG");
assert(EVT.bitsLE(VT) && "Not rounding down!");
+ (void)EVT;
if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
break;
}
@@ -2816,6 +2908,7 @@
assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
assert(!N1.getValueType().isVector() && !VT.isVector() &&
(N1.getValueType().isInteger() == VT.isInteger()) &&
+ N1.getValueType() != VT &&
"Wrong types for EXTRACT_ELEMENT!");
// EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
@@ -2832,11 +2925,30 @@
return getConstant(ShiftedVal.trunc(ElementSize), VT);
}
break;
- case ISD::EXTRACT_SUBVECTOR:
- if (N1.getValueType() == VT) // Trivial extraction.
- return N1;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
break;
}
+ }
if (N1C) {
if (N2C) {
@@ -2973,7 +3085,7 @@
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
@@ -2989,7 +3101,7 @@
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3031,7 +3143,31 @@
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
break;
- case ISD::BIT_CONVERT:
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
// Fold bit_convert nodes from a type to themselves.
if (N1.getValueType() == VT)
return N1;
@@ -3041,7 +3177,7 @@
// Memoize node if it doesn't produce a flag.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2, N3 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
@@ -3057,7 +3193,7 @@
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3099,6 +3235,17 @@
&ArgChains[0], ArgChains.size());
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3107,27 +3254,18 @@
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = APInt(NumBits, C->getZExtValue() & 255);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
+ APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
if (VT.isInteger())
return DAG.getConstant(Val, VT);
return DAG.getConstantFP(APFloat(Val), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Value = DAG.getNode(ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Value,
- DAG.getConstant(Shift,
- TLI.getShiftAmountTy())),
- Value);
- Shift <<= 1;
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
+ APInt Magic = SplatByte(NumBits, 0x01);
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
return Value;
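
A standalone check of both tricks above for the 32-bit case: the shift-and-or construction that SplatByte factors out, and the replacement of the old per-bit-width loop with one multiply by the 0x01010101 pattern. The multiply works because each set byte of the magic constant contributes one byte-shifted copy of the input, and the copies neither overlap nor carry.

#include <cassert>
#include <cstdint>

static uint32_t SplatByte32(uint8_t ByteVal) {
  uint32_t Val = ByteVal;
  for (unsigned Shift = 8; Shift < 32; Shift <<= 1)
    Val = (Val << Shift) | Val;           // 0xAB -> 0xABAB -> 0xABABABAB
  return Val;
}

int main() {
  assert(SplatByte32(0xAB) == 0xABABABABu);
  uint32_t Magic = SplatByte32(0x01);     // 0x01010101
  assert(0xABu * Magic == 0xABABABABu);   // the multiply splats the byte
  return 0;
}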
@@ -3143,13 +3281,12 @@
if (Str.empty()) {
if (VT.isInteger())
return DAG.getConstant(0, VT);
- else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ else if (VT == MVT::f32 || VT == MVT::f64)
return DAG.getConstantFP(0.0, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
EltVT, NumElts)));
} else
@@ -3214,13 +3351,13 @@
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
"Expecting memcpy / memset source to meet alignment requirement!");
- // If 'SrcAlign' is zero, that means the memory operation does not need load
- // the value, i.e. memset or memcpy from constant string. Otherwise, it's
- // the inferred alignment of the source. 'DstAlign', on the other hand, is the
- // specified alignment of the memory operation. If it is zero, that means
- // it's possible to change the alignment of the destination. 'MemcpyStrSrc'
- // indicates whether the memcpy source is constant so it does not need to be
- // loaded.
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
NonScalarIntSafe, MemcpyStrSrc,
DAG.getMachineFunction());
@@ -3246,15 +3383,6 @@
if (VT.bitsGT(LVT))
VT = LVT;
}
-
- // If we're optimizing for size, and there is a limit, bump the maximum number
- // of operations inserted down to 4. This is a wild guess that approximates
- // the size of a call to memcpy or memset (3 arguments + call).
- if (Limit != ~0U) {
- const Function *F = DAG.getMachineFunction().getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- Limit = 4;
- }
unsigned NumMemOps = 0;
while (Size != 0) {
@@ -3288,18 +3416,22 @@
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
// Expand memcpy to a series of load and store ops if the size operand falls
// below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+  // rather than a possibly humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3309,8 +3441,8 @@
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
-
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
@@ -3318,7 +3450,7 @@
return SDValue();
if (DstAlignCanChange) {
- const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
@@ -3346,7 +3478,8 @@
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
} else {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
@@ -3355,14 +3488,14 @@
// FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
- Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain,
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
MinAlign(SrcAlign, SrcOff));
Store = DAG.getTruncStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, VT, isVol, false,
- Align);
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
}
OutChains.push_back(Store);
SrcOff += VTSize;
@@ -3378,8 +3511,8 @@
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3389,14 +3522,16 @@
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3404,7 +3539,7 @@
return SDValue();
if (DstAlignCanChange) {
- const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
@@ -3426,7 +3561,8 @@
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3441,7 +3577,7 @@
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -3454,7 +3590,7 @@
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3464,19 +3600,21 @@
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool NonScalarIntSafe =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
- if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
NonScalarIntSafe, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
- const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
@@ -3489,15 +3627,34 @@
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
- unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, 0);
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
OutChains.push_back(Store);
- DstOff += VTSize;
+ DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
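
A standalone check of the truncation fast path above: the splat computed once for the largest store type truncates to a correct splat for each smaller trailing store, so when isTruncateFree holds there is no need to re-run getMemsetValue.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Largest = 0xABu * 0x01010101u;             // 32-bit store value
  uint16_t Smaller = static_cast<uint16_t>(Largest);  // the "free" truncate
  assert(Smaller == 0xABABu);                         // still a byte splat
  return 0;
}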
@@ -3507,8 +3664,8 @@
SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3520,7 +3677,7 @@
SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(),Align,
- isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3530,7 +3687,7 @@
SDValue Result =
TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
isVol, AlwaysInline,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3540,7 +3697,7 @@
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- true, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ true, DstPtrInfo, SrcPtrInfo);
}
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
@@ -3571,8 +3728,8 @@
SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3585,7 +3742,7 @@
SDValue Result =
getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3594,7 +3751,7 @@
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3623,7 +3780,7 @@
SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3635,7 +3792,7 @@
SDValue Result =
getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
- Align, isVol, DstSV, DstSVOff);
+ Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
@@ -3645,12 +3802,12 @@
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff);
+ DstPtrInfo);
if (Result.getNode())
return Result;
- // Emit a library call.
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+ // Emit a library call.
+ Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
@@ -3681,36 +3838,35 @@
}
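
Editor's note: getMemcpy, getMemmove and getMemset all follow the same three-tier strategy — inline expansion when the constant size is within target limits, then a target-specific hook, then a library call. A toy, self-contained illustration of that fallback chain (the function names here are made up):

#include <cstdio>

// Each tier returns null when it declines, mirroring the repeated
// "if (Result.getNode()) return Result;" checks above.
static const char *tryInlineExpansion(unsigned Size, unsigned TargetLimit) {
  return Size <= TargetLimit ? "inline loads/stores" : nullptr;
}
static const char *tryTargetLowering(bool TargetHasCustomCode) {
  return TargetHasCustomCode ? "target-specific sequence" : nullptr;
}

int main() {
  const char *R = tryInlineExpansion(4096, 64);  // too large to inline
  if (!R) R = tryTargetLowering(false);          // target declines as well
  if (!R) R = "library call (memset/memcpy)";    // final fallback
  puts(R);
}
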
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Cmp,
- SDValue Swp, const Value* PtrVal,
- unsigned Alignment) {
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
// For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
+ Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Cmp,
- SDValue Swp, MachineMemOperand *MMO) {
+ SDValue Swp, MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
@@ -3727,7 +3883,8 @@
return SDValue(E, 0);
}
SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
- Ptr, Cmp, Swp, MMO);
+ Ptr, Cmp, Swp, MMO, Ordering,
+ SynchScope);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -3737,33 +3894,39 @@
SDValue Chain,
SDValue Ptr, SDValue Val,
const Value* PtrVal,
- unsigned Alignment) {
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
- unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ // A monotonic store does not load; a release store "loads" in the sense
+ // that other stores cannot be sunk past it.
+ // (An atomicrmw obviously both loads and stores.)
+ unsigned Flags = MachineMemOperand::MOStore;
+ if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic)
+ Flags |= MachineMemOperand::MOLoad;
// For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO,
+ Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Chain,
SDValue Ptr, SDValue Val,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
@@ -3774,12 +3937,14 @@
Opcode == ISD::ATOMIC_LOAD_MAX ||
Opcode == ISD::ATOMIC_LOAD_UMIN ||
Opcode == ISD::ATOMIC_LOAD_UMAX ||
- Opcode == ISD::ATOMIC_SWAP) &&
+ Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
EVT VT = Val.getValueType();
- SDVTList VTs = getVTList(VT, MVT::Other);
+ SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
+ getVTList(VT, MVT::Other);
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
@@ -3790,14 +3955,69 @@
return SDValue(E, 0);
}
SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
- Ptr, Val, MMO);
+ Ptr, Val, MMO,
+ Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ const Value* PtrVal,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ // A monotonic load does not store; an acquire load "stores" in the sense
+ // that other loads cannot be hoisted past it.
+ unsigned Flags = MachineMemOperand::MOLoad;
+ if (Ordering > Monotonic)
+ Flags |= MachineMemOperand::MOStore;
+
+ // For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ Flags |= MachineMemOperand::MOVolatile;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ EVT VT, SDValue Chain,
+ SDValue Ptr,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain,
+ Ptr, MMO, Ordering, SynchScope);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
}
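
Editor's note: the memoperand-flag logic in the atomic getters above encodes a subtle point — an acquire load must also be treated as a store (nothing may be hoisted above it), and a release store as a load (nothing may be sunk below it). A toy restatement with assumed enum values, not the real LLVM enums:

#include <cstdio>

// Orderings stronger than Monotonic gain the opposite direction's flag.
enum Ordering { Monotonic = 1, Acquire, Release, AcquireRelease, SeqCst };
enum Flags { MOLoad = 1, MOStore = 2, MOVolatile = 4 };

static unsigned atomicLoadFlags(Ordering O) {
  unsigned F = MOLoad;
  if (O > Monotonic) F |= MOStore;  // acquire or stronger blocks hoisting
  return F | MOVolatile;            // atomics are still marked volatile here
}
static unsigned atomicStoreFlags(Ordering O) {
  unsigned F = MOStore;
  if (O > Monotonic) F |= MOLoad;   // release or stronger blocks sinking
  return F | MOVolatile;
}

int main() {
  printf("%u %u\n", atomicLoadFlags(Monotonic), atomicStoreFlags(Release));
}
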
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
-/// Allowed to return something different (and simpler) if Simplify is true.
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
DebugLoc dl) {
if (NumOps == 1)
@@ -3815,18 +4035,18 @@
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
- MemVT, srcValue, SVOff, Align, Vol,
+ MemVT, PtrInfo, Align, Vol,
ReadMem, WriteMem);
}
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
if (Align == 0) // Ensure that codegen never sees alignment 0
@@ -3841,8 +4061,7 @@
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(srcValue, Flags, SVOff,
- MemVT.getStoreSize(), Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
}
@@ -3853,13 +4072,14 @@
EVT MemVT, MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
(Opcode <= INT_MAX &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
// Memoize the node unless it returns a flag.
MemIntrinsicSDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -3879,36 +4099,72 @@
return SDValue(N, 0);
}
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - Variant of the above that takes the offset as an
+/// SDValue: a constant offset is folded into the inference, an undef offset
+/// is treated as zero, and anything else defeats the inference.
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
+
+
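
Editor's note: a standalone sketch of the shape matching InferPointerInfo performs, with a made-up node type standing in for SDValue. It recognizes a bare frame index or "FrameIndex + constant" and gives up on anything else:

#include <cstdio>

enum Kind { FrameIndex, Constant, Add, Other };
struct Node { Kind K; long Value; Node *Op0, *Op1; };

// Match "FI" or "(FI + Cst)"; otherwise no pointer info can be inferred.
static bool inferFixedStack(const Node *Ptr, long &FI, long &Off) {
  if (Ptr->K == FrameIndex) { FI = Ptr->Value; Off = 0; return true; }
  if (Ptr->K == Add && Ptr->Op0->K == FrameIndex && Ptr->Op1->K == Constant) {
    FI = Ptr->Op0->Value;
    Off = Ptr->Op1->Value;
    return true;
  }
  return false;
}

int main() {
  Node FI = {FrameIndex, 2, nullptr, nullptr};
  Node Cst = {Constant, 8, nullptr, nullptr};
  Node Sum = {Add, 0, &FI, &Cst};
  long Idx, Off;
  if (inferFixedStack(&Sum, Idx, Off))
    printf("fixed stack slot %ld, offset %ld\n", Idx, Off);  // slot 2, offset 8
}
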
SDValue
SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
- const Value *SV, int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
@@ -3955,25 +4211,26 @@
SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
- const Value *SV, int SVOffset,
+ MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
- const Value *SV,
- int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
}
+
SDValue
SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM) {
@@ -3981,39 +4238,42 @@
assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
- LD->getChain(), Base, Offset, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV, int SVOffset,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- Val.getValueType().getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -4036,27 +4296,28 @@
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV,
- int SVOffset, EVT SVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT, bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -4066,6 +4327,8 @@
MachineMemOperand *MMO) {
EVT VT = Val.getValueType();
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (VT == SVT)
return getStore(Chain, dl, Val, Ptr, MMO);
@@ -4182,7 +4445,7 @@
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
void *IP = 0;
@@ -4198,7 +4461,7 @@
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4248,7 +4511,7 @@
// Memoize the node unless it returns a flag.
SDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -4280,7 +4543,7 @@
}
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4657,7 +4920,7 @@
unsigned NumOps) {
// If an identical node already exists, use it.
void *IP = 0;
- if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
@@ -4857,9 +5120,9 @@
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
const SDValue *Ops, unsigned NumOps) {
- bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
- void *IP;
+ void *IP = 0;
if (DoCSE) {
FoldingSetNodeID ID;
@@ -4888,7 +5151,7 @@
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifyMachineNode(N);
#endif
return N;
}
@@ -4919,7 +5182,7 @@
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
const SDValue *Ops, unsigned NumOps) {
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -5352,6 +5615,29 @@
SD->setHasDebugValue(true);
}
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -5379,7 +5665,7 @@
}
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
- const SDValue *Ops, unsigned NumOps, EVT memvt,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
@@ -5398,7 +5684,7 @@
namespace {
struct EVTArray {
std::vector<EVT> VTs;
-
+
EVTArray() {
VTs.reserve(MVT::LAST_VALUETYPE);
for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
@@ -5418,7 +5704,7 @@
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
- assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
"Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
@@ -5490,9 +5776,9 @@
/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
-/// side-effecting instructions. In practice, this looks through token
-/// factors and non-volatile loads. In order to remain efficient, this only
-/// looks a couple of nodes in, it does not do an exhaustive search.
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in, it does not do an exhaustive search.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth) const {
if (*this == Dest) return true;
@@ -5502,12 +5788,12 @@
if (Depth == 0) return false;
// If this is a token factor, all inputs to the TF happen in parallel. If any
- // of the operands of the TF reach dest, then we can do the xform.
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
if (getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
- return true;
- return false;
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
}
// Loads don't have side effects, look through them.
@@ -5518,24 +5804,39 @@
return false;
}
-/// isPredecessorOf - Return true if this node is a predecessor of N. This node
-/// is either an operand of N or it can be reached by traversing up the operands.
-/// NOTE: this is an expensive method. Use it carefully.
-bool SDNode::isPredecessorOf(SDNode *N) const {
- SmallPtrSet<SDNode *, 32> Visited;
- SmallVector<SDNode *, 16> Worklist;
- Worklist.push_back(N);
+/// hasPredecessor - Return true if N is a predecessor of this node.
+/// N is either an operand of this node, or can be reached by recursively
+/// traversing up the operands.
+/// NOTE: This is an expensive method. Use it carefully.
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
- do {
- N = Worklist.pop_back_val();
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- SDNode *Op = N->getOperand(i).getNode();
- if (Op == this)
- return true;
+bool SDNode::hasPredecessorHelper(const SDNode *N,
+ SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallVector<const SDNode *, 16> &Worklist) const {
+ if (Visited.empty()) {
+ Worklist.push_back(this);
+ } else {
+ // Take a look in the visited set. If we've already encountered this node
+ // we needn't search further.
+ if (Visited.count(N))
+ return true;
+ }
+
+ // Haven't visited N yet. Continue the search.
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ SDNode *Op = M->getOperand(i).getNode();
if (Visited.insert(Op))
Worklist.push_back(Op);
+ if (Op == N)
+ return true;
}
- } while (!Worklist.empty());
+ }
return false;
}
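
Editor's note: hasPredecessorHelper externalizes the Visited set and Worklist so a caller making many queries over the same region of the DAG can reuse earlier work. A self-contained toy version over a minimal node type:

#include <cstdio>
#include <set>
#include <vector>

struct Node { std::vector<Node*> Ops; };

// Worklist DFS up the operands; Visited persists across calls so repeated
// queries get the early-out at the top.
static bool hasPredecessor(const Node *Root, const Node *Target,
                           std::set<const Node*> &Visited,
                           std::vector<const Node*> &Worklist) {
  if (Visited.empty())
    Worklist.push_back(Root);
  else if (Visited.count(Target))
    return true;                      // already proven reachable earlier
  while (!Worklist.empty()) {
    const Node *M = Worklist.back(); Worklist.pop_back();
    for (const Node *Op : M->Ops) {
      if (Visited.insert(Op).second) Worklist.push_back(Op);
      if (Op == Target) return true;
    }
  }
  return false;
}

int main() {
  Node A, B, C;
  A.Ops = {&B};
  B.Ops = {&C};
  std::set<const Node*> Visited;
  std::vector<const Node*> Worklist;
  printf("%d\n", hasPredecessor(&A, &C, Visited, Worklist));  // prints 1
}
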
@@ -5571,6 +5872,7 @@
#endif
case ISD::PREFETCH: return "Prefetch";
case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
case ISD::ATOMIC_SWAP: return "AtomicSwap";
case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
@@ -5583,6 +5885,8 @@
case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
@@ -5606,7 +5910,13 @@
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -5637,6 +5947,8 @@
case ISD::CopyFromReg: return "CopyFromReg";
case ISD::UNDEF: return "undef";
case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
case ISD::HANDLENODE: return "handlenode";
// Unary operators
@@ -5682,6 +5994,7 @@
case ISD::FSUB: return "fsub";
case ISD::FMUL: return "fmul";
case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
@@ -5689,12 +6002,13 @@
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
- case ISD::VSETCC: return "vsetcc";
case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
@@ -5728,7 +6042,7 @@
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BIT_CONVERT: return "bit_convert";
+ case ISD::BITCAST: return "bitcast";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
@@ -5777,7 +6091,8 @@
case ISD::CTLZ: return "ctlz";
// Trampolines
- case ISD::TRAMPOLINE: return "trampoline";
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
case ISD::CONDCODE:
switch (cast<CondCodeSDNode>(this)->get()) {
@@ -5940,12 +6255,7 @@
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- if (G && R->getReg() &&
- TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
- OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
- } else {
- OS << " %reg" << R->getReg();
- }
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
@@ -5991,7 +6301,7 @@
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
OS << ", " << AM;
-
+
OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
OS << "<" << *M->getMemOperand() << ">";
@@ -6042,8 +6352,7 @@
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
- unsigned indent)
-{
+ unsigned indent) {
if (depth == 0)
return;
@@ -6055,6 +6364,9 @@
return;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
OS << '\n';
printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
}
@@ -6063,11 +6375,11 @@
void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
unsigned depth) const {
printrWithDepthHelper(OS, this, G, depth, 0);
-}
+}
void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
// Don't print impossibly deep things.
- printrWithDepth(OS, G, 100);
+ printrWithDepth(OS, G, 10);
}
void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
@@ -6076,8 +6388,8 @@
void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
- dumprWithDepth(G, 100);
-}
+ dumprWithDepth(G, 10);
+}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -6122,7 +6434,7 @@
Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
OperandEltVT,
Operand,
- getConstant(i, MVT::i32));
+ getConstant(i, TLI.getPointerTy()));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
@@ -6134,13 +6446,18 @@
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
&Operands[0], Operands.size()));
break;
+ case ISD::VSELECT:
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
- getShiftAmountOperand(Operands[1])));
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG: {
@@ -6161,10 +6478,10 @@
}
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
/// is loading from.
-bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
unsigned Bytes, int Dist) const {
if (LD->getChain() != Base->getChain())
return false;
@@ -6185,11 +6502,11 @@
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
- if (V && (V->getSExtValue() == Dist*Bytes))
- return true;
- }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
const GlobalValue *GV1 = NULL;
const GlobalValue *GV2 = NULL;
@@ -6220,6 +6537,8 @@
Align = TD->getPreferredAlignment(GVar);
}
}
+ if (!Align)
+ Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
}
return MinAlign(Align, GVOffset);
}
@@ -6230,15 +6549,14 @@
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ } else if (isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
FrameOffset = Ptr.getConstantOperandVal(1);
}
if (FrameIdx != (1 << 31)) {
- // FIXME: Handle FI+CST.
const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
@@ -6322,7 +6640,7 @@
}
-const Type *ConstantPoolSDNode::getType() const {
+Type *ConstantPoolSDNode::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
@@ -6359,7 +6677,7 @@
if (OpVal.getOpcode() == ISD::UNDEF)
SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
zextOrTrunc(sz) << BitPos;
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
@@ -6374,10 +6692,10 @@
while (sz > 8) {
unsigned HalfSize = sz / 2;
- APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
- APInt LowValue = APInt(SplatValue).trunc(HalfSize);
- APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
- APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
// If the two halves do not match (ignoring undef bits), stop here.
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
@@ -6417,7 +6735,7 @@
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
-
+
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N)) {
@@ -6426,10 +6744,10 @@
errs() << "Detected cycle in SelectionDAG\n";
abort();
}
-
+
for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
-
+
Checked.insert(N);
Visited.erase(N);
}
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index aa6df1d..095b400 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,6 +15,7 @@
#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -43,14 +44,12 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -70,22 +69,45 @@
cl::location(LimitFloatPrecision),
cl::init(0));
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT);
+
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
if (NumParts > 1) {
// Assemble the value from multiple parts.
- if (!ValueVT.isVector() && ValueVT.isInteger()) {
+ if (ValueVT.isInteger()) {
unsigned PartBits = PartVT.getSizeInBits();
unsigned ValueBits = ValueVT.getSizeInBits();
@@ -100,25 +122,25 @@
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
- Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT);
- Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
- Hi = getCopyFromParts(DAG, dl,
+ Hi = getCopyFromParts(DAG, DL,
Parts + RoundParts, OddParts, PartVT, OddVT);
// Combine the round and odd parts.
@@ -126,68 +148,29 @@
if (TLI.isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
- Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
DAG.getConstant(Lo.getValueType().getSizeInBits(),
TLI.getPointerTy()));
- Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
- Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
- } else if (ValueVT.isVector()) {
- // Handle a multi-element vector.
- EVT IntermediateVT, RegisterVT;
- unsigned NumIntermediates;
- unsigned NumRegs =
- TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
- NumIntermediates, RegisterVT);
- assert(NumRegs == NumParts
- && "Part count doesn't match vector breakdown!");
- NumParts = NumRegs; // Silence a compiler warning.
- assert(RegisterVT == PartVT
- && "Part type doesn't match vector breakdown!");
- assert(RegisterVT == Parts[0].getValueType() &&
- "Part type doesn't match part!");
-
- // Assemble the parts into intermediate operands.
- SmallVector<SDValue, 8> Ops(NumIntermediates);
- if (NumIntermediates == NumParts) {
- // If the register was not expanded, truncate or copy the value,
- // as appropriate.
- for (unsigned i = 0; i != NumParts; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
- PartVT, IntermediateVT);
- } else if (NumParts > 0) {
- // If the intermediate type was expanded, build the intermediate
- // operands from the parts.
- assert(NumParts % NumIntermediates == 0 &&
- "Must expand into a divisible number of parts!");
- unsigned Factor = NumParts / NumIntermediates;
- for (unsigned i = 0; i != NumIntermediates; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
- PartVT, IntermediateVT);
- }
-
- // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
- // intermediate operands.
- Val = DAG.getNode(IntermediateVT.isVector() ?
- ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
- ValueVT, &Ops[0], NumIntermediates);
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
"Unexpected split");
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
}
}
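
Editor's note: the Lo/Hi recursion above bottoms out in a BUILD_PAIR, with the halves swapped first on big-endian targets. A standalone arithmetic sketch of that reassembly for two i32 parts forming an i64:

#include <cstdint>
#include <cstdio>

static uint64_t buildPair(uint32_t Lo, uint32_t Hi, bool BigEndian) {
  if (BigEndian) {                 // parts arrive most-significant first
    uint32_t T = Lo; Lo = Hi; Hi = T;
  }
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  // Little-endian: Parts[0] is the low half.
  printf("%llx\n",
         (unsigned long long)buildPair(0x11111111u, 0x22222222u, false));
  // prints 2222222211111111
}
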
@@ -197,183 +180,315 @@
if (PartVT == ValueVT)
return Val;
- if (PartVT.isVector()) {
- assert(ValueVT.isVector() && "Unknown vector conversion!");
- return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
- }
-
- if (ValueVT.isVector()) {
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
- "Only trivial scalar-to-vector conversions should get here!");
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
- }
-
- if (PartVT.isInteger() &&
- ValueVT.isInteger()) {
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
if (AssertOp != ISD::DELETED_NODE)
- Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+ Val = DAG.getNode(AssertOp, DL, PartVT, Val,
DAG.getValueType(ValueVT));
- return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
- } else {
- return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
- if (ValueVT.bitsLT(Val.getValueType())) {
- // FP_ROUND's are always exact here.
- return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getIntPtrConstant(1));
- }
- return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
- return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
return SDValue();
}
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+ ValueVT, &Ops[0], NumIntermediates);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT, Val);
+
+ }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle cases such as i8 -> <1 x i1>
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartVT) {
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT.getScalarType(), Val);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+
+
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT);
+
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT PtrVT = TLI.getPointerTy();
EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
- if (!NumParts)
+ if (NumParts == 0)
return;
- if (!ValueVT.isVector()) {
- if (PartVT == ValueVT) {
- assert(NumParts == 1 && "No-op copy with multiple parts!");
- Parts[0] = Val;
- return;
- }
-
- if (NumParts * PartBits > ValueVT.getSizeInBits()) {
- // If the parts cover more bits than the value has, promote the value.
- if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
- assert(NumParts == 1 && "Do not know what to promote to!");
- Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
- } else if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
- } else {
- llvm_unreachable("Unknown mismatch!");
- }
- } else if (PartBits == ValueVT.getSizeInBits()) {
- // Different types of the same size.
- assert(NumParts == 1 && PartVT != ValueVT);
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
- } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
- // If the parts cover less bits than value has, truncate the value.
- if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
- } else {
- llvm_unreachable("Unknown mismatch!");
- }
- }
-
- // The value may have changed - recompute ValueVT.
- ValueVT = Val.getValueType();
- assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
- "Failed to tile the value with PartVT!");
-
- if (NumParts == 1) {
- assert(PartVT == ValueVT && "Type conversion failed!");
- Parts[0] = Val;
- return;
- }
-
- // Expand the value into multiple parts.
- if (NumParts & (NumParts - 1)) {
- // The number of parts is not a power of 2. Split off and copy the tail.
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
- "Do not know what to expand to!");
- unsigned RoundParts = 1 << Log2_32(NumParts);
- unsigned RoundBits = RoundParts * PartBits;
- unsigned OddParts = NumParts - RoundParts;
- SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
- DAG.getConstant(RoundBits,
- TLI.getPointerTy()));
- getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
- OddParts, PartVT);
-
- if (TLI.isBigEndian())
- // The odd parts were reversed by getCopyToParts - unreverse them.
- std::reverse(Parts + RoundParts, Parts + NumParts);
-
- NumParts = RoundParts;
- ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
- }
-
- // The number of parts is a power of 2. Repeatedly bisect the value using
- // EXTRACT_ELEMENT.
- Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
- EVT::getIntegerVT(*DAG.getContext(),
- ValueVT.getSizeInBits()),
- Val);
-
- for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
- for (unsigned i = 0; i < NumParts; i += StepSize) {
- unsigned ThisBits = StepSize * PartBits / 2;
- EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
- SDValue &Part0 = Parts[i];
- SDValue &Part1 = Parts[i+StepSize/2];
-
- Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- ThisVT, Part0,
- DAG.getConstant(1, PtrVT));
- Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- ThisVT, Part0,
- DAG.getConstant(0, PtrVT));
-
- if (ThisBits == PartBits && ThisVT != PartVT) {
- Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
- PartVT, Part0);
- Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
- PartVT, Part1);
- }
- }
- }
-
- if (TLI.isBigEndian())
- std::reverse(Parts, Parts + OrigNumParts);
-
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
return;
}
- // Vector ValueVT.
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+ // If the parts cover less bits than value has, truncate the value.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
if (NumParts == 1) {
- if (PartVT != ValueVT) {
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
- } else {
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
- "Only trivial vector-to-scalar conversions should get here!");
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- PartVT, Val,
- DAG.getConstant(0, PtrVT));
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
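To see the tiling strategy above in isolation: a minimal standalone sketch, assuming little-endian part order, 8-bit parts standing in for PartVT, and the GCC/Clang __builtin_clz builtin standing in for Log2_32; the names splitParts and PartBits are illustrative, not from the patch.

// split_parts_sketch.cpp - the getCopyToParts tiling strategy on plain
// integers: peel off a non-power-of-2 tail with a shift, then bisect.
#include <cstdint>
#include <cstdio>

static void splitParts(uint64_t Val, unsigned NumParts, uint8_t *Parts) {
  const unsigned PartBits = 8;
  if (NumParts & (NumParts - 1)) {              // not a power of 2
    unsigned RoundParts = 1u << (31 - __builtin_clz(NumParts));
    unsigned RoundBits = RoundParts * PartBits;
    // The ISD::SRL above: split off and recurse on the odd high parts.
    splitParts(Val >> RoundBits, NumParts - RoundParts, Parts + RoundParts);
    NumParts = RoundParts;                      // the ISD::TRUNCATE above
    Val &= (RoundBits == 64) ? ~0ull : ((1ull << RoundBits) - 1);
  }
  // The EXTRACT_ELEMENT bisection bottoms out in one byte per part.
  for (unsigned i = 0; i != NumParts; ++i)
    Parts[i] = uint8_t(Val >> (i * PartBits));
}

int main() {
  uint8_t Parts[3];
  splitParts(0x00ABCDEFu, 3, Parts);   // 3 parts = 2 round parts + 1 odd
  for (unsigned i = 0; i != 3; ++i)
    printf("part %u = 0x%02X\n", i, (unsigned)Parts[i]);  // EF, CD, AB
  return 0;
}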
+
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (NumParts == 1) {
+ if (PartVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ bool Smaller = PartVT.bitsLE(ValueVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+    } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
+
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
+ }
Parts[0] = Val;
return;
@@ -383,7 +498,8 @@
EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
- IntermediateVT, NumIntermediates, RegisterVT);
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
unsigned NumElements = ValueVT.getVectorNumElements();
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
@@ -394,14 +510,12 @@
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector())
- Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates),
- PtrVT));
+ DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
else
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- IntermediateVT, Val,
- DAG.getConstant(i, PtrVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
// Split the intermediate operands into legal parts.
@@ -409,7 +523,7 @@
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -417,10 +531,13 @@
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
}
}
+
+
+
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
@@ -460,13 +577,8 @@
EVT regvt, EVT valuevt)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
- RegsForValue(const SmallVector<unsigned, 4> ®s,
- const SmallVector<EVT, 4> ®vts,
- const SmallVector<EVT, 4> &valuevts)
- : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
-
RegsForValue(LLVMContext &Context, const TargetLowering &tli,
- unsigned Reg, const Type *Ty) {
+ unsigned Reg, Type *Ty) {
ComputeValueVTs(tli, Ty, ValueVTs);
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
@@ -556,48 +668,50 @@
}
Chain = P.getValue(1);
+ Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
- if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
- RegisterVT.isInteger() && !RegisterVT.isVector()) {
- unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
- if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
- const FunctionLoweringInfo::LiveOutInfo &LOI =
- FuncInfo.LiveOutRegInfo[SlotNo];
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector())
+ continue;
- unsigned RegSize = RegisterVT.getSizeInBits();
- unsigned NumSignBits = LOI.NumSignBits;
- unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
- // FIXME: We capture more information than the dag can represent. For
- // now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
- EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize)
- isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
- else if (NumZeroBits >= RegSize-1)
- isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
- else if (NumSignBits > RegSize-8)
- isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
- else if (NumZeroBits >= RegSize-8)
- isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
- else if (NumSignBits > RegSize-16)
- isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
- else if (NumZeroBits >= RegSize-16)
- isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
- else if (NumSignBits > RegSize-32)
- isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
- else if (NumZeroBits >= RegSize-32)
- isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
- if (FromVT != MVT::Other)
- P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
- RegisterVT, P, DAG.getValueType(FromVT));
- }
- }
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
- Parts[i] = P;
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
@@ -627,8 +741,7 @@
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
EVT RegisterVT = RegVTs[Value];
- getCopyToParts(DAG, dl,
- Val.getValue(Val.getResNo() + Value),
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT);
Part += NumParts;
}
@@ -675,6 +788,18 @@
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ else if (!Regs.empty() &&
+ TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ // Put the register class of the virtual registers in the flag word. That
+ // way, later passes can recompute register class constraints for inline
+ // assembly as well as normal instructions.
+ // Don't do this for tied operands that can use the regclass information
+ // from the def.
+ const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
Ops.push_back(Res);
@@ -688,9 +813,11 @@
}
}
-void SelectionDAGBuilder::init(AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
AA = &aa;
+ GFI = gfi;
TD = DAG.getTarget().getTargetData();
+ LPadToCallSiteMap.clear();
}
/// clear - Clear out the current SelectionDAG and the associated
@@ -708,6 +835,16 @@
HasTailCall = false;
}
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from the clear so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
/// getRoot - Return the current virtual root of the Selection DAG,
/// flushing any PendingLoad items. This must be done before emitting
/// a store or any other node that may need to be ordered after any
@@ -808,7 +945,31 @@
}
}
-// getValue - Return an SDValue for the given Value.
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ MDNode *Variable = DI->getVariable();
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ SDV = DAG.getDbgValue(Variable, Val.getNode(),
+ Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
+/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
// to do this first, so that we don't create a CopyFromReg if we already
@@ -823,13 +984,15 @@
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ resolveDanglingDebugInfo(V, N);
+ return N;
}
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
-
+ resolveDanglingDebugInfo(V, Val);
return Val;
}
@@ -843,7 +1006,7 @@
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
-
+ resolveDanglingDebugInfo(V, Val);
return Val;
}
@@ -919,7 +1082,7 @@
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
- const VectorType *VecTy = cast<VectorType>(V->getType());
+ VectorType *VecTy = cast<VectorType>(V->getType());
unsigned NumElements = VecTy->getNumElements();
// Now that we know the number and type of the elements, get that number of
@@ -991,14 +1154,15 @@
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
- EVT PtrVT = PtrValueVTs[0];
for (unsigned i = 0; i != NumValues; ++i) {
- SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
- DAG.getConstant(Offsets[i], PtrVT));
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i]));
Chains[i] =
DAG.getStore(Chain, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- Add, NULL, Offsets[i], false, false, 0);
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
}
Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -1020,15 +1184,8 @@
else if (F->paramHasAttr(0, Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
- // FIXME: C calling convention requires the return type to be promoted
- // to at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
- }
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
@@ -1043,9 +1200,9 @@
Flags.setInReg();
// Propagate extension type if any
- if (F->paramHasAttr(0, Attribute::SExt))
+ if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
- else if (F->paramHasAttr(0, Attribute::ZExt))
+ else if (ExtendKind == ISD::ZERO_EXTEND)
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
@@ -1075,6 +1232,10 @@
/// created for it, emit nodes to copy the value into the virtual
/// registers.
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
@@ -1123,6 +1284,26 @@
return true;
}
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (!BPI)
+ return 0;
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::
+addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight /* = 0 */) {
+ if (!Weight)
+ Weight = getEdgeWeight(Src, Dst);
+ Src->addSuccessor(Dst, Weight);
+}
+
+
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
@@ -1257,7 +1438,7 @@
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
-
+
return true;
}
@@ -1293,6 +1474,7 @@
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -1307,7 +1489,8 @@
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (BOp->hasOneUse() &&
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
@@ -1390,8 +1573,8 @@
}
// Update successor info
- SwitchBB->addSuccessor(CB.TrueBB);
- SwitchBB->addSuccessor(CB.FalseBB);
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1412,10 +1595,11 @@
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
- // Insert the false branch.
- if (CB.FalseBB != NextBlock)
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(CB.FalseBB));
+  // Insert the false branch. Do this even if it's a fall-through branch;
+  // this makes it easier to do DAG optimizations that require inverting
+  // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
@@ -1502,12 +1686,28 @@
Sub, DAG.getConstant(B.Range, VT),
ISD::SETUGT);
- SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
- TLI.getPointerTy());
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+        // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
- B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
+ B.RegVT = VT;
+ B.Reg = FuncInfo.CreateReg(VT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
- B.Reg, ShiftOp);
+ B.Reg, Sub);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1518,8 +1718,8 @@
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- SwitchBB->addSuccessor(B.Default);
- SwitchBB->addSuccessor(MBB);
+ addSuccessorWithWeight(SwitchBB, B.Default);
+ addSuccessorWithWeight(SwitchBB, MBB);
SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, CopyTo, RangeCmp,
@@ -1533,41 +1733,47 @@
}
/// visitBitTestCase - this function produces one "bit test"
-void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
- TLI.getPointerTy());
+ EVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
SDValue Cmp;
- if (CountPopulation_64(B.Mask) == 1) {
+ unsigned PopCount = CountPopulation_64(B.Mask);
+ if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(ShiftOp.getValueType()),
+ TLI.getSetCCResultType(VT),
ShiftOp,
- DAG.getConstant(CountTrailingZeros_64(B.Mask),
- TLI.getPointerTy()),
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+ ISD::SETNE);
} else {
// Make desired shift
- SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
- TLI.getPointerTy(),
- DAG.getConstant(1, TLI.getPointerTy()),
- ShiftOp);
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
- TLI.getPointerTy(), SwitchVal,
- DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(AndOp.getValueType()),
- AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
ISD::SETNE);
}
- SwitchBB->addSuccessor(B.TargetBB);
- SwitchBB->addSuccessor(NextMBB);
+ addSuccessorWithWeight(SwitchBB, B.TargetBB);
+ addSuccessorWithWeight(SwitchBB, NextMBB);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -1587,6 +1793,79 @@
DAG.setRoot(BrAnd);
}
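As a plain-C++ model of the test being built here (hypothetical file and function names; __builtin_ctzll is a GCC/Clang builtin used in place of CountTrailingZeros_64):

// bittest_sketch.cpp - a standalone model of the lowering above: membership
// in a small set of case values becomes one shift, one AND, one branch.
#include <cstdio>

static bool inSet(unsigned x, unsigned long long mask) {
  return ((1ull << x) & mask) != 0;      // the SHL/AND/SETNE sequence
}

int main() {
  // Cases {1, 3, 5} in the range [0, 7] collapse into mask 0b00101010.
  const unsigned long long Mask = (1u << 1) | (1u << 3) | (1u << 5);
  for (unsigned x = 0; x != 8; ++x)
    printf("x=%u -> %s\n", x, inSet(x, Mask) ? "case" : "default");

  // The single-bit special case above degenerates to an equality test:
  // x == CountTrailingZeros(Mask) instead of building 1 << x.
  const unsigned long long OneBit = 1u << 4;
  unsigned x = 4;
  printf("single-bit forms agree: %d\n",
         inSet(x, OneBit) == (x == (unsigned)__builtin_ctzll(OneBit)));
  return 0;
}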
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ InvokeMBB->addSuccessor(Return);
+ InvokeMBB->addSuccessor(LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
+}
+
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to landingpad not in landing pad!");
+
+ MachineBasicBlock *MBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ AddLandingPadInfo(LP, MMI, MBB);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+ // Insert the EXCEPTIONADDR instruction.
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = DAG.getRoot();
+ SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+ SDValue Chain = Op1.getValue(1);
+
+ // Insert the EHSELECTION instruction.
+ VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ Ops[0] = Op1;
+ Ops[1] = Chain;
+ SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+ Chain = Op2.getValue(1);
+ Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+ Ops[0] = Op1;
+ Ops[1] = Op2;
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Ops[0], 2);
+
+ std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+ setValue(&LP, RetPair.first);
+ DAG.setRoot(RetPair.second);
+}
+
/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
@@ -1612,10 +1891,56 @@
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
- // TODO: If any two of the cases has the same destination, and if one value
+  // If any two of the cases have the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ addSuccessorWithWeight(SwitchBB, Small.BB);
+ addSuccessorWithWeight(SwitchBB, Default);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
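The merged compare is easier to verify outside of the DAG. A small sketch with hypothetical names, exhaustively checking the transform for the comment's example values:

// mergecases_sketch.cpp - the one-bit merge above: X == 4 || X == 6
// becomes (X | 2) == 6 because 4 and 6 differ in a single bit.
#include <cstdio>

static bool mergedCompare(unsigned X, unsigned Small, unsigned Big) {
  // Preconditions established by the code above: Small's set bits are a
  // subset of Big's, and Big has exactly one extra set bit.
  unsigned CommonBit = Big & ~Small;   // isolate the differing bit
  return (X | CommonBit) == Big;       // one compare replaces two
}

int main() {
  for (unsigned X = 0; X != 8; ++X)
    printf("X=%u naive=%d merged=%d\n", X,
           (X == 4 || X == 6), mergedCompare(X, 4, 6));
  return 0;
}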
// Rearrange the case blocks so that the last one falls through if possible.
if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
@@ -1656,7 +1981,11 @@
CC = ISD::SETLE;
LHS = I->Low; MHS = SV; RHS = I->High;
}
- CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+ uint32_t ExtraWeight = I->ExtraWeight;
+ CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+ /* me */ CurBlock,
+ /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
// If emitting the first comparison, just call visitSwitchCase to emit the
// code into the current block. Otherwise, push the CaseBlock onto the
@@ -1680,17 +2009,16 @@
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
- APInt LastExt(Last), FirstExt(First);
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
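The extra bit matters in the corner case where a case range spans the whole signed domain. A standalone illustration, using int64_t as the widened type:

// caserange_sketch.cpp - why ComputeRange sign-extends to one extra bit:
// the span of a full signed range does not fit in the original width.
#include <cstdint>
#include <cstdio>

static uint64_t computeRange(int32_t First, int32_t Last) {
  // Widen by (at least) one bit before subtracting, as the APInt code does.
  int64_t FirstExt = First, LastExt = Last;
  return (uint64_t)(LastExt - FirstExt + 1);
}

int main() {
  printf("%llu\n", (unsigned long long)computeRange(INT32_MIN, INT32_MAX));
  // Prints 4294967296, which would wrap to 0 in 32-bit arithmetic.
  return 0;
}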
/// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+ CaseRecVector &WorkList,
+ const Value *SV,
+ MachineBasicBlock *Default,
MachineBasicBlock *SwitchBB) {
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
@@ -1699,22 +2027,24 @@
const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
APInt TSize(First.getBitWidth(), 0);
- for (CaseItr I = CR.Range.first, E = CR.Range.second;
- I!=E; ++I)
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
if (!areJTsAllowed(TLI) || TSize.ult(4))
return false;
APInt Range = ComputeRange(First, Last);
- double Density = TSize.roundToDouble() / Range.roundToDouble();
- if (Density < 0.4)
+ // The density is TSize / Range. Require at least 40%.
+ // It should not be possible for IntTSize to saturate for sane code, but make
+ // sure we handle Range saturation correctly.
+ uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
+ uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
+ if (IntTSize * 10 < IntRange * 4)
return false;
DEBUG(dbgs() << "Lowering jump table\n"
<< "First entry: " << First << ". Last entry: " << Last << '\n'
- << "Range: " << Range
- << "Size: " << TSize << ". Density: " << Density << "\n\n");
+ << "Range: " << Range << ". Size: " << TSize << ".\n\n");
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
@@ -1732,8 +2062,9 @@
// table.
MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
CurMF->insert(BBI, JumpTableBB);
- CR.CaseBB->addSuccessor(Default);
- CR.CaseBB->addSuccessor(JumpTableBB);
+
+ addSuccessorWithWeight(CR.CaseBB, Default);
+ addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
// Build a vector of destination BBs, corresponding to each target
// of the jump table. If the value of the jump table slot corresponds to
@@ -1760,7 +2091,7 @@
E = DestBBs.end(); I != E; ++I) {
if (!SuccsHandled[(*I)->getNumber()]) {
SuccsHandled[(*I)->getNumber()] = true;
- JumpTableBB->addSuccessor(*I);
+ addSuccessorWithWeight(JumpTableBB, *I);
}
}
@@ -1777,7 +2108,6 @@
visitJumpTableHeader(JT, JTH, SwitchBB);
JTCases.push_back(JumpTableBlock(JTH, JT));
-
return true;
}
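The integer form of the 40% density test can be checked in isolation. A sketch assuming the same UINT64_MAX/10 clamp as getLimitedValue:

// jtdensity_sketch.cpp - the integer density test replacing the old
// floating-point "Density < 0.4" check. Values are clamped so the
// products below cannot overflow uint64_t.
#include <cstdint>
#include <cstdio>

static bool denseEnoughForJumpTable(uint64_t TSize, uint64_t Range) {
  const uint64_t Cap = UINT64_MAX / 10;     // mirror getLimitedValue()
  if (TSize > Cap) TSize = Cap;
  if (Range > Cap) Range = Cap;
  // TSize / Range >= 40%  <=>  TSize * 10 >= Range * 4, without division.
  return TSize * 10 >= Range * 4;
}

int main() {
  printf("%d\n", denseEnoughForJumpTable(4, 10));         // exactly 40%: yes
  printf("%d\n", denseEnoughForJumpTable(3, 10));         // 30%: no
  printf("%d\n", denseEnoughForJumpTable(100, 1u << 20)); // sparse: no
  return 0;
}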
@@ -1827,9 +2157,13 @@
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
- double LDensity = (double)LSize.roundToDouble() /
+ // Use volatile double here to avoid excess precision issues on some hosts,
+ // e.g. that use 80-bit X87 registers.
+ volatile double LDensity =
+ (double)LSize.roundToDouble() /
(LEnd - First + 1ULL).roundToDouble();
- double RDensity = (double)RSize.roundToDouble() /
+ volatile double RDensity =
+ (double)RSize.roundToDouble() /
(Last - RBegin + 1ULL).roundToDouble();
double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
@@ -2031,7 +2365,7 @@
}
BitTestBlock BTB(lowBound, cmpRange, SV,
- -1U, (CR.CaseBB == SwitchBB),
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
CR.CaseBB, Default, BTC);
if (CR.CaseBB == SwitchBB)
@@ -2047,12 +2381,17 @@
const SwitchInst& SI) {
size_t numCmps = 0;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
- MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ BasicBlock *SuccBB = SI.getSuccessor(i);
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+ uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
+
Cases.push_back(Case(SI.getSuccessorValue(i),
SI.getSuccessorValue(i),
- SMBB));
+ SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -2060,7 +2399,8 @@
if (Cases.size() >= 2)
// Must recompute end() each iteration because it may be
// invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
MachineBasicBlock* nextBB = J->BB;
@@ -2071,6 +2411,16 @@
if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
I->High = J->High;
J = Cases.erase(J);
+
+ if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
+ uint32_t CurWeight = currentBB->getBasicBlock() ?
+ BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
+ uint32_t NextWeight = nextBB->getBasicBlock() ?
+ BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
+
+ BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
+ CurWeight + NextWeight);
+ }
} else {
I = J++;
}
@@ -2085,6 +2435,19 @@
return numCmps;
}
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
@@ -2094,7 +2457,7 @@
// If there is only the default destination, branch to it if it is not the
// next basic block. Otherwise, just fall through.
- if (SI.getNumOperands() == 2) {
+ if (SI.getNumCases() == 1) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
@@ -2114,12 +2477,12 @@
size_t numCmps = Clusterify(Cases, SI);
DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
<< ". Total compares: " << numCmps << '\n');
- numCmps = 0;
+ (void)numCmps;
// Get the Value to be switched on and default basic blocks, which will be
// inserted into CaseBlock records, representing basic blocks in the binary
// search tree.
- const Value *SV = SI.getOperand(0);
+ const Value *SV = SI.getCondition();
// Push the initial CaseRec onto the worklist
CaseRecVector WorkList;
@@ -2161,8 +2524,10 @@
succs.push_back(I.getSuccessor(i));
array_pod_sort(succs.begin(), succs.end());
succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
- for (unsigned i = 0, e = succs.size(); i != e; ++i)
- IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+ for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ addSuccessorWithWeight(IndirectBrMBB, Succ);
+ }
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -2171,31 +2536,15 @@
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
- const Type *Ty = I.getType();
- if (Ty->isVectorTy()) {
- if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
- const VectorType *DestTy = cast<VectorType>(I.getType());
- const Type *ElTy = DestTy->getElementType();
- unsigned VL = DestTy->getNumElements();
- std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
- Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
- if (CV == CNZ) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
- }
+ Type *Ty = I.getType();
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
}
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
-
visitBinary(I, ISD::FSUB);
}
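The reason the fold keys on the negative zero, demonstrated with host floats (std::signbit from <cmath>):

// fneg_sketch.cpp - why the fold above demands the *negative* zero:
// fneg(+0.0) is -0.0, but +0.0 - (+0.0) is +0.0, so "0.0 - X" is not a
// correct negation while "-0.0 - X" is.
#include <cmath>
#include <cstdio>

int main() {
  float x = 0.0f;
  float sub = 0.0f - x;    // +0.0: not a negation of x
  float neg = -0.0f - x;   // -0.0: matches fneg(x)
  printf("signbit(0.0f - x)  = %d\n", (int)std::signbit(sub));  // 0
  printf("signbit(-0.0f - x) = %d\n", (int)std::signbit(neg));  // 1
  return 0;
}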
@@ -2209,37 +2558,51 @@
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- if (!I.getType()->isVectorTy() &&
- Op2.getValueType() != TLI.getShiftAmountTy()) {
+
+ MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
// If the operand is smaller than the shift count type, promote it.
- EVT PTy = TLI.getPointerTy();
- EVT STy = TLI.getShiftAmountTy();
- if (STy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (STy.getSizeInBits() >=
- Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
- // Otherwise we'll need to temporarily settle for some other
- // convenient type; type legalization will make adjustments as
- // needed.
- else if (PTy.bitsLT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
- else if (PTy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
Op1.getValueType(), Op1, Op2));
}
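The truncation condition above, restated numerically. A sketch with a hypothetical log2Ceil standing in for Log2_32_Ceil:

// shiftamt_sketch.cpp - truncating the shift amount is safe once the
// narrow type can still index every bit of the shiftee, i.e. when
// ShiftSize >= ceil(log2(Op2Size)).
#include <cstdio>

static unsigned log2Ceil(unsigned v) {
  unsigned r = 0;
  while ((1u << r) < v) ++r;
  return r;
}

int main() {
  // A 64-bit shiftee needs amounts 0..63, so any type with >= 6 bits can
  // hold every meaningful amount; an i8 shift type is fine, an i4 is not.
  unsigned opBits = 64;
  printf("need %u bits\n", log2Ceil(opBits));     // 6
  printf("i8 ok: %d\n", 8 >= log2Ceil(opBits));   // 1
  printf("i4 ok: %d\n", 4 >= log2Ceil(opBits));   // 0
  return 0;
}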
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ // Turn exact SDivs into multiplications.
+ // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+ // exact bit.
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+ !isa<ConstantSDNode>(Op1) &&
+ isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+ setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+ else
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2));
+}
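BuildExactSDIV itself is a TLI hook not shown here; the arithmetic behind it is that an exact division by an odd constant equals a multiplication by the divisor's inverse modulo 2^64. A numeric sketch, assuming arithmetic right shift of negative values and the GCC/Clang __builtin_ctzll builtin:

// exactdiv_sketch.cpp - the transformation behind TLI.BuildExactSDIV:
// shift out the divisor's power-of-2 factor, then multiply by the odd
// part's multiplicative inverse mod 2^64.
#include <cstdint>
#include <cstdio>

static uint64_t inverseMod2_64(uint64_t d) {   // d must be odd
  uint64_t x = d;                  // d*d == 1 (mod 8): correct to 3 bits
  for (int i = 0; i != 5; ++i)     // Newton doubles precision: 6,12,24,48,96
    x *= 2 - d * x;
  return x;                        // d * x == 1 (mod 2^64)
}

static int64_t exactSDiv(int64_t n, int64_t d) {
  // Exactness guarantees the shifted-out bits of n are zero.
  int shift = __builtin_ctzll((uint64_t)d);
  return (int64_t)((uint64_t)(n >> shift) *
                   inverseMod2_64((uint64_t)(d >> shift)));
}

int main() {
  printf("%lld\n", (long long)exactSDiv(63, 7));      // 9
  printf("%lld\n", (long long)exactSDiv(-720, 12));   // -60
  return 0;
}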
+
void SelectionDAGBuilder::visitICmp(const User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
@@ -2277,10 +2640,12 @@
SDValue Cond = getValue(I.getOperand(0));
SDValue TrueVal = getValue(I.getOperand(1));
SDValue FalseVal = getValue(I.getOperand(2));
+ ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+ ISD::VSELECT : ISD::SELECT;
for (unsigned i = 0; i != NumValues; ++i)
- Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
- TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+ Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+ TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
Cond,
SDValue(TrueVal.getNode(),
TrueVal.getResNo() + i),
@@ -2379,9 +2744,9 @@
EVT DestVT = TLI.getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this is
- // either a BIT_CONVERT or a no-op.
+ // either a BITCAST or a no-op.
if (DestVT != N.getValueType())
- setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
DestVT, N)); // convert types.
else
setValue(&I, N); // noop cast.
@@ -2493,7 +2858,8 @@
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors.
- int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MinRange[2] = { static_cast<int>(SrcNumElts+1),
+ static_cast<int>(SrcNumElts+1)};
int MaxRange[2] = {-1, -1};
for (unsigned i = 0; i != MaskNumElts; ++i) {
@@ -2530,7 +2896,7 @@
} else {
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts < SrcNumElts)
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
}
@@ -2601,13 +2967,12 @@
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
- const Type *AggTy = I.getType();
- const Type *ValTy = Op1->getType();
+ Type *AggTy = I.getType();
+ Type *ValTy = Op1->getType();
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2619,16 +2984,18 @@
SmallVector<SDValue, 4> Values(NumAggValues);
SDValue Agg = getValue(Op0);
- SDValue Val = getValue(Op1);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
for (; i != LinearIndex; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
// Copy values from the inserted value(s).
- for (; i != LinearIndex + NumValValues; ++i)
- Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
- SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
// Copy remaining value(s) from the original aggregate.
for (; i != NumAggValues; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
@@ -2641,17 +3008,23 @@
void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const Value *Op0 = I.getOperand(0);
- const Type *AggTy = Op0->getType();
- const Type *ValTy = I.getType();
+ Type *AggTy = Op0->getType();
+ Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore an extractvalue that produces an empty object.
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
SmallVector<SDValue, 4> Values(NumValValues);
SDValue Agg = getValue(Op0);
@@ -2669,12 +3042,12 @@
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
- const Type *Ty = I.getOperand(0)->getType();
+ Type *Ty = I.getOperand(0)->getType();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
const Value *Idx = *OI;
- if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ if (StructType *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
@@ -2684,11 +3057,6 @@
}
Ty = StTy->getElementType(Field);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
-
- // Offset canonically 0 for unions, but type changes
- Ty = UnTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
@@ -2750,7 +3118,7 @@
if (FuncInfo.StaticAllocaMap.count(&I))
return; // getValue will auto-populate this.
- const Type *Ty = I.getAllocatedType();
+ Type *Ty = I.getAllocatedType();
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
@@ -2769,7 +3137,7 @@
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
@@ -2797,14 +3165,18 @@
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ if (I.isAtomic())
+ return visitAtomicLoad(I);
+
const Value *SV = I.getOperand(0);
SDValue Ptr = getValue(SV);
- const Type *Ty = I.getType();
+ Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -2815,10 +3187,11 @@
SDValue Root;
bool ConstantMemory = false;
- if (I.isVolatile())
+ if (I.isVolatile() || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(SV)) {
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -2828,23 +3201,38 @@
}
SmallVector<SDValue, 4> Values(NumValues);
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
- for (unsigned i = 0; i != NumValues; ++i) {
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+    // (MaxParallelChains should always remain as a failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
- A, SV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, Alignment, TBAAInfo);
Values[i] = L;
- Chains[i] = L.getValue(1);
+ Chains[ChainI] = L.getValue(1);
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues);
+ MVT::Other, &Chains[0], ChainI);
if (isVolatile)
DAG.setRoot(Chain);
else
@@ -2857,6 +3245,9 @@
}
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ if (I.isAtomic())
+ return visitAtomicStore(I);
+
const Value *SrcV = I.getOperand(0);
const Value *PtrV = I.getOperand(1);
@@ -2874,23 +3265,210 @@
SDValue Ptr = getValue(PtrV);
SDValue Root = getRoot();
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
- for (unsigned i = 0; i != NumValues; ++i) {
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
- Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
- SDValue(Src.getNode(), Src.getResNo() + i),
- Add, PtrV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
}
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues));
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
+}
+
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+ SynchronizationScope Scope,
+ bool Before, DebugLoc dl,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // Fence, if necessary
+ if (Before) {
+ if (Order == AcquireRelease || Order == SequentiallyConsistent)
+ Order = Release;
+ else if (Order == Acquire || Order == Monotonic)
+ return Chain;
+ } else {
+ if (Order == AcquireRelease)
+ Order = Acquire;
+ else if (Order == Release || Order == Monotonic)
+ return Chain;
+ }
+ SDValue Ops[3];
+ Ops[0] = Chain;
+ Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
+ return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
+}
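The before/after placement reduces to a small table. A sketch with a simplified five-value ordering enum (the real AtomicOrdering also has NotAtomic and Unordered):

// fence_sketch.cpp - the placement logic of InsertFenceForAtomic above,
// with -1 standing for "no fence needed".
#include <cstdio>

enum Ordering { Monotonic, Acquire, Release, AcquireRelease,
                SequentiallyConsistent };

static int fenceBefore(Ordering O) {
  if (O == AcquireRelease || O == SequentiallyConsistent) return Release;
  if (O == Acquire || O == Monotonic) return -1;
  return O;   // a plain Release op keeps a release fence in front
}

static int fenceAfter(Ordering O) {
  if (O == AcquireRelease) return Acquire;
  if (O == Release || O == Monotonic) return -1;
  return O;   // Acquire stays Acquire; SequentiallyConsistent stays as-is
}

int main() {
  const char *Names[] = { "monotonic", "acquire", "release", "acq_rel",
                          "seq_cst" };
  for (int o = Monotonic; o <= SequentiallyConsistent; ++o) {
    int b = fenceBefore((Ordering)o), a = fenceAfter((Ordering)o);
    printf("%-9s before=%s after=%s\n", Names[o],
           b < 0 ? "none" : Names[b], a < 0 ? "none" : Names[a]);
  }
  return 0;
}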
+
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+ getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()),
+ MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ ISD::NodeType NT;
+ switch (I.getOperation()) {
+ default: llvm_unreachable("Unknown atomicrmw operation"); return;
+ case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+ case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
+ case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
+ case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
+ case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+ case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
+ case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
+ case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
+ case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
+ case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ }
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue L =
+ DAG.getAtomic(NT, dl,
+ getValue(I.getValOperand()).getValueType().getSimpleVT(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValOperand()),
+ I.getPointerOperand(), 0 /* Alignment */,
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+ DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = EVT::getEVT(I.getType());
+
+ if (I.getAlignment() * 8 < VT.getSizeInBits())
+ report_fatal_error("Cannot generate unaligned atomic load");
+
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ getValue(I.getPointerOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+ DebugLoc dl = getCurDebugLoc();
+
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+
+ if (I.getAlignment() * 8 < VT.getSizeInBits())
+ report_fatal_error("Cannot generate unaligned atomic store");
+
+ if (TLI.getInsertFencesForAtomic())
+ InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+ DAG, TLI);
+
+ SDValue OutChain =
+ DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValueOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
+
+ if (TLI.getInsertFencesForAtomic())
+ OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+ DAG, TLI);
+
+ DAG.setRoot(OutChain);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
@@ -2916,7 +3494,8 @@
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic)
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
@@ -2947,7 +3526,8 @@
// This is target intrinsic that touches memory
Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
VTs, &Ops[0], Ops.size(),
- Info.memVT, Info.ptrVal, Info.offset,
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.vol,
Info.readMem, Info.writeMem);
} else if (!HasChain) {
@@ -2970,9 +3550,9 @@
}
if (!I.getType()->isVoidTy()) {
- if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(PTy);
- Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
}
setValue(&I, Result);
@@ -2991,7 +3571,7 @@
DAG.getConstant(0x007fffff, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, MVT::i32));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
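The AND/OR pair works because forcing the exponent field to 0x3f800000 (the encoding of 1.0f) rescales any positive float to [1, 2). A host-side check using memcpy for the two BITCASTs:

// significand_sketch.cpp - the GetSignificand bit trick above: keep the
// mantissa bits of an IEEE-754 float and force the exponent field to
// that of 1.0f, yielding a value in [1, 2).
#include <cstdint>
#include <cstring>
#include <cstdio>

static float getSignificand(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);          // the BITCAST to i32
  bits = (bits & 0x007fffff) | 0x3f800000;      // AND then OR, as above
  std::memcpy(&x, &bits, sizeof x);             // BITCAST back to f32
  return x;
}

int main() {
  printf("%f\n", getSignificand(6.0f));    // 1.5  (6 = 1.5 * 2^2)
  printf("%f\n", getSignificand(0.375f));  // 1.5  (0.375 = 1.5 * 2^-2)
  return 0;
}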
/// GetExponent - Get the exponent:
@@ -3017,25 +3597,6 @@
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-/// Inlined utility function to implement binary input atomic intrinsics for
-/// visitIntrinsicCall: I is a call instruction
-/// Op is the associated NodeType for I
-const char *
-SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
- ISD::NodeType Op) {
- SDValue Root = getRoot();
- SDValue L =
- DAG.getAtomic(Op, getCurDebugLoc(),
- getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
- Root,
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- I.getArgOperand(0));
- setValue(&I, L);
- DAG.setRoot(L.getValue(1));
- return 0;
-}
-
 // implVisitAluOverflow - Lower arithmetic overflow intrinsics.
const char *
SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
@@ -3090,13 +3651,13 @@
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
// Add the exponent into the result in integer domain.
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
@@ -3116,13 +3677,13 @@
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
// Add the exponent into the result in integer domain.
SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
@@ -3154,14 +3715,14 @@
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
MVT::i32, t13);
// Add the exponent into the result in integer domain.
SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
}
} else {
// No special expansion.
@@ -3183,7 +3744,7 @@
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3293,7 +3854,7 @@
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
@@ -3402,7 +3963,7 @@
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3530,11 +4091,11 @@
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3555,11 +4116,11 @@
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3591,11 +4152,11 @@
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3663,11 +4224,11 @@
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3688,11 +4249,11 @@
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3724,11 +4285,11 @@
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3795,10 +4356,96 @@
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+ if (N.getOpcode() != ISD::TRUNCATE)
+ return 0;
+
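+ // Look through the AssertZext/AssertSext (and any nested truncate) applied
+ // to the argument to reach the defining CopyFromReg.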
+ const SDValue &Ext = N.getOperand(0);
+ if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
+ const SDValue &CFR = Ext.getOperand(0);
+ if (CFR.getOpcode() == ISD::CopyFromReg)
+ return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
+ else
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
+ }
+ return 0;
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, these instructions are inserted into
+/// the entry BB.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset,
+ const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
+ // Ignore inlined function arguments here.
+ DIVariable DV(Variable);
+ if (DV.isInlinedFnArgument(MF.getFunction()))
+ return false;
+
+ unsigned Reg = 0;
+ // Some arguments' frame index is recorded during argument lowering.
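+ // In that case getArgumentFrameIndex returns the index; it is stored in
+ // Offset and described relative to the frame register below.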
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI->getFrameRegister(MF);
+
+ if (!Reg && N.getNode()) {
+ if (N.getOpcode() == ISD::CopyFromReg)
+ Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ else
+ Reg = getTruncatedArgReg(N);
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ }
+
+ if (!Reg) {
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
+ }
+
+ if (!Reg && N.getNode()) {
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
+ FuncInfo.ArgDbgValues.push_back(&*MIB);
+ return true;
+}
+
// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
@@ -3814,6 +4461,9 @@
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
getValue(I.getArgOperand(0))));
@@ -3840,7 +4490,8 @@
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
return 0;
}
case Intrinsic::memset: {
@@ -3855,7 +4506,7 @@
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0));
+ MachinePointerInfo(I.getArgOperand(0))));
return 0;
}
case Intrinsic::memmove: {
@@ -3871,22 +4522,323 @@
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ MDNode *Variable = DI.getVariable();
+ const Value *Address = DI.getAddress();
+ if (!Address || !DIVariable(Variable).Verify())
+ return 0;
- // If the source and destination are known to not be aliases, we can
- // lower memmove as memcpy.
- uint64_t Size = -1ULL;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
- Size = C->getZExtValue();
- if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
- AliasAnalysis::NoAlias) {
- DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- false, I.getArgOperand(0), 0,
- I.getArgOperand(1), 0));
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built. The absolute, but not
+ // relative, SDNodeOrder values differ depending on whether debug info exists.
+ ++SDNodeOrder;
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
}
- DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ // Parameters are handled specially.
+ bool isParameter =
+ DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
+ if (isParameter && !AI) {
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FINode)
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, 0, N);
+ return 0;
+ }
+ } else if (AI)
+ SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ 0, dl, SDNodeOrder);
+ else {
+ // Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ return 0;
+ }
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+ // If the variable is pinned by an alloca in a dominating BB, then
+ // use StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ }
+ return 0;
+ }
+ case Intrinsic::dbg_value: {
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ if (!DIVariable(DI.getVariable()).Verify())
+ return 0;
+
+ MDNode *Variable = DI.getVariable();
+ uint64_t Offset = DI.getOffset();
+ const Value *V = DI.getValue();
+ if (!V)
+ return 0;
+
+ // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
+ // but do not always have a corresponding SDNode built. The absolute, but not
+ // relative, SDNodeOrder values differ depending on whether debug info exists.
+ ++SDNodeOrder;
+ SDDbgValue *SDV;
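+ // Constant values can be described directly; no SDNode is needed.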
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
+ SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ } else {
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else if (!V->use_empty()) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
+ // We may expand this to cover more cases. One case where we have no
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
+ }
+
+ // Build a debug info table entry.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ V = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI)
+ return 0;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return 0; // VLAs.
+ int FI = SI->second;
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+ MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+ return 0;
+ }
+ case Intrinsic::eh_exception: {
+ // Insert the EXCEPTIONADDR instruction.
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[1];
+ Ops[0] = DAG.getRoot();
+ SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::eh_selector: {
+ MachineBasicBlock *CallMBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (CallMBB->isLandingPad())
+ AddCatchInfo(I, &MMI, CallMBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(&I);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
+
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = getValue(I.getArgOperand(0));
+ Ops[1] = getRoot();
+ SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+ DAG.setRoot(Op.getValue(1));
+ setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, MVT::i32);
+ setValue(&I, Res);
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ return 0;
+ case Intrinsic::eh_dwarf_cfa: {
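+ // The CFA is the frame address plus the target's frame-to-arguments
+ // offset, adjusted by the intrinsic's operand.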
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
+ TLI.getPointerTy());
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
+ setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+ FA, Offset));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_functioncontext: {
+ // Get and store the index of the function context.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ AllocaInst *FnCtx =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ int FI = FuncInfo.StaticAllocaMap[FnCtx];
+ MFI->setFunctionContextIndex(FI);
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Ops, 2);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_dispatch_setup: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
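+ // Map each immediate-form MMX shift intrinsic to its register-form
+ // counterpart.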
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+ // The vector shift intrinsics with scalar shift amounts use 32-bit values,
+ // but the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
+ // to zero.
+ // We must do this early because v2i32 is not a legal type.
+ DebugLoc dl = getCurDebugLoc();
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
return 0;
}
case Intrinsic::convertff:
@@ -3958,6 +4910,13 @@
case Intrinsic::pow:
visitPow(I);
return 0;
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return 0;
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
MVT::i16, getValue(I.getArgOperand(0))));
@@ -4032,8 +4991,8 @@
// Store the stack protector onto the stack.
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
- PseudoSourceValue::getFixedStack(FI),
- 0, true, false, 0);
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
return 0;
@@ -4070,20 +5029,56 @@
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
- Res = DAG.getNode(ISD::TRAMPOLINE, dl,
- DAG.getVTList(TLI.getPointerTy(), MVT::Other),
- Ops, 6);
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
- setValue(&I, Res);
- DAG.setRoot(Res.getValue(1));
+ DAG.setRoot(Res);
return 0;
}
+ case Intrinsic::adjust_trampoline: {
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+ TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::gcroot:
+ if (GFI) {
+ const Value *Alloca = I.getArgOperand(0);
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ return 0;
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
- case Intrinsic::trap:
- DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ setValue(&I, getValue(I.getArgOperand(0)));
return 0;
+ }
+
+ case Intrinsic::trap: {
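+ // If a trap function has been requested, lower llvm.trap to a call to
+ // that function instead of an ISD::TRAP node.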
+ StringRef TrapFuncName = getTrapFunctionName();
+ if (TrapFuncName.empty()) {
+ DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ return 0;
+ }
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+ Args, DAG, getCurDebugLoc());
+ DAG.setRoot(Result.second);
+ return 0;
+ }
case Intrinsic::uadd_with_overflow:
return implVisitAluOverflow(I, ISD::UADDO);
case Intrinsic::sadd_with_overflow:
@@ -4098,61 +5093,25 @@
return implVisitAluOverflow(I, ISD::SMULO);
case Intrinsic::prefetch: {
- SDValue Ops[4];
+ SDValue Ops[5];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
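+ // rw selects between a read (0) and a write (1) prefetch; the access kind
+ // is preserved through the memory intrinsic node built below.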
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
- DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+ Ops[4] = getValue(I.getArgOperand(3));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 5,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
return 0;
}
- case Intrinsic::memory_barrier: {
- SDValue Ops[6];
- Ops[0] = getRoot();
- for (int x = 1; x < 6; ++x)
- Ops[x] = getValue(I.getArgOperand(x - 1));
-
- DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
- return 0;
- }
- case Intrinsic::atomic_cmp_swap: {
- SDValue Root = getRoot();
- SDValue L =
- DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
- getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
- Root,
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2)),
- I.getArgOperand(0));
- setValue(&I, L);
- DAG.setRoot(L.getValue(1));
- return 0;
- }
- case Intrinsic::atomic_load_add:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
- case Intrinsic::atomic_load_sub:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
- case Intrinsic::atomic_load_or:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
- case Intrinsic::atomic_load_xor:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
- case Intrinsic::atomic_load_and:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
- case Intrinsic::atomic_load_nand:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
- case Intrinsic::atomic_load_max:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
- case Intrinsic::atomic_load_min:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
- case Intrinsic::atomic_load_umin:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
- case Intrinsic::atomic_load_umax:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
- case Intrinsic::atomic_swap:
- return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
-
case Intrinsic::invariant_start:
case Intrinsic::lifetime_start:
// Discard region information.
@@ -4166,10 +5125,11 @@
}
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
- bool isTailCall) {
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- const Type *RetTy = FTy->getReturnType();
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = 0;
@@ -4184,9 +5144,12 @@
Outs, TLI, &Offsets);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- FTy->isVarArg(), Outs, FTy->getContext());
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
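+ // Sentinel value; replaced by a real frame index if the return value is
+ // demoted to an sret stack slot below.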
if (!CanLowerReturn) {
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
@@ -4194,10 +5157,10 @@
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
FTy->getReturnType());
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
- DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.isSExt = false;
@@ -4213,8 +5176,14 @@
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- SDValue ArgNode = getValue(*i);
- Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
unsigned attrInd = i - CS.arg_begin() + 1;
Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
@@ -4227,12 +5196,39 @@
Args.push_back(Entry);
}
+ if (LandingPad) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().CreateTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
+ }
+
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI.LowerCallTo.
if (isTailCall &&
!isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
isTailCall = false;
+ // If there's a possibility that fast-isel has already selected some amount
+ // of the current basic block, don't emit a tail call.
+ if (isTailCall && EnableFastISel)
+ isTailCall = false;
+
std::pair<SDValue,SDValue> Result =
TLI.LowerCallTo(getRoot(), RetTy,
CS.paramHasAttr(0, Attribute::SExt),
@@ -4252,7 +5248,7 @@
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
- const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+ Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
ComputeValueVTs(TLI, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -4266,7 +5262,9 @@
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
- Add, NULL, Offsets[i], false, false, 1);
+ Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -4274,7 +5272,7 @@
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
MVT::Other, &Chains[0], NumValues);
PendingLoads.push_back(Chain);
-
+
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
SmallVector<EVT, 4> RetTys;
@@ -4287,7 +5285,7 @@
EVT VT = RetTys[I];
EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-
+
SDValue ReturnValue =
getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
RegisterVT, VT, AssertOp);
@@ -4299,15 +5297,31 @@
DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
DAG.getVTList(&RetTys[0], RetTys.size()),
&ReturnValues[0], ReturnValues.size()));
-
}
- // As a special case, a null chain means that a tail call has been emitted and
- // the DAG root is already updated.
- if (Result.second.getNode())
- DAG.setRoot(Result.second);
- else
+ // Assign order to nodes here. If the call does not produce a result, it won't
+ // be mapped to an SDNode and visit() will not assign it an order number.
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted and
+ // the DAG root is already updated.
HasTailCall = true;
+ ++SDNodeOrder;
+ AssignOrderingToNode(DAG.getRoot().getNode());
+ } else {
+ DAG.setRoot(Result.second);
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.second.getNode());
+ }
+
+ if (LandingPad) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+ DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
}
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -4327,7 +5341,7 @@
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
- const Type *LoadTy,
+ Type *LoadTy,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
@@ -4359,7 +5373,7 @@
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
- Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+ Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
false /*nontemporal*/, 1 /* align=1 */);
@@ -4390,7 +5404,7 @@
if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
bool ActuallyDoIt = true;
MVT LoadVT;
- const Type *LoadTy;
+ Type *LoadTy;
switch (Size->getZExtValue()) {
default:
LoadVT = MVT::Other;
@@ -4450,6 +5464,30 @@
void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ // See if any floating point values are being passed to this function. This is
+ // used to emit an undefined reference to fltused on Windows.
+ FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (FT->isVarArg() &&
+ !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
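+ // Walk the type in post order so that floating-point types nested inside
+ // aggregates are found as well.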
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (!i->isFloatingPointTy()) continue;
+ MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+ break;
+ }
+ }
+ }
+
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
@@ -4527,7 +5565,7 @@
}
}
}
-
+
SDValue Callee;
if (!RenameFn)
Callee = getValue(I.getCalledValue());
@@ -4539,12 +5577,11 @@
LowerCallTo(&I, Callee, I.isTailCall());
}
-namespace llvm {
+namespace {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
-class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
- public TargetLowering::AsmOperandInfo {
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber
/// this is null, otherwise it is the incoming operand to the CallInst.
@@ -4555,7 +5592,7 @@
/// contains the set of registers corresponding to the operand.
RegsForValue AssignedRegs;
- explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
@@ -4586,17 +5623,23 @@
if (isa<BasicBlock>(CallOperandVal))
return TLI.getPointerTy();
- const llvm::Type *OpTy = CallOperandVal->getType();
+ llvm::Type *OpTy = CallOperandVal->getType();
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
- const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
+ // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
@@ -4630,55 +5673,9 @@
}
};
-} // end llvm namespace.
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register. Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
- const TargetLowering &TLI,
- const TargetRegisterInfo *TRI) {
- EVT FoundVT = MVT::Other;
- const TargetRegisterClass *FoundRC = 0;
- for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
- E = TRI->regclass_end(); RCI != E; ++RCI) {
- EVT ThisVT = MVT::Other;
-
- const TargetRegisterClass *RC = *RCI;
- // If none of the value types for this register class are valid, we
- // can't use it. For example, 64-bit reg classes on 32-bit targets.
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (TLI.isTypeLegal(*I)) {
- // If we have already found this register in a different register class,
- // choose the one with the largest VT specified. For example, on
- // PowerPC, we favor f64 register classes over f32.
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
- ThisVT = *I;
- break;
- }
- }
- }
-
- if (ThisVT == MVT::Other) continue;
-
- // NOTE: This isn't ideal. In particular, this might allocate the
- // frame pointer in functions that need it (due to them not being taken
- // out of allocation, because a variable sized allocation hasn't been seen
- // yet). This is a slight code pessimization, but should still work.
- for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
- E = RC->allocation_order_end(MF); I != E; ++I)
- if (*I == Reg) {
- // We found a matching register class. Keep looking at others in case
- // we find one with larger registers that this physreg is also in.
- FoundRC = RC;
- FoundVT = ThisVT;
- break;
- }
- }
- return FoundRC;
-}
+} // end anonymous namespace
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
@@ -4689,11 +5686,13 @@
/// OpInfo describes the operand.
/// Input and OutputRegs are the set of already allocated physical registers.
///
-void SelectionDAGBuilder::
-GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs) {
- LLVMContext &Context = FuncInfo.Fn->getContext();
+static void GetRegistersForValue(SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ DebugLoc DL,
+ SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ LLVMContext &Context = *DAG.getContext();
// Compute whether this value requires an input register, an output register,
// or both.
@@ -4739,7 +5738,7 @@
// vector types).
EVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -4749,7 +5748,7 @@
// machine.
RegVT = EVT::getIntegerVT(Context,
OpInfo.ConstraintVT.getSizeInBits());
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
@@ -4812,53 +5811,511 @@
return;
}
- // This is a reference to a register class that doesn't directly correspond
- // to an LLVM register class. Allocate NumRegs consecutive, available,
- // registers from the class.
- std::vector<unsigned> RegClassRegs
- = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
+ // Otherwise, we couldn't allocate enough registers for this.
+}
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
- unsigned NumAllocated = 0;
- for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
- unsigned Reg = RegClassRegs[i];
- // See if this register is available.
- if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
- (isInReg && InputRegs.count(Reg))) { // Already used.
- // Make sure we find consecutive registers.
- NumAllocated = 0;
- continue;
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SDISelAsmOperandInfoVector ConstraintOperands;
+
+ std::set<unsigned> OutputRegs, InputRegs;
+
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI.ParseConstraints(CS);
+
+ bool hasMemory = false;
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
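+ // First pass: compute the value type and call operand for each constraint.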
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ EVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
}
- // Check to see if this register is allocatable (i.e. don't give out the
- // stack pointer).
- const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
- if (!RC) { // Couldn't allocate this register.
- // Reset NumAllocated to make sure we return consecutive registers.
- NumAllocated = 0;
- continue;
+ // If this is an input or an indirect output, process the call argument.
+ // BasicBlocks are labels, currently appearing only in asm's.
+ if (OpInfo.CallOperandVal) {
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
}
- // Okay, this register is good, we can use it.
- ++NumAllocated;
+ OpInfo.ConstraintVT = OpVT;
- // If we allocated enough consecutive registers, succeed.
- if (NumAllocated == NumRegs) {
- unsigned RegStart = (i-NumAllocated)+1;
- unsigned RegEnd = i+1;
- // Mark all of the allocated registers used.
- for (unsigned i = RegStart; i != RegEnd; ++i)
- Regs.push_back(RegClassRegs[i]);
-
- OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
- OpInfo.ConstraintVT);
- OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
- return;
+ // An indirect operand is accessed through memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
}
}
- // Otherwise, we couldn't allocate enough registers for this.
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ // If this is a memory input and the operand is not indirect, do what we need
+ // to in order to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
+ false, false, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
+ }
+
+ // Third pass - Loop over all of the operands, assigning virtual or physical
+ // registers to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+ InputRegs);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+ TLI.getPointerTy()));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect and AlignStack bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ report_fatal_error("Couldn't allocate output reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ InlineAsm::Kind_RegDefEarlyClobber :
+ InlineAsm::Kind_RegDef,
+ false,
+ 0,
+ DAG,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+ // Scan until we find the definition we already emitted of this operand.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ }
+
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.push_back
+ (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+ // Use the produced MatchedRegs object to copy the input value into the
+ // newly-created virtual registers.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty())
+ report_fatal_error("Invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect register inputs yet!");
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ report_fatal_error("Couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'!");
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag);
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ EVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
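+ // For example, a register class holding both v4f32 and v2i64 may hand
+ // back a value typed v4f32 where the asm result was declared v2i64; the
+ // bitcast below reconciles the two types without changing any bits.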
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ const Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+ Chain, &Flag);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+ SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ MachinePointerInfo(StoresToEmit[i].second),
+ false, false, 0);
+ OutChains.push_back(Val);
+ }
+
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+
+ DAG.setRoot(Chain);
+}
+
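+// The visitors below lower the variadic-argument operations. Roughly, the
+// C sequence
+//   va_list ap; va_start(ap, fmt); int x = va_arg(ap, int); va_end(ap);
+// reaches us as llvm.va_start / va_arg / llvm.va_end and is mapped 1:1 onto
+// the ISD::VASTART / ISD::VAARG / ISD::VAEND nodes created here.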
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetData &TD = *TLI.getTargetData();
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)),
+ TD.getABITypeAlignment(I.getType()));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -4866,7 +6323,7 @@
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
@@ -4883,7 +6340,7 @@
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
- const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+ Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
@@ -4900,16 +6357,17 @@
Flags.setSRet();
if (Args[i].isByVal) {
Flags.setByVal();
- const PointerType *Ty = cast<PointerType>(Args[i].Ty);
- const Type *ElementTy = Ty->getElementType();
- unsigned FrameAlign = getByValTypeAlignment(ElementTy);
- unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
+ PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
// For ByVal, alignment should come from FE. BE will guess if this
// info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
if (Args[i].Alignment)
FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy);
Flags.setByValAlign(FrameAlign);
- Flags.setByValSize(FrameSize);
}
if (Args[i].isNest)
Flags.setNest();
@@ -4953,7 +6411,7 @@
unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
+ MyFlags.VT = RegisterVT.getSimpleVT();
MyFlags.Used = isReturnValueUsed;
if (RetSExt)
MyFlags.Flags.setSExt();
@@ -4989,7 +6447,7 @@
DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerCall emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
});
@@ -5015,7 +6473,7 @@
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case. In
- // that case, nothing will actualy look at the value.
+ // that case, nothing will actually look at the value.
if (ReturnValues.empty())
return std::make_pair(SDValue(), Chain);
@@ -5054,6 +6512,25 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. An argument used by a switch is treated as used
+/// outside the entry block, since the switch may expand into multiple basic
+/// blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (EnableFastISel)
+ return A->use_empty();
+
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ return false; // Used outside the entry block, or by a switch.
+ }
+ return true;
+}
+
void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
const Function &F = *LLVMBB->getParent();
@@ -5091,7 +6568,7 @@
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
- const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment =
TD->getABITypeAlignment(ArgTy);
@@ -5106,16 +6583,17 @@
Flags.setSRet();
if (F.paramHasAttr(Idx, Attribute::ByVal)) {
Flags.setByVal();
- const PointerType *Ty = cast<PointerType>(I->getType());
- const Type *ElementTy = Ty->getElementType();
- unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
- unsigned FrameSize = TD->getTypeAllocSize(ElementTy);
+ PointerType *Ty = cast<PointerType>(I->getType());
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
if (F.getParamAlignment(Idx))
FrameAlign = F.getParamAlignment(Idx);
+ else
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
Flags.setByValAlign(FrameAlign);
- Flags.setByValSize(FrameSize);
}
if (F.paramHasAttr(Idx, Attribute::Nest))
Flags.setNest();
@@ -5150,7 +6628,7 @@
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
@@ -5197,8 +6675,8 @@
if (I->use_empty() && NumValues)
SDB->setUnusedArgValue(I, InVals[i]);
- for (unsigned Value = 0; Value != NumValues; ++Value) {
- EVT VT = ValueVTs[Value];
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
@@ -5217,15 +6695,42 @@
i += NumParts;
}
- if (!I->use_empty()) {
- SDValue Res;
- if (!ArgValues.empty())
- Res = DAG.getMergeValues(&ArgValues[0], NumValues,
- SDB->getCurDebugLoc());
- SDB->setValue(I, Res);
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
- // If this argument is live outside of the entry block, insert a copy from
- // whereever we got it to the vreg that other BB's will reference it as.
+ // Note down frame index.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
+
+ SDB->setValue(I, Res);
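+ // A BUILD_PAIR here typically means an illegal-typed argument (e.g. i64
+ // on a 32-bit target) was reassembled from two stack loads; look through
+ // the first load to recover the argument's frame slot for debug info.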
+ if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (LoadSDNode *LNode =
+ dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ }
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general. It's also subtly incompatible with the hacks FastISel
+ // uses with vregs.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[I] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(I)) {
+ FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
}
@@ -5271,6 +6776,10 @@
// Ignore dead phi's.
if (PN->use_empty()) continue;
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
unsigned Reg;
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 6bd7eb3..0a21ca3 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,15 +18,11 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include <vector>
-#include <set>
namespace llvm {
@@ -36,6 +32,7 @@
class BitCastInst;
class BranchInst;
class CallInst;
+class DbgValueInst;
class ExtractElementInst;
class ExtractValueInst;
class FCmpInst;
@@ -46,9 +43,11 @@
class Function;
class FunctionLoweringInfo;
class GetElementPtrInst;
+class GCFunctionInfo;
class ICmpInst;
class IntToPtrInst;
class IndirectBrInst;
+class InvokeInst;
class InsertElementInst;
class InsertValueInst;
class Instruction;
@@ -60,7 +59,6 @@
class PHINode;
class PtrToIntInst;
class ReturnInst;
-class SDISelAsmOperandInfo;
class SDDbgValue;
class SExtInst;
class SelectInst;
@@ -73,6 +71,7 @@
class TruncInst;
class UIToFPInst;
class UnreachableInst;
+class UnwindInst;
class VAArgInst;
class ZExtInst;
@@ -90,6 +89,24 @@
/// to preserve debug information for incoming arguments.
DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until the referent is
+ /// seen.
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
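+ // Example (hypothetical): an llvm.dbg.value describing %x can be visited
+ // before %x itself has been lowered to an SDValue; the dbg_value is parked
+ // here and emitted later from resolveDanglingDebugInfo once %x gets one.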
+
public:
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
/// them up and then emit token factor nodes when possible. This allows us to
@@ -115,10 +132,13 @@
Constant* Low;
Constant* High;
MachineBasicBlock* BB;
+ uint32_t ExtraWeight;
- Case() : Low(0), High(0), BB(0) { }
- Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
- Low(low), High(high), BB(bb) { }
+ Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb,
+ uint32_t extraweight) : Low(low), High(high), BB(bb),
+ ExtraWeight(extraweight) { }
+
APInt size() const {
const APInt &rHigh = cast<ConstantInt>(High)->getValue();
const APInt &rLow = cast<ConstantInt>(Low)->getValue();
@@ -186,20 +206,30 @@
CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
const Value *cmpmiddle,
MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
- MachineBasicBlock *me)
+ MachineBasicBlock *me,
+ uint32_t trueweight = 0, uint32_t falseweight = 0)
: CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
+ TrueWeight(trueweight), FalseWeight(falseweight) { }
+
// CC - the condition code to use for the case block's setcc node
ISD::CondCode CC;
+
// CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
// Emit by default LHS op RHS. MHS is used for range comparisons:
// If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
// TrueBB/FalseBB - the block to branch to if the setcc is true/false.
MachineBasicBlock *TrueBB, *FalseBB;
+
// ThisBB - the block into which to emit the code for the setcc and branches
MachineBasicBlock *ThisBB;
+
+ // TrueWeight/FalseWeight - branch weights for the true/false edges, taken
+ // from BranchProbabilityInfo when available (0 when unknown).
+ uint32_t TrueWeight, FalseWeight;
};
+
struct JumpTable {
JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
@@ -239,15 +269,16 @@
struct BitTestBlock {
BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, bool E,
+ unsigned Rg, EVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
const BitTestInfo& C):
- First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
Parent(P), Default(D), Cases(C) { }
APInt First;
APInt Range;
const Value *SValue;
unsigned Reg;
+ EVT RegVT;
bool Emitted;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
@@ -285,6 +316,12 @@
/// OptLevel - What optimization level we're generating code for.
///
CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
/// HasTailCall - This is set to true if a call in the current
/// block has been translated as a tail call. In this case,
@@ -301,7 +338,7 @@
HasTailCall(false), Context(dag.getContext()) {
}
- void init(AliasAnalysis &aa);
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
@@ -311,6 +348,14 @@
/// consumed.
void clear();
+ /// clearDanglingDebugInfo - Clear the dangling debug information
+ /// map. This function is separated from clear() so that debug
+ /// information that is dangling in a basic block can be properly
+ /// resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached
+ /// to PHI nodes.
+ void clearDanglingDebugInfo();
+
/// getRoot - Return the current virtual root of the Selection DAG,
/// flushing any PendingLoad items. This must be done before emitting
/// a store or any other node that may need to be ordered after any
@@ -339,6 +384,9 @@
void visit(unsigned Opcode, const User &I);
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
SDValue getValue(const Value *V);
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -354,10 +402,6 @@
assert(N.getNode() == 0 && "Already set a value for this node!");
N = NewN;
}
-
- void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs);
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
@@ -370,7 +414,12 @@
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
- void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
private:
// Terminator instructions.
@@ -401,11 +450,16 @@
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight = 0);
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
- void visitBitTestCase(MachineBasicBlock* NextMBB,
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
@@ -414,6 +468,11 @@
MachineBasicBlock *SwitchBB);
private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitResume(const ResumeInst &I);
+ void visitUnwind(const UnwindInst &I);
+
void visitBinary(const User &I, unsigned OpCode);
void visitShift(const User &I, unsigned Opcode);
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
@@ -426,7 +485,7 @@
void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
- void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); }
+ void visitSDiv(const User &I);
void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
void visitOr (const User &I) { visitBinary(I, ISD::OR); }
@@ -456,6 +515,7 @@
void visitExtractValue(const ExtractValueInst &I);
void visitInsertValue(const InsertValueInst &I);
+ void visitLandingPad(const LandingPadInst &I);
void visitGetElementPtr(const User &I);
void visitSelect(const User &I);
@@ -463,10 +523,16 @@
void visitAlloca(const AllocaInst &I);
void visitLoad(const LoadInst &I);
void visitStore(const StoreInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
-
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
@@ -477,6 +543,11 @@
void visitLog2(const CallInst &I);
void visitLog10(const CallInst &I);
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
}
@@ -484,10 +555,15 @@
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
- const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// EmitFuncArgumentDbgValue - If V is a function argument, create the
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, such instructions are inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset, const SDValue &N);
};
} // end namespace llvm
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 4f93fd2..68b9146 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -43,16 +44,21 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include <algorithm>
using namespace llvm;
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
static cl::opt<bool>
@@ -63,6 +69,11 @@
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction fails"));
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
#ifndef NDEBUG
static cl::opt<bool>
ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
@@ -166,18 +177,30 @@
return 0;
}
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ assert(!MI->getDesc().hasPostISelHook() &&
+ "If a target marks an instruction with 'hasPostISelHook', "
+ "it must implement TargetLowering::AdjustInstrPostInstrSelection!");
+}
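+// (A target that marks an instruction with 'hasPostISelHook' is expected to
+// override this hook; the default implementation above exists only so the
+// assert can catch targets that forget to.)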
+
//===----------------------------------------------------------------------===//
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ GFI(),
OptLevel(OL),
- DAGSize(0)
-{}
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ }
SelectionDAGISel::~SelectionDAGISel() {
delete SDB;
@@ -188,43 +211,58 @@
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<GCModuleInfo>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or
-/// other function that gcc recognizes as "returning twice". This is used to
-/// limit code-gen optimizations on the machine function.
+/// SplitCriticalSideEffectEdges - Look for critical edges carrying a PHI
+/// input value that may trap. In that case we have to split the edge so that
+/// the path through the predecessor block that doesn't go to the phi block
+/// doesn't execute the possibly trapping instruction.
///
-/// FIXME: Remove after <rdar://problem/8031714> is fixed.
-static bool FunctionCallsSetJmp(const Function *F) {
- const Module *M = F->getParent();
- static const char *ReturnsTwiceFns[] = {
- "setjmp",
- "sigsetjmp",
- "setjmp_syscall",
- "savectx",
- "qsetjmp",
- "vfork",
- "getcontext"
- };
-#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *)
+/// This is required for correctness, so it must be done at -O0.
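+/// For example, if predecessor P ends in a conditional branch to blocks A
+/// and B, and A's PHI has the incoming value (sdiv 1, 0) from P, the edge
+/// P->A must be split so that the trapping divide is evaluated only on the
+/// path that actually reaches A.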
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop over blocks looking for phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
- for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I)
- if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
- if (!Callee->use_empty())
- for (Value::const_use_iterator
- I = Callee->use_begin(), E = Callee->use_end();
- I != E; ++I)
- if (const CallInst *CI = dyn_cast<CallInst>(*I))
- if (CI->getParent()->getParent() == F)
- return true;
- }
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
- return false;
-#undef NUM_RETURNS_TWICE_FNS
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
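+ // Splitting the edge rewrites this block's PHI operands, so restart the
+ // scan of the block from the top.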
+ goto ReprocessBlock;
+ }
+ }
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
const Function &Fn = *mf.getFunction();
const TargetInstrInfo &TII = *TM.getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
@@ -232,12 +270,21 @@
MF = &mf;
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF);
- SDB->init(*AA);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ else
+ FuncInfo->BPI = 0;
+
+ SDB->init(GFI, *AA);
SelectAllBasicBlocks(Fn);
@@ -251,7 +298,7 @@
if (!FuncInfo->ArgDbgValues.empty())
for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
E = RegInfo->livein_end(); LI != E; ++LI)
- if (LI->second)
+ if (LI->second)
LiveInMap.insert(std::make_pair(LI->first, LI->second));
// Insert DBG_VALUE instructions for function arguments to the entry block.
@@ -272,14 +319,37 @@
if (LDI != LiveInMap.end()) {
MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
MachineBasicBlock::iterator InsertPos = Def;
- const MDNode *Variable =
+ const MDNode *Variable =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
unsigned Offset = MI->getOperand(1).getImm();
// Def is never a terminator here, so it is ok to increment InsertPos.
- BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
TII.get(TargetOpcode::DBG_VALUE))
.addReg(LDI->second, RegState::Debug)
.addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then that
+ // COPY instruction also needs a DBG_VALUE, provided it is the only user
+ // of LDI->second.
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ EntryMBB->insertAfter(CopyUseMI, NewMI);
+ }
}
}
@@ -291,11 +361,10 @@
const MachineBasicBlock *MBB = I;
for (MachineBasicBlock::const_iterator
II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
- // Operand 1 of an inline asm instruction indicates whether the asm
- // needs stack or not.
- if (TID.isCall() && !TID.isReturn()) {
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
goto done;
}
@@ -305,7 +374,7 @@
}
// Determine if there is a call to setjmp in the machine function.
- MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+ MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
// Replace forward-declared registers with the registers containing
// the desired value.
@@ -334,10 +403,9 @@
return true;
}
-void
-SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
- BasicBlock::const_iterator End,
- bool &HadTailCall) {
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
// Lower all of the non-terminator instructions. If a call is emitted
// as a tail call, cease emitting nodes for this block. Terminators
// are handled below.
@@ -353,38 +421,6 @@
CodeGenAndEmitDAG();
}
-namespace {
-/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
-/// nodes from the worklist.
-class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
- SmallVector<SDNode*, 128> &Worklist;
- SmallPtrSet<SDNode*, 128> &InWorklist;
-public:
- SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl,
- SmallPtrSet<SDNode*, 128> &inwl)
- : Worklist(wl), InWorklist(inwl) {}
-
- void RemoveFromWorklist(SDNode *N) {
- if (!InWorklist.erase(N)) return;
-
- SmallVector<SDNode*, 128>::iterator I =
- std::find(Worklist.begin(), Worklist.end(), N);
- assert(I != Worklist.end() && "Not in worklist");
-
- *I = Worklist.back();
- Worklist.pop_back();
- }
-
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
- RemoveFromWorklist(N);
- }
-
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
-};
-}
-
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
@@ -424,18 +460,7 @@
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
-
- // Only install this information if it tells us something.
- if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
- DestReg -= TargetRegisterInfo::FirstVirtualRegister;
- if (DestReg >= FuncInfo->LiveOutRegInfo.size())
- FuncInfo->LiveOutRegInfo.resize(DestReg+1);
- FunctionLoweringInfo::LiveOutInfo &LOI =
- FuncInfo->LiveOutRegInfo[DestReg];
- LOI.NumSignBits = NumSignBits;
- LOI.KnownOne = KnownOne;
- LOI.KnownZero = KnownZero;
- }
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
} while (!Worklist.empty());
}
@@ -444,13 +469,20 @@
if (TimePassesIsEnabled)
GroupName = "Instruction Selection and Scheduling";
std::string BlockName;
+ int BlockNumber = -1;
+ (void)BlockNumber;
+#ifdef NDEBUG
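+ // In release builds the block name is only worth computing when one of
+ // the graph-viewer flags is set; asserts builds always need it for the
+ // DEBUG dumps below, so there the condition is compiled away.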
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
+#endif
+ {
+ BlockNumber = FuncInfo->MBB->getNumber();
BlockName = MF->getFunction()->getNameStr() + ":" +
FuncInfo->MBB->getBasicBlock()->getNameStr();
-
- DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump());
+ }
+ DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -460,7 +492,8 @@
CurDAG->Combine(Unrestricted, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -473,7 +506,8 @@
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (Changed) {
if (ViewDAGCombineLT)
@@ -486,8 +520,8 @@
CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n";
- CurDAG->dump());
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
}
{
@@ -511,18 +545,19 @@
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n";
- CurDAG->dump());
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+ << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
}
if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
{
NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
- CurDAG->Legalize(OptLevel);
+ CurDAG->Legalize();
}
- DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -532,7 +567,8 @@
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -546,7 +582,8 @@
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -562,13 +599,19 @@
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- FuncInfo->MBB = Scheduler->EmitSchedule();
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
FuncInfo->InsertPt = Scheduler->InsertPos;
}
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
// Free the scheduler state.
{
NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
@@ -581,22 +624,24 @@
}
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(errs() << "===== Instruction selection begins:\n");
+ DEBUG(errs() << "===== Instruction selection begins: BB#"
+ << FuncInfo->MBB->getNumber()
+ << " '" << FuncInfo->MBB->getName() << "'\n");
PreprocessISelDAG();
-
+
// Select target instructions for the DAG.
{
// Number all nodes with a topological order and set DAGSize.
DAGSize = CurDAG->AssignTopologicalOrder();
-
+
// Create a dummy node (which is not added to allnodes), that adds
// a reference to the root node, preventing it from being deleted,
// and tracking any changes of the root.
HandleSDNode Dummy(CurDAG->getRoot());
ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
++ISelPosition;
-
+
// The AllNodes list is now topological-sorted. Visit the
// nodes by starting at the end of the list (the root of the
// graph) and proceeding back toward the beginning (the entry
@@ -608,19 +653,19 @@
// makes it theoretically possible to disable the DAGCombiner.
if (Node->use_empty())
continue;
-
+
SDNode *ResNode = Select(Node);
-
+
// FIXME: This is pretty gross. 'Select' should be changed to not return
// anything at all and this code should be nuked with a tactical strike.
-
+
// If node should not be replaced, continue with the next one.
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
if (ResNode)
ReplaceUses(Node, ResNode);
-
+
// If after the replacement this node is not used any more,
// remove this dead node.
if (Node->use_empty()) { // Don't delete EntryToken, etc.
@@ -628,19 +673,190 @@
CurDAG->RemoveDeadNode(Node, &ISU);
}
}
-
+
CurDAG->setRoot(Dummy.getValue());
- }
+ }
DEBUG(errs() << "===== Instruction selection ends:\n");
PostprocessISelDAG();
}
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+
+ // Assign the call site to the landing pad's begin label.
+ MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+
+ const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results
+ // in exceptions not being caught because no typeids are associated with
+ // the invoke. This may not be the only way things can go wrong, but it
+ // is the only way we try to work around for the moment.
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::const_iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isa<EHSelectorInst>(I))
+ break;
+
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
+ }
+}
+
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same MachineInstr. For
+/// example we could have:
+/// A: x = load i32 *P
+/// B: y = icmp A, 42
+/// C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ const Instruction *FoldInst,
+ FastISel *FastIS) {
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ if (LoadReg == 0)
+ return false;
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level) then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+ // use of the loaded value ended up being multiple operands of the result, in
+ // either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes, make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (EnableFastISel)
+ FastIS = TLI.createFastISel(*FuncInfo);
+
// Iterate over all basic blocks in the function.
- for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- const BasicBlock *LLVMBB = &*I;
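+ // Visit blocks in reverse post-order so that, in the common case, each
+ // block's predecessors are selected before it; the PHI live-out
+ // computation below can then reuse finished info for incoming values.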
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+ for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ const BasicBlock *LLVMBB = *I;
+
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ if (!FuncInfo->VisitedBBs.count(*PI)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+ } else {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
@@ -649,20 +865,136 @@
BasicBlock::const_iterator BI = End;
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
-
+
+ // Setup an EH landing-pad block.
+ if (FuncInfo->MBB->isLandingPad())
+ PrepareEHLandingPad();
+
// Lower any arguments needed in this block if this is the entry block.
if (LLVMBB == &Fn.getEntryBlock())
LowerArguments(LLVMBB);
- // Run SelectionDAG instruction selection on the remainder of the block
- // not handled by FastISel. If FastISel is not run, this is the entire
- // block.
- bool HadTailCall;
- SelectBasicBlock(Begin, BI, HadTailCall);
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ FastIS->startNewBlock();
+
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(0);
+ }
+
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = llvm::prior(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo))
+ continue;
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->SelectInstruction(Inst)) {
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != Begin) {
+ BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ // If we succeeded, don't re-select the load.
+ BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ continue;
+ }
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(Inst)) {
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed call: ";
+ Inst->dump();
+ }
+
+ if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst->getType());
+ }
+
+ bool HadTailCall = false;
+ SelectBasicBlock(Inst, BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ if (HadTailCall) {
+ --BI;
+ break;
+ }
+
+ continue;
+ }
+
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
+ // Don't abort, and use a different message for terminator misses.
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed terminator: ";
+ Inst->dump();
+ }
+ } else {
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel miss: ";
+ Inst->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ llvm_unreachable("FastISel didn't select the entire block");
+ }
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
}
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
}
void
@@ -712,12 +1044,14 @@
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
if (j+1 != ej)
- SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
else
- SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
@@ -832,7 +1166,7 @@
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
@@ -841,13 +1175,15 @@
if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
Succs.push_back(SDB->SwitchCases[i].FalseBB);
- // Emit the code. Note that this could result in ThisBB being split, so
- // we need to check for updates.
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
- ThisBB = FuncInfo->MBB;
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
@@ -897,10 +1233,6 @@
return Ctor(this, OptLevel);
}
-ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
- return new ScheduleHazardRecognizer();
-}
-
//===----------------------------------------------------------------------===//
// Helper functions used by the generated instruction selector.
//===----------------------------------------------------------------------===//
@@ -980,11 +1312,11 @@
Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
- Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
- if (InOps[e-1].getValueType() == MVT::Flag)
- --e; // Don't process a flag operand if it is here.
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
@@ -1011,15 +1343,15 @@
}
}
- // Add the flag input back if present.
+ // Add the glue input back if present.
if (e != InOps.size())
Ops.push_back(InOps.back());
}
-/// findFlagUse - Return use of EVT::Flag value produced by the specified
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
/// SDNode.
///
-static SDNode *findFlagUse(SDNode *N) {
+static SDNode *findGlueUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDUse &Use = I.getUse();
@@ -1041,11 +1373,11 @@
// never find it.
//
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
- // happen because we scan down to newly selected nodes in the case of flag
+ // happen because we scan down to newly selected nodes in the case of glue
// uses.
if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
return false;
-
+
// Don't revisit a node if we already scanned it and didn't fail; we know we
// won't fail if we scan it again.
if (!Visited.insert(Use))
@@ -1055,7 +1387,7 @@
// Ignore chain uses, they are validated by HandleMergeInputChains.
if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
continue;
-
+
SDNode *N = Use->getOperand(i).getNode();
if (N == Def) {
if (Use == ImmedUse || Use == Root)
@@ -1102,8 +1434,8 @@
//
// * indicates nodes to be folded together.
//
- // If Root produces a flag, then it gets (even more) interesting. Since it
- // will be "glued" together with its flag use in the scheduler, we need to
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
// check if it might reach N.
//
// [N*] //
@@ -1121,45 +1453,58 @@
// ^ / //
// f / //
// | / //
- // [FU] //
+ // [GU] //
//
- // If FU (flag use) indirectly reaches N (the load), and Root folds N
- // (call it Fold), then X is a predecessor of FU and a successor of
- // Fold. But since Fold and FU are flagged together, this will create
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
// a cycle in the scheduling graph.
- // If the node has flags, walk down the graph to the "lowest" node in the
- // flagged set.
+ // If the node has glue, walk down the graph to the "lowest" node in the
+ // glued set.
EVT VT = Root->getValueType(Root->getNumValues()-1);
- while (VT == MVT::Flag) {
- SDNode *FU = findFlagUse(Root);
- if (FU == NULL)
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
break;
- Root = FU;
+ Root = GU;
VT = Root->getValueType(Root->getNumValues()-1);
-
- // If our query node has a flag result with a use, we've walked up it. If
+
+ // If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
// the chain, our WalkChainUsers predicate will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}
-
+
SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
}
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops);
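+ // An INLINEASM node always produces a chain and a glue value, in that
+ // order, so rebuild the node with the same two result types.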
+
+ std::vector<EVT> VTs;
+ VTs.push_back(MVT::Other);
+ VTs.push_back(MVT::Glue);
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
-
+
unsigned Shift = 7;
uint64_t NextBits;
do {
@@ -1167,25 +1512,25 @@
Val |= (NextBits&127) << Shift;
Shift += 7;
} while (NextBits & 128);
-
+
return Val;
}
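// A worked example of the VBR scheme GetVBR decodes (a sketch; the bytes
// come from the generated matcher tables): 300 == 0b100101100 is stored
// low seven bits first as { 0xAC, 0x02 }. The caller reads 0xAC, sees bit 7
// set, and calls GetVBR(0xAC, MatcherTable, Idx) with Idx at the 0x02 byte:
//   Val = 0xAC & 127 = 44;  NextBits = 0x02;  Val |= (2 & 127) << 7;  // 300
// Bit 7 of 0x02 is clear, so the loop exits and 300 is returned.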
-/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
-/// interior flag and chain results to use the new flag and chain results.
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
void SelectionDAGISel::
-UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
- const SmallVectorImpl<SDNode*> &ChainNodesMatched,
- SDValue InputFlag,
- const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
- bool isMorphNodeTo) {
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
-
+
ISelUpdater ISU(ISelPosition);
// Now that all the normal results are replaced, we replace the chain and
- // flag results if present.
+ // glue results if present.
if (!ChainNodesMatched.empty()) {
assert(InputChain.getNode() != 0 &&
"Matched input chains but didn't produce a chain");
@@ -1193,55 +1538,55 @@
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
-
+
// If this node was already deleted, don't look at it.
if (ChainNode->getOpcode() == ISD::DELETED_NODE)
continue;
-
+
// Don't replace the results of the root node if we're doing a
// MorphNodeTo.
if (ChainNode == NodeToMatch && isMorphNodeTo)
continue;
-
+
SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
- if (ChainVal.getValueType() == MVT::Flag)
+ if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
-
+
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
-
- // If the result produces a flag, update any flag results in the matched
- // pattern with the flag result.
- if (InputFlag.getNode() != 0) {
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
// Handle any interior nodes explicitly marked.
- for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
- SDNode *FRN = FlagResultNodesMatched[i];
-
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
// If this node was already deleted, don't look at it.
if (FRN->getOpcode() == ISD::DELETED_NODE)
continue;
-
- assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
- "Doesn't have a flag result");
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
- InputFlag, &ISU);
-
+ InputGlue, &ISU);
+
// If the node became dead and we haven't already seen it, delete it.
if (FRN->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
NowDeadNodes.push_back(FRN);
}
}
-
+
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
-
+
DEBUG(errs() << "ISEL: Match complete!\n");
}
@@ -1260,17 +1605,17 @@
///
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
-static ChainResult
+static ChainResult
WalkChainUsers(SDNode *ChainedNode,
SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;
-
+
for (SDNode::use_iterator UI = ChainedNode->use_begin(),
E = ChainedNode->use_end(); UI != E; ++UI) {
// Make sure the use is of the chain, not some other value we produce.
if (UI.getUse().getValueType() != MVT::Other) continue;
-
+
SDNode *User = *UI;
// If we see an already-selected machine node, then we've gone beyond the
@@ -1279,9 +1624,11 @@
if (User->isMachineOpcode() ||
User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
continue;
-
+
if (User->getOpcode() == ISD::CopyToReg ||
- User->getOpcode() == ISD::CopyFromReg) {
+ User->getOpcode() == ISD::CopyFromReg ||
+ User->getOpcode() == ISD::INLINEASM ||
+ User->getOpcode() == ISD::EH_LABEL) {
// If their node ID got reset to -1 then they've already been selected.
// Treat them like a MachineOpcode.
if (User->getNodeId() == -1)
@@ -1303,7 +1650,7 @@
if (!std::count(ChainedNodesInPattern.begin(),
ChainedNodesInPattern.end(), User))
return CR_InducesCycle;
-
+
// Otherwise we found a node that is part of our pattern. For example in:
// x = load ptr
// y = x+4
@@ -1315,7 +1662,7 @@
InteriorChainedNodes.push_back(User);
continue;
}
-
+
// If we found a TokenFactor, there are two cases to consider: first if the
// TokenFactor is just hanging "below" the pattern we're matching (i.e. no
// uses of the TF are in our pattern) we just want to ignore it. Second,
@@ -1352,7 +1699,7 @@
case CR_LeadsToInteriorNode:
break; // Otherwise, keep processing.
}
-
+
// Okay, we know we're in the interesting interior case. The TokenFactor
// is now going to be considered part of the pattern so that we rewrite its
// uses (it may have uses that are not part of the pattern) with the
@@ -1363,7 +1710,7 @@
InteriorChainedNodes.push_back(User);
continue;
}
-
+
return Result;
}
@@ -1385,7 +1732,7 @@
InteriorChainedNodes) == CR_InducesCycle)
return SDValue(); // Would induce a cycle.
}
-
+
// Okay, we have walked all the matched nodes and collected TokenFactor nodes
// that we are interested in. Form our input TokenFactor node.
SmallVector<SDValue, 3> InputChains;
@@ -1396,14 +1743,14 @@
if (N->getOpcode() != ISD::TokenFactor) {
if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
continue;
-
+
// Otherwise, add the input chain.
SDValue InChain = ChainNodesMatched[i]->getOperand(0);
assert(InChain.getValueType() == MVT::Other && "Not a chain");
InputChains.push_back(InChain);
continue;
}
-
+
// If we have a token factor, we want to add all inputs of the token factor
// that are not part of the pattern we're matching.
for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
@@ -1412,13 +1759,13 @@
InputChains.push_back(N->getOperand(op));
}
}
-
+
SDValue Res;
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
MVT::Other, &InputChains[0], InputChains.size());
-}
+}
/// MorphNode - Handle morphing a node in place for the selector.
SDNode *SelectionDAGISel::
@@ -1426,15 +1773,15 @@
const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
// It is possible we're using MorphNodeTo to replace a node with no
// normal results with one that has a normal result (or we could be
- // adding a chain) and the input could have flags and chains as well.
+ // adding a chain) and the input could have glue and chains as well.
// In this case we need to shift the operands down.
// FIXME: This is a horrible hack and broken in obscure cases, no worse
// than the old isel though.
- int OldFlagResultNo = -1, OldChainResultNo = -1;
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
unsigned NTMNumResults = Node->getNumValues();
- if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
- OldFlagResultNo = NTMNumResults-1;
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
if (NTMNumResults != 1 &&
Node->getValueType(NTMNumResults-2) == MVT::Other)
OldChainResultNo = NTMNumResults-2;
@@ -1455,54 +1802,55 @@
}
unsigned ResNumResults = Res->getNumValues();
- // Move the flag if needed.
- if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
- (unsigned)OldFlagResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
SDValue(Res, ResNumResults-1));
- if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
--ResNumResults;
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
(unsigned)OldChainResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
SDValue(Res, ResNumResults-1));
// Otherwise, no replacement happened because the node already exists. Replace
// Uses of the old node with the new one.
if (Res != Node)
CurDAG->ReplaceAllUsesWith(Node, Res);
-
+
return Res;
}
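// A concrete instance of the result shuffling above (a sketch): morphing a
// node typed (i32, ch, glue) records OldGlueResultNo == 2 and
// OldChainResultNo == 1. If the replacement is typed (i32, i32, ch, glue),
// the old glue result #2 is remapped to new result #3, ResNumResults drops
// to 3, and the old chain result #1 is remapped to new result #2; glue
// stays last and the chain stays immediately before it.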
/// CheckSame - Implements OP_CheckSame.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- return N == RecordedNodes[RecNo];
+ return N == RecordedNodes[RecNo].first;
}
-
+
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -1510,17 +1858,17 @@
return N->getOpcode() == Opc;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (N.getValueType() == VT) return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI,
unsigned ChildNo) {
@@ -1530,57 +1878,57 @@
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
return C != 0 && C->getSExtValue() == Val;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::AND) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::OR) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
}
@@ -1590,11 +1938,11 @@
/// fail, set Result=true and return anything. If the current predicate is
/// known to pass, set Result=false and return the MatcherIndex to continue
/// with. If the current predicate is unknown, set Result=false and return the
-/// MatcherIndex to continue with.
+/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result, SelectionDAGISel &SDISel,
- SmallVectorImpl<SDValue> &RecordedNodes){
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
switch (Table[Index++]) {
default:
Result = false;
@@ -1648,21 +1996,21 @@
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
-
+
/// NodeStack - The node stack when the scope was formed.
SmallVector<SDValue, 4> NodeStack;
-
+
/// NumRecordedNodes - The number of recorded nodes when the scope was formed.
unsigned NumRecordedNodes;
-
+
/// NumMatchedMemRefs - The number of matched memref entries.
unsigned NumMatchedMemRefs;
-
- /// InputChain/InputFlag - The current chain/flag
- SDValue InputChain, InputFlag;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
/// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
- bool HasChainNodesMatched, HasFlagResultNodesMatched;
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
};
}
@@ -1693,6 +2041,7 @@
case ISD::TokenFactor:
case ISD::CopyFromReg:
case ISD::CopyToReg:
+ case ISD::EH_LABEL:
NodeToMatch->setNodeId(-1); // Mark selected.
return 0;
case ISD::AssertSext:
@@ -1700,9 +2049,10 @@
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
NodeToMatch->getOperand(0));
return 0;
+ case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
}
-
+
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
// Set up the node stack with NodeToMatch as the only node on the stack.
@@ -1713,37 +2063,38 @@
// MatchScopes - Scopes used when matching, if a match failure happens, this
// indicates where to continue checking.
SmallVector<MatchScope, 8> MatchScopes;
-
+
// RecordedNodes - This is the set of nodes that have been recorded by the
- // state machine.
- SmallVector<SDValue, 8> RecordedNodes;
-
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
// MatchedMemRefs - This is the set of MemRef's we've seen in the input
// pattern.
SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
-
- // These are the current input chain and flag for use when generating nodes.
+
+ // These are the current input chain and glue for use when generating nodes.
// Various Emit operations change these. For example, emitting a copytoreg
// uses and updates these.
- SDValue InputChain, InputFlag;
-
+ SDValue InputChain, InputGlue;
+
// ChainNodesMatched - If a pattern matches nodes that have input/output
// chains, the OPC_EmitMergeInputChains operation is emitted which indicates
// which ones they are. The result is captured into this list so that we can
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
- SmallVector<SDNode*, 3> FlagResultNodesMatched;
-
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
NodeToMatch->dump(CurDAG);
errs() << '\n');
-
+
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
// accelerate the first lookup (which is guaranteed to be hot) with the
// OpcodeOffset table.
unsigned MatcherIndex = 0;
-
+
if (!OpcodeOffset.empty()) {
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
@@ -1775,7 +2126,7 @@
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
-
+
while (1) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
@@ -1790,7 +2141,7 @@
// determine immediately that the first check (or first several) will
// immediately fail, don't even bother pushing a scope for them.
unsigned FailIndex;
-
+
while (1) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
@@ -1800,12 +2151,12 @@
FailIndex = 0;
break;
}
-
+
FailIndex = MatcherIndex+NumToSkip;
-
+
unsigned MatcherIndexOfPredicate = MatcherIndex;
(void)MatcherIndexOfPredicate; // silence warning.
-
+
// If we can't evaluate this predicate without pushing a scope (e.g. if
// it is a 'MoveParent') or if the predicate succeeds on this node, we
// push the scope and evaluate the full predicate chain.
@@ -1814,20 +2165,20 @@
Result, *this, RecordedNodes);
if (!Result)
break;
-
+
DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
<< "index " << MatcherIndexOfPredicate
<< ", continuing at " << FailIndex << "\n");
++NumDAGIselRetries;
-
+
// Otherwise, we know that this case of the Scope is guaranteed to fail,
// move to the next case.
MatcherIndex = FailIndex;
}
-
+
// If the whole scope failed to match, bail.
if (FailIndex == 0) break;
-
+
// Push a MatchScope which indicates where to go if the first child fails
// to match.
MatchScope NewEntry;
@@ -1836,17 +2187,21 @@
NewEntry.NumRecordedNodes = RecordedNodes.size();
NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
NewEntry.InputChain = InputChain;
- NewEntry.InputFlag = InputFlag;
+ NewEntry.InputGlue = InputGlue;
NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
- NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
MatchScopes.push_back(NewEntry);
continue;
}
- case OPC_RecordNode:
+ case OPC_RecordNode: {
// Remember this node, it may end up being an operand in the pattern.
- RecordedNodes.push_back(N);
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
continue;
-
+ }
+
case OPC_RecordChild0: case OPC_RecordChild1:
case OPC_RecordChild2: case OPC_RecordChild3:
case OPC_RecordChild4: case OPC_RecordChild5:
@@ -1855,20 +2210,21 @@
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
- RecordedNodes.push_back(N->getOperand(ChildNo));
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
continue;
}
case OPC_RecordMemRef:
MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
continue;
-
- case OPC_CaptureFlagInput:
- // If the current node has an input flag, capture it in InputFlag.
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
if (N->getNumOperands() != 0 &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
- InputFlag = N->getOperand(N->getNumOperands()-1);
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
continue;
-
+
case OPC_MoveChild: {
unsigned ChildNo = MatcherTable[MatcherIndex++];
if (ChildNo >= N.getNumOperands())
@@ -1877,14 +2233,14 @@
NodeStack.push_back(N);
continue;
}
-
+
case OPC_MoveParent:
// Pop the current node off the NodeStack.
NodeStack.pop_back();
assert(!NodeStack.empty() && "Node stack imbalance!");
- N = NodeStack.back();
+ N = NodeStack.back();
continue;
-
+
case OPC_CheckSame:
if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
continue;
@@ -1900,7 +2256,8 @@
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
- if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
RecordedNodes))
break;
continue;
@@ -1908,11 +2265,11 @@
case OPC_CheckOpcode:
if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
continue;
-
+
case OPC_CheckType:
if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
continue;
-
+
case OPC_SwitchOpcode: {
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -1930,22 +2287,22 @@
// If the opcode matches, then we will execute this case.
if (CurNodeOpcode == Opc)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
<< " to " << MatcherIndex << "\n");
continue;
}
-
+
case OPC_SwitchType: {
- MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (1) {
@@ -1954,23 +2311,22 @@
if (CaseSize & 128)
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
-
- MVT::SimpleValueType CaseVT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (CaseVT == MVT::iPTR)
- CaseVT = TLI.getPointerTy().SimpleTy;
-
+ CaseVT = TLI.getPointerTy();
+
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
<< "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
@@ -1999,7 +2355,7 @@
case OPC_CheckOrImm:
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
-
+
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
// Verify that all intermediate nodes between the root and this one have
@@ -2020,7 +2376,7 @@
NodeToMatch, OptLevel,
true/*We validate our own chains*/))
break;
-
+
continue;
}
case OPC_EmitInteger: {
@@ -2029,22 +2385,36 @@
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
- RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
continue;
}
case OPC_EmitRegister: {
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
unsigned RegNo = MatcherTable[MatcherIndex++];
- RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
continue;
}
-
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ continue;
+ }
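// For instance (a sketch): register number 0x1234 is emitted into the
// table as the byte pair 0x34, 0x12, and the two reads above reassemble
// it as 0x34 | (0x12 << 8) == 0x1234, low byte first like the opcodes.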
+
case OPC_EmitConvertToTarget: {
// Convert from IMM/FPIMM to target version.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Imm = RecordedNodes[RecNo];
+ SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
@@ -2053,11 +2423,11 @@
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
}
-
- RecordedNodes.push_back(Imm);
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
continue;
}
-
+
case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
// These are space-optimized forms of OPC_EmitMergeInputChains.
@@ -2065,28 +2435,28 @@
"EmitMergeInputChains should be the first chain producing node");
assert(ChainNodesMatched.empty() &&
"Should only have one EmitMergeInputChains per match");
-
+
// Read all of the chained nodes.
unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
-
+
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitMergeInputChains: {
assert(InputChain.getNode() == 0 &&
"EmitMergeInputChains should be the first chain producing node");
@@ -2106,54 +2476,55 @@
for (unsigned i = 0; i != NumChains; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
}
-
+
// If the inner loop broke out, the match fails.
if (ChainNodesMatched.empty())
break;
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitCopyToReg: {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
-
+
if (InputChain.getNode() == 0)
InputChain = CurDAG->getEntryNode();
-
+
InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
- DestPhysReg, RecordedNodes[RecNo],
- InputFlag);
-
- InputFlag = InputChain.getValue(1);
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
continue;
}
-
+
case OPC_EmitNodeXForm: {
unsigned XFormNo = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
continue;
}
-
+
case OPC_EmitNode:
case OPC_MorphNodeTo: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
@@ -2168,12 +2539,12 @@
if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
VTs.push_back(VT);
}
-
+
if (EmitNodeInfo & OPFL_Chain)
VTs.push_back(MVT::Other);
- if (EmitNodeInfo & OPFL_FlagOutput)
- VTs.push_back(MVT::Flag);
-
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
// This is hot code, so optimize the two most common cases of 1 and 2
// results.
SDVTList VTList;
@@ -2191,11 +2562,11 @@
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
-
+
assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
- Ops.push_back(RecordedNodes[RecNo]);
+ Ops.push_back(RecordedNodes[RecNo].first);
}
-
+
// If there are variadic operands to add, handle them now.
if (EmitNodeInfo & OPFL_VariadicInfo) {
// Determine the start index to copy from.
@@ -2203,22 +2574,22 @@
FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
"Invalid variadic node");
- // Copy all of the variadic operands, not including a potential flag
+ // Copy all of the variadic operands, not including a potential glue
// input.
for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
i != e; ++i) {
SDValue V = NodeToMatch->getOperand(i);
- if (V.getValueType() == MVT::Flag) break;
+ if (V.getValueType() == MVT::Glue) break;
Ops.push_back(V);
}
}
-
- // If this has chain/flag inputs, add them.
+
+ // If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
Ops.push_back(InputChain);
- if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
- Ops.push_back(InputFlag);
-
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
// Create the node.
SDNode *Res = 0;
if (Opcode != OPC_MorphNodeTo) {
@@ -2226,71 +2597,106 @@
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
VTList, Ops.data(), Ops.size());
-
- // Add all the non-flag/non-chain results to the RecordedNodes list.
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
- if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
- RecordedNodes.push_back(SDValue(Res, i));
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
}
-
+
} else {
Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
EmitNodeInfo);
}
-
- // If the node had chain/flag results, update our notion of the current
- // chain and flag.
- if (EmitNodeInfo & OPFL_FlagOutput) {
- InputFlag = SDValue(Res, VTs.size()-1);
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-2);
} else if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-1);
- // If the OPFL_MemRefs flag is set on this node, slap all of the
+ // If the OPFL_MemRefs flag is set on this node, slap all of the
// accumulated memrefs onto it.
//
// FIXME: This is vastly incorrect for patterns with multiple outputs
// instructions that access memory and for ComplexPatterns that match
// loads.
if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ unsigned NumMemRefs = 0;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ ++NumMemRefs;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ ++NumMemRefs;
+ } else {
+ ++NumMemRefs;
+ }
+ }
+
MachineSDNode::mmo_iterator MemRefs =
- MF->allocateMemRefsArray(MatchedMemRefs.size());
- std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs);
+ MF->allocateMemRefsArray(NumMemRefs);
+
+ MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ *MemRefsPos++ = *I;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ *MemRefsPos++ = *I;
+ } else {
+ *MemRefsPos++ = *I;
+ }
+ }
+
cast<MachineSDNode>(Res)
- ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
+ ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
}
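// Illustration of the filtering above (a sketch): if MatchedMemRefs holds
// one load and one store MachineMemOperand but the MCInstrDesc for
// TargetOpc reports mayStore() and not mayLoad(), the counting pass sets
// NumMemRefs == 1 and the copying pass keeps only the store operand, so
// the new node never carries a memref for a memory access its instruction
// cannot perform.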
-
+
DEBUG(errs() << " "
<< (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
<< " node: "; Res->dump(CurDAG); errs() << "\n");
-
+
// If this was a MorphNodeTo then we're completely done!
if (Opcode == OPC_MorphNodeTo) {
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, true);
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
return Res;
}
-
+
continue;
}
-
- case OPC_MarkFlagResults: {
+
+ case OPC_MarkGlueResults: {
unsigned NumNodes = MatcherTable[MatcherIndex++];
-
- // Read and remember all the flag-result nodes.
+
+ // Read and remember all the glue-result nodes.
for (unsigned i = 0; i != NumNodes; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
}
continue;
}
-
+
case OPC_CompleteMatch: {
// The match has been completed, and any new nodes (if any) have been
// created. Patch up references to the matched dag to use the newly
@@ -2301,13 +2707,13 @@
unsigned ResSlot = MatcherTable[MatcherIndex++];
if (ResSlot & 128)
ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
-
+
assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Res = RecordedNodes[ResSlot];
-
+ SDValue Res = RecordedNodes[ResSlot].first;
+
assert(i < NodeToMatch->getNumValues() &&
NodeToMatch->getValueType(i) != MVT::Other &&
- NodeToMatch->getValueType(i) != MVT::Flag &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
"Invalid number of results to complete!");
assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
NodeToMatch->getValueType(i) == MVT::iPTR ||
@@ -2318,24 +2724,23 @@
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
}
- // If the root node defines a flag, add it to the flag nodes to update
- // list.
- if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
- FlagResultNodesMatched.push_back(NodeToMatch);
-
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, false);
-
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
-
+
// FIXME: We just return here, which interacts correctly with SelectRoot
// above. We should fix this to not return an SDNode* anymore.
return 0;
}
}
-
+
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
@@ -2358,15 +2763,15 @@
if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
-
+
DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
-
+
InputChain = LastScope.InputChain;
- InputFlag = LastScope.InputFlag;
+ InputGlue = LastScope.InputGlue;
if (!LastScope.HasChainNodesMatched)
ChainNodesMatched.clear();
- if (!LastScope.HasFlagResultNodesMatched)
- FlagResultNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
// Check to see what the offset is at the new MatcherIndex. If it is zero
// we have reached the end of this scope, otherwise we have another child
@@ -2381,21 +2786,21 @@
LastScope.FailIndex = MatcherIndex+NumToSkip;
break;
}
-
+
// End of this scope, pop it and try the next child in the containing
// scope.
MatchScopes.pop_back();
}
}
}
-
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
- Msg << "Cannot yet select: ";
-
+ Msg << "Cannot select: ";
+
if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 8313de5..cd1647b 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -90,10 +90,11 @@
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
template<typename EdgeIter>
- static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
SDValue Op = EI.getNode()->getOperand(EI.getOperand());
EVT VT = Op.getValueType();
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return "color=red,style=bold";
else if (VT == MVT::Other)
return "color=blue,style=dashed";
@@ -273,14 +274,14 @@
raw_string_ostream O(s);
O << "SU(" << SU->NodeNum << "): ";
if (SU->getNode()) {
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
O << DOTGraphTraits<SelectionDAG*>
- ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
- FlaggedNodes.pop_back();
- if (!FlaggedNodes.empty())
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
O << "\n ";
}
} else {
diff --git a/src/LLVM/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/src/LLVM/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c6a4b71..907d8d9 100644
--- a/src/LLVM/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/src/LLVM/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -26,10 +26,19 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <cctype>
using namespace llvm;
+/// We are in the process of implementing a new TypeLegalization action
+/// - the promotion of vector elements. This feature is disabled by default
+/// and only enabled using this flag.
+static cl::opt<bool>
+AllowPromoteIntElem("promote-elements", cl::Hidden,
+ cl::desc("Allow promotion of integer vector element types"));
+
namespace llvm {
TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
bool isLocal = GV->hasLocalLinkage();
@@ -72,6 +81,9 @@
Names[RTLIB::MUL_I32] = "__mulsi3";
Names[RTLIB::MUL_I64] = "__muldi3";
Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
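// These name the compiler-rt multiply-with-overflow helpers; per
// compiler-rt the 32-bit variant is, roughly (a sketch):
//   si_int __mulosi4(si_int a, si_int b, int *overflow);
// returning the product and setting *overflow to nonzero when it wrapped.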
Names[RTLIB::SDIV_I8] = "__divqi3";
Names[RTLIB::SDIV_I16] = "__divhi3";
Names[RTLIB::SDIV_I32] = "__divsi3";
@@ -92,6 +104,19 @@
Names[RTLIB::UREM_I32] = "__umodsi3";
Names[RTLIB::UREM_I64] = "__umoddi3";
Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
Names[RTLIB::NEG_I32] = "__negsi2";
Names[RTLIB::NEG_I64] = "__negdi2";
Names[RTLIB::ADD_F32] = "__addsf3";
@@ -114,6 +139,10 @@
Names[RTLIB::REM_F64] = "fmod";
Names[RTLIB::REM_F80] = "fmodl";
Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
Names[RTLIB::POWI_F32] = "__powisf2";
Names[RTLIB::POWI_F64] = "__powidf2";
Names[RTLIB::POWI_F80] = "__powixf2";
@@ -288,7 +317,7 @@
Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
- Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and-xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
@@ -512,8 +541,10 @@
}
/// NOTE: The constructor takes ownership of TLOF.
-TargetLowering::TargetLowering(const TargetMachine &tm)
- : TM(tm), TD(TM.getTargetData()) {
+TargetLowering::TargetLowering(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof),
+ mayPromoteElements(AllowPromoteIntElem) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -529,7 +560,7 @@
setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
}
-
+
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -537,8 +568,8 @@
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
-
- // ConstantFP nodes default to expand. Targets can either change this to
+
+ // ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
@@ -559,26 +590,35 @@
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
-
+
IsLittleEndian = TD->isLittleEndian();
- ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+ = maxStoresPerMemmoveOptSize = 4;
benefitFromCodePlacementOpt = false;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
SelectIsExpensive = false;
IntDivIsCheap = false;
Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::Latency;
JumpBufSize = 0;
JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
MinStackArgumentAlignment = 1;
ShouldFoldAtomicFences = false;
+ InsertFencesForAtomic = false;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -586,6 +626,11 @@
}
TargetLowering::~TargetLowering() {
+ delete &TLOF;
+}
+
+MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize());
}
/// canOpTrap - Returns true if the operation can trap for the value type.
@@ -613,16 +658,16 @@
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
@@ -631,17 +676,23 @@
}
NumIntermediates = NumVectorRegs;
-
+
MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
if (!TLI->isTypeLegal(NewVT))
NewVT = EltTy;
IntermediateVT = NewVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
EVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
@@ -693,6 +744,7 @@
return std::make_pair(BestRC, 1);
}
+
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
@@ -721,7 +773,7 @@
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
- ValueTypeActions.setTypeAction(ExpandedVT, Expand);
+ ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
}
// Inspect all of the ValueType's smaller than the largest integer
@@ -735,7 +787,7 @@
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
(MVT::SimpleValueType)LegalIntReg;
- ValueTypeActions.setTypeAction(IVT, Promote);
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
@@ -744,8 +796,8 @@
NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
TransformToType[MVT::ppcf128] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
- }
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
@@ -753,7 +805,7 @@
NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
TransformToType[MVT::f64] = MVT::i64;
- ValueTypeActions.setTypeAction(MVT::f64, Expand);
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
}
// Decide how to handle f32. If the target does not have native support for
@@ -763,21 +815,67 @@
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
TransformToType[MVT::f32] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::f32, Promote);
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
} else {
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
TransformToType[MVT::f32] = MVT::i32;
- ValueTypeActions.setTypeAction(MVT::f32, Expand);
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
}
-
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
-
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1) {
+ bool IsLegalWiderType = false;
+ // If we allow the promotion of vector elements using a flag,
+ // then return TypePromoteInteger on vector elements.
+ // First try to promote the elements of integer vectors. If no legal
+ // promotion was found, fallback to the widen-vector method.
+ if (mayPromoteElements)
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
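// Concrete picture (a sketch, assuming a target where v4i32 and v16i8 are
// legal but v4i8 is not): under -promote-elements, v4i8 is caught by the
// first loop and becomes v4i32 (TypePromoteInteger: same element count,
// wider integer elements); otherwise it falls through to the second loop
// and widens to v16i8 (TypeWidenVector: same element type, more elements).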
+
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
@@ -785,31 +883,17 @@
getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
-
- // Determine if there is a legal wider type.
- bool IsLegalWiderType = false;
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts && NElts != 1) {
- TransformToType[i] = SVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- IsLegalWiderType = true;
- break;
- }
- }
- if (!IsLegalWiderType) {
- EVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- ValueTypeActions.setTypeAction(VT, Expand);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- }
+
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
}
}
@@ -832,7 +916,8 @@
}
-MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+EVT TargetLowering::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
return PointerTy.SimpleTy;
}
@@ -853,19 +938,32 @@
EVT &IntermediateVT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
- // Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // we should widen to that legal vector type. This handles things like
+ // <2 x float> -> <4 x float>.
+ if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) {
+ RegisterVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterVT)) {
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !isTypeLegal(
@@ -875,7 +973,7 @@
}
NumIntermediates = NumVectorRegs;
-
+
EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
@@ -883,23 +981,25 @@
EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
- if (DestVT.bitsLT(NewVT)) {
- // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
- } else {
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
- }
-
- return 1;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
}
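// Worked example of the NextPowerOf2 rounding (a sketch, assuming an i33
// piece that legalizes via i32 registers): NewVT == i33 gives
// NewVTSize == 33, rounded up to 64, so with DestVT == i32 this returns
// NumVectorRegs * (64 / 32); the old 33 / 32 == 1 undercounted the
// registers needed for each expanded element.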
-/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
+void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI,
SmallVectorImpl<uint64_t> *Offsets) {
@@ -957,7 +1057,7 @@
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
-unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const {
return TD->getCallFrameTypeAlignment(Ty);
}
@@ -968,11 +1068,11 @@
// In non-pic modes, just use the address of a block.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return MachineJumpTableInfo::EK_BlockAddress;
-
+
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
-
+
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
@@ -1016,11 +1116,11 @@
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
/// constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
const APInt &Demanded) {
DebugLoc dl = Op.getDebugLoc();
@@ -1042,7 +1142,7 @@
EVT VT = Op.getValueType();
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
DAG.getConstant(Demanded &
- C->getAPIntValue(),
+ C->getAPIntValue(),
VT));
return CombineTo(Op, New);
}
@@ -1119,9 +1219,9 @@
KnownZero = KnownOne = APInt(BitWidth, 0);
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse()) {
+ if (!Op.getNode()->hasOneUse()) {
if (Depth != 0) {
- // If not at the root, Just compute the KnownZero/KnownOne bits to
+ // If not at the root, just compute the KnownZero/KnownOne bits to
// simplify things downstream.
TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
return false;
@@ -1129,7 +1229,7 @@
// If this is the root being simplified, allow it to have multiple uses,
// just set the NewMask to all bits.
NewMask = APInt::getAllOnesValue(BitWidth);
- } else if (DemandedMask == 0) {
+ } else if (DemandedMask == 0) {
// Not demanding any bits from Op.
if (Op.getOpcode() != ISD::UNDEF)
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
@@ -1152,8 +1252,9 @@
// the RHS.
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth+1);
+ LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1162,16 +1263,16 @@
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
return true;
}
-
+
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
@@ -1194,15 +1295,15 @@
KnownZero |= KnownZero2;
break;
case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
@@ -1228,15 +1329,15 @@
KnownOne |= KnownOne2;
break;
case ISD::XOR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if ((KnownZero & NewMask) == NewMask)
@@ -1254,12 +1355,12 @@
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
Op.getOperand(0),
Op.getOperand(1)));
-
+
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
-
+
// If all of the demanded bits on one side are known, and all of the set
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
@@ -1268,11 +1369,11 @@
if ((KnownOne & KnownOne2) == KnownOne) {
EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
}
}
-
+
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
// if we can't force bits, try to shrink constant
@@ -1297,37 +1398,37 @@
KnownOne = KnownOneOut;
break;
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
@@ -1353,16 +1454,16 @@
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
- }
-
- SDValue NewSA =
+ }
+
+ SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
@@ -1376,7 +1477,7 @@
BitWidth - InnerVT.getSizeInBits()) &
DemandedMask) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
- EVT ShTy = getShiftAmountTy();
+ EVT ShTy = getShiftAmountTy(InnerVT);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
@@ -1401,7 +1502,7 @@
unsigned ShAmt = SA->getZExtValue();
unsigned VTSize = VT.getSizeInBits();
SDValue InOp = Op.getOperand(0);
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1418,20 +1519,20 @@
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
- }
-
+ }
+
SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1452,7 +1553,7 @@
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
EVT VT = Op.getValueType();
unsigned ShAmt = SA->getZExtValue();
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1464,21 +1565,21 @@
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
-
+
// Handle the sign bit, adjusted to where it is now in the mask.
APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
-
+
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
} else if (KnownOne.intersects(SignBit)) { // New bits are known one.
@@ -1489,23 +1590,23 @@
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- // Sign extension. Compute the demanded bits in the result that are not
+ // Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
BitWidth - EVT.getScalarType().getSizeInBits());
-
+
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
- APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
- InSignBit.zext(BitWidth);
+ APInt InSignBit =
+ APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
EVT.getScalarType().getSizeInBits()) &
NewMask;
-
+
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits |= InSignBit;
@@ -1513,16 +1614,16 @@
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
-
+
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
- return TLO.CombineTo(Op,
+ return TLO.CombineTo(Op,
TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
-
+
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
KnownZero &= ~NewBits;
@@ -1535,23 +1636,22 @@
case ISD::ZERO_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
-
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
// If none of the top bits are demanded, convert this into an any_extend.
APInt NewBits =
APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
if (!NewBits.intersects(NewMask))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
break;
}
@@ -1561,31 +1661,31 @@
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
APInt NewBits = ~InMask & NewMask;
-
+
// If none of the top bits are demanded, convert this into an any_extend.
if (NewBits == 0)
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
-
+
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
APInt InDemandedBits = InMask & NewMask;
InDemandedBits |= InSignBit;
- InDemandedBits.trunc(InBits);
-
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
-
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
// If the sign bit is known zero, convert this to a zero extend.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
KnownOne |= NewBits;
@@ -1599,14 +1699,13 @@
case ISD::ANY_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
+ APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
@@ -1614,14 +1713,13 @@
// zero/one bits live out.
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt TruncMask = NewMask;
- TruncMask.zext(OperandBitWidth);
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
-
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Op.getOperand(0).getNode()->hasOneUse()) {
@@ -1639,65 +1737,72 @@
ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
if (!ShAmt)
break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift =
+ TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ }
+
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
- HighBits = HighBits.lshr(ShAmt->getZExtValue());
- HighBits.trunc(BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
+ Op.getValueType(),
In.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
- NewTrunc,
- In.getOperand(1)));
+ NewTrunc,
+ Shift));
}
break;
}
}
-
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
- // Demand all the bits of the input that are demanded in the output.
- // The low bits are obvious; the high bits are demanded because we're
- // asserting that they're zero here.
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
- KnownZero, KnownOne, TLO, Depth+1))
- return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-
+ // AssertZext demands all of the high bits, plus any of the low bits
+ // demanded by its users.
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
KnownZero |= ~InMask & NewMask;
break;
}
- case ISD::BIT_CONVERT:
-#if 0
- // If this is an FP->Int bitcast and if the sign bit is the only thing that
- // is demanded, turn this into a FGETSIGN.
- if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
- MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
- !MVT::isVector(Op.getOperand(0).getValueType())) {
- // Only do this xform if FGETSIGN is valid or if before legalize.
- if (!TLO.AfterLegalize ||
- isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+ case ISD::BITCAST:
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
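// e.g. an f32->i32 bitcast where only bit 31 is demanded becomes
// (shl (fgetsign x), 31), and the shl is expected to fold away.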
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
- Op.getOperand(0));
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
- SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(),
Sign, ShAmt));
}
}
-#endif
break;
case ISD::ADD:
case ISD::MUL:
@@ -1722,21 +1827,21 @@
TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
break;
}
-
+
// If we know the value of all of the demanded bits, return this as a
// constant.
if ((NewMask & (KnownZero|KnownOne)) == NewMask)
return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
-
+
return false;
}
-/// computeMaskedBitsForTargetNode - Determine which of the bits specified
-/// in Mask are known to be either zero or one and return them in the
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
-void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
@@ -1797,14 +1902,13 @@
(KnownOne.countPopulation() == 1);
}
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
DAGCombinerInfo &DCI, DebugLoc dl) const {
SelectionDAG &DAG = DCI.DAG;
- LLVMContext &Context = *DAG.getContext();
// These setcc operations always fold.
switch (Cond) {
@@ -1815,11 +1919,10 @@
case ISD::SETTRUE2: return DAG.getConstant(1, VT);
}
- if (isa<ConstantSDNode>(N0.getNode())) {
- // Ensure that the constant occurs on the RHS, and fold constant
- // comparisons.
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ if (isa<ConstantSDNode>(N0.getNode()))
return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
- }
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -1849,6 +1952,66 @@
}
}
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
+ // (zext x) == C --> x == (trunc C)
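+ // e.g. (setcc (zext i8 %x to i32), 42, eq) -> (setcc %x, (i8 42), eq),
+ // valid whenever truncating C keeps every set bit (checked below via
+ // MinBits > C1.getActiveBits()).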
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
@@ -1864,7 +2027,7 @@
if (!Lod->isVolatile() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueType().getSizeInBits();
unsigned maskWidth = origWidth;
- // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
@@ -1887,7 +2050,7 @@
}
}
if (bestWidth) {
- EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound()) {
EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
@@ -1896,10 +2059,9 @@
DAG.getConstant(bestOffset, PtrType));
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
- Lod->getSrcValue(),
- Lod->getSrcValueOffset() + bestOffset,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
false, false, NewAlign);
- return DAG.getSetCC(dl, VT,
+ return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
newVT)),
@@ -1949,7 +2111,7 @@
(isOperationLegal(ISD::SETCC, newVT) &&
getCondCodeAction(Cond, newVT)==Legal))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+ DAG.getConstant(C1.trunc(InSize), newVT),
Cond);
break;
}
@@ -1967,7 +2129,7 @@
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getConstant(Cond == ISD::SETNE, VT);
-
+
SDValue ZextOp;
EVT Op0Ty = N0.getOperand(0).getValueType();
if (Op0Ty == ExtSrcTy) {
@@ -1980,10 +2142,10 @@
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
- return DAG.getSetCC(dl, VT, ZextOp,
+ return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & APInt::getLowBitsSet(
ExtDstTyBits,
- ExtSrcTyBits),
+ ExtSrcTyBits),
ExtDstTy),
Cond);
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
@@ -1993,16 +2155,16 @@
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
if (TrueWhenTrue)
- return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
+ CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
- (N0.getOpcode() == ISD::AND &&
+ (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -2018,7 +2180,7 @@
if (N0.getOpcode() == ISD::XOR)
Val = N0.getOperand(0);
else {
- assert(N0.getOpcode() == ISD::AND &&
+ assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
@@ -2031,7 +2193,7 @@
}
} else if (N1C->getAPIntValue() == 1 &&
(VT == MVT::i1 ||
- getBooleanContents() == ZeroOrOneBooleanContent)) {
+ getBooleanContents(false) == ZeroOrOneBooleanContent)) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
@@ -2062,7 +2224,7 @@
}
}
}
-
+
APInt MinVal, MaxVal;
unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
@@ -2077,7 +2239,7 @@
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
// X >= C0 --> X > (C0-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1-1, N1.getValueType()),
(Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
}
@@ -2085,7 +2247,7 @@
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
// X <= C0 --> X < (C0+1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1+1, N1.getValueType()),
(Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
}
@@ -2108,12 +2270,12 @@
// If we have setult X, 1, turn it into seteq X, 0
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, N0.getValueType()),
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
ISD::SETEQ);
// If we have setugt X, Max-1, turn it into seteq X, Max
else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, N0.getValueType()),
ISD::SETEQ);
@@ -2121,9 +2283,9 @@
// by changing cc.
// SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
+ if (Cond == ISD::SETUGT &&
C1 == APInt::getSignedMaxValue(OperandBitSize))
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, N1.getValueType()),
ISD::SETLT);
@@ -2144,7 +2306,7 @@
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy = DCI.isBeforeLegalize() ?
- getPointerTy() : getShiftAmountTy();
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
if (AndRHS->getAPIntValue().isPowerOf2()) {
@@ -2183,7 +2345,7 @@
return DAG.getUNDEF(VT);
}
}
-
+
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
@@ -2258,14 +2420,14 @@
if (DAG.isCommutativeBinOp(N0.getOpcode())) {
// If X op Y == Y op X, try other combinations.
if (N0.getOperand(0) == N1.getOperand(1))
- return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
Cond);
if (N0.getOperand(1) == N1.getOperand(0))
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
Cond);
}
}
-
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2275,7 +2437,7 @@
LHSR->getAPIntValue(),
N0.getValueType()), Cond);
}
-
+
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
if (N0.getOpcode() == ISD::XOR)
// If we know that all of the inverted bits are zero, don't bother
@@ -2288,7 +2450,7 @@
N0.getValueType()),
Cond);
}
-
+
// Turn (C1-X) == C2 --> X == C1-C2
if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
@@ -2299,7 +2461,7 @@
N0.getValueType()),
Cond);
}
- }
+ }
}
// Simplify (X+Z) == X --> Z == 0
@@ -2314,8 +2476,8 @@
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
// (Z-X) == X --> Z == X<<1
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
- DAG.getConstant(1, getShiftAmountTy()));
+ N1,
+ DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
@@ -2336,8 +2498,8 @@
} else if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
// X == (Z-X) --> X<<1 == Z
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
- DAG.getConstant(1, getShiftAmountTy()));
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
@@ -2423,7 +2585,7 @@
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
if (isa<GlobalAddressSDNode>(N)) {
GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
@@ -2449,6 +2611,7 @@
}
}
}
+
return false;
}
@@ -2466,7 +2629,6 @@
TargetLowering::ConstraintType
TargetLowering::getConstraintType(const std::string &Constraint) const {
- // FIXME: lots more standard ones to handle.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
@@ -2477,7 +2639,10 @@
return C_Memory;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
case 's': // Relocatable Constant
+ case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
@@ -2487,11 +2652,13 @@
case 'N':
case 'O':
case 'P':
+ case '<':
+ case '>':
return C_Other;
}
}
-
- if (Constraint.size() > 1 && Constraint[0] == '{' &&
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
Constraint[Constraint.size()-1] == '}')
return C_Register;
return C_Unknown;
@@ -2511,9 +2678,13 @@
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- char ConstraintLetter,
+ std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'X': // Allows any operand; labels (basic block) use this.
@@ -2530,7 +2701,7 @@
// is possible and fine if either GV or C are missing.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-
+
// If we have "(add GV, C)", pull out GV/C
if (Op.getOpcode() == ISD::ADD) {
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -2542,14 +2713,14 @@
if (C == 0 || GA == 0)
C = 0, GA = 0;
}
-
+
// If we find a valid operand, map to the TargetXXX version so that the
// value itself doesn't get selected.
if (GA) { // Either &GV or &GV+C
if (ConstraintLetter != 'n') {
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
C ? C->getDebugLoc() : DebugLoc(),
Op.getValueType(), Offs));
return;
@@ -2571,13 +2742,6 @@
}
}
-std::vector<unsigned> TargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- return std::vector<unsigned>();
-}
-
-
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
@@ -2593,27 +2757,19 @@
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
-
- // If none of the value types for this register class are valid, we
+
+ // If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
- bool isLegal = false;
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (isTypeLegal(*I)) {
- isLegal = true;
- break;
- }
- }
-
- if (!isLegal) continue;
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ if (!isLegalRC(RC))
+ continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (RegName.equals_lower(RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
-
+
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
@@ -2635,6 +2791,195 @@
}
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (dyn_cast<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else {
+ OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match; 0 = so-so match, up to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag it as maCantMatch.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
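+
+// Example: the constraint string "=r,r,~{memory}" yields three
+// AsmOperandInfo entries (an output register, an input register bound to
+// the first call argument, and a clobber); ConstraintVT for the first two
+// is derived from the corresponding IR types.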
+
+
/// getConstraintGenerality - Return an integer indicating how general CT
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
@@ -2652,6 +2997,79 @@
}
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
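+
+// Example: for an i32 operand with constraint codes "imr", the per-code
+// weights are CW_Constant for 'i' (if the operand is a ConstantInt),
+// CW_Memory for 'm', and CW_Register for 'r'; the loop in
+// getMultipleConstraintMatchWeight keeps the highest one.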
+
/// ChooseConstraint - If there are multiple different constraints that we
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
@@ -2693,7 +3111,7 @@
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
- TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0],
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
ResultOps, *DAG);
if (!ResultOps.empty()) {
BestType = CType;
@@ -2701,12 +3119,12 @@
break;
}
}
-
+
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
-
+
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
@@ -2715,7 +3133,7 @@
BestGenerality = Generality;
}
}
-
+
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
OpInfo.ConstraintType = BestType;
}
@@ -2724,10 +3142,10 @@
/// type to use for the specific AsmOperandInfo, setting
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
- SDValue Op,
+ SDValue Op,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
-
+
// Single-letter constraints ('r') are very common.
if (OpInfo.Codes.size() == 1) {
OpInfo.ConstraintCode = OpInfo.Codes[0];
@@ -2735,7 +3153,7 @@
} else {
ChooseConstraint(OpInfo, *this, Op, DAG);
}
-
+
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
// Labels and constants are handled elsewhere ('X' is the only thing
@@ -2746,7 +3164,7 @@
OpInfo.CallOperandVal = v;
return;
}
-
+
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -2762,20 +3180,20 @@
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
// Allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
return false;
-
+
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
-
- // Only support r+r,
+
+ // Only support r+r,
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
@@ -2790,27 +3208,53 @@
// Allow 2*r as r+r.
break;
}
-
+
return true;
}
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const {
+ ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+ APInt d = C->getAPIntValue();
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ Op2 = DAG.getConstant(xn, Op1.getValueType());
+ return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
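+
+// Worked example of the inverse computation above, for d = 7 at width 8:
+// starting from x0 = d, one step of x(n+1) = xn * (2 - d*xn) gives
+// 7 * (2 - 49) = -329 == 183 (mod 2^8), and 7 * 183 = 1281 == 1 (mod 2^8).
+// An exact division such as 21 /s 7 then folds to 21 * 183 == 3 (mod 2^8).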
+
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
-
+
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
-
+
APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
APInt::ms magics = d.magic();
-
+
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
@@ -2824,7 +3268,7 @@
else
return SDValue(); // No mulhs or equivalent
// If d > 0 and m < 0, add the numerator
- if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
if (Created)
Created->push_back(Q.getNode());
@@ -2837,15 +3281,15 @@
}
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
- Q = DAG.getNode(ISD::SRA, dl, VT, Q,
- DAG.getConstant(magics.s, getShiftAmountTy()));
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
if (Created)
Created->push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
SDValue T =
DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
- getShiftAmountTy()));
+ getShiftAmountTy(Q.getValueType())));
if (Created)
Created->push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
@@ -2867,41 +3311,54 @@
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
- ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
- APInt::mu magics = N1C->getAPIntValue().magicu();
+ const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::mu magics = N1C.magicu();
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the divided value upfront.
+ if (magics.a != 0 && !N1C[0]) {
+ unsigned Shift = N1C.countTrailingZeros();
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor.
+ magics = N1C.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
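+ // e.g. udiv by 28 = 7 << 2: shift the dividend right by 2, then use a
+ // magic multiply for 7 computed with two spare bits of precision, which
+ // is guaranteed not to need the add/sub fixup (magics.a == 0).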
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
- SDValue Q;
if (isOperationLegalOrCustom(ISD::MULHU, VT))
- Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
- DAG.getConstant(magics.m, VT));
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
- N->getOperand(0),
- DAG.getConstant(magics.m, VT)).getNode(), 1);
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
else
return SDValue(); // No mulhu or equivalent
if (Created)
Created->push_back(Q.getNode());
if (magics.a == 0) {
- assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+ assert(magics.s < N1C.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- return DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(magics.s, getShiftAmountTy()));
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
if (Created)
Created->push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(1, getShiftAmountTy()));
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
if (Created)
Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
if (Created)
Created->push_back(NPQ.getNode());
- return DAG.getNode(ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(magics.s-1, getShiftAmountTy()));
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
}
}
diff --git a/src/LLVM/lib/CodeGen/ShadowStackGC.cpp b/src/LLVM/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..2609256c
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,453 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// runtime/GC/SemiSpace.cpp contains a prototype runtime which is compatible
+// with ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated on the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
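+// A sketch of the runtime-side layout this pass targets (field names are
+// illustrative; the authoritative definitions live with the collector):
+//
+//   struct FrameMap   { int32_t NumRoots; int32_t NumMeta; const void *Meta[]; };
+//   struct StackEntry { StackEntry *Next; const FrameMap *Map; void *Roots[]; };
+//   extern StackEntry *llvm_gc_root_chain;   // the global list head
+//
+// Each function pushes a StackEntry in its prologue and pops it at every
+// escape point, so a collector can walk the Next links to find all roots.
+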
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind, resume, and return
+ // do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) &&
+ !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
+ Type::getInt32Ty(C), NULL);
+ Constant *PersFn =
+ F.getParent()->
+ getOrInsertFunction("__gcc_personality_v0",
+ FunctionType::get(Type::getInt32Ty(C), true));
+ LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1,
+ "cleanup.lpad",
+ CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
+ NewBB, CleanupBB,
+ Args, CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(RI->getParent(), RI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+  // initializeCustomLowering creates the abstract type of this value.
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
+ };
+
+ Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
+ StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta));
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
+ };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices);
+}
+
+Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+  // initializeCustomLowering creates the generic version of this type.
+ std::vector<Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
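+  // For a function F with root types T0..Tn-1, the result is in effect
+  // (a sketch):
+  //
+  //   struct gc_stackentry.F { StackEntry Header; T0 Root0; ...; Tn-1 RootN-1; };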
+ return StructType::create(EltTys, "gc_stackentry."+F.getName().str());
+}
+
+/// initializeCustomLowering - Create the FrameMap and StackEntry types and
+/// the global root-chain variable that the lowered code refers to.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type*> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::create(EltTys, "gc_map");
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
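+  //
+  // A collector runtime might walk the chain like this (a sketch; only the
+  // llvm_gc_root_chain symbol is defined by this pass):
+  //
+  //   void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
+  //     for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
+  //       unsigned i = 0;
+  //       // Roots with metadata are numbered first (see CollectRoots).
+  //       for (unsigned e = R->Map->NumMeta; i != e; ++i)
+  //         Visitor(&R->Roots[i], R->Map->Meta[i]);
+  //       // The remaining roots have no metadata.
+  //       for (unsigned e = R->Map->NumRoots; i != e; ++i)
+  //         Visitor(&R->Roots[i], 0);
+  //     }
+  //   }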
+
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(M, StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
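+
+// For reference, each root arrives from the front end in a form like this
+// hypothetical IR pair:
+//
+//   %x = alloca i8*
+//   call void @llvm.gcroot(i8** %x, i8* null)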
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// performCustomLowering - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
+ 0,1,"gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+ StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
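+
+// In effect, each function with roots is rewritten as (a C-like sketch):
+//
+//   entry:
+//     gc_frame.map  = &__gc_<function>;    // constant FrameMap emitted above
+//     gc_frame.next = llvm_gc_root_chain;  // push this frame's entry
+//     llvm_gc_root_chain = &gc_frame;
+//     ...
+//   every return or unwind:
+//     llvm_gc_root_chain = gc_frame.next;  // pop it again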
diff --git a/src/LLVM/lib/CodeGen/ShrinkWrapping.cpp b/src/LLVM/lib/CodeGen/ShrinkWrapping.cpp
index aeaa38b..160f38f 100644
--- a/src/LLVM/lib/CodeGen/ShrinkWrapping.cpp
+++ b/src/LLVM/lib/CodeGen/ShrinkWrapping.cpp
@@ -226,7 +226,7 @@
// AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
CSRegSet prevAnticIn = AnticIn[MBB];
AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
- if (prevAnticIn |= AnticIn[MBB])
+ if (prevAnticIn != AnticIn[MBB])
changed = true;
return changed;
}
@@ -264,7 +264,7 @@
// AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
CSRegSet prevAvailOut = AvailOut[MBB];
AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
- if (prevAvailOut |= AvailOut[MBB])
+ if (prevAvailOut != AvailOut[MBB])
changed = true;
return changed;
}
@@ -277,7 +277,7 @@
// Initialize data flow sets.
clearAnticAvailSets();
- // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
bool changed = true;
unsigned iterations = 0;
while (changed) {
diff --git a/src/LLVM/lib/CodeGen/SjLjEHPrepare.cpp b/src/LLVM/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 0000000..ded2459
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,1026 @@
+//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden,
+ cl::desc("Disable the old SjLj EH preparation pass"));
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+ class SjLjEHPass : public FunctionPass {
+ const TargetLowering *TLI;
+ Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *SelectorFn;
+ Constant *ExceptionFn;
+ Constant *CallSiteFn;
+ Constant *DispatchSetupFn;
+ Constant *FuncCtxFn;
+ Value *CallSite;
+ DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ : FunctionPass(ID), TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
+
+ private:
+ bool setupEntryBlockAndCallSites(Function &F);
+ Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
+ void lowerIncomingArguments(Function &F);
+ void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
+
+ void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
+ void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
+ SwitchInst *CatchSwitch);
+ void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ void splitLandingPad(InvokeInst *II);
+ bool insertSjLjEHSupport(Function &F);
+ };
+} // end anonymous namespace
+
+char SjLjEHPass::ID = 0;
+
+// Public interface to the SjLjEHPass pass.
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
+ return new SjLjEHPass(TLI);
+}
+
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPass::doInitialization(Module &M) {
+ // Build the function context structure.
+  // builtin_setjmp uses a five-word jbuf.
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ FunctionContextTy =
+ StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
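+
+  // In C terms, the context built above is roughly (a sketch; the jbuf slot
+  // assignments follow the stores made later in this file):
+  //
+  //   struct FunctionContext {
+  //     void    *prev;         // __prev
+  //     int32_t  call_site;
+  //     int32_t  data[4];      // data[0] = exception, data[1] = selector
+  //     void    *personality;  // __personality
+  //     void    *lsda;         // __lsda
+  //     void    *jbuf[5];      // jbuf[0] = FP, jbuf[2] = SP; setjmp fills rest
+  //   };
+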
+ RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ UnregisterFn =
+ M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+ Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy),
+ (Type *)0);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
+ ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ DispatchSetupFn
+ = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+ PersonalityFn = 0;
+
+ return true;
+}
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
+ Value *CallSite) {
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ // Insert a store of the call-site number
+ new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
+}
+
+/// splitLandingPad - Split a landing pad. This takes considerable care because
+/// of PHIs and other nasties. The problem is that the jump table needs to jump
+/// to the landing pad block. However, the landing pad block can be reached
+/// only by an invoke instruction. So we clone the landingpad instruction into
+/// its own basic block and have the invoke jump to it. The successor of that
+/// basic block is now the target for the jump table.
+///
+/// But because of PHI nodes, we need to create another basic block for the jump
+/// table to jump to. This is definitely a hack, because the values for the PHI
+/// nodes may not be defined on the edge from the jump table. But that's okay,
+/// because the jump table is simply a construct to mimic what is happening in
+/// the CFG. So the values are mysteriously there, even though there is no value
+/// for the PHI from the jump table's edge (hence calling this a hack).
+void SjLjEHPass::splitLandingPad(InvokeInst *II) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(),
+ ".1", ".2", this, NewBBs);
+
+ // Create an empty block so that the jump table has something to jump to
+ // which doesn't have any PHI nodes.
+ BasicBlock *LPad = NewBBs[0];
+ BasicBlock *Succ = *succ_begin(LPad);
+ BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land",
+ LPad->getParent(), Succ);
+ LPad->getTerminator()->eraseFromParent();
+ BranchInst::Create(JumpTo, LPad);
+ BranchInst::Create(Succ, JumpTo);
+ LPadSuccMap[II] = JumpTo;
+
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *Val = PN->removeIncomingValue(LPad, false);
+ PN->addIncoming(Val, JumpTo);
+ }
+}
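+
+// The resulting CFG for one such invoke looks roughly like this (a sketch;
+// block names follow the code above):
+//
+//   invoke ... unwind label %lpad.1
+//   lpad.1:  landingpad ...          ; reachable only from the invoke
+//            br label %jt.land
+//   jt.land: br label %succ          ; the jump table branches here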
+
+/// markInvokeCallSite - Insert code to mark the call_site for this invoke
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
+ Value *CallSite,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+ // The runtime comes back to the dispatcher with the call_site - 1 in
+ // the context. Odd, but there it is.
+ ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo - 1);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ // FIXME: New EH - This if-condition will be always true in the new scheme.
+ if (II->getUnwindDest()->isLandingPad())
+ splitLandingPad(II);
+ else
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert the store of the call site value
+ insertCallSiteStore(II, InvokeNo, CallSite);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
+
+ // Add a switch case to our unwind block.
+ if (BasicBlock *SuccBB = LPadSuccMap[II]) {
+ CatchSwitch->addCase(SwitchValC, SuccBB);
+ } else {
+ CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+ }
+
+  // We still want this to look like an invoke so that we emit the LSDA
+  // properly; therefore we don't transform the invoke into a call here.
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// splitLiveRangesAcrossInvokes - Spill each value that is live across an
+/// unwind edge into a stack location, guaranteeing that nothing is live
+/// across the unwind edge. This process also splits all critical edges
+/// coming out of invokes.
+/// FIXME: Move this function to a common utility file (Local.cpp?) so
+/// both SjLj and LowerInvoke can use it.
+void SjLjEHPass::
+splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+
+ // FIXME: New EH - This if-condition will be always true in the new scheme.
+ if (II->getUnwindDest()->isLandingPad())
+ splitLandingPad(II);
+ else
+ SplitCriticalEdge(II, 1, this);
+
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "Critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
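+
+  // For an aggregate argument %agg, for example, the pair emitted above is
+  // (sketch):
+  //   %t = extractvalue %agg, 0
+  //   %c = insertvalue %agg, %t, 0   ; value-identical to %agg
+  // with all other uses of %agg rewritten to use %c.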
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock))
+ NeedsSpill = true;
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+/// CreateLandingPadLoad - Load the exception handling values and insert them
+/// into a structure.
+static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr,
+ Value *SelAddr,
+ BasicBlock::iterator InsertPt) {
+ Value *Exn = new LoadInst(ExnAddr, "exn", false,
+ InsertPt);
+ Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt);
+ Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt);
+
+ Ty = StructType::get(Exn->getType(), Sel->getType(), NULL);
+ InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty),
+ Exn, 0,
+ "lpad.val", InsertPt);
+ return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt);
+}
+
+/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a
+/// load from the stored values (via CreateLandingPadLoad). This looks through
+/// PHI nodes, and removes them if they are dead.
+static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr,
+ Value *SelAddr) {
+ if (Inst->use_empty()) return;
+
+ while (!Inst->use_empty()) {
+ Instruction *I = cast<Instruction>(Inst->use_back());
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr);
+ if (PN->use_empty()) PN->eraseFromParent();
+ continue;
+ }
+
+ I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I));
+ }
+}
+
+bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ // Look through the terminators of the basic blocks to find invokes, returns
+ // and unwinds.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+ }
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // If we don't have any invokes, there's nothing to do.
+ if (Invokes.empty()) return false;
+
+ // Find the eh.selector.*, eh.exception and alloca calls.
+ //
+  // Remember any allocas that aren't in the entry block, as the SP saved in
+  // the jmpbuf will need to be updated for them.
+ //
+ // We'll use the first eh.selector to determine the right personality
+ // function to use. For SJLJ, we always use the same personality for the
+ // whole function, not on a per-selector basis.
+ // FIXME: That's a bit ugly. Better way?
+ SmallVector<CallInst*,16> EH_Selectors;
+ SmallVector<CallInst*,16> EH_Exceptions;
+ SmallVector<Instruction*,16> JmpbufUpdatePoints;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ // Note: Skip the entry block since there's nothing there that interests
+ // us. eh.selector and eh.exception shouldn't ever be there, and we
+ // want to disregard any allocas that are there.
+ //
+ // FIXME: This is awkward. The new EH scheme won't need to skip the entry
+ // block.
+ if (BB == F.begin()) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) {
+ // FIXME: This will be always non-NULL in the new EH.
+ if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
+ if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
+ }
+
+ continue;
+ }
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() == SelectorFn) {
+ if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
+ EH_Selectors.push_back(CI);
+ } else if (CI->getCalledFunction() == ExceptionFn) {
+ EH_Exceptions.push_back(CI);
+ } else if (CI->getCalledFunction() == StackRestoreFn) {
+ JmpbufUpdatePoints.push_back(CI);
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ JmpbufUpdatePoints.push_back(AI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ // FIXME: This will be always non-NULL in the new EH.
+ if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
+ if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
+ }
+ }
+ }
+
+ // If we don't have any eh.selector calls, we can't determine the personality
+ // function. Without a personality function, we can't process exceptions.
+ if (!PersonalityFn) return false;
+
+ // We have invokes, so we need to add register/unregister calls to get this
+ // function onto the global unwind stack.
+ //
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge we
+ // spill into a stack location, guaranteeing that there is nothing live across
+ // the unwind edge. This process also splits all critical edges coming out of
+  // invokes.
+ splitLiveRangesAcrossInvokes(Invokes);
+
+
+ SmallVector<LandingPadInst*, 16> LandingPads;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ // FIXME: This will be always non-NULL in the new EH.
+ if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
+ LandingPads.push_back(LPI);
+ }
+
+
+ BasicBlock *EntryBB = F.begin();
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be added to the global context list.
+ unsigned Align = 4; // FIXME: Should be a TLI check?
+ AllocaInst *FunctionContext =
+ new AllocaInst(FunctionContextTy, 0, Align,
+ "fcn_context", F.begin()->begin());
+
+ Value *Idxs[2];
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+  // We also need to keep around a reference to the call_site field.
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->data[1]
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
+ "exc_selector_gep",
+ EntryBB->getTerminator());
+ // The exception value comes back in context->data[0]
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+  // The result of the eh.selector call will be replaced with a reference to
+ // the selector value returned in the function context. We leave the selector
+ // itself so the EH analysis later can use it.
+ for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+ CallInst *I = EH_Selectors[i];
+ Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+ I->replaceAllUsesWith(SelectorVal);
+ }
+
+ // eh.exception calls are replaced with references to the proper location in
+ // the context. Unlike eh.selector, the eh.exception calls are removed
+ // entirely.
+ for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+ CallInst *I = EH_Exceptions[i];
+    // It's possible for there to be duplicates, so check to make sure the
+ // instruction hasn't already been removed.
+ if (!I->getParent()) continue;
+ Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+ Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+ I->replaceAllUsesWith(Val);
+ I->eraseFromParent();
+ }
+
+ for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
+ ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr);
+
+ // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+ // branch to a dispatch block for non-zero returns. If we return normally,
+ // we're not handling an exception and just register the function context and
+ // continue.
+
+ // Create the dispatch block. The dispatch block is basically a big switch
+ // statement that goes to all of the invoke landing pads.
+ BasicBlock *DispatchBlock =
+ BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+ // Insert a load of the callsite in the dispatch block, and a switch on its
+ // value. By default, we issue a trap statement.
+ BasicBlock *TrapBlock =
+ BasicBlock::Create(F.getContext(), "trapbb", &F);
+ CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap),
+ "", TrapBlock);
+ new UnreachableInst(F.getContext(), TrapBlock);
+
+ Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+ DispatchBlock);
+ SwitchInst *DispatchSwitch =
+ SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(),
+ DispatchBlock);
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "eh.sjlj.setjmp.cont");
+
+ // Populate the Function Context
+ // 1. LSDA address
+ // 2. Personality function address
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+ // LSDA address
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *JBufPtr
+ = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
+ EntryBB->getTerminator());
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg =
+ CastInst::Create(Instruction::BitCast, JBufPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+ "",
+ EntryBB->getTerminator());
+
+  // Add a call to dispatch_setup after the setjmp call. This is expanded to
+  // whatever target-specific setup needs to be done.
+ CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator());
+
+  // Check the return value of the setjmp; non-zero goes to the dispatcher.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, DispatchVal, Zero,
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+  // Register the function context and make sure it's known not to throw.
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FunctionContext, "",
+ ContBlock->getTerminator());
+ Register->setDoesNotThrow();
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values, and fill in the dispatch switch accordingly.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+  // Mark call instructions that aren't nounwind as no-action (call_site ==
+  // -1). Skip the entry block: at that point no function context has been
+  // created for this function yet, so any unexpected exceptions thrown will
+  // go directly to the caller's context, which is what we want anyway. No
+  // need to do anything here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore calls to the EH builtins (eh.selector, eh.exception)
+ Constant *Callee = CI->getCalledFunction();
+ if (Callee != SelectorFn && Callee != ExceptionFn
+ && !CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
+ insertCallSiteStore(RI, -1, CallSite);
+ }
+ }
+
+ // Replace all unwinds with a branch to the unwind handler.
+ // ??? Should this ever happen with sjlj exceptions?
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(TrapBlock, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+ Instruction *AI = JmpbufUpdatePoints[i];
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(AI);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+ CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+
+ return true;
+}
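+
+// The overall shape of the transformed function is (a C-like sketch):
+//
+//   if (builtin_setjmp(fcn_context.jbuf) == 0) {
+//     _Unwind_SjLj_Register(&fcn_context);
+//     ... the original function body ...
+//   } else {
+//     // The runtime stores call_site - 1 before jumping back here.
+//     switch (fcn_context.call_site) {
+//       case 0: goto /* landing pad of invoke #1 */; // ...
+//       default: __builtin_trap();
+//     }
+//   }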
+
+/// setupFunctionContext - Allocate the function context on the stack and fill
+/// it with all of the data that we know at this point.
+Value *SjLjEHPass::
+setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an alloca
+ // because the value needs to be added to the global context list.
+ unsigned Align =
+ TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
+ AllocaInst *FuncCtx =
+ new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
+
+ // Fill in the function context structure.
+ Value *Idxs[2];
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+
+ // Keep around a reference to the call_site field.
+ Idxs[0] = Zero;
+ Idxs[1] = One;
+ CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site",
+ EntryBB->getTerminator());
+
+ // Reference the __data field.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data",
+ EntryBB->getTerminator());
+
+ // The exception value comes back in context->__data[0].
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->__data[1].
+ Idxs[1] = One;
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
+ "exn_selector_gep",
+ EntryBB->getTerminator());
+
+ for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
+ LandingPadInst *LPI = LPads[I];
+ IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+
+ Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+ }
+
+ // Personality function
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ if (!PersonalityFn)
+ PersonalityFn = LPads[0]->getPersonalityFn();
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // LSDA address
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ return FuncCtx;
+}
+
+/// lowerIncomingArguments - To avoid having to handle incoming arguments
+/// specially, we lower each arg to a copy instruction in the entry block. This
+/// ensures that the argument value itself cannot be live out of the entry
+/// block.
+void SjLjEHPass::lowerIncomingArguments(Function &F) {
+ BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
+ ++AfterAllocaInsPt;
+
+ for (Function::arg_iterator
+ AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
+ Type *Ty = AI->getType();
+
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC =
+ new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
+ AfterAllocaInsPt);
+ AI->replaceAllUsesWith(NC);
+
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However, we're
+ // replacing it here with the same value it was constructed with. We do
+ // this because the above replaceAllUsesWith() clobbered the operand, but
+ // we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+}
+
+/// lowerAcrossUnwindEdges - Find all variables that are live across an unwind
+/// edge and spill them.
+void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
+  // Scan the code looking for instructions with bad live ranges.
+ for (Function::iterator
+ BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator
+ II = BB->begin(), IIE = BB->end(); II != IIE; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*, 16> Users;
+ for (Value::use_iterator
+ UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+ }
+}
+
+/// setupEntryBlockAndCallSites - Set up the entry block by creating and filling
+/// the function context and marking the call sites with the appropriate
+/// values. These values are used by the DWARF EH emitter.
+bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
+ SmallVector<ReturnInst*, 16> Returns;
+ SmallVector<InvokeInst*, 16> Invokes;
+ SmallVector<LandingPadInst*, 16> LPads;
+
+ // Look through the terminators of the basic blocks to find invokes.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ LPads.push_back(II->getUnwindDest()->getLandingPadInst());
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Returns.push_back(RI);
+ }
+
+ if (Invokes.empty()) return false;
+
+ lowerIncomingArguments(F);
+ lowerAcrossUnwindEdges(F, Invokes);
+
+ Value *FuncCtx = setupFunctionContext(F, LPads);
+ BasicBlock *EntryBB = F.begin();
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Value *Idxs[2] = {
+ ConstantInt::get(Int32Ty, 0), 0
+ };
+
+ // Get a reference to the jump buffer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep",
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
+ EntryBB->getTerminator());
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator());
+
+ // Store a pointer to the function context so that the back-end will know
+ // where to look for it.
+ Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator());
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values.
+ for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
+ insertCallSiteStore(Invokes[I], I + 1, CallSite);
+
+ ConstantInt *CallSiteNum =
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
+ }
+
+  // Mark call instructions that aren't nounwind as no-action (call_site ==
+  // -1). Skip the entry block: at that point no function context has been
+  // created for this function yet, so any unexpected exceptions thrown will
+  // go directly to the caller's context, which is what we want anyway. No
+  // need to do anything here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;)
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
+ insertCallSiteStore(RI, -1, CallSite);
+ }
+
+  // Register the function context and make sure it's known not to throw.
+ CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "",
+ EntryBB->getTerminator());
+ Register->setDoesNotThrow();
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned I = 0, E = Returns.size(); I != E; ++I)
+ CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);
+
+ return true;
+}
+
+bool SjLjEHPass::runOnFunction(Function &F) {
+ bool Res = false;
+ if (!DisableOldSjLjEH)
+ Res = insertSjLjEHSupport(F);
+ else
+ Res = setupEntryBlockAndCallSites(F);
+ return Res;
+}
diff --git a/src/LLVM/lib/CodeGen/SlotIndexes.cpp b/src/LLVM/lib/CodeGen/SlotIndexes.cpp
index 1bc148f..ca79caf 100644
--- a/src/LLVM/lib/CodeGen/SlotIndexes.cpp
+++ b/src/LLVM/lib/CodeGen/SlotIndexes.cpp
@@ -10,47 +10,20 @@
#define DEBUG_TYPE "slotindexes"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-
-// Yep - these are thread safe. See the header for details.
-namespace {
-
-
- class EmptyIndexListEntry : public IndexListEntry {
- public:
- EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {}
- };
-
- class TombstoneIndexListEntry : public IndexListEntry {
- public:
- TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {}
- };
-
- // The following statics are thread safe. They're read only, and you
- // can't step from them to any other list entries.
- ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
- ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
-}
-
char SlotIndexes::ID = 0;
INITIALIZE_PASS(SlotIndexes, "slotindexes",
- "Slot index numbering", false, false);
+ "Slot index numbering", false, false)
-IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
- return &*IndexListEntryEmptyKey;
-}
-
-IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
- return &*IndexListEntryTombstoneKey;
-}
-
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
@@ -59,9 +32,8 @@
void SlotIndexes::releaseMemory() {
mi2iMap.clear();
- mbb2IdxMap.clear();
+ MBBRanges.clear();
idx2MBBMap.clear();
- terminatorGaps.clear();
clearList();
}
@@ -86,13 +58,15 @@
"Index list non-empty at initial numbering?");
assert(idx2MBBMap.empty() &&
"Index -> MBB mapping non-empty at initial numbering?");
- assert(mbb2IdxMap.empty() &&
+ assert(MBBRanges.empty() &&
"MBB -> Index mapping non-empty at initial numbering?");
assert(mi2iMap.empty() &&
"MachineInstr -> Index mapping non-empty at initial numbering?");
functionSize = 0;
unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
push_back(createEntry(0, index));
@@ -104,51 +78,26 @@
// Insert an index for the MBB start.
SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
- index += SlotIndex::NUM;
-
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
MachineInstr *mi = miItr;
if (mi->isDebugValue())
continue;
- if (miItr == mbb->getFirstTerminator()) {
- push_back(createEntry(0, index));
- terminatorGaps.insert(
- std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += SlotIndex::NUM;
- }
-
// Insert a store index for the instr.
- push_back(createEntry(mi, index));
+ push_back(createEntry(mi, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(
- std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
++functionSize;
-
- unsigned Slots = mi->getDesc().getNumDefs();
- if (Slots == 0)
- Slots = 1;
-
- index += (Slots + 1) * SlotIndex::NUM;
}
- if (mbb->getFirstTerminator() == mbb->end()) {
- push_back(createEntry(0, index));
- terminatorGaps.insert(
- std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += SlotIndex::NUM;
- }
+  // We insert one blank instruction between basic blocks.
+ push_back(createEntry(0, index += SlotIndex::InstrDist));
- // One blank instruction at the end.
- push_back(createEntry(0, index));
-
- SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
- mbb2IdxMap.insert(
- std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex)));
-
+ MBBRanges[mbb->getNumber()].first = blockStartIndex;
+ MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
}
@@ -162,37 +111,41 @@
}
void SlotIndexes::renumberIndexes() {
-
// Renumber updates the index of every element of the index list.
- // If all instrs in the function have been allocated an index (which has been
- // placed in the index list in the order of instruction iteration) then the
- // resulting numbering will match what would have been generated by the
- // pass during the initial numbering of the function if the new instructions
- // had been present.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+ ++NumGlobalRenum;
- functionSize = 0;
unsigned index = 0;
for (IndexListEntry *curEntry = front(); curEntry != getTail();
curEntry = curEntry->getNext()) {
-
curEntry->setIndex(index);
-
- if (curEntry->getInstr() == 0) {
- // MBB start entry or terminator gap. Just step index by 1.
- index += SlotIndex::NUM;
- }
- else {
- ++functionSize;
- unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs();
- if (Slots == 0)
- Slots = 1;
-
- index += (Slots + 1) * SlotIndex::NUM;
- }
+ index += SlotIndex::InstrDist;
}
}
+// Renumber indexes locally after curEntry was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) {
+ // Number indexes with half the default spacing so we can catch up quickly.
+ const unsigned Space = SlotIndex::InstrDist/2;
+ assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+ IndexListEntry *start = curEntry->getPrev();
+ unsigned index = start->getIndex();
+ IndexListEntry *tail = getTail();
+ do {
+ curEntry->setIndex(index += Space);
+ curEntry = curEntry->getNext();
+ // If the next index is bigger, we have caught up.
+ } while (curEntry != tail && curEntry->getIndex() <= index);
+
+ DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-'
+ << index << " ***\n");
+ ++NumLocalRenum;
+}
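+// Worked example (a sketch, assuming the default spacing SlotIndex::InstrDist
+// is 16): starting from the previous entry's index, say 16, the loop above
+// hands out 24, 32, 40, ... in steps of InstrDist/2 = 8 to the following
+// entries, and stops as soon as the next entry's old index is already bigger
+// than the last one handed out, i.e. once the numbering has caught up.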
+
+
void SlotIndexes::dump() const {
for (const IndexListEntry *itr = front(); itr != getTail();
itr = itr->getNext()) {
@@ -205,20 +158,17 @@
}
}
- for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
- itr != mbb2IdxMap.end(); ++itr) {
- dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
- << itr->second.first << ", " << itr->second.second << "]\n";
- }
+ for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+ dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
}
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
- os << entry().getIndex();
- if (isPHI())
- os << "*";
+ if (isValid())
+ os << entry().getIndex() << "LudS"[getSlot()];
else
- os << "LudS"[getSlot()];
+ os << "invalid";
}
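// For example, a use slot at index 32 now prints as "32u" and a def slot as
// "32d" (the letters come from the "LudS" table: Load, use, def, Store),
// while an invalid index prints as "invalid".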
// Dump a SlotIndex to stderr.
diff --git a/src/LLVM/lib/CodeGen/SpillPlacement.cpp b/src/LLVM/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 0000000..6f33f54
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,369 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
+//
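+// A small worked example (illustrative numbers, not taken from any real
+// function): take two bundles n and m joined by a single transparent block b
+// with normalized weight 1, with biases B_n = +1 and B_m = -0.5. Starting
+// from V_m = 0, node n sees the input sum 1 + 1*0 = 1 > 0 and settles at
+// V_n = +1. Node m then sees -0.5 + 1*1 = +0.5 > 0 and also goes positive,
+// so the variable stays in a register across b and no spill code is needed
+// on that edge.
+//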
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
+ /// Ideally, these two numbers should be identical, but inaccuracies in the
+  /// block frequency estimates mean that we need to normalize ingoing and
+ /// outgoing frequencies separately so they are commensurate.
+ float Scale[2];
+
+ /// Bias - Normalized contributions from non-transparent blocks.
+ /// A bundle connected to a MustSpill block has a huge negative bias,
+ /// otherwise it is a number in the range [-2;2].
+ float Bias;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always in the range [-1;1]. A positive number means the variable
+ /// should go in a register through this bundle.
+ float Value;
+
+ typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+  /// bundles. The weights are all positive and add up to at most 2; weights
+  /// from ingoing and outgoing nodes separately add up to at most 1. The weight
+ /// sum can be less than 2 when the variable is not live into / out of some
+ /// connected basic blocks.
+ LinkVector Links;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+  /// mustSpill - Return true if this node is so biased that it must spill.
+ bool mustSpill() const {
+    // Actually, we must spill if Bias < -sum(weights).
+    // It might be worth it to compute the weight sum here.
+ return Bias < -2.0f;
+ }
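+  // (For instance, with Bias = -3 and the maximum possible link weight sum of
+  // 2, the input sum is at most -1, so such a node can never go positive.)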
+
+ /// Node - Create a blank Node.
+ Node() {
+ Scale[0] = Scale[1] = 0;
+ }
+
+  /// clear - Reset per-query data, but preserve frequencies that only depend
+  /// on the CFG.
+ void clear() {
+ Bias = Value = 0;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ /// out=0 for an ingoing link, and 1 for an outgoing link.
+ void addLink(unsigned b, float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+  /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+ void addBias(float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w *= Scale[out];
+ Bias += w;
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
+ bool update(const Node nodes[]) {
+ // Compute the weighted sum of inputs.
+ float Sum = Bias;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ Sum += I->first * nodes[I->second].Value;
+
+ // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+ // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+ // two reasons:
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ const float Thres = 1e-4f;
+ bool Before = preferReg();
+ if (Sum < -Thres)
+ Value = -1;
+ else if (Sum > Thres)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ BlockFrequency.resize(mf.getNumBlockIDs());
+ for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+ float Freq = LiveIntervals::getSpillWeight(true, false,
+ loops->getLoopDepth(I));
+ unsigned Num = I->getNumber();
+ BlockFrequency[Num] = Freq;
+ nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
+ nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
+ }
+
+ // Scales are reciprocal frequencies.
+ for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
+ for (unsigned d = 0; d != 2; ++d)
+ if (nodes[i].Scale[d] > 0)
+ nodes[i].Scale[d] = 1 / nodes[i].Scale[d];
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear();
+}
+
+
+/// addConstraints - Compute node biases and weights from a set of constraints.
+/// Set a bit in ActiveNodes for each active node.
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+ for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ float Freq = getBlockFrequency(I->Number);
+ const float Bias[] = {
+ 0, // DontCare,
+ 1, // PrefReg,
+ -1, // PrefSpill
+ 0, // PrefBoth
+ -HUGE_VALF // MustSpill
+ };
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+ }
+ }
+}
+
+/// addPrefSpill - Same as addConstraints(PrefSpill).
+void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ float Freq = getBlockFrequency(*I);
+ if (Strong)
+ Freq += Freq;
+ unsigned ib = bundles->getBundle(*I, 0);
+ unsigned ob = bundles->getBundle(*I, 1);
+ activate(ib);
+ activate(ob);
+ nodes[ib].addBias(-Freq, 1);
+ nodes[ob].addBias(-Freq, 0);
+ }
+}
+
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+ for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
+ ++I) {
+ unsigned Number = *I;
+ unsigned ib = bundles->getBundle(Number, 0);
+ unsigned ob = bundles->getBundle(Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
+ Linked.push_back(ib);
+ if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
+ Linked.push_back(ob);
+ float Freq = getBlockFrequency(Number);
+ nodes[ib].addLink(ob, Freq, 1);
+ nodes[ob].addLink(ib, Freq, 0);
+ }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+ Linked.clear();
+ RecentPositive.clear();
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ nodes[n].update(nodes);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (nodes[n].mustSpill())
+ continue;
+ if (!nodes[n].Links.empty())
+ Linked.push_back(n);
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ return !RecentPositive.empty();
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+/// The Linked member holds the numbers of the linked nodes that need updating.
+void SpillPlacement::iterate() {
+ // First update the recently positive nodes. They have likely received new
+ // negative bias that will turn them off.
+ while (!RecentPositive.empty())
+ nodes[RecentPositive.pop_back_val()].update(nodes);
+
+ if (Linked.empty())
+ return;
+
+ // Run up to 10 iterations. The edge bundle numbering is closely related to
+ // basic block numbering, so there is a strong tendency towards chains of
+ // linked nodes with sequential numbers. By scanning the linked nodes
+ // backwards and forwards, we make it very likely that a single node can
+ // affect the entire network in a single iteration. That means very fast
+ // convergence, usually in a single iteration.
+ for (unsigned iteration = 0; iteration != 10; ++iteration) {
+ // Scan backwards, skipping the last node which was just updated.
+ bool Changed = false;
+ for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+ llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+
+ // Scan forwards, skipping the first node which was just updated.
+ Changed = false;
+ for (SmallVectorImpl<unsigned>::const_iterator I =
+ llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+ unsigned n = *I;
+ if (nodes[n].update(nodes)) {
+ Changed = true;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ }
+ if (!Changed || !RecentPositive.empty())
+ return;
+ }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+ Linked.clear();
+ RecentPositive.clear();
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool
+SpillPlacement::finish() {
+ assert(ActiveNodes && "Call prepare() first");
+
+ // Write preferences back to ActiveNodes.
+ bool Perfect = true;
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ if (!nodes[n].preferReg()) {
+ ActiveNodes->reset(n);
+ Perfect = false;
+ }
+ ActiveNodes = 0;
+ return Perfect;
+}
diff --git a/src/LLVM/lib/CodeGen/SpillPlacement.h b/src/LLVM/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 0000000..fc412f8
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,156 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
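+// A hypothetical client would look roughly like this (the variable names are
+// illustrative, not part of this interface):
+//
+//   SpillPlacement &SP = getAnalysis<SpillPlacement>();
+//   BitVector RegBundles;
+//   SP.prepare(RegBundles);
+//   SP.addConstraints(LiveBlocks);   // entry/exit constraints per block
+//   SP.addLinks(TransparentBlocks);  // blocks the variable lives through
+//   if (SP.scanActiveBundles())
+//     SP.iterate();
+//   bool Perfect = SP.finish();      // RegBundles now holds the placement
+//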
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the prepare()
+ // caller.
+ BitVector *ActiveNodes;
+
+ // Nodes with active links. Populated by scanActiveBundles.
+ SmallVector<unsigned, 8> Linked;
+
+ // Nodes that went positive during the last call to scanActiveBundles or
+ // iterate.
+ SmallVector<unsigned, 8> RecentPositive;
+
+ // Block frequencies are computed once. Indexed by block number.
+ SmallVector<float, 4> BlockFrequency;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ ~SpillPlacement() { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ PrefBoth, ///< Block entry prefers both register and stack.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+
+ /// True when this block changes the value of the live range. This means
+ /// the block has a non-PHI def. When this is false, a live-in value on
+ /// the stack can be live-out on the stack without inserting a spill.
+ bool ChangesValue;
+ };
+
+ /// prepare - Reset state and prepare for a new spill placement computation.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+  /// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
+  /// equivalent to calling addConstraints with identical BlockConstraints
+  /// whose Entry = Exit = PrefSpill and ChangesValue = false.
+ ///
+ /// @param Blocks Array of block numbers that prefer to spill in and out.
+ /// @param Strong When true, double the negative bias for these blocks.
+ void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+  /// Return true if there are any positive nodes.
+ bool scanActiveBundles();
+
+  /// iterate - Update the network iteratively until convergence, or until new
+  /// positive bundles are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool finish();
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ float getBlockFrequency(unsigned Number) const {
+ return BlockFrequency[Number];
+ }
+
+private:
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual void releaseMemory();
+
+ void activate(unsigned);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/src/LLVM/lib/CodeGen/Spiller.cpp b/src/LLVM/lib/CodeGen/Spiller.cpp
index 1c17af8..b6bbcd7 100644
--- a/src/LLVM/lib/CodeGen/Spiller.cpp
+++ b/src/LLVM/lib/CodeGen/Spiller.cpp
@@ -11,7 +11,9 @@
#include "Spiller.h"
#include "VirtRegMap.h"
+#include "LiveRangeEdit.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -23,12 +25,11 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <set>
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, splitting, inline_ };
+ enum SpillerName { trivial, standard, inline_ };
}
static cl::opt<SpillerName>
@@ -37,7 +38,6 @@
cl::Prefix,
cl::values(clEnumVal(trivial, "trivial spiller"),
clEnumVal(standard, "default spiller"),
- clEnumVal(splitting, "splitting spiller"),
clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
cl::init(standard));
@@ -74,13 +74,13 @@
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
void trivialSpillEverywhere(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals) {
+ SmallVectorImpl<LiveInterval*> &newIntervals) {
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
"Attempting to spill already spilled value.");
- assert(!li->isStackSlot() &&
+ assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
"Trying to spill a stack slot.");
DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
@@ -144,7 +144,7 @@
vrm->addSpillSlotUse(ss, loadInstr);
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
- newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
+ newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
@@ -158,7 +158,7 @@
vrm->addSpillSlotUse(ss, storeInstr);
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
- newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
+ newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
@@ -180,12 +180,9 @@
VirtRegMap &vrm)
: SpillerBase(pass, mf, vrm) {}
- void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &,
- SlotIndex*) {
+ void spill(LiveRangeEdit &LRE) {
// Ignore spillIs - we don't use it.
- trivialSpillEverywhere(li, newIntervals);
+ trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
}
};
@@ -196,326 +193,42 @@
/// Falls back on LiveIntervals::addIntervalsForSpills.
class StandardSpiller : public Spiller {
protected:
+ MachineFunction *mf;
LiveIntervals *lis;
+ LiveStacks *lss;
MachineLoopInfo *loopInfo;
VirtRegMap *vrm;
public:
StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
VirtRegMap &vrm)
- : lis(&pass.getAnalysis<LiveIntervals>()),
+ : mf(&mf),
+ lis(&pass.getAnalysis<LiveIntervals>()),
+ lss(&pass.getAnalysis<LiveStacks>()),
loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
vrm(&vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
- void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex*) {
+ void spill(LiveRangeEdit &LRE) {
std::vector<LiveInterval*> added =
- lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
- newIntervals.insert(newIntervals.end(), added.begin(), added.end());
+ lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(),
+ loopInfo, *vrm);
+ LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(),
+ added.begin(), added.end());
+
+ // Update LiveStacks.
+ int SS = vrm->getStackSlot(LRE.getReg());
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+ const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg());
+ LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
+ if (!SI.hasAtLeastOneValue())
+ SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
+ SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0));
}
};
} // end anonymous namespace
-namespace {
-
-/// When a call to spill is placed this spiller will first try to break the
-/// interval up into its component values (one new interval per value).
-/// If this fails, or if a call is placed to spill a previously split interval
-/// then the spiller falls back on the standard spilling mechanism.
-class SplittingSpiller : public StandardSpiller {
-public:
- SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : StandardSpiller(pass, mf, vrm) {
- mri = &mf.getRegInfo();
- tii = mf.getTarget().getInstrInfo();
- tri = mf.getTarget().getRegisterInfo();
- }
-
- void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestStart) {
- if (worthTryingToSplit(li))
- tryVNISplit(li, earliestStart);
- else
- StandardSpiller::spill(li, newIntervals, spillIs, earliestStart);
- }
-
-private:
-
- MachineRegisterInfo *mri;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
- DenseSet<LiveInterval*> alreadySplit;
-
- bool worthTryingToSplit(LiveInterval *li) const {
- return (!alreadySplit.count(li) && li->getNumValNums() > 1);
- }
-
- /// Try to break a LiveInterval into its component values.
- std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
- SlotIndex *earliestStart) {
-
- DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
-
- std::vector<LiveInterval*> added;
- SmallVector<VNInfo*, 4> vnis;
-
- std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
-
- for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
- vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
- VNInfo *vni = *vniItr;
-
- // Skip unused VNIs.
- if (vni->isUnused())
- continue;
-
- DEBUG(dbgs() << " Extracted Val #" << vni->id << " as ");
- LiveInterval *splitInterval = extractVNI(li, vni);
-
- if (splitInterval != 0) {
- DEBUG(dbgs() << *splitInterval << "\n");
- added.push_back(splitInterval);
- alreadySplit.insert(splitInterval);
- if (earliestStart != 0) {
- if (splitInterval->beginIndex() < *earliestStart)
- *earliestStart = splitInterval->beginIndex();
- }
- } else {
- DEBUG(dbgs() << "0\n");
- }
- }
-
- DEBUG(dbgs() << "Original LI: " << *li << "\n");
-
- // If there original interval still contains some live ranges
- // add it to added and alreadySplit.
- if (!li->empty()) {
- added.push_back(li);
- alreadySplit.insert(li);
- if (earliestStart != 0) {
- if (li->beginIndex() < *earliestStart)
- *earliestStart = li->beginIndex();
- }
- }
-
- return added;
- }
-
- /// Extract the given value number from the interval.
- LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
- assert(vni->isDefAccurate() || vni->isPHIDef());
-
- // Create a new vreg and live interval, copy VNI ranges over.
- const TargetRegisterClass *trc = mri->getRegClass(li->reg);
- unsigned newVReg = mri->createVirtualRegister(trc);
- vrm->grow();
- LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
- VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
-
- // Start by copying all live ranges in the VN to the new interval.
- for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
- rItr != rEnd; ++rItr) {
- if (rItr->valno == vni) {
- newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
- }
- }
-
- // Erase the old VNI & ranges.
- li->removeValNo(vni);
-
- // Collect all current uses of the register belonging to the given VNI.
- // We'll use this to rename the register after we've dealt with the def.
- std::set<MachineInstr*> uses;
- for (MachineRegisterInfo::use_iterator
- useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
- useItr != useEnd; ++useItr) {
- uses.insert(&*useItr);
- }
-
- // Process the def instruction for this VNI.
- if (newVNI->isPHIDef()) {
- // Insert a copy at the start of the MBB. The range proceeding the
- // copy will be attached to the original LiveInterval.
- MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
- MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
- 0, false, lis->getVNInfoAllocator());
- phiDefVNI->setIsPHIDef(true);
- li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
- LiveRange *oldPHIDefRange =
- newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
-
- // If the old phi def starts in the middle of the range chop it up.
- if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) {
- LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end,
- oldPHIDefRange->valno);
- oldPHIDefRange->end = lis->getMBBStartIdx(defMBB);
- newLI->addRange(oldPHIDefRange2);
- } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) {
- // Otherwise if it's at the start of the range just trim it.
- oldPHIDefRange->start = copyIdx.getDefIndex();
- } else {
- assert(false && "PHI def range doesn't cover PHI def?");
- }
-
- newVNI->def = copyIdx.getDefIndex();
- newVNI->setCopy(copyMI);
- newVNI->setIsPHIDef(false); // not a PHI def anymore.
- newVNI->setIsDefAccurate(true);
- } else {
- // non-PHI def. Rename the def. If it's two-addr that means renaming the
- // use and inserting a new copy too.
- MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
- // We'll rename this now, so we can remove it from uses.
- uses.erase(defInst);
- unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg);
- bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx),
- twoAddrUseIsUndef = false;
-
- for (unsigned i = 0; i < defInst->getNumOperands(); ++i) {
- MachineOperand &mo = defInst->getOperand(i);
- if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) {
- mo.setReg(newVReg);
- if (isTwoAddr && mo.isUse() && mo.isUndef())
- twoAddrUseIsUndef = true;
- }
- }
-
- SlotIndex defIdx = lis->getInstructionIndex(defInst);
- newVNI->def = defIdx.getDefIndex();
-
- if (isTwoAddr && !twoAddrUseIsUndef) {
- MachineBasicBlock *defMBB = defInst->getParent();
- MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- LiveRange *origUseRange =
- li->getLiveRangeContaining(newVNI->def.getUseIndex());
- origUseRange->end = copyIdx.getDefIndex();
- VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
- true, lis->getVNInfoAllocator());
- LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
- newLI->addRange(copyRange);
- }
- }
-
- for (std::set<MachineInstr*>::iterator
- usesItr = uses.begin(), usesEnd = uses.end();
- usesItr != usesEnd; ++usesItr) {
- MachineInstr *useInst = *usesItr;
- SlotIndex useIdx = lis->getInstructionIndex(useInst);
- LiveRange *useRange =
- newLI->getLiveRangeContaining(useIdx.getUseIndex());
-
- // If this use doesn't belong to the new interval skip it.
- if (useRange == 0)
- continue;
-
- // This use doesn't belong to the VNI, skip it.
- if (useRange->valno != newVNI)
- continue;
-
- // Check if this instr is two address.
- unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
- bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
-
- // Rename uses (and defs for two-address instrs).
- for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
- MachineOperand &mo = useInst->getOperand(i);
- if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
- (mo.getReg() == li->reg)) {
- mo.setReg(newVReg);
- }
- }
-
- // If this is a two address instruction we've got some extra work to do.
- if (isTwoAddress) {
- // We modified the def operand, so we need to copy back to the original
- // reg.
- MachineBasicBlock *useMBB = useInst->getParent();
- MachineBasicBlock::iterator useItr(useInst);
- MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
- // Change the old two-address defined range & vni to start at
- // (and be defined by) the copy.
- LiveRange *origDefRange =
- li->getLiveRangeContaining(useIdx.getDefIndex());
- origDefRange->start = copyIdx.getDefIndex();
- origDefRange->valno->def = copyIdx.getDefIndex();
- origDefRange->valno->setCopy(copyMI);
-
- // Insert a new range & vni for the two-address-to-copy value. This
- // will be attached to the new live interval.
- VNInfo *copyVNI =
- newLI->getNextValue(useIdx.getDefIndex(), 0, true,
- lis->getVNInfoAllocator());
- LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
- newLI->addRange(copyRange);
- }
- }
-
- // Iterate over any PHI kills - we'll need to insert new copies for them.
- for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
- LRI != LRE; ++LRI) {
- if (LRI->valno != newVNI || LRI->end.isPHI())
- continue;
- SlotIndex killIdx = LRI->end;
- MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
- MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
- DebugLoc(), tii->get(TargetOpcode::COPY),
- li->reg)
- .addReg(newVReg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
- // Save the current end. We may need it to add a new range if the
- // current range runs of the end of the MBB.
- SlotIndex newKillRangeEnd = LRI->end;
- LRI->end = copyIdx.getDefIndex();
-
- if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
- assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
- "PHI kill range doesn't reach kill-block end. Not sane.");
- newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
- newKillRangeEnd, newVNI));
- }
-
- VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
- copyMI, true,
- lis->getVNInfoAllocator());
- newKillVNI->setHasPHIKill(true);
- li->addRange(LiveRange(copyIdx.getDefIndex(),
- lis->getMBBEndIdx(killMBB),
- newKillVNI));
- }
- newVNI->setHasPHIKill(false);
-
- return newLI;
- }
-
-};
-
-} // end anonymous namespace
-
-
-namespace llvm {
-Spiller *createInlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm);
-}
-
llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
@@ -523,7 +236,6 @@
default: assert(0 && "unknown spiller");
case trivial: return new TrivialSpiller(pass, mf, vrm);
case standard: return new StandardSpiller(pass, mf, vrm);
- case splitting: return new SplittingSpiller(pass, mf, vrm);
case inline_: return createInlineSpiller(pass, mf, vrm);
}
}
diff --git a/src/LLVM/lib/CodeGen/Spiller.h b/src/LLVM/lib/CodeGen/Spiller.h
index 3d5172a..41f1727 100644
--- a/src/LLVM/lib/CodeGen/Spiller.h
+++ b/src/LLVM/lib/CodeGen/Spiller.h
@@ -10,15 +10,11 @@
#ifndef LLVM_CODEGEN_SPILLER_H
#define LLVM_CODEGEN_SPILLER_H
-#include "llvm/ADT/SmallVector.h"
-#include <vector>
-
namespace llvm {
- class LiveInterval;
+ class LiveRangeEdit;
class MachineFunction;
class MachineFunctionPass;
- class SlotIndex;
class VirtRegMap;
/// Spiller interface.
@@ -29,19 +25,8 @@
public:
virtual ~Spiller() = 0;
- /// spill - Spill the given live interval. The method used will depend on
- /// the Spiller implementation selected.
- ///
- /// @param li The live interval to be spilled.
- /// @param spillIs A list of intervals that are about to be spilled,
- /// and so cannot be used for remat etc.
- /// @param newIntervals The newly created intervals will be appended here.
- /// @param earliestIndex The earliest point for splitting. (OK, it's another
- /// pointer to the allocator guts).
- virtual void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex = 0) = 0;
+ /// spill - Spill the LRE.getParent() live interval.
+ virtual void spill(LiveRangeEdit &LRE) = 0;
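+
+  /// Usage sketch - how an allocator might drive a Spiller. The LiveRangeEdit
+  /// constructor arguments shown here are an assumption; LiveRangeEdit is
+  /// defined elsewhere:
+  ///
+  ///   SmallVector<LiveInterval*, 4> NewIntervals;
+  ///   LiveRangeEdit LRE(*LI, NewIntervals);
+  ///   spiller->spill(LRE); // new intervals are appended via getNewVRegs()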
};
@@ -49,6 +34,13 @@
Spiller* createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);
+
+ /// Create and return a spiller that will insert spill code directly instead
+  /// of deferring through VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
}
#endif
diff --git a/src/LLVM/lib/CodeGen/SplitKit.cpp b/src/LLVM/lib/CodeGen/SplitKit.cpp
index a37a4d6..6362780 100644
--- a/src/LLVM/lib/CodeGen/SplitKit.cpp
+++ b/src/LLVM/lib/CodeGen/SplitKit.cpp
@@ -12,16 +12,16 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "splitter"
+#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
+#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -29,594 +29,1382 @@
using namespace llvm;
-static cl::opt<bool>
-AllowSplit("spiller-splits-edges",
- cl::desc("Allow critical edge splitting during spilling"));
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
+STATISTIC(NumCopies, "Number of copies inserted for splitting");
+STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
//===----------------------------------------------------------------------===//
// Split Analysis
//===----------------------------------------------------------------------===//
-SplitAnalysis::SplitAnalysis(const MachineFunction &mf,
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
const LiveIntervals &lis,
const MachineLoopInfo &mli)
- : mf_(mf),
- lis_(lis),
- loops_(mli),
- tii_(*mf.getTarget().getInstrInfo()),
- curli_(0) {}
+ : MF(vrm.getMachineFunction()),
+ VRM(vrm),
+ LIS(lis),
+ Loops(mli),
+ TII(*MF.getTarget().getInstrInfo()),
+ CurLI(0),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
void SplitAnalysis::clear() {
- usingInstrs_.clear();
- usingBlocks_.clear();
- usingLoops_.clear();
- curli_ = 0;
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = 0;
+ DidRepairRange = false;
}
-bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
- MachineBasicBlock *T, *F;
- SmallVector<MachineOperand, 4> Cond;
- return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
+ const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
+ std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+
+ // Compute split points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LSP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
+ if (FirstTerm == MBB->end())
+ LSP.first = LIS.getMBBEndIdx(MBB);
+ else
+ LSP.first = LIS.getInstructionIndex(FirstTerm);
+
+ // If there is a landing pad successor, also find the call instruction.
+ if (!LPad)
+ return LSP.first;
+    // There may not be a call instruction, in which case we ignore LPad.
+ LSP.second = LSP.first;
+ for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ I != E;) {
+ --I;
+ if (I->getDesc().isCall()) {
+ LSP.second = LIS.getInstructionIndex(I);
+ break;
+ }
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last split point
+ // back to the call that may throw.
+ if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.second;
+ else
+ return LSP.first;
}
-/// analyzeUses - Count instructions, basic blocks, and loops using curli.
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
void SplitAnalysis::analyzeUses() {
- const MachineRegisterInfo &MRI = mf_.getRegInfo();
- for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg);
- MachineInstr *MI = I.skipInstruction();) {
- if (MI->isDebugValue() || !usingInstrs_.insert(MI))
- continue;
- MachineBasicBlock *MBB = MI->getParent();
- if (usingBlocks_[MBB]++)
- continue;
- if (MachineLoop *Loop = loops_.getLoopFor(MBB))
- usingLoops_.insert(Loop);
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+ E = CurLI->vni_end(); I != E; ++I)
+ if (!(*I)->isPHIDef() && !(*I)->isUnused())
+ UseSlots.push_back((*I)->def);
+
+  // Get use slots from the use-def chain.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
+ ++I)
+ if (!I.getOperand().isUndef())
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex());
+
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+ // Remove duplicates, keeping the smaller slot for each instruction.
+ // That is what we want for early clobbers.
+ UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+ SlotIndex::isSameInstr),
+ UseSlots.end());
+
+ // Compute per-live block info.
+ if (!calcLiveBlockInfo()) {
+ // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+ // I am looking at you, RegisterCoalescer!
+ DidRepairRange = true;
+ ++NumRepairs;
+ DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+ const_cast<LiveIntervals&>(LIS)
+ .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ bool fixed = calcLiveBlockInfo();
+ (void)fixed;
+ assert(fixed && "Couldn't fix broken live interval");
}
- DEBUG(dbgs() << "Counted "
- << usingInstrs_.size() << " instrs, "
- << usingBlocks_.size() << " blocks, "
- << usingLoops_.size() << " loops in "
- << *curli_ << "\n");
+
+ DEBUG(dbgs() << "Analyze counted "
+ << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
}
-// Get three sets of basic blocks surrounding a loop: Blocks inside the loop,
-// predecessor blocks, and exit blocks.
-void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) {
- Blocks.clear();
+/// calcLiveBlockInfo - Fill the UseBlocks and ThroughBlocks arrays with
+/// information about the blocks where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+ ThroughBlocks.resize(MF.getNumBlockIDs());
+ NumThroughBlocks = NumGapBlocks = 0;
+ if (CurLI->empty())
+ return true;
- // Blocks in the loop.
- Blocks.Loop.insert(Loop->block_begin(), Loop->block_end());
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
- // Predecessor blocks.
- const MachineBasicBlock *Header = Loop->getHeader();
- for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(),
- E = Header->pred_end(); I != E; ++I)
- if (!Blocks.Loop.count(*I))
- Blocks.Preds.insert(*I);
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
- // Exit blocks.
- for (MachineLoop::block_iterator I = Loop->block_begin(),
- E = Loop->block_end(); I != E; ++I) {
- const MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if (!Blocks.Loop.count(*SI))
- Blocks.Exits.insert(*SI);
- }
-}
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = MFI;
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
-/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
-/// and around the Loop.
-SplitAnalysis::LoopPeripheralUse SplitAnalysis::
-analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) {
- LoopPeripheralUse use = ContainedInLoop;
- for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
- I != E; ++I) {
- const MachineBasicBlock *MBB = I->first;
- // Is this a peripheral block?
- if (use < MultiPeripheral &&
- (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) {
- if (I->second > 1) use = MultiPeripheral;
- else use = SinglePeripheral;
- continue;
- }
- // Is it a loop block?
- if (Blocks.Loop.count(MBB))
- continue;
- // It must be an unrelated block.
- return OutsideLoop;
- }
- return use;
-}
-
-/// getCriticalExits - It may be necessary to partially break critical edges
-/// leaving the loop if an exit block has phi uses of curli. Collect the exit
-/// blocks that need special treatment into CriticalExits.
-void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits) {
- CriticalExits.clear();
-
- // A critical exit block contains a phi def of curli, and has a predecessor
- // that is not in the loop nor a loop predecessor.
- // For such an exit block, the edges carrying the new variable must be moved
- // to a new pre-exit block.
- for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end();
- I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
- SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ);
- VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx);
- // This exit may not have curli live in at all. No need to split.
- if (!SuccVNI)
- continue;
- // If this is not a PHI def, it is either using a value from before the
- // loop, or a value defined inside the loop. Both are safe.
- if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx)
- continue;
- // This exit block does have a PHI. Does it also have a predecessor that is
- // not a loop block or loop predecessor?
- for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
- PE = Succ->pred_end(); PI != PE; ++PI) {
- const MachineBasicBlock *Pred = *PI;
- if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred))
- continue;
- // This is a critical exit block, and we need to split the exit edge.
- CriticalExits.insert(Succ);
- break;
- }
- }
-}
-
-/// canSplitCriticalExits - Return true if it is possible to insert new exit
-/// blocks before the blocks in CriticalExits.
-bool
-SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits) {
- // If we don't allow critical edge splitting, require no critical exits.
- if (!AllowSplit)
- return CriticalExits.empty();
-
- for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end();
- I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
- // We want to insert a new pre-exit MBB before Succ, and change all the
- // in-loop blocks to branch to the pre-exit instead of Succ.
- // Check that all the in-loop predecessors can be changed.
- for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
- PE = Succ->pred_end(); PI != PE; ++PI) {
- const MachineBasicBlock *Pred = *PI;
- // The external predecessors won't be altered.
- if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred))
- continue;
- if (!canAnalyzeBranch(Pred))
+ // If the block contains no uses, the range must be live through. At one
+ // point, RegisterCoalescer could create dangling ranges that ended
+ // mid-block.
+ if (UseI == UseE || *UseI >= Stop) {
+ ++NumThroughBlocks;
+ ThroughBlocks.set(BI.MBB->getNumber());
+ // The range shouldn't end mid-block if there are no uses. This shouldn't
+ // happen.
+ if (LVI->end < Stop)
return false;
+ } else {
+ // This block has uses. Find the first and last uses in the block.
+ BI.FirstInstr = *UseI;
+ assert(BI.FirstInstr >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastInstr = UseI[-1];
+ assert(BI.LastInstr < Stop);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+
+ // When not live in, the first use should be a def.
+ if (!BI.LiveIn) {
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ assert(LVI->start == BI.FirstInstr && "First instr should be a def");
+ BI.FirstDef = BI.FirstInstr;
+ }
+
+ // Look for gaps in the live range.
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.LiveOut = false;
+ BI.LastInstr = LastStop;
+ break;
+ }
+
+ if (LastStop < LVI->start) {
+ // There is a gap in the live range. Create duplicate entries for the
+ // live-in snippet and the live-out snippet.
+ ++NumGapBlocks;
+
+ // Push the Live-in part.
+ BI.LiveOut = false;
+ UseBlocks.push_back(BI);
+ UseBlocks.back().LastInstr = LastStop;
+
+ // Set up BI for the live-out part.
+ BI.LiveIn = false;
+ BI.LiveOut = true;
+ BI.FirstInstr = BI.FirstDef = LVI->start;
+ }
+
+ // A LiveRange that starts in the middle of the block must be a def.
+ assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ if (!BI.FirstDef)
+ BI.FirstDef = LVI->start;
+ }
+
+ UseBlocks.push_back(BI);
+
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
}
- // If Succ's layout predecessor falls through, that too must be analyzable.
- // We need to insert the pre-exit block in the gap.
- MachineFunction::const_iterator MFI = Succ;
- if (MFI == mf_.begin())
- continue;
- if (!canAnalyzeBranch(--MFI))
- return false;
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
+ break;
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start);
}
- // No problems found.
+
+ assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
return true;
}
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+ if (cli->empty())
+ return 0;
+ LiveInterval *li = const_cast<LiveInterval*>(cli);
+ LiveInterval::iterator LVI = li->begin();
+ LiveInterval::iterator LVE = li->end();
+ unsigned Count = 0;
+
+ // Loop over basic blocks where li is live.
+ MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+ for (;;) {
+ ++Count;
+ LVI = li->advanceTo(LVI, Stop);
+ if (LVI == LVE)
+ return Count;
+ do {
+ ++MFI;
+ Stop = LIS.getMBBEndIdx(MFI);
+ } while (Stop <= LVI->start);
+ }
+}
+
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ const LiveInterval &Orig = LIS.getInterval(OrigReg);
+ assert(!Orig.empty() && "Splitting empty interval?");
+ LiveInterval::const_iterator I = Orig.find(Idx);
+
+ // Range containing Idx should begin at Idx.
+ if (I != Orig.end() && I->start <= Idx)
+ return I->start == Idx;
+
+ // Range does not contain Idx, previous must end at Idx.
+ return I != Orig.begin() && (--I)->end == Idx;
+}
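+// Example for isOriginalEndpoint (illustrative indexes): if the original
+// interval contains the segment [36d;72u), then Idx = 36d and Idx = 72u are
+// original endpoints, while an index strictly inside the segment, such as
+// 48d, is not.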
+
void SplitAnalysis::analyze(const LiveInterval *li) {
clear();
- curli_ = li;
+ CurLI = li;
analyzeUses();
}
-const MachineLoop *SplitAnalysis::getBestSplitLoop() {
- assert(curli_ && "Call analyze() before getBestSplitLoop");
- if (usingLoops_.empty())
- return 0;
-
- LoopPtrSet Loops, SecondLoops;
- LoopBlocks Blocks;
- BlockPtrSet CriticalExits;
-
- // Find first-class and second class candidate loops.
- // We prefer to split around loops where curli is used outside the periphery.
- for (LoopPtrSet::const_iterator I = usingLoops_.begin(),
- E = usingLoops_.end(); I != E; ++I) {
- getLoopBlocks(*I, Blocks);
-
- // FIXME: We need an SSA updater to properly handle multiple exit blocks.
- if (Blocks.Exits.size() > 1) {
- DEBUG(dbgs() << "MultipleExits: " << **I);
- continue;
- }
-
- LoopPtrSet *LPS = 0;
- switch(analyzeLoopPeripheralUse(Blocks)) {
- case OutsideLoop:
- LPS = &Loops;
- break;
- case MultiPeripheral:
- LPS = &SecondLoops;
- break;
- case ContainedInLoop:
- DEBUG(dbgs() << "ContainedInLoop: " << **I);
- continue;
- case SinglePeripheral:
- DEBUG(dbgs() << "SinglePeripheral: " << **I);
- continue;
- }
- // Will it be possible to split around this loop?
- getCriticalExits(Blocks, CriticalExits);
- DEBUG(dbgs() << CriticalExits.size() << " critical exits: " << **I);
- if (!canSplitCriticalExits(Blocks, CriticalExits))
- continue;
- // This is a possible split.
- assert(LPS);
- LPS->insert(*I);
- }
-
- DEBUG(dbgs() << "Got " << Loops.size() << " + " << SecondLoops.size()
- << " candidate loops\n");
-
- // If there are no first class loops available, look at second class loops.
- if (Loops.empty())
- Loops = SecondLoops;
-
- if (Loops.empty())
- return 0;
-
- // Pick the earliest loop.
- // FIXME: Are there other heuristics to consider?
- const MachineLoop *Best = 0;
- SlotIndex BestIdx;
- for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E;
- ++I) {
- SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader());
- if (!Best || Idx < BestIdx)
- Best = *I, BestIdx = Idx;
- }
- DEBUG(dbgs() << "Best: " << *Best);
- return Best;
-}
-
//===----------------------------------------------------------------------===//
// Split Editor
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
- std::vector<LiveInterval*> &intervals)
- : sa_(sa), lis_(lis), vrm_(vrm),
- mri_(vrm.getMachineFunction().getRegInfo()),
- tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()),
- curli_(sa_.getCurLI()),
- dupli_(0), openli_(0),
- intervals_(intervals),
- firstInterval(intervals_.size())
-{
- assert(curli_ && "SplitEditor created from empty SplitAnalysis");
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+ LiveIntervals &lis,
+ VirtRegMap &vrm,
+ MachineDominatorTree &mdt)
+ : SA(sa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt),
+ TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+ Edit(0),
+ OpenIdx(0),
+ SpillMode(SM_Partition),
+ RegAssign(Allocator)
+{}
- // Make sure curli_ is assigned a stack slot, so all our intervals get the
- // same slot as curli_.
- if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT)
- vrm_.assignVirt2StackSlot(curli_->reg);
+void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
+ Edit = &LRE;
+ SpillMode = SM;
+ OpenIdx = 0;
+ RegAssign.clear();
+ Values.clear();
+ // Reset the LiveRangeCalc instances needed for this spill mode.
+ LRCalc[0].reset(&VRM.getMachineFunction());
+ if (SpillMode)
+ LRCalc[1].reset(&VRM.getMachineFunction());
+
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit->anyRematerializable(LIS, TII, 0);
}
-LiveInterval *SplitEditor::createInterval() {
- unsigned curli = sa_.getCurLI()->reg;
- unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli));
- LiveInterval &Intv = lis_.getOrCreateInterval(Reg);
- vrm_.grow();
- vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli));
- return &Intv;
-}
-
-LiveInterval *SplitEditor::getDupLI() {
- if (!dupli_) {
- // Create an interval for dupli that is a copy of curli.
- dupli_ = createInterval();
- dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator());
- DEBUG(dbgs() << "SplitEditor DupLI: " << *dupli_ << '\n');
+void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
}
- return dupli_;
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
}
-VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) {
- VNInfo *&VNI = valueMap_[curliVNI];
- if (!VNI)
- VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator());
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+ const VNInfo *ParentVNI,
+ SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator, bool> InsP =
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id),
+ ValueForcePair(VNI, false)));
+
+ // This was the first time (RegIdx, ParentVNI) was mapped.
+ // Keep it as a simple def without any liveness.
+ if (InsP.second)
+ return VNI;
+
+ // If the previous value was a simple mapping, add liveness for it now.
+ if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
+ SlotIndex Def = OldVNI->def;
+ LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
+ // No longer a simple mapping. Switch to a complex, non-forced mapping.
+ InsP.first->second = ValueForcePair();
+ }
+
+ // This is a complex mapping, add liveness for VNI
+ SlotIndex Def = VNI->def;
+ LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+
return VNI;
}
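The insert-for-lookup trick above deserves a closer look: the first def of a (RegIdx, ParentVNI) pair stays a cheap "simple" mapping, and only a second def demotes it to a "complex" one that needs liveness recomputed. A minimal stand-alone sketch of the same protocol, using std::map in place of the DenseMap/ValueForcePair machinery (all names here are illustrative, not the patch's types):

#include <cstdio>
#include <map>
#include <utility>

struct Val { int id; };

// (RegIdx, parent value id) -> (simple def, force-recompute flag).
typedef std::pair<unsigned, unsigned> Key;
typedef std::pair<Val*, bool> ValueForce;

std::map<Key, ValueForce> Values;

Val *defValue(unsigned RegIdx, unsigned ParentId, Val *NewDef) {
  // insert() doubles as a lookup: InsP.second is false when the key
  // already existed, and InsP.first points at the previous mapping.
  std::pair<std::map<Key, ValueForce>::iterator, bool> InsP =
      Values.insert(std::make_pair(Key(RegIdx, ParentId),
                                   ValueForce(NewDef, false)));
  if (InsP.second)
    return NewDef;                 // First def: keep the simple mapping.
  // Second def of the same parent value: no single def can represent it,
  // so demote to a complex (null) mapping that is recomputed later.
  InsP.first->second = ValueForce(0, false);
  return NewDef;
}

int main() {
  Val A = {0}, B = {1};
  defValue(1, 7, &A);
  defValue(1, 7, &B);
  std::printf("complex=%d\n", Values[Key(1, 7)].first == 0);  // prints 1
  return 0;
}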
-/// Insert a COPY instruction curli -> li. Allocate a new value from li
-/// defined by the COPY. Note that rewrite() will deal with the curli
-/// register, so this function can be used to copy from any interval - openli,
-/// curli, or dupli.
-VNInfo *SplitEditor::insertCopy(LiveInterval &LI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY),
- LI.reg).addReg(curli_->reg);
- SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
- return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator());
+void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
+ assert(ParentVNI && "Mapping NULL value");
+ ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)];
+ VNInfo *VNI = VFP.getPointer();
+
+ // ParentVNI was either unmapped or already complex mapped. Either way, just
+ // set the force bit.
+ if (!VNI) {
+ VFP.setInt(true);
+ return;
+ }
+
+ // This was previously a single mapping. Make sure the old def is represented
+ // by a trivial live range.
+ SlotIndex Def = VNI->def;
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ // Mark as complex mapped, forced.
+ VFP = ValueForcePair(0, true);
+}
+
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = 0;
+ SlotIndex Def;
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // We may be trying to avoid interference that ends at a deleted instruction,
+ // so always begin RegIdx 0 early and all others late.
+ bool Late = RegIdx != 0;
+
+ // Attempt cheap-as-a-copy rematerialization.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
+ ++NumRemats;
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit->getReg());
+ Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
+ .getDefIndex();
+ ++NumCopies;
+ }
+
+ // Define the value in Reg.
+ VNInfo *VNI = defValue(RegIdx, ParentVNI, Def);
+ VNI->setCopy(CopyMI);
+ return VNI;
}
/// Create a new virtual register and live interval.
-void SplitEditor::openIntv() {
- assert(!openli_ && "Previous LI not closed before openIntv");
- openli_ = createInterval();
- intervals_.push_back(openli_);
- liveThrough_ = false;
+unsigned SplitEditor::openIntv() {
+ // Create the complement as index 0.
+ if (Edit->empty())
+ Edit->create(LIS, VRM);
+
+ // Create the open interval.
+ OpenIdx = Edit->size();
+ Edit->create(LIS, VRM);
+ return OpenIdx;
}
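As a quick illustration of the indexing convention openIntv establishes: slot 0 of the edit always holds the complement, so the first interval a caller opens is index 1, never 0. A toy stand-in for LiveRangeEdit (purely illustrative):

#include <cassert>
#include <vector>

static std::vector<int> Edit;   // stand-in for LiveRangeEdit's intervals
static unsigned OpenIdx = 0;

unsigned openIntv() {
  if (Edit.empty())
    Edit.push_back(0);          // create the complement as index 0
  OpenIdx = Edit.size();        // the newly opened interval's index
  Edit.push_back(0);
  return OpenIdx;
}

int main() {
  assert(openIntv() == 1);      // never 0; that slot is the complement
  assert(openIntv() == 2);
  return 0;
}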
-/// enterIntvAtEnd - Enter openli at the end of MBB.
-/// PhiMBB is a successor inside openli where a PHI value is created.
-/// Currently, all entries must share the same PhiMBB.
-void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
- assert(openli_ && "openIntv not called before enterIntvAtEnd");
-
- SlotIndex EndA = lis_.getMBBEndIdx(&A);
- VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
- if (!CurVNIA) {
- DEBUG(dbgs() << " ignoring enterIntvAtEnd, curli not live out of BB#"
- << A.getNumber() << ".\n");
- return;
- }
-
- // Add a phi kill value and live range out of A.
- VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
- openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
-
- // FIXME: If this is the only entry edge, we don't need the extra PHI value.
- // FIXME: If there are multiple entry blocks (so not a loop), we need proper
- // SSA update.
-
- // Now look at the start of B.
- SlotIndex StartB = lis_.getMBBStartIdx(&B);
- SlotIndex EndB = lis_.getMBBEndIdx(&B);
- const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
- if (!CurB) {
- DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#"
- << B.getNumber() << ".\n");
- return;
- }
-
- VNInfo *VNIB = openli_->getVNInfoAt(StartB);
- if (!VNIB) {
- // Create a phi value.
- VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
- lis_.getVNInfoAllocator());
- VNIB->setIsPHIDef(true);
- // Add a minimal range for the new value.
- openli_->addRange(LiveRange(VNIB->def, std::min(EndB, CurB->end), VNIB));
-
- VNInfo *&mapVNI = valueMap_[CurB->valno];
- if (mapVNI) {
- // Multiple copies - must create PHI value.
- abort();
- } else {
- // This is the first copy of dupLR. Mark the mapping.
- mapVNI = VNIB;
- }
-
- }
-
- DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n');
+void SplitEditor::selectIntv(unsigned Idx) {
+ assert(Idx != 0 && "Cannot select the complement interval");
+ assert(Idx < Edit->size() && "Can only select previously opened interval");
+ DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ OpenIdx = Idx;
}
-/// useIntv - indicate that all instructions in MBB should use openli.
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ LIS.getLastSplitPoint(Edit->getParent(), &MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
- useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
}
void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
- assert(openli_ && "openIntv not called before useIntv");
-
- // Map the curli values from the interval into openli_
- LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
- LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
-
- if (I != B) {
- --I;
- // I begins before Start, but overlaps. openli may already have a value.
- if (I->end > Start && !openli_->liveAt(Start))
- openli_->addRange(LiveRange(Start, std::min(End, I->end),
- mapValue(I->valno)));
- ++I;
- }
-
- // The remaining ranges begin after Start.
- for (;I != E && I->start < End; ++I)
- openli_->addRange(LiveRange(I->start, std::min(End, I->end),
- mapValue(I->valno)));
- DEBUG(dbgs() << " added range [" << Start << ';' << End << "): " << *openli_
- << '\n');
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
}
-/// leaveIntvAtTop - Leave the interval at the top of MBB.
-/// Currently, only one value can leave the interval.
-void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
- assert(openli_ && "openIntv not called before leaveIntvAtTop");
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
- SlotIndex Start = lis_.getMBBStartIdx(&MBB);
- const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
+ // The interval must be live beyond the instruction at Idx.
+ SlotIndex Boundary = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Boundary.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);
+ assert(MI && "No instruction at index");
- // Is curli even live-in to MBB?
- if (!CurLR) {
- DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n");
- return;
+ // In spill mode, make live ranges as short as possible by inserting the copy
+ // before MI. This is only possible if that instruction doesn't redefine the
+ // value. The inserted COPY is not a kill, and we don't need to recompute
+ // the source live range. The spiller also won't try to hoist this copy.
+ if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
+ MI->readsVirtualRegister(Edit->getReg())) {
+ forceRecompute(0, ParentVNI);
+ defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return Idx;
}
- // Is curli defined by PHI at the beginning of MBB?
- bool isPHIDef = CurLR->valno->isPHIDef() &&
- CurLR->valno->def.getBaseIndex() == Start;
-
- // If MBB is using a value of curli that was defined outside the openli range,
- // we don't want to copy it back here.
- if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
- DEBUG(dbgs() << " leaveIntvAtTop at " << Start
- << ": using external value\n");
- liveThrough_ = true;
- return;
- }
-
- // We are going to insert a back copy, so we must have a dupli_.
- LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
- assert(DupLR && "dupli not live into black, but curli is?");
-
- // Insert the COPY instruction.
- MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
- tii_.get(TargetOpcode::COPY), dupli_->reg)
- .addReg(openli_->reg);
- SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-
- // Adjust dupli and openli values.
- if (isPHIDef) {
- // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
- // and shift the dupli def down to the COPY.
- VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
- lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
- dupli_->removeRange(Start, Idx);
- DupLR->valno->def = Idx;
- DupLR->valno->setIsPHIDef(false);
- } else {
- // The dupli value was defined somewhere inside the openli range.
- DEBUG(dbgs() << " leaveIntvAtTop source value defined at "
- << DupLR->valno->def << "\n");
- // FIXME: We may not need a PHI here if all predecessors have the same
- // value.
- VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
- lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
- // FIXME: What if DupLR->valno is used by multiple exits? SSA Update.
-
- // closeIntv is going to remove the superfluous live ranges.
- DupLR->valno->def = Idx;
- DupLR->valno->setIsPHIDef(false);
- }
-
- DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n');
+ VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
}
-/// closeIntv - Indicate that we are done editing the currently open
-/// LiveInterval, and ranges can be trimmed.
-void SplitEditor::closeIntv() {
- assert(openli_ && "openIntv not called before closeIntv");
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
- DEBUG(dbgs() << " closeIntv cleaning up\n");
- DEBUG(dbgs() << " open " << *openli_ << '\n');
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
- if (liveThrough_) {
- DEBUG(dbgs() << " value live through region, leaving dupli as is.\n");
- } else {
- // live out with copies inserted, or killed by region. Either way we need to
- // remove the overlapping region from dupli.
- getDupLI();
- for (LiveInterval::iterator I = openli_->begin(), E = openli_->end();
- I != E; ++I) {
- dupli_->removeRange(I->start, I->end);
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) &&
+ "Parent changes value in extended range");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // The complement interval will be extended as needed by LRCalc.extend().
+ if (ParentVNI)
+ forceRecompute(0, ParentVNI);
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+//===----------------------------------------------------------------------===//
+// Spill modes
+//===----------------------------------------------------------------------===//
+
+void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
+ LiveInterval *LI = Edit->get(0);
+ DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
+ RegAssignMap::iterator AssignI;
+ AssignI.setMap(RegAssign);
+
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ VNInfo *VNI = Copies[i];
+ SlotIndex Def = VNI->def;
+ MachineInstr *MI = LIS.getInstructionFromIndex(Def);
+ assert(MI && "No instruction for back-copy");
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ bool AtBegin;
+ do AtBegin = MBBI == MBB->begin();
+ while (!AtBegin && (--MBBI)->isDebugValue());
+
+ DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
+ LI->removeValNo(VNI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+
+ // Adjust RegAssign if a register assignment is killed at VNI->def. We
+ // want to avoid calculating the live range of the source register if
+ // possible.
+ AssignI.find(VNI->def.getPrevSlot());
+ if (!AssignI.valid() || AssignI.start() >= Def)
+ continue;
+ // If MI doesn't kill the assigned register, just leave it.
+ if (AssignI.stop() != Def)
+ continue;
+ unsigned RegIdx = AssignI.value();
+ if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
+ DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
+ forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
+ } else {
+ SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex();
+ DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
+ AssignI.setStop(Kill);
}
- // FIXME: A block branching to the entry block may also branch elsewhere
- // curli is live. We need both openli and curli to be live in that case.
- DEBUG(dbgs() << " dup2 " << *dupli_ << '\n');
}
- openli_ = 0;
}
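The do/while above is a compact idiom for stepping backwards past DBG_VALUE instructions to find the closest real predecessor. A self-contained version over std::list shows the same control flow (the Instr type is a stand-in for MachineInstr):

#include <cstdio>
#include <list>

struct Instr { bool IsDebugValue; int id; };

// Find the closest real (non-debug) instruction before It, mirroring the
// do/while above. Returns end() when there is none.
std::list<Instr>::iterator
prevRealInstr(std::list<Instr> &MBB, std::list<Instr>::iterator It) {
  bool AtBegin;
  do
    AtBegin = It == MBB.begin();
  while (!AtBegin && (--It)->IsDebugValue);
  return AtBegin ? MBB.end() : It;
}

int main() {
  Instr Data[] = {{false, 0}, {true, 1}, {true, 2}, {false, 3}};
  std::list<Instr> MBB(Data, Data + 4);
  std::list<Instr>::iterator It = MBB.end();
  --It;                                       // the back-copy at id 3
  std::list<Instr>::iterator P = prevRealInstr(MBB, It);
  std::printf("%d\n", P == MBB.end() ? -1 : P->id);   // prints 0
  return 0;
}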
-/// rewrite - after all the new live ranges have been created, rewrite
-/// instructions using curli to use the new intervals.
-void SplitEditor::rewrite() {
- assert(!openli_ && "Previous LI not closed before rewrite");
- const LiveInterval *curli = sa_.getCurLI();
- for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg),
- RE = mri_.reg_end(); RI != RE;) {
+MachineBasicBlock*
+SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB) {
+ if (MBB == DefMBB)
+ return MBB;
+ assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def.");
+
+ const MachineLoopInfo &Loops = SA.Loops;
+ const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB);
+ MachineDomTreeNode *DefDomNode = MDT[DefMBB];
+
+ // Best candidate so far.
+ MachineBasicBlock *BestMBB = MBB;
+ unsigned BestDepth = UINT_MAX;
+
+ for (;;) {
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
+
+ // MBB isn't in a loop, it doesn't get any better. All dominators have a
+ // higher frequency by definition.
+ if (!Loop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth 0\n");
+ return MBB;
+ }
+
+ // We'll never be able to exit the DefLoop.
+ if (Loop == DefLoop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " in the same loop\n");
+ return MBB;
+ }
+
+ // Least busy dominator seen so far.
+ unsigned Depth = Loop->getLoopDepth();
+ if (Depth < BestDepth) {
+ BestMBB = MBB;
+ BestDepth = Depth;
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth " << Depth << '\n');
+ }
+
+ // Leave loop by going to the immediate dominator of the loop header.
+ // This is a bigger stride than simply walking up the dominator tree.
+ MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom();
+
+ // Too far up the dominator tree?
+ if (!IDom || !MDT.dominates(DefDomNode, IDom))
+ return BestMBB;
+
+ MBB = IDom->getBlock();
+ }
+}
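The walk above strides from loop to loop via the header's immediate dominator; the essential bookkeeping is "remember the least-deeply-nested dominator seen so far". A toy version over plain parent pointers captures that (stepping one dominator at a time rather than LLVM's loop-sized strides; the node layout is assumed, not LLVM's):

#include <climits>
#include <cstdio>

struct Block {
  int id;
  unsigned LoopDepth;   // 0 = outside all loops
  Block *IDom;          // immediate dominator, 0 at the root
};

// Walk dominators from MBB up to DefMBB, keeping the shallowest block.
Block *findShallowDominator(Block *MBB, Block *DefMBB) {
  Block *Best = MBB;
  unsigned BestDepth = UINT_MAX;
  for (Block *B = MBB; B; B = B->IDom) {
    if (B->LoopDepth == 0)
      return B;                    // depth 0 can't be beaten
    if (B->LoopDepth < BestDepth) {
      Best = B;
      BestDepth = B->LoopDepth;
    }
    if (B == DefMBB)
      break;                       // never walk above the def
  }
  return Best;
}

int main() {
  Block Def = {0, 0, 0};
  Block Outer = {1, 1, &Def};
  Block Inner = {2, 2, &Outer};
  std::printf("hoist to %d\n", findShallowDominator(&Inner, &Def)->id);
  return 0;                        // prints "hoist to 0"
}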
+
+void SplitEditor::hoistCopiesForSize() {
+ // Get the complement interval, always RegIdx 0.
+ LiveInterval *LI = Edit->get(0);
+ LiveInterval *Parent = &Edit->getParent();
+
+ // Track the nearest common dominator for all back-copies for each ParentVNI,
+ // indexed by ParentVNI->id.
+ typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
+ SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+
+ // Find the nearest common dominator for parent values with multiple
+ // back-copies. If a single back-copy dominates, put it in DomPair.second.
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ assert(ParentVNI && "Parent not live at complement def");
+
+ // Don't hoist remats. The complement is probably going to disappear
+ // completely anyway.
+ if (Edit->didRematerialize(ParentVNI))
+ continue;
+
+ MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+ DomPair &Dom = NearestDom[ParentVNI->id];
+
+ // Keep directly defined parent values. This is either a PHI or an
+ // instruction in the complement range. All other copies of ParentVNI
+ // should be eliminated.
+ if (VNI->def == ParentVNI->def) {
+ DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
+ Dom = DomPair(ValMBB, VNI->def);
+ continue;
+ }
+ // Skip the singly mapped values. There is nothing to gain from hoisting a
+ // single back-copy.
+ if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) {
+ DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
+ continue;
+ }
+
+ if (!Dom.first) {
+ // First time we see ParentVNI. VNI dominates itself.
+ Dom = DomPair(ValMBB, VNI->def);
+ } else if (Dom.first == ValMBB) {
+ // Two defs in the same block. Pick the earlier def.
+ if (!Dom.second.isValid() || VNI->def < Dom.second)
+ Dom.second = VNI->def;
+ } else {
+ // Different basic blocks. Check if one dominates.
+ MachineBasicBlock *Near =
+ MDT.findNearestCommonDominator(Dom.first, ValMBB);
+ if (Near == ValMBB)
+ // Def ValMBB dominates.
+ Dom = DomPair(ValMBB, VNI->def);
+ else if (Near != Dom.first)
+ // None dominate. Hoist to common dominator, need new def.
+ Dom = DomPair(Near, SlotIndex());
+ }
+
+ DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
+ << " for parent " << ParentVNI->id << '@' << ParentVNI->def
+ << " hoist to BB#" << Dom.first->getNumber() << ' '
+ << Dom.second << '\n');
+ }
+
+ // Insert the hoisted copies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ DomPair &Dom = NearestDom[i];
+ if (!Dom.first || Dom.second.isValid())
+ continue;
+ // This value needs a hoisted copy inserted at the end of Dom.first.
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
+ // Get a less loopy dominator than Dom.first.
+ Dom.first = findShallowDominator(Dom.first, DefMBB);
+ SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
+ Dom.second =
+ defFromParent(0, ParentVNI, Last, *Dom.first,
+ LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def;
+ }
+
+ // Remove redundant back-copies that are now known to be dominated by another
+ // def with the same value.
+ SmallVector<VNInfo*, 8> BackCopies;
+ for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
+ VI != VE; ++VI) {
+ VNInfo *VNI = *VI;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ const DomPair &Dom = NearestDom[ParentVNI->id];
+ if (!Dom.first || Dom.second == VNI->def)
+ continue;
+ BackCopies.push_back(VNI);
+ forceRecompute(0, ParentVNI);
+ }
+ removeBackCopies(BackCopies);
+}
+
+
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need LRCalc.extend().
+bool SplitEditor::transferValues() {
+ bool Skipped = false;
+ RegAssignMap::const_iterator AssignI = RegAssign.begin();
+ for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
+ ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
+ DEBUG(dbgs() << " blit " << *ParentI << ':');
+ VNInfo *ParentVNI = ParentI->valno;
+ // RegAssign has holes where RegIdx 0 should be used.
+ SlotIndex Start = ParentI->start;
+ AssignI.advanceTo(Start);
+ do {
+ unsigned RegIdx;
+ SlotIndex End = ParentI->end;
+ if (!AssignI.valid()) {
+ RegIdx = 0;
+ } else if (AssignI.start() <= Start) {
+ RegIdx = AssignI.value();
+ if (AssignI.stop() < End) {
+ End = AssignI.stop();
+ ++AssignI;
+ }
+ } else {
+ RegIdx = 0;
+ End = std::min(End, AssignI.start());
+ }
+
+ // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
+ LiveInterval *LI = Edit->get(RegIdx);
+
+ // Check for a simply defined value that can be blitted directly.
+ ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
+ if (VNInfo *VNI = VFP.getPointer()) {
+ DEBUG(dbgs() << ':' << VNI->id);
+ LI->addRange(LiveRange(Start, End, VNI));
+ Start = End;
+ continue;
+ }
+
+ // Skip values with forced recomputation.
+ if (VFP.getInt()) {
+ DEBUG(dbgs() << "(recalc)");
+ Skipped = true;
+ Start = End;
+ continue;
+ }
+
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+
+ // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+ // so the live range is accurate. Add live-in blocks in [Start;End) to the
+ // LiveInBlocks.
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex BlockStart, BlockEnd;
+ tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+
+ // The first block may be live-in, or it may have its own def.
+ if (Start != BlockStart) {
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped value");
+ DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+ // MBB has its own def. Is it also live-out?
+ if (BlockEnd <= End)
+ LRC.setLiveOutValue(MBB, VNI);
+
+ // Skip to the next block for live-in.
+ ++MBB;
+ BlockStart = BlockEnd;
+ }
+
+ // Handle the live-in blocks covered by [Start;End).
+ assert(Start <= BlockStart && "Expected live-in block");
+ while (BlockStart < End) {
+ DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+ BlockEnd = LIS.getMBBEndIdx(MBB);
+ if (BlockStart == ParentVNI->def) {
+ // This block has the def of a parent PHI, so it isn't live-in.
+ assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+ VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped parent PHI");
+ if (End >= BlockEnd)
+ LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
+ } else {
+ // This block needs a live-in value. The last block covered may not
+ // be live-out.
+ if (End < BlockEnd)
+ LRC.addLiveInBlock(LI, MDT[MBB], End);
+ else {
+ // Live-through, and we don't know the value.
+ LRC.addLiveInBlock(LI, MDT[MBB]);
+ LRC.setLiveOutValue(MBB, 0);
+ }
+ }
+ BlockStart = BlockEnd;
+ ++MBB;
+ }
+ Start = End;
+ } while (Start != ParentI->end);
+ DEBUG(dbgs() << '\n');
+ }
+
+ LRCalc[0].calculateValues(LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ if (SpillMode)
+ LRCalc[1].calculateValues(LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+
+ return Skipped;
+}
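The blit loop above chops each parent segment against RegAssign, treating holes in the map as RegIdx 0. That chopping logic stands alone nicely; here is a sketch over a sorted segment vector, with std::vector playing the role of IntervalMap and invented slot numbers:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Seg { int start, stop; unsigned idx; };   // half-open [start;stop)

// Chop the parent range [Start;End) into maximal pieces that each map to
// a single register index. Holes in Assign map to index 0 (complement).
void blit(int Start, int End, const std::vector<Seg> &Assign) {
  size_t i = 0;
  while (i != Assign.size() && Assign[i].stop <= Start)
    ++i;                                        // advanceTo(Start)
  while (Start < End) {
    int PieceEnd = End;
    unsigned RegIdx = 0;
    if (i != Assign.size() && Assign[i].start <= Start) {
      RegIdx = Assign[i].idx;                   // inside an assigned segment
      if (Assign[i].stop < End) { PieceEnd = Assign[i].stop; ++i; }
    } else if (i != Assign.size()) {
      PieceEnd = std::min(End, Assign[i].start); // hole before next segment
    }
    std::printf("[%d;%d) -> %u\n", Start, PieceEnd, RegIdx);
    Start = PieceEnd;
  }
}

int main() {
  std::vector<Seg> Assign;
  Seg A = {10, 40, 1}, B = {60, 80, 2};
  Assign.push_back(A);
  Assign.push_back(B);
  blit(0, 100, Assign);
  // [0;10)->0 [10;40)->1 [40;60)->0 [60;80)->2 [80;100)->0
  return 0;
}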
+
+void SplitEditor::extendPHIKillRanges() {
+ // Extend live ranges to be live-out for successor PHI values.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *PHIVNI = *I;
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ LiveInterval *LI = Edit->get(RegIdx);
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI);
+ SlotIndex LastUse = End.getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (Edit->getParent().liveAt(LastUse)) {
+ assert(RegAssign.lookup(LastUse) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ LRC.extend(LI, End,
+ LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator());
+ }
+ }
+ }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
MachineOperand &MO = RI.getOperand();
MachineInstr *MI = MO.getParent();
++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
if (MI->isDebugValue()) {
DEBUG(dbgs() << "Zapping " << *MI);
- // FIXME: We can do much better with debug values.
MO.setReg(0);
continue;
}
- SlotIndex Idx = lis_.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
- LiveInterval *LI = dupli_;
- for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
- LiveInterval *testli = intervals_[i];
- if (testli->liveAt(Idx)) {
- LI = testli;
- break;
- }
+
+ // <undef> operands don't really read the register, so it doesn't matter
+ // which register we choose. When the use operand is tied to a def, we must
+ // use the same register as the def, so just do that always.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (MO.isDef() || MO.isUndef())
+ Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ LiveInterval *LI = Edit->get(RegIdx);
+ MO.setReg(LI->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges || MO.isUndef())
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+ // We may want to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ Idx = Idx.getPrevSlot();
+ if (!Edit->getParent().liveAt(Idx))
+ continue;
+ } else
+ Idx = Idx.getUseIndex();
+
+ getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(),
+ &MDT, &LIS.getVNInfoAllocator());
+ }
+}
+
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+ LiveInterval *LI = *I;
+ for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+ LII != LIE; ++LII) {
+ // Dead defs end at the store slot.
+ if (LII->end != LII->valno->def.getNextSlot())
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
+ assert(MI && "Missing instruction for dead def");
+ MI->addRegisterDead(LI->reg, &TRI);
+
+ if (!MI->allDefsAreDead())
+ continue;
+
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ Dead.push_back(MI);
}
- if (LI)
- MO.setReg(LI->reg);
- DEBUG(dbgs() << "rewrite " << Idx << '\t' << *MI);
}
- // dupli_ goes in last, after rewriting.
- if (dupli_) {
- dupli_->RenumberValues();
- intervals_.push_back(dupli_);
+ if (Dead.empty())
+ return;
+
+ Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
+}
+
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+ ++NumFinished;
+
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
+
+ // Add the original defs from the parent interval.
+ for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+ E = Edit->getParent().vni_end(); I != E; ++I) {
+ const VNInfo *ParentVNI = *I;
+ if (ParentVNI->isUnused())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+ VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
+ VNI->setIsPHIDef(ParentVNI->isPHIDef());
+ VNI->setCopy(ParentVNI->getCopy());
+
+ // Force rematted values to be recomputed everywhere.
+ // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI))
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ forceRecompute(i, ParentVNI);
+ }
+
+ // Hoist back-copies to the complement interval when in spill mode.
+ switch (SpillMode) {
+ case SM_Partition:
+ // Leave all back-copies as is.
+ break;
+ case SM_Size:
+ hoistCopiesForSize();
+ break;
+ case SM_Speed:
+ llvm_unreachable("Spill mode 'speed' not implemented yet");
+ break;
+ }
+
+ // Transfer the simply mapped values, check if any are skipped.
+ bool Skipped = transferValues();
+ if (Skipped)
+ extendPHIKillRanges();
+ else
+ ++NumSimple;
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
+ // Delete defs that were rematted everywhere.
+ if (Skipped)
+ deleteRematVictims();
+
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
+ (*I)->RenumberValues(LIS);
+
+ // Provide a reverse mapping from original indices to Edit ranges.
+ if (LRMap) {
+ LRMap->clear();
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ LRMap->push_back(i);
+ }
+
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ LiveInterval *li = Edit->get(i);
+ unsigned NumComp = ConEQ.Classify(li);
+ if (NumComp <= 1)
+ continue;
+ DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
+ SmallVector<LiveInterval*, 8> dups;
+ dups.push_back(li);
+ for (unsigned j = 1; j != NumComp; ++j)
+ dups.push_back(&Edit->create(LIS, VRM));
+ ConEQ.Distribute(&dups[0], MRI);
+ // The new intervals all map back to i.
+ if (LRMap)
+ LRMap->resize(Edit->size(), i);
}
// Calculate spill weight and allocation hints for new intervals.
- VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_);
- for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
- LiveInterval &li = *intervals_[i];
- vrai.CalculateRegClass(li.reg);
- vrai.CalculateWeightAndHint(li);
- DEBUG(dbgs() << "new intv " << mri_.getRegClass(li.reg)->getName() << ":"
- << li << '\n');
- }
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
}
//===----------------------------------------------------------------------===//
-// Loop Splitting
+// Single Block Splitting
//===----------------------------------------------------------------------===//
-bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
- SplitAnalysis::LoopBlocks Blocks;
- sa_.getLoopBlocks(Loop, Blocks);
+bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
+ bool SingleInstrs) const {
+ // Always split for multiple instructions.
+ if (!BI.isOneInstr())
+ return true;
+ // Don't split for single instructions unless explicitly requested.
+ if (!SingleInstrs)
+ return false;
+ // Splitting a live-through range always makes progress.
+ if (BI.LiveIn && BI.LiveOut)
+ return true;
+ // No point in isolating a copy. It has no register class constraints.
+ if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ return false;
+ // Finally, don't isolate an end point that was created by earlier splits.
+ return isOriginalEndpoint(BI.FirstInstr);
+}
- // Break critical edges as needed.
- SplitAnalysis::BlockPtrSet CriticalExits;
- sa_.getCriticalExits(Blocks, CriticalExits);
- assert(CriticalExits.empty() && "Cannot break critical exits yet");
-
- // Create new live interval for the loop.
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
openIntv();
-
- // Insert copies in the predecessors.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(),
- E = Blocks.Preds.end(); I != E; ++I) {
- MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
- enterIntvAtEnd(MBB, *Loop->getHeader());
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastInstr));
+ } else {
+ // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastInstr);
}
-
- // Switch all loop blocks.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(),
- E = Blocks.Loop.end(); I != E; ++I)
- useIntv(**I);
-
- // Insert back copies in the exit blocks.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(),
- E = Blocks.Exits.end(); I != E; ++I) {
- MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
- leaveIntvAtTop(MBB);
- }
-
- // Done.
- closeIntv();
- rewrite();
- return dupli_;
}
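splitSingleBlock picks between two shapes depending on whether the last use falls before the block's last legal split point. A hedged sketch of just that decision, with integer slots standing in for SlotIndex:

#include <cstdio>

struct BlockUse { int FirstInstr, LastInstr, LastSplitPoint; bool LiveOut; };

// Mirror of the branch in splitSingleBlock above.
const char *singleBlockShape(const BlockUse &BI) {
  if (!BI.LiveOut || BI.LastInstr < BI.LastSplitPoint)
    return "enter before first use, leave after last use";
  return "leave before the last split point, overlap to the last use";
}

int main() {
  BlockUse Normal = {4, 10, 20, true};
  BlockUse LateUse = {4, 30, 20, true};
  std::printf("%s\n%s\n", singleBlockShape(Normal),
              singleBlockShape(LateUse));
  return 0;
}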
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support a method of global live range splitting that uses a
+// global algorithm to decide intervals for CFG edges. They will insert split
+// points and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block");
+ assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf");
+ assert((!EnterAfter || EnterAfter >= Start) && "Interference before block");
+
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+
+ if (!IntvOut) {
+ DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+ assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf");
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx;
+ if (LeaveBefore && LeaveBefore < LSP) {
+ Idx = enterIntvBefore(LeaveBefore);
+ useIntv(Idx, Stop);
+ } else {
+ Idx = enterIntvAtEnd(*MBB);
+ }
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
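The four live-through cases above reduce to a small decision table over (IntvIn, IntvOut, LeaveBefore, EnterAfter). An illustrative reduction, with 0 meaning "spilled" for the interval indexes and "no interference" for the slots, and plain integer comparison standing in for the base/boundary index refinement (not LLVM code):

#include <cstdio>

struct Thru { unsigned IntvIn, IntvOut; int LeaveBefore, EnterAfter; };

const char *liveThroughPlan(const Thru &T) {
  if (!T.IntvOut) return "spill on entry";
  if (!T.IntvIn)  return "reload on exit";
  if (T.IntvIn == T.IntvOut && !T.LeaveBefore && !T.EnterAfter)
    return "straight through";
  if (T.IntvIn != T.IntvOut &&
      (!T.LeaveBefore || !T.EnterAfter || T.LeaveBefore > T.EnterAfter))
    return "switch intervals between the interference";
  return "local interval around overlapping interference";
}

int main() {
  Thru A = {1, 1, 0, 0};    // same interval, no interference
  Thru B = {1, 2, 50, 30};  // non-overlapping interference
  std::printf("%s\n%s\n", liveThroughPlan(A), liveThroughPlan(B));
  return 0;
}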
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) {
+ DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastInstr);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastInstr < LSP) {
+ DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastInstr < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastInstr);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastInstr);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) {
+ DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstInstr, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) {
+ DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr));
+ useIntv(From, Idx);
+}
diff --git a/src/LLVM/lib/CodeGen/SplitKit.h b/src/LLVM/lib/CodeGen/SplitKit.h
index d125a45..d8fc212 100644
--- a/src/LLVM/lib/CodeGen/SplitKit.h
+++ b/src/LLVM/lib/CodeGen/SplitKit.h
@@ -1,4 +1,4 @@
-//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,114 +12,183 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallPtrSet.h"
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
namespace llvm {
+class ConnectedVNInfoEqClasses;
class LiveInterval;
class LiveIntervals;
+class LiveRangeEdit;
class MachineInstr;
-class MachineLoop;
class MachineLoopInfo;
class MachineRegisterInfo;
class TargetInstrInfo;
+class TargetRegisterInfo;
class VirtRegMap;
class VNInfo;
+class raw_ostream;
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
/// opportunities.
class SplitAnalysis {
public:
- const MachineFunction &mf_;
- const LiveIntervals &lis_;
- const MachineLoopInfo &loops_;
- const TargetInstrInfo &tii_;
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks.
+ ///
+ /// Two BlockInfo entries are created for template 4. One for the live-in
+ /// segment, and one for the live-out segment. These entries look as if the
+ /// block were split in the middle where the live range isn't live.
+ ///
+ /// Live-through blocks without any uses don't get BlockInfo entries. They
+ /// are simply listed in ThroughBlocks instead.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstInstr; ///< First instr accessing current reg.
+ SlotIndex LastInstr; ///< Last instr accessing current reg.
+ SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex().
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstInstr, LastInstr);
+ }
+ };
private:
// Current live interval.
- const LiveInterval *curli_;
+ const LiveInterval *CurLI;
- // Instructions using the the current register.
- typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
- InstrPtrSet usingInstrs_;
+ /// LastSplitPoint - Last legal split point in each basic block in the current
+ /// function. The first entry is the first terminator, the second entry is the
+ /// last valid split point for a variable that is live in to a landing pad
+ /// successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
- // The number of instructions using curli in each basic block.
- typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
- BlockCountMap usingBlocks_;
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
- // Loops where the curent interval is used.
- typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
- LoopPtrSet usingLoops_;
+ /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+ /// the live range has a gap.
+ unsigned NumGapBlocks;
- // Sumarize statistics by counting instructions using curli_.
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks;
+
+ /// DidRepairRange - analyze was forced to shrinkToUses().
+ bool DidRepairRange;
+
+ SlotIndex computeLastSplitPoint(unsigned Num);
+
+ // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
- /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
- /// analyzed.
- bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ bool calcLiveBlockInfo();
public:
- SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
const MachineLoopInfo &mli);
- /// analyze - set curli to the specified interval, and analyze how it may be
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
/// split.
void analyze(const LiveInterval *li);
- const LiveInterval *getCurLI() { return curli_; }
+ /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
+ /// by analyze(). This really shouldn't happen, but sometimes the coalescer
+ /// can create live ranges that end in mid-air.
+ bool didRepairRange() const { return DidRepairRange; }
/// clear - clear all data structures so SplitAnalysis is ready to analyze a
/// new interval.
void clear();
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
+
+ /// getLastSplitPoint - Return the base index of the last valid split point
+ /// in the basic block numbered Num.
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ // Inline the common simple case.
+ if (LastSplitPoint[Num].first.isValid() &&
+ !LastSplitPoint[Num].second.isValid())
+ return LastSplitPoint[Num].first;
+ return computeLastSplitPoint(Num);
+ }
+
+ /// isOriginalEndpoint - Return true if the original live range was killed or
+ /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+ /// and 'use' for an early-clobber def.
+ /// This can be used to recognize code inserted by earlier live range
+ /// splitting.
+ bool isOriginalEndpoint(SlotIndex Idx) const;
+
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
+
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+ /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
+ unsigned getNumLiveBlocks() const {
+ return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
+ }
+
+ /// countLiveBlocks - Return the number of blocks where li is live. This is
+ /// guaranteed to return the same number as getNumLiveBlocks() after calling
+ /// analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
+
typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
- // Sets of basic blocks surrounding a machine loop.
- struct LoopBlocks {
- BlockPtrSet Loop; // Blocks in the loop.
- BlockPtrSet Preds; // Loop predecessor blocks.
- BlockPtrSet Exits; // Loop exit blocks.
-
- void clear() {
- Loop.clear();
- Preds.clear();
- Exits.clear();
- }
- };
-
- // Calculate the block sets surrounding the loop.
- void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
-
- /// LoopPeripheralUse - how is a variable used in and around a loop?
- /// Peripheral blocks are the loop predecessors and exit blocks.
- enum LoopPeripheralUse {
- ContainedInLoop, // All uses are inside the loop.
- SinglePeripheral, // At most one instruction per peripheral block.
- MultiPeripheral, // Multiple instructions in some peripheral blocks.
- OutsideLoop // Uses outside loop periphery.
- };
-
- /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
- /// and around the Loop.
- LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
-
- /// getCriticalExits - It may be necessary to partially break critical edges
- /// leaving the loop if an exit block has phi uses of curli. Collect the exit
- /// blocks that need special treatment into CriticalExits.
- void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
-
- /// canSplitCriticalExits - Return true if it is possible to insert new exit
- /// blocks before the blocks in CriticalExits.
- bool canSplitCriticalExits(const LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits);
-
- /// getBestSplitLoop - Return the loop where curli may best be split to a
- /// separate register, or NULL.
- const MachineLoop *getBestSplitLoop();
+ /// shouldSplitSingleBlock - Returns true if it would help to create a local
+ /// live range for the instructions in BI. There is normally no benefit to
+ /// creating a live range for a single instruction, but it does enable
+ /// register class inflation if the instruction has a restricted register
+ /// class.
+ ///
+ /// @param BI The block to be isolated.
+ /// @param SingleInstrs True when single instructions should be isolated.
+ bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const;
};
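The six BlockInfo templates documented above boil down to the LiveIn/LiveOut flags plus whether the block has any uses at all. A quick illustrative classifier (gap blocks, template 4, are omitted for brevity; the struct is a stand-in, not the real BlockInfo):

#include <cstdio>

struct Flags { bool LiveIn, LiveOut, HasUses; };

const char *classify(const Flags &BI) {
  if (!BI.HasUses) return "6. live-through without uses";
  if (BI.LiveIn && BI.LiveOut) return "5. live-through with uses or defs";
  if (BI.LiveIn) return "2. live-in, kill";
  if (BI.LiveOut) return "3. def, live-out";
  return "1. internal to block";
}

int main() {
  Flags F = {true, false, true};
  std::printf("%s\n", classify(F));   // 2. live-in, kill
  return 0;
}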
+
/// SplitEditor - Edit machine code and LiveIntervals for live range
/// splitting.
///
@@ -129,100 +198,265 @@
/// - Mark the ranges where the new interval is used with useIntv*
/// - Mark the places where the interval is exited with exitIntv*.
/// - Finish the current interval with closeIntv and repeat from 2.
-/// - Rewrite instructions with rewrite().
+/// - Rewrite instructions with finish().
///
class SplitEditor {
- SplitAnalysis &sa_;
- LiveIntervals &lis_;
- VirtRegMap &vrm_;
- MachineRegisterInfo &mri_;
- const TargetInstrInfo &tii_;
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
- /// curli_ - The immutable interval we are currently splitting.
- const LiveInterval *const curli_;
+public:
- /// dupli_ - Created as a copy of curli_, ranges are carved out as new
- /// intervals get added through openIntv / closeIntv. This is used to avoid
- /// editing curli_.
- LiveInterval *dupli_;
+ /// ComplementSpillMode - Select how the complement live range should be
+ /// created. SplitEditor automatically creates interval 0 to contain
+ /// anything that isn't added to another interval. This complement interval
+ /// can get quite complicated, and it can sometimes be an advantage to allow
+ /// it to overlap the other intervals. If it is going to spill anyway, no
+ /// registers are wasted by keeping a value in two places at the same time.
+ enum ComplementSpillMode {
+ /// SM_Partition (Default) - Try to create the complement interval so it
+ /// doesn't overlap any other intervals, and the original interval is
+ /// partitioned. This may require a large number of back copies and extra
+ /// PHI-defs. Only segments marked with overlapIntv will be overlapping.
+ SM_Partition,
- /// Currently open LiveInterval.
- LiveInterval *openli_;
+ /// SM_Size - Overlap intervals to minimize the number of inserted COPY
+ /// instructions. Copies to the complement interval are hoisted to their
+ /// common dominator, so only one COPY is required per value in the
+ /// complement interval. This also means that no extra PHI-defs need to be
+ /// inserted in the complement interval.
+ SM_Size,
- /// createInterval - Create a new virtual register and LiveInterval with same
- /// register class and spill slot as curli.
- LiveInterval *createInterval();
+ /// SM_Speed - Overlap intervals to minimize the expected execution
+ /// frequency of the inserted copies. This is very similar to SM_Size, but
+ /// the complement interval may get some extra PHI-defs.
+ SM_Speed
+ };
- /// getDupLI - Ensure dupli is created and return it.
- LiveInterval *getDupLI();
+private:
- /// valueMap_ - Map values in dupli to values in openIntv. These are direct 1-1
- /// mappings, and do not include values created by inserted copies.
- DenseMap<const VNInfo*, VNInfo*> valueMap_;
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit *Edit;
- /// mapValue - Return the openIntv value that corresponds to the given curli
- /// value.
- VNInfo *mapValue(const VNInfo *curliVNI);
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
- /// A dupli value is live through openIntv.
- bool liveThrough_;
+ /// The current spill mode, selected by reset().
+ ComplementSpillMode SpillMode;
- /// All the new intervals created for this split are added to intervals_.
- std::vector<LiveInterval*> &intervals_;
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
- /// The index into intervals_ of the first interval we added. There may be
- /// others from before we got it.
- unsigned firstInterval;
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
- /// Insert a COPY instruction curli -> li. Allocate a new value from li
- /// defined by the COPY
- VNInfo *insertCopy(LiveInterval &LI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
+
+ typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
+ typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. (Null, false) - the value is mapped to multiple values in
+ /// Edit.get(RegIdx). Each value is represented by a minimal live range at
+ /// its def. The full live range can be inferred exactly from the range
+ /// of RegIdx in RegAssign.
+ /// 3. (Null, true). As above, but the ranges in RegAssign are too large, and
+ /// the live range must be recomputed using LiveRangeCalc::extend().
+ /// 4. (VNI, false) The value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ /// LRCalc - Cache for computing live ranges and SSA update. Each instance
+ /// can only handle non-overlapping live ranges, so use a separate
+ /// LiveRangeCalc instance for the complement interval when in spill mode.
+ LiveRangeCalc LRCalc[2];
+
+ /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the
+ /// complement interval can overlap the other intervals, so it gets its own
+ /// LRCalc instance. When not in spill mode, all intervals can share one.
+ LiveRangeCalc &getLRCalc(unsigned RegIdx) {
+ return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
+ }
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// forceRecompute - Force the live range of ParentVNI in RegIdx to be
+ /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
+ /// This is used for values whose live range doesn't match RegAssign exactly.
+ /// They could have been rematerialized, or back-copies may have been moved.
+ void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// removeBackCopies - Remove the copy instructions that define the values
+ /// in the Copies vector from the complement interval.
+ void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies);
+
+ /// findShallowDominator - Returns the least busy dominator of MBB that is
+ /// also dominated by DefMBB. Busy is measured by loop depth.
+ MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB);
+
+ /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+ /// way that minimizes code size. This implements the SM_Size spill mode.
+ void hoistCopiesForSize();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned(bool ExtendRanges);
+
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
- std::vector<LiveInterval*> &newIntervals);
+ MachineDominatorTree&);
- /// getAnalysis - Get the corresponding analysis.
- SplitAnalysis &getAnalysis() { return sa_; }
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
/// Create a new virtual register and live interval.
- void openIntv();
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
- /// enterIntvAtEnd - Enter openli at the end of MBB.
- /// PhiMBB is a successor inside openli where a PHI value is created.
- /// Currently, all entries must share the same PhiMBB.
- void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB);
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
- /// useIntv - indicate that all instructions in MBB should use openli.
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
+
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in MBB should use OpenLI.
void useIntv(const MachineBasicBlock &MBB);
- /// useIntv - indicate that all instructions in range should use openli.
+ /// useIntv - indicate that all instructions in range should use OpenLI.
void useIntv(SlotIndex Start, SlotIndex End);
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
+
/// leaveIntvAtTop - Leave the interval at the top of MBB.
- /// Currently, only one value can leave the interval.
- void leaveIntvAtTop(MachineBasicBlock &MBB);
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
- /// closeIntv - Indicate that we are done editing the currently open
- /// LiveInterval, and ranges can be trimmed.
- void closeIntv();
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
- /// rewrite - after all the new live ranges have been created, rewrite
- /// instructions using curli to use the new intervals.
- void rewrite();
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = 0);
+
+ /// dump - print the current interval mapping to dbgs().
+ void dump() const;
// ===--- High level methods ---===
- /// splitAroundLoop - Split curli into a separate live interval inside
- /// the loop. Return true if curli has been completely replaced, false if
- /// curli is still intact, and needs to be spilled or split further.
- bool splitAroundLoop(const MachineLoop*);
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
};
-
}
+
+#endif
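Read together with the class comment, the interface above is meant to be driven in that fixed order. Here is a sketch for the simple case of carving one bracketed region of uses into its own register; all names are local to the sketch, and reset()'s second argument could instead be SM_Size or SM_Speed when the complement is expected to spill anyway:

// Illustrative only: split a single [Start, Stop) region of uses.
void splitRegion(SplitEditor &SE, LiveRangeEdit &Edit,
                 SlotIndex Start, SlotIndex Stop) {
  SE.reset(Edit);                                 // 1. prepare the split
  SE.openIntv();                                  // 2. open interval (index >= 1)
  SlotIndex SegStart = SE.enterIntvBefore(Start); // 3. COPY in before Start
  SlotIndex SegStop = SE.leaveIntvAfter(Stop);    // 4. COPY out after Stop
  SE.useIntv(SegStart, SegStop);                  // 5. uses in range are rewritten
  SE.finish();                                    // 6. complement + rewriting
}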
diff --git a/src/LLVM/lib/CodeGen/Splitter.cpp b/src/LLVM/lib/CodeGen/Splitter.cpp
new file mode 100644
index 0000000..77973b7
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/Splitter.cpp
@@ -0,0 +1,827 @@
+//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
+
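This is the registration idiom the patch applies to every pass it touches: INITIALIZE_PASS_BEGIN/END describe the pass and its dependencies to the PassRegistry, and each constructor calls the generated initializer so the dependencies are registered before first use. Schematically, with a hypothetical pass name:

// Hypothetical shape of the idiom; MyPass stands in for any pass in this patch.
INITIALIZE_PASS_BEGIN(MyPass, "my-pass", "My hypothetical pass", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_END(MyPass, "my-pass", "My hypothetical pass", false, false)

MyPass::MyPass() : MachineFunctionPass(ID) {
  // Generated by the macros above; registers MyPass and its dependencies.
  initializeMyPassPass(*PassRegistry::getPassRegistry());
}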
+namespace llvm {
+
+ class StartSlotComparator {
+ public:
+ StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+ bool operator()(const MachineBasicBlock *mbb1,
+ const MachineBasicBlock *mbb2) const {
+ return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+ }
+ private:
+ LiveIntervals &lis;
+ };
+
+ class LoopSplit {
+ public:
+ LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+ : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Cannot split physical registers.");
+ }
+
+ LiveInterval& getLI() const { return li; }
+
+ MachineLoop& getLoop() const { return loop; }
+
+ bool isValid() const { return valid; }
+
+ bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+ void invalidate() { valid = false; }
+
+ void splitIncoming() { inSplit = true; }
+
+ void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+ void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+ void apply() {
+ assert(valid && "Attempt to apply invalid split.");
+ applyIncoming();
+ applyOutgoing();
+ copyRanges();
+ renameInside();
+ }
+
+ private:
+ LoopSplitter &ls;
+ LiveInterval &li;
+ MachineLoop &loop;
+ bool valid, inSplit;
+ std::set<MachineLoop::Edge> outSplits;
+ std::vector<MachineInstr*> loopInstrs;
+
+ LiveInterval *newLI;
+ std::map<VNInfo*, VNInfo*> vniMap;
+
+ LiveInterval* getNewLI() {
+ if (newLI == 0) {
+ const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+ unsigned vreg = ls.mri->createVirtualRegister(trc);
+ newLI = &ls.lis->getOrCreateInterval(vreg);
+ }
+ return newLI;
+ }
+
+ VNInfo* getNewVNI(VNInfo *oldVNI) {
+ VNInfo *newVNI = vniMap[oldVNI];
+
+ if (newVNI == 0) {
+ newVNI = getNewLI()->createValueCopy(oldVNI,
+ ls.lis->getVNInfoAllocator());
+ vniMap[oldVNI] = newVNI;
+ }
+
+ return newVNI;
+ }
+
+ void applyIncoming() {
+ if (!inSplit) {
+ return;
+ }
+
+ MachineBasicBlock *preHeader = loop.getLoopPreheader();
+ if (preHeader == 0) {
+ assert(ls.canInsertPreHeader(loop) &&
+ "Can't insert required preheader.");
+ preHeader = &ls.insertPreHeader(loop);
+ }
+
+ LiveRange *preHeaderRange =
+ ls.lis->findExitingRange(li, preHeader);
+ assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+ // Insert the new copy.
+ MachineInstr *copy = BuildMI(*preHeader,
+ preHeader->getFirstTerminator(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(getNewLI()->reg, RegState::Define)
+ .addReg(li.reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ VNInfo *newVal = getNewVNI(preHeaderRange->valno);
+ newVal->def = copyDefIdx;
+ newVal->setCopy(copy);
+ li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
+
+ getNewLI()->addRange(LiveRange(copyDefIdx,
+ ls.lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+ void applyOutgoing() {
+
+ for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
+ osEnd = outSplits.end();
+ osItr != osEnd; ++osItr) {
+ MachineLoop::Edge edge = *osItr;
+ MachineBasicBlock *outBlock = edge.second;
+ if (ls.isCriticalEdge(edge)) {
+ assert(ls.canSplitEdge(edge) && "Unsplittable critical edge.");
+ outBlock = &ls.splitEdge(edge, loop);
+ }
+ LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
+ assert(outRange != 0 && "No exiting range?");
+
+ MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(li.reg, RegState::Define)
+ .addReg(getNewLI()->reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ // Blow away output range definition.
+ outRange->valno->def = ls.lis->getInvalidIndex();
+ li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+
+ SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
+ assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal =
+ getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
+
+ getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
+ copyDefIdx, newVal));
+
+ }
+ }
+
+ void copyRange(LiveRange &lr) {
+ std::pair<bool, LoopSplitter::SlotPair> lsr =
+ ls.getLoopSubRange(lr, loop);
+
+ if (!lsr.first)
+ return;
+
+ LiveRange loopRange(lsr.second.first, lsr.second.second,
+ getNewVNI(lr.valno));
+
+ li.removeRange(loopRange.start, loopRange.end, true);
+
+ getNewLI()->addRange(loopRange);
+ }
+
+ void copyRanges() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
+ if (instr.modifiesRegister(li.reg, 0)) {
+ LiveRange *defRange =
+ li.getLiveRangeContaining(instrIdx.getDefIndex());
+ if (defRange != 0) // May have caught this already.
+ copyRange(*defRange);
+ }
+ if (instr.readsRegister(li.reg, 0)) {
+ LiveRange *useRange =
+ li.getLiveRangeContaining(instrIdx.getUseIndex());
+ if (useRange != 0) { // May have caught this already.
+ copyRange(*useRange);
+ }
+ }
+ }
+
+ for (MachineLoop::block_iterator bbItr = loop.block_begin(),
+ bbEnd = loop.block_end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock &loopBlock = **bbItr;
+ LiveRange *enteringRange =
+ ls.lis->findEnteringRange(li, &loopBlock);
+ if (enteringRange != 0) {
+ copyRange(*enteringRange);
+ }
+ }
+ }
+
+ void renameInside() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
+ MachineOperand &mop = instr.getOperand(i);
+ if (mop.isReg() && mop.getReg() == li.reg) {
+ mop.setReg(getNewLI()->reg);
+ }
+ }
+ }
+ }
+
+ };
+
+ void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addPreservedID(RegisterCoalescerPassID);
+ au.addPreserved<CalculateSpillWeights>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ sis = &getAnalysis<SlotIndexes>();
+ lis = &getAnalysis<LiveIntervals>();
+ mli = &getAnalysis<MachineLoopInfo>();
+ mdt = &getAnalysis<MachineDominatorTree>();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
+
+ dumpOddTerminators();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+// std::deque<MachineLoop*> loops;
+// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+// dbgs() << "Loops:\n";
+// while (!loops.empty()) {
+// MachineLoop &loop = *loops.front();
+// loops.pop_front();
+// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+
+// dumpLoopInfo(loop);
+// }
+
+ //lis->dump();
+ //exit(0);
+
+ // Setup initial intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
+ !lis->intervalIsInOneMBB(*li)) {
+ intervals.push_back(li);
+ }
+ }
+
+ processIntervals();
+
+ intervals.clear();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+ dumpOddTerminators();
+
+ //exit(1);
+
+ return false;
+ }
+
+ void LoopSplitter::releaseMemory() {
+ fqn.clear();
+ intervals.clear();
+ loopRangeMap.clear();
+ }
+
+ void LoopSplitter::dumpOddTerminators() {
+ for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock *mbb = &*bbItr;
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ if (tii->AnalyzeBranch(*mbb, a, b, c)) {
+ dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
+ dbgs() << " Terminators:\n";
+ for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr *instr = &*iItr;
+ dbgs() << " " << *instr;
+ }
+ dbgs() << "\n Listed successors: [ ";
+ for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock *succMBB = *sItr;
+ dbgs() << succMBB->getNumber() << " ";
+ }
+ dbgs() << "]\n\n";
+ }
+ }
+ }
+
+ void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
+ MachineBasicBlock &headerBlock = *loop.getHeader();
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ loop.getExitEdges(exitEdges);
+
+ dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
+ for (std::vector<MachineBasicBlock*>::const_iterator
+ subBlockItr = loop.getBlocks().begin(),
+ subBlockEnd = loop.getBlocks().end();
+ subBlockItr != subBlockEnd; ++subBlockItr) {
+ MachineBasicBlock &subBlock = **subBlockItr;
+ dbgs() << "BB#" << subBlock.getNumber() << " ";
+ }
+ dbgs() << "], Exit edges: [ ";
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge &exitEdge = *exitEdgeItr;
+ dbgs() << "(MBB#" << exitEdge.first->getNumber()
+ << ", MBB#" << exitEdge.second->getNumber() << ") ";
+ }
+ dbgs() << "], Sub-Loop Headers: [ ";
+ for (MachineLoop::iterator subLoopItr = loop.begin(),
+ subLoopEnd = loop.end();
+ subLoopItr != subLoopEnd; ++subLoopItr) {
+ MachineLoop &subLoop = **subLoopItr;
+ MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
+ dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
+ mbb.updateTerminator();
+
+ for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
+ miItr != miEnd; ++miItr) {
+ if (lis->isNotInMIMap(miItr)) {
+ lis->InsertMachineInstrInMaps(miItr);
+ }
+ }
+ }
+
+ bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
+ MachineBasicBlock *header = loop.getHeader();
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+
+ for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
+ pbEnd = header->pred_end();
+ pbItr != pbEnd; ++pbItr) {
+ MachineBasicBlock *predBlock = *pbItr;
+ if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) {
+ return false;
+ }
+ }
+
+ MachineFunction::iterator headerItr(header);
+ if (headerItr == mf->begin())
+ return true;
+ MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
+ assert(headerLayoutPred != 0 && "Header should have layout pred.");
+
+ return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
+ assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
+
+ MachineBasicBlock &header = *loop.getHeader();
+
+ // Save the preds - we'll need to update them once we insert the preheader.
+ typedef std::set<MachineBasicBlock*> HeaderPreds;
+ HeaderPreds headerPreds;
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (!loop.contains(*predItr))
+ headerPreds.insert(*predItr);
+ }
+
+ assert(!headerPreds.empty() && "No predecessors for header?");
+
+ //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
+
+ MachineBasicBlock *preHeader =
+ mf->CreateMachineBasicBlock(header.getBasicBlock());
+
+ assert(preHeader != 0 && "Failed to create pre-header.");
+
+ mf->insert(header, preHeader);
+
+ for (HeaderPreds::iterator hpItr = headerPreds.begin(),
+ hpEnd = headerPreds.end();
+ hpItr != hpEnd; ++hpItr) {
+ assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
+ MachineBasicBlock &hp = **hpItr;
+ hp.ReplaceUsesOfBlockWith(&header, preHeader);
+ }
+ preHeader->addSuccessor(&header);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(preHeader));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(preHeader);
+
+ if (MachineLoop *parentLoop = loop.getParentLoop()) {
+ assert(parentLoop->getHeader() != loop.getHeader() &&
+ "Parent loop has same header?");
+ parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (parentLoop != 0) {
+ loopRangeMap.erase(parentLoop);
+ parentLoop = parentLoop->getParentLoop();
+ }
+ }
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+
+ // Is this safe for physregs?
+ // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
+ if (!lis->isLiveInToMBB(li, &header))
+ continue;
+
+ if (lis->isLiveInToMBB(li, preHeader)) {
+ assert(lis->isLiveOutOfMBB(li, preHeader) &&
+ "Range terminates in newly added preheader?");
+ continue;
+ }
+
+ bool insertRange = false;
+
+ for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
+ predEnd = preHeader->pred_end();
+ predItr != predEnd; ++predItr) {
+ MachineBasicBlock *predMBB = *predItr;
+ if (lis->isLiveOutOfMBB(li, predMBB)) {
+ insertRange = true;
+ break;
+ }
+ }
+
+ if (!insertRange)
+ continue;
+
+ SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
+ lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+
+ //dbgs() << "Dumping SlotIndexes:\n";
+ //sis->dump();
+
+ //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
+
+ return *preHeader;
+ }
+
+ bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
+ assert(edge.first->succ_size() > 1 && "Nonsensical edge.");
+ if (edge.second->pred_size() > 1)
+ return true;
+ return false;
+ }
+
+ bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
+ MachineFunction::iterator outBlockItr(edge.second);
+ if (outBlockItr == mf->begin())
+ return true;
+ MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
+ assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
+ !tii->AnalyzeBranch(*edge.first, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
+ MachineLoop &loop) {
+
+ MachineBasicBlock &inBlock = *edge.first;
+ MachineBasicBlock &outBlock = *edge.second;
+
+ assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
+ "Splitting non-critical edge?");
+
+ //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
+ // << " -> MBB#" << outBlock.getNumber() << ")...";
+
+ MachineBasicBlock *splitBlock =
+ mf->CreateMachineBasicBlock();
+
+ assert(splitBlock != 0 && "Failed to create split block.");
+
+ mf->insert(&outBlock, splitBlock);
+
+ inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
+ splitBlock->addSuccessor(&outBlock);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(splitBlock));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(splitBlock);
+
+ loopRangeMap.erase(&loop);
+
+ MachineLoop *splitParentLoop = loop.getParentLoop();
+ while (splitParentLoop != 0 &&
+ !splitParentLoop->contains(&outBlock)) {
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+
+ if (splitParentLoop != 0) {
+ assert(splitParentLoop->contains(&loop) &&
+ "Split-block parent doesn't contain original loop?");
+ splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (splitParentLoop != 0) {
+ loopRangeMap.erase(splitParentLoop);
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+ }
+
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+ bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
+ lis->isLiveInToMBB(li, &outBlock);
+ if (lis->isLiveInToMBB(li, splitBlock)) {
+ if (!intersects) {
+ li.removeRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock), true);
+ }
+ } else if (intersects) {
+ SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0,
+ lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock),
+ newVal));
+ }
+ }
+
+ //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
+
+ return *splitBlock;
+ }
+
+ LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
+ typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
+ LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
+ if (lrItr == loopRangeMap.end()) {
+ LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
+ std::copy(loop.block_begin(), loop.block_end(),
+ std::inserter(loopMBBs, loopMBBs.begin()));
+
+ assert(!loopMBBs.empty() && "No blocks in loop?");
+
+ LoopRanges &loopRanges = loopRangeMap[&loop];
+ assert(loopRanges.empty() && "Loop encountered but not processed?");
+ SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
+ loopRanges.push_back(
+ std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
+ lis->getInvalidIndex()));
+ for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
+ curBlockEnd = loopMBBs.end();
+ curBlockItr != curBlockEnd; ++curBlockItr) {
+ SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
+ if (newStart != oldEnd) {
+ loopRanges.back().second = oldEnd;
+ loopRanges.push_back(std::make_pair(newStart,
+ lis->getInvalidIndex()));
+ }
+ oldEnd = lis->getMBBEndIdx(*curBlockItr);
+ }
+
+ loopRanges.back().second =
+ lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
+
+ return loopRanges;
+ }
+ return lrItr->second;
+ }
+
+ std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
+ const LiveRange &lr,
+ MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ LoopRanges::iterator lrItr = loopRanges.begin(),
+ lrEnd = loopRanges.end();
+ while (lrItr != lrEnd && lr.start >= lrItr->second) {
+ ++lrItr;
+ }
+
+ if (lrItr == lrEnd) {
+ SlotIndex invalid = lis->getInvalidIndex();
+ return std::make_pair(false, SlotPair(invalid, invalid));
+ }
+
+ SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
+ SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
+
+ return std::make_pair(true, SlotPair(srStart, srEnd));
+ }
+
+ void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
+ for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
+ lrItr != lrEnd; ++lrItr) {
+ dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::processHeader(LoopSplit &split) {
+ MachineBasicBlock &header = *split.getLoop().getHeader();
+ //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
+
+ if (!lis->isLiveInToMBB(split.getLI(), &header))
+ return; // Not live in, but nothing wrong so far.
+
+ MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
+ if (!preHeader) {
+
+ if (!canInsertPreHeader(split.getLoop())) {
+ split.invalidate();
+ return; // Couldn't insert a pre-header. Bail on this interval.
+ }
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
+ split.splitIncoming();
+ break;
+ }
+ }
+ } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
+ split.splitIncoming();
+ }
+ }
+
+ void LoopSplitter::processLoopExits(LoopSplit &split) {
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ split.getLoop().getExitEdges(exitEdges);
+
+ //dbgs() << " Processing loop exits:\n";
+
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge exitEdge = *exitEdgeItr;
+
+ LiveRange *outRange =
+ split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
+
+ if (outRange != 0) {
+ if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
+ split.invalidate();
+ return;
+ }
+
+ split.splitOutgoing(exitEdge);
+ }
+ }
+ }
+
+ void LoopSplitter::processLoopUses(LoopSplit &split) {
+ std::set<MachineInstr*> processed;
+
+ for (MachineRegisterInfo::reg_iterator
+ rItr = mri->reg_begin(split.getLI().reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ MachineInstr &instr = *rItr;
+ if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
+ split.addLoopInstr(&instr);
+ processed.insert(&instr);
+ }
+ }
+
+ //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
+ // << " in blocks [ ";
+ //dbgs() << "]\n";
+ }
+
+ bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Attempt to split physical register.");
+
+ LoopSplit split(*this, li, loop);
+ processHeader(split);
+ if (split.isValid())
+ processLoopExits(split);
+ if (split.isValid())
+ processLoopUses(split);
+ if (split.isValid() /* && split.isWorthwhile() */) {
+ split.apply();
+ DEBUG(dbgs() << "Success.\n");
+ return true;
+ }
+ DEBUG(dbgs() << "Failed.\n");
+ return false;
+ }
+
+ void LoopSplitter::processInterval(LiveInterval &li) {
+ std::deque<MachineLoop*> loops;
+ std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+
+ while (!loops.empty()) {
+ MachineLoop &loop = *loops.front();
+ loops.pop_front();
+ DEBUG(
+ dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
+ << loop.getHeader()->getNumber() << " ";
+ );
+ if (!splitOverLoop(li, loop)) {
+ // Couldn't split over outer loop, schedule sub-loops to be checked.
+ std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+ }
+ }
+ }
+
+ void LoopSplitter::processIntervals() {
+ while (!intervals.empty()) {
+ LiveInterval &li = *intervals.front();
+ intervals.pop_front();
+
+ assert(!lis->intervalIsInOneMBB(li) &&
+ "Single interval in process worklist.");
+
+ processInterval(li);
+ }
+ }
+
+}
diff --git a/src/LLVM/lib/CodeGen/Splitter.h b/src/LLVM/lib/CodeGen/Splitter.h
index a726a7b..9fb1b8b 100644
--- a/src/LLVM/lib/CodeGen/Splitter.h
+++ b/src/LLVM/lib/CodeGen/Splitter.h
@@ -36,7 +36,9 @@
public:
static char ID;
- LoopSplitter() : MachineFunctionPass(ID) {}
+ LoopSplitter() : MachineFunctionPass(ID) {
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &au) const;
diff --git a/src/LLVM/lib/CodeGen/StackProtector.cpp b/src/LLVM/lib/CodeGen/StackProtector.cpp
index a16feb0..1f0e5a2 100644
--- a/src/LLVM/lib/CodeGen/StackProtector.cpp
+++ b/src/LLVM/lib/CodeGen/StackProtector.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "stack-protector"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Attributes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -45,6 +46,8 @@
Function *F;
Module *M;
+ DominatorTree* DT;
+
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
///
@@ -62,9 +65,17 @@
bool RequiresStackProtector() const;
public:
static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(ID), TLI(0) {}
+ StackProtector() : FunctionPass(ID), TLI(0) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
StackProtector(const TargetLowering *tli)
- : FunctionPass(ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ }
virtual bool runOnFunction(Function &Fn);
};
@@ -72,7 +83,7 @@
char StackProtector::ID = 0;
INITIALIZE_PASS(StackProtector, "stack-protector",
- "Insert stack protectors", false, false);
+ "Insert stack protectors", false, false)
FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
return new StackProtector(tli);
@@ -81,6 +92,7 @@
bool StackProtector::runOnFunction(Function &Fn) {
F = &Fn;
M = F->getParent();
+ DT = getAnalysisIfAvailable<DominatorTree>();
if (!RequiresStackProtector()) return false;
@@ -111,7 +123,7 @@
// protectors.
return true;
- if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
// We apparently only care about character arrays.
if (!AT->getElementType()->isIntegerTy(8))
continue;
@@ -135,12 +147,12 @@
/// value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ BasicBlock *FailBBDom = 0; // FailBB's dominator.
AllocaInst *AI = 0; // Place on stack that stores the stack guard.
Value *StackGuardVar = 0; // The stack guard variable.
for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
BasicBlock *BB = I++;
-
ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
if (!RI) continue;
@@ -153,7 +165,7 @@
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
//
- const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
unsigned AddressSpace, Offset;
if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
Constant *OffsetVal =
@@ -168,13 +180,13 @@
BasicBlock &Entry = F->getEntryBlock();
Instruction *InsPt = &Entry.front();
- AI = new AllocaInst(PtrTy, InsPt);
- LoadInst *LI = new LoadInst(StackGuardVar, false, InsPt);
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
Value *Args[] = { LI, AI };
CallInst::
Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
- &Args[0], array_endof(Args), "", InsPt);
+ Args, "", InsPt);
// Create the basic block to jump to when the guard check fails.
FailBB = CreateFailBB();
@@ -203,7 +215,12 @@
// unreachable
// Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI);
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) : BB;
+ }
// Remove default branch instruction to the new BB.
BB->getTerminator()->eraseFromParent();
@@ -215,7 +232,7 @@
// Generate the stack protector instructions in the old basic block.
LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
LoadInst *LI2 = new LoadInst(AI, "", true, BB);
- ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2);
+ ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
BranchInst::Create(NewBB, FailBB, Cmp, BB);
}
@@ -223,6 +240,9 @@
// statements in the function.
if (!FailBB) return false;
+ if (DT && FailBBDom)
+ DT->addNewBlock(FailBB, FailBBDom);
+
return true;
}
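At the C++ level the rewritten guard check behaves roughly as below; __stack_chk_guard and __stack_chk_fail are the usual runtime symbols, and everything else in the sketch is illustrative rather than the emitted IR:

extern "C" void *__stack_chk_guard;
extern "C" void __stack_chk_fail();

int protectedFunction() {
  // Prologue: stash the guard in a stack slot ("StackGuardSlot" above).
  void *StackGuardSlot = __stack_chk_guard;
  int Result = 0; // ... function body runs here ...
  // Before each return: reload and compare (LI1/LI2 and Cmp above).
  if (StackGuardSlot != __stack_chk_guard)
    __stack_chk_fail(); // FailBB, named "CallStackCheckFailBlk"
  return Result;        // the split-off "SP_return" block
}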
@@ -230,7 +250,7 @@
/// check fails.
BasicBlock *StackProtector::CreateFailBB() {
BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
- F);
+ "CallStackCheckFailBlk", F);
Constant *StackChkFail =
M->getOrInsertFunction("__stack_chk_fail",
Type::getVoidTy(F->getContext()), NULL);
diff --git a/src/LLVM/lib/CodeGen/StackSlotColoring.cpp b/src/LLVM/lib/CodeGen/StackSlotColoring.cpp
index dab6bc3..57cbe1b 100644
--- a/src/LLVM/lib/CodeGen/StackSlotColoring.cpp
+++ b/src/LLVM/lib/CodeGen/StackSlotColoring.cpp
@@ -95,9 +95,13 @@
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
StackSlotColoring(bool RegColor) :
- MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -119,7 +123,6 @@
private:
void InitializeSlots();
- bool CheckForSetJmpCall(const MachineFunction &MF) const;
void ScanForSpillSlotRefs(MachineFunction &MF);
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
@@ -146,8 +149,14 @@
char StackSlotColoring::ID = 0;
-INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring",
- "Stack Slot Coloring", false, false);
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
return new StackSlotColoring(RegColor);
@@ -209,7 +218,7 @@
for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
LiveInterval &li = i->second;
DEBUG(li.dump());
- int FI = li.getStackSlotIndex();
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
continue;
SSIntervals.push_back(&li);
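The replacement used throughout this file relies on stack slots being encoded in the same unsigned number space as registers. Paraphrased from TargetRegisterInfo.h, the mapping is a fixed offset above 2^30:

static bool isStackSlot(unsigned Reg) {
  return int(Reg) >= (1 << 30);
}
static int stackSlot2Index(unsigned Reg) {
  assert(isStackSlot(Reg) && "Not a stack slot");
  return int(Reg - (1u << 30));
}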
@@ -252,7 +261,7 @@
DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
if (!UsedColors[SS] || li->weight < 20)
// If the weight is < 20, i.e. two references in a loop with depth 1,
// don't bother with it.
@@ -341,7 +350,7 @@
// Record the assignment.
Assignments[Color].push_back(li);
- int FI = li->getStackSlotIndex();
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
@@ -370,7 +379,7 @@
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
SlotMapping[SS] = NewSS;
@@ -383,7 +392,7 @@
DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
@@ -495,7 +504,7 @@
bool FoundDef = false; // Not counting 2address def.
Uses.clear();
- const TargetInstrDesc &TID = MII->getDesc();
+ const MCInstrDesc &MCID = MII->getDesc();
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
@@ -512,7 +521,7 @@
if (MO.getSubReg() || MII->isSubregToReg())
return false;
- const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
if (RC && !RC->contains(NewReg))
return false;
@@ -557,7 +566,7 @@
SmallVector<MachineOperand*, 4> Uses;
while (++MII != MBB->end()) {
bool FoundKill = false;
- const TargetInstrDesc &TID = MII->getDesc();
+ const MCInstrDesc &MCID = MII->getDesc();
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
@@ -574,7 +583,7 @@
if (MO.getSubReg())
return false;
- const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
if (RC && !RC->contains(NewReg))
return false;
if (MO.isKill())
@@ -637,7 +646,7 @@
} else {
SmallVector<MachineInstr*, 4> NewMIs;
bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
- Success = Success; // Silence compiler warning.
+ (void)Success; // Silence compiler warning.
assert(Success && "Failed to unfold!");
MachineInstr *NewMI = NewMIs[0];
MBB->insert(MI, NewMI);
diff --git a/src/LLVM/lib/CodeGen/StrongPHIElimination.cpp b/src/LLVM/lib/CodeGen/StrongPHIElimination.cpp
index 5e2d414..260cc0e 100644
--- a/src/LLVM/lib/CodeGen/StrongPHIElimination.cpp
+++ b/src/LLVM/lib/CodeGen/StrongPHIElimination.cpp
@@ -1,4 +1,4 @@
-//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,1040 +7,824 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass eliminates machine instruction PHI nodes by inserting copy
-// instructions, using an intelligent copy-folding technique based on
-// dominator information. This is technique is derived from:
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
//
// Budimlic, et al. Fast copy coalescing and live-range identification.
// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
// PLDI '02. ACM, New York, NY, 25-32.
-// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "strongphielim"
+#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
- struct StrongPHIElimination : public MachineFunctionPass {
+ class StrongPHIElimination : public MachineFunctionPass {
+ public:
static char ID; // Pass identification, replacement for typeid
- StrongPHIElimination() : MachineFunctionPass(ID) {}
-
- // Waiting stores, for each MBB, the set of copies that need to
- // be inserted into that MBB
- DenseMap<MachineBasicBlock*,
- std::multimap<unsigned, unsigned> > Waiting;
-
- // Stacks holds the renaming stack for each register
- std::map<unsigned, std::vector<unsigned> > Stacks;
-
- // Registers in UsedByAnother are PHI nodes that are themselves
- // used as operands to another PHI node
- std::set<unsigned> UsedByAnother;
-
- // RenameSets are the is a map from a PHI-defined register
- // to the input registers to be coalesced along with the
- // predecessor block for those input registers.
- std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
-
- // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
- // eliminating, indexed by the register defined by that phi.
- std::map<unsigned, unsigned> PhiValueNumber;
-
- // Store the DFS-in number of each block
- DenseMap<MachineBasicBlock*, unsigned> preorder;
-
- // Store the DFS-out number of each block
- DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
-
- bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<LiveIntervals>();
-
- // TODO: Actually make this true.
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<RegisterCoalescer>();
- MachineFunctionPass::getAnalysisUsage(AU);
+ StrongPHIElimination() : MachineFunctionPass(ID) {
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
}
-
- virtual void releaseMemory() {
- preorder.clear();
- maxpreorder.clear();
-
- Waiting.clear();
- Stacks.clear();
- UsedByAnother.clear();
- RenameSets.clear();
- }
+
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
private:
-
- /// DomForestNode - Represents a node in the "dominator forest". This is
- /// a forest in which the nodes represent registers and the edges
- /// represent a dominance relation in the block defining those registers.
- struct DomForestNode {
- private:
- // Store references to our children
- std::vector<DomForestNode*> children;
- // The register we represent
- unsigned reg;
-
- // Add another node as our child
- void addChild(DomForestNode* DFN) { children.push_back(DFN); }
-
- public:
- typedef std::vector<DomForestNode*>::iterator iterator;
-
- // Create a DomForestNode by providing the register it represents, and
- // the node to be its parent. The virtual root node has register 0
- // and a null parent.
- DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
- if (parent)
- parent->addChild(this);
- }
-
- ~DomForestNode() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- delete *I;
- }
-
- /// getReg - Return the regiser that this node represents
- inline unsigned getReg() { return reg; }
-
- // Provide iterator access to our children
- inline DomForestNode::iterator begin() { return children.begin(); }
- inline DomForestNode::iterator end() { return children.end(); }
+ /// This struct represents a single node in the union-find data structure
+ /// representing the variable congruence classes. There is one difference
+ /// from a normal union-find data structure. We steal two bits from the parent
+ /// pointer . One of these bits is used to represent whether the register
+ /// itself has been isolated, and the other is used to represent whether the
+ /// PHI with that register as its destination has been isolated.
+ ///
+ /// Note that this leads to the strange situation where the leader of a
+ /// congruence class may no longer logically be a member, due to being
+ /// isolated.
+ struct Node {
+ enum Flags {
+ kRegisterIsolatedFlag = 1,
+ kPHIIsolatedFlag = 2
+ };
+ Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+ Node *getLeader();
+
+ PointerIntPair<Node*, 2> parent;
+ unsigned value;
+ unsigned rank;
};
-
- void computeDFS(MachineFunction& MF);
- void processBlock(MachineBasicBlock* MBB);
-
- std::vector<DomForestNode*> computeDomForest(
- std::map<unsigned, MachineBasicBlock*>& instrs,
- MachineRegisterInfo& MRI);
- void processPHIUnion(MachineInstr* Inst,
- std::map<unsigned, MachineBasicBlock*>& PHIUnion,
- std::vector<StrongPHIElimination::DomForestNode*>& DF,
- std::vector<std::pair<unsigned, unsigned> >& locals);
- void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
- void InsertCopies(MachineDomTreeNode* MBB,
- SmallPtrSet<MachineBasicBlock*, 16>& v);
- bool mergeLiveIntervals(unsigned primary, unsigned secondary);
+
+ /// Add a register in a new congruence class containing only itself.
+ void addReg(unsigned);
+
+ /// Join the congruence classes of two registers. This function is biased
+ /// towards the left argument, i.e. after
+ ///
+ /// addReg(r2);
+ /// unionRegs(r1, r2);
+ ///
+ /// the leader of the unioned congruence class is the same as the leader of
+ /// r1's congruence class prior to the union. This is actually relied upon
+ /// in the copy insertion code.
+ void unionRegs(unsigned, unsigned);
+
+ /// Get the color of a register. The color is 0 if the register has been
+ /// isolated.
+ unsigned getRegColor(unsigned);
+
+ // Isolate a register.
+ void isolateReg(unsigned);
+
+ /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+ /// isolated. Otherwise, it is the original color of its destination and
+ /// all of its operands (before they were isolated, if they were).
+ unsigned getPHIColor(MachineInstr*);
+
+ /// Isolate a PHI.
+ void isolatePHI(MachineInstr*);
+
+ /// Traverses a basic block, splitting any interferences found between
+ /// registers in the same congruence class. It takes two DenseMaps as
+ /// arguments that it also updates: CurrentDominatingParent, which maps
+ /// a color to the register in that congruence class whose definition was
+ /// most recently seen, and ImmediateDominatingParent, which maps a register
+ /// to the register in the same congruence class that most immediately
+ /// dominates it.
+ ///
+ /// This function assumes that it is being called in a depth-first traversal
+ /// of the dominator tree.
+ void SplitInterferencesForBasicBlock(
+ MachineBasicBlock&,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+ // Lowers a PHI instruction, inserting copies of the source and destination
+ // registers as necessary.
+ void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+ // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+ // everywhere that Reg appears. Requires Reg and NewReg to have non-
+ // overlapping lifetimes.
+ void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DT;
+ LiveIntervals *LI;
+
+ BumpPtrAllocator Allocator;
+
+ DenseMap<unsigned, Node*> RegNodeMap;
+
+ // Maps a basic block to a list of its defs of registers that appear as PHI
+ // sources.
+ DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+ // Maps a color to a pair of a PHI instruction and the virtual register that
+ // is that PHI's operand corresponding to the current basic block.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+ // FIXME: Can these two data structures be combined? Would a std::multimap
+ // be any better?
+
+ // Stores pairs of predecessor basic blocks and the source registers of
+ // inserted copy instructions.
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+ SrcCopySet InsertedSrcCopySet;
+
+ // Maps pairs of predecessor basic blocks and colors to their defining copy
+ // instructions.
+ typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+ SrcCopyMap;
+ SrcCopyMap InsertedSrcCopyMap;
+
+ // Maps inserted destination copy registers to their defining copy
+ // instructions.
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+ DestCopyMap InsertedDestCopies;
};
-}
+
+ struct MIIndexCompare {
+ MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
+ }
+
+ LiveIntervals *LI;
+ };
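
 // Editor's note: MIIndexCompare is the sort predicate used later in
 // SplitInterferencesForBasicBlock to put a block's defs in dominance order:
 //
 //   std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
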
+} // namespace
+
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
char StrongPHIElimination::ID = 0;
-static RegisterPass<StrongPHIElimination>
-X("strong-phi-node-elimination",
- "Eliminate PHI nodes for register allocation, intelligently");
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
-/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
-/// of the given MachineFunction. These numbers are then used in other parts
-/// of the PHI elimination process.
-void StrongPHIElimination::computeDFS(MachineFunction& MF) {
- SmallPtrSet<MachineDomTreeNode*, 8> frontier;
- SmallPtrSet<MachineDomTreeNode*, 8> visited;
-
- unsigned time = 0;
-
- MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
-
- MachineDomTreeNode* node = DT.getRootNode();
-
- std::vector<MachineDomTreeNode*> worklist;
- worklist.push_back(node);
-
- while (!worklist.empty()) {
- MachineDomTreeNode* currNode = worklist.back();
-
- if (!frontier.count(currNode)) {
- frontier.insert(currNode);
- ++time;
- preorder.insert(std::make_pair(currNode->getBlock(), time));
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+ // FIXME: This only needs to check from the first terminator, as only the
+ // first terminator can use a virtual register.
+ for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+ assert (RI != MBB->rend());
+ MachineInstr *MI = &*RI;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ MachineOperand &MO = *OI;
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ return &MO;
}
-
- bool inserted = false;
- for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
- I != E; ++I)
- if (!frontier.count(*I) && !visited.count(*I)) {
- worklist.push_back(*I);
- inserted = true;
+ }
+ return NULL;
+}
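
// Editor's note: this helper backs the live-range trimming near the end of
// runOnMachineFunction, where a PHI source that is not live out of its
// predecessor has its interval clipped at the last use:
//
//   MachineOperand *LastUse = findLastUse(MBB, SrcReg);
//   SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
//   LastUse->setIsKill(true);
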
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<LiveIntervals>();
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+ PHISrcDefs[I].push_back(BBI);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = BBI->getOperand(i);
+ unsigned SrcReg = SrcMO.getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ PHISrcDefs[DefMI->getParent()].push_back(DefMI);
+ }
+ }
+ }
+
+ // Perform a depth-first traversal of the dominator tree, splitting
+ // interferences amongst PHI-congruence classes.
+ DenseMap<unsigned, unsigned> CurrentDominatingParent;
+ DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+ for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ SplitInterferencesForBasicBlock(*DI->getBlock(),
+ CurrentDominatingParent,
+ ImmediateDominatingParent);
+ }
+
+ // Insert copies for all PHI source and destination registers.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ InsertCopiesForPHI(BBI, I);
+ }
+ }
+
+ // FIXME: Preserve the equivalence classes during copy insertion and use
+ // the preserved equivalence classes instead of recomputing them.
+ RegNodeMap.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ unsigned SrcReg = BBI->getOperand(i).getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+ }
+ }
+ }
+
+ DenseMap<unsigned, unsigned> RegRenamingMap;
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ while (BBI != BBE && BBI->isPHI()) {
+ MachineInstr *PHI = BBI;
+
+ assert(PHI->getNumOperands() > 0);
+
+ unsigned SrcReg = PHI->getOperand(1).getReg();
+ unsigned SrcColor = getRegColor(SrcReg);
+ unsigned NewReg = RegRenamingMap[SrcColor];
+ if (!NewReg) {
+ NewReg = SrcReg;
+ RegRenamingMap[SrcColor] = SrcReg;
+ }
+ MergeLIsAndRename(SrcReg, NewReg);
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ if (!InsertedDestCopies.count(DestReg))
+ MergeLIsAndRename(DestReg, NewReg);
+
+ for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcReg = PHI->getOperand(i).getReg();
+ MergeLIsAndRename(SrcReg, NewReg);
+ }
+
+ ++BBI;
+ LI->RemoveMachineInstrFromMaps(PHI);
+ PHI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // Due to the insertion of copies to split live ranges, the live intervals are
+ // guaranteed to not overlap, except in one case: an original PHI source and a
+ // PHI destination copy. In this case, they have the same value and thus don't
+ // truly intersect, so we merge them into the value live at that point.
+ // FIXME: Is there some better way we can handle this?
+ for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+ E = InsertedDestCopies.end(); I != E; ++I) {
+ unsigned DestReg = I->first;
+ unsigned DestColor = getRegColor(DestReg);
+ unsigned NewReg = RegRenamingMap[DestColor];
+
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ assert(DestLI.ranges.size() == 1
+ && "PHI destination copy's live interval should be a single live "
+ "range from the beginning of the BB to the copy instruction.");
+ LiveRange *DestLR = DestLI.begin();
+ VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+ if (!NewVNI) {
+ NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+ MachineInstr *CopyInstr = I->second;
+ CopyInstr->getOperand(1).setIsKill(true);
+ }
+
+ LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+ NewLI.addRange(NewLR);
+
+ LI->removeInterval(DestReg);
+ MRI->replaceRegWith(DestReg, NewReg);
+ }
+
+ // Adjust the live intervals of all PHI source registers to handle the case
+ // where the PHIs in successor blocks were the only later uses of the source
+ // register.
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+ E = InsertedSrcCopySet.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->first;
+ unsigned SrcReg = I->second;
+ if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+ SrcReg = RenamedRegister;
+
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+ isLiveOut = true;
break;
}
-
- if (!inserted) {
- frontier.erase(currNode);
- visited.insert(currNode);
- maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
-
- worklist.pop_back();
}
- }
-}
-namespace {
-
-/// PreorderSorter - a helper class that is used to sort registers
-/// according to the preorder number of their defining blocks
-class PreorderSorter {
-private:
- DenseMap<MachineBasicBlock*, unsigned>& preorder;
- MachineRegisterInfo& MRI;
-
-public:
- PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
- MachineRegisterInfo& M) : preorder(p), MRI(M) { }
-
- bool operator()(unsigned A, unsigned B) {
- if (A == B)
- return false;
-
- MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
- MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
-
- if (preorder[ABlock] < preorder[BBlock])
- return true;
- else if (preorder[ABlock] > preorder[BBlock])
- return false;
-
- return false;
- }
-};
-
-}
-
-/// computeDomForest - compute the subforest of the DomTree corresponding
-/// to the defining blocks of the registers in question
-std::vector<StrongPHIElimination::DomForestNode*>
-StrongPHIElimination::computeDomForest(
- std::map<unsigned, MachineBasicBlock*>& regs,
- MachineRegisterInfo& MRI) {
- // Begin by creating a virtual root node, since the actual results
- // may well be a forest. Assume this node has maximum DFS-out number.
- DomForestNode* VirtualRoot = new DomForestNode(0, 0);
- maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
-
- // Populate a worklist with the registers
- std::vector<unsigned> worklist;
- worklist.reserve(regs.size());
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
- E = regs.end(); I != E; ++I)
- worklist.push_back(I->first);
-
- // Sort the registers by the DFS-in number of their defining block
- PreorderSorter PS(preorder, MRI);
- std::sort(worklist.begin(), worklist.end(), PS);
-
- // Create a "current parent" stack, and put the virtual root on top of it
- DomForestNode* CurrentParent = VirtualRoot;
- std::vector<DomForestNode*> stack;
- stack.push_back(VirtualRoot);
-
- // Iterate over all the registers in the previously computed order
- for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
- I != E; ++I) {
- unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
- MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
- MRI.getVRegDef(CurrentParent->getReg())->getParent() :
- 0;
-
- // If the DFS-in number of the register is greater than the DFS-out number
- // of the current parent, repeatedly pop the parent stack until it isn't.
- while (pre > maxpreorder[parentBlock]) {
- stack.pop_back();
- CurrentParent = stack.back();
-
- parentBlock = CurrentParent->getReg() ?
- MRI.getVRegDef(CurrentParent->getReg())->getParent() :
- 0;
- }
-
- // Now that we've found the appropriate parent, create a DomForestNode for
- // this register and attach it to the forest
- DomForestNode* child = new DomForestNode(*I, CurrentParent);
-
- // Push this new node on the "current parent" stack
- stack.push_back(child);
- CurrentParent = child;
- }
-
- // Return a vector containing the children of the virtual root node
- std::vector<DomForestNode*> ret;
- ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
- return ret;
-}
-
-/// isLiveIn - helper method that determines, from a regno, if a register
-/// is live into a block
-static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
- LiveIntervals& LI) {
- LiveInterval& I = LI.getOrCreateInterval(r);
- SlotIndex idx = LI.getMBBStartIdx(MBB);
- return I.liveAt(idx);
-}
-
-/// isLiveOut - help method that determines, from a regno, if a register is
-/// live out of a block.
-static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
- LiveIntervals& LI) {
- for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
- E = MBB->succ_end(); PI != E; ++PI)
- if (isLiveIn(r, *PI, LI))
- return true;
-
- return false;
-}
-
-/// interferes - checks for local interferences by scanning a block. The only
-/// trick parameter is 'mode' which tells it the relationship of the two
-/// registers. 0 - defined in the same block, 1 - first properly dominates
-/// second, 2 - second properly dominates first
-static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
- LiveIntervals& LV, unsigned mode) {
- MachineInstr* def = 0;
- MachineInstr* kill = 0;
-
- // The code is still in SSA form at this point, so there is only one
- // definition per VReg. Thus we can safely use MRI->getVRegDef().
- const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
-
- bool interference = false;
-
- // Wallk the block, checking for interferences
- for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
- MBI != MBE; ++MBI) {
- MachineInstr* curr = MBI;
-
- // Same defining block...
- if (mode == 0) {
- if (curr == MRI->getVRegDef(a)) {
- // If we find our first definition, save it
- if (!def) {
- def = curr;
- // If there's already an unkilled definition, then
- // this is an interference
- } else if (!kill) {
- interference = true;
- break;
- // If there's a definition followed by a KillInst, then
- // they can't interfere
- } else {
- interference = false;
- break;
- }
- // Symmetric with the above
- } else if (curr == MRI->getVRegDef(b)) {
- if (!def) {
- def = curr;
- } else if (!kill) {
- interference = true;
- break;
- } else {
- interference = false;
- break;
- }
- // Store KillInsts if they match up with the definition
- } else if (curr->killsRegister(a)) {
- if (def == MRI->getVRegDef(a)) {
- kill = curr;
- } else if (curr->killsRegister(b)) {
- if (def == MRI->getVRegDef(b)) {
- kill = curr;
- }
- }
- }
- // First properly dominates second...
- } else if (mode == 1) {
- if (curr == MRI->getVRegDef(b)) {
- // Definition of second without kill of first is an interference
- if (!kill) {
- interference = true;
- break;
- // Definition after a kill is a non-interference
- } else {
- interference = false;
- break;
- }
- // Save KillInsts of First
- } else if (curr->killsRegister(a)) {
- kill = curr;
- }
- // Symmetric with the above
- } else if (mode == 2) {
- if (curr == MRI->getVRegDef(a)) {
- if (!kill) {
- interference = true;
- break;
- } else {
- interference = false;
- break;
- }
- } else if (curr->killsRegister(b)) {
- kill = curr;
- }
- }
- }
-
- return interference;
-}
-
-/// processBlock - Determine how to break up PHIs in the current block. Each
-/// PHI is broken up by some combination of renaming its operands and inserting
-/// copies. This method is responsible for determining which operands receive
-/// which treatment.
-void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
-
- // Holds names that have been added to a set in any PHI within this block
- // before the current one.
- std::set<unsigned> ProcessedNames;
-
- // Iterate over all the PHI nodes in this block
- MachineBasicBlock::iterator P = MBB->begin();
- while (P != MBB->end() && P->isPHI()) {
- unsigned DestReg = P->getOperand(0).getReg();
-
- // Don't both doing PHI elimination for dead PHI's.
- if (P->registerDefIsDead(DestReg)) {
- ++P;
+ if (isLiveOut)
continue;
- }
- LiveInterval& PI = LI.getOrCreateInterval(DestReg);
- SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex();
- VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
- PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
+ MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+ assert(LastUse);
+ SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+ SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ LastUse->setIsKill(true);
+ }
- // PHIUnion is the set of incoming registers to the PHI node that
- // are going to be renames rather than having copies inserted. This set
- // is refinded over the course of this function. UnionedBlocks is the set
- // of corresponding MBBs.
- std::map<unsigned, MachineBasicBlock*> PHIUnion;
- SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
-
- // Iterate over the operands of the PHI node
- for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
- unsigned SrcReg = P->getOperand(i-1).getReg();
-
- // Don't need to try to coalesce a register with itself.
- if (SrcReg == DestReg) {
- ProcessedNames.insert(SrcReg);
- continue;
- }
-
- // We don't need to insert copies for implicit_defs.
- MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
- if (DefMI->isImplicitDef())
- ProcessedNames.insert(SrcReg);
-
- // Check for trivial interferences via liveness information, allowing us
- // to avoid extra work later. Any registers that interfere cannot both
- // be in the renaming set, so choose one and add copies for it instead.
- // The conditions are:
- // 1) if the operand is live into the PHI node's block OR
- // 2) if the PHI node is live out of the operand's defining block OR
- // 3) if the operand is itself a PHI node and the original PHI is
- // live into the operand's defining block OR
- // 4) if the operand is already being renamed for another PHI node
- // in this block OR
- // 5) if any two operands are defined in the same block, insert copies
- // for one of them
- if (isLiveIn(SrcReg, P->getParent(), LI) ||
- isLiveOut(P->getOperand(0).getReg(),
- MRI.getVRegDef(SrcReg)->getParent(), LI) ||
- ( MRI.getVRegDef(SrcReg)->isPHI() &&
- isLiveIn(P->getOperand(0).getReg(),
- MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
- ProcessedNames.count(SrcReg) ||
- UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
-
- // Add a copy for the selected register
- MachineBasicBlock* From = P->getOperand(i).getMBB();
- Waiting[From].insert(std::make_pair(SrcReg, DestReg));
- UsedByAnother.insert(SrcReg);
- } else {
- // Otherwise, add it to the renaming set
- PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB()));
- UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
- }
- }
-
- // Compute the dominator forest for the renaming set. This is a forest
- // where the nodes are the registers and the edges represent dominance
- // relations between the defining blocks of the registers
- std::vector<StrongPHIElimination::DomForestNode*> DF =
- computeDomForest(PHIUnion, MRI);
-
- // Walk DomForest to resolve interferences at an inter-block level. This
- // will remove registers from the renaming set (and insert copies for them)
- // if interferences are found.
- std::vector<std::pair<unsigned, unsigned> > localInterferences;
- processPHIUnion(P, PHIUnion, DF, localInterferences);
-
- // If one of the inputs is defined in the same block as the current PHI
- // then we need to check for a local interference between that input and
- // the PHI.
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
- E = PHIUnion.end(); I != E; ++I)
- if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
- localInterferences.push_back(std::make_pair(I->first,
- P->getOperand(0).getReg()));
-
- // The dominator forest walk may have returned some register pairs whose
- // interference cannot be determined from dominator analysis. We now
- // examine these pairs for local interferences.
- for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
- localInterferences.begin(), E = localInterferences.end(); I != E; ++I) {
- std::pair<unsigned, unsigned> p = *I;
-
- MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
-
- // Determine the block we need to scan and the relationship between
- // the two registers
- MachineBasicBlock* scan = 0;
- unsigned mode = 0;
- if (MRI.getVRegDef(p.first)->getParent() ==
- MRI.getVRegDef(p.second)->getParent()) {
- scan = MRI.getVRegDef(p.first)->getParent();
- mode = 0; // Same block
- } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
- MRI.getVRegDef(p.second)->getParent())) {
- scan = MRI.getVRegDef(p.second)->getParent();
- mode = 1; // First dominates second
- } else {
- scan = MRI.getVRegDef(p.first)->getParent();
- mode = 2; // Second dominates first
- }
-
- // If there's an interference, we need to insert copies
- if (interferes(p.first, p.second, scan, LI, mode)) {
- // Insert copies for First
- for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
- if (P->getOperand(i-1).getReg() == p.first) {
- unsigned SrcReg = p.first;
- MachineBasicBlock* From = P->getOperand(i).getMBB();
-
- Waiting[From].insert(std::make_pair(SrcReg,
- P->getOperand(0).getReg()));
- UsedByAnother.insert(SrcReg);
-
- PHIUnion.erase(SrcReg);
- }
- }
- }
- }
-
- // Add the renaming set for this PHI node to our overall renaming information
- for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
- QE = PHIUnion.end(); QI != QE; ++QI) {
- DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> "
- << P->getOperand(0).getReg() << "\n");
- }
-
- RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
-
- // Remember which registers are already renamed, so that we don't try to
- // rename them for another PHI node in this block
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
- E = PHIUnion.end(); I != E; ++I)
- ProcessedNames.insert(I->first);
-
- ++P;
+ LI->renumber();
+
+ Allocator.Reset();
+ RegNodeMap.clear();
+ PHISrcDefs.clear();
+ InsertedSrcCopySet.clear();
+ InsertedSrcCopyMap.clear();
+ InsertedDestCopies.clear();
+
+ return Changed;
+}
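
// Editor's summary of the phases above, as a comment-level sketch:
//
//   1. Build congruence classes: addReg() every PHI dest and source, then
//      unionRegs(DestReg, SrcReg) for each operand.
//   2. Walk the dominator tree depth-first, calling
//      SplitInterferencesForBasicBlock to isolate interfering class members.
//   3. InsertCopiesForPHI adds source copies in predecessors and a
//      destination copy after the PHIs.
//   4. Rebuild the classes over the rewritten PHIs, pick one register per
//      color via RegRenamingMap, MergeLIsAndRename the rest onto it, and
//      erase the PHIs.
//   5. Patch up the PHI-destination-copy overlap, trim source intervals that
//      died at the PHIs, and renumber the live intervals.
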
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+ if (RegNodeMap.count(Reg))
+ return;
+ RegNodeMap[Reg] = new (Allocator) Node(Reg);
+}
+
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+ Node *N = this;
+ Node *Parent = parent.getPointer();
+ Node *Grandparent = Parent->parent.getPointer();
+
+ while (Parent != Grandparent) {
+ N->parent.setPointer(Grandparent);
+ N = Grandparent;
+ Parent = Parent->parent.getPointer();
+ Grandparent = Parent->parent.getPointer();
+ }
+
+ return Parent;
+}
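
// Editor's trace of the pointer-jumping compression above (hypothetical
// chain): with nodes linked A -> B -> C -> D, where D is its own parent,
//
//   Node *Leader = A.getLeader();  // returns &D
//
// also repoints A at C as a side effect, so later queries walk a shorter
// path; repeated queries flatten the tree toward the root.
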
+
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+ DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+ if (RI == RegNodeMap.end())
+ return 0;
+ Node *Node = RI->second;
+ if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+ return 0;
+ return Node->getLeader()->value;
+}
+
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+ Node *Node1 = RegNodeMap[Reg1]->getLeader();
+ Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+ if (Node1->rank > Node2->rank) {
+ Node2->parent.setPointer(Node1->getLeader());
+ } else if (Node1->rank < Node2->rank) {
+ Node1->parent.setPointer(Node2->getLeader());
+ } else if (Node1 != Node2) {
+ Node2->parent.setPointer(Node1->getLeader());
+ Node1->rank++;
}
}
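
// Editor's sketch of the left bias documented in the class header
// (hypothetical vregs): a freshly added register has rank 0, so it always
// attaches under the existing leader rather than replacing it.
//
//   addReg(10); addReg(11); addReg(12);
//   unionRegs(10, 11);   // equal ranks: 11 joins 10, rank of 10 becomes 1
//   unionRegs(10, 12);   // rank 1 > rank 0: 12 joins 10
//   // getRegColor(11) == 10 and getRegColor(12) == 10
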
-/// processPHIUnion - Take a set of candidate registers to be coalesced when
-/// decomposing the PHI instruction. Use the DominanceForest to remove the ones
-/// that are known to interfere, and flag others that need to be checked for
-/// local interferences.
-void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
- std::map<unsigned, MachineBasicBlock*>& PHIUnion,
- std::vector<StrongPHIElimination::DomForestNode*>& DF,
- std::vector<std::pair<unsigned, unsigned> >& locals) {
-
- std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
- SmallPtrSet<DomForestNode*, 4> visited;
-
- // Code is still in SSA form, so we can use MRI::getVRegDef()
- MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- unsigned DestReg = Inst->getOperand(0).getReg();
-
- // DF walk on the DomForest
- while (!worklist.empty()) {
- DomForestNode* DFNode = worklist.back();
-
- visited.insert(DFNode);
-
- bool inserted = false;
- for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
- CI != CE; ++CI) {
- DomForestNode* child = *CI;
-
- // If the current node is live-out of the defining block of one of its
- // children, insert a copy for it. NOTE: The paper actually calls for
- // a more elaborate heuristic for determining whether to insert copies
- // for the child or the parent. In the interest of simplicity, we're
- // just always choosing the parent.
- if (isLiveOut(DFNode->getReg(),
- MRI.getVRegDef(child->getReg())->getParent(), LI)) {
- // Insert copies for parent
- for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
- if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
- unsigned SrcReg = DFNode->getReg();
- MachineBasicBlock* From = Inst->getOperand(i).getMBB();
-
- Waiting[From].insert(std::make_pair(SrcReg, DestReg));
- UsedByAnother.insert(SrcReg);
-
- PHIUnion.erase(SrcReg);
- }
- }
-
- // If a node is live-in to the defining block of one of its children, but
- // not live-out, then we need to scan that block for local interferences.
- } else if (isLiveIn(DFNode->getReg(),
- MRI.getVRegDef(child->getReg())->getParent(), LI) ||
- MRI.getVRegDef(DFNode->getReg())->getParent() ==
- MRI.getVRegDef(child->getReg())->getParent()) {
- // Add (p, c) to possible local interferences
- locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
- }
-
- if (!visited.count(child)) {
- worklist.push_back(child);
- inserted = true;
- }
- }
-
- if (!inserted) worklist.pop_back();
- }
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+ Node *Node = RegNodeMap[Reg];
+ Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
}
-/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
-/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
-/// problems.
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ Node *DestNode = RegNodeMap[DestReg];
+ if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+ return 0;
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+ if (SrcColor)
+ return SrcColor;
+ }
+ return 0;
+}
+
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+ Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+ Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
///
-/// Based on "Practical Improvements to the Construction and Destruction
-/// of Static Single Assignment Form" by Briggs, et al.
-void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
- std::set<unsigned>& pushed) {
- // FIXME: This function needs to update LiveIntervals
- std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB];
-
- std::multimap<unsigned, unsigned> worklist;
- std::map<unsigned, unsigned> map;
-
- // Setup worklist of initial copies
- for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
- E = copy_set.end(); I != E; ) {
- map.insert(std::make_pair(I->first, I->first));
- map.insert(std::make_pair(I->second, I->second));
-
- if (!UsedByAnother.count(I->second)) {
- worklist.insert(*I);
-
- // Avoid iterator invalidation
- std::multimap<unsigned, unsigned>::iterator OI = I;
- ++I;
- copy_set.erase(OI);
- } else {
- ++I;
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+/// congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at
+/// def(a_(i+1)). Thus, in order to test for an interference involving a_i, we
+/// need only check for a potential interference with a_(i+1).
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+ MachineBasicBlock &MBB,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+ // Sort defs by their order in the original basic block, as the code below
+ // assumes that it is processing definitions in dominance order.
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+ std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+ for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+ BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+ for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+ E = (*BBI)->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+
+ // FIXME: This would be faster if it were possible to bail out of checking
+ // an instruction's operands after the explicit defs, but this is incorrect
+ // for variadic instructions, which may appear before register allocation
+ // in the future.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned DestReg = MO.getReg();
+ if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // If the virtual register being defined is not used in any PHI or has
+ // already been isolated, then there are no more interferences to check.
+ unsigned DestColor = getRegColor(DestReg);
+ if (!DestColor)
+ continue;
+
+ // The input to this pass is sometimes not in SSA form in every basic
+ // block, as some virtual registers have redefinitions. We could eliminate
+ // this by fixing the passes that generate the non-SSA code, or we could
+ // handle it here by tracking defining machine instructions rather than
+ // virtual registers. For now, we just handle the situation conservatively
+ // in a way that will possibly lead to false interferences.
+ unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+ unsigned NewParent = CurrentParent;
+ if (NewParent == DestReg)
+ continue;
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+
+ // If NewParent is nonzero, then its definition dominates the current
+ // instruction, so it is only necessary to check for the liveness of
+ // NewParent in order to check for an interference.
+ if (NewParent
+ && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+ // If there is an interference, always isolate the new register. This
+ // could be improved by using a heuristic that decides which of the two
+ // registers to isolate.
+ isolateReg(DestReg);
+ CurrentParent = NewParent;
+ } else {
+ // If there is no interference, update ImmediateDominatingParent and set
+ // the CurrentDominatingParent for this color to the current register.
+ ImmediateDominatingParent[DestReg] = NewParent;
+ CurrentParent = DestReg;
+ }
}
}
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- MachineFunction* MF = MBB->getParent();
- MachineRegisterInfo& MRI = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
-
- // Iterate over the worklist, inserting copies
- while (!worklist.empty() || !copy_set.empty()) {
- while (!worklist.empty()) {
- std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
- std::pair<unsigned, unsigned> curr = *WI;
- worklist.erase(WI);
-
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
-
- if (isLiveOut(curr.second, MBB, LI)) {
- // Create a temporary
- unsigned t = MF->getRegInfo().createVirtualRegister(RC);
-
- // Insert copy from curr.second to a temporary at
- // the Phi defining curr.second
- MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
- BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY),
- t).addReg(curr.second);
- DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
- << "\n");
-
- // Push temporary on Stacks
- Stacks[curr.second].push_back(t);
-
- // Insert curr.second in pushed
- pushed.insert(curr.second);
-
- // Create a live interval for this temporary
- InsertedPHIDests.push_back(std::make_pair(t, --PI));
- }
-
- // Insert copy from map[curr.first] to curr.second
- BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]);
- map[curr.first] = curr.second;
- DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
- << curr.second << "\n");
-
- // Push this copy onto InsertedPHICopies so we can
- // update LiveIntervals with it.
- MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
- InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
-
- // If curr.first is a destination in copy_set...
- for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
- E = copy_set.end(); I != E; )
- if (curr.first == I->second) {
- std::pair<unsigned, unsigned> temp = *I;
- worklist.insert(temp);
-
- // Avoid iterator invalidation
- std::multimap<unsigned, unsigned>::iterator OI = I;
- ++I;
- copy_set.erase(OI);
-
+
+ // We now walk the PHIs in successor blocks and check for interferences. This
+ // is necessary because the uses of a PHI's operands are logically contained in
+ // the predecessor block. The def of a PHI's destination register is processed
+ // along with the other defs in a basic block.
+
+ CurrentPHIForColor.clear();
+
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ MachineInstr *PHI = BBI;
+
+ // If a PHI is already isolated, either by being isolated directly or
+ // having all of its operands isolated, ignore it.
+ unsigned Color = getPHIColor(PHI);
+ if (!Color)
+ continue;
+
+ // Find the index of the PHI operand that corresponds to this basic block.
+ unsigned PredIndex;
+ for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+ if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
break;
- } else {
- ++I;
- }
- }
-
- if (!copy_set.empty()) {
- std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
- std::pair<unsigned, unsigned> curr = *CI;
- worklist.insert(curr);
- copy_set.erase(CI);
-
- LiveInterval& I = LI.getInterval(curr.second);
- MachineBasicBlock::iterator term = MBB->getFirstTerminator();
- SlotIndex endIdx = SlotIndex();
- if (term != MBB->end())
- endIdx = LI.getInstructionIndex(term);
+ }
+ assert(PredIndex < PHI->getNumOperands());
+ unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ unsigned &CurrentParent = CurrentDominatingParent[Color];
+ unsigned NewParent = CurrentParent;
+ while (NewParent
+ && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+ CurrentParent = NewParent;
+
+ // If there is an interference with a register, always isolate the
+ // register rather than the PHI. It is also possible to isolate the
+ // PHI, but that introduces copies for all of the registers involved
+ // in that PHI.
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+ && NewParent != PredOperandReg)
+ isolateReg(NewParent);
+
+ std::pair<MachineInstr*, unsigned>
+ &CurrentPHI = CurrentPHIForColor[Color];
+
+ // If two PHIs have the same operand from every shared predecessor, then
+ // they don't actually interfere. Otherwise, isolate the current PHI. This
+ // could possibly be improved, e.g. we could isolate the PHI with the
+ // fewest operands.
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+ isolatePHI(PHI);
else
- endIdx = LI.getMBBEndIdx(MBB);
-
- if (I.liveAt(endIdx)) {
- const TargetRegisterClass *RC =
- MF->getRegInfo().getRegClass(curr.first);
-
- // Insert a copy from dest to a new temporary t at the end of b
- unsigned t = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), t).addReg(curr.second);
- map[curr.second] = t;
-
- MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
- InsertedPHIDests.push_back(std::make_pair(t, --TI));
- }
- }
- }
-
- // Renumber the instructions so that we can perform the index computations
- // needed to create new live intervals.
- LI.renumber();
-
- // For copies that we inserted at the ends of predecessors, we construct
- // live intervals. This is pretty easy, since we know that the destination
- // register cannot have be in live at that point previously. We just have
- // to make sure that, for registers that serve as inputs to more than one
- // PHI, we don't create multiple overlapping live intervals.
- std::set<unsigned> RegHandled;
- for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
- InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
- if (RegHandled.insert(I->first).second) {
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- SlotIndex instrIdx = LI.getInstructionIndex(I->second);
- if (Int.liveAt(instrIdx.getDefIndex()))
- Int.removeRange(instrIdx.getDefIndex(),
- LI.getMBBEndIdx(I->second->getParent()).getNextSlot(),
- true);
-
- LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
- R.valno->setCopy(I->second);
- R.valno->def = LI.getInstructionIndex(I->second).getDefIndex();
+ CurrentPHI = std::make_pair(PHI, PredOperandReg);
}
}
}
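
// Editor's illustration of the dominance test above (hypothetical vregs in
// one congruence class): if def(%a) dom def(%b) dom def(%c), then at def(%c)
// only the nearest dominating class member whose def still dominates and
// whose color survives needs checking; the walk pops from %b to %a via
// ImmediateDominatingParent when %b fails either test. A liveness hit calls
// isolateReg(%c) and keeps the popped-to parent; a miss records %c as the
// new CurrentDominatingParent for the color.
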
-/// InsertCopies - insert copies into MBB and all of its successors
-void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
- SmallPtrSet<MachineBasicBlock*, 16>& visited) {
- MachineBasicBlock* MBB = MDTN->getBlock();
- visited.insert(MBB);
-
- std::set<unsigned> pushed;
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- // Rewrite register uses from Stacks
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- if (I->isPHI())
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+ MachineBasicBlock *MBB) {
+ assert(PHI->isPHI());
+ ++NumPHIsLowered;
+ unsigned PHIColor = getPHIColor(PHI);
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = PHI->getOperand(i);
+
+ // If a source is defined by an implicit def, there is no need to insert a
+ // copy in the predecessor.
+ if (SrcMO.isUndef())
continue;
-
- for (unsigned i = 0; i < I->getNumOperands(); ++i)
- if (I->getOperand(i).isReg() &&
- Stacks[I->getOperand(i).getReg()].size()) {
- // Remove the live range for the old vreg.
- LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
- LiveInterval::iterator OldLR =
- OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex());
- if (OldLR != OldInt.end())
- OldInt.removeRange(*OldLR, true);
-
- // Change the register
- I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
-
- // Add a live range for the new vreg
- LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
- VNInfo* FirstVN = *Int.vni_begin();
- FirstVN->setHasPHIKill(false);
- LiveRange LR (LI.getMBBStartIdx(I->getParent()),
- LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
- FirstVN);
-
- Int.addRange(LR);
+
+ unsigned SrcReg = SrcMO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+ unsigned SrcColor = getRegColor(SrcReg);
+
+ // If neither the PHI nor the operand were isolated, then we only need to
+ // set the phi-kill flag on the VNInfo at this PHI.
+ if (PHIColor && SrcColor == PHIColor) {
+ LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+ SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+ VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
+ assert(SrcVNI);
+ SrcVNI->setHasPHIKill(true);
+ continue;
+ }
+
+ unsigned CopyReg = 0;
+ if (PHIColor) {
+ SrcCopyMap::const_iterator I
+ = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+ CopyReg
+ = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+ }
+
+ if (!CopyReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineBasicBlock::iterator
+ CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+ unsigned SrcSubReg = SrcMO.getSubReg();
+ MachineInstr *CopyInstr = BuildMI(*PredBB,
+ CopyInsertPoint,
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyReg).addReg(SrcReg, 0, SrcSubReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ ++NumSrcCopiesInserted;
+
+ // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+ // the newly added range.
+ LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+ InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+ addReg(CopyReg);
+ if (PHIColor) {
+ unionRegs(PHIColor, CopyReg);
+ assert(getRegColor(CopyReg) != CopyReg);
+ } else {
+ PHIColor = CopyReg;
+ assert(getRegColor(CopyReg) == CopyReg);
}
- }
+
+ if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
+ InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
+ }
+
+ SrcMO.setReg(CopyReg);
+
+ // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+ // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+ // processed later, but this is still correct to do at this point because we
+ // never rely on LiveIntervals being correct while inserting copies.
+ // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+ // pass does?
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+ if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+ SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
+ }
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ unsigned DestColor = getRegColor(DestReg);
+
+ if (PHIColor && DestColor == PHIColor) {
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+
+ // Set the phi-def flag for the VN at this PHI.
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->setIsPHIDef(true);
- // Schedule the copies for this block
- ScheduleCopies(MBB, pushed);
-
- // Recur down the dominator tree.
- for (MachineDomTreeNode::iterator I = MDTN->begin(),
- E = MDTN->end(); I != E; ++I)
- if (!visited.count((*I)->getBlock()))
- InsertCopies(*I, visited);
-
- // As we exit this block, pop the names we pushed while processing it
- for (std::set<unsigned>::iterator I = pushed.begin(),
- E = pushed.end(); I != E; ++I)
- Stacks[*I].pop_back();
+ // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+ // instruction. After PHI elimination, PHI instructions are replaced by VNs
+ // with the phi-def flag set, and the live ranges of these VNs start at the
+ // beginning of the basic block.
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ DestVNI->def = MBBStartIndex;
+ DestLI.addRange(LiveRange(MBBStartIndex,
+ PHIIndex.getDefIndex(),
+ DestVNI));
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+ unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineInstr *CopyInstr = BuildMI(*MBB,
+ MBB->SkipPHIsAndLabels(MBB->begin()),
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DestReg).addReg(CopyReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ PHI->getOperand(0).setReg(CopyReg);
+ ++NumDestCopiesInserted;
+
+ // Add the region from the beginning of MBB to the copy instruction to
+ // CopyReg's live interval, and give the VNInfo the phidef flag.
+ LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+ VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+ CopyInstr,
+ LI->getVNInfoAllocator());
+ CopyVNI->setIsPHIDef(true);
+ CopyLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getDefIndex(),
+ CopyVNI));
+
+ // Adjust DestReg's live interval to adjust for its new definition at
+ // CopyInstr.
+ LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->def = DestCopyIndex.getDefIndex();
+
+ InsertedDestCopies[CopyReg] = CopyInstr;
}
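
// Editor's before/after sketch of the rewrite performed above (hypothetical
// vregs; assume neither the PHI nor its sources share a color):
//
//   BB2: %v5 = PHI %v1, <BB0>, %v2, <BB1>
//
// becomes, before the PHI is finally erased in runOnMachineFunction:
//
//   BB0: %c0 = COPY %v1                    // source copy per predecessor
//   BB1: %c1 = COPY %v2
//   BB2: %c5 = PHI %c0, <BB0>, %c1, <BB1>
//        %v5 = COPY %c5                    // dest copy after PHIs and labels
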
-bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
- unsigned secondary) {
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- LiveInterval& LHS = LI.getOrCreateInterval(primary);
- LiveInterval& RHS = LI.getOrCreateInterval(secondary);
-
- LI.renumber();
-
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+ if (Reg == NewReg)
+ return;
+
+ LiveInterval &OldLI = LI->getInterval(Reg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ // Merge the live ranges of the two registers.
DenseMap<VNInfo*, VNInfo*> VNMap;
- for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- LiveRange R = *I;
-
- SlotIndex Start = R.start;
- SlotIndex End = R.end;
- if (LHS.getLiveRangeContaining(Start))
- return false;
-
- if (LHS.getLiveRangeContaining(End))
- return false;
-
- LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
- if (RI != LHS.end() && RI->start < End)
- return false;
- }
-
- for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- LiveRange R = *I;
- VNInfo* OldVN = R.valno;
- VNInfo*& NewVN = VNMap[OldVN];
+ for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+ LRI != LRE; ++LRI) {
+ LiveRange OldLR = *LRI;
+ VNInfo *OldVN = OldLR.valno;
+
+ VNInfo *&NewVN = VNMap[OldVN];
if (!NewVN) {
- NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator());
+ NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+ VNMap[OldVN] = NewVN;
}
-
- LiveRange LR (R.start, R.end, NewVN);
- LHS.addRange(LR);
- }
-
- LI.removeInterval(RHS.reg);
-
- return true;
-}
-bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
-
- // Compute DFS numbers of each block
- computeDFS(Fn);
-
- // Determine which phi node operands need copies
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
- if (!I->empty() && I->begin()->isPHI())
- processBlock(I);
-
- // Break interferences where two different phis want to coalesce
- // in the same register.
- std::set<unsigned> seen;
- typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
- RenameSetType;
- for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
- I != E; ++I) {
- for (std::map<unsigned, MachineBasicBlock*>::iterator
- OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
- if (!seen.count(OI->first)) {
- seen.insert(OI->first);
- ++OI;
- } else {
- Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
- unsigned reg = OI->first;
- ++OI;
- I->second.erase(reg);
- DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
- << "\n");
- }
- }
+ LiveRange LR(OldLR.start, OldLR.end, NewVN);
+ NewLI.addRange(LR);
}
-
- // Insert copies
- // FIXME: This process should probably preserve LiveIntervals
- SmallPtrSet<MachineBasicBlock*, 16> visited;
- MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
- InsertCopies(MDT.getRootNode(), visited);
-
- // Perform renaming
- for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
- I != E; ++I)
- while (I->second.size()) {
- std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
-
- DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
-
- if (SI->first != I->first) {
- if (mergeLiveIntervals(I->first, SI->first)) {
- Fn.getRegInfo().replaceRegWith(SI->first, I->first);
-
- if (RenameSets.count(SI->first)) {
- I->second.insert(RenameSets[SI->first].begin(),
- RenameSets[SI->first].end());
- RenameSets.erase(SI->first);
- }
- } else {
- // Insert a last-minute copy if a conflict was detected.
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
- BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), I->first).addReg(SI->first);
-
- LI.renumber();
-
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- SlotIndex instrIdx =
- LI.getInstructionIndex(--SI->second->getFirstTerminator());
- if (Int.liveAt(instrIdx.getDefIndex()))
- Int.removeRange(instrIdx.getDefIndex(),
- LI.getMBBEndIdx(SI->second).getNextSlot(), true);
- LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
- --SI->second->getFirstTerminator());
- R.valno->setCopy(--SI->second->getFirstTerminator());
- R.valno->def = instrIdx.getDefIndex();
-
- DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
- << I->first << "\n");
- }
- }
-
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- const LiveRange* LR =
- Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
- LR->valno->setHasPHIKill(true);
-
- I->second.erase(SI->first);
- }
-
- // Remove PHIs
- std::vector<MachineInstr*> phis;
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
- BI != BE; ++BI)
- if (BI->isPHI())
- phis.push_back(BI);
- }
-
- for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
- I != E; ) {
- MachineInstr* PInstr = *(I++);
-
- // If this is a dead PHI node, then remove it from LiveIntervals.
- unsigned DestReg = PInstr->getOperand(0).getReg();
- LiveInterval& PI = LI.getInterval(DestReg);
- if (PInstr->registerDefIsDead(DestReg)) {
- if (PI.containsOneValue()) {
- LI.removeInterval(DestReg);
- } else {
- SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
- PI.removeRange(*PI.getLiveRangeContaining(idx), true);
- }
- } else {
- // Trim live intervals of input registers. They are no longer live into
- // this block if they died after the PHI. If they lived after it, don't
- // trim them because they might have other legitimate uses.
- for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
- unsigned reg = PInstr->getOperand(i).getReg();
-
- MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
- LiveInterval& InputI = LI.getInterval(reg);
- if (MBB != PInstr->getParent() &&
- InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
- InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex()))
- InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
- LI.getInstructionIndex(PInstr),
- true);
- }
-
- // If the PHI is not dead, then the valno defined by the PHI
- // now has an unknown def.
- SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
- const LiveRange* PLR = PI.getLiveRangeContaining(idx);
- PLR->valno->setIsPHIDef(true);
- LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
- PLR->start, PLR->valno);
- PI.addRange(R);
- }
-
- LI.RemoveMachineInstrFromMaps(PInstr);
- PInstr->eraseFromParent();
- }
-
- LI.renumber();
-
- return true;
+ // Remove the LiveInterval for the register being renamed and replace all
+ // of its defs and uses with the new register.
+ LI->removeInterval(Reg);
+ MRI->replaceRegWith(Reg, NewReg);
}
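
// Editor's note on the merge above: value numbers are cloned, not shared, so
// each VNInfo of the old interval maps to a fresh VNInfo in the new one.
// With hypothetical registers:
//
//   MergeLIsAndRename(OldReg, NewReg);
//   // OldReg's ranges now live in NewReg's interval with copied VNInfos,
//   // OldReg's interval is removed, and replaceRegWith rewrites every
//   // operand that mentioned OldReg to use NewReg.
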
diff --git a/src/LLVM/lib/CodeGen/TailDuplication.cpp b/src/LLVM/lib/CodeGen/TailDuplication.cpp
index a815b36..3a6211a 100644
--- a/src/LLVM/lib/CodeGen/TailDuplication.cpp
+++ b/src/LLVM/lib/CodeGen/TailDuplication.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -34,6 +35,7 @@
STATISTIC(NumTailDups , "Number of tail duplicated blocks");
STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs , "Number of phis added");
// Heuristic for tail duplication.
static cl::opt<unsigned>
@@ -80,19 +82,36 @@
void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB,
DenseMap<unsigned, unsigned> &LocalVRMap,
- SmallVector<std::pair<unsigned,unsigned>, 4> &Copies);
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &UsedByPhi,
+ bool Remove);
void DuplicateInstruction(MachineInstr *MI,
MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB,
MachineFunction &MF,
- DenseMap<unsigned, unsigned> &LocalVRMap);
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi);
void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallSetVector<MachineBasicBlock*, 8> &Succs);
bool TailDuplicateBlocks(MachineFunction &MF);
- bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+ bool shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple, MachineBasicBlock &TailBB);
+ bool isSimpleBB(MachineBasicBlock *TailBB);
+ bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+ bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF);
+
void RemoveDeadBlock(MachineBasicBlock *MBB);
};
@@ -146,11 +165,11 @@
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
if (CheckExtra && !Preds.count(PHIBB)) {
- // This is not a hard error.
dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
<< ": " << *MI;
dbgs() << " extra input from predecessor BB#"
<< PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
}
if (PHIBB->getNumber() < 0) {
dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
@@ -163,6 +182,109 @@
}
}
+/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+ // FIXME: Should it be SSAUpdate's job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
+
/// TailDuplicateBlocks - Look for small blocks that are unconditionally
/// branched to and do not fall through. Tail-duplicate their instructions
/// into their predecessors to eliminate (dynamic) branches.
@@ -174,104 +296,23 @@
VerifyPHIs(MF, true);
}
- SmallVector<MachineInstr*, 8> NewPHIs;
- MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
-
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
if (NumTails == TailDupLimit)
break;
- // Only duplicate blocks that end with unconditional branches.
- if (MBB->canFallThrough())
+ bool IsSimple = isSimpleBB(MBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
continue;
- // Save the successors list.
- SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
- MBB->succ_end());
-
- SmallVector<MachineBasicBlock*, 8> TDBBs;
- SmallVector<MachineInstr*, 16> Copies;
- if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
- ++NumTails;
-
- // TailBB's immediate successors are now successors of those predecessors
- // which duplicated TailBB. Add the predecessors as sources to the PHI
- // instructions.
- bool isDead = MBB->pred_empty();
- if (PreRegAlloc)
- UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
-
- // If it is dead, remove it.
- if (isDead) {
- NumInstrDups -= MBB->size();
- RemoveDeadBlock(MBB);
- ++NumDeadBlocks;
- }
-
- // Update SSA form.
- if (!SSAUpdateVRs.empty()) {
- for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
- unsigned VReg = SSAUpdateVRs[i];
- SSAUpdate.Initialize(VReg);
-
- // If the original definition is still around, add it as an available
- // value.
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
- MachineBasicBlock *DefBB = 0;
- if (DefMI) {
- DefBB = DefMI->getParent();
- SSAUpdate.AddAvailableValue(DefBB, VReg);
- }
-
- // Add the new vregs as available values.
- DenseMap<unsigned, AvailableValsTy>::iterator LI =
- SSAUpdateVals.find(VReg);
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
- unsigned SrcReg = LI->second[j].second;
- SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
- }
-
- // Rewrite uses that are outside of the original def's block.
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
- while (UI != MRI->use_end()) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = &*UI;
- ++UI;
- if (UseMI->getParent() == DefBB)
- continue;
- SSAUpdate.RewriteUse(UseMO);
- }
- }
-
- SSAUpdateVRs.clear();
- SSAUpdateVals.clear();
- }
-
- // Eliminate some of the copies inserted by tail duplication to maintain
- // SSA form.
- for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- MachineInstr *Copy = Copies[i];
- if (!Copy->isCopy())
- continue;
- unsigned Dst = Copy->getOperand(0).getReg();
- unsigned Src = Copy->getOperand(1).getReg();
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
- if (++UI == MRI->use_end()) {
- // Copy is the only use. Do trivial copy propagation here.
- MRI->replaceRegWith(Dst, Src);
- Copy->eraseFromParent();
- }
- }
-
- if (PreRegAlloc && TailDupVerify)
- VerifyPHIs(MF, false);
- MadeChange = true;
- }
+ MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
}
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+
return MadeChange;
}
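
One subtle point in the copy cleanup inside TailDuplicateAndUpdate: MRI->use_begin(Src) already yields the COPY itself, so the ++UI == use_end() test means "the COPY is the only user of Src". A standalone model of that single-use propagation, with hypothetical Copy and use-count structures in place of MachineRegisterInfo:

    #include <unordered_map>
    #include <vector>

    struct Copy { unsigned dst, src; bool erased = false; };

    // A COPY whose source has exactly one use (the COPY itself) can be
    // folded by renaming dst -> src and deleting the COPY, mirroring the
    // replaceRegWith/eraseFromParent pair in the pass.
    static void propagateSingleUseCopies(
        std::vector<Copy> &copies,
        const std::unordered_map<unsigned, unsigned> &useCount,
        std::unordered_map<unsigned, unsigned> &renames) {
      for (Copy &c : copies) {
        auto it = useCount.find(c.src);
        if (it == useCount.end() || it->second != 1)
          continue;                 // some other user still reads c.src
        renames[c.dst] = c.src;     // stands in for replaceRegWith(Dst, Src)
        c.erased = true;            // stands in for eraseFromParent()
      }
    }
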
@@ -280,6 +321,8 @@
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
UE = MRI->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
+ if (UseMI->isDebugValue())
+ continue;
if (UseMI->getParent() != BB)
return true;
}
@@ -293,6 +336,24 @@
return 0;
}
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are liveout while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<unsigned> *UsedByPhi) {
+ for (MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
/// SSA update.
void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
@@ -315,7 +376,9 @@
MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB,
DenseMap<unsigned, unsigned> &LocalVRMap,
- SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) {
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ bool Remove) {
unsigned DefReg = MI->getOperand(0).getReg();
unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
assert(SrcOpIdx && "Unable to find matching PHI source?");
@@ -327,9 +390,12 @@
// available value liveout of the block.
unsigned NewDef = MRI->createVirtualRegister(RC);
Copies.push_back(std::make_pair(NewDef, SrcReg));
- if (isDefLiveOut(DefReg, TailBB, MRI))
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+ if (!Remove)
+ return;
+
// Remove PredBB from the PHI node.
MI->RemoveOperand(SrcOpIdx+1);
MI->RemoveOperand(SrcOpIdx);
@@ -343,21 +409,22 @@
MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB,
MachineFunction &MF,
- DenseMap<unsigned, unsigned> &LocalVRMap) {
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi) {
MachineInstr *NewMI = TII->duplicate(MI, MF);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (MO.isDef()) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
unsigned NewReg = MRI->createVirtualRegister(RC);
MO.setReg(NewReg);
LocalVRMap.insert(std::make_pair(Reg, NewReg));
- if (isDefLiveOut(Reg, TailBB, MRI))
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
AddSSAUpdateEntry(Reg, NewReg, PredBB);
} else {
DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
@@ -416,6 +483,13 @@
// This register is defined in the tail block.
for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
MachineBasicBlock *SrcBB = LI->second[j].first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+ // might still have added an entry to SSAUpdateVals to correctly
+ // recompute SSA. In that case, avoid adding a dummy extra argument to
+ // this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
unsigned SrcReg = LI->second[j].second;
if (Idx != 0) {
II->getOperand(Idx).setReg(SrcReg);
@@ -448,65 +522,225 @@
}
}
-/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
-/// of its predecessors.
+/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
bool
-TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
- SmallVector<MachineBasicBlock*, 8> &TDBBs,
- SmallVector<MachineInstr*, 16> &Copies) {
- // Set the limit on the number of instructions to duplicate, with a default
- // of one less than the tail-merge threshold. When optimizing for size,
+TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
- if (PreRegAlloc) {
- // Pre-regalloc tail duplication hurts compile time and doesn't help
- // much except for indirect branches.
- if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
- return false;
- // If the target has hardware branch prediction that can handle indirect
- // branches, duplicating them can often make them predictable when there
- // are common paths through the code. The limit needs to be high enough
- // to allow undoing the effects of tail merging and other optimizations
- // that rearrange the predecessors of the indirect branch.
- MaxDuplicateCount = 20;
- }
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
- // Don't try to tail-duplicate single-block loops.
- if (TailBB->isSuccessor(TailBB))
- return false;
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- bool HasCall = false;
- for (MachineBasicBlock::iterator I = TailBB->begin();
- I != TailBB->end(); ++I) {
+ for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
+ ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable()) return false;
+ if (I->getDesc().isNotDuplicable())
+ return false;
+
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn()) return false;
- // Don't duplicate more than the threshold.
- if (InstrCount == MaxDuplicateCount) return false;
- // Remember if we saw a call.
- if (I->getDesc().isCall()) HasCall = true;
+ if (PreRegAlloc && I->getDesc().isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls present a
+ // barrier to register allocation, so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && I->getDesc().isCall())
+ return false;
+
if (!I->isPHI() && !I->isDebugValue())
InstrCount += 1;
- }
- // Heuristically, don't tail-duplicate calls if it would expand code size,
- // as it's less likely to be worth the extra cost.
- if (InstrCount > 1 && HasCall)
- return false;
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ MachineBasicBlock::iterator E = TailBB->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I == E)
+ return true;
+ return I->getDesc().isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+ SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+ for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+ SE = A.succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *BB = *SI;
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+ }
+
+ return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+ for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->getLandingPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect edges that pointed at TailBB to its single successor.
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = NULL;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = NULL;
+ if (PredTBB == NextBB && PredFBB == NULL)
+ PredTBB = NULL;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ PredBB->removeSuccessor(TailBB);
+ unsigned NumSuccessors = PredBB->succ_size();
+ assert(NumSuccessors <= 1);
+ if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+ PredBB->addSuccessor(NewTarget);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies) {
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
// Iterate through all the unique predecessors and tail-duplicate this
// block into them, if possible. Copying the list ahead of time also
// avoids trouble with the predecessor list reallocating.
@@ -519,7 +753,9 @@
assert(TailBB != PredBB &&
"Single-block loop should have been rejected earlier!");
- if (PredBB->succ_size() > 1) continue;
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() > 1)
+ continue;
MachineBasicBlock *PredTBB, *PredFBB;
SmallVector<MachineOperand, 4> PredCond;
@@ -527,9 +763,6 @@
continue;
if (!PredCond.empty())
continue;
- // EH edges are ignored by AnalyzeBranch.
- if (PredBB->succ_size() != 1)
- continue;
// Don't duplicate into a fall-through predecessor (at least for now).
if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
continue;
@@ -552,11 +785,11 @@
if (MI->isPHI()) {
// Replace the uses of the def of the PHI with the register coming
// from PredBB.
- ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos);
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
} else {
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
- DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap);
+ DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
}
}
MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
@@ -565,6 +798,10 @@
TII->get(TargetOpcode::COPY),
CopyInfos[i].first).addReg(CopyInfos[i].second));
}
+
+ // Simplify the branch; AnalyzeBranch with AllowModify=true cleans it up.
+ TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
// Update the CFG.
@@ -585,12 +822,11 @@
MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
SmallVector<MachineOperand, 4> PriorCond;
- bool PriorUnAnalyzable =
- TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true);
// This has to check PrevBB->succ_size() because EH edges are ignored by
// AnalyzeBranch.
- if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
- TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 &&
+ if (PrevBB->succ_size() == 1 &&
+ !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
<< "From MBB: " << *TailBB);
@@ -603,7 +839,7 @@
// Replace the uses of the def of the PHI with the register coming
// from PredBB.
MachineInstr *MI = &*I++;
- ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos);
+ ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
if (MI->getParent())
MI->eraseFromParent();
}
@@ -613,7 +849,7 @@
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
MachineInstr *MI = &*I++;
- DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap);
+ DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
@@ -634,6 +870,57 @@
Changed = true;
}
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+ // If we made no changes so far, we are safe.
+ if (!Changed)
+ return Changed;
+
+
+ // Handle the nasty case in which we duplicated a block that is part of a loop
+ // into some but not all of its predecessors. For example:
+ // 1 -> 2 <-> 3 |
+ // \ |
+ // \---> rest |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ // 12 -> 3 <-> 2 -> rest |
+ // \ / |
+ // \----->-----/ |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ continue;
+
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
+ }
+ }
+
return Changed;
}
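
The branch rewriting in duplicateSimpleBB above follows a fixed normalization order: make the false side explicit, make fall-through explicit, redirect edges that hit TailBB, collapse to an unconditional branch when both sides agree, then drop branches that became fall-throughs again. A standalone sketch over opaque block handles, where a null pointer plays the same "fall through" role it does in AnalyzeBranch:

    struct Block;   // opaque block handle for the sketch

    // Mirrors the PredTBB/PredFBB rewriting: Tail is bypassed, NewTarget is
    // its single successor, Next is the predecessor's layout successor, and
    // hasCond corresponds to !PredCond.empty().
    static void redirectAroundTail(Block *&TBB, Block *&FBB, bool &hasCond,
                                   Block *Tail, Block *NewTarget, Block *Next) {
      if (!hasCond) FBB = TBB;          // make the false side explicit
      if (!TBB) TBB = Next;             // make fall-through explicit
      if (!FBB) FBB = Next;
      if (FBB == Tail) FBB = NewTarget; // bypass the tail block
      if (TBB == Tail) TBB = NewTarget;
      if (TBB == FBB) {                 // both sides agree: unconditional
        hasCond = false;
        FBB = nullptr;
      }
      if (FBB == Next) FBB = nullptr;   // never branch to the fall-through
      if (TBB == Next && !FBB) TBB = nullptr;
    }
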
@@ -650,4 +937,3 @@
// Remove the block.
MBB->eraseFromParent();
}
-
diff --git a/src/LLVM/lib/CodeGen/TargetInstrInfoImpl.cpp b/src/LLVM/lib/CodeGen/TargetInstrInfoImpl.cpp
index 6e4a0d8..f32678f 100644
--- a/src/LLVM/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/src/LLVM/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -22,13 +22,18 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void
@@ -54,8 +59,8 @@
// the two operands returned by findCommutedOpIndices.
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
- const TargetInstrDesc &TID = MI->getDesc();
- bool HasDef = TID.getNumDefs();
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool HasDef = MCID.getNumDefs();
if (HasDef && !MI->getOperand(0).isReg())
// No idea how to commute this instruction. Target should implement its own.
return 0;
@@ -69,23 +74,25 @@
assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(Idx1).getReg();
unsigned Reg2 = MI->getOperand(Idx2).getReg();
bool Reg1IsKill = MI->getOperand(Idx1).isKill();
bool Reg2IsKill = MI->getOperand(Idx2).isKill();
- bool ChangeReg0 = false;
- if (HasDef && MI->getOperand(0).getReg() == Reg1) {
- // Must be two address instruction!
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
- "Expecting a two-address instruction!");
+ // If destination is tied to either of the commuted source register, then
+ // it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
Reg2IsKill = false;
- ChangeReg0 = true;
+ Reg0 = Reg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
}
if (NewMI) {
// Create a new instruction.
- unsigned Reg0 = HasDef
- ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
MachineFunction &MF = *MI->getParent()->getParent();
if (HasDef)
@@ -99,8 +106,8 @@
.addReg(Reg1, getKillRegState(Reg2IsKill));
}
- if (ChangeReg0)
- MI->getOperand(0).setReg(Reg2);
+ if (HasDef)
+ MI->getOperand(0).setReg(Reg0);
MI->getOperand(Idx2).setReg(Reg1);
MI->getOperand(Idx1).setReg(Reg2);
MI->getOperand(Idx2).setIsKill(Reg1IsKill);
@@ -114,12 +121,12 @@
bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isCommutable())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
return false;
// This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
// is not true, then the target must implement this.
- SrcOpIdx1 = TID.getNumDefs();
+ SrcOpIdx1 = MCID.getNumDefs();
SrcOpIdx2 = SrcOpIdx1 + 1;
if (!MI->getOperand(SrcOpIdx1).isReg() ||
!MI->getOperand(SrcOpIdx2).isReg())
@@ -132,12 +139,12 @@
bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isPredicable())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
return false;
-
+
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (TID.OpInfo[i].isPredicate()) {
+ if (MCID.OpInfo[i].isPredicate()) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
MO.setReg(Pred[j].getReg());
@@ -155,6 +162,42 @@
return MadeChange;
}
+bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
@@ -166,8 +209,10 @@
MBB.insert(I, MI);
}
-bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const {
+bool
+TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
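
For the commuteInstruction change a few hunks up, the update is easiest to see on a toy three-operand instruction: the destination register now follows whichever commuted source it is tied to, and the corresponding kill flag is cleared before the sources swap. tied1/tied2 below stand in for the MCOI::TIED_TO constraint queries:

    struct ToyInstr {
      unsigned reg0, reg1, reg2;   // def, first source, second source
      bool kill1, kill2;           // kill flags on the two sources
    };

    // Mirrors the operand bookkeeping in commuteInstruction (in-place form).
    static void commute(ToyInstr &mi, bool tied1, bool tied2) {
      unsigned reg0 = mi.reg0;
      bool kill1 = mi.kill1, kill2 = mi.kill2;
      if (reg0 == mi.reg1 && tied1) {
        kill2 = false;             // reg2 becomes the def; it can't be a kill
        reg0 = mi.reg2;
      } else if (reg0 == mi.reg2 && tied2) {
        kill1 = false;
        reg0 = mi.reg1;
      }
      unsigned oldReg1 = mi.reg1;
      mi.reg0 = reg0;
      mi.reg1 = mi.reg2; mi.kill1 = kill2;  // swap sources and kill flags
      mi.reg2 = oldReg1; mi.kill2 = kill1;
    }
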
@@ -205,8 +250,7 @@
if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
return RC->contains(LiveOp.getReg()) ? RC : 0;
- const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg);
- if (RC == LiveRC || RC->hasSubClass(LiveRC))
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
return RC;
// FIXME: Allow folding when register classes are memory compatible.
@@ -252,9 +296,9 @@
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- Flags, /*Offset=*/0,
- MFI.getObjectSize(FI),
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -318,6 +362,19 @@
const TargetInstrInfo &TII = *TM.getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ // Remat clients assume operand 0 is the defined register.
+ if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
+ return false;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // A sub-register definition can only be rematerialized if the instruction
+ // doesn't read the other parts of the register. Otherwise it is really a
+ // read-modify-write operation on the full virtual register which cannot be
+ // moved safely.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
+ MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
+ return false;
+
// A load from a fixed stack slot can be rematerialized. This may be
// redundant with subsequent checks, but it's target-independent,
// simple, and a common case.
@@ -326,15 +383,20 @@
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
// Avoid instructions obviously unsafe for remat.
- if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() ||
- TID.mayStore())
+ if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
return false;
// Avoid instructions which load from potentially varying memory.
- if (TID.mayLoad() && !MI->isInvariantLoad(AA))
+ if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -372,13 +434,9 @@
continue;
}
- // Only allow one virtual-register def, and that in the first operand.
- if (MO.isDef() != (i == 0))
- return false;
-
- // For the def, it should be the only def of that register.
- if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
- MRI.isLiveIn(Reg)))
+ // Only allow one virtual-register def. There may be multiple defs of the
+ // same virtual register, though.
+ if (MO.isDef() && Reg != DefReg)
return false;
// Don't allow any virtual-register uses. Rematting an instruction with
@@ -414,8 +472,24 @@
return false;
}
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
- return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II);
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
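
The split into CreateTargetHazardRecognizer (pre-RA, dummy by default) and CreateTargetPostRAHazardRecognizer (scoreboard-based) rests on the base class being a no-op: every query answers "no hazard". A simplified stand-in for that contract (not the real ScheduleHazardRecognizer interface, which works on SUnit and stall counts):

    // The base recognizer is the "dummy" the comment above refers to:
    // every query lets the instruction issue immediately.
    struct ToyHazardRecognizer {
      enum HazardType { NoHazard, Hazard };
      virtual ~ToyHazardRecognizer() {}
      virtual HazardType getHazardType(const void * /*SU*/) { return NoHazard; }
      virtual void EmitInstruction(const void * /*SU*/) {}  // nothing to record
      virtual void AdvanceCycle() {}                        // nothing to rotate
    };
    // A scoreboard recognizer overrides all three, tracking in-flight
    // instructions per functional unit against the instruction itineraries.
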
diff --git a/src/LLVM/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/src/LLVM/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 0000000..3848f4d
--- /dev/null
+++ b/src/LLVM/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,629 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+
+MCSymbol *
+TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
+ Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ return Mang->getSymbol(GV);
+ break;
+ case dwarf::DW_EH_PE_pcrel: {
+ return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+ Mang->getSymbol(GV)->getName());
+ break;
+ }
+ }
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ StringRef Prefix = ".data.";
+ NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ const MCSection *Sec = getContext().getELFSection(NameData,
+ ELF::SHT_PROGBITS,
+ Flags,
+ SectionKind::getDataRel(),
+ 0, Label->getName());
+ Streamer.SwitchSection(Sec);
+ Streamer.EmitValueToAlignment(8);
+ Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::Create(8, getContext());
+ Streamer.EmitELFSize(Label, E);
+ Streamer.EmitLabel(Label);
+
+ unsigned Size = TM.getTargetData()->getPointerSize();
+ Streamer.EmitSymbolValue(Sym, Size);
+}
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+ // N.B.: The defaults used here are not the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce
+ // .section .eh_frame,"a",@progbits
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return ELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return ELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind);
+}
+
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionsSections and DataSections.
+static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".text.";
+ if (Kind.isReadOnly()) return ".rodata.";
+
+ if (Kind.isThreadData()) return ".tdata.";
+ if (Kind.isThreadBSS()) return ".tbss.";
+
+ if (Kind.isDataNoRel()) return ".data.";
+ if (Kind.isDataRelLocal()) return ".data.rel.local.";
+ if (Kind.isDataRel()) return ".data.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".data.rel.ro.";
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ !Kind.isCommon() && !Kind.isBSS()) {
+ const char *Prefix;
+ Prefix = getSectionPrefixForGlobal(Kind);
+
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin(), Sym->getName().end());
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (GV->isWeakForLinker()) {
+ Group = Sym->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+ return getContext().getELFSection(Name.str(),
+ getELFSectionType(Name.str(), Kind),
+ Flags, Kind, 0, Group);
+ }
+
+ if (Kind.isText()) return TextSection;
+
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString() ||
+ Kind.isMergeable4ByteCString()) {
+
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align =
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+ const char *SizeSpec = ".rodata.str1.";
+ if (Kind.isMergeable2ByteCString())
+ SizeSpec = ".rodata.str2.";
+ else if (Kind.isMergeable4ByteCString())
+ SizeSpec = ".rodata.str4.";
+ else
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+
+ std::string Name = SizeSpec + utostr(Align);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ Kind);
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ return ReadOnlySection; // .const
+ }
+
+ if (Kind.isReadOnly()) return ReadOnlySection;
+
+ if (Kind.isThreadData()) return TLSDataSection;
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+
+ if (Kind.isDataNoRel()) return DataSection;
+ if (Kind.isDataRelLocal()) return DataRelLocalSection;
+ if (Kind.isDataRel()) return DataRelSection;
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+const MCExpr *TargetLoweringObjectFileELF::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI,
+ unsigned Encoding, MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+const MCSection *TargetLoweringObjectFileMachO::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
+ "' has an invalid section specifier '" + GV->getSection()+
+ "': " + ErrorCode + ".");
+ // Fall back to dropping it into the data section.
+ return DataSection;
+ }
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // If TAA wasn't set by ParseSectionSpecifier() above,
+ // use the value returned by getMachOSection() as a default.
+ if (!TAAParsed)
+ TAA = S->getTypeAndAttributes();
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getNameStr() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+ // externally visible label; this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return UStringSection;
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Put zero initialized globals with strong external linkage in the
+ // DATA, __common section with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+ // Put zero initialized globals with local linkage in __DATA,__bss directive
+ // with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+ /// On Darwin, internally linked data beginning with "L" or "l" does not have
+ /// the directive emitted (this occurs in ObjC metadata).
+ if (!GV) return false;
+
+ // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+ if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+ // FIXME: ObjC metadata is currently emitted as internal symbols that have
+ // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+ // this horrible hack can go away.
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
+ return false;
+ }
+
+ return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
+ MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+MCSymbol *TargetLoweringObjectFileMachO::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
+ MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE;
+ else if (K.isBSS ())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ return getContext().getCOFFSection(GV->getSection(),
+ getCOFFSectionFlags(Kind),
+ Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$";
+ if (Kind.isBSS ())
+ return ".bss$";
+ if (Kind.isWriteable())
+ return ".data$";
+ return ".rdata$";
+}
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name.str(), Characteristics,
+ COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
+ }
+
+ if (Kind.isText())
+ return getTextSection();
+
+ return getDataSection();
+}
+
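For a weak or linkonce global, SelectSectionForGlobal above manufactures a per-global COMDAT section: a kind prefix such as ".text$" plus the mangled symbol name with its leading mangling character dropped. A sketch of just that naming step, using std::string in place of SmallString and a hypothetical mangled name "_foo":

#include <iostream>
#include <string>

enum class Kind { Text, BSS, Data, ReadOnly };

static const char *prefixForUniqueGlobal(Kind K) {
  switch (K) {
  case Kind::Text: return ".text$";
  case Kind::BSS:  return ".bss$";
  case Kind::Data: return ".data$";
  default:         return ".rdata$";  // read-only data
  }
}

int main() {
  std::string Sym = "_foo";                // hypothetical mangled name
  std::string Name = prefixForUniqueGlobal(Kind::Text);
  Name.append(Sym.begin() + 1, Sym.end()); // skip the mangling prefix char
  std::cout << Name << '\n';               // prints ".text$foo"
}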
diff --git a/src/LLVM/lib/CodeGen/TwoAddressInstructionPass.cpp b/src/LLVM/lib/CodeGen/TwoAddressInstructionPass.cpp
index c5d025b..d879378 100644
--- a/src/LLVM/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/src/LLVM/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -105,12 +105,12 @@
MachineFunction::iterator &mbbi,
unsigned RegB, unsigned RegC, unsigned Dist);
- bool isProfitableToConv3Addr(unsigned RegA);
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned Dist);
+ unsigned RegA, unsigned RegB, unsigned Dist);
typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
@@ -124,7 +124,11 @@
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist);
+ unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
@@ -138,7 +142,9 @@
public:
static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(ID) {}
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -146,10 +152,7 @@
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- if (StrongPHIElim)
- AU.addPreservedID(StrongPHIEliminationID);
- else
- AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -159,8 +162,11 @@
}
char TwoAddressInstructionPass::ID = 0;
-static RegisterPass<TwoAddressInstructionPass>
-X("twoaddressinstruction", "Two-Address instruction pass");
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
@@ -171,6 +177,10 @@
bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MachineInstr *MI, unsigned SavedReg,
MachineBasicBlock::iterator OldPos) {
+ // FIXME: Shouldn't we be trying to do this before we three-addressify the
+ // instruction? After this transformation is done, we no longer need
+ // the instruction to be in three-address form.
+
// Check if it's safe to move this instruction.
bool SeenStore = true; // Be conservative.
if (!MI->isSafeToMove(TII, AA, SeenStore))
@@ -211,7 +221,11 @@
break;
}
- if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ // If we find the instruction that kills SavedReg, and it is in an
+ // appropriate location, we can try to sink the current instruction
+ // past it.
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
+ KillMI->getDesc().isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -274,8 +288,8 @@
/// isTwoAddrUse - Return true if the specified MI is using the specified
/// register as a two-address operand.
static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
- const TargetInstrDesc &TID = UseMI->getDesc();
- for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ const MCInstrDesc &MCID = UseMI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(i);
if (MO.isReg() && MO.getReg() == Reg &&
(MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
@@ -437,8 +451,9 @@
/// isTwoAddrUse - Return true if the specified MI uses the specified register
/// as a two-address use. If so, return the destination register by reference.
static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
- const TargetInstrDesc &TID = MI.getDesc();
- unsigned NumOps = TID.getNumOperands();
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned NumOps = MI.isInlineAsm()
+ ? MI.getNumOperands() : MCID.getNumOperands();
for (unsigned i = 0; i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
@@ -548,8 +563,9 @@
unsigned FromRegC = getMappedReg(regC, SrcRegMap);
unsigned ToRegB = getMappedReg(regB, DstRegMap);
unsigned ToRegC = getMappedReg(regC, DstRegMap);
- if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
- (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
+ ((!FromRegC && !ToRegC) ||
+ regsAreCompatible(FromRegB, ToRegC, TRI) ||
regsAreCompatible(FromRegC, ToRegB, TRI)))
return true;
@@ -612,16 +628,18 @@
/// isProfitableToConv3Addr - Return true if it is profitable to convert the
/// given 2-address instruction to a 3-address one.
bool
-TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
// Look for situations like this:
// %reg1024<def> = MOV r1
// %reg1025<def> = MOV r0
// %reg1026<def> = ADD %reg1024, %reg1025
// r2 = MOV %reg1026
// Turn ADD into a 3-address instruction to avoid a copy.
- unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+ unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
unsigned ToRegA = getMappedReg(RegA, DstRegMap);
- return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
@@ -630,7 +648,8 @@
TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned Dist) {
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
if (NewMI) {
DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
@@ -650,12 +669,64 @@
mi = NewMI;
nmi = llvm::next(mi);
}
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
return true;
}
return false;
}
+/// ScanUses - Scan forward recursively for uses only; update the maps if a
+/// use is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ unsigned Reg = DstReg;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+ NewReg, IsDstPhys)) {
+ if (IsCopy && !Processed.insert(UseMI))
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+      // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
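ScanUses follows a def forward through a chain of single "interesting" uses: SrcRegMap records where each chained register's value came from, and the unwind loop then points every register at its successor, so a transitive lookup (what getMappedReg performs) bottoms out at the physreg. A toy model of that bookkeeping with plain maps and made-up register numbers (1-3 virtual, 100 physical):

#include <cassert>
#include <map>
#include <vector>

int main() {
  // Hypothetical chain found by the forward walk:
  //   %v2 = COPY %v1;  %v3 = COPY %v2;  r100 = COPY %v3
  std::map<unsigned, unsigned> SrcRegMap, DstRegMap;
  SrcRegMap[2] = 1;                        // v2's value comes from v1
  SrcRegMap[3] = 2;                        // v3's value comes from v2
  std::vector<unsigned> VirtRegPairs = {2, 3, 100};

  // Unwind as in the diff: map each register to its successor in the chain.
  unsigned ToReg = VirtRegPairs.back();
  VirtRegPairs.pop_back();
  while (!VirtRegPairs.empty()) {
    unsigned FromReg = VirtRegPairs.back();
    VirtRegPairs.pop_back();
    DstRegMap[FromReg] = ToReg;
    ToReg = FromReg;
  }
  DstRegMap[1] = ToReg;                    // finally map the original def

  // Resolving transitively from v1 now reaches physreg 100.
  assert(DstRegMap[1] == 2 && DstRegMap[2] == 3 && DstRegMap[3] == 100);
}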
/// ProcessCopy - If the specified instruction is not yet processed, process it
/// if it's a copy. For a copy instruction, we find the physical registers the
/// source and destination registers might be mapped to. These are kept in
@@ -687,49 +758,11 @@
assert(SrcRegMap[DstReg] == SrcReg &&
"Can't map to two src physical registers!");
- SmallVector<unsigned, 4> VirtRegPairs;
- bool IsCopy = false;
- unsigned NewReg = 0;
- while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII,
- IsCopy, NewReg, IsDstPhys)) {
- if (IsCopy) {
- if (!Processed.insert(UseMI))
- break;
- }
-
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end())
- // Earlier in the same MBB.Reached via a back edge.
- break;
-
- if (IsDstPhys) {
- VirtRegPairs.push_back(NewReg);
- break;
- }
- bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
- if (!isNew)
- assert(SrcRegMap[NewReg] == DstReg &&
- "Can't map to two src physical registers!");
- VirtRegPairs.push_back(NewReg);
- DstReg = NewReg;
- }
-
- if (!VirtRegPairs.empty()) {
- unsigned ToReg = VirtRegPairs.back();
- VirtRegPairs.pop_back();
- while (!VirtRegPairs.empty()) {
- unsigned FromReg = VirtRegPairs.back();
- VirtRegPairs.pop_back();
- bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
- if (!isNew)
- assert(DstRegMap[FromReg] == ToReg &&
- "Can't map to two dst physical registers!");
- ToReg = FromReg;
- }
- }
+ ScanUses(DstReg, MBB, Processed);
}
Processed.insert(MI);
+ return;
}
/// isSafeToDelete - If the specified instruction does not produce any side
@@ -737,10 +770,10 @@
static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrInfo *TII,
SmallVector<unsigned, 4> &Kills) {
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayStore() || TID.isCall())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall())
return false;
- if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -828,8 +861,9 @@
TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
- unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
- const TargetInstrDesc &TID = mi->getDesc();
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ const MCInstrDesc &MCID = mi->getDesc();
unsigned regA = mi->getOperand(DstIdx).getReg();
unsigned regB = mi->getOperand(SrcIdx).getReg();
@@ -851,7 +885,7 @@
unsigned regCIdx = ~0U;
bool TryCommute = false;
bool AggressiveCommute = false;
- if (TID.isCommutable() && mi->getNumOperands() >= 3 &&
+ if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
if (SrcIdx == SrcOp1)
regCIdx = SrcOp2;
@@ -879,12 +913,15 @@
return false;
}
- if (TID.isConvertibleTo3Addr()) {
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ ScanUses(regA, &*mbbi, Processed);
+
+ if (MCID.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
- if (!regBKilled || isProfitableToConv3Addr(regA)) {
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
// Try to convert it.
- if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
++NumConvertedTo3Addr;
return true; // Done with this instruction.
}
@@ -899,7 +936,7 @@
// movq (%rax), %rcx
// addq %rdx, %rcx
// because it's preferable to schedule a load than a register copy.
- if (TID.mayLoad() && !regBKilled) {
+ if (MCID.mayLoad() && !regBKilled) {
// Determine if a load can be unfolded.
unsigned LoadRegIndex;
unsigned NewOpc =
@@ -908,14 +945,14 @@
/*UnfoldStore=*/false,
&LoadRegIndex);
if (NewOpc != 0) {
- const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
- if (UnfoldTID.getNumDefs() == 1) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
MachineFunction &MF = *mbbi->getParent();
// Unfold the load.
DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
const TargetRegisterClass *RC =
- UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
if (!TII->unfoldMemoryOperand(MF, mi, Reg,
@@ -943,7 +980,7 @@
MachineBasicBlock::iterator NewMI = NewMIs[1];
bool TransformSuccess =
TryInstructionTransform(NewMI, mi, mbbi,
- NewSrcIdx, NewDstIdx, Dist);
+ NewSrcIdx, NewDstIdx, Dist, Processed);
if (TransformSuccess ||
NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
// Success, or at least we made an improvement. Keep the unfolded
@@ -951,7 +988,7 @@
if (LV) {
for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() != 0 &&
+ if (MO.isReg() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
@@ -1012,9 +1049,11 @@
DEBUG(dbgs() << "********** Function: "
<< MF.getFunction()->getName() << '\n');
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
// ReMatRegs - Keep track of the registers whose def's are remat'ed.
- BitVector ReMatRegs;
- ReMatRegs.resize(MRI->getLastVirtReg()+1);
+ BitVector ReMatRegs(MRI->getNumVirtRegs());
typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
TiedOperandMap;
@@ -1040,7 +1079,7 @@
if (mi->isRegSequence())
RegSequences.push_back(&*mi);
- const TargetInstrDesc &TID = mi->getDesc();
+ const MCInstrDesc &MCID = mi->getDesc();
bool FirstTied = true;
DistanceMap.insert(std::make_pair(mi, ++Dist));
@@ -1049,7 +1088,8 @@
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
- unsigned NumOps = TID.getNumOperands();
+ unsigned NumOps = mi->isInlineAsm()
+ ? mi->getNumOperands() : MCID.getNumOperands();
for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
unsigned DstIdx = 0;
if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
@@ -1067,12 +1107,7 @@
"two address instruction invalid");
unsigned regB = mi->getOperand(SrcIdx).getReg();
- TiedOperandMap::iterator OI = TiedOperands.find(regB);
- if (OI == TiedOperands.end()) {
- SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
- OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
- }
- OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
+ TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
}
// Now iterate over the information collected above.
@@ -1092,10 +1127,12 @@
mi->getOperand(DstIdx).getReg())
break; // Done with this instruction.
- if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
+ if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
+ Processed))
break; // The tied operands have been eliminated.
}
+ bool IsEarlyClobber = false;
bool RemovedKillFlag = false;
bool AllUsesCopied = true;
unsigned LastCopiedReg = 0;
@@ -1103,7 +1140,11 @@
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
unsigned SrcIdx = TiedPairs[tpi].first;
unsigned DstIdx = TiedPairs[tpi].second;
- unsigned regA = mi->getOperand(DstIdx).getReg();
+
+ const MachineOperand &DstMO = mi->getOperand(DstIdx);
+ unsigned regA = DstMO.getReg();
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+
// Grab regB from the instruction because it may have changed if the
// instruction was commuted.
regB = mi->getOperand(SrcIdx).getReg();
@@ -1142,7 +1183,7 @@
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
- ReMatRegs.set(regB);
+ ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
++NumReMats;
} else {
BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
@@ -1167,15 +1208,17 @@
}
if (AllUsesCopied) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
}
- MO.setReg(LastCopiedReg);
}
}
@@ -1228,13 +1271,12 @@
}
// Some remat'ed instructions are dead.
- int VReg = ReMatRegs.find_first();
- while (VReg != -1) {
+ for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->use_nodbg_empty(VReg)) {
MachineInstr *DefMI = MRI->getVRegDef(VReg);
DefMI->eraseFromParent();
}
- VReg = ReMatRegs.find_next(VReg);
}
// Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
@@ -1345,7 +1387,6 @@
continue;
// Insert a copy to replace the original.
- MachineBasicBlock::iterator InsertLoc = SomeMI;
MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
SomeMI->getDebugLoc(),
TII->get(TargetOpcode::COPY))
@@ -1411,6 +1452,7 @@
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
if (MI->getOperand(i).getSubReg() ||
TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
@@ -1430,7 +1472,9 @@
bool isKill = MI->getOperand(i).isKill();
if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
- !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+ !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
+ !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
+ MRI->getRegClass(SrcReg), SubIdx)) {
// REG_SEQUENCE cannot have duplicated operands, add a copy.
// Also add a copy if the source is live-in the block. We don't want
// to end up with a partial-redef of a livein, e.g.
@@ -1459,7 +1503,7 @@
MachineBasicBlock::iterator InsertLoc = MI;
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm())
+ .addReg(DstReg, RegState::Define, SubIdx)
.addReg(SrcReg, getKillRegState(isKill));
MI->getOperand(i).setReg(0);
if (LV && isKill)
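Several hunks above (the ReMatRegs BitVector, the dead-remat sweep) move from raw virtual register numbers to dense indexes via virtReg2Index/index2VirtReg. The sketch below assumes the tag-bit encoding that replaced FirstVirtualRegister-based numbering; the actual bit lives in TargetRegisterInfo, so treat the constant as illustrative:

#include <cassert>

const unsigned VirtRegFlag = 1u << 31;  // assumed tag bit for virtual regs

unsigned index2VirtReg(unsigned Index) { return Index | VirtRegFlag; }
unsigned virtReg2Index(unsigned Reg)   { return Reg & ~VirtRegFlag; }
bool isVirtualRegister(unsigned Reg)   { return (Reg & VirtRegFlag) != 0; }

int main() {
  unsigned Reg = index2VirtReg(5);      // the sixth virtual register
  assert(isVirtualRegister(Reg));
  assert(virtReg2Index(Reg) == 5);      // dense index, e.g. for a BitVector
}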
diff --git a/src/LLVM/lib/CodeGen/UnreachableBlockElim.cpp b/src/LLVM/lib/CodeGen/UnreachableBlockElim.cpp
index ade2ab4..52693f0 100644
--- a/src/LLVM/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/src/LLVM/lib/CodeGen/UnreachableBlockElim.cpp
@@ -26,6 +26,7 @@
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,16 +44,19 @@
virtual bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- UnreachableBlockElim() : FunctionPass(ID) {}
+ UnreachableBlockElim() : FunctionPass(ID) {
+ initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
}
};
}
char UnreachableBlockElim::ID = 0;
INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
- "Remove unreachable blocks from the CFG", false, false);
+ "Remove unreachable blocks from the CFG", false, false)
FunctionPass *llvm::createUnreachableBlockEliminationPass() {
return new UnreachableBlockElim();
@@ -105,9 +109,8 @@
}
char UnreachableMachineBlockElim::ID = 0;
-static RegisterPass<UnreachableMachineBlockElim>
-Y("unreachable-mbb-elimination",
- "Remove unreachable machine basic blocks");
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false)
char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
@@ -119,6 +122,7 @@
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
@@ -180,6 +184,7 @@
if (!preds.count(phi->getOperand(i).getMBB())) {
phi->RemoveOperand(i);
phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
}
if (phi->getNumOperands() == 3) {
@@ -189,9 +194,13 @@
MachineInstr* temp = phi;
++phi;
temp->eraseFromParent();
+ ModifiedPHI = true;
- if (Input != Output)
- F.getRegInfo().replaceRegWith(Output, Input);
+ if (Input != Output) {
+ MachineRegisterInfo &MRI = F.getRegInfo();
+ MRI.constrainRegClass(Input, MRI.getRegClass(Output));
+ MRI.replaceRegWith(Output, Input);
+ }
continue;
}
@@ -202,5 +211,5 @@
F.RenumberBlocks();
- return DeadBlocks.size();
+ return (DeadBlocks.size() || ModifiedPHI);
}
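For context, the pass's core computation is a reachability walk: everything reachable from the entry block is kept, the rest is erased, and PHI nodes in surviving blocks are patched up — which is why the new ModifiedPHI flag is needed to report a change even when no block dies. A self-contained sketch of the walk over a toy CFG:

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical CFG: 0 -> {1, 3}, 1 -> {3}, 2 -> {3}; block 2 is dead.
  std::vector<std::vector<int>> Succs = {{1, 3}, {3}, {3}, {}};
  std::vector<bool> Reachable(Succs.size(), false);
  std::vector<int> Worklist = {0};        // depth-first from the entry block
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (Reachable[BB]) continue;
    Reachable[BB] = true;
    for (int S : Succs[BB])
      Worklist.push_back(S);
  }
  for (unsigned BB = 0; BB != Succs.size(); ++BB)
    if (!Reachable[BB])
      std::printf("would erase unreachable block %u\n", BB);  // block 2
}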
diff --git a/src/LLVM/lib/CodeGen/VirtRegMap.cpp b/src/LLVM/lib/CodeGen/VirtRegMap.cpp
index 20ffcff..8a1cdc0 100644
--- a/src/LLVM/lib/CodeGen/VirtRegMap.cpp
+++ b/src/LLVM/lib/CodeGen/VirtRegMap.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -40,7 +41,8 @@
#include <algorithm>
using namespace llvm;
-STATISTIC(NumSpills , "Number of register spills");
+STATISTIC(NumSpillSlots, "Number of spill slots allocated");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
//===----------------------------------------------------------------------===//
// VirtRegMap implementation
@@ -48,7 +50,7 @@
char VirtRegMap::ID = 0;
-INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false);
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
@@ -74,8 +76,7 @@
EmergencySpillSlots.clear();
SpillSlotToUsesMap.resize(8);
- ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
- TargetRegisterInfo::FirstVirtualRegister);
+ ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
allocatableRCRegs.clear();
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
@@ -89,24 +90,38 @@
}
void VirtRegMap::grow() {
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
- Virt2PhysMap.grow(LastVirtReg);
- Virt2StackSlotMap.grow(LastVirtReg);
- Virt2ReMatIdMap.grow(LastVirtReg);
- Virt2SplitMap.grow(LastVirtReg);
- Virt2SplitKillMap.grow(LastVirtReg);
- ReMatMap.grow(LastVirtReg);
- ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2ReMatIdMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+ Virt2SplitKillMap.resize(NumRegs);
+ ReMatMap.resize(NumRegs);
+ ImplicitDefed.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ assert(SS >= LowSpillSlot && "Unexpected low spill slot");
+ unsigned Idx = SS-LowSpillSlot;
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ ++NumSpillSlots;
+ return SS;
}
unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
unsigned physReg = Hint.second;
- if (physReg &&
- TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
physReg = getPhys(physReg);
if (Hint.first == 0)
- return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg))
+ return (TargetRegisterInfo::isPhysicalRegister(physReg))
? physReg : 0;
return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
}
@@ -116,18 +131,7 @@
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- unsigned Idx = SS-LowSpillSlot;
- while (Idx >= SpillSlotToUsesMap.size())
- SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
- Virt2StackSlotMap[virtReg] = SS;
- ++NumSpills;
- return SS;
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
}
void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
@@ -160,14 +164,7 @@
EmergencySpillSlots.find(RC);
if (I != EmergencySpillSlots.end())
return I->second;
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- EmergencySpillSlots[RC] = SS;
- return SS;
+ return EmergencySpillSlots[RC] = createSpillSlot(RC);
}
void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
@@ -232,10 +229,11 @@
UnusedRegs.resize(NumRegs);
BitVector Used(NumRegs);
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
- Used.set(Virt2PhysMap[i]);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[Reg]);
+ }
BitVector Allocatable = TRI->getAllocatableSet(*MF);
bool AnyUnused = false;
@@ -258,23 +256,123 @@
return AnyUnused;
}
+void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getFunction()->getName() << '\n');
+ DEBUG(dump());
+ SmallVector<unsigned, 8> SuperDeads;
+ SmallVector<unsigned, 8> SuperDefs;
+ SmallVector<unsigned, 8> SuperKills;
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
+ MII != MIE;) {
+ MachineInstr *MI = MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = getPhys(VirtReg);
+ assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+
+ // Preserve semantics of sub-register operands.
+ if (MO.getSubReg()) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register. A
+ // partial redef always kills and redefines the super-register.
+ if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ SuperKills.push_back(PhysReg);
+
+ if (MO.isDef()) {
+ // The <def,undef> flag only makes sense for sub-register defs, and
+ // we are substituting a full physreg. An <imp-use,kill> operand
+ // from the SuperKills list will represent the partial read of the
+ // super-register.
+ MO.setIsUndef(false);
+
+ // Also add implicit defs for the super-register.
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ while (!SuperDeads.empty())
+ MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+ while (!SuperDefs.empty())
+ MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // Finally, remove any identity copies.
+ if (MI->isIdentityCopy()) {
+ ++NumIdCopies;
+ if (MI->getNumOperands() == 2) {
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ RemoveMachineInstrFromMaps(MI);
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
+ } else {
+ // Transform identity copy to a KILL to deal with subregisters.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "Identity copy: " << *MI);
+ }
+ }
+ }
+ }
+
+ // Tell MRI about physical registers in use.
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+}
+
void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF->getRegInfo();
OS << "********** REGISTER MAP **********\n";
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
- if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
- OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
- << "] " << MRI.getRegClass(i)->getName() << "\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI.getRegClass(Reg)->getName() << "\n";
+ }
}
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
- OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i]
- << "] " << MRI.getRegClass(i)->getName() << "\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI.getRegClass(Reg)->getName() << "\n";
+ }
+ }
OS << '\n';
}
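The heart of the new rewrite() is the operand loop: look up the physreg assigned to each virtual operand and, when the operand carries a sub-register index, narrow to the matching physical sub-register and clear the index, since physreg operands never keep one. A toy model of that substitution — the two-entry sub-register table is invented for illustration:

#include <cassert>
#include <map>
#include <utility>

struct Operand { unsigned Reg; unsigned SubIdx; };

int main() {
  const unsigned VirtFlag = 1u << 31;     // assumed virtual-register tag bit
  std::map<unsigned, unsigned> Virt2Phys = {{VirtFlag | 0, 10}};  // %v0 -> r10
  // Hypothetical table: physreg 10 has sub-registers 11 (idx 1), 12 (idx 2).
  std::map<std::pair<unsigned, unsigned>, unsigned> SubRegs =
      {{{10, 1}, 11}, {{10, 2}, 12}};

  Operand MO = {VirtFlag | 0, 2};         // models a %v0:sub2 operand
  unsigned PhysReg = Virt2Phys[MO.Reg];
  if (MO.SubIdx) {
    PhysReg = SubRegs[{PhysReg, MO.SubIdx}];
    MO.SubIdx = 0;                        // physregs carry no subreg index
  }
  MO.Reg = PhysReg;
  assert(MO.Reg == 12 && MO.SubIdx == 0);
}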
diff --git a/src/LLVM/lib/CodeGen/VirtRegMap.h b/src/LLVM/lib/CodeGen/VirtRegMap.h
index 8b6082d..03abff3 100644
--- a/src/LLVM/lib/CodeGen/VirtRegMap.h
+++ b/src/LLVM/lib/CodeGen/VirtRegMap.h
@@ -35,6 +35,7 @@
class TargetInstrInfo;
class TargetRegisterInfo;
class raw_ostream;
+ class SlotIndexes;
class VirtRegMap : public MachineFunctionPass {
public:
@@ -80,7 +81,7 @@
/// Virt2SplitKillMap - This maps a split virtual register to its last use
/// (kill) index.
- IndexedMap<SlotIndex> Virt2SplitKillMap;
+ IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
/// ReMatMap - This is virtual register to re-materialized instruction
/// mapping. Each virtual register whose definition is going to be
@@ -134,6 +135,9 @@
/// UnusedRegs - A list of physical registers that have not been used.
BitVector UnusedRegs;
+ /// createSpillSlot - Allocate a spill slot for RC from MFI.
+ unsigned createSpillSlot(const TargetRegisterClass *RC);
+
VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
@@ -153,10 +157,13 @@
}
MachineFunction &getMachineFunction() const {
- assert(MF && "getMachineFunction called before runOnMAchineFunction");
+ assert(MF && "getMachineFunction called before runOnMachineFunction");
return *MF;
}
+ MachineRegisterInfo &getRegInfo() const { return *MRI; }
+ const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
+
void grow();
/// @brief returns true if the specified virtual register is
@@ -201,16 +208,30 @@
/// @brief returns the register allocation preference.
unsigned getRegAllocPref(unsigned virtReg);
+ /// @brief returns true if VirtReg is assigned to its preferred physreg.
+ bool hasPreferredPhys(unsigned VirtReg) {
+ return getPhys(VirtReg) == getRegAllocPref(VirtReg);
+ }
+
/// @brief records virtReg is a split live interval from SReg.
void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
Virt2SplitMap[virtReg] = SReg;
}
/// @brief returns the live interval virtReg is split from.
- unsigned getPreSplitReg(unsigned virtReg) {
+ unsigned getPreSplitReg(unsigned virtReg) const {
return Virt2SplitMap[virtReg];
}
+ /// getOriginal - Return the original virtual register that VirtReg descends
+ /// from through splitting.
+ /// A register that was not created by splitting is its own original.
+ /// This operation is idempotent.
+ unsigned getOriginal(unsigned VirtReg) const {
+ unsigned Orig = getPreSplitReg(VirtReg);
+ return Orig ? Orig : VirtReg;
+ }
+
/// @brief returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
bool isAssignedReg(unsigned virtReg) const {
@@ -426,12 +447,12 @@
/// @brief Mark the specified register as being implicitly defined.
void setIsImplicitlyDefined(unsigned VirtReg) {
- ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+ ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
}
/// @brief Returns true if the virtual register is implicitly defined.
bool isImplicitlyDefined(unsigned VirtReg) const {
- return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+ return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
}
/// @brief Updates information about the specified virtual register's value
@@ -487,6 +508,13 @@
return 0;
}
+ /// rewrite - Rewrite all instructions in MF to use only physical registers
+ /// by mapping all virtual register operands to their assigned physical
+ /// registers.
+ ///
+ /// @param Indexes Optionally remove deleted instructions from indexes.
+ void rewrite(SlotIndexes *Indexes);
+
void print(raw_ostream &OS, const Module* M = 0) const;
void dump() const;
};
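getOriginal() is idempotent because Virt2SplitMap is expected to record the original register, not the immediate parent of a split. A tiny stand-in demonstrating the property:

#include <cassert>
#include <map>

std::map<unsigned, unsigned> Virt2SplitMap;  // 0 means "not split"

unsigned getOriginal(unsigned VirtReg) {
  unsigned Orig = Virt2SplitMap[VirtReg];
  return Orig ? Orig : VirtReg;
}

int main() {
  Virt2SplitMap[7] = 3;                      // v7 was split from v3
  assert(getOriginal(7) == 3);
  assert(getOriginal(getOriginal(7)) == 3);  // idempotent
  assert(getOriginal(4) == 4);               // its own original
}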
diff --git a/src/LLVM/lib/CodeGen/VirtRegRewriter.cpp b/src/LLVM/lib/CodeGen/VirtRegRewriter.cpp
index 0a9b58d..a5ec797 100644
--- a/src/LLVM/lib/CodeGen/VirtRegRewriter.cpp
+++ b/src/LLVM/lib/CodeGen/VirtRegRewriter.cpp
@@ -22,8 +22,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include <algorithm>
using namespace llvm;
STATISTIC(NumDSE , "Number of dead stores elided");
@@ -32,7 +32,7 @@
STATISTIC(NumDRM , "Number of re-materializable defs elided");
STATISTIC(NumStores , "Number of stores added");
STATISTIC(NumPSpills , "Number of physical register spills");
-STATISTIC(NumOmitted , "Number of reloads omited");
+STATISTIC(NumOmitted , "Number of reloads omitted");
STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
STATISTIC(NumCopified, "Number of available reloads turned into copies");
STATISTIC(NumReMats , "Number of re-materialization");
@@ -67,23 +67,16 @@
/// Note that operands may be added, so the MO reference is no longer valid.
static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
const TargetRegisterInfo &TRI) {
- if (unsigned SubIdx = MO.getSubReg()) {
- // Insert the physical subreg and reset the subreg field.
- MO.setReg(TRI.getSubReg(Reg, SubIdx));
- MO.setSubReg(0);
+ if (MO.getSubReg()) {
+ MO.substPhysReg(Reg, TRI);
- // Any def, dead, and kill flags apply to the full virtual register, so they
- // also apply to the full physical register. Add imp-def/dead and imp-kill
- // as needed.
+ // Any kill flags apply to the full virtual register, so they also apply to
+ // the full physical register.
+ // We assume that partial defs have already been decorated with a super-reg
+ // <imp-def> operand by LiveIntervals.
MachineInstr &MI = *MO.getParent();
- if (MO.isDef())
- if (MO.isDead())
- MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true);
- else
- MI.addRegisterDefined(Reg, &TRI);
- else if (!MO.isUndef() &&
- (MO.isKill() ||
- MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+ if (MO.isUse() && !MO.isUndef() &&
+ (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
} else {
MO.setReg(Reg);
@@ -223,7 +216,8 @@
<< SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
else
DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
- DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+ DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+ << (CanClobber ? " canclobber" : "") << "\n");
}
/// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -267,6 +261,10 @@
/// now).
void ModifyStackSlotOrReMat(int SlotOrReMat);
+ /// ClobberSharingStackSlots - When a register mapped to a stack slot changes,
+ /// other stack slots sharing the same register are no longer valid.
+ void ClobberSharingStackSlots(int StackSlot);
+
/// AddAvailableRegsToLiveIn - Availability information is being kept coming
/// into the specified MBB. Add available physical registers as potential
/// live-in's. If they are reused in the MBB, they will be added to the
@@ -304,7 +302,7 @@
const TargetLowering *TL = MF.getTarget().getTargetLowering();
if (!TL->isTypeLegal(TL->getPointerTy()))
- // Believe it or not, this is true on PIC16.
+ // Believe it or not, this is true on 16-bit targets like PIC16.
return InsertLoc;
const TargetRegisterClass *ptrRegClass =
@@ -469,25 +467,72 @@
}
}
-/// InvalidateKill - Invalidate register kill information for a specific
-/// register. This also unsets the kills marker on the last kill operand.
-static void InvalidateKill(unsigned Reg,
- const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (RegKills[Reg]) {
- KillOps[Reg]->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOps[Reg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- if (RegKills[*SR]) {
- KillOps[*SR]->setIsKill(false);
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- }
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Clear the kill flag on the
+/// recorded kill operand and reset the register's kill marker.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+ MachineOperand *KillOp = KillOps[Reg];
+ KillOp->setIsKill(false);
+ // KillOps[Reg] might be a def of a super-register.
+ unsigned KReg = KillOp->getReg();
+ if (!RegKills[KReg])
+ return;
+
+ assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
+ "invalid superreg kill flags");
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+  // If it's a def of a super-register, its other sub-registers are no
+  // longer killed as well.
+ for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+ DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+ assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
+ "invalid subreg kill flags");
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI. Note that this
+/// must be done even if this MI is being deleted! Consider:
+///
+/// USE $r1 (vreg1) <kill>
+/// ...
+/// $r1(vreg3) = COPY $r1 (vreg2)
+///
+/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
+/// vreg1's only use is a kill. The rewriter doesn't know it should be live
+/// until it rewrites vreg2. At that point it sees that the copy is dead and
+/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
+/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
+/// vreg1 before deleting the copy.
+static void ResurrectKill(MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo* TRI, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+ return;
+ }
+ // No previous kill for this reg. Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
+ ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
}
}
@@ -509,15 +554,22 @@
KillRegs->push_back(Reg);
assert(Reg < KillOps.size());
if (KillOps[Reg] == &MO) {
+ // This operand was the kill, now no longer.
KillOps[Reg] = NULL;
RegKills.reset(Reg);
for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
if (RegKills[*SR]) {
+ assert(KillOps[*SR] == &MO && "bad subreg kill flags");
KillOps[*SR] = NULL;
RegKills.reset(*SR);
}
}
}
+ else {
+ // This operand may have reused a previously killed reg. Keep it live in
+ // case it continues to be used after erasing this instruction.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+ }
}
}
@@ -585,44 +637,8 @@
if (Reg == 0)
continue;
- if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
- // That can't be right. Register is killed but not re-defined and it's
- // being reused. Let's fix that.
- KillOps[Reg]->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOps[Reg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- // Must be a def of a super-register. Its other sub-regsters are no
- // longer killed as well.
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- } else {
- // Check for subreg kills as well.
- // d4 =
- // store d4, fi#0
- // ...
- // = s8<kill>
- // ...
- // = d4 <avoiding reload>
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- unsigned SReg = *SR;
- if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
- KillOps[SReg]->setIsKill(false);
- unsigned KReg = KillOps[SReg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
- KillOps[*SSR] = NULL;
- RegKills.reset(*SSR);
- }
- }
- }
- }
+ // This operand may have reused a previously killed reg. Keep it live.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
if (MO.isKill()) {
RegKills.set(Reg);
@@ -653,7 +669,7 @@
}
}
-/// ReMaterialize - Re-materialize definition for Reg targetting DestReg.
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
///
static void ReMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MII,
@@ -663,8 +679,8 @@
VirtRegMap &VRM) {
MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
#ifndef NDEBUG
- const TargetInstrDesc &TID = ReMatDefMI->getDesc();
- assert(TID.getNumDefs() == 1 &&
+ const MCInstrDesc &MCID = ReMatDefMI->getDesc();
+ assert(MCID.getNumDefs() == 1 &&
"Don't know how to remat instructions that define > 1 values!");
#endif
TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
@@ -777,7 +793,8 @@
NotAvailable.insert(Reg);
else {
MBB.addLiveIn(Reg);
- InvalidateKill(Reg, TRI, RegKills, KillOps);
+ if (RegKills[Reg])
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
}
// Skip over the same register.
@@ -818,6 +835,26 @@
PhysRegsAvailable.erase(I);
}
+void AvailableSpills::ClobberSharingStackSlots(int StackSlot) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(StackSlot);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+
+ // Erase entries in PhysRegsAvailable for other stack slots.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ while (I != PhysRegsAvailable.end() && I->first == Reg) {
+ std::multimap<unsigned, int>::iterator NextI = llvm::next(I);
+ if (I->second != StackSlot) {
+ DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in "
+ << PrintReg(Reg, TRI) << '\n');
+ SpillSlotsOrReMatsAvailable.erase(I->second);
+ PhysRegsAvailable.erase(I);
+ }
+ I = NextI;
+ }
+}
+
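ClobberSharingStackSlots iterates a multimap range while erasing from it, so it has to capture the next iterator before each possible erase. The same pattern, standalone, with plain ints for registers and slots:

#include <cstdio>
#include <iterator>
#include <map>

int main() {
  std::multimap<unsigned, int> PhysRegsAvailable = {{7, 1}, {7, 2}, {7, 3}};
  const unsigned Reg = 7;        // physreg whose slot value just changed
  const int KeptSlot = 2;        // the slot being (re)defined stays valid
  auto I = PhysRegsAvailable.lower_bound(Reg);
  while (I != PhysRegsAvailable.end() && I->first == Reg) {
    auto NextI = std::next(I);   // grab before erase invalidates I
    if (I->second != KeptSlot) {
      std::printf("clobbering shared SS#%d\n", I->second);
      PhysRegsAvailable.erase(I);
    }
    I = NextI;
  }
}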
// ************************** //
// Reuse Info Implementation //
// ************************** //
@@ -1063,6 +1100,7 @@
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
VirtRegMap *VRM;
+ LiveIntervals *LIs;
BitVector AllocatableRegs;
DenseMap<MachineInstr*, unsigned> DistanceMap;
DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
@@ -1075,6 +1113,11 @@
LiveIntervals* LIs);
private:
+ void EraseInstr(MachineInstr *MI) {
+ VRM->RemoveMachineInstrFromMaps(MI);
+ LIs->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
bool OptimizeByUnfold2(unsigned VirtReg, int SS,
MachineBasicBlock::iterator &MII,
@@ -1117,6 +1160,12 @@
bool InsertSpills(MachineInstr *MI);
+ void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps);
+
void RewriteMBB(LiveIntervals *LIs,
AvailableSpills &Spills, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps);
@@ -1124,17 +1173,18 @@
}
bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
- LiveIntervals* LIs) {
+ LiveIntervals* lis) {
MRI = &MF.getRegInfo();
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
VRM = &vrm;
+ LIs = lis;
AllocatableRegs = TRI->getAllocatableSet(MF);
DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
<< MF.getFunction()->getName() << "':\n");
DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
" reloads!) ****\n");
- DEBUG(MF.dump());
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
// Spills - Keep track of which spilled values are available in physregs
// so that we can choose to reuse the physregs instead of emitting
@@ -1185,7 +1235,7 @@
}
DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.dump());
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
// Mark unused spill slots.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1197,10 +1247,8 @@
MFI->RemoveStackObject(SS);
for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
MachineInstr *DVMI = DbgValues[j];
- MachineBasicBlock *DVMBB = DVMI->getParent();
DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
- VRM->RemoveMachineInstrFromMaps(DVMI);
- DVMBB->erase(DVMI);
+ EraseInstr(DVMI);
}
++NumDSS;
}
@@ -1280,8 +1328,7 @@
VRM->transferRestorePts(&MI, NewMIs[0]);
MII = MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
++NumModRefUnfold;
// Unfold next instructions that fold the same SS.
@@ -1296,8 +1343,7 @@
VRM->transferRestorePts(&NextMI, NewMIs[0]);
MBB->insert(NextMII, NewMIs[0]);
InvalidateKills(NextMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&NextMI);
- MBB->erase(&NextMI);
+ EraseInstr(&NextMI);
++NumModRefUnfold;
// Skip over dbg_value instructions.
while (NextMII != MBB->end() && NextMII->isDebugValue())
@@ -1424,8 +1470,7 @@
VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
MII = FoldedMI;
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
return true;
}
}
@@ -1438,11 +1483,11 @@
/// where SrcReg is r1 and it is tied to r0. Return true if after
/// commuting this instruction it will be r0 = op r2, r1.
static bool CommuteChangesDestination(MachineInstr *DefMI,
- const TargetInstrDesc &TID,
+ const MCInstrDesc &MCID,
unsigned SrcReg,
const TargetInstrInfo *TII,
unsigned &DstIdx) {
- if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+ if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3)
return false;
if (!DefMI->getOperand(1).isReg() ||
DefMI->getOperand(1).getReg() != SrcReg)
@@ -1482,11 +1527,11 @@
MachineInstr &MI = *MII;
MachineBasicBlock::iterator DefMII = prior(MII);
MachineInstr *DefMI = DefMII;
- const TargetInstrDesc &TID = DefMI->getDesc();
+ const MCInstrDesc &MCID = DefMI->getDesc();
unsigned NewDstIdx;
if (DefMII != MBB->begin() &&
- TID.isCommutable() &&
- CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
+ MCID.isCommutable() &&
+ CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
@@ -1531,14 +1576,11 @@
// Delete all 3 old instructions.
InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(ReloadMI);
- MBB->erase(ReloadMI);
+ EraseInstr(ReloadMI);
InvalidateKills(*DefMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DefMI);
- MBB->erase(DefMI);
+ EraseInstr(DefMI);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
// If NewReg was previously holding value of some SS, it's now clobbered.
// This has to be done now because it's a physical register. When this
@@ -1581,8 +1623,7 @@
bool CheckDef = PrevMII != MBB->begin();
if (CheckDef)
--PrevMII;
- VRM->RemoveMachineInstrFromMaps(LastStore);
- MBB->erase(LastStore);
+ EraseInstr(LastStore);
if (CheckDef) {
// Look at defs of killed registers on the store. Mark the defs
// as dead since the store has been deleted and they aren't
@@ -1593,8 +1634,7 @@
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side effects.
- VRM->RemoveMachineInstrFromMaps(DeadDef);
- MBB->erase(DeadDef);
+ EraseInstr(DeadDef);
++NumDRM;
}
}
@@ -1618,11 +1658,19 @@
/// isSafeToDelete - Return true if this instruction doesn't produce any side
/// effect and all of its defs are dead.
static bool isSafeToDelete(MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
- if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
- TID.isCall() || TID.isBarrier() || TID.isReturn() ||
- TID.hasUnmodeledSideEffects())
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
+ MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
+ MI.isLabel() || MI.isDebugValue() ||
+ MI.hasUnmodeledSideEffects())
return false;
+
+ // Technically speaking inline asm without side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there, we should
+ // let them be.
+ if (MI.isInlineAsm())
+ return false;
+
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.getReg())
@@ -1682,8 +1730,7 @@
LastUD->setIsDead();
break;
}
- VRM->RemoveMachineInstrFromMaps(LastUDMI);
- MBB->erase(LastUDMI);
+ EraseInstr(LastUDMI);
} else {
LastUD->setIsKill();
RegKills.set(Reg);
@@ -1768,9 +1815,13 @@
else
DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for vreg"
- << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+ <<" instead of reloading into physreg "
<< TRI->getName(Phys) << '\n');
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to update
+ // the kill flags for the current instruction after processing it.
+
++NumOmitted;
continue;
} else if (InReg && InReg != Phys) {
@@ -1780,8 +1831,8 @@
else
DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for vreg"
- << VirtReg <<" by copying it into physreg "
+ << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+ <<" by copying it into physreg "
<< TRI->getName(Phys) << '\n');
// If the reloaded / remat value is available in another register,
@@ -1835,7 +1886,7 @@
return true;
}
-/// InsertEmergencySpills - Insert spills after MI if requested by VRM. Return
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
/// true if spills were inserted.
bool LocalRewriter::InsertSpills(MachineInstr *MI) {
if (!VRM->isSpillPt(MI))
@@ -1863,6 +1914,350 @@
}
+/// ProcessUses - Process all of MI's spilled operands and all available
+/// operands.
+void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps) {
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ MRI->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+      // If the virtual register is implicitly defined, emit an implicit_def
+      // before so the scavenger knows it's "defined".
+      // FIXME: This is a horrible hack done by the register allocator to
+      // remat a definition with a virtual register operand.
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+
+ // A partial def causes problems because the same operand both reads and
+ // writes the register. This rewriter is designed to rewrite uses and defs
+ // separately, so a partial def would already have been rewritten to a
+ // physreg by the time we get to processing defs.
+ // Add an implicit use operand to model the partial def.
+ if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
+ MI.findRegisterUseOperandIdx(VirtReg) == -1) {
+ VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
+ MI.addOperand(MachineOperand::CreateReg(VirtReg,
+ false, // isDef
+ true)); // isImplicit
+ DEBUG(dbgs() << "Partial redef: " << MI);
+ }
+ }
+
+  // Process all of the spilled uses and all non-spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ unsigned VirtReg = MI.getOperand(i).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MI.getOperand(i).getSubReg();
+ if (VRM->isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM->getPhys(VirtReg);
+ MRI->setPhysRegUsed(Phys);
+ if (MI.getOperand(i).isDef())
+ ReusedOperands.markClobbered(Phys);
+ substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+ if (VRM->isImplicitlyDefined(VirtReg))
+ // FIXME: Is this needed?
+ BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MI.getOperand(i).isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = MI.getOperand(i).isUndef();
+ // Check if it is defined by an implicit def. It should not be spilled.
+ // Note, this is for correctness reason. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+    // The live range [12, 14) is not part of the r1024 live interval since
+    // it's defined by an implicit def. It will not conflict with the live
+    // interval of r1025. Now suppose both registers are spilled; you can
+    // easily see a situation where both registers are reloaded before
+    // the INSERT_SUBREG and both target registers would overlap.
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register file.
+ if (PhysReg && !AvoidReload && SubIdx) {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
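+ // Illustrative tied-operand case: in "R0 = ADD R0, R1" the first use is
+ // tied to the def, so reusing an available physreg for that use also
+ // commits the instruction to clobber it with the result.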
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two-address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+ // If this is an asm, and a PhysReg alias is used elsewhere as an
+ // earlyclobber operand, we can't also use it as an input.
+ if (MI.isInlineAsm()) {
+ for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+ MachineOperand &MOk = MI.getOperand(k);
+ if (MOk.isReg() && MOk.isEarlyClobber() &&
+ TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+ CanReuse = false;
+ DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+ << " for " << PrintReg(VirtReg) << ": " << MOk
+ << '\n');
+ break;
+ }
+ }
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg)
+ << " instead of reloading into "
+ << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n');
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to
+ // update the kill flags for the current instr after processing it.
+
+ // The one subtlety is that we don't know that PhysReg won't be
+ // clobbered by a stack slot reload that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM->getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot; we'll
+ // remove it after the other operands are processed, if still valid.
+
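+ // E.g. (slot number illustrative): if this reuse of SS#4 is the last
+ // read of the slot and the reuse stands, the store that filled SS#4 is
+ // erased in the cleanup loop at the end of ProcessUses.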
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+ // Mark it isKill if there are no other uses of the same virtual
+ // register and it's not a two-address operand. IsKill will be
+ // unset if the reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ //
+ // This case also applies to an earlyclobber'd PhysReg.
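+ // Illustrative (registers hypothetical): if the reloaded value already
+ // sits in R0 but this instruction's tied def would clobber R0, emit
+ //   R1 = COPY R0
+ // before it and rewrite the operand to R1, so R0's value survives.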
+ unsigned DesignatedReg = VRM->getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.
+ GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+ MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ // If the mapped designated register is actually the physreg we have
+ // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+ << " for " << PrintReg(VirtReg)
+ << " instead of reloading into same physreg.\n");
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ MRI->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+
+ // Back-schedule reloads and remats.
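+ // ComputeReloadLoc may hoist the insertion point earlier in the block
+ // (toward MBB->begin()) to hide reload latency, rather than inserting
+ // immediately before MI.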
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DesignatedReg).addReg(PhysReg);
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ MRI->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+ } else {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ EraseInstr(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+}
+
/// rewriteMBB - Keep track of which spills are available even after the
/// register allocator is done with them. If possible, avoid reloading vregs.
void
@@ -1887,9 +2282,6 @@
// ReMatDefs - These are rematerializable def MIs which are not deleted.
SmallSet<MachineInstr*, 4> ReMatDefs;
- // Clear kill info.
- SmallSet<unsigned, 2> KilledMIRegs;
-
// Keep track of the registers we have already spilled in case there are
// multiple defs of the same register in MI.
SmallSet<unsigned, 8> SpilledMIRegs;
@@ -1925,309 +2317,8 @@
/// ReusedOperands - Keep track of operand reuse in case we need to undo
/// reuse.
ReuseInfo ReusedOperands(MI, TRI);
- SmallVector<unsigned, 4> VirtUseOps;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue; // Ignore non-register operands.
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
- // Ignore physregs for spilling, but remember that it is used by this
- // function.
- MRI->setPhysRegUsed(VirtReg);
- continue;
- }
-
- // We want to process implicit virtual register uses first.
- if (MO.isImplicit())
- // If the virtual register is implicitly defined, emit a implicit_def
- // before so scavenger knows it's "defined".
- // FIXME: This is a horrible hack done the by register allocator to
- // remat a definition with virtual register operand.
- VirtUseOps.insert(VirtUseOps.begin(), i);
- else
- VirtUseOps.push_back(i);
- }
-
- // Process all of the spilled uses and all non spilled reg references.
- SmallVector<int, 2> PotentialDeadStoreSlots;
- KilledMIRegs.clear();
- for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
- unsigned i = VirtUseOps[j];
- unsigned VirtReg = MI.getOperand(i).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register?");
-
- unsigned SubIdx = MI.getOperand(i).getSubReg();
- if (VRM->isAssignedReg(VirtReg)) {
- // This virtual register was assigned a physreg!
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
- if (MI.getOperand(i).isDef())
- ReusedOperands.markClobbered(Phys);
- substitutePhysReg(MI.getOperand(i), Phys, *TRI);
- if (VRM->isImplicitlyDefined(VirtReg))
- // FIXME: Is this needed?
- BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
- continue;
- }
-
- // This virtual register is now known to be a spilled value.
- if (!MI.getOperand(i).isUse())
- continue; // Handle defs in the loop below (handle use&def here though)
-
- bool AvoidReload = MI.getOperand(i).isUndef();
- // Check if it is defined by an implicit def. It should not be spilled.
- // Note, this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) are not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflicts with live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- int ReuseSlot = SSorRMId;
-
- // Check to see if this stack slot is available.
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
- // If this is a sub-register use, make sure the reuse register is in the
- // right register class. For example, for x86 not all of the 32-bit
- // registers have accessible sub-registers.
- // Similarly so for EXTRACT_SUBREG. Consider this:
- // EDI = op
- // MOV32_mr fi#1, EDI
- // ...
- // = EXTRACT_SUBREG fi#1
- // fi#1 is available in EDI, but it cannot be reused because it's not in
- // the right register file.
- if (PhysReg && !AvoidReload && SubIdx) {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- if (!RC->contains(PhysReg))
- PhysReg = 0;
- }
-
- if (PhysReg && !AvoidReload) {
- // This spilled operand might be part of a two-address operand. If this
- // is the case, then changing it will necessarily require changing the
- // def part of the instruction as well. However, in some cases, we
- // aren't allowed to modify the reused register. If none of these cases
- // apply, reuse it.
- bool CanReuse = true;
- bool isTied = MI.isRegTiedToDefOperand(i);
- if (isTied) {
- // Okay, we have a two address operand. We can reuse this physreg as
- // long as we are allowed to clobber the value and there isn't an
- // earlier def that has already clobbered the physreg.
- CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg);
- }
-
- if (CanReuse) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(PhysReg) << " for vreg"
- << VirtReg <<" instead of reloading into physreg "
- << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- // The only technical detail we have is that we don't know that
- // PhysReg won't be clobbered by a reloaded stack slot that occurs
- // later in the instruction. In particular, consider 'op V1, V2'.
- // If V1 is available in physreg R0, we would choose to reuse it
- // here, instead of reloading it into the register the allocator
- // indicated (say R1). However, V2 might have to be reloaded
- // later, and it might indicate that it needs to live in R0. When
- // this occurs, we need to have information available that
- // indicates it is safe to use R1 for the reload instead of R0.
- //
- // To further complicate matters, we might conflict with an alias,
- // or R0 and R1 might not be compatible with each other. In this
- // case, we actually insert a reload for V1 in R1, ensuring that
- // we can get at R0 or its alias.
- ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
- VRM->getPhys(VirtReg), VirtReg);
- if (isTied)
- // Only mark it clobbered if this is a use&def operand.
- ReusedOperands.markClobbered(PhysReg);
- ++NumReused;
-
- if (MI.getOperand(i).isKill() &&
- ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
- // The store of this spilled value is potentially dead, but we
- // won't know for certain until we've confirmed that the re-use
- // above is valid, which means waiting until the other operands
- // are processed. For now we just track the spill slot, we'll
- // remove it after the other operands are processed if valid.
-
- PotentialDeadStoreSlots.push_back(ReuseSlot);
- }
-
- // Mark is isKill if it's there no other uses of the same virtual
- // register and it's not a two-address operand. IsKill will be
- // unset if reg is reused.
- if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- continue;
- } // CanReuse
-
- // Otherwise we have a situation where we have a two-address instruction
- // whose mod/ref operand needs to be reloaded. This reload is already
- // available in some register "PhysReg", but if we used PhysReg as the
- // operand to our 2-addr instruction, the instruction would modify
- // PhysReg. This isn't cool if something later uses PhysReg and expects
- // to get its initial value.
- //
- // To avoid this problem, and to avoid doing a load right after a store,
- // we emit a copy from PhysReg into the designated register for this
- // operand.
- //
- // This case also applies to an earlyclobber'd PhysReg.
- unsigned DesignatedReg = VRM->getPhys(VirtReg);
- assert(DesignatedReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- DesignatedReg = ReusedOperands.
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
- MaybeDeadStores, RegKills, KillOps, *VRM);
-
- // If the mapped designated register is actually the physreg we have
- // incoming, we don't need to inserted a dead copy.
- if (DesignatedReg == PhysReg) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
- << " for vreg" << VirtReg
- << " instead of reloading into same physreg.\n");
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- ReusedOperands.markClobbered(RReg);
- ++NumReused;
- continue;
- }
-
- MRI->setPhysRegUsed(DesignatedReg);
- ReusedOperands.markClobbered(DesignatedReg);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, MF);
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- DesignatedReg).addReg(PhysReg);
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- // This invalidates DesignatedReg.
- Spills.ClobberPhysReg(DesignatedReg);
-
- Spills.addAvailable(ReuseSlot, DesignatedReg);
- unsigned RReg =
- SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- DEBUG(dbgs() << '\t' << *prior(MII));
- ++NumReused;
- continue;
- } // if (PhysReg)
-
- // Otherwise, reload it and remember that we have it.
- PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
-
- MRI->setPhysRegUsed(PhysReg);
- ReusedOperands.markClobbered(PhysReg);
- if (AvoidReload)
- ++NumAvoided;
- else {
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, MF);
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- // This invalidates PhysReg.
- Spills.ClobberPhysReg(PhysReg);
-
- // Any stores to this stack slot are not dead anymore.
- if (!DoReMat)
- MaybeDeadStores[SSorRMId] = NULL;
- Spills.addAvailable(SSorRMId, PhysReg);
- // Assumes this is the last use. IsKill will be unset if reg is reused
- // unless it's a two-address operand.
- if (!MI.isRegTiedToDefOperand(i) &&
- KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- }
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- }
-
- // Ok - now we can remove stores that have been confirmed dead.
- for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
- // This was the last use and the spilled value is still available
- // for reuse. That means the spill was unnecessary!
- int PDSSlot = PotentialDeadStoreSlots[j];
- MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
- if (DeadStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DeadStore);
- MBB->erase(DeadStore);
- MaybeDeadStores[PDSSlot] = NULL;
- ++NumDSE;
- }
- }
-
+ ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
DEBUG(dbgs() << '\t' << MI);
@@ -2249,7 +2340,7 @@
for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
unsigned VirtReg = FoldedVirts[FVI].first;
VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
- DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR);
+ DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR);
int SS = VRM->getStackSlot(VirtReg);
if (SS == VirtRegMap::NO_STACK_SLOT)
@@ -2281,14 +2372,13 @@
BackTracked = true;
} else {
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- // Unset last kill since it's being reused.
- InvalidateKill(InReg, TRI, RegKills, KillOps);
+ // InvalidateKills resurrects any prior kill of the copy's source,
+ // allowing the source reg to be reused in place of the copy.
Spills.disallowClobberPhysReg(InReg);
}
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
goto ProcessNextInst;
}
@@ -2299,8 +2389,7 @@
TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
--NextMII; // backtrack to the unfolded instruction.
BackTracked = true;
@@ -2336,8 +2425,7 @@
MBB->insert(MII, NewStore);
VRM->addSpillSlotUse(SS, NewStore);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
--NextMII;
--NextMII; // backtrack to the unfolded instruction.
@@ -2352,8 +2440,7 @@
// If we get here, the store is dead, nuke it now.
DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DeadStore);
- MBB->erase(DeadStore);
+ EraseInstr(DeadStore);
if (!NewStore)
++NumDSE;
}
@@ -2430,8 +2517,7 @@
// Last def is now dead.
TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
}
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
Spills.disallowClobberPhysReg(VirtReg);
goto ProcessNextInst;
@@ -2488,6 +2574,10 @@
}
}
+ // If StackSlot is available in a register that also holds other stack
+ // slots, clobber those stack slots now.
+ Spills.ClobberSharingStackSlots(StackSlot);
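+ // E.g. (slots illustrative): if EAX currently holds the values of both
+ // SS#1 and SS#2 and this def updates SS#1, SS#2 must no longer be
+ // treated as available in EAX.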
+
assert(PhysReg && "VR not assigned a physical register?");
MRI->setPhysRegUsed(PhysReg);
unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
@@ -2507,8 +2597,7 @@
++NumDCE;
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
UpdateKills(*LastStore, TRI, RegKills, KillOps);
goto ProcessNextInst;
@@ -2519,8 +2608,7 @@
// Delete dead instructions without side effects.
if (!Erased && !BackTracked && isSafeToDelete(MI)) {
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
}
if (!Erased)