Subzero: Fix address mode optimization involving phi temporaries.
Also adds much-needed logging of the decision process that goes into the address mode optimization, enabled with "--verbose addropt".
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/490333003
diff --git a/src/IceDefs.h b/src/IceDefs.h
index cf9052c..d156a60 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -101,6 +101,7 @@
IceV_LinearScan = 1 << 8,
IceV_Frame = 1 << 9,
IceV_Timing = 1 << 10,
+ IceV_AddrOpt = 1 << 11,
IceV_All = ~IceV_None
};
typedef uint32_t VerboseMask;
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index d8a754b..b718ba5 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -146,9 +146,16 @@
}
void Variable::setDefinition(Inst *Inst, const CfgNode *Node) {
+ if (DefInst && !DefInst->isDeleted() && DefInst != Inst) {
+ // Detect when a variable is being defined multiple times,
+ // particularly for Phi instruction lowering. If this happens, lock
+ // DefInst to NULL; clearing DefNode makes later calls return early.
+ DefInst = NULL;
+ DefNode = NULL;
+ return;
+ }
if (DefNode == NULL)
return;
- // Can first check preexisting DefInst if we care about multi-def vars.
DefInst = Inst;
if (Node != DefNode)
DefNode = NULL;
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 057a3ea..71788df 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -136,6 +136,10 @@
void TargetLowering::lower() {
assert(!Context.atEnd());
Inst *Inst = *Context.getCur();
+ // Mark the current instruction as deleted before lowering,
+ // otherwise the Dest variable will likely get marked as non-SSA.
+ // See Variable::setDefinition().
+ Inst->setDeleted();
switch (Inst->getKind()) {
case Inst::Alloca:
lowerAlloca(llvm::dyn_cast<InstAlloca>(Inst));
@@ -200,7 +204,6 @@
Func->setError("Can't lower unsupported instruction type");
break;
}
- Inst->setDeleted();
postLower();
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 7ed8abf..b4e1b3e 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2607,13 +2607,13 @@
if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
+ NextBr->setDeleted();
Operand *Src0RM = legalize(
Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg, true);
_cmp(Src0RM, Src1);
_br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
NextBr->getTargetFalse());
// Skip over the following branch instruction.
- NextBr->setDeleted();
Context.advanceNext();
return;
}
@@ -3194,8 +3194,8 @@
// Lower the phi assignments now, before the branch (same placement
// as before).
InstAssign *PhiAssign = PhiAssigns[i];
- lowerAssign(PhiAssign);
PhiAssign->setDeleted();
+ lowerAssign(PhiAssign);
Context.advanceNext();
}
_br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
@@ -3482,8 +3482,35 @@
return false;
}
-void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
- int32_t &Offset) {
+void dumpAddressOpt(const Cfg *Func, const Variable *Base,
+ const Variable *Index, uint16_t Shift, int32_t Offset,
+ const Inst *Reason) { // Logs the address-mode state that Reason led to.
+ if (!Func->getContext()->isVerbose(IceV_AddrOpt))
+ return; // Silent unless the IceV_AddrOpt verbosity bit is set.
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "Instruction: ";
+ Reason->dumpDecorated(Func);
+ Str << " results in Base=";
+ if (Base)
+ Base->dump(Func);
+ else
+ Str << "<null>"; // No base variable.
+ Str << ", Index=";
+ if (Index)
+ Index->dump(Func);
+ else
+ Str << "<null>"; // No index variable.
+ Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
+}
+
+void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
+ Variable *&Index, uint16_t &Shift, int32_t &Offset) {
+ Func->setCurrentNode(NULL);
+ if (Func->getContext()->isVerbose(IceV_AddrOpt)) {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "\nStarting computeAddressOpt for instruction:\n ";
+ Instr->dumpDecorated(Func);
+ }
(void)Offset; // TODO: pattern-match for non-zero offsets.
if (Base == NULL)
return;
@@ -3506,6 +3533,7 @@
// TODO: ensure BaseVariable0 stays single-BB
true) {
Base = BaseVariable0;
+ dumpAddressOpt(Func, Base, Index, Shift, Offset, BaseInst);
continue;
}
@@ -3523,6 +3551,7 @@
Base = BaseVariable0;
Index = BaseVariable1;
Shift = 0; // should already have been 0
+ dumpAddressOpt(Func, Base, Index, Shift, Offset, BaseInst);
continue;
}
@@ -3560,6 +3589,7 @@
if (Shift + LogMult <= 3) {
Index = IndexVariable0;
Shift += LogMult;
+ dumpAddressOpt(Func, Base, Index, Shift, Offset, IndexInst);
continue;
}
}
@@ -3589,6 +3619,7 @@
}
Base = Var;
Offset += IsAdd ? Const->getValue() : -Const->getValue();
+ dumpAddressOpt(Func, Base, Index, Shift, Offset, BaseInst);
continue;
}
@@ -3684,12 +3715,12 @@
const OperandX8632Mem::SegmentRegisters SegmentReg =
OperandX8632Mem::DefaultSegment;
Variable *Base = llvm::dyn_cast<Variable>(Addr);
- computeAddressOpt(Base, Index, Shift, Offset);
+ computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
if (Base && Addr != Base) {
+ Inst->setDeleted();
Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
Shift, SegmentReg);
- Inst->setDeleted();
Context.insert(InstLoad::create(Func, Dest, Addr));
}
}
@@ -3866,12 +3897,12 @@
// registers there either.
const OperandX8632Mem::SegmentRegisters SegmentReg =
OperandX8632Mem::DefaultSegment;
- computeAddressOpt(Base, Index, Shift, Offset);
+ computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
if (Base && Addr != Base) {
+ Inst->setDeleted();
Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Shift, SegmentReg);
- Inst->setDeleted();
Context.insert(InstStore::create(Func, Data, Addr));
}
}
@@ -3943,9 +3974,9 @@
llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
if (NextCast->getCastKind() == InstCast::Sext &&
NextCast->getSrc(0) == SignExtendedResult) {
+ NextCast->setDeleted();
_movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
// Skip over the instruction.
- NextCast->setDeleted();
Context.advanceNext();
}
}
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index ddd1bde..b1d2d6f 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -45,6 +45,7 @@
clEnumValN(Ice::IceV_LinearScan, "regalloc", "Linear scan details"),
clEnumValN(Ice::IceV_Frame, "frame", "Stack frame layout details"),
clEnumValN(Ice::IceV_Timing, "time", "Pass timing details"),
+ clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
clEnumValN(Ice::IceV_None, "none", "No verbosity"), clEnumValEnd));
static cl::opt<Ice::TargetArch> TargetArch(
diff --git a/tests_lit/llvm2ice_tests/addr-opt-multi-def-var.ll b/tests_lit/llvm2ice_tests/addr-opt-multi-def-var.ll
new file mode 100644
index 0000000..5f5c7c4
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/addr-opt-multi-def-var.ll
@@ -0,0 +1,66 @@
+; This is distilled from a real function that led to a bug in the
+; address mode optimization code. It followed assignment chains
+; through non-SSA temporaries created from Phi instruction lowering.
+;
+; This test depends to some degree on the stability of "--verbose
+; addropt" output format.
+
+; RUN: %llvm2ice -O2 --verbose addropt %s | FileCheck %s
+
+declare i32 @_calloc_r(i32, i32, i32)
+
+define internal i32 @_Balloc(i32 %ptr, i32 %k) {
+entry:
+ %gep = add i32 %ptr, 76
+ %gep.asptr = inttoptr i32 %gep to i32*
+ %0 = load i32* %gep.asptr, align 1
+ %cmp = icmp eq i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.end5
+
+if.then: ; preds = %entry
+ %call = tail call i32 @_calloc_r(i32 %ptr, i32 4, i32 33)
+ %gep.asptr2 = inttoptr i32 %gep to i32*
+ store i32 %call, i32* %gep.asptr2, align 1
+ %cmp3 = icmp eq i32 %call, 0
+ br i1 %cmp3, label %return, label %if.end5
+
+if.end5: ; preds = %if.then, %entry
+ %1 = phi i32 [ %call, %if.then ], [ %0, %entry ]
+ %gep_array = mul i32 %k, 4
+ %gep2 = add i32 %1, %gep_array
+ %gep2.asptr = inttoptr i32 %gep2 to i32*
+ %2 = load i32* %gep2.asptr, align 1
+; The above load instruction is a good target for address mode
+; optimization. Correct analysis would lead to dump output like:
+; Starting computeAddressOpt for instruction:
+; [ 15] %__13 = load i32* %gep2.asptr, align 1
+; Instruction: [ 14] %gep2.asptr = i32 %gep2
+; results in Base=%gep2, Index=<null>, Shift=0, Offset=0
+; Instruction: [ 13] %gep2 = add i32 %__9, %gep_array
+; results in Base=%__9, Index=%gep_array, Shift=0, Offset=0
+; Instruction: [ 18] %__9 = i32 %__9_phi
+; results in Base=%__9_phi, Index=%gep_array, Shift=0, Offset=0
+; Instruction: [ 12] %gep_array = mul i32 %k, 4
+; results in Base=%__9_phi, Index=%k, Shift=2, Offset=0
+;
+; Incorrect, overly-aggressive analysis would lead to output like:
+; Starting computeAddressOpt for instruction:
+; [ 15] %__13 = load i32* %gep2.asptr, align 1
+; Instruction: [ 14] %gep2.asptr = i32 %gep2
+; results in Base=%gep2, Index=<null>, Shift=0, Offset=0
+; Instruction: [ 13] %gep2 = add i32 %__9, %gep_array
+; results in Base=%__9, Index=%gep_array, Shift=0, Offset=0
+; Instruction: [ 18] %__9 = i32 %__9_phi
+; results in Base=%__9_phi, Index=%gep_array, Shift=0, Offset=0
+; Instruction: [ 19] %__9_phi = i32 %__4
+; results in Base=%__4, Index=%gep_array, Shift=0, Offset=0
+; Instruction: [ 12] %gep_array = mul i32 %k, 4
+; results in Base=%__4, Index=%k, Shift=2, Offset=0
+;
+; CHECK-NOT: results in Base=%__4,
+;
+ ret i32 %2
+
+return: ; preds = %if.then
+ ret i32 0
+}