Subzero: Use a setcc sequence for better icmp lowering.
For an example like:
%a = icmp eq i32 %b, %c
The original icmp lowering sequence for i8/i16/i32 was something like:
cmpl b, c
movb 1, a
je label
movb 0, a
label:
The improved sequence is:
cmpl b, c
sete a
In O2 mode, this doesn't help when successive compare/branch instructions are fused, but it does help when the boolean result needs to be saved and later used.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/1118353005
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index fc31921..b253278 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -339,6 +339,9 @@
addSource(Source);
}
+// Constructs a setcc instruction that writes Dest according to Cond. The
+// source count passed to the base class is 0 and no addSource() call is made,
+// so the instruction carries a destination only.
+InstX8632Setcc::InstX8632Setcc(Cfg *Func, Variable *Dest,
+                               CondX86::BrCond Cond)
+    : InstX8632(Func, InstX8632::Setcc, 0, Dest),
+      Condition(Cond) {}
+
InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked)
: InstX8632Lockable(Func, InstX8632::Xadd, 2,
@@ -2726,6 +2729,35 @@
dumpSources(Func);
}
+// Writes the textual assembly form to the emit stream: a tab, "set" followed
+// by the condition's display string, another tab, then the destination
+// operand. Produces nothing when ALLOW_DUMP is false.
+void InstX8632Setcc::emit(const Cfg *Func) const {
+  if (ALLOW_DUMP) {
+    const auto &CondAttrs = InstX8632BrAttributes[Condition];
+    Ostream &Out = Func->getContext()->getStrEmit();
+    Out << "\tset" << CondAttrs.DisplayString << "\t";
+    Dest->emit(Func);
+  }
+}
+
+// Emits the instruction through the integrated assembler. The destination
+// must be an i1 variable, the condition must not be Br_None, and there must
+// be no source operands. A register destination uses the byte-register form
+// of setcc; otherwise the variable's stack location is used as a memory
+// operand.
+void InstX8632Setcc::emitIAS(const Cfg *Func) const {
+  auto *DestVar = getDest();
+  assert(Condition != CondX86::Br_None);
+  assert(DestVar->getType() == IceType_i1);
+  assert(getSrcSize() == 0);
+  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
+  if (!DestVar->hasReg()) {
+    // No register assigned: address the destination's stack slot instead.
+    auto *Target = static_cast<TargetX8632 *>(Func->getTarget());
+    Asm->setcc(Condition, Target->stackVarToAsmOperand(DestVar));
+    return;
+  }
+  Asm->setcc(Condition, RegX8632::getEncodedByteReg(DestVar->getRegNum()));
+}
+
+// Writes a debugging representation to the dump stream: "setcc." followed by
+// the condition's display string, a space, then the destination. Produces
+// nothing when ALLOW_DUMP is false.
+void InstX8632Setcc::dump(const Cfg *Func) const {
+  if (ALLOW_DUMP) {
+    const auto &CondAttrs = InstX8632BrAttributes[Condition];
+    Ostream &Out = Func->getContext()->getStrDump();
+    Out << "setcc." << CondAttrs.DisplayString << " ";
+    dumpDest(Func);
+  }
+}
+
void InstX8632Xadd::emit(const Cfg *Func) const {
if (!ALLOW_DUMP)
return;
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index fca32c6..4ea2b36 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -242,6 +242,7 @@
Rol,
Sar,
Sbb,
+ Setcc,
Shl,
Shld,
Shr,
@@ -1585,6 +1586,30 @@
~InstX8632Ret() override {}
};
+// Conditional set-byte instruction (x86 setcc). It has a destination
+// variable and a condition code but no source operands; the icmp lowering
+// inserts it right after a cmp so the boolean result is produced without a
+// branch.
+class InstX8632Setcc : public InstX8632 {
+  // Not default-constructible, copyable, or assignable. The deleted copy
+  // constructor must name this class itself; naming another instruction
+  // class here would only delete a converting constructor.
+  InstX8632Setcc() = delete;
+  InstX8632Setcc(const InstX8632Setcc &) = delete;
+  InstX8632Setcc &operator=(const InstX8632Setcc &) = delete;
+
+public:
+  // Allocates storage from Func and constructs the instruction in place.
+  // Dest receives the result; Cond selects which setcc variant is emitted.
+  static InstX8632Setcc *create(Cfg *Func, Variable *Dest,
+                                CondX86::BrCond Cond) {
+    return new (Func->allocate<InstX8632Setcc>())
+        InstX8632Setcc(Func, Dest, Cond);
+  }
+  // Textual assembly output.
+  void emit(const Cfg *Func) const override;
+  // Output through the integrated assembler.
+  void emitIAS(const Cfg *Func) const override;
+  // Debugging dump output.
+  void dump(const Cfg *Func) const override;
+  // Kind check against the Setcc instruction kind.
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Setcc); }
+
+private:
+  // Construction goes through create().
+  InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond);
+  ~InstX8632Setcc() override {}
+
+  // Condition code fixed at construction time.
+  const CondX86::BrCond Condition;
+};
+
// Exchanging Add instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand), then loads the sum
// of the two values into the destination operand. The destination may be
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 582e441..399d058 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2748,12 +2748,8 @@
// cmp b, c
Operand *Src0RM =
legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
_cmp(Src0RM, Src1);
- _mov(Dest, One);
- _br(getIcmp32Mapping(Inst->getCondition()), Label);
- _mov_nonkillable(Dest, Zero);
- Context.insert(Label);
+ _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
}
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 34f187c..8a3c36e 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -415,6 +415,9 @@
void _sbb(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Sbb::create(Func, Dest, Src0));
}
+  // Creates a setcc instruction that writes Dest according to Condition and
+  // inserts it at the current lowering context position.
+  void _setcc(Variable *Dest, CondX86::BrCond Condition) {
+    auto *SetccInst = InstX8632Setcc::create(Func, Dest, Condition);
+    Context.insert(SetccInst);
+  }
void _shl(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Shl::create(Func, Dest, Src0));
}
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index b49f63a..f14c216 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -156,6 +156,13 @@
EmitUint8(0xC0 + dst);
}
+// Memory-operand form of setcc. Emits the two opcode bytes 0F, 90+condition
+// and then the operand bytes for `address`.
+// NOTE(review): assumes CondX86::BrCond values match the hardware condition
+// code encoding, as the register overload does -- confirm.
+void AssemblerX8632::setcc(CondX86::BrCond condition, const Address &address) {
+  const int kTwoByteEscape = 0x0F;
+  const int kSetccOpcodeBase = 0x90;
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(kTwoByteEscape);
+  EmitUint8(kSetccOpcodeBase + condition);
+  // First argument is 0; presumably the ModRM reg/opcode-extension field,
+  // which setcc does not use -- verify against EmitOperand.
+  EmitOperand(0, address);
+}
+
void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
if (isByteSizedType(Ty)) {
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 291ccdc..4cb6ee7 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -492,6 +492,7 @@
void popal();
void setcc(CondX86::BrCond condition, ByteRegister dst);
+ void setcc(CondX86::BrCond condition, const Address &address);
void mov(Type Ty, GPRRegister dst, const Immediate &src);
void mov(Type Ty, GPRRegister dst, GPRRegister src);
diff --git a/tests_lit/assembler/x86/sandboxing.ll b/tests_lit/assembler/x86/sandboxing.ll
index 2b33d40..9b697a3 100644
--- a/tests_lit/assembler/x86/sandboxing.ll
+++ b/tests_lit/assembler/x86/sandboxing.ll
@@ -96,22 +96,23 @@
; boundary should not trigger nop padding.
define void @label_at_boundary(i32 %arg) {
entry:
+ %cmp = icmp eq i32 %arg, 0
call void @call_target()
; bundle boundary
%addr_short = bitcast [2 x i8]* @global_short to i16*
store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
- %cmp = icmp eq i32 %arg, 0 ; 23-byte lowering sequence
+ %blah = select i1 %cmp, i32 3, i32 5 ; 23-byte lowering sequence
; label is here
store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
ret void
}
; CHECK-LABEL: label_at_boundary
; CHECK: call
-; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for icmp.
+; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for select.
; CHECK-NEXT: 20: {{.*}} mov WORD PTR
-; CHECK-NEXT: 29: {{.*}} cmp DWORD PTR
+; CHECK-NEXT: 29: {{.*}} cmp BYTE PTR
; CHECK-NEXT: 2e: {{.*}} mov DWORD PTR
-; CHECK-NEXT: 36: {{.*}} je 40
+; CHECK-NEXT: 36: {{.*}} jne 40
; CHECK-NEXT: 38: {{.*}} mov DWORD PTR
; CHECK-NEXT: 40: {{.*}} mov WORD PTR
diff --git a/tests_lit/llvm2ice_tests/8bit.pnacl.ll b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
index e47a709..4f48cf4 100644
--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
@@ -282,7 +282,7 @@
}
; CHECK-LABEL: selectI8Var
; CHECK: cmp
-; CHECK: jl
+; CHECK: setl
; CHECK: mov {{[a-d]l}}
define internal i32 @testPhi8(i32 %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) {
diff --git a/tests_lit/llvm2ice_tests/branch-opt.ll b/tests_lit/llvm2ice_tests/branch-opt.ll
index 580a900..41e0521 100644
--- a/tests_lit/llvm2ice_tests/branch-opt.ll
+++ b/tests_lit/llvm2ice_tests/branch-opt.ll
@@ -53,7 +53,7 @@
; OM1-LABEL: testCondFallthroughToNextBlock
; OM1: cmp {{.*}},0x7b
-; OM1: jge
+; OM1: setge
; OM1: cmp
; OM1: jne
; OM1: jmp
@@ -88,7 +88,7 @@
; OM1-LABEL: testCondTargetNextBlock
; OM1: cmp {{.*}},0x7b
-; OM1: jge
+; OM1: setge
; OM1: cmp
; OM1: jne
; OM1: jmp
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll b/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
index 53d2eca..ee81cce 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
@@ -41,7 +41,7 @@
; OM1-LABEL: test_atomic_cmpxchg_loop
; OM1: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
; OM1: cmp
-; OM1: je
+; OM1: sete
; OM1: call
; Still works if the compare operands are flipped.
@@ -130,4 +130,4 @@
; O2: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
; O2: mov {{.*}}
; O2: cmp
-; O2: je
+; O2: sete
diff --git a/tests_lit/llvm2ice_tests/phi.ll b/tests_lit/llvm2ice_tests/phi.ll
index 4e75850..2470a80 100644
--- a/tests_lit/llvm2ice_tests/phi.ll
+++ b/tests_lit/llvm2ice_tests/phi.ll
@@ -22,9 +22,7 @@
; put in the right place.
; CHECK-LABEL: testPhi1
; CHECK: cmp {{.*}},0x0
-; CHECK: mov {{.*}},0x1
-; CHECK: jg
-; CHECK: mov {{.*}},0x0
+; CHECK: setg
; CHECK: mov [[PHI:.*]],
; CHECK: cmp {{.*}},0x0
; CHECK: je
diff --git a/tests_lit/llvm2ice_tests/simple-loop.ll b/tests_lit/llvm2ice_tests/simple-loop.ll
index b8fd057..0b06c76 100644
--- a/tests_lit/llvm2ice_tests/simple-loop.ll
+++ b/tests_lit/llvm2ice_tests/simple-loop.ll
@@ -46,5 +46,5 @@
; such atrocious code (by design).
; OPTM1-LABEL: simple_loop
; OPTM1: cmp {{.*}},0x0
-; OPTM1: jg
+; OPTM1: setl
; OPTM1: ret