Subzero: Use a setcc sequence for better icmp lowering.

For an example like:
  %a = icmp eq i32 %b, %c

The original icmp lowering sequence for i8/i16/i32 was something like:

  cmpl b, c
  movb 1, a
  je label
  movb 0, a
label:

The improved sequence is:
  cmpl b, c
  sete a

In O2 mode, this doesn't help when successive compare/branch instructions are fused, but it does help when the boolean result needs to be saved and later used.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/1118353005
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index fc31921..b253278 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -339,6 +339,9 @@
     addSource(Source);
 }
 
+InstX8632Setcc::InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond)
+    : InstX8632(Func, InstX8632::Setcc, /*srcs=*/0, Dest), Condition(Cond) {}
+
 InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
                              bool Locked)
     : InstX8632Lockable(Func, InstX8632::Xadd, 2,
@@ -2726,6 +2729,35 @@
   dumpSources(Func);
 }
 
+void InstX8632Setcc::emit(const Cfg *Func) const {
+  if (ALLOW_DUMP) { // Textual output is produced only when ALLOW_DUMP is on.
+    Ostream &Out = Func->getContext()->getStrEmit();
+    Out << "\tset" << InstX8632BrAttributes[Condition].DisplayString << "\t";
+    Dest->emit(Func);
+  }
+}
+
+void InstX8632Setcc::emitIAS(const Cfg *Func) const {
+  // Encodes setcc on Dest (its only operand) through the X8632 assembler.
+  assert(Condition != CondX86::Br_None);
+  assert(getDest()->getType() == IceType_i1);
+  assert(getSrcSize() == 0);
+  X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
+  if (!getDest()->hasReg())
+    Asm->setcc(Condition, static_cast<TargetX8632 *>(Func->getTarget())
+                              ->stackVarToAsmOperand(getDest()));
+  else
+    Asm->setcc(Condition, RegX8632::getEncodedByteReg(getDest()->getRegNum()));
+}
+
+void InstX8632Setcc::dump(const Cfg *Func) const {
+  if (ALLOW_DUMP) { // Prints "setcc.<cond> " followed by the destination.
+    Ostream &Out = Func->getContext()->getStrDump();
+    Out << "setcc." << InstX8632BrAttributes[Condition].DisplayString << " ";
+    dumpDest(Func);
+  }
+}
+
 void InstX8632Xadd::emit(const Cfg *Func) const {
   if (!ALLOW_DUMP)
     return;
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index fca32c6..4ea2b36 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -242,6 +242,7 @@
     Rol,
     Sar,
     Sbb,
+    Setcc,
     Shl,
     Shld,
     Shr,
@@ -1585,6 +1586,30 @@
   ~InstX8632Ret() override {}
 };
 
+// Conditional set-byte (setcc): stores Condition's boolean result in Dest.
+class InstX8632Setcc : public InstX8632 {
+  InstX8632Setcc() = delete;
+  InstX8632Setcc(const InstX8632Setcc &) = delete;
+  InstX8632Setcc &operator=(const InstX8632Setcc &) = delete;
+
+public:
+  static InstX8632Setcc *create(Cfg *Func, Variable *Dest,
+                                CondX86::BrCond Cond) {
+    return new (Func->allocate<InstX8632Setcc>())
+        InstX8632Setcc(Func, Dest, Cond);
+  }
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+  void dump(const Cfg *Func) const override;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Setcc); }
+
+private:
+  InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond);
+  ~InstX8632Setcc() override {}
+
+  const CondX86::BrCond Condition; // Selects the set<cc> variant.
+};
+
 // Exchanging Add instruction.  Exchanges the first operand (destination
 // operand) with the second operand (source operand), then loads the sum
 // of the two values into the destination operand. The destination may be
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 582e441..399d058 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2748,12 +2748,8 @@
   // cmp b, c
   Operand *Src0RM =
       legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
-  InstX8632Label *Label = InstX8632Label::create(Func, this);
   _cmp(Src0RM, Src1);
-  _mov(Dest, One);
-  _br(getIcmp32Mapping(Inst->getCondition()), Label);
-  _mov_nonkillable(Dest, Zero);
-  Context.insert(Label);
+  _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
 }
 
 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 34f187c..8a3c36e 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -415,6 +415,9 @@
   void _sbb(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Sbb::create(Func, Dest, Src0));
   }
+  void _setcc(Variable *Dest, CondX86::BrCond Condition) { // Inserts a setcc.
+    Context.insert(InstX8632Setcc::create(Func, Dest, Condition));
+  }
   void _shl(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Shl::create(Func, Dest, Src0));
   }
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index b49f63a..f14c216 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -156,6 +156,13 @@
   EmitUint8(0xC0 + dst);
 }
 
+void AssemblerX8632::setcc(CondX86::BrCond condition, const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);             // Two-byte opcode escape.
+  EmitUint8(0x90 + condition); // SETcc opcode: 0x90 plus the condition code.
+  EmitOperand(0, address);     // 0 presumably fills ModRM.reg, unused by SETcc.
+}
+
 void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   if (isByteSizedType(Ty)) {
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 291ccdc..4cb6ee7 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -492,6 +492,7 @@
   void popal();
 
   void setcc(CondX86::BrCond condition, ByteRegister dst);
+  void setcc(CondX86::BrCond condition, const Address &address);
 
   void mov(Type Ty, GPRRegister dst, const Immediate &src);
   void mov(Type Ty, GPRRegister dst, GPRRegister src);
diff --git a/tests_lit/assembler/x86/sandboxing.ll b/tests_lit/assembler/x86/sandboxing.ll
index 2b33d40..9b697a3 100644
--- a/tests_lit/assembler/x86/sandboxing.ll
+++ b/tests_lit/assembler/x86/sandboxing.ll
@@ -96,22 +96,23 @@
 ; boundary should not trigger nop padding.
 define void @label_at_boundary(i32 %arg) {
 entry:
+  %cmp = icmp eq i32 %arg, 0
   call void @call_target()
   ; bundle boundary
   %addr_short = bitcast [2 x i8]* @global_short to i16*
   store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
-  %cmp = icmp eq i32 %arg, 0               ; 23-byte lowering sequence
+  %blah = select i1 %cmp, i32 3, i32 5     ; 23-byte lowering sequence
   ; label is here
   store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
   ret void
 }
 ; CHECK-LABEL: label_at_boundary
 ; CHECK: call
-; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for icmp.
+; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for select.
 ; CHECK-NEXT: 20: {{.*}} mov WORD PTR
-; CHECK-NEXT: 29: {{.*}} cmp DWORD PTR
+; CHECK-NEXT: 29: {{.*}} cmp BYTE PTR
 ; CHECK-NEXT: 2e: {{.*}} mov DWORD PTR
-; CHECK-NEXT: 36: {{.*}} je 40
+; CHECK-NEXT: 36: {{.*}} jne 40
 ; CHECK-NEXT: 38: {{.*}} mov DWORD PTR
 ; CHECK-NEXT: 40: {{.*}} mov WORD PTR
 
diff --git a/tests_lit/llvm2ice_tests/8bit.pnacl.ll b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
index e47a709..4f48cf4 100644
--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
@@ -282,7 +282,7 @@
 }
 ; CHECK-LABEL: selectI8Var
 ; CHECK: cmp
-; CHECK: jl
+; CHECK: setl
 ; CHECK: mov {{[a-d]l}}
 
 define internal i32 @testPhi8(i32 %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) {
diff --git a/tests_lit/llvm2ice_tests/branch-opt.ll b/tests_lit/llvm2ice_tests/branch-opt.ll
index 580a900..41e0521 100644
--- a/tests_lit/llvm2ice_tests/branch-opt.ll
+++ b/tests_lit/llvm2ice_tests/branch-opt.ll
@@ -53,7 +53,7 @@
 
 ; OM1-LABEL: testCondFallthroughToNextBlock
 ; OM1: cmp {{.*}},0x7b
-; OM1: jge
+; OM1: setge
 ; OM1: cmp
 ; OM1: jne
 ; OM1: jmp
@@ -88,7 +88,7 @@
 
 ; OM1-LABEL: testCondTargetNextBlock
 ; OM1: cmp {{.*}},0x7b
-; OM1: jge
+; OM1: setge
 ; OM1: cmp
 ; OM1: jne
 ; OM1: jmp
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll b/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
index 53d2eca..ee81cce 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll
@@ -41,7 +41,7 @@
 ; OM1-LABEL: test_atomic_cmpxchg_loop
 ; OM1: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
 ; OM1: cmp
-; OM1: je
+; OM1: sete
 ; OM1: call
 
 ; Still works if the compare operands are flipped.
@@ -130,4 +130,4 @@
 ; O2: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
 ; O2: mov {{.*}}
 ; O2: cmp
-; O2: je
+; O2: sete
diff --git a/tests_lit/llvm2ice_tests/phi.ll b/tests_lit/llvm2ice_tests/phi.ll
index 4e75850..2470a80 100644
--- a/tests_lit/llvm2ice_tests/phi.ll
+++ b/tests_lit/llvm2ice_tests/phi.ll
@@ -22,9 +22,7 @@
 ; put in the right place.
 ; CHECK-LABEL: testPhi1
 ; CHECK: cmp {{.*}},0x0
-; CHECK: mov {{.*}},0x1
-; CHECK: jg
-; CHECK: mov {{.*}},0x0
+; CHECK: setg
 ; CHECK: mov [[PHI:.*]],
 ; CHECK: cmp {{.*}},0x0
 ; CHECK: je
diff --git a/tests_lit/llvm2ice_tests/simple-loop.ll b/tests_lit/llvm2ice_tests/simple-loop.ll
index b8fd057..0b06c76 100644
--- a/tests_lit/llvm2ice_tests/simple-loop.ll
+++ b/tests_lit/llvm2ice_tests/simple-loop.ll
@@ -46,5 +46,5 @@
 ; such atrocious code (by design).
 ; OPTM1-LABEL: simple_loop
 ; OPTM1:      cmp {{.*}},0x0
-; OPTM1:      jg
+; OPTM1:      setl
 ; OPTM1:      ret