Lower the select instruction when the operands are of vector type.

Select of vectors is implemented without the use of branches: the
condition is sign extended (where needed) into a per-element mask, and
the inputs are combined with bitwise operations as
(true & mask) | (false & ~mask), using pand / pandn / por.

BUG=none
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/417653004
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 004b555..6893856 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -376,7 +376,7 @@
 InstSelect::InstSelect(Cfg *Func, Variable *Dest, Operand *Condition,
                        Operand *SourceTrue, Operand *SourceFalse)
     : Inst(Func, Inst::Select, 3, Dest) {
-  assert(Condition->getType() == IceType_i1);
+  assert(typeElementType(Condition->getType()) == IceType_i1);
   addSource(Condition);
   addSource(SourceTrue);
   addSource(SourceFalse);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index f1a68da..93a872c 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -458,6 +458,7 @@
 template <> const char *InstX8632Psub::Opcode = "psub";
 template <> const char *InstX8632And::Opcode = "and";
 template <> const char *InstX8632Pand::Opcode = "pand";
+template <> const char *InstX8632Pandn::Opcode = "pandn";
 template <> const char *InstX8632Or::Opcode = "or";
 template <> const char *InstX8632Por::Opcode = "por";
 template <> const char *InstX8632Xor::Opcode = "xor";
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index b50199b..ce1cc65 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -174,6 +174,7 @@
     Or,
     Padd,
     Pand,
+    Pandn,
     Pcmpeq,
     Pcmpgt,
     Pextrw,
@@ -564,6 +565,7 @@
 typedef InstX8632Binop<InstX8632::Psub> InstX8632Psub;
 typedef InstX8632Binop<InstX8632::And> InstX8632And;
 typedef InstX8632Binop<InstX8632::Pand> InstX8632Pand;
+typedef InstX8632Binop<InstX8632::Pandn> InstX8632Pandn;
 typedef InstX8632Binop<InstX8632::Or> InstX8632Or;
 typedef InstX8632Binop<InstX8632::Por> InstX8632Por;
 typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 71b4c17..c8cf170 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -3410,11 +3410,46 @@
 }
 
 void TargetX8632::lowerSelect(const InstSelect *Inst) {
-  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
   Variable *Dest = Inst->getDest();
   Operand *SrcT = Inst->getTrueOperand();
   Operand *SrcF = Inst->getFalseOperand();
-  Operand *Condition = legalize(Inst->getCondition());
+  Operand *Condition = Inst->getCondition();
+
+  if (isVectorType(Dest->getType())) {
+    // a=d?b:c ==> d=sext(d); a=(b&d)|(c&~d)
+    // TODO(wala): SSE4.1 has blendvps and pblendvb.  SSE4.1 also has
+    // blendps and pblendw for constant condition operands.
+    Type SrcTy = SrcT->getType();
+    Variable *T = makeReg(SrcTy);
+    Variable *T2 = makeReg(SrcTy);
+    // Sign extend the condition operand if applicable.
+    if (SrcTy == IceType_v4f32) {
+      // The sext operation takes only integer arguments.
+      Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
+      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
+      _movp(T, T3);
+    } else if (typeElementType(SrcTy) != IceType_i1) {
+      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
+    } else {
+      _movp(T, Condition);
+    }
+    // ALIGNHACK: Until stack alignment support is implemented, the
+    // bitwise vector instructions need to have both operands in
+    // registers.  Once there is support for stack alignment, LEGAL_HACK
+    // can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+    _movp(T2, T);
+    _pand(T, LEGAL_HACK(SrcT));
+    _pandn(T2, LEGAL_HACK(SrcF));
+    _por(T, T2);
+    _movp(Dest, T);
+#undef LEGAL_HACK
+
+    return;
+  }
+
+  // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
+  Operand *ConditionRMI = legalize(Condition);
   Constant *Zero = Ctx->getConstantZero(IceType_i32);
   InstX8632Label *Label = InstX8632Label::create(Func, this);
 
@@ -3423,7 +3458,7 @@
     Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
     Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
     Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
-    _cmp(Condition, Zero);
+    _cmp(ConditionRMI, Zero);
     _mov(DestLo, SrcLoRI);
     _mov(DestHi, SrcHiRI);
     _br(InstX8632Br::Br_ne, Label);
@@ -3436,7 +3471,7 @@
     _mov(DestLo, SrcLoRI);
     _mov(DestHi, SrcHiRI);
   } else {
-    _cmp(Condition, Zero);
+    _cmp(ConditionRMI, Zero);
     SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
     _mov(Dest, SrcT);
     _br(InstX8632Br::Br_ne, Label);
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 864881f..cbc98ce 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -304,6 +304,9 @@
   void _pand(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Pand::create(Func, Dest, Src0));
   }
+  void _pandn(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Pandn::create(Func, Dest, Src0));
+  }
   void _pcmpeq(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Pcmpeq::create(Func, Dest, Src0));
   }