Implement vector sign mask intrinsic.

BUG=swiftshader:15

Change-Id: I7fec56518a5b4e07d2189ab01a0a223b038564c1
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 7ba22f2..937e997 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -465,8 +465,7 @@
   void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
   void ucomiss(Type Ty, XmmRegister a, const Address &b);
 
-  void movmskpd(GPRRegister dst, XmmRegister src);
-  void movmskps(GPRRegister dst, XmmRegister src);
+  void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
 
   void sqrtss(Type Ty, XmmRegister dst, const Address &src);
   void sqrtss(Type Ty, XmmRegister dst, XmmRegister src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index b28d8e8..9c50ea9 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1916,21 +1916,25 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movmskpd(GPRRegister dst, XmmRegister src) {
+void AssemblerX86Base<TraitsType>::movmsk(Type Ty, GPRRegister dst,
+                                          XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
+  if (Ty == IceType_v16i8) { // Ty picks the encoding; v16i8 is the 0xD7 form.
+    emitUint8(0x66); // Prefix byte is emitted only for the v16i8 form.
+  } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
+    // The v4f32/v4i32 form (opcode 0x50) takes no operand size prefix.
+  } else {
+    assert(false && "Unexpected movmsk operand type");
+  }
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
-  emitUint8(0x50);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movmskps(GPRRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x50);
+  if (Ty == IceType_v16i8) {
+    emitUint8(0xD7); // Second opcode byte for the byte-vector form.
+  } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
+    emitUint8(0x50); // Same opcode byte the removed movmskps() emitted.
+  } else {
+    assert(false && "Unexpected movmsk operand type");
+  } // NOTE(review): without asserts, other types emit no opcode byte here.
   emitXmmRegisterOperand(dst, src);
 }
 
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 09025aa..bd10572 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -117,6 +117,7 @@
       Maxss,
       Mov,
       Movd,
+      Movmsk,
       Movp,
       Movq,
       MovssRegs,
@@ -1250,6 +1251,27 @@
         : InstX86BaseUnaryopXmm<InstX86Base::Movd>(Func, Dest, Src) {}
   };
 
+  /// Movmsk: emitted as pmovmskb or movmskps depending on the source type.
+  class InstX86Movmsk final : public InstX86Base {
+    InstX86Movmsk() = delete;
+    InstX86Movmsk(const InstX86Movmsk &) = delete;
+    InstX86Movmsk &operator=(const InstX86Movmsk &) = delete;
+  public:
+    static InstX86Movmsk *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Movmsk>())
+          InstX86Movmsk(Func, Dest, Source);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    /// Matches on the Movmsk kind enumerator that the constructor registers.
+    static bool classof(const Inst *Instr) {
+      return InstX86Base::isClassof(Instr, InstX86Base::Movmsk);
+    }
+  private:
+    InstX86Movmsk(Cfg *Func, Variable *Dest, Operand *Source);
+  };
+
   class InstX86Sqrtss : public InstX86BaseUnaryopXmm<InstX86Base::Sqrtss> {
   public:
     static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) {
@@ -3005,6 +3027,7 @@
   using Movsx = typename InstImpl<TraitsType>::InstX86Movsx;
   using Movzx = typename InstImpl<TraitsType>::InstX86Movzx;
   using Movd = typename InstImpl<TraitsType>::InstX86Movd;
+  using Movmsk = typename InstImpl<TraitsType>::InstX86Movmsk;
   using Sqrtss = typename InstImpl<TraitsType>::InstX86Sqrtss;
   using Mov = typename InstImpl<TraitsType>::InstX86Mov;
   using Movp = typename InstImpl<TraitsType>::InstX86Movp;
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 5c6c005..4075bf3 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -181,6 +181,13 @@
 }
 
 template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Movmsk::InstX86Movmsk(Cfg *Func, Variable *Dest,
+                                                   Operand *Source)
+    : InstX86Base(Func, InstX86Base::Movmsk, 1, Dest) {
+  this->addSource(Source); // Only source; presumably matches the 1 above.
+}
+
+template <typename TraitsType>
 InstImpl<TraitsType>::InstX86Cmov::InstX86Cmov(Cfg *Func, Variable *Dest,
                                                Operand *Source,
                                                BrCond Condition)
@@ -1008,6 +1015,64 @@
 }
 
 template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movmsk::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return; // Nothing is printed when BuildDefs::dump() is false.
+  Ostream &Str = Func->getContext()->getStrDump();
+  this->dumpDest(Func); // Order: dest, " = movmsk.<src type> ", sources.
+  Str << " = movmsk." << this->getSrc(0)->getType() << " ";
+  this->dumpSources(Func);
+}
+
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movmsk::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return; // Nothing is emitted when BuildDefs::dump() is false.
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 1);
+  Type SrcTy = this->getSrc(0)->getType();
+  assert(isVectorType(SrcTy));
+  switch (SrcTy) { // The mnemonic is chosen from the source vector type.
+  case IceType_v16i8:
+    Str << "\t"
+           "pmovmskb"
+           "\t";
+    break;
+  case IceType_v4i32: // v4i32 shares the movmskps mnemonic with v4f32.
+  case IceType_v4f32:
+    Str << "\t"
+           "movmskps"
+           "\t";
+    break;
+  default:
+    llvm_unreachable("Unexpected operand type");
+  }
+  this->getSrc(0)->emit(Func); // Source is printed first, then ", ", Dest.
+  Str << ", ";
+  this->getDest()->emit(Func);
+}
+
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movmsk::emitIAS(const Cfg *Func) const {
+  assert(this->getSrcSize() == 1);
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  const Variable *Dest = this->getDest();
+  const Variable *Src = llvm::cast<Variable>(this->getSrc(0));
+  const Type DestTy = Dest->getType();
+  const Type SrcTy = Src->getType();
+  assert(isVectorType(SrcTy));
+  assert(isScalarIntegerType(DestTy));
+  if (!Traits::Is64Bit) { // Allowed Dest types differ by target width.
+    assert(typeWidthInBytes(DestTy) <= 4);
+  } else {
+    assert(DestTy == IceType_i32 || DestTy == IceType_i64);
+  } // NOTE(review): below assumes Src and Dest both have registers.
+  XmmRegister SrcReg = Traits::getEncodedXmm(Src->getRegNum());
+  GPRRegister DestReg = Traits::getEncodedGPR(Dest->getRegNum());
+  Asm->movmsk(SrcTy, DestReg, SrcReg); // SrcTy selects the encoding.
+}
+
+template <typename TraitsType>
 void InstImpl<TraitsType>::InstX86Sqrtss::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index e61fb1d..b20a893 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -65,7 +65,8 @@
     LoadSubVector,
     StoreSubVector,
     VectorPackSigned,
-    VectorPackUnsigned
+    VectorPackUnsigned,
+    SignMask
   };
 
   /// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 97576ee..fa0b9f1 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -905,6 +905,10 @@
     AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
     Context.insert<typename Traits::Insts::Shufps>(Dest, Src0, Src1);
   }
+  void _movmsk(Variable *Dest, Operand *Src0) { // Inserts a Movmsk inst.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // As in sibling helpers.
+    Context.insert<typename Traits::Insts::Movmsk>(Dest, Src0);
+  }
   void _sqrtss(Variable *Dest, Operand *Src0) {
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Sqrtss>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index a038c43..38dfba7 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4455,6 +4455,21 @@
     _movp(Dest, T);
     return;
   }
+  case Intrinsics::SignMask: {
+    Operand *SrcReg = legalizeToReg(Instr->getArg(0));
+    Variable *Dest = Instr->getDest();
+    Variable *T = makeReg(IceType_i32);
+    if (SrcReg->getType() == IceType_v4f32 ||
+        SrcReg->getType() == IceType_v4i32 ||
+        SrcReg->getType() == IceType_v16i8) {
+      _movmsk(T, SrcReg);
+    } else {
+      // TODO(capn): We could implement v8i16 sign mask using packsswb/pmovmskb
+      llvm::report_fatal_error("Invalid type for SignMask intrinsic");
+    }
+    _mov(Dest, T);
+    return;
+  }
   default: // UnknownIntrinsic
     Func->setError("Unexpected intrinsic");
     return;