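Implement integer vector multiply intrinsics.

Adds the MultiplyHighSigned, MultiplyHighUnsigned and MultiplyAddPairs
intrinsics and lowers them on x86 to pmulhw, pmulhuw and pmaddwd
respectively. Also moves SignMask so the non-PNaCl intrinsics stay in
alphabetical order, and joins the "sqrt" emit expression onto one line.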

BUG=swiftshader:15

Change-Id: Ib822b50c0a14e5ebc114db9759cbeecbb9f7a3c1
Reviewed-on: https://chromium-review.googlesource.com/403472
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 6633e65..2db0f74 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -363,6 +363,12 @@
   void pandn(Type Ty, XmmRegister dst, const Address &src);
   void pmull(Type Ty, XmmRegister dst, XmmRegister src);
   void pmull(Type Ty, XmmRegister dst, const Address &src);
+  void pmulhw(Type Ty, XmmRegister dst, XmmRegister src); // reg, reg form
+  void pmulhw(Type Ty, XmmRegister dst, const Address &src); // reg, mem form
+  void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src); // reg, reg form
+  void pmulhuw(Type Ty, XmmRegister dst, const Address &src); // reg, mem form
+  void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src); // reg, reg form
+  void pmaddwd(Type Ty, XmmRegister dst, const Address &src); // reg, mem form
   void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
   void pmuludq(Type Ty, XmmRegister dst, const Address &src);
   void por(Type Ty, XmmRegister dst, XmmRegister src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 347a07e..3674d4a 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -839,6 +839,87 @@
 }
 
 template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
+                                          XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Encodes pmulhw dst, src (register source): 66 0F E5 /r.
+  emitRexRB(RexTypeIrrelevant, dst, src);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the v8i16 form is accepted.
+  (void)Ty; // Ty is read only by the assert above.
+  emitUint8(0xE5); // Opcode byte for PMULHW in the Intel SDM.
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
+                                          const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Encodes pmulhw dst, [src] (memory source): 66 0F E5 /r.
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst); // Note: address first, then register.
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the v8i16 form is accepted.
+  (void)Ty; // Ty is read only by the assert above.
+  emitUint8(0xE5); // Opcode byte for PMULHW in the Intel SDM.
+  emitOperand(gprEncoding(dst), src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
+                                           XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Encodes pmulhuw dst, src (register source): 66 0F E4 /r.
+  emitRexRB(RexTypeIrrelevant, dst, src);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the v8i16 form is accepted.
+  (void)Ty; // Ty is read only by the assert above.
+  emitUint8(0xE4); // Opcode byte for PMULHUW in the Intel SDM.
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
+                                           const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Encodes pmulhuw dst, [src] (memory source): 66 0F E4 /r.
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst); // Note: address first, then register.
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the v8i16 form is accepted.
+  (void)Ty; // Ty is read only by the assert above.
+  emitUint8(0xE4); // Opcode byte for PMULHUW in the Intel SDM.
+  emitOperand(gprEncoding(dst), src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
+                                           XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Encodes pmaddwd dst, src (register source): 66 0F F5 /r.
+  emitRexRB(RexTypeIrrelevant, dst, src);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the v8i16 form is accepted.
+  (void)Ty; // Ty is read only by the assert above.
+  emitUint8(0xF5); // Opcode byte for PMADDWD in the Intel SDM.
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
+                                           const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Encodes pmaddwd dst, [src] (memory source): 66 0F F5 /r.
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst); // Note: address first, then register.
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the v8i16 form is accepted.
+  (void)Ty; // Ty is read only by the assert above.
+  emitUint8(0xF5); // Opcode byte for PMADDWD in the Intel SDM.
+  emitOperand(gprEncoding(dst), src);
+}
+
+template <typename TraitsType>
 void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
                                            XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 52d566c..6d74b93 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -140,6 +140,9 @@
       Pextr,
       Pinsr,
       Pmull,
+      Pmulhw,
+      Pmulhuw,
+      Pmaddwd,
       Pmuludq,
       Pop,
       Por,
@@ -1848,6 +1851,60 @@
                                                                 Source) {}
   };
 
+  class InstX86Pmulhw // Binary XMM instruction tagged InstX86Base::Pmulhw.
+      : public InstX86BaseBinopXmm<InstX86Base::Pmulhw, false,
+                                   InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Pmulhw *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(Dest->getType() == IceType_v8i16 && // Both operands: v8i16.
+             Source->getType() == IceType_v8i16);
+      return new (Func->allocate<InstX86Pmulhw>()) // Placed in Func's storage.
+          InstX86Pmulhw(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pmulhw(Cfg *Func, Variable *Dest, Operand *Source) // Via create().
+        : InstX86BaseBinopXmm<InstX86Base::Pmulhw, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
+
+  class InstX86Pmulhuw // Binary XMM instruction tagged InstX86Base::Pmulhuw.
+      : public InstX86BaseBinopXmm<InstX86Base::Pmulhuw, false,
+                                   InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Pmulhuw *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(Dest->getType() == IceType_v8i16 && // Both operands: v8i16.
+             Source->getType() == IceType_v8i16);
+      return new (Func->allocate<InstX86Pmulhuw>()) // Placed in Func's storage.
+          InstX86Pmulhuw(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pmulhuw(Cfg *Func, Variable *Dest, Operand *Source) // Via create().
+        : InstX86BaseBinopXmm<InstX86Base::Pmulhuw, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
+
+  class InstX86Pmaddwd // Binary XMM instruction tagged InstX86Base::Pmaddwd.
+      : public InstX86BaseBinopXmm<InstX86Base::Pmaddwd, false,
+                                   InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Pmaddwd *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(Dest->getType() == IceType_v8i16 && // Both operands: v8i16.
+             Source->getType() == IceType_v8i16); // NOTE(review): pmaddwd yields 32-bit sums per the Intel SDM, yet Dest must be v8i16 - confirm intended.
+      return new (Func->allocate<InstX86Pmaddwd>()) // Placed in Func's storage.
+          InstX86Pmaddwd(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pmaddwd(Cfg *Func, Variable *Dest, Operand *Source) // Via create().
+        : InstX86BaseBinopXmm<InstX86Base::Pmaddwd, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
+
   class InstX86Pmuludq
       : public InstX86BaseBinopXmm<InstX86Base::Pmuludq, false,
                                    InstX86Base::SseSuffix::None> {
@@ -3066,6 +3123,9 @@
   using Mulps = typename InstImpl<TraitsType>::InstX86Mulps;
   using Mulss = typename InstImpl<TraitsType>::InstX86Mulss;
   using Pmull = typename InstImpl<TraitsType>::InstX86Pmull;
+  using Pmulhw = typename InstImpl<TraitsType>::InstX86Pmulhw;
+  using Pmulhuw = typename InstImpl<TraitsType>::InstX86Pmulhuw;
+  using Pmaddwd = typename InstImpl<TraitsType>::InstX86Pmaddwd;
   using Pmuludq = typename InstImpl<TraitsType>::InstX86Pmuludq;
   using Divps = typename InstImpl<TraitsType>::InstX86Divps;
   using Divss = typename InstImpl<TraitsType>::InstX86Divss;
@@ -3291,6 +3351,15 @@
   const char *InstImpl<TraitsType>::InstX86Pmull::Base::Opcode = "pmull";      \
   template <>                                                                  \
   template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pmulhw::Base::Opcode = "pmulhw";    \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pmulhuw::Base::Opcode = "pmulhuw";  \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pmaddwd::Base::Opcode = "pmaddwd";  \
+  template <>                                                                  \
+  template <>                                                                  \
   const char *InstImpl<TraitsType>::InstX86Pmuludq::Base::Opcode = "pmuludq";  \
   template <>                                                                  \
   template <>                                                                  \
@@ -3644,6 +3713,24 @@
   template <>                                                                  \
   template <>                                                                  \
   const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pmulhw::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::pmulhw,                            \
+          &InstImpl<TraitsType>::Assembler::pmulhw};                           \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pmulhuw::Base::Emitter = {                  \
+          &InstImpl<TraitsType>::Assembler::pmulhuw,                           \
+          &InstImpl<TraitsType>::Assembler::pmulhuw};                          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pmaddwd::Base::Emitter = {                  \
+          &InstImpl<TraitsType>::Assembler::pmaddwd,                           \
+          &InstImpl<TraitsType>::Assembler::pmaddwd};                          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
       InstImpl<TraitsType>::InstX86Pmuludq::Base::Emitter = {                  \
           &InstImpl<TraitsType>::Assembler::pmuludq,                           \
           &InstImpl<TraitsType>::Assembler::pmuludq};                          \
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index c06b256..8eae1b3 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -1082,8 +1082,7 @@
   Type Ty = this->getSrc(0)->getType();
   assert(isScalarFloatingType(Ty));
   Str << "\t"
-         "sqrt"
-      << Traits::TypeAttributes[Ty].SpSdString << "\t";
+         "sqrt" << Traits::TypeAttributes[Ty].SpSdString << "\t";
   this->getSrc(0)->emit(Func);
   Str << ", ";
   this->getDest()->emit(Func);
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index b20a893..b5cab5e 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -63,10 +63,13 @@
     Trap,
     // The intrinsics below are not part of the PNaCl specification.
     LoadSubVector,
+    MultiplyAddPairs,
+    MultiplyHighSigned,
+    MultiplyHighUnsigned,
+    SignMask,
     StoreSubVector,
     VectorPackSigned,
-    VectorPackUnsigned,
-    SignMask
+    VectorPackUnsigned
   };
 
   /// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index da1fff4..41b91b8 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -801,6 +801,18 @@
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Pmull>(Dest, Src0);
   }
+  void _pmulhw(Variable *Dest, Operand *Src0) { // Inserts a Pmulhw instruction.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped; receives the operands by address, so it may replace them (presumably for sandboxing - confirm).
+    Context.insert<typename Traits::Insts::Pmulhw>(Dest, Src0);
+  }
+  void _pmulhuw(Variable *Dest, Operand *Src0) { // Inserts a Pmulhuw instruction.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped; receives the operands by address, so it may replace them (presumably for sandboxing - confirm).
+    Context.insert<typename Traits::Insts::Pmulhuw>(Dest, Src0);
+  }
+  void _pmaddwd(Variable *Dest, Operand *Src0) { // Inserts a Pmaddwd instruction.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped; receives the operands by address, so it may replace them (presumably for sandboxing - confirm).
+    Context.insert<typename Traits::Insts::Pmaddwd>(Dest, Src0);
+  }
   void _pmuludq(Variable *Dest, Operand *Src0) {
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Pmuludq>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index e0739df..d4c8833 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4473,6 +4473,42 @@
     _mov(Dest, T);
     return;
   }
+  case Intrinsics::MultiplyHighSigned: { // Lowered to movp, pmulhw, movp.
+    Operand *Src0 = Instr->getArg(0);
+    Operand *Src1 = Instr->getArg(1);
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // Temporary with Dest's type.
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Register or memory.
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T, Src0RM); // Copy Src0 first: T is both input and output below.
+    _pmulhw(T, Src1RM);
+    _movp(Dest, T);
+    return;
+  }
+  case Intrinsics::MultiplyHighUnsigned: { // Lowered to movp, pmulhuw, movp.
+    Operand *Src0 = Instr->getArg(0);
+    Operand *Src1 = Instr->getArg(1);
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // Temporary with Dest's type.
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Register or memory.
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T, Src0RM); // Copy Src0 first: T is both input and output below.
+    _pmulhuw(T, Src1RM);
+    _movp(Dest, T);
+    return;
+  }
+  case Intrinsics::MultiplyAddPairs: { // Lowered to movp, pmaddwd, movp.
+    Operand *Src0 = Instr->getArg(0);
+    Operand *Src1 = Instr->getArg(1);
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // Temporary with Dest's type.
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Register or memory.
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T, Src0RM); // Copy Src0 first: T is both input and output below.
+    _pmaddwd(T, Src1RM);
+    _movp(Dest, T);
+    return;
+  }
   default: // UnknownIntrinsic
     Func->setError("Unexpected intrinsic");
     return;