Implement saturating vector add/subtract (padds/paddus/psubs/psubus).

BUG=swiftshader:15

Change-Id: Ic120eddd1761e33b7d76bf3ed8ec5ca74634f958
Reviewed-on: https://chromium-review.googlesource.com/403477
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 2db0f74..5cd4faf 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -357,6 +357,10 @@
 
   void padd(Type Ty, XmmRegister dst, XmmRegister src);
   void padd(Type Ty, XmmRegister dst, const Address &src);
+  void padds(Type Ty, XmmRegister dst, XmmRegister src); // signed saturating
+  void padds(Type Ty, XmmRegister dst, const Address &src); // memory source
+  void paddus(Type Ty, XmmRegister dst, XmmRegister src); // unsigned saturating
+  void paddus(Type Ty, XmmRegister dst, const Address &src); // memory source
   void pand(Type Ty, XmmRegister dst, XmmRegister src);
   void pand(Type Ty, XmmRegister dst, const Address &src);
   void pandn(Type Ty, XmmRegister dst, XmmRegister src);
@@ -375,6 +379,10 @@
   void por(Type Ty, XmmRegister dst, const Address &src);
   void psub(Type Ty, XmmRegister dst, XmmRegister src);
   void psub(Type Ty, XmmRegister dst, const Address &src);
+  void psubs(Type Ty, XmmRegister dst, XmmRegister src); // signed saturating
+  void psubs(Type Ty, XmmRegister dst, const Address &src); // memory source
+  void psubus(Type Ty, XmmRegister dst, XmmRegister src); // unsigned saturating
+  void psubus(Type Ty, XmmRegister dst, const Address &src); // memory source
   void pxor(Type Ty, XmmRegister dst, XmmRegister src);
   void pxor(Type Ty, XmmRegister dst, const Address &src);
 
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 3674d4a..0abf587 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -758,6 +758,76 @@
 }
 
 template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
+                                         XmmRegister src) { // xmm, xmm form
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the REX emission below.
+  emitRexRB(RexTypeIrrelevant, dst, src);
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xEC); // Byte-sized type: 66 0F EC (PADDSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xED); // i16: 66 0F ED (PADDSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected padds operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
+                                         const Address &src) { // xmm, mem form
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the address-size and REX steps.
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst); // Note the (src, dst) argument order.
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xEC); // Byte-sized type: 66 0F EC (PADDSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xED); // i16: 66 0F ED (PADDSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected padds operand type");
+  }
+  emitOperand(gprEncoding(dst), src); // dst is passed via gprEncoding().
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
+                                          XmmRegister src) { // xmm, xmm form
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the REX emission below.
+  emitRexRB(RexTypeIrrelevant, dst, src);
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xDC); // Byte-sized type: 66 0F DC (PADDUSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xDD); // i16: 66 0F DD (PADDUSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected paddus operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
+                                          const Address &src) { // xmm, mem
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the address-size and REX steps.
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst); // Note the (src, dst) argument order.
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xDC); // Byte-sized type: 66 0F DC (PADDUSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xDD); // i16: 66 0F DD (PADDUSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected paddus operand type");
+  }
+  emitOperand(gprEncoding(dst), src); // dst is passed via gprEncoding().
+}
+
+template <typename TraitsType>
 void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
@@ -1001,6 +1071,75 @@
 }
 
 template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
+                                         XmmRegister src) { // xmm, xmm form
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the REX emission below.
+  emitRexRB(RexTypeIrrelevant, dst, src);
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xE8); // Byte-sized type: 66 0F E8 (PSUBSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xE9); // i16: 66 0F E9 (PSUBSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected psubs operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
+                                         const Address &src) { // xmm, mem form
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the address-size and REX steps.
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst); // Note the (src, dst) argument order.
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xE8); // Byte-sized type: 66 0F E8 (PSUBSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xE9); // i16: 66 0F E9 (PSUBSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected psubs operand type");
+  }
+  emitOperand(gprEncoding(dst), src); // dst is passed via gprEncoding().
+}
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
+                                          XmmRegister src) { // xmm, xmm form
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the REX emission below.
+  emitRexRB(RexTypeIrrelevant, dst, src);
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xD8); // Byte-sized type: 66 0F D8 (PSUBUSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xD9); // i16: 66 0F D9 (PSUBUSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected psubus operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
+                                          const Address &src) { // xmm, mem
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // Prefix byte; precedes the address-size and REX steps.
+  emitAddrSizeOverridePrefix();
+  emitRex(RexTypeIrrelevant, src, dst); // Note the (src, dst) argument order.
+  emitUint8(0x0F); // First opcode byte.
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xD8); // Byte-sized type: 66 0F D8 (PSUBUSB).
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xD9); // i16: 66 0F D9 (PSUBUSW).
+  } else { // NOTE(review): no opcode byte is emitted if asserts are off.
+    assert(false && "Unexpected psubus operand type");
+  }
+  emitOperand(gprEncoding(dst), src); // dst is passed via gprEncoding().
+}
+
+template <typename TraitsType>
 void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 6d74b93..489ffea 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -132,6 +132,8 @@
       Orps,
       OrRMW,
       Padd,
+      Padds,
+      Paddus,
       Pand,
       Pandn,
       Pblendvb,
@@ -156,6 +158,8 @@
       Psra,
       Psrl,
       Psub,
+      Psubs,
+      Psubus,
       Push,
       Pxor,
       Ret,
@@ -1435,6 +1439,38 @@
                                                                 Source) {}
   };
 
+  class InstX86Padds // Xmm binop; its Emitter is bound to Assembler::padds.
+      : public InstX86BaseBinopXmm<InstX86Base::Padds, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Padds *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Padds>()) // Placement-new in Func.
+          InstX86Padds(Func, Dest, Source);
+    }
+
+  private: // Constructor is private; instances are made through create().
+    InstX86Padds(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Padds, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
+                                                                Source) {}
+  };
+
+  class InstX86Paddus // Xmm binop; its Emitter is bound to Assembler::paddus.
+      : public InstX86BaseBinopXmm<InstX86Base::Paddus, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Paddus *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Paddus>()) // Placement-new in Func.
+          InstX86Paddus(Func, Dest, Source);
+    }
+
+  private: // Constructor is private; instances are made through create().
+    InstX86Paddus(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Paddus, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
+                                                                Source) {}
+  };
+
   class InstX86Sub : public InstX86BaseBinopGPR<InstX86Base::Sub> {
   public:
     static InstX86Sub *create(Cfg *Func, Variable *Dest, Operand *Source) {
@@ -1531,6 +1567,38 @@
                                                                 Source) {}
   };
 
+  class InstX86Psubs // Xmm binop; its Emitter is bound to Assembler::psubs.
+      : public InstX86BaseBinopXmm<InstX86Base::Psubs, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Psubs *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Psubs>()) // Placement-new in Func.
+          InstX86Psubs(Func, Dest, Source);
+    }
+
+  private: // Constructor is private; instances are made through create().
+    InstX86Psubs(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Psubs, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
+                                                                Source) {}
+  };
+
+  class InstX86Psubus // Xmm binop; its Emitter is bound to Assembler::psubus.
+      : public InstX86BaseBinopXmm<InstX86Base::Psubus, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Psubus *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Psubus>()) // Placement-new in Func.
+          InstX86Psubus(Func, Dest, Source);
+    }
+
+  private: // Constructor is private; instances are made through create().
+    InstX86Psubus(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Psubus, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
+                                                                Source) {}
+  };
+
   class InstX86And : public InstX86BaseBinopGPR<InstX86Base::And> {
   public:
     static InstX86And *create(Cfg *Func, Variable *Dest, Operand *Source) {
@@ -3097,6 +3165,8 @@
   using Andnps = typename InstImpl<TraitsType>::InstX86Andnps;
   using Andps = typename InstImpl<TraitsType>::InstX86Andps;
   using Padd = typename InstImpl<TraitsType>::InstX86Padd;
+  using Padds = typename InstImpl<TraitsType>::InstX86Padds;
+  using Paddus = typename InstImpl<TraitsType>::InstX86Paddus;
   using Sub = typename InstImpl<TraitsType>::InstX86Sub;
   using SubRMW = typename InstImpl<TraitsType>::InstX86SubRMW;
   using Subps = typename InstImpl<TraitsType>::InstX86Subps;
@@ -3104,6 +3174,8 @@
   using Sbb = typename InstImpl<TraitsType>::InstX86Sbb;
   using SbbRMW = typename InstImpl<TraitsType>::InstX86SbbRMW;
   using Psub = typename InstImpl<TraitsType>::InstX86Psub;
+  using Psubs = typename InstImpl<TraitsType>::InstX86Psubs;
+  using Psubus = typename InstImpl<TraitsType>::InstX86Psubus;
   using And = typename InstImpl<TraitsType>::InstX86And;
   using AndRMW = typename InstImpl<TraitsType>::InstX86AndRMW;
   using Pand = typename InstImpl<TraitsType>::InstX86Pand;
@@ -3279,6 +3351,12 @@
   const char *InstImpl<TraitsType>::InstX86Padd::Base::Opcode = "padd";        \
   template <>                                                                  \
   template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Padds::Base::Opcode = "padds";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Paddus::Base::Opcode = "paddus";    \
+  template <>                                                                  \
+  template <>                                                                  \
   const char *InstImpl<TraitsType>::InstX86Sub::Base::Opcode = "sub";          \
   template <>                                                                  \
   template <>                                                                  \
@@ -3300,6 +3378,12 @@
   const char *InstImpl<TraitsType>::InstX86Psub::Base::Opcode = "psub";        \
   template <>                                                                  \
   template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Psubs::Base::Opcode = "psubs";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Psubus::Base::Opcode = "psubus";    \
+  template <>                                                                  \
+  template <>                                                                  \
   const char *InstImpl<TraitsType>::InstX86And::Base::Opcode = "and";          \
   template <>                                                                  \
   template <>                                                                  \
@@ -3683,6 +3767,18 @@
   template <>                                                                  \
   template <>                                                                  \
   const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Padds::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::padds,                             \
+          &InstImpl<TraitsType>::Assembler::padds};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Paddus::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::paddus,                            \
+          &InstImpl<TraitsType>::Assembler::paddus};                           \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
       InstImpl<TraitsType>::InstX86Pand::Base::Emitter = {                     \
           &InstImpl<TraitsType>::Assembler::pand,                              \
           &InstImpl<TraitsType>::Assembler::pand};                             \
@@ -3749,6 +3845,18 @@
   template <>                                                                  \
   template <>                                                                  \
   const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Psubs::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::psubs,                             \
+          &InstImpl<TraitsType>::Assembler::psubs};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Psubus::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::psubus,                            \
+          &InstImpl<TraitsType>::Assembler::psubus};                           \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
       InstImpl<TraitsType>::InstX86Pxor::Base::Emitter = {                     \
           &InstImpl<TraitsType>::Assembler::pxor,                              \
           &InstImpl<TraitsType>::Assembler::pxor};                             \
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index b5cab5e..3e2a738 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -62,12 +62,16 @@
     Stackrestore,
     Trap,
     // The intrinsics below are not part of the PNaCl specification.
+    AddSaturateSigned,
+    AddSaturateUnsigned,
     LoadSubVector,
     MultiplyAddPairs,
     MultiplyHighSigned,
     MultiplyHighUnsigned,
     SignMask,
     StoreSubVector,
+    SubtractSaturateSigned,
+    SubtractSaturateUnsigned,
     VectorPackSigned,
     VectorPackUnsigned
   };
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 41b91b8..e6276ce 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -767,6 +767,14 @@
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Padd>(Dest, Src0);
   }
+  void _padds(Variable *Dest, Operand *Src0) { // Inserts a Padds instruction.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // May rewrite both — confirm.
+    Context.insert<typename Traits::Insts::Padds>(Dest, Src0);
+  }
+  void _paddus(Variable *Dest, Operand *Src0) { // Inserts a Paddus instruction.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // May rewrite both — confirm.
+    Context.insert<typename Traits::Insts::Paddus>(Dest, Src0);
+  }
   void _pand(Variable *Dest, Operand *Src0) {
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Pand>(Dest, Src0);
@@ -864,6 +872,14 @@
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Psub>(Dest, Src0);
   }
+  void _psubs(Variable *Dest, Operand *Src0) { // Inserts a Psubs instruction.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // May rewrite both — confirm.
+    Context.insert<typename Traits::Insts::Psubs>(Dest, Src0);
+  }
+  void _psubus(Variable *Dest, Operand *Src0) { // Inserts a Psubus instruction.
+    AutoMemorySandboxer<> _(this, &Dest, &Src0); // May rewrite both — confirm.
+    Context.insert<typename Traits::Insts::Psubus>(Dest, Src0);
+  }
   void _push(Operand *Src0) {
     Context.insert<typename Traits::Insts::Push>(Src0);
   }
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index d4c8833..a5de2ae 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4509,6 +4509,54 @@
     _movp(Dest, T);
     return;
   }
+  case Intrinsics::AddSaturateSigned: { // Lowered as movp, padds, movp.
+    Operand *Src0 = Instr->getArg(0);
+    Operand *Src1 = Instr->getArg(1);
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // Temporary with the result's type.
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T, Src0RM); // T = Src0
+    _padds(T, Src1RM); // T = padds(T, Src1)
+    _movp(Dest, T); // Dest = T
+    return;
+  }
+  case Intrinsics::SubtractSaturateSigned: { // Lowered as movp, psubs, movp.
+    Operand *Src0 = Instr->getArg(0);
+    Operand *Src1 = Instr->getArg(1);
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // Temporary with the result's type.
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T, Src0RM); // T = Src0
+    _psubs(T, Src1RM); // T = psubs(T, Src1)
+    _movp(Dest, T); // Dest = T
+    return;
+  }
+  case Intrinsics::AddSaturateUnsigned: { // Lowered as movp, paddus, movp.
+    Operand *Src0 = Instr->getArg(0);
+    Operand *Src1 = Instr->getArg(1);
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // Temporary with the result's type.
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T, Src0RM); // T = Src0
+    _paddus(T, Src1RM); // T = paddus(T, Src1)
+    _movp(Dest, T); // Dest = T
+    return;
+  }
+  case Intrinsics::SubtractSaturateUnsigned: { // Lowered: movp, psubus, movp.
+    Operand *Src0 = Instr->getArg(0);
+    Operand *Src1 = Instr->getArg(1);
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // Temporary with the result's type.
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T, Src0RM); // T = Src0
+    _psubus(T, Src1RM); // T = psubus(T, Src1)
+    _movp(Dest, T); // Dest = T
+    return;
+  }
   default: // UnknownIntrinsic
     Func->setError("Unexpected intrinsic");
     return;