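Generalize the Sqrt intrinsic to process vectors.

On x86, rename sqrtss to sqrt and emit the F3/F2 prefix only for scalar
floating-point types, so vector types get the unprefixed encoding.
The ARM32 and x86 lowerings assert that non-scalar types appear only
outside the PNaCl ABI; MIPS32 reports non-scalar Sqrt as unimplemented.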

BUG=swiftshader:15

Change-Id: Ib89d628c85696c20a249b8810cd357a292d10402
Reviewed-on: https://chromium-review.googlesource.com/405293
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 937e997..6633e65 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -467,8 +467,8 @@
 
   void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
 
-  void sqrtss(Type Ty, XmmRegister dst, const Address &src);
-  void sqrtss(Type Ty, XmmRegister dst, XmmRegister src);
+  void sqrt(Type Ty, XmmRegister dst, const Address &src);
+  void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
 
   void xorps(Type Ty, XmmRegister dst, const Address &src);
   void xorps(Type Ty, XmmRegister dst, XmmRegister src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 77b746d..347a07e 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1939,10 +1939,11 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtss(Type Ty, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  if (isScalarFloatingType(Ty))
+    emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -1951,10 +1952,11 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtss(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
+void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  if (isScalarFloatingType(Ty))
+    emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
   emitUint8(0x51);
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index bd10572..52d566c 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -166,7 +166,7 @@
       Shr,
       Shrd,
       Shufps,
-      Sqrtss,
+      Sqrt,
       Store,
       StoreP,
       StoreQ,
@@ -1272,18 +1272,17 @@
     InstX86Movmsk(Cfg *Func, Variable *Dest, Operand *Source);
   };
 
-  class InstX86Sqrtss : public InstX86BaseUnaryopXmm<InstX86Base::Sqrtss> {
+  class InstX86Sqrt : public InstX86BaseUnaryopXmm<InstX86Base::Sqrt> {
   public:
-    static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) {
-      return new (Func->allocate<InstX86Sqrtss>())
-          InstX86Sqrtss(Func, Dest, Src);
+    static InstX86Sqrt *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      return new (Func->allocate<InstX86Sqrt>()) InstX86Sqrt(Func, Dest, Src);
     }
 
     virtual void emit(const Cfg *Func) const override;
 
   private:
-    InstX86Sqrtss(Cfg *Func, Variable *Dest, Operand *Src)
-        : InstX86BaseUnaryopXmm<InstX86Base::Sqrtss>(Func, Dest, Src) {}
+    InstX86Sqrt(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopXmm<InstX86Base::Sqrt>(Func, Dest, Src) {}
   };
 
   /// Move/assignment instruction - wrapper for mov/movss/movsd.
@@ -3028,7 +3027,7 @@
   using Movzx = typename InstImpl<TraitsType>::InstX86Movzx;
   using Movd = typename InstImpl<TraitsType>::InstX86Movd;
   using Movmsk = typename InstImpl<TraitsType>::InstX86Movmsk;
-  using Sqrtss = typename InstImpl<TraitsType>::InstX86Sqrtss;
+  using Sqrt = typename InstImpl<TraitsType>::InstX86Sqrt;
   using Mov = typename InstImpl<TraitsType>::InstX86Mov;
   using Movp = typename InstImpl<TraitsType>::InstX86Movp;
   using Movq = typename InstImpl<TraitsType>::InstX86Movq;
@@ -3169,7 +3168,7 @@
   const char *InstImpl<TraitsType>::InstX86Movzx::Base::Opcode = "movz";       \
   template <>                                                                  \
   template <>                                                                  \
-  const char *InstImpl<TraitsType>::InstX86Sqrtss::Base::Opcode = "sqrtss";    \
+  const char *InstImpl<TraitsType>::InstX86Sqrt::Base::Opcode = "sqrt";        \
   template <>                                                                  \
   template <>                                                                  \
   const char *InstImpl<TraitsType>::InstX86Cbwdq::Base::Opcode =               \
@@ -3436,9 +3435,9 @@
   template <>                                                                  \
   template <>                                                                  \
   const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
-      InstImpl<TraitsType>::InstX86Sqrtss::Base::Emitter = {                   \
-          &InstImpl<TraitsType>::Assembler::sqrtss,                            \
-          &InstImpl<TraitsType>::Assembler::sqrtss};                           \
+      InstImpl<TraitsType>::InstX86Sqrt::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::sqrt,                              \
+          &InstImpl<TraitsType>::Assembler::sqrt};                             \
                                                                                \
   /* Binary GPR ops */                                                         \
   template <>                                                                  \
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 88c0272..c06b256 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -1074,7 +1074,7 @@
 }
 
 template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Sqrtss::emit(const Cfg *Func) const {
+void InstImpl<TraitsType>::InstX86Sqrt::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1082,7 +1082,8 @@
   Type Ty = this->getSrc(0)->getType();
   assert(isScalarFloatingType(Ty));
   Str << "\t"
-         "sqrt" << Traits::TypeAttributes[Ty].SdSsString << "\t";
+         "sqrt"
+      << Traits::TypeAttributes[Ty].SpSdString << "\t";
   this->getSrc(0)->emit(Func);
   Str << ", ";
   this->getDest()->emit(Func);
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index a2f1123..45cab9d 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -5264,6 +5264,8 @@
     llvm::report_fatal_error("setjmp should have been prelowered.");
   }
   case Intrinsics::Sqrt: {
+    assert(isScalarFloatingType(Dest->getType()) ||
+           getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
     Variable *Src = legalizeToReg(Instr->getArg(0));
     Variable *T = makeReg(Dest->getType());
     _vsqrt(T, Src);
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 3bc60c8..a9e59e2 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -4422,6 +4422,9 @@
         _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
       }
       _mov(Dest, T);
+    } else {
+      assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
+      UnimplementedLoweringError(this, Instr); // Not required for PNaCl
     }
     return;
   }
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 8844519..380ba00 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -949,7 +949,7 @@
     const char *CvtString;      // i (integer), s (single FP), d (double FP)
     const char *SdSsString;     // ss, sd, or <blank>
     const char *PdPsString;     // ps, pd, or <blank>
-    const char *SpsdString;     // ss, sd, ps, pd, or <blank>
+    const char *SpSdString;     // ss, sd, ps, pd, or <blank>
     const char *IntegralString; // b, w, d, or <blank>
     const char *UnpackString;   // bw, wd, dq, or <blank>
     const char *PackString;     // wb, dw, or <blank>
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index fa0b9f1..da1fff4 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -909,9 +909,9 @@
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Movmsk>(Dest, Src0);
   }
-  void _sqrtss(Variable *Dest, Operand *Src0) {
+  void _sqrt(Variable *Dest, Operand *Src0) {
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
-    Context.insert<typename Traits::Insts::Sqrtss>(Dest, Src0);
+    Context.insert<typename Traits::Insts::Sqrt>(Dest, Src0);
   }
   void _store(Operand *Value, X86Operand *Mem) {
     AutoMemorySandboxer<> _(this, &Value, &Mem);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 1ac77c4..e0739df 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4349,10 +4349,12 @@
     return;
   }
   case Intrinsics::Sqrt: {
+    assert(isScalarFloatingType(Instr->getDest()->getType()) ||
+           getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
     Operand *Src = legalize(Instr->getArg(0));
     Variable *Dest = Instr->getDest();
     Variable *T = makeReg(Dest->getType());
-    _sqrtss(T, Src);
+    _sqrt(T, Src);
     _mov(Dest, T);
     return;
   }
diff --git a/unittest/AssemblerX8632/XmmArith.cpp b/unittest/AssemblerX8632/XmmArith.cpp
index 1c85e2b..d21f153 100644
--- a/unittest/AssemblerX8632/XmmArith.cpp
+++ b/unittest/AssemblerX8632/XmmArith.cpp
@@ -1663,8 +1663,8 @@
                                                                                \
     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
-    __ sqrtss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                 \
-              XmmRegister::Encoded_Reg_##Src);                                 \
+    __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
+            XmmRegister::Encoded_Reg_##Src);                                   \
                                                                                \
     AssembledTest test = assemble();                                           \
     test.setDqwordTo(T0, test##Size##SrcValue);                                \
@@ -1686,8 +1686,8 @@
     const uint32_t T1 = allocateDqword();                                      \
                                                                                \
     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
-    __ sqrtss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                 \
-              dwordAddress(T0));                                               \
+    __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
+            dwordAddress(T0));                                                 \
                                                                                \
     AssembledTest test = assemble();                                           \
     test.setDqwordTo(T0, test##Size##SrcValue);                                \
diff --git a/unittest/AssemblerX8664/XmmArith.cpp b/unittest/AssemblerX8664/XmmArith.cpp
index 4ceed00..72c6730 100644
--- a/unittest/AssemblerX8664/XmmArith.cpp
+++ b/unittest/AssemblerX8664/XmmArith.cpp
@@ -1752,7 +1752,7 @@
                                                                                \
     __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
-    __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());      \
+    __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
                                                                                \
     AssembledTest test = assemble();                                           \
     test.setDqwordTo(T0, test##Size##SrcValue);                                \
@@ -1774,7 +1774,7 @@
     const uint32_t T1 = allocateDqword();                                      \
                                                                                \
     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
-    __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0));         \
+    __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0));           \
                                                                                \
     AssembledTest test = assemble();                                           \
     test.setDqwordTo(T0, test##Size##SrcValue);                                \