Add scalar lowering for sqrt intrinsic.

Reuse test_arith_main.cpp as the crosstest driver, mostly to share its
set of interesting floating-point constants.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=stichnot@chromium.org, wala@chromium.org

Review URL: https://codereview.chromium.org/384443003
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh
index 0bc4cf2..cf821e2 100755
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -30,7 +30,9 @@
     ./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
         --dir="${OUTDIR}" \
         --llvm-bin-path="${LLVM_BIN_PATH}" \
-        --test=test_arith.cpp --test=test_arith_frem.ll \
+        --test=test_arith.cpp \
+        --test=test_arith_frem.ll \
+        --test=test_arith_sqrt.ll \
         --driver=test_arith_main.cpp \
         --output=test_arith_O${optlevel}
 
diff --git a/crosstest/test_arith.h b/crosstest/test_arith.h
index 996d962..b4c2c37 100644
--- a/crosstest/test_arith.h
+++ b/crosstest/test_arith.h
@@ -27,3 +27,6 @@
   double test##inst(double a, double b);
 FPOP_TABLE
 #undef X
+
+float mySqrt(float a);
+double mySqrt(double a);
diff --git a/crosstest/test_arith_main.cpp b/crosstest/test_arith_main.cpp
index 745da61..8c53ad5 100644
--- a/crosstest/test_arith_main.cpp
+++ b/crosstest/test_arith_main.cpp
@@ -1,5 +1,6 @@
 /* crosstest.py --test=test_arith.cpp --test=test_arith_frem.ll \
-   --driver=test_arith_main.cpp --prefix=Subzero_ --output=test_arith */
+   --test=test_arith_sqrt.ll --driver=test_arith_main.cpp \
+   --prefix=Subzero_ --output=test_arith */
 
 #include <stdint.h>
 
@@ -123,6 +124,7 @@
   static const Type NegInf = -1.0 / 0.0;
   static const Type PosInf = 1.0 / 0.0;
   static const Type Nan = 0.0 / 0.0;
+  static const Type NegNan = -0.0 / 0.0;
   volatile Type Values[] = {
     0,                    1,                    0x7e,
     0x7f,                 0x80,                 0x81,
@@ -134,7 +136,8 @@
     0x100000001ll,        0x7ffffffffffffffell, 0x7fffffffffffffffll,
     0x8000000000000000ll, 0x8000000000000001ll, 0xfffffffffffffffell,
     0xffffffffffffffffll, NegInf,               PosInf,
-    Nan,                  FLT_MIN,              FLT_MAX,
+    Nan,                  NegNan,               -0.0,
+    FLT_MIN,              FLT_MAX,
     DBL_MIN,              DBL_MAX
   };
   const static size_t NumValues = sizeof(Values) / sizeof(*Values);
@@ -173,6 +176,22 @@
       }
     }
   }
+  for (size_t i = 0; i < NumValues; ++i) {
+    Type Value = Values[i];
+    ++TotalTests;
+    Type ResultSz = Subzero_::mySqrt(Value);
+    Type ResultLlc = mySqrt(Value);
+    // Compare bitwise with memcmp(), since == is false when both are NaN.
+    if (!memcmp(&ResultSz, &ResultLlc, sizeof(Type))) {
+      ++Passes;
+    } else {
+      ++Failures;
+      std::cout << std::fixed << "test_sqrt"
+                << (8 * sizeof(Type)) << "(" << Value
+                << "): sz=" << ResultSz << " llc=" << ResultLlc
+                << std::endl;
+    }
+  }
 }
 
 int main(int argc, char **argv) {
diff --git a/crosstest/test_arith_sqrt.ll b/crosstest/test_arith_sqrt.ll
new file mode 100644
index 0000000..2550aeb
--- /dev/null
+++ b/crosstest/test_arith_sqrt.ll
@@ -0,0 +1,14 @@
+target triple = "i686-pc-linux-gnu"
+
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
+
+define float @_Z6mySqrtf(float %a) {
+  %x = call float @llvm.sqrt.f32(float %a)
+  ret float %x
+}
+
+define double @_Z6mySqrtd(double %a) {
+  %x = call double @llvm.sqrt.f64(double %a)
+  ret double %x
+}
diff --git a/crosstest/test_fcmp_main.cpp b/crosstest/test_fcmp_main.cpp
index 8677c48..0c98c0a 100644
--- a/crosstest/test_fcmp_main.cpp
+++ b/crosstest/test_fcmp_main.cpp
@@ -22,15 +22,18 @@
   static const double Ten = 10.0;
   static const double PosInf = 1.0 / 0.0;
   static const double Nan = 0.0 / 0.0;
+  static const double NegNan = -0.0 / 0.0;
   assert(std::fpclassify(NegInf) == FP_INFINITE);
   assert(std::fpclassify(PosInf) == FP_INFINITE);
   assert(std::fpclassify(Nan) == FP_NAN);
+  assert(std::fpclassify(NegNan) == FP_NAN);
   assert(NegInf < Zero);
   assert(NegInf < PosInf);
   assert(Zero < PosInf);
 
-  volatile double Values[] = { NegInf,  Zero,    DBL_MIN, FLT_MIN, Ten,
-                               FLT_MAX, DBL_MAX, PosInf,  Nan, };
+  volatile double Values[] = { NegInf, -Zero,   Zero,    DBL_MIN, FLT_MIN,
+                               Ten,    FLT_MAX, DBL_MAX, PosInf,  Nan,
+                               NegNan };
   const static size_t NumValues = sizeof(Values) / sizeof(*Values);
 
   typedef bool (*FuncTypeFloat)(float, float);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 79ec127..63e2cd7 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -271,6 +271,11 @@
   return false;
 }
 
+InstX8632Sqrtss::InstX8632Sqrtss(Cfg *Func, Variable *Dest, Operand *Source)
+    : InstX8632(Func, InstX8632::Sqrtss, 1, Dest) {
+  addSource(Source);
+}
+
 InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
     : InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) {
   if (Source)
@@ -919,6 +924,25 @@
   dumpSources(Func);
 }
 
+void InstX8632Sqrtss::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 1);
+  Type Ty = getSrc(0)->getType();
+  assert(Ty == IceType_f32 || Ty == IceType_f64);
+  Str << "\tsqrt" << TypeX8632Attributes[Ty].SdSsString << "\t";
+  getDest()->emit(Func);
+  Str << ", ";
+  getSrc(0)->emit(Func);
+  Str << "\n";
+}
+
+void InstX8632Sqrtss::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  dumpDest(Func);
+  Str << " = sqrt." << getDest()->getType() << " ";
+  dumpSources(Func);
+}
+
 void InstX8632Xadd::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   if (Locked) {
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 0e4964f..f76d6df 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -170,6 +170,7 @@
     Shld,
     Shr,
     Shrd,
+    Sqrtss,
     Store,
     StoreQ,
     Sub,
@@ -827,6 +828,24 @@
   virtual ~InstX8632Ret() {}
 };
 
+// Sqrtss - Scalar sqrt of a float (sqrtss) or a double (sqrtsd).
+class InstX8632Sqrtss : public InstX8632 {
+public:
+  static InstX8632Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    return new (Func->allocate<InstX8632Sqrtss>())
+        InstX8632Sqrtss(Func, Dest, Source);
+  }
+  virtual void emit(const Cfg *Func) const;
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Sqrtss); }
+
+private:
+  InstX8632Sqrtss(Cfg *Func, Variable *Dest, Operand *Source);
+  InstX8632Sqrtss(const InstX8632Sqrtss &) LLVM_DELETED_FUNCTION;
+  InstX8632Sqrtss &operator=(const InstX8632Sqrtss &) LLVM_DELETED_FUNCTION;
+  virtual ~InstX8632Sqrtss() {}
+};
+
 // Exchanging Add instruction.  Exchanges the first operand (destination
 // operand) with the second operand (source operand), then loads the sum
 // of the two values into the destination operand. The destination may be
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 569fd9e..62a525c 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2018,7 +2018,14 @@
     lowerCall(Call);
     return;
   }
-  case Intrinsics::Sqrt:
+  case Intrinsics::Sqrt: {
+    Operand *Src = legalize(Instr->getArg(0));
+    Variable *Dest = Instr->getDest();
+    Variable *T = makeReg(Dest->getType());
+    _sqrtss(T, Src);
+    _mov(Dest, T);
+    return;
+  }
   case Intrinsics::Stacksave:
   case Intrinsics::Stackrestore:
     // TODO(jvoung): fill it in.
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 4c2c527..f709704 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -252,6 +252,9 @@
   void _shrd(Variable *Dest, Variable *Src0, Variable *Src1) {
     Context.insert(InstX8632Shrd::create(Func, Dest, Src0, Src1));
   }
+  void _sqrtss(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Sqrtss::create(Func, Dest, Src0));
+  }
   void _store(Operand *Value, OperandX8632 *Mem) {
     Context.insert(InstX8632Store::create(Func, Value, Mem));
   }
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index 15f9a65..5ed776b 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -15,6 +15,8 @@
 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
 declare void @llvm.nacl.longjmp(i8*, i32)
 declare i32 @llvm.nacl.setjmp(i8*)
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
 declare void @llvm.trap()
 
 define i32 @test_nacl_read_tp() {
@@ -160,6 +162,63 @@
 ; CHECKO2REM-LABEL: test_setjmp_unused
 ; CHECKO2REM: call setjmp
 
+define float @test_sqrt_float(float %x, i32 %iptr) {
+entry:
+  %r = call float @llvm.sqrt.f32(float %x)
+  %r2 = call float @llvm.sqrt.f32(float %r)
+  %r3 = call float @llvm.sqrt.f32(float -0.0)
+  %r4 = fadd float %r2, %r3
+  br label %next
+
+next:
+  %__6 = inttoptr i32 %iptr to float*
+  %y = load float* %__6, align 4
+  %r5 = call float @llvm.sqrt.f32(float %y)
+  %r6 = fadd float %r4, %r5
+  ret float %r6
+}
+; CHECK-LABEL: test_sqrt_float
+; CHECK: sqrtss xmm{{.*}}
+; CHECK: sqrtss xmm{{.*}}
+; CHECK: sqrtss xmm{{.*}}, dword ptr
+; CHECK-LABEL: .L{{.*}}next
+; We could fold the load and the sqrt into one operation, but the
+; current folding only handles load + arithmetic op. The sqrt inst
+; is considered an intrinsic call and not an arithmetic op.
+; CHECK: sqrtss xmm{{.*}}
+
+define double @test_sqrt_double(double %x, i32 %iptr) {
+entry:
+  %r = call double @llvm.sqrt.f64(double %x)
+  %r2 = call double @llvm.sqrt.f64(double %r)
+  %r3 = call double @llvm.sqrt.f64(double -0.0)
+  %r4 = fadd double %r2, %r3
+  br label %next
+
+next:
+  %__6 = inttoptr i32 %iptr to double*
+  %y = load double* %__6, align 8
+  %r5 = call double @llvm.sqrt.f64(double %y)
+  %r6 = fadd double %r4, %r5
+  ret double %r6
+}
+; CHECK-LABEL: test_sqrt_double
+; CHECK: sqrtsd xmm{{.*}}
+; CHECK: sqrtsd xmm{{.*}}
+; CHECK: sqrtsd xmm{{.*}}, qword ptr
+; CHECK-LABEL: .L{{.*}}next
+; CHECK: sqrtsd xmm{{.*}}
+
+define float @test_sqrt_ignored(float %x, double %y) {
+entry:
+  %ignored1 = call float @llvm.sqrt.f32(float %x)
+  %ignored2 = call double @llvm.sqrt.f64(double %y)
+  ret float 0.0
+}
+; CHECKO2REM-LABEL: test_sqrt_ignored
+; CHECKO2REM-NOT: sqrtss
+; CHECKO2REM-NOT: sqrtsd
+
 define i32 @test_trap(i32 %br) {
 entry:
   %r1 = icmp eq i32 %br, 0