Add scalar lowering for sqrt intrinsic.
The new sqrt crosstest re-uses test_arith_main.cpp as its driver, mostly
to share that file's set of interesting floating point constants.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=stichnot@chromium.org, wala@chromium.org
Review URL: https://codereview.chromium.org/384443003
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh
index 0bc4cf2..cf821e2 100755
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -30,7 +30,9 @@
./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
- --test=test_arith.cpp --test=test_arith_frem.ll \
+ --test=test_arith.cpp \
+ --test=test_arith_frem.ll \
+ --test=test_arith_sqrt.ll \
--driver=test_arith_main.cpp \
--output=test_arith_O${optlevel}
diff --git a/crosstest/test_arith.h b/crosstest/test_arith.h
index 996d962..b4c2c37 100644
--- a/crosstest/test_arith.h
+++ b/crosstest/test_arith.h
@@ -27,3 +27,6 @@
double test##inst(double a, double b);
FPOP_TABLE
#undef X
+
+// Square-root entry points exercised by the crosstest driver. They are
+// defined in test_arith_sqrt.ll, which wraps the llvm.sqrt intrinsics under
+// the mangled names of these two overloads.
+float mySqrt(float a);
+double mySqrt(double a);
diff --git a/crosstest/test_arith_main.cpp b/crosstest/test_arith_main.cpp
index 745da61..8c53ad5 100644
--- a/crosstest/test_arith_main.cpp
+++ b/crosstest/test_arith_main.cpp
@@ -1,5 +1,6 @@
/* crosstest.py --test=test_arith.cpp --test=test_arith_frem.ll \
- --driver=test_arith_main.cpp --prefix=Subzero_ --output=test_arith */
+ --test=test_arith_sqrt.ll --driver=test_arith_main.cpp \
+ --prefix=Subzero_ --output=test_arith */
#include <stdint.h>
@@ -123,6 +124,7 @@
static const Type NegInf = -1.0 / 0.0;
static const Type PosInf = 1.0 / 0.0;
static const Type Nan = 0.0 / 0.0;
+ static const Type NegNan = -0.0 / 0.0;
volatile Type Values[] = {
0, 1, 0x7e,
0x7f, 0x80, 0x81,
@@ -134,7 +136,8 @@
0x100000001ll, 0x7ffffffffffffffell, 0x7fffffffffffffffll,
0x8000000000000000ll, 0x8000000000000001ll, 0xfffffffffffffffell,
0xffffffffffffffffll, NegInf, PosInf,
- Nan, FLT_MIN, FLT_MAX,
+ Nan, NegNan, -0.0,
+ FLT_MIN, FLT_MAX,
DBL_MIN, DBL_MAX
};
const static size_t NumValues = sizeof(Values) / sizeof(*Values);
@@ -173,6 +176,22 @@
}
}
}
+ // Run mySqrt on every entry of Values and compare the Subzero_-prefixed
+ // build against the unprefixed build (reported below as "sz" and "llc").
+ for (size_t i = 0; i < NumValues; ++i) {
+ Type Value = Values[i];
+ ++TotalTests;
+ Type ResultSz = Subzero_::mySqrt(Value);
+ Type ResultLlc = mySqrt(Value);
+ // Compare the raw bytes with memcmp() rather than ==, because two NaN
+ // results never compare equal with == but should still count as a match.
+ if (!memcmp(&ResultSz, &ResultLlc, sizeof(Type))) {
+ ++Passes;
+ } else {
+ ++Failures;
+ std::cout << std::fixed << "test_sqrt"
+ << (8 * sizeof(Type)) << "(" << Value
+ << "): sz=" << ResultSz << " llc=" << ResultLlc
+ << std::endl;
+ }
+ }
}
int main(int argc, char **argv) {
diff --git a/crosstest/test_arith_sqrt.ll b/crosstest/test_arith_sqrt.ll
new file mode 100644
index 0000000..2550aeb
--- /dev/null
+++ b/crosstest/test_arith_sqrt.ll
@@ -0,0 +1,14 @@
+target triple = "i686-pc-linux-gnu"
+
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
+
; _Z6mySqrtf is the Itanium C++ mangled name of mySqrt(float), so this
; definition backs the float overload declared in test_arith.h. It simply
; forwards its argument to the llvm.sqrt.f32 intrinsic.
+define float @_Z6mySqrtf(float %a) {
+ %x = call float @llvm.sqrt.f32(float %a)
+ ret float %x
+}
+
; _Z6mySqrtd is the Itanium C++ mangled name of mySqrt(double), so this
; definition backs the double overload declared in test_arith.h. It simply
; forwards its argument to the llvm.sqrt.f64 intrinsic.
+define double @_Z6mySqrtd(double %a) {
+ %x = call double @llvm.sqrt.f64(double %a)
+ ret double %x
+}
diff --git a/crosstest/test_fcmp_main.cpp b/crosstest/test_fcmp_main.cpp
index 8677c48..0c98c0a 100644
--- a/crosstest/test_fcmp_main.cpp
+++ b/crosstest/test_fcmp_main.cpp
@@ -22,15 +22,18 @@
static const double Ten = 10.0;
static const double PosInf = 1.0 / 0.0;
static const double Nan = 0.0 / 0.0;
+ static const double NegNan = -0.0 / 0.0;
assert(std::fpclassify(NegInf) == FP_INFINITE);
assert(std::fpclassify(PosInf) == FP_INFINITE);
assert(std::fpclassify(Nan) == FP_NAN);
+ assert(std::fpclassify(NegNan) == FP_NAN);
assert(NegInf < Zero);
assert(NegInf < PosInf);
assert(Zero < PosInf);
- volatile double Values[] = { NegInf, Zero, DBL_MIN, FLT_MIN, Ten,
- FLT_MAX, DBL_MAX, PosInf, Nan, };
+ volatile double Values[] = { NegInf, -Zero, Zero, DBL_MIN, FLT_MIN,
+ Ten, FLT_MAX, DBL_MAX, PosInf, Nan,
+ NegNan };
const static size_t NumValues = sizeof(Values) / sizeof(*Values);
typedef bool (*FuncTypeFloat)(float, float);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 79ec127..63e2cd7 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -271,6 +271,11 @@
return false;
}
+// Builds a Sqrtss-kind instruction that writes Dest and takes Source as its
+// only operand. The 1 passed to the InstX8632 base constructor matches the
+// single addSource() call in the body.
+InstX8632Sqrtss::InstX8632Sqrtss(Cfg *Func, Variable *Dest, Operand *Source)
+ : InstX8632(Func, InstX8632::Sqrtss, 1, Dest) {
+ addSource(Source);
+}
+
InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
: InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) {
if (Source)
@@ -919,6 +924,25 @@
dumpSources(Func);
}
+// Writes this instruction to the context's emit stream in the form
+// "\tsqrt<suffix>\t<dest>, <src>\n". The suffix is the SdSsString entry of
+// TypeX8632Attributes for the source operand's type, which must be f32 or
+// f64; the lit tests expect the mnemonics sqrtss and sqrtsd.
+void InstX8632Sqrtss::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 1);
+ Type Ty = getSrc(0)->getType();
+ assert(Ty == IceType_f32 || Ty == IceType_f64);
+ Str << "\tsqrt" << TypeX8632Attributes[Ty].SdSsString << "\t";
+ getDest()->emit(Func);
+ Str << ", ";
+ getSrc(0)->emit(Func);
+ Str << "\n";
+}
+
+// Writes a textual form of this instruction to the context's dump stream:
+// the output of dumpDest(), then " = sqrt.<type of Dest> ", then the output
+// of dumpSources().
+void InstX8632Sqrtss::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ dumpDest(Func);
+ Str << " = sqrt." << getDest()->getType() << " ";
+ dumpSources(Func);
+}
+
void InstX8632Xadd::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
if (Locked) {
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 0e4964f..f76d6df 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -170,6 +170,7 @@
Shld,
Shr,
Shrd,
+ Sqrtss,
Store,
StoreQ,
Sub,
@@ -827,6 +828,24 @@
virtual ~InstX8632Ret() {}
};
+// Sqrtss - Scalar sqrt of a float or double. Despite the name, this one
+// class covers both widths: emit() chooses the mnemonic suffix from the
+// type of the source operand.
+// Instances are obtained through create(), which placement-news the object
+// into storage returned by Func->allocate<InstX8632Sqrtss>(). The
+// constructor and destructor are private and copying is deleted.
+class InstX8632Sqrtss : public InstX8632 {
+public:
+ static InstX8632Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+ return new (Func->allocate<InstX8632Sqrtss>())
+ InstX8632Sqrtss(Func, Dest, Source);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Sqrtss); }
+
+private:
+ InstX8632Sqrtss(Cfg *Func, Variable *Dest, Operand *Source);
+ InstX8632Sqrtss(const InstX8632Sqrtss &) LLVM_DELETED_FUNCTION;
+ InstX8632Sqrtss &operator=(const InstX8632Sqrtss &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Sqrtss() {}
+};
+
// Exchanging Add instruction. Exchanges the first operand (destination
// operand) with the second operand (source operand), then loads the sum
// of the two values into the destination operand. The destination may be
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 569fd9e..62a525c 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2018,7 +2018,14 @@
lowerCall(Call);
return;
}
- case Intrinsics::Sqrt:
+ case Intrinsics::Sqrt: {
+ // Lower the sqrt intrinsic inline instead of as a call: legalize the
+ // single argument, compute the root into a temporary from makeReg() of
+ // the destination's type, then copy that temporary into Dest.
+ Operand *Src = legalize(Instr->getArg(0));
+ Variable *Dest = Instr->getDest();
+ Variable *T = makeReg(Dest->getType());
+ _sqrtss(T, Src);
+ _mov(Dest, T);
+ return;
+ }
case Intrinsics::Stacksave:
case Intrinsics::Stackrestore:
// TODO(jvoung): fill it in.
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 4c2c527..f709704 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -252,6 +252,9 @@
void _shrd(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert(InstX8632Shrd::create(Func, Dest, Src0, Src1));
}
+ // Creates an InstX8632Sqrtss with destination Dest and source Src0 and
+ // adds it to the instruction stream through Context.insert().
+ void _sqrtss(Variable *Dest, Operand *Src0) {
+ Context.insert(InstX8632Sqrtss::create(Func, Dest, Src0));
+ }
void _store(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632Store::create(Func, Value, Mem));
}
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index 15f9a65..5ed776b 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -15,6 +15,8 @@
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
declare void @llvm.nacl.longjmp(i8*, i32)
declare i32 @llvm.nacl.setjmp(i8*)
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
declare void @llvm.trap()
define i32 @test_nacl_read_tp() {
@@ -160,6 +162,63 @@
; CHECKO2REM-LABEL: test_setjmp_unused
; CHECKO2REM: call setjmp
; In the entry block this takes the f32 sqrt of the argument, of that first
; result, and of the constant -0.0; in block next it takes the sqrt of a
; value loaded through %iptr. All results are summed into the return value.
+define float @test_sqrt_float(float %x, i32 %iptr) {
+entry:
+ %r = call float @llvm.sqrt.f32(float %x)
+ %r2 = call float @llvm.sqrt.f32(float %r)
+ %r3 = call float @llvm.sqrt.f32(float -0.0)
+ %r4 = fadd float %r2, %r3
+ br label %next
+
+next:
+ %__6 = inttoptr i32 %iptr to float*
+ %y = load float* %__6, align 4
+ %r5 = call float @llvm.sqrt.f32(float %y)
+ %r6 = fadd float %r4, %r5
+ ret float %r6
+}
; CHECK-LABEL: test_sqrt_float
; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}}, dword ptr
; CHECK-LABEL: .L{{.*}}next
; We could fold the load and the sqrt into one operation, but the
; current folding only handles load + arithmetic op. The sqrt inst
; is considered an intrinsic call and not an arithmetic op.
; CHECK: sqrtss xmm{{.*}}
+
; Double-precision counterpart of the float sqrt test: the entry block takes
; the f64 sqrt of the argument, of that first result, and of the constant
; -0.0; block next takes the sqrt of a value loaded through %iptr. All
; results are summed into the return value.
+define double @test_sqrt_double(double %x, i32 %iptr) {
+entry:
+ %r = call double @llvm.sqrt.f64(double %x)
+ %r2 = call double @llvm.sqrt.f64(double %r)
+ %r3 = call double @llvm.sqrt.f64(double -0.0)
+ %r4 = fadd double %r2, %r3
+ br label %next
+
+next:
+ %__6 = inttoptr i32 %iptr to double*
+ %y = load double* %__6, align 8
+ %r5 = call double @llvm.sqrt.f64(double %y)
+ %r6 = fadd double %r4, %r5
+ ret double %r6
+}
; CHECK-LABEL: test_sqrt_double
; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}}, qword ptr
; CHECK-LABEL: .L{{.*}}next
; CHECK: sqrtsd xmm{{.*}}
+
; Neither sqrt result is used and the function returns the constant 0.0.
; The patterns after the function require that no sqrtss or sqrtsd is
; emitted for it in the configuration they apply to.
+define float @test_sqrt_ignored(float %x, double %y) {
+entry:
+ %ignored1 = call float @llvm.sqrt.f32(float %x)
+ %ignored2 = call double @llvm.sqrt.f64(double %y)
+ ret float 0.0
+}
; CHECKO2REM-LABEL: test_sqrt_ignored
; CHECKO2REM-NOT: sqrtss
; CHECKO2REM-NOT: sqrtsd
+
define i32 @test_trap(i32 %br) {
entry:
%r1 = icmp eq i32 %br, 0