Correct reciprocal approximation for power-of-two values.
Intel's reciprocal approximation instruction is not exact for power-of-two
values. It provides 12 bits of mantissa precision and keeps a balance between
positive and negative errors, but the approximated reciprocal of 2^x is not exactly 2^-x. This
affects conformance tests which expect varyings not to be affected by the
perspective division. Correct for this by multiplying by the inverse of the instruction's result for 1.0.
Bug 27165393
Change-Id: Ie52ec511a14a4f447adc47ce9c875bbad03cd274
Reviewed-on: https://swiftshader-review.googlesource.com/4903
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/Nucleus.cpp b/src/Reactor/Nucleus.cpp
index 2325c7f..ce48b52 100644
--- a/src/Reactor/Nucleus.cpp
+++ b/src/Reactor/Nucleus.cpp
@@ -33,6 +33,7 @@
#include "Thread.hpp"
#include "Memory.hpp"
+#include <xmmintrin.h>
#include <fstream>
#if defined(__x86_64__) && defined(_WIN32)
@@ -4635,9 +4636,9 @@
Constant *shuffle[2];
shuffle[0] = Nucleus::createConstantInt(0);
shuffle[1] = Nucleus::createConstantInt(1);
-
+
Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
-
+
storeValue(Nucleus::createBitCast(packed, Int2::getType()));
}
}
@@ -5199,7 +5200,7 @@
Value *element = Nucleus::createBitCast(cast.value, Long::getType());
long2 = Nucleus::createInsertElement(long2, element, 0);
RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
-
+
if(CPUID::supportsSSE4_1())
{
storeValue(x86::pmovsxwd(vector).value);
@@ -6069,9 +6070,18 @@
return IfThenElse(x < y, x, y);
}
- RValue<Float> Rcp_pp(RValue<Float> x)
+ RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
{
- return x86::rcpss(x);
+ if(exactAtPow2)
+ {
+ // rcpss uses a piecewise-linear approximation which minimizes the relative error
+ // but is not exact at power-of-two values. Rectify by scaling with 1 / rcpss(1.0f).
+ return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+ }
+ else
+ {
+ return x86::rcpss(x);
+ }
}
RValue<Float> RcpSqrt_pp(RValue<Float> x)
@@ -6580,9 +6590,18 @@
return x86::minps(x, y);
}
- RValue<Float4> Rcp_pp(RValue<Float4> x)
+ RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
{
- return x86::rcpps(x);
+ if(exactAtPow2)
+ {
+ // rcpps uses a piecewise-linear approximation which minimizes the relative error but is not exact
+ // at power-of-two values. Rectify by scaling with 1 / rcpss(1.0f), taken from the scalar _mm_rcp_ss.
+ return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+ }
+ else
+ {
+ return x86::rcpps(x);
+ }
}
RValue<Float4> RcpSqrt_pp(RValue<Float4> x)