Correct reciprocal approximation for power-of-two values. Intel's reciprocal approximation instruction is not exact for power-of-two values. It provides 12 bits of mantissa precision and keeps a balance between positive and negative errors, but as a result the approximated reciprocal of 2^x is not exactly 2^-x. This affects conformance tests which expect varyings not to be affected by the perspective division. Correct for this by multiplying the approximation by the inverse of the instruction's own result for 1.0. Bug 27165393 Change-Id: Ie52ec511a14a4f447adc47ce9c875bbad03cd274 Reviewed-on: https://swiftshader-review.googlesource.com/4903 Tested-by: Nicolas Capens <capn@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Nicolas Capens <capn@google.com>

commit: 05b3d665fd92cd11b8a6517dafe661a5398547b7 [log] [tgz]
author: Nicolas Capens <capn@google.com> Thu Feb 25 23:58:33 2016 -0500
committer: Nicolas Capens <capn@google.com> Fri Feb 26 15:54:48 2016 +0000
tree: 7b768212a45b17c3c939de470f90b3004b1b960e
parent: 407813b4c558c92a42831104f32cce0d113db8ca [diff] [blame]
diff --git a/src/Reactor/Nucleus.cpp b/src/Reactor/Nucleus.cpp
index 2325c7f..ce48b52 100644
--- a/src/Reactor/Nucleus.cpp
+++ b/src/Reactor/Nucleus.cpp

@@ -33,6 +33,7 @@
 #include "Thread.hpp"
 #include "Memory.hpp"
 
+#include <xmmintrin.h>
 #include <fstream>
 
 #if defined(__x86_64__) && defined(_WIN32)
@@ -4635,9 +4636,9 @@
 			Constant *shuffle[2];
 			shuffle[0] = Nucleus::createConstantInt(0);
 			shuffle[1] = Nucleus::createConstantInt(1);
-	
+
 			Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
-	
+
 			storeValue(Nucleus::createBitCast(packed, Int2::getType()));
 		}
 	}
@@ -5199,7 +5200,7 @@
 		Value *element = Nucleus::createBitCast(cast.value, Long::getType());
 		long2 = Nucleus::createInsertElement(long2, element, 0);
 		RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
-		
+
 		if(CPUID::supportsSSE4_1())
 		{
 			storeValue(x86::pmovsxwd(vector).value);
@@ -6069,9 +6070,18 @@
 		return IfThenElse(x < y, x, y);
 	}
 
-	RValue<Float> Rcp_pp(RValue<Float> x)
+	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
 	{
-		return x86::rcpss(x);
+		if(exactAtPow2)
+		{
+			// rcpss uses a piecewise-linear approximation which minimizes the relative error
+			// but is not exact at power-of-two values. Rectify by multiplying by 1 / rcpss(1.0f), obtained by running the same instruction on 1.0f via _mm_rcp_ss.
+			return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+		}
+		else
+		{
+			return x86::rcpss(x);
+		}
 	}
 
 	RValue<Float> RcpSqrt_pp(RValue<Float> x)
@@ -6580,9 +6590,18 @@
 		return x86::minps(x, y);
 	}
 
-	RValue<Float4> Rcp_pp(RValue<Float4> x)
+	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
 	{
-		return x86::rcpps(x);
+		if(exactAtPow2)
+		{
+			// rcpps uses a piecewise-linear approximation which minimizes the relative error
+			// but is not exact at power-of-two values. Rectify by multiplying by 1 / rcp(1.0f), where rcp(1.0f) is measured with the scalar _mm_rcp_ss intrinsic.
+			return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+		}
+		else
+		{
+			return x86::rcpps(x);
+		}
 	}
 
 	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
commit	05b3d665fd92cd11b8a6517dafe661a5398547b7	[log] [tgz]
author	Nicolas Capens <capn@google.com>	Thu Feb 25 23:58:33 2016 -0500
committer	Nicolas Capens <capn@google.com>	Fri Feb 26 15:54:48 2016 +0000
tree	7b768212a45b17c3c939de470f90b3004b1b960e
parent	407813b4c558c92a42831104f32cce0d113db8ca [diff] [blame]