High precision implementation for sin/cos/tan. This implementation allows all dEQP precision tests for the sin, cos and tan functions to pass. Change-Id: I33a24497dea68ab2de2e65931f50f2dd4298523c Reviewed-on: https://swiftshader-review.googlesource.com/13555 Reviewed-by: Nicolas Capens <nicolascapens@google.com> Tested-by: Alexis Hétu <sugoi@google.com>

commit: 929c6b00b1a732dca359cf1c8c92670f58b599ef [log] [tgz]
author: Alexis Hetu <sugoi@google.com> Tue Nov 07 16:04:25 2017 -0500
committer: Alexis Hétu <sugoi@google.com> Tue Nov 14 18:50:36 2017 +0000
tree: f8972142d7c55831216f98a251643cdc9253badc
parent: 5bf9708430d230f133833b72d64921cafda98929 [diff]
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index 889f44e..f9a4dbb 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp

@@ -1894,6 +1894,12 @@
 			instruction->dst.integer = (dst->getBasicType() == EbtInt);
 		}
 
+		if(src0)
+		{
+			TIntermTyped* src = src0->getAsTyped();
+			instruction->dst.partialPrecision = src && (src->getPrecision() <= EbpLow);
+		}
+
 		argument(instruction->src[0], src0, index0);
 		argument(instruction->src[1], src1, index1);
 		argument(instruction->src[2], src2, index2);

diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index 53644b8..017d0f8 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp

@@ -287,6 +287,21 @@
 		Float4 y = x * Float4(1.59154943e-1f);   // 1/2pi
 		y = y - Round(y);
 
+		if(!pp)
+		{
+			// From the paper: "A Fast, Vectorizable Algorithm for Producing Single-Precision Sine-Cosine Pairs"
+			// This implementation passes OpenGL ES 3.0 precision requirements, at the cost of more operations:
+			// !pp : 17 mul, 7 add, 1 sub, 1 reciprocal
+			//  pp : 4 mul, 2 add, 2 abs
+
+			Float4 y2 = y * y;
+			Float4 c1 = y2 * (y2 * (y2 * Float4(-0.0204391631f) + Float4(0.2536086171f)) + Float4(-1.2336977925f)) + Float4(1.0f);
+			Float4 s1 = y * (y2 * (y2 * (y2 * Float4(-0.0046075748f) + Float4(0.0796819754f)) + Float4(-0.645963615f)) + Float4(1.5707963235f));
+			Float4 c2 = (c1 * c1) - (s1 * s1);
+			Float4 s2 = Float4(2.0f) * s1 * c1;
+			return Float4(2.0f) * s2 * c2 * reciprocal(s2 * s2 + c2 * c2, pp, true);
+		}
+
 		const Float4 A = Float4(-16.0f);
 		const Float4 B = Float4(8.0f);
 		const Float4 C = Float4(7.75160950e-1f);
commit	929c6b00b1a732dca359cf1c8c92670f58b599ef	[log] [tgz]
author	Alexis Hetu <sugoi@google.com>	Tue Nov 07 16:04:25 2017 -0500
committer	Alexis Hétu <sugoi@google.com>	Tue Nov 14 18:50:36 2017 +0000
tree	f8972142d7c55831216f98a251643cdc9253badc
parent	5bf9708430d230f133833b72d64921cafda98929 [diff]