RoundEven implementation

Implementation of the roundEven GLSL intrinsic function.
All dEQP roundEven tests pass.

Change-Id: I8b6f3704f03eea32b08a6c2dc318ecb99a01957d
Reviewed-on: https://swiftshader-review.googlesource.com/3382
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index dded202..d810f1b 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -3935,6 +3935,7 @@
 			case Shader::OPCODE_TRUNC:      trunc(d, s0);                                   break;
 			case Shader::OPCODE_FLOOR:      floor(d, s0);                                   break;
 			case Shader::OPCODE_ROUND:		round(d, s0);                                   break;
+			case Shader::OPCODE_ROUNDEVEN:	roundEven(d, s0);                               break;
 			case Shader::OPCODE_CEIL:       ceil(d, s0);                                    break;
 			case Shader::OPCODE_EXP2X:		exp2x(d, s0, pp);								break;
 			case Shader::OPCODE_EXP2:		exp2(d, s0, pp);								break;
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index 516ec70..6d5b856 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp
@@ -1001,6 +1001,25 @@
 		dst.w = Round(src.w);
 	}
 
+	void ShaderCore::roundEven(Vector4f &dst, Vector4f &src)
+	{
+		// Round to nearest with exact .5 ties going to even. round() may break ties either way, so fix up its result r:
+		//     r += ((r < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(r)
+		// ex.: 2.5 -> r = 3: 3 + (0 * 2 - 1) * 1 * 1 = 2;   2.5 -> r = 2: 2 + (1 * 2 - 1) * 1 * 0 = 2
+		auto breakTieToEven = [](Float4 &r, Float4 &s)
+		{
+			Float4 towardsSrc = Float4(CmpLT(r, s) & Int4(1)) * Float4(2.0f) - Float4(1.0f);   // +1 if r < s, else -1
+			Float4 isTie = Float4(CmpEQ(Frac(s), Float4(0.5f)) & Int4(1));                    // 1 if fract(s) == 0.5, else 0
+			Float4 isOdd = Float4(Int4(r) & Int4(1));                                         // 1 if r is odd, else 0
+			r += towardsSrc * isTie * isOdd;
+		};
+		round(dst, src);
+		breakTieToEven(dst.x, src.x);
+		breakTieToEven(dst.y, src.y);
+		breakTieToEven(dst.z, src.z);
+		breakTieToEven(dst.w, src.w);
+	}
+
 	void ShaderCore::ceil(Vector4f &dst, Vector4f &src)
 	{
 		dst.x = Ceil(src.x);
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp
index 66ff2cd..bd18f59 100644
--- a/src/Shader/ShaderCore.hpp
+++ b/src/Shader/ShaderCore.hpp
@@ -289,6 +289,7 @@
 		void trunc(Vector4f &dst, Vector4f &src);

 		void floor(Vector4f &dst, Vector4f &src);

 		void round(Vector4f &dst, Vector4f &src);

+		void roundEven(Vector4f &dst, Vector4f &src);

 		void ceil(Vector4f &dst, Vector4f &src);

 		void powx(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp = false);

 		void pow(Vector4f &dst, Vector4f &src0, Vector4f &src1, bool pp = false);

diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index acbdbcf..14bd5f6 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -162,6 +162,7 @@
 			case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
 			case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
 			case Shader::OPCODE_ROUND:      round(d, s0);                   break;
+			case Shader::OPCODE_ROUNDEVEN:	roundEven(d, s0);               break;
 			case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
 			case Shader::OPCODE_LIT:		lit(d, s0);						break;
 			case Shader::OPCODE_LOG2X:		log2x(d, s0, pp);				break;