Support frustum clipping against z = -1.

This allows the depth range adjustment to be performed only once, as part
of the viewport transform, so that it is no longer affected by transform
feedback.

Change-Id: I89fba3b131c234bea064b1b65f50b62261f4c307
Reviewed-on: https://swiftshader-review.googlesource.com/5421
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/OpenGL/libGL/Context.cpp b/src/OpenGL/libGL/Context.cpp
index 64f3f68..4922ecb 100644
--- a/src/OpenGL/libGL/Context.cpp
+++ b/src/OpenGL/libGL/Context.cpp
@@ -2425,12 +2425,7 @@
 {
 	if(!mState.currentProgram)
 	{
-		const sw::Matrix Z(1, 0, 0, 0,
-		                   0, 1, 0, 0,
-		                   0, 0, 0.5, 0.5,
-		                   0, 0, 0, 1);   // Map depth range from [-1, 1] to [0, 1]
-
-		device->setProjectionMatrix(Z * projection.current());
+		device->setProjectionMatrix(projection.current());
 		device->setViewMatrix(modelView.current());
 		device->setTextureMatrix(0, texture[0].current());
 		device->setTextureMatrix(1, texture[1].current());
@@ -3462,12 +3457,7 @@
 		return error(GL_INVALID_OPERATION);
 	}
 
-	const sw::Matrix Z(1, 0, 0, 0,
-	                   0, 1, 0, 0,
-	                   0, 0, 0.5, 0.5,
-	                   0, 0, 0, 1);   // Map depth range from [-1, 1] to [0, 1]
-
-	device->setProjectionMatrix(Z * projection.current());
+	device->setProjectionMatrix(projection.current());
 	device->setViewMatrix(modelView.current());
 	device->setTextureMatrix(0, texture[0].current());
 	device->setTextureMatrix(1, texture[1].current());
diff --git a/src/OpenGL/libGLES_CM/Context.cpp b/src/OpenGL/libGLES_CM/Context.cpp
index b659bd9..8650d84 100644
--- a/src/OpenGL/libGLES_CM/Context.cpp
+++ b/src/OpenGL/libGLES_CM/Context.cpp
@@ -1959,12 +1959,7 @@
 	device->setAmbientMaterialSource(sw::MATERIAL_MATERIAL);
 	device->setEmissiveMaterialSource(sw::MATERIAL_MATERIAL);
 
-	const sw::Matrix Z(1, 0, 0, 0,
-	                   0, 1, 0, 0,
-	                   0, 0, 0.5, 0.5,
-	                   0, 0, 0, 1);   // Map depth range from [-1, 1] to [0, 1]
-
-	device->setProjectionMatrix(Z * projectionStack.current());
+	device->setProjectionMatrix(projectionStack.current());
 	device->setModelMatrix(modelViewStack.current());
 	device->setTextureMatrix(0, textureStack0.current());
 	device->setTextureMatrix(1, textureStack1.current());
diff --git a/src/Renderer/Clipper.cpp b/src/Renderer/Clipper.cpp
index 3256d68..1c17402 100644
--- a/src/Renderer/Clipper.cpp
+++ b/src/Renderer/Clipper.cpp
@@ -20,8 +20,9 @@
 
 namespace sw
 {
-	Clipper::Clipper()
+	Clipper::Clipper(bool symmetricNormalizedDepth)
 	{
+		n = symmetricNormalizedDepth ? -1.0f : 0.0f;
 	}
 
 	Clipper::~Clipper()
@@ -30,12 +31,12 @@
 
 	unsigned int Clipper::computeClipFlags(const float4 &v)
 	{
-		return ((v.x > v.w)  ? CLIP_RIGHT  : 0) |
-		       ((v.y > v.w)  ? CLIP_TOP    : 0) |
-		       ((v.z > v.w)  ? CLIP_FAR    : 0) |
-		       ((v.x < -v.w) ? CLIP_LEFT   : 0) |
-		       ((v.y < -v.w) ? CLIP_BOTTOM : 0) |
-		       ((v.z < 0)    ? CLIP_NEAR   : 0) |
+		return ((v.x > v.w)     ? CLIP_RIGHT  : 0) |
+		       ((v.y > v.w)     ? CLIP_TOP    : 0) |
+		       ((v.z > v.w)     ? CLIP_FAR    : 0) |
+		       ((v.x < -v.w)    ? CLIP_LEFT   : 0) |
+		       ((v.y < -v.w)    ? CLIP_BOTTOM : 0) |
+		       ((v.z < n * v.w) ? CLIP_NEAR   : 0) |
 		       Clipper::CLIP_FINITE;   // FIXME: xyz finite
 	}
 
@@ -90,8 +91,8 @@
 		{
 			int j = i == polygon.n - 1 ? 0 : i + 1;
 
-			float di = V[i]->z;
-			float dj = V[j]->z;
+			float di = V[i]->z - n * V[i]->w;
+			float dj = V[j]->z - n * V[j]->w;
 
 			if(di >= 0)
 			{
@@ -100,7 +101,7 @@
 				if(dj < 0)
 				{
 					clipEdge(polygon.B[polygon.b], *V[i], *V[j], di, dj);
-					polygon.B[polygon.b].z = 0;
+					polygon.B[polygon.b].z = n * polygon.B[polygon.b].w;
 					T[t++] = &polygon.B[polygon.b++];
 				}
 			}
@@ -109,7 +110,7 @@
 				if(dj > 0)
 				{
 					clipEdge(polygon.B[polygon.b], *V[j], *V[i], dj, di);
-					polygon.B[polygon.b].z = 0;
+					polygon.B[polygon.b].z = n * polygon.B[polygon.b].w;
 					T[t++] = &polygon.B[polygon.b++];
 				}
 			}
diff --git a/src/Renderer/Clipper.hpp b/src/Renderer/Clipper.hpp
index 9c7d110..057153a 100644
--- a/src/Renderer/Clipper.hpp
+++ b/src/Renderer/Clipper.hpp
@@ -52,7 +52,7 @@
 			CLIP_USER = 0x3F00
 		};
 
-		Clipper();
+		Clipper(bool symmetricNormalizedDepth);
 
 		~Clipper();
 
@@ -69,6 +69,8 @@
 		void clipPlane(Polygon &polygon, const Plane &plane);
 
 		void clipEdge(float4 &Vo, const float4 &Vi, const float4 &Vj, float di, float dj) const;
+
+		float n;   // Near clip plane is z = n * w: n = -1 for symmetric normalized depth, 0 otherwise
 	};
 }
 
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index 983f4ec..208ae9d 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -113,7 +113,7 @@
 		sw::exactColorRounding = exactColorRounding;
 
 		setRenderTarget(0, 0);
-		clipper = new Clipper();
+		clipper = new Clipper(symmetricNormalizedDepth);
 
 		updateViewMatrix = true;
 		updateBaseMatrix = true;
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp
index ab7176b..cc29156 100644
--- a/src/Shader/SetupRoutine.cpp
+++ b/src/Shader/SetupRoutine.cpp
@@ -302,13 +302,9 @@
 				conditionalRotate2(wMax == w2, v0, v1, v2);
 			}
 
-			Float4 p0 = *Pointer<Float4>(v0 + pos * 16, 16);
-			Float4 p1 = *Pointer<Float4>(v1 + pos * 16, 16);
-			Float4 p2 = *Pointer<Float4>(v2 + pos * 16, 16);
-
-			Float w0 = p0.w;
-			Float w1 = p1.w;
-			Float w2 = p2.w;
+			Float w0 = *Pointer<Float>(v0 + pos * 16 + 12);
+			Float w1 = *Pointer<Float>(v1 + pos * 16 + 12);
+			Float w2 = *Pointer<Float>(v2 + pos * 16 + 12);
 
 			Float4 w012;
 
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index 6428cdf..c67a8bc 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -72,13 +72,12 @@
 
 			UInt cacheIndex = index & 0x0000003F;
 			Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
-			writeVertex(vertex, cacheLine, primitiveNumber, indexInPrimitive);
+			writeVertex(vertex, cacheLine);
 
-			vertex += sizeof(Vertex);
-			batch += sizeof(unsigned int);
-			vertexCount--;
 			if(state.transformFeedbackEnabled != 0)
 			{
+				transformFeedback(vertex, primitiveNumber, indexInPrimitive);
+
 				indexInPrimitive++;
 				If(indexInPrimitive == 3)
 				{
@@ -86,6 +85,10 @@
 					indexInPrimitive = 0;
 				}
 			}
+
+			vertex += sizeof(Vertex);
+			batch += sizeof(unsigned int);
+			vertexCount--;
 		}
 		Until(vertexCount == 0)
 
@@ -107,41 +110,26 @@
 	{
 		int pos = state.positionRegister;
 
-		Float4 outPosZ = o[pos].z;
-		if(state.transformFeedbackEnabled && symmetricNormalizedDepth && !state.fixedFunction)
-		{
-			outPosZ = (outPosZ + o[pos].w) * Float4(0.5f);
-		}
-
 		Int4 maxX = CmpLT(o[pos].w, o[pos].x);
 		Int4 maxY = CmpLT(o[pos].w, o[pos].y);
-		Int4 maxZ = CmpLT(o[pos].w, outPosZ);
-
+		Int4 maxZ = CmpLT(o[pos].w, o[pos].z);
 		Int4 minX = CmpNLE(-o[pos].w, o[pos].x);
 		Int4 minY = CmpNLE(-o[pos].w, o[pos].y);
-		Int4 minZ = CmpNLE(Float4(0.0f), outPosZ);
+		Int4 minZ = symmetricNormalizedDepth ? CmpNLE(-o[pos].w, o[pos].z) : CmpNLE(Float4(0.0f), o[pos].z);
 
-		Int flags;
-
-		flags = SignMask(maxX);
-		clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + flags * 4);   // FIXME: Array indexing
-		flags = SignMask(maxY);
-		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + flags * 4);
-		flags = SignMask(maxZ);
-		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + flags * 4);
-		flags = SignMask(minX);
-		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + flags * 4);
-		flags = SignMask(minY);
-		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + flags * 4);
-		flags = SignMask(minZ);
-		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + flags * 4);
+		clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4);   // FIXME: Array indexing
+		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4);
+		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4);
+		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4);
+		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4);
+		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4);
 
 		Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
 		Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
-		Int4 finiteZ = CmpLE(Abs(outPosZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
+		Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
 
-		flags = SignMask(finiteX & finiteY & finiteZ);
-		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + flags * 4);
+		Int4 finiteXYZ = finiteX & finiteY & finiteZ;
+		clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4);
 
 		if(state.preTransformed)
 		{
@@ -586,11 +574,6 @@
 			o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w;
 			o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w;
 		}
-
-		if(!state.transformFeedbackEnabled && symmetricNormalizedDepth && !state.fixedFunction)
-		{
-			o[pos].z = (o[pos].z + o[pos].w) * Float4(0.5f);
-		}
 	}
 
 	void VertexRoutine::writeCache(Pointer<Byte> &cacheLine)
@@ -661,6 +644,7 @@
 		*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF;
 		*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
 
+		// Viewport transform
 		int pos = state.positionRegister;
 
 		v.x = o[pos].x;
@@ -668,6 +652,11 @@
 		v.z = o[pos].z;
 		v.w = o[pos].w;
 
+		if(symmetricNormalizedDepth)
+		{
+			v.z = (v.z + v.w) * Float4(0.5f);   // Clip space [-w, w] -> [0, w], i.e. [-1, 1] -> [0, 1] once divided by w
+		}
+
 		Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
 		Float4 rhw = Float4(1.0f) / w;
 
@@ -684,7 +673,7 @@
 		*Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w;
 	}
 
-	void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache, const UInt &primitiveNumber, const UInt &indexInPrimitive)
+	void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache)
 	{
 		for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
 		{
@@ -696,45 +685,39 @@
 
 		*Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X));
 		*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));
+	}
 
-		if(state.transformFeedbackEnabled != 0)
+	void VertexRoutine::transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive)
+	{
+		If(indexInPrimitive < state.verticesPerPrimitive)
 		{
-			If(indexInPrimitive < state.verticesPerPrimitive)
+			UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive;
+
+			for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
 			{
-				UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive;
-				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; ++i)
+				if(state.transformFeedbackEnabled & (1ULL << i))
 				{
-					if(state.transformFeedbackEnabled & (1ULL << i))
+					UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i]));
+					UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i]));
+					UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i]));
+					UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i]));
+
+					Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float));
+					Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float);
+
+					For(UInt r = 0, r < row, r++)
 					{
-						UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i]));
-						UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i]));
-						UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i]));
-						UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i]));
+						UInt rOffsetX = r * col * sizeof(float);
+						UInt rOffset4 = r * sizeof(float4);
 
-						Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float));
-						Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float);
-
-						For(UInt r = 0, r < row, r++)
+						For(UInt c = 0, c < col, c++)
 						{
-							UInt rOffsetX = r * col * sizeof(float);
-							UInt rOffset4 = r * sizeof(float4);
-							For(UInt c = 0, c < col, c++)
-							{
-								UInt cOffset = c * sizeof(float);
-								*Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset);
-							}
+							UInt cOffset = c * sizeof(float);
+							*Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset);
 						}
 					}
 				}
 			}
-
-			// Make this correction after transform feedback has been outputted
-			if(symmetricNormalizedDepth && !state.fixedFunction && state.output[state.positionRegister].write)
-			{
-				Float z = *Pointer<Float>(vertex + OFFSET(Vertex, v[state.positionRegister]) + 2 * sizeof(float));
-				Float w = *Pointer<Float>(vertex + OFFSET(Vertex, v[state.positionRegister]) + 3 * sizeof(float));
-				*Pointer<Float>(vertex + OFFSET(Vertex, v[state.positionRegister]) + 2 * sizeof(float)) = (z + w) * Float(0.5f);
-			}
 		}
 	}
 }
diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp
index eadd795..4aec45f 100644
--- a/src/Shader/VertexRoutine.hpp
+++ b/src/Shader/VertexRoutine.hpp
@@ -63,7 +63,8 @@
 		void computeClipFlags();
 		void postTransform();
 		void writeCache(Pointer<Byte> &cacheLine);
-		void writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheLine, const UInt &primitiveNumber, const UInt &indexInPrimitive);
+		void writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheLine);
+		void transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive);
 	};
 }