Fix fragment shader depth not being ignored in EarlyFragmentTests mode

The spec states that "If early fragment tests are enabled, any depth
value computed by the fragment shader has no effect."

Previously we were writing fragment shader output depth into a separate
variable, and using that as the value to write into the depth buffer
regardless of the EarlyFragmentTests mode.

This change fixes that by having the shader's output depth overwrite the
interpolated depth, and by writing to the depth buffer before the
fragment shader executes when EarlyFragmentTests mode is enabled.

The alternative would have been to keep writing depth at the end of the
pipeline, and replace the fragment depth with the shader output depth
only when EarlyFragmentTests mode is not enabled. However, it is
probably advantageous to write depth early if we can, since the local
variables and memory will still be in registers and cache after the
depth test.

Bug: b/140302392
Tests: dEQP-VK.spirv_assembly.instruction.graphics.early_fragment.*
Change-Id: I3b7093d154e3f886096a88b7af4802037fd534f0
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/56068
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 809a03c..705ccf5 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -242,10 +242,14 @@
 	it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
 	if(it != spirvShader->outputBuiltins.end())
 	{
-		oDepth = routine.getVariable(it->second.Id)[it->second.FirstComponent];
-		if(state.depthClamp)
+		for(unsigned int q : samples)
 		{
-			oDepth = Min(Max(oDepth, Float4(state.minDepthClamp)), Float4(state.maxDepthClamp));
+			z[q] = routine.getVariable(it->second.Id)[it->second.FirstComponent];
+
+			if(state.depthClamp)
+			{
+				z[q] = Min(Max(z[q], Float4(state.minDepthClamp)), Float4(state.maxDepthClamp));
+			}
 		}
 	}
 }
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index db35edd..069544d 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -148,6 +148,11 @@
 
 		If(depthPass || !earlyFragmentTests)
 		{
+			if(earlyFragmentTests)
+			{
+				writeDepth(zBuffer, x, zMask, samples);
+			}
+
 			Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
 
 			// Centroid locations
@@ -310,9 +315,12 @@
 					}
 				}
 
-				If(depthPass || earlyFragmentTests)
+				If(depthPass)
 				{
-					writeDepth(zBuffer, x, zMask, samples);
+					if(!earlyFragmentTests)
+					{
+						writeDepth(zBuffer, x, zMask, samples);
+					}
 
 					rasterOperation(cBuffer, x, sMask, zMask, cMask, samples);
 
@@ -420,11 +428,6 @@
 {
 	Float4 Z = z;
 
-	if(spirvShader && spirvShader->getModes().DepthReplacing)
-	{
-		Z = oDepth;
-	}
-
 	Pointer<Byte> buffer = zBuffer + 4 * x;
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 
@@ -497,11 +500,6 @@
 {
 	Short4 Z = convertFixed16(z, true);
 
-	if(spirvShader && spirvShader->getModes().DepthReplacing)
-	{
-		Z = convertFixed16(oDepth, true);
-	}
-
 	Pointer<Byte> buffer = zBuffer + 2 * x;
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 
@@ -665,11 +663,6 @@
 {
 	Float4 Z = z;
 
-	if(spirvShader && spirvShader->getModes().DepthReplacing)
-	{
-		Z = oDepth;
-	}
-
 	Pointer<Byte> buffer = zBuffer + 4 * x;
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 
@@ -697,11 +690,6 @@
 {
 	Short4 Z = As<Short4>(convertFixed16(z, true));
 
-	if(spirvShader && spirvShader->getModes().DepthReplacing)
-	{
-		Z = As<Short4>(convertFixed16(oDepth, true));
-	}
-
 	Pointer<Byte> buffer = zBuffer + 2 * x;
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index cc24a46..c1e6c0c 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -46,9 +46,6 @@
 	SpirvRoutine routine;
 	const vk::DescriptorSet::Bindings &descriptorSets;
 
-	// Depth output
-	Float4 oDepth;
-
 	virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples) = 0;
 	virtual void executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples) = 0;
 	virtual Bool alphaTest(Int cMask[4], const SampleSet &samples) = 0;