Always perform depth test using floating point values This CL removes a lot of duplicated code previously shared between the 16-bit depth test and the 32-bit depth test. Instead of having a separate path for 16-bit depth comparisons, all comparisons now use floating point, which will make it more maintainable and easier to convert to larger SIMD widths. While the 16 bit depth comparison is now done using floating point values, it is performed in the [0.0f, 0xFFFF] range for simplicity. One of the issues was the use of convertFixed16() on depth values, because depth value need to be rounded, whereas color values do not require rounding when calling this function. The increased accuracy also fixes 2 previously failing Vulkan tests. Tests: dEQP-VK.fragment_operations.early_fragment.* Bug: b/204919030 Bug: b/200826740 Change-Id: I87aefbd9aed36d5e0acbe4daebc213eaf3f77f9c Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/66888 Presubmit-Ready: Alexis Hétu <sugoi@google.com> Commit-Queue: Alexis Hétu <sugoi@google.com> Tested-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Sean Risser <srisser@google.com>

commit: 1d6a1036bdb37072ea2f946df2bcbec367b99c3a [log] [tgz]
author: Alexis Hetu <sugoi@google.com> Tue Jul 05 12:20:29 2022 -0400
committer: swiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com <swiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com> Wed Jul 06 15:19:23 2022 +0000
tree: ed3376c4c9985c9fe04605516658e52d39069556
parent: 488cb63b5b4d39860a9a264b997d9209c2f1c633 [diff]
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index 1435809..1017d0d 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp

@@ -363,7 +363,8 @@
 			switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))
 			{
 			case VK_FORMAT_D16_UNORM:
-				data->minimumResolvableDepthDifference = 1.0f / 0xFFFF;
+				// Minimum is 1 unit, but account for potential floating-point rounding errors
+				data->minimumResolvableDepthDifference = 1.01f / 0xFFFF;
 				break;
 			case VK_FORMAT_D32_SFLOAT:
 				// The minimum resolvable depth difference is determined per-polygon for floating-point depth

diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index ca86a18..07b3db1 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp

@@ -419,10 +419,8 @@
 	}
 }
 
-Bool PixelRoutine::depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask)
+SIMD::Float PixelRoutine::getDepthValue32F(const Pointer<Byte> &zBuffer, int q, const Int &x) const
 {
-	SIMD::Float Z = z;
-
 	ASSERT(SIMD::Width == 4);
 	Pointer<Byte> buffer = zBuffer + 4 * x;
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
@@ -432,12 +430,65 @@
 		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
 	}
 
+	Float4 zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
+	return SIMD::Float(zValue);
+}
+
+SIMD::Float PixelRoutine::getDepthValue16(const Pointer<Byte> &zBuffer, int q, const Int &x) const
+{
+	ASSERT(SIMD::Width == 4);
+	Pointer<Byte> buffer = zBuffer + 2 * x;
+	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
+
+	if(q > 0)
+	{
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
+	}
+
+	UShort4 zValue16;
+	zValue16 = As<UShort4>(Insert(As<Int2>(zValue16), *Pointer<Int>(buffer), 0));
+	zValue16 = As<UShort4>(Insert(As<Int2>(zValue16), *Pointer<Int>(buffer + pitch), 1));
+	Float4 zValue = Float4(zValue16);
+	return SIMD::Float(zValue);
+}
+
+SIMD::Float PixelRoutine::clampDepth(const SIMD::Float &z)
+{
+	if(!state.depthClamp)
+	{
+		return z;
+	}
+
+	return Min(Max(z, state.minDepthClamp), state.maxDepthClamp);
+}
+
+Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask)
+{
+	if(!state.depthTestActive)
+	{
+		return true;
+	}
+
+	SIMD::Float Z;
 	SIMD::Float zValue;
 
 	if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
 	{
-		ASSERT(SIMD::Width == 4);
-		zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
+		switch(state.depthFormat)
+		{
+		case VK_FORMAT_D16_UNORM:
+			Z = Min(Max(Round(z * 0xFFFFu), 0.0f), 0xFFFFu);
+			zValue = getDepthValue16(zBuffer, q, x);
+			break;
+		case VK_FORMAT_D32_SFLOAT:
+		case VK_FORMAT_D32_SFLOAT_S8_UINT:
+			Z = z;
+			zValue = getDepthValue32F(zBuffer, q, x);
+			break;
+		default:
+			UNSUPPORTED("Depth format: %d", int(state.depthFormat));
+			return false;
+		}
 	}
 
 	SIMD::Int zTest;
@@ -493,114 +544,6 @@
 	return zMask != 0;
 }
 
-Bool PixelRoutine::depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask)
-{
-	ASSERT(SIMD::Width == 4);
-	Short4 Z = convertFixed16(Extract128(z, 0), true);
-
-	Pointer<Byte> buffer = zBuffer + 2 * x;
-	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
-
-	if(q > 0)
-	{
-		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
-	}
-
-	Short4 zValue;
-
-	if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
-	{
-		zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer), 0));
-		zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer + pitch), 1));
-	}
-
-	Int4 zTest;
-
-	// Bias values to make unsigned compares out of Reactor's (due SSE's) signed compares only
-	zValue = zValue - Short4(0x8000u);
-	Z = Z - Short4(0x8000u);
-
-	switch(state.depthCompareMode)
-	{
-	case VK_COMPARE_OP_ALWAYS:
-		// Optimized
-		break;
-	case VK_COMPARE_OP_NEVER:
-		// Optimized
-		break;
-	case VK_COMPARE_OP_EQUAL:
-		zTest = Int4(CmpEQ(zValue, Z));
-		break;
-	case VK_COMPARE_OP_NOT_EQUAL:
-		zTest = ~Int4(CmpEQ(zValue, Z));
-		break;
-	case VK_COMPARE_OP_LESS:
-		zTest = Int4(CmpGT(zValue, Z));
-		break;
-	case VK_COMPARE_OP_GREATER_OR_EQUAL:
-		zTest = ~Int4(CmpGT(zValue, Z));
-		break;
-	case VK_COMPARE_OP_LESS_OR_EQUAL:
-		zTest = ~Int4(CmpGT(Z, zValue));
-		break;
-	case VK_COMPARE_OP_GREATER:
-		zTest = Int4(CmpGT(Z, zValue));
-		break;
-	default:
-		UNSUPPORTED("VkCompareOp: %d", int(state.depthCompareMode));
-	}
-
-	switch(state.depthCompareMode)
-	{
-	case VK_COMPARE_OP_ALWAYS:
-		zMask = cMask;
-		break;
-	case VK_COMPARE_OP_NEVER:
-		zMask = 0x0;
-		break;
-	default:
-		zMask = SignMask(zTest) & cMask;
-		break;
-	}
-
-	if(state.stencilActive)
-	{
-		zMask &= sMask;
-	}
-
-	return zMask != 0;
-}
-
-SIMD::Float PixelRoutine::clampDepth(const SIMD::Float &z)
-{
-	if(!state.depthClamp)
-	{
-		return z;
-	}
-
-	return Min(Max(z, state.minDepthClamp), state.maxDepthClamp);
-}
-
-Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask)
-{
-	if(!state.depthTestActive)
-	{
-		return true;
-	}
-
-	switch(state.depthFormat)
-	{
-	case VK_FORMAT_D16_UNORM:
-		return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
-	case VK_FORMAT_D32_SFLOAT:
-	case VK_FORMAT_D32_SFLOAT_S8_UINT:
-		return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
-	default:
-		UNSUPPORTED("Depth format: %d", int(state.depthFormat));
-		return false;
-	}
-}
-
 Int4 PixelRoutine::depthBoundsTest16(const Pointer<Byte> &zBuffer, int q, const Int &x)
 {
 	Pointer<Byte> buffer = zBuffer + 2 * x;
@@ -714,7 +657,7 @@
 
 void PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
 {
-	Short4 Z = As<Short4>(convertFixed16(z, true));
+	Short4 Z = UShort4(Round(z * 0xFFFFu), true);
 
 	Pointer<Byte> buffer = zBuffer + 2 * x;
 	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
@@ -2313,7 +2256,7 @@
 		buffer += pitchB;
 		texelColor.x.z = Float(Int(*Pointer<UShort>(buffer + 0)));
 		texelColor.x.w = Float(Int(*Pointer<UShort>(buffer + 2)));
-		texelColor.x *= (1.0f / 0xFFFF);
+		texelColor.x *= (1.0f / 0xFFFFu);
 		texelColor.y = texelColor.z = texelColor.w = 1.0f;
 		break;
 	case VK_FORMAT_R16_SFLOAT:
@@ -2336,8 +2279,8 @@
 		texelColor.y.z = Float(Int(*Pointer<UShort>(buffer + 2)));
 		texelColor.x.w = Float(Int(*Pointer<UShort>(buffer + 4)));
 		texelColor.y.w = Float(Int(*Pointer<UShort>(buffer + 6)));
-		texelColor.x *= (1.0f / 0xFFFF);
-		texelColor.y *= (1.0f / 0xFFFF);
+		texelColor.x *= (1.0f / 0xFFFFu);
+		texelColor.y *= (1.0f / 0xFFFFu);
 		texelColor.z = texelColor.w = 1.0f;
 		break;
 	case VK_FORMAT_R16G16_SFLOAT:
@@ -2372,10 +2315,10 @@
 		texelColor.y.w = Float(Int(*Pointer<UShort>(buffer + 0xa)));
 		texelColor.z.w = Float(Int(*Pointer<UShort>(buffer + 0xc)));
 		texelColor.w.w = Float(Int(*Pointer<UShort>(buffer + 0xe)));
-		texelColor.x *= (1.0f / 0xFFFF);
-		texelColor.y *= (1.0f / 0xFFFF);
-		texelColor.z *= (1.0f / 0xFFFF);
-		texelColor.w *= (1.0f / 0xFFFF);
+		texelColor.x *= (1.0f / 0xFFFFu);
+		texelColor.y *= (1.0f / 0xFFFFu);
+		texelColor.z *= (1.0f / 0xFFFFu);
+		texelColor.w *= (1.0f / 0xFFFFu);
 		break;
 	case VK_FORMAT_R16G16B16A16_SFLOAT:
 		buffer += 8 * x;
@@ -2558,17 +2501,17 @@
 	{
 	case VK_FORMAT_R16G16B16A16_UNORM:
 		color.w = Min(Max(color.w, 0.0f), 1.0f);  // TODO(b/204560089): Omit clamp if redundant
-		color.w = As<Float4>(RoundInt(color.w * 0xFFFF));
+		color.w = As<Float4>(RoundInt(color.w * 0xFFFFu));
 		color.z = Min(Max(color.z, 0.0f), 1.0f);  // TODO(b/204560089): Omit clamp if redundant
-		color.z = As<Float4>(RoundInt(color.z * 0xFFFF));
+		color.z = As<Float4>(RoundInt(color.z * 0xFFFFu));
 		// [[fallthrough]]
 	case VK_FORMAT_R16G16_UNORM:
 		color.y = Min(Max(color.y, 0.0f), 1.0f);  // TODO(b/204560089): Omit clamp if redundant
-		color.y = As<Float4>(RoundInt(color.y * 0xFFFF));
+		color.y = As<Float4>(RoundInt(color.y * 0xFFFFu));
 		//[[fallthrough]]
 	case VK_FORMAT_R16_UNORM:
 		color.x = Min(Max(color.x, 0.0f), 1.0f);  // TODO(b/204560089): Omit clamp if redundant
-		color.x = As<Float4>(RoundInt(color.x * 0xFFFF));
+		color.x = As<Float4>(RoundInt(color.x * 0xFFFFu));
 		break;
 	default:
 		// TODO(b/204560089): Omit clamp if redundant
@@ -2757,7 +2700,7 @@
 
 			UInt xyzw, packedCol;
 
-			xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
+			xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFFu;
 			buffer += pitchB;
 			xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
 
@@ -3205,7 +3148,7 @@
 
 UShort4 PixelRoutine::convertFixed16(const Float4 &cf, bool saturate)
 {
-	return UShort4(cf * 0xFFFF, saturate);
+	return UShort4(cf * 0xFFFFu, saturate);
 }
 
 Float4 PixelRoutine::convertFloat32(const UShort4 &cf)

diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index c00768e..6aa1f88 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp

@@ -75,7 +75,11 @@
 	void depthBoundsTest(const Pointer<Byte> &zBuffer, int q, const Int &x, Int &zMask, Int &cMask);
 
 	void readPixel(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &pixel);
-	enum BlendFactorModifier { None, OneMinus };
+	enum BlendFactorModifier
+	{
+		None,
+		OneMinus
+	};
 	Float blendConstant(vk::Format format, int component, BlendFactorModifier modifier = None);
 	void blendFactorRGB(SIMD::Float4 &blendFactorRGB, const SIMD::Float4 &sourceColor, const SIMD::Float4 &destColor, VkBlendFactor colorBlendFactor, vk::Format format);
 	void blendFactorAlpha(SIMD::Float &blendFactorAlpha, const SIMD::Float &sourceAlpha, const SIMD::Float &destAlpha, VkBlendFactor alphaBlendFactor, vk::Format format);
@@ -103,8 +107,8 @@
 	void linearToSRGB16_12_16(Vector4s &c);
 	Float4 sRGBtoLinear(const Float4 &x);
 
-	Bool depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask);
-	Bool depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask);
+	SIMD::Float getDepthValue32F(const Pointer<Byte> &zBuffer, int q, const Int &x) const;
+	SIMD::Float getDepthValue16(const Pointer<Byte> &zBuffer, int q, const Int &x) const;
 
 	void writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask);
 	void writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask);
commit	1d6a1036bdb37072ea2f946df2bcbec367b99c3a	[log] [tgz]
author	Alexis Hetu <sugoi@google.com>	Tue Jul 05 12:20:29 2022 -0400
committer	swiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com <swiftshader-scoped@luci-project-accounts.iam.gserviceaccount.com>	Wed Jul 06 15:19:23 2022 +0000
tree	ed3376c4c9985c9fe04605516658e52d39069556
parent	488cb63b5b4d39860a9a264b997d9209c2f1c633 [diff]