Implement depthBoundsTest

The depthBoundsTest checks if the depth is within a certain range, and
if not, sets coverage to 0 for the particular fragment.

Tests: dEQP-VK.pipeline.depth.nocolor.format.d16_unorm.compare_ops.*
Tests: dEQP-VK.pipeline.depth.nocolor.format.d32_sfloat.compare_ops.*
Tests: dEQP-VK.pipeline.depth.nocolor.format.d32_sfloat_s8_uint.compare_ops.*
Tests: dEQP-VK.pipeline.depth.nocolor.format.d32_sfloat_s8_uint_separate_layouts.compare_ops.*
Tests: dEQP-VK.dynamic_state.ds_state.depth_bounds_1
Tests: dEQP-VK.dynamic_state.ds_state.depth_bounds_2
Tests: dEQP-VK.rasterization.frag_side_effects.color_at_end.depth_bounds
Tests: dEQP-VK.rasterization.frag_side_effects.color_at_beginning.depth_bounds

Bug: b/181656417
Change-Id: I50076834afa21f5b93e59b27a5438ba26f8008c8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/53888
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Sean Risser <srisser@google.com>
Commit-Queue: Sean Risser <srisser@google.com>
diff --git a/src/Device/Context.cpp b/src/Device/Context.cpp
index df37f02..cfb8567 100644
--- a/src/Device/Context.cpp
+++ b/src/Device/Context.cpp
@@ -457,12 +457,10 @@
 				UNSUPPORTED("pCreateInfo->pDepthStencilState->flags %d", int(pCreateInfo->pDepthStencilState->flags));
 			}
 
-			if(depthStencilState->depthBoundsTestEnable != VK_FALSE)
-			{
-				UNSUPPORTED("VkPhysicalDeviceFeatures::depthBounds");
-			}
-
 			depthBoundsTestEnable = (depthStencilState->depthBoundsTestEnable != VK_FALSE);
+			minDepthBounds = depthStencilState->minDepthBounds;
+			maxDepthBounds = depthStencilState->maxDepthBounds;
+
 			depthBufferEnable = (depthStencilState->depthTestEnable != VK_FALSE);
 			depthWriteEnable = (depthStencilState->depthWriteEnable != VK_FALSE);
 			depthCompareMode = depthStencilState->depthCompareOp;
@@ -593,6 +591,11 @@
 	return attachments.stencilBuffer && stencilEnable;
 }
 
+bool GraphicsState::depthBoundsTestActive() const  // Reports whether the depth bounds test is enabled in this state.
+{
+	return depthBoundsTestEnable;  // Assigned from depthStencilState->depthBoundsTestEnable.
+}
+
 const GraphicsState GraphicsState::combineStates(const DynamicState &dynamicState) const
 {
 	GraphicsState combinedState = *this;
@@ -627,7 +630,8 @@
 		ASSERT(dynamicState.minDepthBounds >= 0.0f && dynamicState.minDepthBounds <= 1.0f);
 		ASSERT(dynamicState.maxDepthBounds >= 0.0f && dynamicState.maxDepthBounds <= 1.0f);
 
-		UNSUPPORTED("VkPhysicalDeviceFeatures::depthBounds");
+		combinedState.minDepthBounds = dynamicState.minDepthBounds;
+		combinedState.maxDepthBounds = dynamicState.maxDepthBounds;
 	}
 
 	if(hasDynamicState(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) && stencilEnable)
diff --git a/src/Device/Context.hpp b/src/Device/Context.hpp
index ba2b493..2e15d47 100644
--- a/src/Device/Context.hpp
+++ b/src/Device/Context.hpp
@@ -155,6 +155,8 @@
 	inline float getConstantDepthBias() const { return constantDepthBias; }
 	inline float getSlopeDepthBias() const { return slopeDepthBias; }
 	inline float getDepthBiasClamp() const { return depthBiasClamp; }
+	inline float getMinDepthBounds() const { return minDepthBounds; }
+	inline float getMaxDepthBounds() const { return maxDepthBounds; }
 	inline bool hasDepthRangeUnrestricted() const { return depthRangeUnrestricted; }
 
 	// Pixel processor states
@@ -184,6 +186,7 @@
 	bool depthWriteActive(const Attachments &attachments) const;
 	bool depthBufferActive(const Attachments &attachments) const;
 	bool stencilActive(const Attachments &attachments) const;
+	bool depthBoundsTestActive() const;
 
 private:
 	inline bool hasDynamicState(VkDynamicState dynamicState) const { return (dynamicStateFlags & (1 << dynamicState)) != 0; }
@@ -219,6 +222,8 @@
 	float constantDepthBias;
 	float slopeDepthBias;
 	float depthBiasClamp;
+	float minDepthBounds;
+	float maxDepthBounds;
 	bool depthRangeUnrestricted;
 
 	// Pixel processor states
diff --git a/src/Device/PixelProcessor.cpp b/src/Device/PixelProcessor.cpp
index 16afb8a..fa85953 100644
--- a/src/Device/PixelProcessor.cpp
+++ b/src/Device/PixelProcessor.cpp
@@ -123,6 +123,10 @@
 		state.depthClamp = !state.depthFormat.isFloatFormat() || !pipelineState.hasDepthRangeUnrestricted();
 	}
 
+	state.depthBoundsTestActive = pipelineState.depthBoundsTestActive();
+	state.minDepthBounds = pipelineState.getMinDepthBounds();
+	state.maxDepthBounds = pipelineState.getMaxDepthBounds();
+
 	state.occlusionEnabled = occlusionEnabled;
 
 	bool fragmentContainsKill = (fragmentShader && fragmentShader->getModes().ContainsKill);
diff --git a/src/Device/PixelProcessor.hpp b/src/Device/PixelProcessor.hpp
index b2ae47c..b7187d4 100644
--- a/src/Device/PixelProcessor.hpp
+++ b/src/Device/PixelProcessor.hpp
@@ -77,6 +77,7 @@
 		StencilOpState backStencil;
 
 		bool depthTestActive;
+		bool depthBoundsTestActive;
 		bool occlusionEnabled;
 		bool perspective;
 
@@ -91,6 +92,8 @@
 		bool centroid;
 		bool sampleShadingEnabled;
 		float minSampleShading;
+		float minDepthBounds;
+		float maxDepthBounds;
 		VkFrontFace frontFace;
 		vk::Format depthFormat;
 		bool depthBias;
diff --git a/src/Device/QuadRasterizer.cpp b/src/Device/QuadRasterizer.cpp
index 024be64..c56d4da 100644
--- a/src/Device/QuadRasterizer.cpp
+++ b/src/Device/QuadRasterizer.cpp
@@ -83,7 +83,7 @@
 		}
 	}
 
-	if(state.depthTestActive)
+	if(state.depthTestActive || state.depthBoundsTestActive)
 	{
 		zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
 	}
@@ -216,7 +216,7 @@
 			}
 		}
 
-		if(state.depthTestActive)
+		if(state.depthTestActive || state.depthBoundsTestActive)
 		{
 			zBuffer += *Pointer<Int>(data + OFFSET(DrawData, depthPitchB)) << (1 + clusterCountLog2);  // FIXME: Precompute
 		}
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 7272825..f94155e 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -141,6 +141,7 @@
 			for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
 			{
 				depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
+				depthBoundsTest(zBuffer, q, x, zMask[q], cMask[q]);
 			}
 		}
 
@@ -307,6 +308,7 @@
 					for(unsigned int q = sampleLoopInit; q < sampleLoopEnd; q++)
 					{
 						depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
+						depthBoundsTest(zBuffer, q, x, zMask[q], cMask[q]);
 					}
 				}
 
@@ -604,6 +606,58 @@
 	}
 }
 
+Int4 PixelRoutine::depthBoundsTest16(const Pointer<Byte> &zBuffer, int q, const Int &x)  // Depth bounds comparison for 16-bit depth values read from zBuffer.
+{
+	Pointer<Byte> buffer = zBuffer + 2 * x;  // 2 bytes per depth value.
+	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));  // Byte offset to the second row read below.
+
+	if(q > 0)
+	{
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));  // Advance by q depth slices; q comes from the callers' sample loop.
+	}
+
+	Float4 minDepthBound(state.minDepthBounds);
+	Float4 maxDepthBound(state.maxDepthBounds);
+
+	Int2 z;
+	z = Insert(z, *Pointer<Int>(buffer), 0);  // One 32-bit load covers two adjacent 16-bit values.
+	z = Insert(z, *Pointer<Int>(buffer + pitch), 1);  // Same two columns, one pitch further.
+
+	Float4 zValue = convertFloat32(As<UShort4>(z));  // View as four 16-bit values; convertFloat32 scales them by 1/65535.
+	return Int4(CmpLE(minDepthBound, zValue) & CmpLE(zValue, maxDepthBound));  // Lanes satisfying min <= z <= max.
+}
+
+Int4 PixelRoutine::depthBoundsTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x)  // Depth bounds comparison for 32-bit float depth values read from zBuffer.
+{
+	Pointer<Byte> buffer = zBuffer + 4 * x;  // 4 bytes per depth value.
+	Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));  // Byte offset to the second row read below.
+
+	if(q > 0)
+	{
+		buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));  // Advance by q depth slices; q comes from the callers' sample loop.
+	}
+
+	Float4 zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));  // Two floats at buffer plus two floats one pitch further.
+	return Int4(CmpLE(Float4(state.minDepthBounds), zValue) & CmpLE(zValue, Float4(state.maxDepthBounds)));  // Lanes satisfying min <= z <= max.
+}
+
+void PixelRoutine::depthBoundsTest(const Pointer<Byte> &zBuffer, int q, const Int &x, Int &zMask, Int &cMask)  // Folds the depth bounds result into zMask/cMask; does nothing unless state.depthBoundsTestActive.
+{
+	if(state.depthBoundsTestActive)
+	{
+		Int4 zTest = (state.depthFormat == VK_FORMAT_D16_UNORM) ? depthBoundsTest16(zBuffer, q, x) : depthBoundsTest32F(zBuffer, q, x);  // Every format other than D16_UNORM takes the 32-bit float path.
+
+		if(!state.depthTestActive)
+		{
+			cMask &= zMask & SignMask(zTest);  // Depth test inactive: the result narrows the coverage mask.
+		}
+		else
+		{
+			zMask &= cMask & SignMask(zTest);  // Depth test active: the result narrows the depth mask instead.
+		}
+	}
+}
+
 void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha, int sampleId)
 {
 	static const int a2c[4] = {
@@ -2788,6 +2842,11 @@
 	return UShort4(cf * Float4(0xFFFF), saturate);
 }
 
+Float4 PixelRoutine::convertFloat32(const UShort4 &cf)  // Converts four 16-bit unsigned values to floats scaled by 1/65535.
+{
+	return Float4(cf) * Float4(1.0f / 65535.0f);  // Multiplies by the reciprocal of 0xFFFF.
+}
+
 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
 {
 	Pointer<Byte> LUT = constants + OFFSET(Constants, sRGBtoLinear12_16);
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index 46e0362..800f708 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -63,6 +63,7 @@
 
 	bool isSRGB(int index) const;
 	UShort4 convertFixed16(const Float4 &cf, bool saturate = true);
+	Float4 convertFloat32(const UShort4 &cf);
 	void linearToSRGB12_16(Vector4s &c);
 
 private:
@@ -72,6 +73,7 @@
 	void stencilOperation(Byte8 &newValue, const Byte8 &bufferValue, const PixelProcessor::States::StencilOpState &ops, bool isBack, const Int &zMask, const Int &sMask);
 	void stencilOperation(Byte8 &output, const Byte8 &bufferValue, VkStencilOp operation, bool isBack);
 	Bool depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask);
+	void depthBoundsTest(const Pointer<Byte> &zBuffer, int q, const Int &x, Int &zMask, Int &cMask);
 
 	// Raster operations
 	void blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorActive);
@@ -91,6 +93,9 @@
 
 	void writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask);
 	void writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask);
+
+	Int4 depthBoundsTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x);
+	Int4 depthBoundsTest16(const Pointer<Byte> &zBuffer, int q, const Int &x);
 };
 
 }  // namespace sw
diff --git a/src/Renderer/PixelProcessor.hpp b/src/Renderer/PixelProcessor.hpp
index 2fc292a..2c54398 100644
--- a/src/Renderer/PixelProcessor.hpp
+++ b/src/Renderer/PixelProcessor.hpp
@@ -62,6 +62,7 @@
 			bool stencilWriteMaskedCCW                : 1;
 
 			bool depthTestActive                      : 1;
+			bool depthBoundsTestActive                : 1;
 			bool fogActive                            : 1;
 			FogMode pixelFogMode                      : BITS(FOG_LAST);
 			bool specularAdd                          : 1;
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 7e72f34..dc507f8 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -49,7 +49,7 @@
 		VK_FALSE,  // depthClamp
 		VK_TRUE,   // depthBiasClamp
 		VK_TRUE,   // fillModeNonSolid
-		VK_FALSE,  // depthBounds
+		VK_TRUE,   // depthBounds
 		VK_FALSE,  // wideLines
 		VK_TRUE,   // largePoints
 		VK_FALSE,  // alphaToOne