Implement OpImageGather

Bug: b/129523279
Tests: dEQP-VK.glsl.texture_gather.*
Change-Id: Ie87e600bd787fa832beaf834289f64ef3b590bd8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31869
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Device/Sampler.hpp b/src/Device/Sampler.hpp
index cbe29d8..65313ba 100644
--- a/src/Device/Sampler.hpp
+++ b/src/Device/Sampler.hpp
@@ -145,6 +145,7 @@
 		AddressingMode addressingModeW;
 		MipmapType mipmapFilter;
 		VkComponentMapping swizzle;
+		int gatherComponent;
 		bool highPrecisionFiltering;
 		bool compareEnable;
 		VkCompareOp compareOp;
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index fbb74ef..e4ba3e1 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -115,13 +115,13 @@
 			// TODO: Eliminate int-float-int conversion.
 			lod = Float(As<Int>(Float(lodOrBias.x)));
 		}
-		else if(function == Base)
+		else if(function == Base || function == Gather)
 		{
 			lod = Float(0);
 		}
 		else UNREACHABLE("Sampler function %d", int(function));
 
-		if(function != Base && function != Fetch)
+		if(function != Base && function != Fetch && function != Gather)
 		{
 			lod += *Pointer<Float>(sampler + OFFSET(vk::Sampler, mipLodBias));
 
@@ -151,7 +151,7 @@
 		bool seamlessCube = (state.addressingModeU == ADDRESSING_SEAMLESS);
 		bool use32BitFiltering = hasFloatTexture() || hasUnnormalizedIntegerTexture() || force32BitFiltering ||
 		                         seamlessCube || state.unnormalizedCoordinates || state.compareEnable || state.largeTexture ||
-		                         borderModeActive();
+		                         borderModeActive() || (function == Gather);
 
 		if(use32BitFiltering)
 		{
@@ -242,17 +242,37 @@
 			}
 		}
 
-		if((state.swizzle.r != VK_COMPONENT_SWIZZLE_R) ||
-			(state.swizzle.g != VK_COMPONENT_SWIZZLE_G) ||
-			(state.swizzle.b != VK_COMPONENT_SWIZZLE_B) ||
-			(state.swizzle.a != VK_COMPONENT_SWIZZLE_A))
+		if(state.textureFilter != FILTER_GATHER)
 		{
-			const Vector4f col(c);
-			auto integer = hasUnnormalizedIntegerTexture();
-			applySwizzle(state.swizzle.r, c.x, col, integer);
-			applySwizzle(state.swizzle.g, c.y, col, integer);
-			applySwizzle(state.swizzle.b, c.z, col, integer);
-			applySwizzle(state.swizzle.a, c.w, col, integer);
+			if((state.swizzle.r != VK_COMPONENT_SWIZZLE_R) ||
+			   (state.swizzle.g != VK_COMPONENT_SWIZZLE_G) ||
+			   (state.swizzle.b != VK_COMPONENT_SWIZZLE_B) ||
+			   (state.swizzle.a != VK_COMPONENT_SWIZZLE_A))
+			{
+				const Vector4f col(c);
+				bool integer = hasUnnormalizedIntegerTexture();
+				applySwizzle(state.swizzle.r, c.x, col, integer);
+				applySwizzle(state.swizzle.g, c.y, col, integer);
+				applySwizzle(state.swizzle.b, c.z, col, integer);
+				applySwizzle(state.swizzle.a, c.w, col, integer);
+			}
+		}
+		else  // Gather
+		{
+			VkComponentSwizzle swizzle = gatherSwizzle();
+
+			// R/G/B/A swizzles affect the component collected from each texel earlier.
+			// Handle the ZERO and ONE cases here because we don't need to know the format.
+
+			if(swizzle == VK_COMPONENT_SWIZZLE_ZERO)
+			{
+				c.x = c.y = c.z = c.w = Float4(0);
+			}
+			else if(swizzle == VK_COMPONENT_SWIZZLE_ONE)
+			{
+				bool integer = hasUnnormalizedIntegerTexture();
+				c.x = c.y = c.z = c.w = integer ? As<Float4>(Int4(1)) : RValue<Float4>(Float4(1.0f));
+			}
 		}
 
 		return c;
@@ -597,10 +617,20 @@
 			}
 			else  // Gather
 			{
-				c.x = c10.x;
-				c.y = c01.x;
-				c.z = c11.x;
-				c.w = c00.x;
+				VkComponentSwizzle swizzle = gatherSwizzle();
+				switch(swizzle)
+				{
+				case VK_COMPONENT_SWIZZLE_ZERO:
+				case VK_COMPONENT_SWIZZLE_ONE:
+					// Handled at the final component swizzle.
+					break;
+				default:
+					c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
+					break;
+				}
 			}
 		}
 
@@ -885,10 +915,20 @@
 			}
 			else  // Gather
 			{
-				c.x = c10.x;
-				c.y = c01.x;
-				c.z = c11.x;
-				c.w = c00.x;
+				VkComponentSwizzle swizzle = gatherSwizzle();
+				switch(swizzle)
+				{
+				case VK_COMPONENT_SWIZZLE_ZERO:
+				case VK_COMPONENT_SWIZZLE_ONE:
+					// Handled at the final component swizzle.
+					break;
+				default:
+					c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
+					c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
+					break;
+				}
 			}
 		}
 
@@ -2024,6 +2064,20 @@
 		}
 	}
 
+	// TODO: Eliminate when the gather + mirror addressing case is handled by mirroring the footprint.
+	static Int4 mirror(Int4 n)  // Per lane: yields n where the mask below is set, -(1 + n) elsewhere.
+	{
+		auto positive = CmpNLT(n, Int4(0));  // Used as a per-lane select mask; going by the CmpNLT name, set where n >= 0.
+		return (positive & n) | (~positive & (-(Int4(1) + n)));  // Bitwise select between n and -(1 + n).
+	}
+
+	static Int4 mod(Int4 n, Int4 d)  // Per lane: n % d, with d added back whenever that remainder is not >= 0.
+	{
+		auto x = n % d;
+		auto positive = CmpNLT(x, Int4(0));  // Used as a per-lane select mask; going by the CmpNLT name, set where x >= 0.
+		return (positive & x) | (~positive & (x + d));  // Bitwise select between x and x + d.
+	}
+
 	void SamplerCore::address(Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
 	{
 		if(addressingMode == ADDRESSING_UNUSED)
@@ -2071,51 +2125,77 @@
 					break;
 				}
 			}
+			else if(state.textureFilter == FILTER_GATHER && addressingMode == ADDRESSING_MIRROR)
+			{
+				// Gather requires the 'footprint' of the texels from which a component is taken, to also mirror around.
+				// Therefore we can't just compute one texel's location and find the other ones at +1 offsets from it.
+				// Here we handle that case separately by doing the mirroring per texel coordinate.
+				// TODO: Mirror the footprint by adjusting the sign of the 0.5f and 1 offsets.
+
+				coord = coord * Float4(dim);
+				coord -= Float4(0.5f);
+				Float4 floor = Floor(coord);
+				xyz0 = Int4(floor);
+
+				if(function.option == Offset)
+				{
+					xyz0 += As<Int4>(texOffset);
+				}
+
+				xyz1 = xyz0 + Int4(1);
+
+				xyz0 = (maxXYZ) - mirror(mod(xyz0, Int4(2) * dim) - dim);
+				xyz1 = (maxXYZ) - mirror(mod(xyz1, Int4(2) * dim) - dim);
+
+				return;
+			}
 			else
 			{
-				switch(addressingMode)
+				if(function.option != Offset)
 				{
-				case ADDRESSING_CLAMP:
-				case ADDRESSING_SEAMLESS:
-					// Linear filtering of cube doesn't require clamping because the coordinates
-					// are already in [0, 1] range and numerical imprecision is tolerated.
-					if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
+					switch(addressingMode)
 					{
-						Float4 one = As<Float4>(Int4(oneBits));
-						coord = Min(Max(coord, Float4(0.0f)), one);
+					case ADDRESSING_CLAMP:
+					case ADDRESSING_SEAMLESS:
+						// Linear filtering of cube doesn't require clamping because the coordinates
+						// are already in [0, 1] range and numerical imprecision is tolerated.
+						if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
+						{
+							Float4 one = As<Float4>(Int4(oneBits));
+							coord = Min(Max(coord, Float4(0.0f)), one);
+						}
+						break;
+					case ADDRESSING_MIRROR:
+						{
+							Float4 half = As<Float4>(Int4(halfBits));
+							Float4 one = As<Float4>(Int4(oneBits));
+							Float4 two = As<Float4>(Int4(twoBits));
+							coord = one - Abs(two * Frac(coord * half) - one);
+						}
+						break;
+					case ADDRESSING_MIRRORONCE:
+						{
+							Float4 half = As<Float4>(Int4(halfBits));
+							Float4 one = As<Float4>(Int4(oneBits));
+							Float4 two = As<Float4>(Int4(twoBits));
+							coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
+						}
+						break;
+					case ADDRESSING_BORDER:
+						// Don't map to a valid range here.
+						break;
+					default:   // Wrap
+						coord = Frac(coord);
+						break;
 					}
-					break;
-				case ADDRESSING_MIRROR:
-					{
-						Float4 half = As<Float4>(Int4(halfBits));
-						Float4 one = As<Float4>(Int4(oneBits));
-						Float4 two = As<Float4>(Int4(twoBits));
-						coord = one - Abs(two * Frac(coord * half) - one);
-					}
-					break;
-				case ADDRESSING_MIRRORONCE:
-					{
-						Float4 half = As<Float4>(Int4(halfBits));
-						Float4 one = As<Float4>(Int4(oneBits));
-						Float4 two = As<Float4>(Int4(twoBits));
-						coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
-					}
-					break;
-				case ADDRESSING_BORDER:
-					// Don't map to a valid range here.
-					break;
-				default:   // Wrap
-					coord = Frac(coord);
-					break;
 				}
 
 				coord = coord * Float4(dim);
 			}
 
-			if(state.textureFilter == FILTER_POINT ||
-			   state.textureFilter == FILTER_GATHER)
+			if(state.textureFilter == FILTER_POINT)
 			{
-				if(addressingMode == ADDRESSING_BORDER)
+				if(addressingMode == ADDRESSING_BORDER || function.option == Offset)
 				{
 					xyz0 = Int4(Floor(coord));
 				}
@@ -2176,8 +2256,8 @@
 					xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
 					break;
 				default:   // Wrap
-					xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
-					xyz1 = (xyz1 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
+					xyz0 = mod(xyz0, dim);
+					xyz1 = mod(xyz1, dim);
 					break;
 				}
 			}
@@ -2292,4 +2372,18 @@
 		       state.addressingModeV == ADDRESSING_BORDER ||
 		       state.addressingModeW == ADDRESSING_BORDER;
 	}
+
+	VkComponentSwizzle SamplerCore::gatherSwizzle() const  // Returns the state.swizzle entry picked by state.gatherComponent.
+	{
+		switch(state.gatherComponent)
+		{
+		case 0: return state.swizzle.r;
+		case 1: return state.swizzle.g;
+		case 2: return state.swizzle.b;
+		case 3: return state.swizzle.a;
+		default:  // Any value other than 0..3 is reported as an error.
+			UNREACHABLE("Invalid component");
+			return VK_COMPONENT_SWIZZLE_R;  // Keeps every path returning a value; only reached if UNREACHABLE() returns.
+		}
+	}
 }
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index bcca3a0..068139b 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -36,7 +36,8 @@
 		Fetch,     // Use provided integer coordinates.
 		Base,      // Sample base level.
 		Query,     // Return implicit LOD.
-		SAMPLER_METHOD_LAST = Query,
+		Gather,    // Return one channel of each texel in footprint.
+		SAMPLER_METHOD_LAST = Gather,
 	};
 
 	enum SamplerOption
@@ -109,6 +110,7 @@
 		bool hasYuvFormat() const;
 		bool isRGBComponent(int component) const;
 		bool borderModeActive() const;
+		VkComponentSwizzle gatherSwizzle() const;
 
 		Pointer<Byte> &constants;
 		const Sampler &state;
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 755487e..e2362b2 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -935,6 +935,8 @@
 			case spv::OpImageSampleProjExplicitLod:
 			case spv::OpImageSampleProjDrefImplicitLod:
 			case spv::OpImageSampleProjDrefExplicitLod:
+			case spv::OpImageGather:
+			case spv::OpImageDrefGather:
 			case spv::OpImageFetch:
 			case spv::OpImageQuerySizeLod:
 			case spv::OpImageQuerySize:
@@ -2489,6 +2491,12 @@
 		case spv::OpImageSampleProjDrefExplicitLod:
 			return EmitImageSampleExplicitLod(ProjDref, insn, state);
 
+		case spv::OpImageGather:
+			return EmitImageGather(None, insn, state);
+
+		case spv::OpImageDrefGather:
+			return EmitImageGather(Dref, insn, state);
+
 		case spv::OpImageFetch:
 			return EmitImageFetch(insn, state);
 
@@ -4659,6 +4667,14 @@
 		return EmitImageSample({variant, Implicit}, insn, state);
 	}
 
+	SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const
+	{
+		ImageInstruction instruction = {variant, Gather};  // Same variant as requested, with the Gather sampler method.
+		instruction.gatherComponent = !instruction.isDref() ? getObject(insn.word(5)).constantValue[0] : 0;  // Non-Dref: first constant value of the object named by word 5; Dref: 0.
+
+		return EmitImageSample(instruction, insn, state);  // The rest is delegated to the shared EmitImageSample path.
+	}
+
 	SpirvShader::EmitResult SpirvShader::EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
 	{
 		auto isDref = (variant == Dref) || (variant == ProjDref);
@@ -4714,7 +4730,7 @@
 		Object::ID offsetId = 0;
 		bool sample = false;
 
-		uint32_t operand = instruction.isDref() ? 6 : 5;
+		uint32_t operand = (instruction.isDref() || instruction.samplerMethod == Gather) ? 6 : 5;
 
 		if(insn.wordCount() > operand)
 		{
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index c8b9f93..cad734d 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -524,6 +524,7 @@
 					uint32_t variant : BITS(VARIANT_LAST);
 					uint32_t samplerMethod : BITS(SAMPLER_METHOD_LAST);
 					uint32_t samplerOption : BITS(SAMPLER_OPTION_LAST);
+					uint32_t gatherComponent : 2;
 
 					// Parameters are passed to the sampling routine in this order:
 					uint32_t coordinates : 3;       // 1-4 (does not contain projection component)
@@ -937,6 +938,7 @@
 		EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
 		EmitResult EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
 		EmitResult EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
+		EmitResult EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const;
 		EmitResult EmitImageFetch(InsnIterator insn, EmitState *state) const;
 		EmitResult EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const;
 		EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 9d3dc8a..0d0e373 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -78,7 +78,7 @@
 	Sampler samplerState = {};
 	samplerState.textureType = convertTextureType(type);
 	samplerState.textureFormat = imageDescriptor->format;
-	samplerState.textureFilter = convertFilterMode(sampler);
+	samplerState.textureFilter = (instruction.samplerMethod == Gather) ? FILTER_GATHER : convertFilterMode(sampler);
 	samplerState.border = sampler->borderColor;
 
 	samplerState.addressingModeU = convertAddressingMode(0, sampler->addressModeU, type);
@@ -87,6 +87,7 @@
 
 	samplerState.mipmapFilter = convertMipmapMode(sampler);
 	samplerState.swizzle = imageDescriptor->swizzle;
+	samplerState.gatherComponent = instruction.gatherComponent;
 	samplerState.highPrecisionFiltering = false;
 	samplerState.compareEnable = (sampler->compareEnable == VK_TRUE);
 	samplerState.compareOp = sampler->compareOp;