Implement GLSLstd450Interpolate* functions

This cl adds an implementation for:
- GLSLstd450InterpolateAtCentroid
- GLSLstd450InterpolateAtSample
- GLSLstd450InterpolateAtOffset

These functions essentially replicate the behavior of
regular interpolants in the fragment shader processing.

A specific extra difficulty encountered here is detecting
which kind of pointer offset we are dealing with. Pointer
offsets might be caused by [] operators being used on a
vector or on an array (possibly an array of vectors). This
distinction is important as it impacts what interpolant
offsets point to. Note that there's missing coverage in
dEQP-VK for interpolant arrays and this was caught with
SwANGLE tests (a dEQP-VK issue will be logged shortly).

Another issue was dealing with dynamic interpolant offsets,
which was solved by looping over all of them and combining
all plane equations into one before performing the
interpolation.

Bug: b/171415086
Change-Id: Id7c4c931918ba172d00da84655051445b110d3a9
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/51737
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Commit-Queue: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index bdc3da7..c42af5f 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -192,7 +192,7 @@
 	auto storesAndAtomicsMask = (sampleId >= 0) ? maskAny(cMask[sampleId], sMask[sampleId], zMask[sampleId]) : maskAny(cMask, sMask, zMask);
 	routine.killMask = 0;
 
-	spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
+	spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets, state.multiSampleCount);
 	spirvShader->emitEpilog(&routine);
 	if((sampleId < 0) || (sampleId == static_cast<int>(state.multiSampleCount - 1)))
 	{
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index ce3b0cb..66f0e21 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1629,9 +1629,9 @@
 	}
 }
 
-void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const
+void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount) const
 {
-	EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, executionModel);
+	EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, multiSampleCount, executionModel);
 
 	dbgBeginEmit(&state);
 	defer(dbgEndEmit(&state));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 84db5fd..7af156a 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -783,7 +783,7 @@
 	std::vector<InterfaceComponent> outputs;
 
 	void emitProlog(SpirvRoutine *routine) const;
-	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
+	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount = 0) const;
 	void emitEpilog(SpirvRoutine *routine) const;
 	void clearPhis(SpirvRoutine *routine) const;
 
@@ -917,6 +917,7 @@
 		          RValue<SIMD::Int> storesAndAtomicsMask,
 		          const vk::DescriptorSet::Bindings &descriptorSets,
 		          bool robustBufferAccess,
+		          unsigned int multiSampleCount,
 		          spv::ExecutionModel executionModel)
 		    : routine(routine)
 		    , function(function)
@@ -924,6 +925,7 @@
 		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value())
 		    , descriptorSets(descriptorSets)
 		    , robustBufferAccess(robustBufferAccess)
+		    , multiSampleCount(multiSampleCount)
 		    , executionModel(executionModel)
 		{
 			ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0));  // Must parse OpEntryPoint before emitting.
@@ -985,6 +987,8 @@
 
 		OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
 
+		unsigned int getMultiSampleCount() const { return multiSampleCount; }
+
 		Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
 		{
 			auto it = intermediates.emplace(std::piecewise_construct,
@@ -1019,6 +1023,7 @@
 		std::unordered_map<Object::ID, SIMD::Pointer> pointers;
 
 		const bool robustBufferAccess = true;  // Emit robustBufferAccess safe code.
+		const unsigned int multiSampleCount = 0;
 		const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
 	};
 
@@ -1229,7 +1234,16 @@
 	void EvalSpecConstantUnaryOp(InsnIterator insn);
 	void EvalSpecConstantBinaryOp(InsnIterator insn);
 
+	// Fragment input interpolation functions
 	uint32_t GetNumInputComponents(int32_t location) const;
+	enum InterpolationType
+	{
+		Centroid,
+		AtSample,
+		AtOffset,
+	};
+	SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId, uint32_t component,
+	                        uint32_t component_count, EmitState *state, InterpolationType type) const;
 
 	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
 	// can also store independent operands.
diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp
index e43a298..7455029 100644
--- a/src/Pipeline/SpirvShaderGLSLstd450.cpp
+++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp
@@ -16,6 +16,7 @@
 
 #include "ShaderCore.hpp"
 #include "Device/Primitive.hpp"
+#include "Pipeline/Constants.hpp"
 
 #include <spirv/unified1/GLSL.std.450.h>
 #include <spirv/unified1/spirv.hpp>
@@ -42,6 +43,17 @@
 	return interpolant;
 }
 
+// TODO(b/179925303): Eliminate when interpolants are tightly packed.
+uint32_t ComputeInterpolantOffset(uint32_t offset, uint32_t components_per_row, bool useArrayOffset)
+{
+	if(useArrayOffset)
+	{
+		uint32_t interpolant_offset = offset / components_per_row;
+		offset = (interpolant_offset * 4) + (offset - interpolant_offset * components_per_row);
+	}
+	return offset;
+}
+
 }  // namespace
 
 namespace sw {
@@ -900,17 +912,35 @@
 		}
 		case GLSLstd450InterpolateAtCentroid:
 		{
-			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
+			Decorations d;
+			ApplyDecorationsForId(&d, insn.word(5));
+			auto ptr = state->getPointer(insn.word(5));
+			for(auto i = 0u; i < type.componentCount; i++)
+			{
+				dst.move(i, Interpolate(ptr, d.Location, 0, i, type.componentCount, state, SpirvShader::Centroid));
+			}
 			break;
 		}
 		case GLSLstd450InterpolateAtSample:
 		{
-			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
+			Decorations d;
+			ApplyDecorationsForId(&d, insn.word(5));
+			auto ptr = state->getPointer(insn.word(5));
+			for(auto i = 0u; i < type.componentCount; i++)
+			{
+				dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, type.componentCount, state, SpirvShader::AtSample));
+			}
 			break;
 		}
 		case GLSLstd450InterpolateAtOffset:
 		{
-			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
+			Decorations d;
+			ApplyDecorationsForId(&d, insn.word(5));
+			auto ptr = state->getPointer(insn.word(5));
+			for(auto i = 0u; i < type.componentCount; i++)
+			{
+				dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, type.componentCount, state, SpirvShader::AtOffset));
+			}
 			break;
 		}
 		case GLSLstd450NMin:
@@ -953,6 +983,97 @@
 	return EmitResult::Continue;
 }
 
+SIMD::Float SpirvShader::Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId, uint32_t component,
+                                     uint32_t component_count, EmitState *state, InterpolationType type) const
+{
+	uint32_t interpolant = (location * 4);
+	uint32_t components_per_row = GetNumInputComponents(location);
+	if((location < 0) || (interpolant >= inputs.size()) || (components_per_row == 0))
+	{
+		return SIMD::Float(0.0f);
+	}
+
+	// Distinguish between the operator[] being used on a vector or on an array.
+	// If the number of components of the interpolant is 1, then the operator[] automatically means this is an array.
+	// Otherwise, if the component_count is 1, then the operator[] can be the result of this operator being called
+	// on a vec2, vec3 or vec4, so only a component_count greater than 1 means any offset is for an array.
+	bool useArrayOffset = (components_per_row == 1) || (component_count > 1);
+
+	const auto &interpolationData = state->routine->interpolationData;
+
+	SIMD::Float x;
+	SIMD::Float y;
+	SIMD::Float rhw;
+
+	switch(type)
+	{
+		case Centroid:
+			x = interpolationData.xCentroid;
+			y = interpolationData.yCentroid;
+			rhw = interpolationData.rhwCentroid;
+			break;
+		case AtSample:
+			x = SIMD::Float(0.0f);
+			y = SIMD::Float(0.0f);
+
+			if(state->getMultiSampleCount() > 1)
+			{
+				static constexpr int NUM_SAMPLES = 4;
+				ASSERT(state->getMultiSampleCount() == NUM_SAMPLES);
+
+				Array<Float> sampleX(NUM_SAMPLES);
+				Array<Float> sampleY(NUM_SAMPLES);
+				for(int i = 0; i < NUM_SAMPLES; ++i)
+				{
+					sampleX[i] = Constants::SampleLocationsX[i];
+					sampleY[i] = Constants::SampleLocationsY[i];
+				}
+
+				auto sampleOperand = Operand(this, state, paramId);
+				ASSERT(sampleOperand.componentCount == 1);
+
+				// If sample does not exist, the position used to interpolate the input variable
+				// is undefined, so we just mask the index into range to avoid OOB accesses.
+				SIMD::Int samples = sampleOperand.Int(0) & SIMD::Int(NUM_SAMPLES - 1);
+
+				for(int i = 0; i < SIMD::Width; ++i)
+				{
+					Int sample = Extract(samples, i);
+					x = Insert(x, sampleX[sample], i);
+					y = Insert(y, sampleY[sample], i);
+				}
+			}
+
+			x += interpolationData.x;
+			y += interpolationData.y;
+			rhw = interpolationData.rhw;
+			break;
+		case AtOffset:
+		{
+			// An offset of (0, 0) identifies the center of the pixel.
+			auto offset = Operand(this, state, paramId);
+			ASSERT(offset.componentCount == 2);
+
+			x = interpolationData.x + offset.Float(0);
+			y = interpolationData.y + offset.Float(1);
+			rhw = interpolationData.rhw;
+		}
+		break;
+		default:
+			UNREACHABLE("Unknown interpolation type: %d", (int)type);
+			return SIMD::Float(0.0f);
+	}
+
+	uint32_t offset = ComputeInterpolantOffset((ptr.staticOffsets[0] >> 2) + component, components_per_row, useArrayOffset);
+	if((interpolant + offset) >= inputs.size())
+	{
+		return SIMD::Float(0.0f);
+	}
+
+	Pointer<Byte> planeEquation = interpolationData.primitive + OFFSET(Primitive, V[interpolant]) + offset * sizeof(PlaneEquation);
+	return SpirvRoutine::interpolateAtXY(x, y, rhw, planeEquation, false, true);
+}
+
 SIMD::Float SpirvRoutine::interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
 {
 	SIMD::Float A;