SpirvShader: Add a single-entry, last-used sampler cache

This complements the const-cache in 34348.

Timings for the Glass demo running on an i7-4930K:

this change:               24.25 FPS
this change without 34348: 23.02 FPS
parent change (inc 34348): 22.46 FPS

Bug: b/137649247
Change-Id: I206cdaabfaf63da7f67e3cd5f6823f3343b823c8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34528
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index dd04fc9..b0759c5 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -2057,6 +2057,25 @@
 				routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
 				break;
 			}
+
+			case spv::OpImageDrefGather:
+			case spv::OpImageFetch:
+			case spv::OpImageGather:
+			case spv::OpImageQueryLod:
+			case spv::OpImageSampleDrefExplicitLod:
+			case spv::OpImageSampleDrefImplicitLod:
+			case spv::OpImageSampleExplicitLod:
+			case spv::OpImageSampleImplicitLod:
+			case spv::OpImageSampleProjDrefExplicitLod:
+			case spv::OpImageSampleProjDrefImplicitLod:
+			case spv::OpImageSampleProjExplicitLod:
+			case spv::OpImageSampleProjImplicitLod:
+			{
+				Object::ID resultId = insn.word(2);
+				routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
+				break;
+			}
+
 			default:
 				// Nothing else produces interface variables, so can all be safely ignored.
 				break;
@@ -5013,10 +5032,20 @@
 			in[i] = sampleValue.Float(0);
 		}
 
-		auto samplerFunc = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
+		auto cacheIt = state->routine->samplerCache.find(resultId);
+		ASSERT(cacheIt != state->routine->samplerCache.end());
+		auto &cache = cacheIt->second;
+		auto cacheHit = cache.imageDescriptor == imageDescriptor && cache.sampler == sampler;
+
+		If(!cacheHit)
+		{
+			cache.function = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
+			cache.imageDescriptor = imageDescriptor;
+			cache.sampler = sampler;
+		}
 
 		Array<SIMD::Float> out(4);
-		Call<ImageSampler>(samplerFunc, texture, sampler, &in[0], &out[0], state->routine->constants);
+		Call<ImageSampler>(cache.function, texture, sampler, &in[0], &out[0], state->routine->constants);
 
 		for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
 
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 7c850ba..c1d5e62 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1251,10 +1251,17 @@
 
 		using Variable = Array<SIMD::Float>;
 
+		struct SamplerCache  // Single-entry, last-used cache of the sampler function for one sampling instruction.
+		{
+			Pointer<Byte> imageDescriptor = nullptr;  // Starts null instead of uninitialized: the hit test reads it before any store.
+			Pointer<Byte> sampler;   // Sampler the cached function was looked up with.
+			Pointer<Byte> function;  // Value returned by getImageSampler for the descriptor/sampler pair above.
+		};
+
 		vk::PipelineLayout const * const pipelineLayout;
 
 		std::unordered_map<SpirvShader::Object::ID, Variable> variables;
-
+		std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
 		Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
 		Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
 
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 106ac35..7bf902e 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1953,6 +1953,12 @@
 		return V(jit->builder->CreateBitCast(V(v), T(destType)));
 	}
 
+	Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)  // Emits an equality comparison of lhs and rhs and returns its result value.
+	{
+		RR_DEBUG_INFO_UPDATE_LOC();
+		return V(jit->builder->CreateICmpEQ(V(lhs), V(rhs)));  // Uses the builder's ICmpEQ on the operands directly.
+	}
+
 	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
 	{
 		RR_DEBUG_INFO_UPDATE_LOC();
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index cc20e27..9f66115 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -230,6 +230,7 @@
 		static Value *createBitCast(Value *V, Type *destType);
 
 		// Compare instructions
+		static Value *createPtrEQ(Value *lhs, Value *rhs);
 		static Value *createICmpEQ(Value *lhs, Value *rhs);
 		static Value *createICmpNE(Value *lhs, Value *rhs);
 		static Value *createICmpUGT(Value *lhs, Value *rhs);
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 5add9cd..20cf3b0 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2356,6 +2356,12 @@
 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset);
 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset);
 
+	template<typename T>  // Equality of two Pointer<T> variables; yields a Reactor Bool.
+	RValue<Bool> operator==(const Pointer<T> &lhs, const Pointer<T> &rhs)
+	{
+		return RValue<Bool>(Nucleus::createPtrEQ(lhs.loadValue(), rhs.loadValue()));  // Loads both pointer values, then compares them.
+	}
+
 	template<typename T>
 	RValue<T> Load(RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
 	{
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 9bc1227c..d4df0f9 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -1072,6 +1072,39 @@
 
 }
 
+TEST(ReactorUnitTests, PointersEqual)  // Checks Pointer<Byte> operator== against crafted pointer bit patterns.
+{
+	Function<Int(Pointer<Byte>, Pointer<Byte>)> function;
+	{
+		Pointer<Byte> ptrA = function.Arg<0>();
+		Pointer<Byte> ptrB = function.Arg<1>();
+		If (ptrA == ptrB)
+		{
+			Return(1);
+		}
+		Else
+		{
+			Return(0);
+		}
+	}
+
+	auto routine = function("one");
+	auto equal = (int(*)(void*, void*))routine->getEntry();
+	int* a = reinterpret_cast<int*>(uintptr_t(0x0000000000000000));  // null
+	int* b = reinterpret_cast<int*>(uintptr_t(0x00000000F0000000));  // bits set within the low 32 bits only
+	int* c = reinterpret_cast<int*>(uintptr_t(1) << (sizeof(uintptr_t) * 8 - 1));  // only the top bit set; distinct from a and b on 32- and 64-bit targets
+	EXPECT_EQ(equal(a, a), 1);  // Pass the crafted values themselves (never dereferenced), not the addresses of the locals.
+	EXPECT_EQ(equal(b, b), 1);
+	EXPECT_EQ(equal(c, c), 1);
+
+	EXPECT_EQ(equal(a, b), 0);
+	EXPECT_EQ(equal(b, a), 0);
+	EXPECT_EQ(equal(b, c), 0);
+	EXPECT_EQ(equal(c, b), 0);
+	EXPECT_EQ(equal(c, a), 0);
+	EXPECT_EQ(equal(a, c), 0);
+}
+
 TEST(ReactorUnitTests, Call)
 {
 	if (!rr::Caps.CallSupported)
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 0cf2370..a1894ce 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -1222,6 +1222,11 @@
 		return V(result);
 	}
 
+	Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)  // Emits an equality comparison of lhs and rhs and returns its result value.
+	{
+		return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);  // Same integer-compare helper and condition that createICmpEQ uses.
+	}
+
 	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
 	{
 		return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);