SpirvShader: Add a single-entry, last-used sampler cache
This complements the const-cache in 34348.
Timings for the Glass demo running on an i7-4930K:
this change: 24.25 FPS
this change without 34528: 23.02 FPS
parent change (inc 34528): 22.46 FPS
Bug: b/137649247
Change-Id: I206cdaabfaf63da7f67e3cd5f6823f3343b823c8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34528
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index dd04fc9..b0759c5 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -2057,6 +2057,25 @@
routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
break;
}
+
+ case spv::OpImageDrefGather:
+ case spv::OpImageFetch:
+ case spv::OpImageGather:
+ case spv::OpImageQueryLod:
+ case spv::OpImageSampleDrefExplicitLod:
+ case spv::OpImageSampleDrefImplicitLod:
+ case spv::OpImageSampleExplicitLod:
+ case spv::OpImageSampleImplicitLod:
+ case spv::OpImageSampleProjDrefExplicitLod:
+ case spv::OpImageSampleProjDrefImplicitLod:
+ case spv::OpImageSampleProjExplicitLod:
+ case spv::OpImageSampleProjImplicitLod:
+ {
+ Object::ID resultId = insn.word(2);
+ routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
+ break;
+ }
+
default:
// Nothing else produces interface variables, so can all be safely ignored.
break;
@@ -5013,10 +5032,20 @@
in[i] = sampleValue.Float(0);
}
- auto samplerFunc = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
+ auto cacheIt = state->routine->samplerCache.find(resultId);
+ ASSERT(cacheIt != state->routine->samplerCache.end());
+ auto &cache = cacheIt->second;
+ auto cacheHit = cache.imageDescriptor == imageDescriptor && cache.sampler == sampler;
+
+ If(!cacheHit)
+ {
+ cache.function = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
+ cache.imageDescriptor = imageDescriptor;
+ cache.sampler = sampler;
+ }
Array<SIMD::Float> out(4);
- Call<ImageSampler>(samplerFunc, texture, sampler, &in[0], &out[0], state->routine->constants);
+ Call<ImageSampler>(cache.function, texture, sampler, &in[0], &out[0], state->routine->constants);
for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 7c850ba..c1d5e62 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1251,10 +1251,17 @@
using Variable = Array<SIMD::Float>;
+ struct SamplerCache  // Last-used sampler lookup; samplerCache holds one of these per sampling instruction result ID.
+ {  // NOTE(review): the emit code compares these fields before first assigning them - confirm a default-constructed Pointer has a defined value.
+ Pointer<Byte> imageDescriptor;  // Image descriptor the cached function was obtained for.
+ Pointer<Byte> sampler;  // Sampler the cached function was obtained for.
+ Pointer<Byte> function;  // Value returned by the getImageSampler call for the pair above.
+ };
+
vk::PipelineLayout const * const pipelineLayout;
std::unordered_map<SpirvShader::Object::ID, Variable> variables;
-
+ std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 106ac35..7bf902e 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1953,6 +1953,12 @@
return V(jit->builder->CreateBitCast(V(v), T(destType)));
}
+ Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)  // Emits an equality comparison of two pointer values (LLVM backend).
+ {
+ RR_DEBUG_INFO_UPDATE_LOC();
+ return V(jit->builder->CreateICmpEQ(V(lhs), V(rhs)));  // LLVM's icmp accepts pointer operands, so no cast to integer is made here.
+ }
+
Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
{
RR_DEBUG_INFO_UPDATE_LOC();
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index cc20e27..9f66115 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -230,6 +230,7 @@
static Value *createBitCast(Value *V, Type *destType);
// Compare instructions
+ static Value *createPtrEQ(Value *lhs, Value *rhs);
static Value *createICmpEQ(Value *lhs, Value *rhs);
static Value *createICmpNE(Value *lhs, Value *rhs);
static Value *createICmpUGT(Value *lhs, Value *rhs);
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 5add9cd..20cf3b0 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2356,6 +2356,12 @@
RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset);
RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset);
+ template <typename T>  // Equality between two Reactor pointers of the same pointee type, e.g. If(ptrA == ptrB).
+ RValue<Bool> operator==(const Pointer<T> &lhs, const Pointer<T> &rhs)
+ {
+ return RValue<Bool>(Nucleus::createPtrEQ(lhs.loadValue(), rhs.loadValue()));  // Loads both pointer values and compares them via the backend's createPtrEQ.
+ }
+
template<typename T>
RValue<T> Load(RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
{
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 9bc1227c..d4df0f9 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -1072,6 +1072,39 @@
}
+TEST(ReactorUnitTests, PointersEqual)  // Verifies Pointer<T> operator== on crafted raw pointer values.
+{
+ Function<Int(Pointer<Byte>, Pointer<Byte>)> function;
+ {
+ Pointer<Byte> ptrA = function.Arg<0>();
+ Pointer<Byte> ptrB = function.Arg<1>();
+ If (ptrA == ptrB)
+ {
+ Return(1);
+ }
+ Else
+ {
+ Return(0);
+ }
+ }
+ // a is null, b sets only low-order bits, c sets only the top bit: distinct on 32- and 64-bit targets.
+ auto routine = function("one");
+ auto equal = (int(*)(void*, void*))routine->getEntry();
+ int* a = reinterpret_cast<int*>(uintptr_t(0));
+ int* b = reinterpret_cast<int*>(uintptr_t(0xF0000000u));
+ int* c = reinterpret_cast<int*>(uintptr_t(1) << (sizeof(uintptr_t) * 8 - 1));
+ EXPECT_EQ(equal(a, a), 1);
+ EXPECT_EQ(equal(b, b), 1);
+ EXPECT_EQ(equal(c, c), 1);
+ // Pass the crafted values themselves (never dereferenced), not the addresses of the locals holding them.
+ EXPECT_EQ(equal(a, b), 0);
+ EXPECT_EQ(equal(b, a), 0);
+ EXPECT_EQ(equal(b, c), 0);
+ EXPECT_EQ(equal(c, b), 0);
+ EXPECT_EQ(equal(c, a), 0);
+ EXPECT_EQ(equal(a, c), 0);
+}
+
TEST(ReactorUnitTests, Call)
{
if (!rr::Caps.CallSupported)
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 0cf2370..a1894ce 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -1222,6 +1222,11 @@
return V(result);
}
+ Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)  // Emits an equality comparison of two pointer values (Subzero backend).
+ {
+ return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);  // Same createIntCompare call that createICmpEQ makes.
+ }
+
Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
{
return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);