Use correct alignment for input attachment loads
Previously, SIMD::Load assumed that all pointers are 4-byte aligned.
However, texel pointers generated by SpirvShader::GetTexelAddress are
only aligned to the texel size, which is less than 4 bytes for small
formats.
Add an alignment parameter to SIMD::Load and use it in EmitImageRead.
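
For illustration only (not part of this change), a minimal sketch of how
the alignment passed at the EmitImageRead call site relates to the texel
size; loadAlignmentFor is a hypothetical helper mirroring the
std::min(texelSize, 4) expression used below:

  // Never claim more alignment than the texel size guarantees, and
  // never more than the 4 bytes each 32-bit SIMD lane loads.
  #include <algorithm>
  #include <cassert>

  int loadAlignmentFor(int texelSizeInBytes)
  {
      return std::min(texelSizeInBytes, 4);
  }

  int main()
  {
      assert(loadAlignmentFor(1) == 1);   // e.g. 8-bit formats
      assert(loadAlignmentFor(2) == 2);   // e.g. 16-bit formats
      assert(loadAlignmentFor(4) == 4);   // e.g. 32-bit formats
      assert(loadAlignmentFor(16) == 4);  // e.g. 128-bit formats
  }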
Bug: b/135954761
Change-Id: I6a420049e98f42a68960d557dee933fee9487af3
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33328
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index f23f859..f5b8bf2 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -287,7 +287,7 @@
{
template<typename T>
- T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+ T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
{
using EL = typename Element<T>::type;
auto offsets = ptr.offsets();
@@ -302,16 +302,16 @@
T out = T(0);
If(AnyTrue(mask))
{
- EL el = *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], sizeof(float));
+ EL el = *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment);
out = T(el);
}
return out;
}
if (ptr.hasStaticSequentialOffsets(sizeof(float)))
{
- return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, sizeof(float));
+ return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment);
}
- return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, sizeof(float));
+ return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment);
}
else
{
@@ -321,13 +321,13 @@
{
// Load one, replicate.
auto offset = Extract(offsets, 0);
- out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order));
+ out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order));
}
Else If(ptr.hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
{
// Load all elements in a single SIMD instruction.
auto offset = Extract(offsets, 0);
- out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
+ out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), alignment, atomic, order);
}
Else
{
@@ -338,7 +338,7 @@
If(Extract(mask, i) != 0)
{
auto offset = Extract(offsets, i);
- auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
+ auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order);
out = Insert(out, el, i);
}
}
@@ -5258,7 +5258,7 @@
// TODO: specialize for small formats?
for (auto i = 0; i < (texelSize + 3)/4; i++)
{
- packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask());
+ packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
texelPtr += sizeof(float);
}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 658c77c..606fda6 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -251,7 +251,7 @@
}
template<typename T>
- T Load(Pointer ptr, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
+ T Load(Pointer ptr, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
}
// Incrementally constructed complex bundle of rvalues