Use correct alignment for input attachment loads

Previously, SIMD::Load assumed that all pointers are 4-byte aligned.
However, texel pointers generated by SpirvShader::GetTexelAddress are
not 4-byte aligned for small formats.
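
For illustration only, a minimal sketch of the kind of address
arithmetic involved; the function and parameter names below are
hypothetical and are not the real GetTexelAddress:

    #include <cstdint>

    // For a 1-byte-per-texel format such as R8, the texel address is
    // base + y * rowPitch + x * texelSize. With texelSize == 1 nothing
    // guarantees that the result is 4-byte aligned.
    uint8_t *texelAddress(uint8_t *base, int x, int y,
                          int texelSize, int rowPitch)
    {
        return base + y * rowPitch + x * texelSize;
    }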

Add an alignment parameter to SIMD::Load and use it in EmitImageRead.
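
In sketch form, the resulting declaration and the EmitImageRead call
site (both shown in the diff below), with the alignment clamped to the
texel size:

    template<typename T>
    T Load(Pointer ptr, Int mask, bool atomic = false,
           std::memory_order order = std::memory_order_relaxed,
           int alignment = sizeof(float));

    // EmitImageRead: a small format may leave texelPtr with less than
    // 4-byte alignment, so pass std::min(texelSize, 4) instead of the
    // default sizeof(float).
    packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask(),
                                      false, std::memory_order_relaxed,
                                      std::min(texelSize, 4));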

Bug: b/135954761
Change-Id: I6a420049e98f42a68960d557dee933fee9487af3
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33328
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index f23f859..f5b8bf2 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -287,7 +287,7 @@
 	{
 
 		template<typename T>
-		T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+		T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
 		{
 			using EL = typename Element<T>::type;
 			auto offsets = ptr.offsets();
@@ -302,16 +302,16 @@
 					T out = T(0);
 					If(AnyTrue(mask))
 					{
-						EL el = *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], sizeof(float));
+						EL el = *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment);
 						out = T(el);
 					}
 					return out;
 				}
 				if (ptr.hasStaticSequentialOffsets(sizeof(float)))
 				{
-					return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, sizeof(float));
+					return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment);
 				}
-				return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, sizeof(float));
+				return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment);
 			}
 			else
 			{
@@ -321,13 +321,13 @@
 				{
 					// Load one, replicate.
 					auto offset = Extract(offsets, 0);
-					out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order));
+					out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order));
 				}
 				Else If(ptr.hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
 				{
 					// Load all elements in a single SIMD instruction.
 					auto offset = Extract(offsets, 0);
-					out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
+					out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), alignment, atomic, order);
 				}
 				Else
 				{
@@ -338,7 +338,7 @@
 						If(Extract(mask, i) != 0)
 						{
 							auto offset = Extract(offsets, i);
-							auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
+							auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order);
 							out = Insert(out, el, i);
 						}
 					}
@@ -5258,7 +5258,7 @@
 		// TODO: specialize for small formats?
 		for (auto i = 0; i < (texelSize + 3)/4; i++)
 		{
-			packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask());
+			packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
 			texelPtr += sizeof(float);
 		}
 
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 658c77c..606fda6 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -251,7 +251,7 @@
 		}
 
 		template<typename T>
-		T Load(Pointer ptr, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
+		T Load(Pointer ptr, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
 	}
 
 	// Incrementally constructed complex bundle of rvalues