SpirvShader: Optimize SIMD loads for static, equal-offset, in-bounds pointers

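Previously a single vector load was emitted only when the per-lane offsets
were both statically sequential and statically in bounds. This change checks
isStaticAllInBounds() first and then handles two fast paths: sequential
offsets still become a regular vector load (now without the redundant mask),
and equal offsets become one scalar load replicated across the SIMD lanes.
Pointers that are not statically in bounds fall through to the existing
masked/gather path, with the mask restricted under robust access.

As an illustration of the equal-offset path (a standalone sketch, not part of
the patch; Float4 and loadBroadcast are hypothetical names), loading one
element and broadcasting it replaces a per-lane gather:

    #include <array>
    #include <cstddef>

    using Float4 = std::array<float, 4>;  // hypothetical 4-wide SIMD vector

    // All lanes read the same statically in-bounds address, so one scalar
    // load plus a broadcast suffices; no mask or gather is needed.
    Float4 loadBroadcast(const float* base, std::size_t offset)
    {
        const float v = base[offset];
        return Float4{v, v, v, v};
    }
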
Bug: b/135609394
Change-Id: Ic566f2ef6d66e31b434d29b23aafd954a05958a4
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33709
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 316af3d..cf46147 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -291,22 +291,29 @@
 		{
 			using EL = typename Element<T>::type;
 
-			if (ptr.hasStaticSequentialOffsets(sizeof(float)) &&
-				ptr.isStaticAllInBounds(sizeof(float)))
+			if (ptr.isStaticAllInBounds(sizeof(float)))
 			{
-				// All elements sequential and in bounds.
-				// Perform regular load.
-				auto load = rr::Load(rr::Pointer<SIMD::Int>(ptr.base + ptr.staticOffsets[0]), alignment, atomic, order);
-				return As<T>(load & mask); // TODO: Mask here should be unnecessary, but keeps with MaskedLoad and Gather.
+				// All elements are statically known to be in-bounds.
+				// We can avoid costly conditionals on masks.
+
+				if (ptr.hasStaticSequentialOffsets(sizeof(float)))
+				{
+					// Offsets are sequential. Perform regular load.
+					return rr::Load(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), alignment, atomic, order);
+				}
+				if (ptr.hasStaticEqualOffsets())
+				{
+					// Load one, replicate.
+					return T(*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment));
+				}
 			}
-
-			auto offsets = ptr.offsets();
-
-			if(robust)  // Disable OOB reads.
+			else if(robust)  // Disable OOB reads.
 			{
 				mask &= ptr.isInBounds(sizeof(float));
 			}
 
+			auto offsets = ptr.offsets();
+
 			if (!atomic && order == std::memory_order_relaxed)
 			{
 				if (ptr.hasStaticEqualOffsets())