Avoid dynamic branches on in-bounds loads

When robustBufferAccess is disabled, applications must guarantee that
no accesses will go out of bounds (statically or dynamically). For
accesses with static equal or sequential offsets this means they'll be
in-bounds for both active and inactive lanes. Hence we can omit all
lane masking on such load operations.

Bug: b/131224163
Bug: b/135609394
Change-Id: I8ef2d42b0159af0ee425bbd3ef5898ce971d8491
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33928
Tested-by: Nicolas Capens <nicolascapens@google.com>
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index a15a585..dd04fc9 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -291,7 +291,7 @@
 		{
 			using EL = typename Element<T>::type;
 
-			if (ptr.isStaticAllInBounds(sizeof(float)))
+			if (ptr.isStaticallyInBounds(sizeof(float), robustness))
 			{
 				// All elements are statically known to be in-bounds.
 				// We can avoid costly conditional on masks.
@@ -314,7 +314,7 @@
 				case OutOfBoundsBehavior::Nullify:
 				case OutOfBoundsBehavior::RobustBufferAccess:
 				case OutOfBoundsBehavior::UndefinedValue:
-					mask &= ptr.isInBounds(sizeof(float));  // Disable out-of-bounds reads.
+					mask &= ptr.isInBounds(sizeof(float), robustness);  // Disable out-of-bounds reads.
 					break;
 				case OutOfBoundsBehavior::UndefinedBehavior:
 					// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
@@ -406,7 +406,7 @@
 			case OutOfBoundsBehavior::Nullify:
 			case OutOfBoundsBehavior::RobustBufferAccess:  // TODO: Allows writing anywhere within bounds. Could be faster than masking.
 			case OutOfBoundsBehavior::UndefinedValue:  // Should not be used for store operations. Treat as robust buffer access.
-				mask &= ptr.isInBounds(sizeof(float));  // Disable out-of-bounds writes.
+				mask &= ptr.isInBounds(sizeof(float), robustness);  // Disable out-of-bounds writes.
 				break;
 			case OutOfBoundsBehavior::UndefinedBehavior:
 				// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
@@ -432,7 +432,7 @@
 				}
 				else if (ptr.hasStaticSequentialOffsets(sizeof(float)))
 				{
-					if (ptr.isStaticAllInBounds(sizeof(float)))
+					if (ptr.isStaticallyInBounds(sizeof(float), robustness))
 					{
 						// Pointer has no elements OOB, and the store is not atomic.
 						// Perform a RMW.
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 9acd451..8dd0430 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -145,11 +145,11 @@
 				return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]);
 			}
 
-			inline SIMD::Int isInBounds(unsigned int accessSize) const
+			inline SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
 			{
 				ASSERT(accessSize > 0);
 
-				if (isStaticAllInBounds(accessSize))
+				if (isStaticallyInBounds(accessSize, robustness))
 				{
 					return SIMD::Int(0xffffffff);
 				}
@@ -168,12 +168,31 @@
 				return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
 			}
 
-			inline bool isStaticAllInBounds(unsigned int accessSize) const
+			inline bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
 			{
-				if (hasDynamicOffsets || hasDynamicLimit)
+				if (hasDynamicOffsets)
 				{
 					return false;
 				}
+
+				if (hasDynamicLimit)
+				{
+					if (hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
+					{
+						switch(robustness)
+						{
+						case OutOfBoundsBehavior::UndefinedBehavior:
+							// With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
+							// but since it can't know in advance which branches are taken this must be true even for inactive lanes.
+							return true;
+						case OutOfBoundsBehavior::Nullify:
+						case OutOfBoundsBehavior::RobustBufferAccess:
+						case OutOfBoundsBehavior::UndefinedValue:
+							return false;
+						}
+					}
+				}
+
 				for (int i = 0; i < SIMD::Width; i++)
 				{
 					if (staticOffsets[i] + accessSize - 1 >= staticLimit)
@@ -181,6 +200,7 @@
 						return false;
 					}
 				}
+
 				return true;
 			}
 
@@ -255,8 +275,8 @@
 			SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
 			std::array<int32_t, SIMD::Width> staticOffsets;
 
-			bool hasDynamicLimit; // True if dynamicLimit is zero.
-			bool hasDynamicOffsets; // True if all dynamicOffsets are zero.
+			bool hasDynamicLimit;    // True if dynamicLimit is non-zero.
+			bool hasDynamicOffsets;  // True if any dynamicOffsets are non-zero.
 		};
 
 		template <typename T> struct Element {};