Provide fine-grained out-of-bounds behavior control

The required or desired behavior for out-of-bounds accesses depends on
the robustBufferAccess feature, the storage class being accessed, what
static analysis can prove, the debugging state, and the preferred level
of paranoia.

Specifically, this change:
- Omits bounds checks on local variable initialization.
- Omits bounds checks on modf() and frexp() output variables.
- Performs bounds checks on image reads and writes independently of the
  robustBufferAccess feature setting.
- Performs bounds checks on OpCopyMemory independently of the
  robustBufferAccess feature setting.

Bug: b/131224163
Change-Id: I199e73d42d9cce0645792dd1d876ea69d4ec3835
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33988
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
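
For context, a minimal scalar sketch of the four modes follows. This is
illustrative only (the real implementation emits Reactor JIT code over
SIMD lanes), but it captures the load semantics each mode permits:

    // Illustrative sketch, not SwiftShader code. Mirrors the enum added to
    // SpirvShader.hpp; the load itself is a plain C++ stand-in.
    #include <cstddef>
    #include <cstdio>

    enum class OutOfBoundsBehavior
    {
        Nullify,             // Loads become zero, stores are elided.
        RobustBufferAccess,  // Access anywhere within bounds, or zero.
        UndefinedValue,      // Loads only: any value may be returned.
        UndefinedBehavior,   // No checks; the access must be in bounds.
    };

    float load(const float *buffer, size_t size, size_t i, OutOfBoundsBehavior b)
    {
        const bool inBounds = (i < size);

        switch(b)
        {
        case OutOfBoundsBehavior::Nullify:
        case OutOfBoundsBehavior::RobustBufferAccess:
            return inBounds ? buffer[i] : 0.0f;  // Out-of-bounds reads yield zero.
        case OutOfBoundsBehavior::UndefinedValue:
            return inBounds ? buffer[i] : buffer[0];  // Any value is legal; stay inside the allocation.
        case OutOfBoundsBehavior::UndefinedBehavior:
            return buffer[i];  // Caller guarantees i < size; no check is emitted.
        }
        return 0.0f;
    }

    int main()
    {
        float data[] = {1.0f, 2.0f, 3.0f};
        printf("%g\n", load(data, 3, 7, OutOfBoundsBehavior::RobustBufferAccess));  // Prints 0.
    }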
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index ce621ee..a15a585 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -287,7 +287,7 @@
 	{
 
 		template<typename T>
-		T Load(Pointer ptr, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
+		T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
 		{
 			using EL = typename Element<T>::type;
 
@@ -307,9 +307,19 @@
 					return T(*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment));
 				}
 			}
-			else if(robust)  // Disable OOB reads.
+			else
 			{
-				mask &= ptr.isInBounds(sizeof(float));
+				switch(robustness)
+				{
+				case OutOfBoundsBehavior::Nullify:
+				case OutOfBoundsBehavior::RobustBufferAccess:
+				case OutOfBoundsBehavior::UndefinedValue:
+					mask &= ptr.isInBounds(sizeof(float));  // Disable out-of-bounds reads.
+					break;
+				case OutOfBoundsBehavior::UndefinedBehavior:
+					// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+					break;
+				}
 			}
 
 			auto offsets = ptr.offsets();
@@ -329,11 +339,26 @@
 					}
 					return out;
 				}
+
+				bool zeroMaskedLanes = true;
+				switch(robustness)
+				{
+				case OutOfBoundsBehavior::Nullify:
+				case OutOfBoundsBehavior::RobustBufferAccess:  // Must either return an in-bounds value, or zero.
+					zeroMaskedLanes = true;
+					break;
+				case OutOfBoundsBehavior::UndefinedValue:
+				case OutOfBoundsBehavior::UndefinedBehavior:
+					zeroMaskedLanes = false;
+					break;
+				}
+
 				if (ptr.hasStaticSequentialOffsets(sizeof(float)))
 				{
-					return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, robust);
+					return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, zeroMaskedLanes);
 				}
-				return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, robust);
+
+				return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, zeroMaskedLanes);
 			}
 			else
 			{
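
The zeroMaskedLanes flag passed to rr::MaskedLoad and rr::Gather above carries
the security-relevant part of the contract. A hypothetical scalar stand-in for
a 4-lane gather (not the actual Reactor implementation) shows what the flag
means:

    // Scalar model of rr::Gather's contract; names and types are stand-ins.
    // When zeroMaskedLanes is true, lanes disabled by the mask must read as
    // zero (required by Nullify and RobustBufferAccess). When false, masked
    // lanes may hold anything, so the backend can skip the zeroing work.
    #include <array>
    #include <cstdint>

    std::array<float, 4> gather(const float *base,
                                std::array<int32_t, 4> offsets,  // Byte offsets, one per lane.
                                std::array<bool, 4> mask,
                                bool zeroMaskedLanes)
    {
        const uint8_t *bytes = reinterpret_cast<const uint8_t*>(base);

        // Masked-off lanes: zero when zeroMaskedLanes, otherwise unspecified
        // (modeled here with a recognizable garbage value).
        std::array<float, 4> out;
        out.fill(zeroMaskedLanes ? 0.0f : -999.0f);

        for(int lane = 0; lane < 4; lane++)
        {
            if(mask[lane])
            {
                out[lane] = *reinterpret_cast<const float*>(bytes + offsets[lane]);
            }
        }
        return out;
    }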
@@ -370,15 +395,22 @@
 		}
 
 		template<typename T>
-		void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+		void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
 		{
 			using EL = typename Element<T>::type;
 			constexpr size_t alignment = sizeof(float);
 			auto offsets = ptr.offsets();
 
-			if(robust)  // Disable OOB writes.
+			switch(robustness)
 			{
-				mask &= ptr.isInBounds(sizeof(float));
+			case OutOfBoundsBehavior::Nullify:
+			case OutOfBoundsBehavior::RobustBufferAccess:  // TODO: Allows writing anywhere within bounds. Could be faster than masking.
+			case OutOfBoundsBehavior::UndefinedValue:  // Should not be used for store operations. Treat as robust buffer access.
+				mask &= ptr.isInBounds(sizeof(float));  // Disable out-of-bounds writes.
+				break;
+			case OutOfBoundsBehavior::UndefinedBehavior:
+				// Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+				break;
 			}
 
 			if (!atomic && order == std::memory_order_relaxed)
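
The store path mirrors this: lanes removed from the mask (inactive lanes, or
lanes masked off by isInBounds above) must write nothing at all. Under the
same scalar-model assumptions as the gather sketch:

    // Scalar model of a masked SIMD store; illustrative only.
    #include <array>
    #include <cstdint>

    void maskedStore(float *base, std::array<int32_t, 4> offsets,
                     std::array<float, 4> val, std::array<bool, 4> mask)
    {
        uint8_t *bytes = reinterpret_cast<uint8_t*>(base);
        for(int lane = 0; lane < 4; lane++)
        {
            if(mask[lane])  // Out-of-bounds lanes were cleared by mask &= ptr.isInBounds(...).
            {
                *reinterpret_cast<float*>(bytes + offsets[lane]) = val[lane];
            }
        }
    }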
@@ -487,7 +519,7 @@
 			{
 			case spv::OpEntryPoint:
 			{
-				auto executionModel = spv::ExecutionModel(insn.word(1));
+				executionModel = spv::ExecutionModel(insn.word(1));
 				auto id = Function::ID(insn.word(2));
 				auto name = insn.string(3);
 				auto stage = executionModelToStage(executionModel);
@@ -1967,6 +1999,36 @@
 		object.definition = insn;
 	}
 
+	OutOfBoundsBehavior SpirvShader::EmitState::getOutOfBoundsBehavior(spv::StorageClass storageClass) const
+	{
+		switch(storageClass)
+		{
+		case spv::StorageClassUniform:
+		case spv::StorageClassStorageBuffer:
+			// Buffer resource access. robustBufferAccess feature applies.
+			return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+			                          : OutOfBoundsBehavior::UndefinedBehavior;
+
+		case spv::StorageClassImage:
+			return OutOfBoundsBehavior::UndefinedValue;  // "The value returned by a read of an invalid texel is undefined"
+
+		case spv::StorageClassInput:
+			if(executionModel == spv::ExecutionModelVertex)
+			{
+				// Vertex attributes follow robustBufferAccess rules.
+				return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+				                          : OutOfBoundsBehavior::UndefinedBehavior;
+			}
+			// Fall through to default case.
+		default:
+			// TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
+			// TODO(b/131224163): Optimize cases statically known to be within bounds.
+			return OutOfBoundsBehavior::UndefinedValue;
+		}
+
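+		// Unreachable: every case in the switch above returns. Retained so compilers that expect a trailing return in a non-void function stay quiet.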
+		return OutOfBoundsBehavior::Nullify;
+	}
+
 	// emit-time
 
 	void SpirvShader::emitProlog(SpirvRoutine *routine) const
@@ -2004,7 +2066,7 @@
 
 	void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
 	{
-		EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess);
+		EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess, executionModel);
 
 		// Emit everything up to the first label
 		// TODO: Separate out dispatch of block from non-block instructions?
@@ -2743,7 +2805,8 @@
 				{
 					auto p = ptr + offset;
 					if (interleavedByLane) { p = interleaveByLane(p); }
-					SIMD::Store(p, initialValue.Float(i), state->robust, state->activeLaneMask());
+					auto robustness = OutOfBoundsBehavior::UndefinedBehavior;  // Local variables are always within bounds.
+					SIMD::Store(p, initialValue.Float(i), robustness, state->activeLaneMask());
 				});
 				break;
 			}
@@ -2786,16 +2849,15 @@
 		}
 
 		auto ptr = GetPointerToData(pointerId, 0, state);
-
 		bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
-
 		auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
+		auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
 
 		VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 		{
 			auto p = ptr + offset;
-			if (interleavedByLane) { p = interleaveByLane(p); }
-			dst.move(i, SIMD::Load<SIMD::Float>(p, state->robust, state->activeLaneMask(), atomic, memoryOrder));
+			if (interleavedByLane) { p = interleaveByLane(p); }  // TODO: Interleave once, then add offset?
+			dst.move(i, SIMD::Load<SIMD::Float>(p, robustness, state->activeLaneMask(), atomic, memoryOrder));
 		});
 
 		return EmitResult::Continue;
@@ -2823,6 +2885,7 @@
 
 		auto ptr = GetPointerToData(pointerId, 0, state);
 		bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
+		auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
 
 		if (object.kind == Object::Kind::Constant)
 		{
@@ -2832,7 +2895,7 @@
 			{
 				auto p = ptr + offset;
 				if (interleavedByLane) { p = interleaveByLane(p); }
-				SIMD::Store(p, SIMD::Float(src[i]), state->robust, state->activeLaneMask(), atomic, memoryOrder);
+				SIMD::Store(p, SIMD::Float(src[i]), robustness, state->activeLaneMask(), atomic, memoryOrder);
 			});
 		}
 		else
@@ -2843,7 +2906,7 @@
 			{
 				auto p = ptr + offset;
 				if (interleavedByLane) { p = interleaveByLane(p); }
-				SIMD::Store(p, src.Float(i), state->robust, state->activeLaneMask(), atomic, memoryOrder);
+				SIMD::Store(p, src.Float(i), robustness, state->activeLaneMask(), atomic, memoryOrder);
 			});
 		}
 
@@ -3891,6 +3954,11 @@
 			auto ptrTy = getType(getObject(ptrId).type);
 			auto ptr = GetPointerToData(ptrId, 0, state);
 			bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+			// TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
+			// to be in bounds even for inactive lanes.
+			// - Clarify the SPIR-V spec.
+			// - Eliminate lane masking and assume interleaving.
+			auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
 
 			for (auto i = 0u; i < type.sizeInComponents; i++)
 			{
@@ -3899,7 +3967,7 @@
 				dst.move(i, frac);
 				auto p = ptr + (i * sizeof(float));
 				if (interleavedByLane) { p = interleaveByLane(p); }
-				SIMD::Store(p, whole, state->robust, state->activeLaneMask());
+				SIMD::Store(p, whole, robustness, state->activeLaneMask());
 			}
 			break;
 		}
@@ -4024,6 +4092,11 @@
 			auto ptrTy = getType(getObject(ptrId).type);
 			auto ptr = GetPointerToData(ptrId, 0, state);
 			bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+			// TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
+			// to be in bounds even for inactive lanes.
+			// - Clarify the SPIR-V spec.
+			// - Eliminate lane masking and assume interleaving.
+			auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
 
 			for (auto i = 0u; i < type.sizeInComponents; i++)
 			{
@@ -4035,7 +4108,7 @@
 
 				auto p = ptr + (i * sizeof(float));
 				if (interleavedByLane) { p = interleaveByLane(p); }
-				SIMD::Store(p, exponent, state->robust, state->activeLaneMask());
+				SIMD::Store(p, exponent, robustness, state->activeLaneMask());
 			}
 			break;
 		}
@@ -5245,13 +5318,18 @@
 		auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
 		auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
 
+		// "The value returned by a read of an invalid texel is undefined,
+		//  unless that read operation is from a buffer resource and the robustBufferAccess feature is enabled."
+		// TODO: Don't always assume a buffer resource.
+		auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
+
 		SIMD::Int packed[4];
 		// Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
 		// of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
 		// TODO: specialize for small formats?
 		for (auto i = 0; i < (texelSize + 3)/4; i++)
 		{
-			packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->robust, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
+			packed[i] = SIMD::Load<SIMD::Int>(texelPtr, robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
 			texelPtr += sizeof(float);
 		}
 
@@ -5587,9 +5665,12 @@
 		auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
 		auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, 0, false);
 
+		// SPIR-V 1.4: "If the coordinates are outside the image, the memory location that is accessed is undefined."
+		auto robustness = OutOfBoundsBehavior::UndefinedValue;
+
 		for (auto i = 0u; i < numPackedElements; i++)
 		{
-			SIMD::Store(texelPtr, packed[i], state->robust, state->activeLaneMask());
+			SIMD::Store(texelPtr, packed[i], robustness, state->activeLaneMask());
 			texelPtr += sizeof(float);
 		}
 
@@ -5778,8 +5859,11 @@
 			if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
 			if (srcInterleavedByLane) { src = interleaveByLane(src); }
 
-			auto value = SIMD::Load<SIMD::Float>(src, state->robust, state->activeLaneMask());
-			SIMD::Store(dst, value, state->robust, state->activeLaneMask());
+			// TODO(b/131224163): Optimize based on src/dst storage classes.
+			auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
+
+			auto value = SIMD::Load<SIMD::Float>(src, robustness, state->activeLaneMask());
+			SIMD::Store(dst, value, robustness, state->activeLaneMask());
 		});
 		return EmitResult::Continue;
 	}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index d97befb..9acd451 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -55,6 +55,14 @@
 	// Forward declarations.
 	class SpirvRoutine;
 
+	enum class OutOfBoundsBehavior
+	{
+		Nullify,             // Loads become zero, stores are elided.
+		RobustBufferAccess,  // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing).
+		UndefinedValue,      // Only for load operations. Not secure. No program termination.
+		UndefinedBehavior,   // Program may terminate.
+	};
+
 	// SIMD contains types that represent multiple scalars packed into a single
 	// vector data type. Types in the SIMD namespace provide a semantic hint
 	// that the data should be treated as a per-execution-lane scalar instead of
@@ -257,16 +265,16 @@
 		template <> struct Element<UInt>  { using type = rr::UInt; };
 
 		template<typename T>
-		void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
+		void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
 
 		template<typename T>
-		void Store(Pointer ptr, RValue<T> val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
+		void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
 		{
-			Store(ptr, T(val), robust, mask, atomic, order);
+			Store(ptr, T(val), robustness, mask, atomic, order);
 		}
 
 		template<typename T>
-		T Load(Pointer ptr, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
+		T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
 	}
 
 	// Incrementally constructed complex bundle of rvalues
@@ -850,6 +858,7 @@
 		Function::ID entryPoint;
 
 		const bool robustBufferAccess = true;
+		spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.
 
 		// DeclareType creates a Type for the given OpTypeX instruction, storing
 		// it into the types map. It is called from the analysis pass (constructor).
@@ -934,13 +943,16 @@
 					Function::ID function,
 					RValue<SIMD::Int> activeLaneMask,
 					const vk::DescriptorSet::Bindings &descriptorSets,
-					bool robustBufferAccess)
+					bool robustBufferAccess,
+					spv::ExecutionModel executionModel)
 				: routine(routine),
 				  function(function),
 				  activeLaneMaskValue(activeLaneMask.value),
 				  descriptorSets(descriptorSets),
-				  robust(robustBufferAccess)
+				  robustBufferAccess(robustBufferAccess),
+				  executionModel(executionModel)
 			{
+				ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0));  // Must parse OpEntryPoint before emitting.
 			}
 
 			RValue<SIMD::Int> activeLaneMask() const
@@ -975,7 +987,7 @@
 
 			const vk::DescriptorSet::Bindings &descriptorSets;
 
-			const bool robust = true;  // Emit robustBufferAccess safe code.
+			OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
 
 			Intermediate& createIntermediate(Object::ID id, uint32_t size)
 			{
@@ -1005,9 +1017,13 @@
 				ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
 				return it->second;
 			}
+
 		private:
 			std::unordered_map<Object::ID, Intermediate> intermediates;
 			std::unordered_map<Object::ID, SIMD::Pointer> pointers;
+
+			const bool robustBufferAccess = true;  // Emit robustBufferAccess safe code.
+			const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
 		};
 
 		// EmitResult is an enumerator of result values from the Emit functions.
@@ -1203,6 +1219,8 @@
 		static sw::FilterType convertFilterMode(const vk::Sampler *sampler);
 		static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler);
 		static sw::AddressingMode convertAddressingMode(int coordinateIndex, VkSamplerAddressMode addressMode, VkImageViewType imageViewType);
+
+		// Returns 0 when invalid.
 		static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
 	};
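
To make the new mapping concrete, here is a self-contained mirror of
getOutOfBoundsBehavior. It is a sketch for illustration, not the shipped
function; StorageClass, ExecutionModel and Behavior are stand-ins for
spv::StorageClass, spv::ExecutionModel and OutOfBoundsBehavior:

    #include <cstdio>

    enum class StorageClass { Uniform, StorageBuffer, Image, Input, Function };
    enum class ExecutionModel { Vertex, Fragment, GLCompute };
    enum class Behavior { Nullify, RobustBufferAccess, UndefinedValue, UndefinedBehavior };

    Behavior outOfBoundsBehavior(StorageClass sc, ExecutionModel model, bool robustBufferAccess)
    {
        switch(sc)
        {
        case StorageClass::Uniform:
        case StorageClass::StorageBuffer:
            // Buffer resource access: the robustBufferAccess feature applies.
            return robustBufferAccess ? Behavior::RobustBufferAccess : Behavior::UndefinedBehavior;
        case StorageClass::Input:
            if(model == ExecutionModel::Vertex)
            {
                // Vertex attributes follow robustBufferAccess rules.
                return robustBufferAccess ? Behavior::RobustBufferAccess : Behavior::UndefinedBehavior;
            }
            // Fall through: non-vertex inputs get UndefinedValue, like Image and the rest.
        default:
            return Behavior::UndefinedValue;  // Reads may return anything; no termination.
        }
    }

    int main()
    {
        auto b = outOfBoundsBehavior(StorageClass::StorageBuffer, ExecutionModel::Fragment, true);
        printf("%d\n", static_cast<int>(b));  // Prints 1 (RobustBufferAccess).
    }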