Provide fine-grained out-of-bounds behavior control

The required or desired behavior on out-of-bounds accesses depends on the
robustness feature, storage class, static analysis, debugging state, and
paranoia level preference. Specifically, this change:
- Omits bounds checks on local variable initialization.
- Omits bounds checks on modf() and frexp() output variables.
- Always bounds checks image reads/writes, instead of relying on the
  robustBufferAccess feature setting.
- Always bounds checks OpCopyMemory, instead of relying on the
  robustBufferAccess feature setting.

Bug: b/131224163
Change-Id: I199e73d42d9cce0645792dd1d876ea69d4ec3835
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33988
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index ce621ee..a15a585 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -287,7 +287,7 @@ { template<typename T> - T Load(Pointer ptr, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */) + T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */) { using EL = typename Element<T>::type; @@ -307,9 +307,19 @@ return T(*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment)); } } - else if(robust) // Disable OOB reads. + else { - mask &= ptr.isInBounds(sizeof(float)); + switch(robustness) + { + case OutOfBoundsBehavior::Nullify: + case OutOfBoundsBehavior::RobustBufferAccess: + case OutOfBoundsBehavior::UndefinedValue: + mask &= ptr.isInBounds(sizeof(float)); // Disable out-of-bounds reads. + break; + case OutOfBoundsBehavior::UndefinedBehavior: + // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. + break; + } } auto offsets = ptr.offsets(); @@ -329,11 +339,26 @@ } return out; } + + bool zeroMaskedLanes = true; + switch(robustness) + { + case OutOfBoundsBehavior::Nullify: + case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero. 
+ zeroMaskedLanes = true; + break; + case OutOfBoundsBehavior::UndefinedValue: + case OutOfBoundsBehavior::UndefinedBehavior: + zeroMaskedLanes = false; + break; + } + if (ptr.hasStaticSequentialOffsets(sizeof(float))) { - return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, robust); + return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, zeroMaskedLanes); } - return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, robust); + + return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, zeroMaskedLanes); } else { @@ -370,15 +395,22 @@ } template<typename T> - void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */) + void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */) { using EL = typename Element<T>::type; constexpr size_t alignment = sizeof(float); auto offsets = ptr.offsets(); - if(robust) // Disable OOB writes. + switch(robustness) { - mask &= ptr.isInBounds(sizeof(float)); + case OutOfBoundsBehavior::Nullify: + case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking. + case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access. + mask &= ptr.isInBounds(sizeof(float)); // Disable out-of-bounds writes. + break; + case OutOfBoundsBehavior::UndefinedBehavior: + // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. 
+ break; } if (!atomic && order == std::memory_order_relaxed) @@ -487,7 +519,7 @@ { case spv::OpEntryPoint: { - auto executionModel = spv::ExecutionModel(insn.word(1)); + executionModel = spv::ExecutionModel(insn.word(1)); auto id = Function::ID(insn.word(2)); auto name = insn.string(3); auto stage = executionModelToStage(executionModel); @@ -1967,6 +1999,36 @@ object.definition = insn; } + OutOfBoundsBehavior SpirvShader::EmitState::getOutOfBoundsBehavior(spv::StorageClass storageClass) const + { + switch(storageClass) + { + case spv::StorageClassUniform: + case spv::StorageClassStorageBuffer: + // Buffer resource access. robustBufferAccess feature applies. + return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess + : OutOfBoundsBehavior::UndefinedBehavior; + + case spv::StorageClassImage: + return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined" + + case spv::StorageClassInput: + if(executionModel == spv::ExecutionModelVertex) + { + // Vertex attributes follow robustBufferAccess rules. + return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess + : OutOfBoundsBehavior::UndefinedBehavior; + } + // Fall through to default case. + default: + // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain. + // TODO(b/131224163): Optimize cases statically known to be within bounds. 
+ return OutOfBoundsBehavior::UndefinedValue; + } + + return OutOfBoundsBehavior::Nullify; + } + // emit-time void SpirvShader::emitProlog(SpirvRoutine *routine) const @@ -2004,7 +2066,7 @@ void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const { - EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess); + EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess, executionModel); // Emit everything up to the first label // TODO: Separate out dispatch of block from non-block instructions? @@ -2743,7 +2805,8 @@ { auto p = ptr + offset; if (interleavedByLane) { p = interleaveByLane(p); } - SIMD::Store(p, initialValue.Float(i), state->robust, state->activeLaneMask()); + auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds. + SIMD::Store(p, initialValue.Float(i), robustness, state->activeLaneMask()); }); break; } @@ -2786,16 +2849,15 @@ } auto ptr = GetPointerToData(pointerId, 0, state); - bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass); - auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents); + auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass); VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset) { auto p = ptr + offset; - if (interleavedByLane) { p = interleaveByLane(p); } - dst.move(i, SIMD::Load<SIMD::Float>(p, state->robust, state->activeLaneMask(), atomic, memoryOrder)); + if (interleavedByLane) { p = interleaveByLane(p); } // TODO: Interleave once, then add offset? 
+ dst.move(i, SIMD::Load<SIMD::Float>(p, robustness, state->activeLaneMask(), atomic, memoryOrder)); }); return EmitResult::Continue; @@ -2823,6 +2885,7 @@ auto ptr = GetPointerToData(pointerId, 0, state); bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass); + auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass); if (object.kind == Object::Kind::Constant) { @@ -2832,7 +2895,7 @@ { auto p = ptr + offset; if (interleavedByLane) { p = interleaveByLane(p); } - SIMD::Store(p, SIMD::Float(src[i]), state->robust, state->activeLaneMask(), atomic, memoryOrder); + SIMD::Store(p, SIMD::Float(src[i]), robustness, state->activeLaneMask(), atomic, memoryOrder); }); } else @@ -2843,7 +2906,7 @@ { auto p = ptr + offset; if (interleavedByLane) { p = interleaveByLane(p); } - SIMD::Store(p, src.Float(i), state->robust, state->activeLaneMask(), atomic, memoryOrder); + SIMD::Store(p, src.Float(i), robustness, state->activeLaneMask(), atomic, memoryOrder); }); } @@ -3891,6 +3954,11 @@ auto ptrTy = getType(getObject(ptrId).type); auto ptr = GetPointerToData(ptrId, 0, state); bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass); + // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed + // to be in bounds even for inactive lanes. + // - Clarify the SPIR-V spec. + // - Eliminate lane masking and assume interleaving. 
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior; for (auto i = 0u; i < type.sizeInComponents; i++) { @@ -3899,7 +3967,7 @@ dst.move(i, frac); auto p = ptr + (i * sizeof(float)); if (interleavedByLane) { p = interleaveByLane(p); } - SIMD::Store(p, whole, state->robust, state->activeLaneMask()); + SIMD::Store(p, whole, robustness, state->activeLaneMask()); } break; } @@ -4024,6 +4092,11 @@ auto ptrTy = getType(getObject(ptrId).type); auto ptr = GetPointerToData(ptrId, 0, state); bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass); + // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed + // to be in bounds even for inactive lanes. + // - Clarify the SPIR-V spec. + // - Eliminate lane masking and assume interleaving. + auto robustness = OutOfBoundsBehavior::UndefinedBehavior; for (auto i = 0u; i < type.sizeInComponents; i++) { @@ -4035,7 +4108,7 @@ auto p = ptr + (i * sizeof(float)); if (interleavedByLane) { p = interleaveByLane(p); } - SIMD::Store(p, exponent, state->robust, state->activeLaneMask()); + SIMD::Store(p, exponent, robustness, state->activeLaneMask()); } break; } @@ -5245,13 +5318,18 @@ auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes); auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect); + // "The value returned by a read of an invalid texel is undefined, + // unless that read operation is from a buffer resource and the robustBufferAccess feature is enabled." + // TODO: Don't always assume a buffer resource. + auto robustness = OutOfBoundsBehavior::RobustBufferAccess; + SIMD::Int packed[4]; // Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch // of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits. // TODO: specialize for small formats? 
for (auto i = 0; i < (texelSize + 3)/4; i++) { - packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->robust, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4)); + packed[i] = SIMD::Load<SIMD::Int>(texelPtr, robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4)); texelPtr += sizeof(float); } @@ -5587,9 +5665,12 @@ auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes); auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, 0, false); + // SPIR-V 1.4: "If the coordinates are outside the image, the memory location that is accessed is undefined." + auto robustness = OutOfBoundsBehavior::UndefinedValue; + for (auto i = 0u; i < numPackedElements; i++) { - SIMD::Store(texelPtr, packed[i], state->robust, state->activeLaneMask()); + SIMD::Store(texelPtr, packed[i], robustness, state->activeLaneMask()); texelPtr += sizeof(float); } @@ -5778,8 +5859,11 @@ if (dstInterleavedByLane) { dst = interleaveByLane(dst); } if (srcInterleavedByLane) { src = interleaveByLane(src); } - auto value = SIMD::Load<SIMD::Float>(src, state->robust, state->activeLaneMask()); - SIMD::Store(dst, value, state->robust, state->activeLaneMask()); + // TODO(b/131224163): Optimize based on src/dst storage classes. + auto robustness = OutOfBoundsBehavior::RobustBufferAccess; + + auto value = SIMD::Load<SIMD::Float>(src, robustness, state->activeLaneMask()); + SIMD::Store(dst, value, robustness, state->activeLaneMask()); }); return EmitResult::Continue; }
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index d97befb..9acd451 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -55,6 +55,14 @@ // Forward declarations. class SpirvRoutine; + enum class OutOfBoundsBehavior + { + Nullify, // Loads become zero, stores are elided. + RobustBufferAccess, // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing). + UndefinedValue, // Only for load operations. Not secure. No program termination. + UndefinedBehavior, // Program may terminate. + }; + // SIMD contains types that represent multiple scalars packed into a single // vector data type. Types in the SIMD namespace provide a semantic hint // that the data should be treated as a per-execution-lane scalar instead of @@ -257,16 +265,16 @@ template <> struct Element<UInt> { using type = rr::UInt; }; template<typename T> - void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); + void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); template<typename T> - void Store(Pointer ptr, RValue<T> val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed) + void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed) { - Store(ptr, T(val), robust, mask, atomic, order); + Store(ptr, T(val), robustness, mask, atomic, order); } template<typename T> - T Load(Pointer ptr, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float)); + T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float)); } // Incrementally constructed complex bundle of rvalues @@ -850,6 +858,7 @@ Function::ID entryPoint; const bool robustBufferAccess = true; + spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior 
to OpEntryPoint parsing. // DeclareType creates a Type for the given OpTypeX instruction, storing // it into the types map. It is called from the analysis pass (constructor). @@ -934,13 +943,16 @@ Function::ID function, RValue<SIMD::Int> activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets, - bool robustBufferAccess) + bool robustBufferAccess, + spv::ExecutionModel executionModel) : routine(routine), function(function), activeLaneMaskValue(activeLaneMask.value), descriptorSets(descriptorSets), - robust(robustBufferAccess) + robustBufferAccess(robustBufferAccess), + executionModel(executionModel) { + ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting. } RValue<SIMD::Int> activeLaneMask() const @@ -975,7 +987,7 @@ const vk::DescriptorSet::Bindings &descriptorSets; - const bool robust = true; // Emit robustBufferAccess safe code. + OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const; Intermediate& createIntermediate(Object::ID id, uint32_t size) { @@ -1005,9 +1017,13 @@ ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value()); return it->second; } + private: std::unordered_map<Object::ID, Intermediate> intermediates; std::unordered_map<Object::ID, SIMD::Pointer> pointers; + + const bool robustBufferAccess = true; // Emit robustBufferAccess safe code. + const spv::ExecutionModel executionModel = spv::ExecutionModelMax; }; // EmitResult is an enumerator of result values from the Emit functions. @@ -1203,6 +1219,8 @@ static sw::FilterType convertFilterMode(const vk::Sampler *sampler); static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler); static sw::AddressingMode convertAddressingMode(int coordinateIndex, VkSamplerAddressMode addressMode, VkImageViewType imageViewType); + + // Returns 0 when invalid. static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model); };