Provide fine-grained out-of-bounds behavior control
The required or desired behavior on out-of-bounds accesses depends on
the robustness feature, storage class, static analysis, debugging state
and paranoia level preference.
Specifically, this change:
- Omits bounds checks on local variable initialization.
- Omits bounds checks on modf() and frexp() output variables.
- Performs bounds checks on image read/write instead of relying on the
robustBufferAccess feature setting.
- Performs bounds checks on OpCopyMemory instead of relying on the
robustBufferAccess feature setting.
Bug: b/131224163
Change-Id: I199e73d42d9cce0645792dd1d876ea69d4ec3835
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33988
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index ce621ee..a15a585 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -287,7 +287,7 @@
{
template<typename T>
- T Load(Pointer ptr, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
+ T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
{
using EL = typename Element<T>::type;
@@ -307,9 +307,19 @@
return T(*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment));
}
}
- else if(robust) // Disable OOB reads.
+ else
{
- mask &= ptr.isInBounds(sizeof(float));
+ switch(robustness)
+ {
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess:
+ case OutOfBoundsBehavior::UndefinedValue:
+ mask &= ptr.isInBounds(sizeof(float)); // Disable out-of-bounds reads.
+ break;
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+ break;
+ }
}
auto offsets = ptr.offsets();
@@ -329,11 +339,26 @@
}
return out;
}
+
+ bool zeroMaskedLanes = true;
+ switch(robustness)
+ {
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
+ zeroMaskedLanes = true;
+ break;
+ case OutOfBoundsBehavior::UndefinedValue:
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ zeroMaskedLanes = false;
+ break;
+ }
+
if (ptr.hasStaticSequentialOffsets(sizeof(float)))
{
- return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, robust);
+ return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, zeroMaskedLanes);
}
- return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, robust);
+
+ return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, zeroMaskedLanes);
}
else
{
@@ -370,15 +395,22 @@
}
template<typename T>
- void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+ void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
{
using EL = typename Element<T>::type;
constexpr size_t alignment = sizeof(float);
auto offsets = ptr.offsets();
- if(robust) // Disable OOB writes.
+ switch(robustness)
{
- mask &= ptr.isInBounds(sizeof(float));
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
+ case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access.
+ mask &= ptr.isInBounds(sizeof(float)); // Disable out-of-bounds writes.
+ break;
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+ break;
}
if (!atomic && order == std::memory_order_relaxed)
@@ -487,7 +519,7 @@
{
case spv::OpEntryPoint:
{
- auto executionModel = spv::ExecutionModel(insn.word(1));
+ executionModel = spv::ExecutionModel(insn.word(1));
auto id = Function::ID(insn.word(2));
auto name = insn.string(3);
auto stage = executionModelToStage(executionModel);
@@ -1967,6 +1999,36 @@
object.definition = insn;
}
+ OutOfBoundsBehavior SpirvShader::EmitState::getOutOfBoundsBehavior(spv::StorageClass storageClass) const
+ {
+ switch(storageClass)
+ {
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ // Buffer resource access. robustBufferAccess feature applies.
+ return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+ : OutOfBoundsBehavior::UndefinedBehavior;
+
+ case spv::StorageClassImage:
+ return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined"
+
+ case spv::StorageClassInput:
+ if(executionModel == spv::ExecutionModelVertex)
+ {
+ // Vertex attributes follow robustBufferAccess rules.
+ return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+ : OutOfBoundsBehavior::UndefinedBehavior;
+ }
+ // Fall through to default case.
+ default:
+ // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
+ // TODO(b/131224163): Optimize cases statically known to be within bounds.
+ return OutOfBoundsBehavior::UndefinedValue;
+ }
+
+ return OutOfBoundsBehavior::Nullify;
+ }
+
// emit-time
void SpirvShader::emitProlog(SpirvRoutine *routine) const
@@ -2004,7 +2066,7 @@
void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
{
- EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess);
+ EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess, executionModel);
// Emit everything up to the first label
// TODO: Separate out dispatch of block from non-block instructions?
@@ -2743,7 +2805,8 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, initialValue.Float(i), state->robust, state->activeLaneMask());
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds.
+ SIMD::Store(p, initialValue.Float(i), robustness, state->activeLaneMask());
});
break;
}
@@ -2786,16 +2849,15 @@
}
auto ptr = GetPointerToData(pointerId, 0, state);
-
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
-
auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
+ auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
auto p = ptr + offset;
- if (interleavedByLane) { p = interleaveByLane(p); }
- dst.move(i, SIMD::Load<SIMD::Float>(p, state->robust, state->activeLaneMask(), atomic, memoryOrder));
+ if (interleavedByLane) { p = interleaveByLane(p); } // TODO: Interleave once, then add offset?
+ dst.move(i, SIMD::Load<SIMD::Float>(p, robustness, state->activeLaneMask(), atomic, memoryOrder));
});
return EmitResult::Continue;
@@ -2823,6 +2885,7 @@
auto ptr = GetPointerToData(pointerId, 0, state);
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
+ auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
if (object.kind == Object::Kind::Constant)
{
@@ -2832,7 +2895,7 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, SIMD::Float(src[i]), state->robust, state->activeLaneMask(), atomic, memoryOrder);
+ SIMD::Store(p, SIMD::Float(src[i]), robustness, state->activeLaneMask(), atomic, memoryOrder);
});
}
else
@@ -2843,7 +2906,7 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, src.Float(i), state->robust, state->activeLaneMask(), atomic, memoryOrder);
+ SIMD::Store(p, src.Float(i), robustness, state->activeLaneMask(), atomic, memoryOrder);
});
}
@@ -3891,6 +3954,11 @@
auto ptrTy = getType(getObject(ptrId).type);
auto ptr = GetPointerToData(ptrId, 0, state);
bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+ // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
+ // to be in bounds even for inactive lanes.
+ // - Clarify the SPIR-V spec.
+ // - Eliminate lane masking and assume interleaving.
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
for (auto i = 0u; i < type.sizeInComponents; i++)
{
@@ -3899,7 +3967,7 @@
dst.move(i, frac);
auto p = ptr + (i * sizeof(float));
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, whole, state->robust, state->activeLaneMask());
+ SIMD::Store(p, whole, robustness, state->activeLaneMask());
}
break;
}
@@ -4024,6 +4092,11 @@
auto ptrTy = getType(getObject(ptrId).type);
auto ptr = GetPointerToData(ptrId, 0, state);
bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+ // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
+ // to be in bounds even for inactive lanes.
+ // - Clarify the SPIR-V spec.
+ // - Eliminate lane masking and assume interleaving.
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
for (auto i = 0u; i < type.sizeInComponents; i++)
{
@@ -4035,7 +4108,7 @@
auto p = ptr + (i * sizeof(float));
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, exponent, state->robust, state->activeLaneMask());
+ SIMD::Store(p, exponent, robustness, state->activeLaneMask());
}
break;
}
@@ -5245,13 +5318,18 @@
auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
+ // "The value returned by a read of an invalid texel is undefined,
+ // unless that read operation is from a buffer resource and the robustBufferAccess feature is enabled."
+ // TODO: Don't always assume a buffer resource.
+ auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
+
SIMD::Int packed[4];
// Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
// of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
// TODO: specialize for small formats?
for (auto i = 0; i < (texelSize + 3)/4; i++)
{
- packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->robust, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
+ packed[i] = SIMD::Load<SIMD::Int>(texelPtr, robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
texelPtr += sizeof(float);
}
@@ -5587,9 +5665,12 @@
auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, 0, false);
+ // SPIR-V 1.4: "If the coordinates are outside the image, the memory location that is accessed is undefined."
+ auto robustness = OutOfBoundsBehavior::UndefinedValue;
+
for (auto i = 0u; i < numPackedElements; i++)
{
- SIMD::Store(texelPtr, packed[i], state->robust, state->activeLaneMask());
+ SIMD::Store(texelPtr, packed[i], robustness, state->activeLaneMask());
texelPtr += sizeof(float);
}
@@ -5778,8 +5859,11 @@
if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
if (srcInterleavedByLane) { src = interleaveByLane(src); }
- auto value = SIMD::Load<SIMD::Float>(src, state->robust, state->activeLaneMask());
- SIMD::Store(dst, value, state->robust, state->activeLaneMask());
+ // TODO(b/131224163): Optimize based on src/dst storage classes.
+ auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
+
+ auto value = SIMD::Load<SIMD::Float>(src, robustness, state->activeLaneMask());
+ SIMD::Store(dst, value, robustness, state->activeLaneMask());
});
return EmitResult::Continue;
}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index d97befb..9acd451 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -55,6 +55,14 @@
// Forward declarations.
class SpirvRoutine;
+ enum class OutOfBoundsBehavior
+ {
+ Nullify, // Loads become zero, stores are elided.
+ RobustBufferAccess, // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing).
+ UndefinedValue, // Only for load operations. Not secure. No program termination.
+ UndefinedBehavior, // Program may terminate.
+ };
+
// SIMD contains types that represent multiple scalars packed into a single
// vector data type. Types in the SIMD namespace provide a semantic hint
// that the data should be treated as a per-execution-lane scalar instead of
@@ -257,16 +265,16 @@
template <> struct Element<UInt> { using type = rr::UInt; };
template<typename T>
- void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
+ void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
template<typename T>
- void Store(Pointer ptr, RValue<T> val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
+ void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
{
- Store(ptr, T(val), robust, mask, atomic, order);
+ Store(ptr, T(val), robustness, mask, atomic, order);
}
template<typename T>
- T Load(Pointer ptr, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
+ T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
}
// Incrementally constructed complex bundle of rvalues
@@ -850,6 +858,7 @@
Function::ID entryPoint;
const bool robustBufferAccess = true;
+ spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.
// DeclareType creates a Type for the given OpTypeX instruction, storing
// it into the types map. It is called from the analysis pass (constructor).
@@ -934,13 +943,16 @@
Function::ID function,
RValue<SIMD::Int> activeLaneMask,
const vk::DescriptorSet::Bindings &descriptorSets,
- bool robustBufferAccess)
+ bool robustBufferAccess,
+ spv::ExecutionModel executionModel)
: routine(routine),
function(function),
activeLaneMaskValue(activeLaneMask.value),
descriptorSets(descriptorSets),
- robust(robustBufferAccess)
+ robustBufferAccess(robustBufferAccess),
+ executionModel(executionModel)
{
+ ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting.
}
RValue<SIMD::Int> activeLaneMask() const
@@ -975,7 +987,7 @@
const vk::DescriptorSet::Bindings &descriptorSets;
- const bool robust = true; // Emit robustBufferAccess safe code.
+ OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
Intermediate& createIntermediate(Object::ID id, uint32_t size)
{
@@ -1005,9 +1017,13 @@
ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
return it->second;
}
+
private:
std::unordered_map<Object::ID, Intermediate> intermediates;
std::unordered_map<Object::ID, SIMD::Pointer> pointers;
+
+ const bool robustBufferAccess = true; // Emit robustBufferAccess safe code.
+ const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
};
// EmitResult is an enumerator of result values from the Emit functions.
@@ -1203,6 +1219,8 @@
static sw::FilterType convertFilterMode(const vk::Sampler *sampler);
static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler);
static sw::AddressingMode convertAddressingMode(int coordinateIndex, VkSamplerAddressMode addressMode, VkImageViewType imageViewType);
+
+ // Returns 0 when invalid.
static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
};