Optimize out-of-bounds checks with unsigned compare
Also separate the out-of-bounds handling from the offset calculations.
Bug: b/155862459
Change-Id: Ib6c5ea029d16fc411d792e6c129b37386d48fc1d
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/45092
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index d8c76dd..e6c0438 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -509,44 +509,26 @@
? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
: OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
- // If the out of bounds behavior is set to nullify, then out of bounds coordinates must be properly detected.
- // Other out of bounds behaviors work properly without precise out of bounds coordinate detection.
- bool nullifyOutOfBounds = (outOfBoundsBehavior == OutOfBoundsBehavior::Nullify);
+ SIMD::Int ptrOffset = u * SIMD::Int(texelSize);
- SIMD::Int ptrOffset(0);
- SIMD::Int oobMask(0);
- if(nullifyOutOfBounds)
- {
- auto width = SIMD::Int(*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, extent.width)));
- oobMask |= CmpLT(u, SIMD::Int(0)) | CmpNLT(u, width);
- }
- ptrOffset += u * SIMD::Int(texelSize);
if(dims > 1)
{
- if(nullifyOutOfBounds)
- {
- auto height = SIMD::Int(*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, extent.height)));
- oobMask |= CmpLT(v, SIMD::Int(0)) | CmpNLT(v, height);
- }
ptrOffset += v * rowPitch;
}
+
+ SIMD::Int w = 0;
if((dims > 2) || isArrayed)
{
- SIMD::Int w(0);
if(dims > 2)
{
w += coordinate.Int(2);
}
+
if(isArrayed)
{
w += coordinate.Int(dims);
}
- if(nullifyOutOfBounds)
- {
- auto depth = SIMD::Int(*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, extent.depth)));
- auto arrayLayers = SIMD::Int(*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)));
- oobMask |= CmpLT(w, SIMD::Int(0)) | CmpNLT(w, depth * arrayLayers);
- }
+
ptrOffset += w * slicePitch;
}
@@ -562,8 +544,26 @@
ptrOffset += sample.Int(0) * samplePitch;
}
- if(nullifyOutOfBounds)
+ // If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
+ // Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
+ if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
{
+ auto width = SIMD::UInt(*Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, extent.width)));
+ SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(u), width));
+
+ if(dims > 1)
+ {
+ auto height = SIMD::UInt(*Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, extent.height)));
+ oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(v), height));
+ }
+
+ if((dims > 2) || isArrayed)
+ {
+ auto depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, extent.depth));
+ auto arrayLayers = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers));
+ oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(w), SIMD::UInt(depth * arrayLayers)));
+ }
+
constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16; // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
static_assert(OOB_OFFSET >= MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");