SpirvShader: Split SIMD::Pointer limits into dynamic and static parts.
This change does not alter behavior yet, but is a stepping stone to optimizations.
Bug: b/135609394
Change-Id: I9020d4819b0e6f4bdd9564c2407c1903b7f33f4f
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33052
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index b6866a2..8f9aa31 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -291,7 +291,7 @@
{
using EL = typename Element<T>::type;
auto offsets = ptr.offsets();
- mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
+ mask &= ptr.isInBounds(sizeof(float)); // Disable OOB reads.
if (!atomic && order == std::memory_order_relaxed)
{
return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, sizeof(float));
@@ -335,7 +335,7 @@
{
using EL = typename Element<T>::type;
auto offsets = ptr.offsets();
- mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
+ mask &= ptr.isInBounds(sizeof(float)); // Disable OOB writes.
if (!atomic && order == std::memory_order_relaxed)
{
return rr::Scatter(rr::Pointer<EL>(ptr.base), val, offsets, mask, sizeof(float));
@@ -5863,7 +5863,7 @@
ASSERT(d.HasOffset);
auto arrayBase = structBase + d.Offset;
- auto arraySizeInBytes = SIMD::Int(arrayBase.limit) - arrayBase.offsets();
+ auto arraySizeInBytes = SIMD::Int(arrayBase.limit()) - arrayBase.offsets();
auto arrayLength = arraySizeInBytes / SIMD::Int(arrayElTy.sizeInComponents * sizeof(float));
result.move(0, SIMD::Int(arrayLength));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index fb78278..4400542 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -72,9 +72,28 @@
struct Pointer
{
Pointer(rr::Pointer<Byte> base, rr::Int limit)
- : base(base), limit(limit), dynamicOffsets(0), staticOffsets{}, hasDynamicOffsets(false) {}
+ : base(base),
+ dynamicLimit(limit), staticLimit(0),
+ dynamicOffsets(0), staticOffsets{},
+ hasDynamicLimit(true), hasDynamicOffsets(false) {}
+
+ Pointer(rr::Pointer<Byte> base, unsigned int limit)
+ : base(base),
+ dynamicLimit(0), staticLimit(limit),
+ dynamicOffsets(0), staticOffsets{},
+ hasDynamicLimit(false), hasDynamicOffsets(false) {}
+
Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
- : base(base), limit(limit), dynamicOffsets(offset), staticOffsets{}, hasDynamicOffsets(true) {}
+ : base(base),
+ dynamicLimit(limit), staticLimit(0),
+ dynamicOffsets(offset), staticOffsets{},
+ hasDynamicLimit(true), hasDynamicOffsets(true) {}
+
+ Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
+ : base(base),
+ dynamicLimit(0), staticLimit(limit),
+ dynamicOffsets(offset), staticOffsets{},
+ hasDynamicLimit(false), hasDynamicOffsets(true) {}
inline Pointer& operator += (Int i)
{
@@ -119,6 +138,18 @@
return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]);
}
+ inline SIMD::Int isInBounds(unsigned int accessSize) const
+ {
+ ASSERT(accessSize > 0);
+
+ return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
+ }
+
+ inline Int limit() const
+ {
+ return dynamicLimit + staticLimit;
+ }
+
// Returns true if all offsets are sequential (N+0, N+1, N+2, N+3)
inline rr::Bool hasSequentialOffsets() const
{
@@ -161,14 +192,15 @@
rr::Pointer<rr::Byte> base;
// Upper (non-inclusive) limit for offsets from base.
- rr::Int limit;
+ rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
+ unsigned int staticLimit;
// Per lane offsets from base.
SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
std::array<int32_t, SIMD::Width> staticOffsets;
- // True if all dynamicOffsets are zero.
- bool hasDynamicOffsets;
+ bool hasDynamicLimit; // True if dynamicLimit is non-zero.
+ bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero.
};
template <typename T> struct Element {};