Eliminate multiplication operators on SIMD::Pointer
Multiplication of a pointer is nonsensical. This operator was abused for
signifying that the previously added offset has to be multiplied.
This was only ever done in the InterleaveByLane() function, to multiply
by SIMD::Width. This accomplishes the 'interleaving' of multiple
composite objects' elements, from a pointer+offset which previously only
addressed a single element (i.e. as if our SIMD width is 1).
InterleaveByLane() was renamed to GetElementPointer() to clarify its
purpose, and now takes the offset as a parameter so we can explicitly
multiply it before adding it to the SIMD::Pointer.
SpirvShader::WalkAccessChain() was likewise adjusted to explicitly take
the SIMD width into account for offsetting the pointers to interleaved
elements.
Lastly, SpirvShader::Interpolate() previously assumed the pointer
offsets were not already scaled by SIMD::Width, so the extra scaling
factor now has to be undone as well to obtain the indexes.
Bug: b/234488907
Change-Id: I67db76d5884851e86ddf1e426ae394106edde89b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/66228
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index c6377ed..c87d535 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1366,6 +1366,8 @@
auto &baseObject = getObject(baseId);
Type::ID typeId = getType(baseObject).element;
Decorations d = GetDecorationsForId(baseObject.typeId());
+ auto storageClass = getType(baseObject).storageClass;
+ bool interleavedByLane = IsStorageInterleavedByLane(storageClass);
auto ptr = state->getPointer(baseId);
OffsetToElement(ptr, elementId, d.ArrayStride, state);
@@ -1397,7 +1399,7 @@
case spv::OpTypeRuntimeArray:
{
// TODO(b/127950082): Check bounds.
- if(getType(baseObject).storageClass == spv::StorageClassUniformConstant)
+ if(storageClass == spv::StorageClassUniformConstant)
{
// indexing into an array of descriptors.
auto d = descriptorDecorations.at(baseId);
@@ -1419,8 +1421,13 @@
else
{
auto stride = getType(type.element).componentCount * static_cast<uint32_t>(sizeof(float));
- auto &obj = getObject(indexIds[i]);
- if(obj.kind == Object::Kind::Constant)
+
+ if(interleavedByLane)
+ {
+ stride *= SIMD::Width;
+ }
+
+ if(getObject(indexIds[i]).kind == Object::Kind::Constant)
{
ptr += stride * GetConstScalarInt(indexIds[i]);
}
@@ -1440,8 +1447,14 @@
if(constantOffset != 0)
{
+ if(interleavedByLane)
+ {
+ constantOffset *= SIMD::Width;
+ }
+
ptr += constantOffset;
}
+
return ptr;
}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index ebc34cb..5dddf7c 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1023,7 +1023,7 @@
static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
static bool IsExplicitLayout(spv::StorageClass storageClass);
- static sw::SIMD::Pointer InterleaveByLane(sw::SIMD::Pointer p);
+ static sw::SIMD::Pointer GetElementPointer(sw::SIMD::Pointer structure, uint32_t offset, bool interleavedByLane);
// Output storage buffers and images should not be affected by helper invocations
static bool StoresInHelperInvocation(spv::StorageClass storageClass);
diff --git a/src/Pipeline/SpirvShaderDebugger.cpp b/src/Pipeline/SpirvShaderDebugger.cpp
index b0559af..cedd6e1 100644
--- a/src/Pipeline/SpirvShaderDebugger.cpp
+++ b/src/Pipeline/SpirvShaderDebugger.cpp
@@ -1959,7 +1959,7 @@
case Object::Kind::Intermediate:
{
size += objTy.componentCount * sizeof(uint32_t) * sw::SIMD::Width;
- auto dst = InterleaveByLane(SIMD::Pointer(base, 0));
+ auto dst = GetElementPointer(SIMD::Pointer(base, 0), 0, true);
for(uint32_t i = 0u; i < objTy.componentCount; i++)
{
auto val = SpirvShader::Operand(shader, state, objId).Int(i);
@@ -1974,7 +1974,7 @@
{
size += sizeof(void *) + sizeof(uint32_t) * SIMD::Width;
auto ptr = state->getPointer(objId);
- store(base, ptr.base);
+ store(base, ptr.getUniformPointer());
store(base + sizeof(void *), ptr.offsets());
entry.kind = Entry::Kind::Pointer;
}
diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp
index fd09c9e..f2c43d3 100644
--- a/src/Pipeline/SpirvShaderGLSLstd450.cpp
+++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp
@@ -1094,6 +1094,11 @@
uint32_t packedInterpolant = GetPackedInterpolant(location);
Pointer<Byte> planeEquation = interpolationData.primitive + OFFSET(Primitive, V[packedInterpolant]);
+
+ // The pointer's offsets index into the input variable array, which are SIMD::Float vectors.
+ // To obtain the index into the interpolant's plane equation we must unscale by the vector size.
+ const int offsetShift = log2i(sizeof(float) * SIMD::Width);
+
if(ptr.hasDynamicOffsets)
{
// Combine plane equations into one
@@ -1103,7 +1108,7 @@
for(int i = 0; i < SIMD::Width; ++i)
{
- Int offset = ((Extract(ptr.dynamicOffsets, i) + ptr.staticOffsets[i]) >> 2) + component;
+ Int offset = ((Extract(ptr.dynamicOffsets, i) + ptr.staticOffsets[i]) >> offsetShift) + component;
Pointer<Byte> planeEquationI = planeEquation + (offset * sizeof(PlaneEquation));
A = Insert(A, Extract(*Pointer<SIMD::Float>(planeEquationI + OFFSET(PlaneEquation, A), 16), i), i);
B = Insert(B, Extract(*Pointer<SIMD::Float>(planeEquationI + OFFSET(PlaneEquation, B), 16), i), i);
@@ -1115,7 +1120,7 @@
{
ASSERT(ptr.hasStaticEqualOffsets());
- uint32_t offset = (ptr.staticOffsets[0] >> 2) + component;
+ uint32_t offset = (ptr.staticOffsets[0] >> offsetShift) + component;
if((interpolant + offset) >= inputs.size())
{
return SIMD::Float(0.0f);
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp
index 0fec9c9..b44ed04 100644
--- a/src/Pipeline/SpirvShaderMemory.cpp
+++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -59,8 +59,7 @@
auto robustness = getOutOfBoundsBehavior(pointerId, state);
VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
- auto p = ptr + el.offset;
- if(interleavedByLane) { p = InterleaveByLane(p); } // TODO: Interleave once, then add offset?
+ auto p = GetElementPointer(ptr, el.offset, interleavedByLane);
dst.move(el.index, p.Load<SIMD::Float>(robustness, state->activeLaneMask(), atomic, memoryOrder));
});
@@ -111,8 +110,7 @@
SPIRV_SHADER_DBG("Store(atomic: {0}, order: {1}, ptr: {2}, val: {3}, mask: {4}", atomic, int(memoryOrder), ptr, value, mask);
VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
- auto p = ptr + el.offset;
- if(interleavedByLane) { p = InterleaveByLane(p); }
+ auto p = GetElementPointer(ptr, el.offset, interleavedByLane);
p.Store(value.Float(el.index), robustness, mask, atomic, memoryOrder);
});
}
@@ -225,8 +223,7 @@
auto ptr = GetPointerToData(resultId, 0, false, state);
Operand initialValue(this, state, initializerId);
VisitMemoryObject(resultId, [&](const MemoryElement &el) {
- auto p = ptr + el.offset;
- if(interleavedByLane) { p = InterleaveByLane(p); }
+ auto p = GetElementPointer(ptr, el.offset, interleavedByLane);
auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds.
p.Store(initialValue.Float(el.index), robustness, state->activeLaneMask());
});
@@ -269,10 +266,8 @@
auto srcOffset = it->second;
auto dstOffset = el.offset;
- auto dst = dstPtr + dstOffset;
- auto src = srcPtr + srcOffset;
- if(dstInterleavedByLane) { dst = InterleaveByLane(dst); }
- if(srcInterleavedByLane) { src = InterleaveByLane(src); }
+ auto dst = GetElementPointer(dstPtr, dstOffset, dstInterleavedByLane);
+ auto src = GetElementPointer(srcPtr, srcOffset, srcInterleavedByLane);
// TODO(b/131224163): Optimize based on src/dst storage classes.
auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
@@ -538,14 +533,21 @@
}
}
-sw::SIMD::Pointer SpirvShader::InterleaveByLane(sw::SIMD::Pointer p)
+sw::SIMD::Pointer SpirvShader::GetElementPointer(sw::SIMD::Pointer structure, uint32_t offset, bool interleavedByLane)
{
- p *= sw::SIMD::Width;
- p.staticOffsets[0] += 0 * sizeof(float);
- p.staticOffsets[1] += 1 * sizeof(float);
- p.staticOffsets[2] += 2 * sizeof(float);
- p.staticOffsets[3] += 3 * sizeof(float);
- return p;
+ if(interleavedByLane)
+ {
+ structure.staticOffsets[0] += 0 * sizeof(float);
+ structure.staticOffsets[1] += 1 * sizeof(float);
+ structure.staticOffsets[2] += 2 * sizeof(float);
+ structure.staticOffsets[3] += 3 * sizeof(float);
+
+ return structure + offset * sw::SIMD::Width;
+ }
+ else
+ {
+ return structure + offset;
+ }
}
bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index f6f12ca..03477aa 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -4411,27 +4411,12 @@
return *this;
}
-Pointer4 &Pointer4::operator*=(Int4 i)
-{
- ASSERT_MSG(isBasePlusOffset, "No offset to multiply for this type of pointer");
- dynamicOffsets = offsets() * i;
- staticOffsets = {};
- hasDynamicOffsets = true;
- return *this;
-}
-
Pointer4 Pointer4::operator+(Int4 i)
{
Pointer4 p = *this;
p += i;
return p;
}
-Pointer4 Pointer4::operator*(Int4 i)
-{
- Pointer4 p = *this;
- p *= i;
- return p;
-}
Pointer4 &Pointer4::operator+=(int i)
{
@@ -4446,29 +4431,12 @@
return *this;
}
-Pointer4 &Pointer4::operator*=(int i)
-{
- ASSERT_MSG(isBasePlusOffset, "No offset to multiply for this type of pointer");
- for(int el = 0; el < 4; el++) { staticOffsets[el] *= i; }
- if(hasDynamicOffsets)
- {
- dynamicOffsets *= Int4(i);
- }
- return *this;
-}
-
Pointer4 Pointer4::operator+(int i)
{
Pointer4 p = *this;
p += i;
return p;
}
-Pointer4 Pointer4::operator*(int i)
-{
- Pointer4 p = *this;
- p *= i;
- return p;
-}
Int4 Pointer4::offsets() const
{
@@ -4482,17 +4450,17 @@
if(isStaticallyInBounds(accessSize, robustness))
{
- return Int4(0xffffffff);
+ return Int4(0xFFFFFFFF);
}
if(!hasDynamicOffsets && !hasDynamicLimit)
{
// Common fast paths.
return Int4(
- (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
- (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
- (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
- (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
+ (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
+ (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
+ (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
+ (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0);
}
return CmpGE(offsets(), Int4(0)) & CmpLT(offsets() + Int4(accessSize - 1), Int4(limit()));
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 1c1ca43..e63d984 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2158,16 +2158,9 @@
Pointer4(std::array<Pointer<Byte>, 4> pointers);
Pointer4 &operator+=(Int4 i);
- Pointer4 &operator*=(Int4 i);
-
Pointer4 operator+(Int4 i);
- Pointer4 operator*(Int4 i);
-
Pointer4 &operator+=(int i);
- Pointer4 &operator*=(int i);
-
Pointer4 operator+(int i);
- Pointer4 operator*(int i);
Int4 offsets() const;