Switch SIMD::Pointer::base from a Float* to Byte*
While having the base unit be sizeof(float) is handy (as that's the smallest unit we currently support), it makes limit checking tricky, as we're continually converting between byte and float units.
This sets us up nicely for when we want to support smaller types.
Change-Id: I3e334b6df4899f433b281118be23fdf8df6d9829
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29328
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 66bdb8d..51a419e 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1063,15 +1063,15 @@
// a functor for each scalar element within the object.
// The functor's first parameter is the index of the scalar element;
- // the second parameter is the offset (in sizeof(float) units) from
- // the base of the object.
+ // the second parameter is the offset (in bytes) from the base of the
+ // object.
ApplyDecorationsForId(&d, id);
auto const &type = getType(id);
if (d.HasOffset)
{
- offset += d.Offset / sizeof(float);
+ offset += d.Offset;
d.HasOffset = false;
}
@@ -1086,7 +1086,7 @@
break;
case spv::OpTypeVector:
{
- auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride / sizeof(float) : 1;
+ auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
for (auto i = 0u; i < type.definition.word(3); i++)
{
VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
@@ -1095,7 +1095,7 @@
}
case spv::OpTypeMatrix:
{
- auto columnStride = (d.HasRowMajor && d.RowMajor) ? 1 : d.MatrixStride / sizeof(float);
+ auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
d.InsideMatrix = true;
for (auto i = 0u; i < type.definition.word(3); i++)
{
@@ -1117,7 +1117,7 @@
for (auto i = 0u; i < arraySize; i++)
{
ASSERT(d.HasArrayStride);
- VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride / sizeof(float), f);
+ VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
}
break;
}
@@ -1143,7 +1143,7 @@
// Objects without explicit layout are tightly packed.
for (auto i = 0u; i < getType(type.element).sizeInComponents; i++)
{
- f(i, i);
+ f(i, i * sizeof(float));
}
}
}
@@ -1172,7 +1172,7 @@
auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
- Pointer<Byte> bufferInfo = Pointer<Byte>(set.base) + bindingOffset; // VkDescriptorBufferInfo*
+ Pointer<Byte> bufferInfo = set.base + bindingOffset; // VkDescriptorBufferInfo*
Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(bufferInfo + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
Int offset = *Pointer<Int>(bufferInfo + OFFSET(VkDescriptorBufferInfo, offset));
@@ -1269,7 +1269,7 @@
int memberIndex = GetConstantInt(indexIds[i]);
ApplyDecorationsForIdMember(&d, typeId, memberIndex);
ASSERT(d.HasOffset);
- constantOffset += d.Offset / sizeof(float);
+ constantOffset += d.Offset;
typeId = type.definition.word(2u + memberIndex);
break;
}
@@ -1281,11 +1281,11 @@
auto & obj = getObject(indexIds[i]);
if (obj.kind == Object::Kind::Constant)
{
- constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
+ constantOffset += d.ArrayStride * GetConstantInt(indexIds[i]);
}
else
{
- ptr.addOffset(SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0));
+ ptr.addOffset(SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0));
}
typeId = type.element;
break;
@@ -1295,7 +1295,7 @@
// TODO: b/127950082: Check bounds.
ASSERT(d.HasMatrixStride);
d.InsideMatrix = true;
- auto columnStride = (d.HasRowMajor && d.RowMajor) ? 1 : d.MatrixStride/sizeof(float);
+ auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
auto & obj = getObject(indexIds[i]);
if (obj.kind == Object::Kind::Constant)
{
@@ -1310,7 +1310,7 @@
}
case spv::OpTypeVector:
{
- auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride / sizeof(float) : 1;
+ auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
auto & obj = getObject(indexIds[i]);
if (obj.kind == Object::Kind::Constant)
{
@@ -1357,7 +1357,7 @@
int offsetIntoStruct = 0;
for (auto j = 0; j < memberIndex; j++) {
auto memberType = type.definition.word(2u + j);
- offsetIntoStruct += getType(memberType).sizeInComponents;
+ offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
}
constantOffset += offsetIntoStruct;
typeId = type.definition.word(2u + memberIndex);
@@ -1370,7 +1370,7 @@
case spv::OpTypeRuntimeArray:
{
// TODO: b/127950082: Check bounds.
- auto stride = getType(type.element).sizeInComponents;
+ auto stride = getType(type.element).sizeInComponents * sizeof(float);
auto & obj = getObject(indexIds[i]);
if (obj.kind == Object::Kind::Constant)
{
@@ -1398,7 +1398,7 @@
uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
{
- uint32_t constantOffset = 0;
+ uint32_t componentOffset = 0;
for (auto i = 0u; i < numIndexes; i++)
{
@@ -1413,7 +1413,7 @@
auto memberType = type.definition.word(2u + j);
offsetIntoStruct += getType(memberType).sizeInComponents;
}
- constantOffset += offsetIntoStruct;
+ componentOffset += offsetIntoStruct;
typeId = type.definition.word(2u + memberIndex);
break;
}
@@ -1424,7 +1424,7 @@
{
auto elementType = type.definition.word(2);
auto stride = getType(elementType).sizeInComponents;
- constantOffset += stride * indexes[i];
+ componentOffset += stride * indexes[i];
typeId = elementType;
break;
}
@@ -1434,7 +1434,7 @@
}
}
- return constantOffset;
+ return componentOffset;
}
void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
@@ -2326,8 +2326,8 @@
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(o) + Extract(ptr.offset, j);
- if (interleavedByLane) { offset = offset * SIMD::Width + j; }
- load[i] = Insert(load[i], Load(&ptr.base[offset], sizeof(float), atomic, memoryOrder), j);
+ if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+ load[i] = Insert(load[i], Load(Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder), j);
}
}
});
@@ -2338,18 +2338,19 @@
if (interleavedByLane)
{
// Lane-interleaved data.
- Pointer<SIMD::Float> src = ptr.base;
- VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+ VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
- load[i] = Load(&src[o], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
+ Pointer<SIMD::Float> src = &ptr.base[offset * SIMD::Width];
+ load[i] = Load(src, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
});
}
else
{
// Non-interleaved data.
- VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+ VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
- load[i] = RValue<SIMD::Float>(Load(&ptr.base[o], sizeof(float), atomic, memoryOrder)); // TODO: optimize alignment
+ Pointer<Float> src = &ptr.base[offset];
+ load[i] = RValue<SIMD::Float>(Load(src, sizeof(float), atomic, memoryOrder)); // TODO: optimize alignment
});
}
}
@@ -2408,8 +2409,8 @@
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(o) + Extract(ptr.offset, j);
- if (interleavedByLane) { offset = offset * SIMD::Width + j; }
- Store(RValue<Float>(src[i]), &ptr.base[offset], sizeof(float), atomic, memoryOrder);
+ if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+ Store(Float(src[i]), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
}
}
});
@@ -2418,10 +2419,10 @@
{
// Constant source data.
// No divergent offsets or masked lanes.
- Pointer<SIMD::Float> dst = ptr.base;
- VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+ VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
- Store(RValue<SIMD::Float>(src[i]), &dst[o], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
+ Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width];
+ Store(SIMD::Float(src[i]), dst, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
});
}
}
@@ -2439,8 +2440,8 @@
If(Extract(state->activeLaneMask(), j) != 0)
{
Int offset = Int(o) + Extract(ptr.offset, j);
- if (interleavedByLane) { offset = offset * SIMD::Width + j; }
- Store(Extract(src.Float(i), j), &ptr.base[offset], sizeof(float), atomic, memoryOrder);
+ if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+ Store(Extract(src.Float(i), j), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
}
}
});
@@ -2451,19 +2452,19 @@
if (interleavedByLane)
{
// Lane-interleaved data.
- Pointer<SIMD::Float> dst = ptr.base;
- VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+ VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
- Store(src.Float(i), &dst[o], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
+ Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width];
+ Store(src.Float(i), dst, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
});
}
else
{
// Intermediate source data. Non-interleaved data.
- Pointer<SIMD::Float> dst = ptr.base;
- VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+ VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
- Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[o], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
+ Pointer<SIMD::Float> dst = &ptr.base[offset];
+ Store(SIMD::Float(src.Float(i)), dst, sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
});
}
}
@@ -3518,9 +3519,9 @@
{
If(Extract(state->activeLaneMask(), j) != 0)
{
- Int offset = Int(i) + Extract(ptr.offset, j);
- if (interleavedByLane) { offset = offset * SIMD::Width + j; }
- Store(Extract(whole, j), &ptr.base[offset], sizeof(float), false, std::memory_order_relaxed);
+ Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
+ if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+ Store(Extract(whole, j), Pointer<Float>(&ptr.base[offset]), sizeof(float), false, std::memory_order_relaxed);
}
}
}
@@ -3660,12 +3661,11 @@
// TODO: Refactor and consolidate with EmitStore.
for (int j = 0; j < SIMD::Width; j++)
{
- auto ptrBase = Pointer<Int>(ptr.base);
If(Extract(state->activeLaneMask(), j) != 0)
{
- Int offset = Int(i) + Extract(ptr.offset, j);
- if (interleavedByLane) { offset = offset * SIMD::Width + j; }
- Store(Extract(exponent, j), &ptrBase[offset], sizeof(uint32_t), false, std::memory_order_relaxed);
+ Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
+ if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(uint32_t)); }
+ Store(Extract(exponent, j), Pointer<Int>(&ptr.base[offset]), sizeof(uint32_t), false, std::memory_order_relaxed);
}
}
}
@@ -4735,8 +4735,7 @@
Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset);
Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
- // TODO: texelOffset is in bytes - remove shift when SIMD::Pointer is offset by bytes.
- SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t)) >> 2;
+ SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t));
state->routine->createPointer(resultId, SIMD::Pointer(imageBase, texelOffset));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 7a0b30e..ec0d448 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -71,9 +71,9 @@
inline void addOffset(Int delta) { offset += delta; uniform = false; }
// Base address for the pointer, common across all lanes.
- rr::Pointer<rr::Float> base;
+ rr::Pointer<rr::Byte> base;
- // Per lane offsets from base.
+ // Per lane offsets from base in bytes.
// If uniform is true, all offsets are considered zero.
Int offset;
@@ -624,6 +624,8 @@
SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
+
+ // Returns the *component* offset in the literal for the given access chain.
uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;
// EmitState holds control-flow state for the emit() pass.
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index f28da09..f6341e2 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2349,11 +2349,30 @@
}
template<typename T>
+ RValue<T> Load(Pointer<T> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+ {
+ return Load(RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
+ }
+
+ template<typename T>
void Store(RValue<T> value, RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
{
Nucleus::createStore(value.value, pointer.value, T::getType(), false, alignment, atomic, memoryOrder);
}
+ template<typename T>
+ void Store(RValue<T> value, Pointer<T> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+ {
+ Store(value, RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
+ }
+
+ template<typename T>
+ void Store(T value, Pointer<T> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+ {
+ Store(RValue<T>(value), RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
+ }
+
+
template<class T, int S = 1>
class Array : public LValue<T>
{