Fix workgroup builtins size
NumWorkgroups, WorkgroupId, and WorkgroupSize are uniform globals with
three components. While we store them in a 4-component vector (Int4),
they must not be stored in a SIMD::Int, which holds a separate value per
SIMD lane rather than a single uniform value.
PointCoord was corrected to a 2-component vector (per lane).
The X, Y, and Z enums were replaced by explicit numeric indexes. Also
the loops for multi-component builtins were replaced with explicit
assignment of each component. The asserts for single-component builtins
have been removed since these are checked by spirv-val.
Bug: b/214583550
Change-Id: Iecc81f1f255978e160712fbaa0f069b8e7065705
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/66510
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index f235d8fb..cbdf4ce 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -25,17 +25,6 @@
#include <queue>
-namespace {
-
-enum
-{
- X,
- Y,
- Z
-};
-
-} // anonymous namespace
-
namespace sw {
ComputeProgram::ComputeProgram(vk::Device *device, std::shared_ptr<SpirvShader> shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets)
@@ -63,45 +52,36 @@
void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3])
{
- // TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
- // they are ever going to be read.
+ // TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff they are ever going to be read.
routine->numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
- routine->workgroupID = Insert(Insert(Insert(Int4(0), workgroupID[X], X), workgroupID[Y], Y), workgroupID[Z], Z);
+ routine->workgroupID = Insert(Insert(Insert(Int4(0), workgroupID[0], 0), workgroupID[1], 1), workgroupID[2], 2);
routine->workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
routine->subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup));
routine->invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup));
routine->setInputBuiltin(shader.get(), spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
- for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
- {
- value[builtin.FirstComponent + component] =
- As<SIMD::Float>(SIMD::Int(Extract(routine->numWorkgroups, component)));
- }
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(routine->numWorkgroups.x));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(routine->numWorkgroups.y));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(routine->numWorkgroups.z));
});
routine->setInputBuiltin(shader.get(), spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
- for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
- {
- value[builtin.FirstComponent + component] =
- As<SIMD::Float>(SIMD::Int(workgroupID[component]));
- }
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(workgroupID[0]));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(workgroupID[1]));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(workgroupID[2]));
});
routine->setInputBuiltin(shader.get(), spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
- for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
- {
- value[builtin.FirstComponent + component] =
- As<SIMD::Float>(SIMD::Int(Extract(routine->workgroupSize, component)));
- }
+ value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(routine->workgroupSize.x));
+ value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(routine->workgroupSize.y));
+ value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(routine->workgroupSize.z));
});
routine->setInputBuiltin(shader.get(), spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
- ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->subgroupsPerWorkgroup));
});
routine->setInputBuiltin(shader.get(), spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
- ASSERT(builtin.SizeInComponents == 1);
value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->invocationsPerSubgroup));
});
@@ -113,35 +93,34 @@
Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
- // TODO: Fix Int4 swizzles so we can just use workgroupSize.x, workgroupSize.y.
- Int workgroupSizeX = Extract(workgroupSize, X);
- Int workgroupSizeY = Extract(workgroupSize, Y);
+ Int workgroupSizeX = workgroupSize.x;
+ Int workgroupSizeY = workgroupSize.y;
SIMD::Int localInvocationID[3];
{
SIMD::Int idx = localInvocationIndex;
- localInvocationID[Z] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
- idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo
- localInvocationID[Y] = idx / SIMD::Int(workgroupSizeX);
- idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX); // modulo
- localInvocationID[X] = idx;
+ localInvocationID[2] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
+ idx -= localInvocationID[2] * SIMD::Int(workgroupSizeX * workgroupSizeY); // modulo
+ localInvocationID[1] = idx / SIMD::Int(workgroupSizeX);
+ idx -= localInvocationID[1] * SIMD::Int(workgroupSizeX); // modulo
+ localInvocationID[0] = idx;
}
- Int4 wgID = Insert(Insert(Insert(SIMD::Int(0), workgroupID[X], X), workgroupID[Y], Y), workgroupID[Z], Z);
+ Int4 wgID = Insert(Insert(Insert(SIMD::Int(0), workgroupID[0], 0), workgroupID[1], 1), workgroupID[2], 2);
auto localBase = workgroupSize * wgID;
SIMD::Int globalInvocationID[3];
- globalInvocationID[X] = SIMD::Int(Extract(localBase, X)) + localInvocationID[X];
- globalInvocationID[Y] = SIMD::Int(Extract(localBase, Y)) + localInvocationID[Y];
- globalInvocationID[Z] = SIMD::Int(Extract(localBase, Z)) + localInvocationID[Z];
+ globalInvocationID[0] = SIMD::Int(Extract(localBase, 0)) + localInvocationID[0];
+ globalInvocationID[1] = SIMD::Int(Extract(localBase, 1)) + localInvocationID[1];
+ globalInvocationID[2] = SIMD::Int(Extract(localBase, 2)) + localInvocationID[2];
routine->localInvocationIndex = localInvocationIndex;
routine->subgroupIndex = subgroupIndex;
- routine->localInvocationID[X] = localInvocationID[X];
- routine->localInvocationID[Y] = localInvocationID[Y];
- routine->localInvocationID[Z] = localInvocationID[Z];
- routine->globalInvocationID[X] = globalInvocationID[X];
- routine->globalInvocationID[Y] = globalInvocationID[Y];
- routine->globalInvocationID[Z] = globalInvocationID[Z];
+ routine->localInvocationID[0] = localInvocationID[0];
+ routine->localInvocationID[1] = localInvocationID[1];
+ routine->localInvocationID[2] = localInvocationID[2];
+ routine->globalInvocationID[0] = globalInvocationID[0];
+ routine->globalInvocationID[1] = globalInvocationID[1];
+ routine->globalInvocationID[2] = globalInvocationID[2];
routine->setInputBuiltin(shader.get(), spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
ASSERT(builtin.SizeInComponents == 1);
@@ -228,14 +207,12 @@
Data data;
data.descriptorSets = descriptorSets;
data.descriptorDynamicOffsets = descriptorDynamicOffsets;
- data.numWorkgroups[X] = groupCountX;
- data.numWorkgroups[Y] = groupCountY;
- data.numWorkgroups[Z] = groupCountZ;
- data.numWorkgroups[3] = 0;
- data.workgroupSize[X] = workgroupSizeX;
- data.workgroupSize[Y] = workgroupSizeY;
- data.workgroupSize[Z] = workgroupSizeZ;
- data.workgroupSize[3] = 0;
+ data.numWorkgroups[0] = groupCountX;
+ data.numWorkgroups[1] = groupCountY;
+ data.numWorkgroups[2] = groupCountZ;
+ data.workgroupSize[0] = workgroupSizeX;
+ data.workgroupSize[1] = workgroupSizeY;
+ data.workgroupSize[2] = workgroupSizeZ;
data.invocationsPerSubgroup = invocationsPerSubgroup;
data.invocationsPerWorkgroup = invocationsPerWorkgroup;
data.subgroupsPerWorkgroup = subgroupsPerWorkgroup;
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index 3fc6d97..9af300c 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -72,8 +72,8 @@
{
vk::DescriptorSet::Bindings descriptorSets;
vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets;
- uint4 numWorkgroups; // [x, y, z, 0]
- uint4 workgroupSize; // [x, y, z, 0]
+ uint4 numWorkgroups; // [x, y, z, -]
+ uint4 workgroupSize; // [x, y, z, -]
uint32_t invocationsPerSubgroup; // SPIR-V: "SubgroupSize"
uint32_t subgroupsPerWorkgroup; // SPIR-V: "NumSubgroups"
uint32_t invocationsPerWorkgroup; // Total number of invocations per workgroup.
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index c102bb8..d172fd7 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1592,22 +1592,22 @@
// are only used when debugging. See b/146486064 for more information.
// Give careful consideration to the runtime performance loss before adding
// more state here.
- std::array<SIMD::Int, 2> windowSpacePosition;
- Int layer; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
+ std::array<SIMD::Int, 2> windowSpacePosition; // TODO(b/236162233): SIMD::Int2
+ Int layer; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
Int instanceID;
SIMD::Int vertexIndex;
- std::array<SIMD::Float, 4> fragCoord;
- std::array<SIMD::Float, 4> pointCoord;
+ std::array<SIMD::Float, 4> fragCoord; // TODO(b/236162233): SIMD::Float4
+ std::array<SIMD::Float, 2> pointCoord; // TODO(b/236162233): SIMD::Float2
SIMD::Int helperInvocation;
- SIMD::Int numWorkgroups;
- SIMD::Int workgroupID;
- SIMD::Int workgroupSize;
+ Int4 numWorkgroups;
+ Int4 workgroupID;
+ Int4 workgroupSize;
Int subgroupsPerWorkgroup;
Int invocationsPerSubgroup;
Int subgroupIndex;
SIMD::Int localInvocationIndex;
- std::array<SIMD::Int, 3> localInvocationID;
- std::array<SIMD::Int, 3> globalInvocationID;
+ std::array<SIMD::Int, 3> localInvocationID; // TODO(b/236162233): SIMD::Int3
+ std::array<SIMD::Int, 3> globalInvocationID; // TODO(b/236162233): SIMD::Int3
Pointer<Byte> dbgState; // Pointer to a debugger state.
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 7886b1f..dd439de 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1538,6 +1538,9 @@
Int4(const Int &rhs);
Int4(const Reference<Int> &rhs);
+ template<int T>
+ Int4(const SwizzleMask1<Int4, T> &rhs);
+
RValue<Int4> operator=(int broadcast);
RValue<Int4> operator=(RValue<Int4> rhs);
RValue<Int4> operator=(const Int4 &rhs);
@@ -2658,6 +2661,13 @@
}
template<int T>
+Int4::Int4(const SwizzleMask1<Int4, T> &rhs)
+ : XYZW(this)
+{
+ *this = rhs.operator RValue<Int4>();
+}
+
+template<int T>
Float4::Float4(const SwizzleMask1<Float4, T> &rhs)
: XYZW(this)
{
@@ -3078,7 +3088,7 @@
// Calls the static function pointer fptr with the given arguments args.
template<typename Return, typename... CArgs, typename... RArgs>
-inline CToReactorT<Return> Call(Return(fptr)(CArgs...), RArgs &&... args)
+inline CToReactorT<Return> Call(Return(fptr)(CArgs...), RArgs &&...args)
{
return CallHelper<Return(CArgs...)>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
}
@@ -3086,7 +3096,7 @@
// Calls the static function pointer fptr with the given arguments args.
// Overload for calling functions with void return type.
template<typename... CArgs, typename... RArgs>
-inline void Call(void(fptr)(CArgs...), RArgs &&... args)
+inline void Call(void(fptr)(CArgs...), RArgs &&...args)
{
CallHelper<void(CArgs...)>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
}
@@ -3094,7 +3104,7 @@
// Calls the member function pointer fptr with the given arguments args.
// object can be a Class*, or a Pointer<Byte>.
template<typename Return, typename Class, typename C, typename... CArgs, typename... RArgs>
-inline CToReactorT<Return> Call(Return (Class::*fptr)(CArgs...), C &&object, RArgs &&... args)
+inline CToReactorT<Return> Call(Return (Class::*fptr)(CArgs...), C &&object, RArgs &&...args)
{
using Helper = CallHelper<Return(Class *, void *, CArgs...)>;
using fptrTy = decltype(fptr);
@@ -3118,7 +3128,7 @@
// Overload for calling functions with void return type.
// object can be a Class*, or a Pointer<Byte>.
template<typename Class, typename C, typename... CArgs, typename... RArgs>
-inline void Call(void (Class::*fptr)(CArgs...), C &&object, RArgs &&... args)
+inline void Call(void (Class::*fptr)(CArgs...), C &&object, RArgs &&...args)
{
using Helper = CallHelper<void(Class *, void *, CArgs...)>;
using fptrTy = decltype(fptr);
@@ -3175,7 +3185,7 @@
// Calls the Reactor function pointer fptr with the signature FUNCTION_SIGNATURE and arguments.
// Overload for calling functions with non-void return type.
template<typename FUNCTION_SIGNATURE, typename... RArgs>
-inline CToReactorT<NonVoidFunctionReturnType<FUNCTION_SIGNATURE>> Call(Pointer<Byte> fptr, RArgs &&... args)
+inline CToReactorT<NonVoidFunctionReturnType<FUNCTION_SIGNATURE>> Call(Pointer<Byte> fptr, RArgs &&...args)
{
return CallHelper<FUNCTION_SIGNATURE>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
}
@@ -3183,7 +3193,7 @@
// Calls the Reactor function pointer fptr with the signature FUNCTION_SIGNATURE and arguments.
// Overload for calling functions with void return type.
template<typename FUNCTION_SIGNATURE, typename... RArgs>
-inline VoidFunctionReturnType<FUNCTION_SIGNATURE> Call(Pointer<Byte> fptr, RArgs &&... args)
+inline VoidFunctionReturnType<FUNCTION_SIGNATURE> Call(Pointer<Byte> fptr, RArgs &&...args)
{
CallHelper<FUNCTION_SIGNATURE>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
}