Fix workgroup builtins size

NumWorkgroups, WorkgroupId, and WorkgroupSize are uniform globals with
three components. While we store them in a 4-component vector, they must
not be stored in a SIMD::Int.

PointCoord storage was corrected from four to two components (each
component is a per-lane SIMD::Float).

The X, Y, and Z enums were replaced by explicit numeric indexes. Also
the loops for multi-component builtins were replaced with explicit
assignment of each component. The asserts for single-component builtins
in setWorkgroupBuiltins() have been removed since the component counts
are already checked by spirv-val.

Bug: b/214583550
Change-Id: Iecc81f1f255978e160712fbaa0f069b8e7065705
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/66510
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index f235d8fb..cbdf4ce 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -25,17 +25,6 @@
 
 #include <queue>
 
-namespace {
-
-enum
-{
-	X,
-	Y,
-	Z
-};
-
-}  // anonymous namespace
-
 namespace sw {
 
 ComputeProgram::ComputeProgram(vk::Device *device, std::shared_ptr<SpirvShader> shader, vk::PipelineLayout const *pipelineLayout, const vk::DescriptorSet::Bindings &descriptorSets)
@@ -63,45 +52,36 @@
 
 void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine *routine, Int workgroupID[3])
 {
-	// TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
-	// they are ever going to be read.
+	// TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff they are ever going to be read.
 	routine->numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
-	routine->workgroupID = Insert(Insert(Insert(Int4(0), workgroupID[X], X), workgroupID[Y], Y), workgroupID[Z], Z);
+	routine->workgroupID = Insert(Insert(Insert(Int4(0), workgroupID[0], 0), workgroupID[1], 1), workgroupID[2], 2);
 	routine->workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
 	routine->subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup));
 	routine->invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup));
 
 	routine->setInputBuiltin(shader.get(), spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
-		for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
-		{
-			value[builtin.FirstComponent + component] =
-			    As<SIMD::Float>(SIMD::Int(Extract(routine->numWorkgroups, component)));
-		}
+		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(routine->numWorkgroups.x));
+		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(routine->numWorkgroups.y));
+		value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(routine->numWorkgroups.z));
 	});
 
 	routine->setInputBuiltin(shader.get(), spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
-		for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
-		{
-			value[builtin.FirstComponent + component] =
-			    As<SIMD::Float>(SIMD::Int(workgroupID[component]));
-		}
+		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(workgroupID[0]));
+		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(workgroupID[1]));
+		value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(workgroupID[2]));
 	});
 
 	routine->setInputBuiltin(shader.get(), spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
-		for(uint32_t component = 0; component < builtin.SizeInComponents; component++)
-		{
-			value[builtin.FirstComponent + component] =
-			    As<SIMD::Float>(SIMD::Int(Extract(routine->workgroupSize, component)));
-		}
+		value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(routine->workgroupSize.x));
+		value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(routine->workgroupSize.y));
+		value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(routine->workgroupSize.z));
 	});
 
 	routine->setInputBuiltin(shader.get(), spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
-		ASSERT(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->subgroupsPerWorkgroup));
 	});
 
 	routine->setInputBuiltin(shader.get(), spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
-		ASSERT(builtin.SizeInComponents == 1);
 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->invocationsPerSubgroup));
 	});
 
@@ -113,35 +93,34 @@
 	Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
 	Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
 
-	// TODO: Fix Int4 swizzles so we can just use workgroupSize.x, workgroupSize.y.
-	Int workgroupSizeX = Extract(workgroupSize, X);
-	Int workgroupSizeY = Extract(workgroupSize, Y);
+	Int workgroupSizeX = workgroupSize.x;
+	Int workgroupSizeY = workgroupSize.y;
 
 	SIMD::Int localInvocationID[3];
 	{
 		SIMD::Int idx = localInvocationIndex;
-		localInvocationID[Z] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
-		idx -= localInvocationID[Z] * SIMD::Int(workgroupSizeX * workgroupSizeY);  // modulo
-		localInvocationID[Y] = idx / SIMD::Int(workgroupSizeX);
-		idx -= localInvocationID[Y] * SIMD::Int(workgroupSizeX);  // modulo
-		localInvocationID[X] = idx;
+		localInvocationID[2] = idx / SIMD::Int(workgroupSizeX * workgroupSizeY);
+		idx -= localInvocationID[2] * SIMD::Int(workgroupSizeX * workgroupSizeY);  // modulo
+		localInvocationID[1] = idx / SIMD::Int(workgroupSizeX);
+		idx -= localInvocationID[1] * SIMD::Int(workgroupSizeX);  // modulo
+		localInvocationID[0] = idx;
 	}
 
-	Int4 wgID = Insert(Insert(Insert(SIMD::Int(0), workgroupID[X], X), workgroupID[Y], Y), workgroupID[Z], Z);
+	Int4 wgID = Insert(Insert(Insert(SIMD::Int(0), workgroupID[0], 0), workgroupID[1], 1), workgroupID[2], 2);
 	auto localBase = workgroupSize * wgID;
 	SIMD::Int globalInvocationID[3];
-	globalInvocationID[X] = SIMD::Int(Extract(localBase, X)) + localInvocationID[X];
-	globalInvocationID[Y] = SIMD::Int(Extract(localBase, Y)) + localInvocationID[Y];
-	globalInvocationID[Z] = SIMD::Int(Extract(localBase, Z)) + localInvocationID[Z];
+	globalInvocationID[0] = SIMD::Int(Extract(localBase, 0)) + localInvocationID[0];
+	globalInvocationID[1] = SIMD::Int(Extract(localBase, 1)) + localInvocationID[1];
+	globalInvocationID[2] = SIMD::Int(Extract(localBase, 2)) + localInvocationID[2];
 
 	routine->localInvocationIndex = localInvocationIndex;
 	routine->subgroupIndex = subgroupIndex;
-	routine->localInvocationID[X] = localInvocationID[X];
-	routine->localInvocationID[Y] = localInvocationID[Y];
-	routine->localInvocationID[Z] = localInvocationID[Z];
-	routine->globalInvocationID[X] = globalInvocationID[X];
-	routine->globalInvocationID[Y] = globalInvocationID[Y];
-	routine->globalInvocationID[Z] = globalInvocationID[Z];
+	routine->localInvocationID[0] = localInvocationID[0];
+	routine->localInvocationID[1] = localInvocationID[1];
+	routine->localInvocationID[2] = localInvocationID[2];
+	routine->globalInvocationID[0] = globalInvocationID[0];
+	routine->globalInvocationID[1] = globalInvocationID[1];
+	routine->globalInvocationID[2] = globalInvocationID[2];
 
 	routine->setInputBuiltin(shader.get(), spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
 		ASSERT(builtin.SizeInComponents == 1);
@@ -228,14 +207,12 @@
 	Data data;
 	data.descriptorSets = descriptorSets;
 	data.descriptorDynamicOffsets = descriptorDynamicOffsets;
-	data.numWorkgroups[X] = groupCountX;
-	data.numWorkgroups[Y] = groupCountY;
-	data.numWorkgroups[Z] = groupCountZ;
-	data.numWorkgroups[3] = 0;
-	data.workgroupSize[X] = workgroupSizeX;
-	data.workgroupSize[Y] = workgroupSizeY;
-	data.workgroupSize[Z] = workgroupSizeZ;
-	data.workgroupSize[3] = 0;
+	data.numWorkgroups[0] = groupCountX;
+	data.numWorkgroups[1] = groupCountY;
+	data.numWorkgroups[2] = groupCountZ;
+	data.workgroupSize[0] = workgroupSizeX;
+	data.workgroupSize[1] = workgroupSizeY;
+	data.workgroupSize[2] = workgroupSizeZ;
 	data.invocationsPerSubgroup = invocationsPerSubgroup;
 	data.invocationsPerWorkgroup = invocationsPerWorkgroup;
 	data.subgroupsPerWorkgroup = subgroupsPerWorkgroup;
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index 3fc6d97..9af300c 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -72,8 +72,8 @@
 	{
 		vk::DescriptorSet::Bindings descriptorSets;
 		vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets;
-		uint4 numWorkgroups;               // [x, y, z, 0]
-		uint4 workgroupSize;               // [x, y, z, 0]
+		uint4 numWorkgroups;               // [x, y, z, -]
+		uint4 workgroupSize;               // [x, y, z, -]
 		uint32_t invocationsPerSubgroup;   // SPIR-V: "SubgroupSize"
 		uint32_t subgroupsPerWorkgroup;    // SPIR-V: "NumSubgroups"
 		uint32_t invocationsPerWorkgroup;  // Total number of invocations per workgroup.
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index c102bb8..d172fd7 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1592,22 +1592,22 @@
 	// are only used when debugging. See b/146486064 for more information.
 	// Give careful consideration to the runtime performance loss before adding
 	// more state here.
-	std::array<SIMD::Int, 2> windowSpacePosition;
-	Int layer;  // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
+	std::array<SIMD::Int, 2> windowSpacePosition;  // TODO(b/236162233): SIMD::Int2
+	Int layer;                                     // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex
 	Int instanceID;
 	SIMD::Int vertexIndex;
-	std::array<SIMD::Float, 4> fragCoord;
-	std::array<SIMD::Float, 4> pointCoord;
+	std::array<SIMD::Float, 4> fragCoord;   // TODO(b/236162233): SIMD::Float4
+	std::array<SIMD::Float, 2> pointCoord;  // TODO(b/236162233): SIMD::Float2
 	SIMD::Int helperInvocation;
-	SIMD::Int numWorkgroups;
-	SIMD::Int workgroupID;
-	SIMD::Int workgroupSize;
+	Int4 numWorkgroups;
+	Int4 workgroupID;
+	Int4 workgroupSize;
 	Int subgroupsPerWorkgroup;
 	Int invocationsPerSubgroup;
 	Int subgroupIndex;
 	SIMD::Int localInvocationIndex;
-	std::array<SIMD::Int, 3> localInvocationID;
-	std::array<SIMD::Int, 3> globalInvocationID;
+	std::array<SIMD::Int, 3> localInvocationID;   // TODO(b/236162233): SIMD::Int3
+	std::array<SIMD::Int, 3> globalInvocationID;  // TODO(b/236162233): SIMD::Int3
 
 	Pointer<Byte> dbgState;  // Pointer to a debugger state.
 
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 7886b1f..dd439de 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1538,6 +1538,9 @@
 	Int4(const Int &rhs);
 	Int4(const Reference<Int> &rhs);
 
+	template<int T>
+	Int4(const SwizzleMask1<Int4, T> &rhs);
+
 	RValue<Int4> operator=(int broadcast);
 	RValue<Int4> operator=(RValue<Int4> rhs);
 	RValue<Int4> operator=(const Int4 &rhs);
@@ -2658,6 +2661,13 @@
 }
 
 template<int T>
+Int4::Int4(const SwizzleMask1<Int4, T> &rhs)
+    : XYZW(this)
+{
+	*this = rhs.operator RValue<Int4>();
+}
+
+template<int T>
 Float4::Float4(const SwizzleMask1<Float4, T> &rhs)
     : XYZW(this)
 {
@@ -3078,7 +3088,7 @@
 
 // Calls the static function pointer fptr with the given arguments args.
 template<typename Return, typename... CArgs, typename... RArgs>
-inline CToReactorT<Return> Call(Return(fptr)(CArgs...), RArgs &&... args)
+inline CToReactorT<Return> Call(Return(fptr)(CArgs...), RArgs &&...args)
 {
 	return CallHelper<Return(CArgs...)>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
 }
@@ -3086,7 +3096,7 @@
 // Calls the static function pointer fptr with the given arguments args.
 // Overload for calling functions with void return type.
 template<typename... CArgs, typename... RArgs>
-inline void Call(void(fptr)(CArgs...), RArgs &&... args)
+inline void Call(void(fptr)(CArgs...), RArgs &&...args)
 {
 	CallHelper<void(CArgs...)>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
 }
@@ -3094,7 +3104,7 @@
 // Calls the member function pointer fptr with the given arguments args.
 // object can be a Class*, or a Pointer<Byte>.
 template<typename Return, typename Class, typename C, typename... CArgs, typename... RArgs>
-inline CToReactorT<Return> Call(Return (Class::*fptr)(CArgs...), C &&object, RArgs &&... args)
+inline CToReactorT<Return> Call(Return (Class::*fptr)(CArgs...), C &&object, RArgs &&...args)
 {
 	using Helper = CallHelper<Return(Class *, void *, CArgs...)>;
 	using fptrTy = decltype(fptr);
@@ -3118,7 +3128,7 @@
 // Overload for calling functions with void return type.
 // object can be a Class*, or a Pointer<Byte>.
 template<typename Class, typename C, typename... CArgs, typename... RArgs>
-inline void Call(void (Class::*fptr)(CArgs...), C &&object, RArgs &&... args)
+inline void Call(void (Class::*fptr)(CArgs...), C &&object, RArgs &&...args)
 {
 	using Helper = CallHelper<void(Class *, void *, CArgs...)>;
 	using fptrTy = decltype(fptr);
@@ -3175,7 +3185,7 @@
 // Calls the Reactor function pointer fptr with the signature FUNCTION_SIGNATURE and arguments.
 // Overload for calling functions with non-void return type.
 template<typename FUNCTION_SIGNATURE, typename... RArgs>
-inline CToReactorT<NonVoidFunctionReturnType<FUNCTION_SIGNATURE>> Call(Pointer<Byte> fptr, RArgs &&... args)
+inline CToReactorT<NonVoidFunctionReturnType<FUNCTION_SIGNATURE>> Call(Pointer<Byte> fptr, RArgs &&...args)
 {
 	return CallHelper<FUNCTION_SIGNATURE>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
 }
@@ -3183,7 +3193,7 @@
 // Calls the Reactor function pointer fptr with the signature FUNCTION_SIGNATURE and arguments.
 // Overload for calling functions with void return type.
 template<typename FUNCTION_SIGNATURE, typename... RArgs>
-inline VoidFunctionReturnType<FUNCTION_SIGNATURE> Call(Pointer<Byte> fptr, RArgs &&... args)
+inline VoidFunctionReturnType<FUNCTION_SIGNATURE> Call(Pointer<Byte> fptr, RArgs &&...args)
 {
 	CallHelper<FUNCTION_SIGNATURE>::Call(fptr, CastToReactor(std::forward<RArgs>(args))...);
 }