SpirvShader: Implement GroupNonUniformVote capability

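Implements the following opcodes:
• OpGroupNonUniformAll
• OpGroupNonUniformAny
• OpGroupNonUniformAllEqual

Also advertises VK_SUBGROUP_FEATURE_VOTE_BIT in the physical device's
supported subgroup operations.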

Bug: b/133510501
Tests: dEQP-VK.subgroups.*
Change-Id: Ic2ff8d99bfc7d21d7dc356eae170a95f1d016fcc
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35068
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index c15cd79..0ffbc12 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -45,6 +45,20 @@
 		return rr::SignMask(~ints) != 0;
 	}
 
+	rr::RValue<sw::SIMD::UInt> AllTrueMask(rr::RValue<sw::SIMD::UInt> const &mask) // Returns the bitwise AND of the four lanes of 'mask', replicated into every lane.
+	{
+		sw::SIMD::UInt v1 = mask;              // [x]    [y]    [z]    [w]
+		sw::SIMD::UInt v2 = v1.xzxz & v1.ywyw; // [xy]   [zw]   [xy]   [zw]
+		return v2.xxxx & v2.yyyy;              // [xyzw] [xyzw] [xyzw] [xyzw]
+	}
+
+	rr::RValue<sw::SIMD::UInt> AnyTrueMask(rr::RValue<sw::SIMD::UInt> const &mask) // Returns the bitwise OR of the four lanes of 'mask', replicated into every lane.
+	{
+		sw::SIMD::UInt v1 = mask;              // [x]    [y]    [z]    [w]
+		sw::SIMD::UInt v2 = v1.xzxz | v1.ywyw; // [xy]   [zw]   [xy]   [zw]
+		return v2.xxxx | v2.yyyy;              // [xyzw] [xyzw] [xyzw] [xyzw]
+	}
+
 	rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val)
 	{
 		return rr::As<sw::SIMD::Float>((rr::As<sw::SIMD::UInt>(val) & sw::SIMD::UInt(0x80000000)) | sw::SIMD::UInt(0x3f800000));
@@ -827,6 +841,7 @@
 				case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
 				case spv::CapabilityMultiView: capabilities.MultiView = true; break;
 				case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
+				case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
 				case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
 				default:
 					UNSUPPORTED("Unsupported capability %u", insn.word(1));
@@ -1069,6 +1084,9 @@
 			case spv::OpImageRead:
 			case spv::OpImageTexelPointer:
 			case spv::OpGroupNonUniformElect:
+			case spv::OpGroupNonUniformAll:
+			case spv::OpGroupNonUniformAny:
+			case spv::OpGroupNonUniformAllEqual:
 			case spv::OpGroupNonUniformBroadcast:
 			case spv::OpGroupNonUniformBroadcastFirst:
 			case spv::OpGroupNonUniformBallot:
@@ -2723,6 +2741,9 @@
 			return EmitMemoryBarrier(insn, state);
 
 		case spv::OpGroupNonUniformElect:
+		case spv::OpGroupNonUniformAll:
+		case spv::OpGroupNonUniformAny:
+		case spv::OpGroupNonUniformAllEqual:
 		case spv::OpGroupNonUniformBroadcast:
 		case spv::OpGroupNonUniformBroadcastFirst:
 		case spv::OpGroupNonUniformBallot:
@@ -5983,6 +6004,40 @@
 			break;
 		}
 
+		case spv::OpGroupNonUniformAll: // Writes the bitwise AND of the predicate across lanes to every lane of the result.
+		{
+			GenericValue predicate(this, state, insn.word(4)); // The predicate operand is read from instruction word 4.
+			dst.move(0, AllTrueMask(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask()))); // OR in the inverted active-lane mask so lanes with a zero mask cannot clear the AND.
+			break;
+		}
+
+		case spv::OpGroupNonUniformAny: // Writes the bitwise OR of the predicate across lanes to every lane of the result.
+		{
+			GenericValue predicate(this, state, insn.word(4)); // The predicate operand is read from instruction word 4.
+			dst.move(0, AnyTrueMask(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask()))); // AND with the active-lane mask so lanes with a zero mask cannot set the OR.
+			break;
+		}
+
+		case spv::OpGroupNonUniformAllEqual: // Result is all-ones in every lane when, per component, every lane holds the same bits after inactive lanes are filled in.
+		{
+			GenericValue value(this, state, insn.word(4)); // The value operand is read from instruction word 4.
+			auto res = SIMD::UInt(0xffffffff); // Start as all-true; each component ANDs in its own comparison result.
+			SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());
+			SIMD::UInt inactive = ~active;
+			for (auto i = 0u; i < type.sizeInComponents; i++) // NOTE(review): 'type' is declared outside this hunk - confirm it covers all components of the value operand, not just the result type.
+			{
+				SIMD::UInt v = value.UInt(i) & active; // Zero this component in lanes whose active mask is zero.
+				SIMD::UInt filled = v;
+				for (int j = 0; j < SIMD::Width - 1; j++) // Width - 1 single-lane rotations, so a value can travel across up to Width - 1 lanes.
+				{
+					filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value (taken from the next lane, wrapping w -> x)
+				}
+				res &= AllTrueMask(CmpEQ(filled.xyzw, filled.yzwx)); // Compare each lane with its rotated neighbour; the comparison is on the raw UInt bits.
+			}
+			dst.move(0, res);
+			break;
+		}
+
 		case spv::OpGroupNonUniformBroadcast:
 		{
 			auto valueId = Object::ID(insn.word(4));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index c3071bc..ea3f0ce 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -737,6 +737,7 @@
 			bool GroupNonUniform : 1;
 			bool MultiView : 1;
 			bool DeviceGroup : 1;
+			bool GroupNonUniformVote : 1;
 			bool GroupNonUniformBallot : 1;
 		};
 
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index f38ff36..2a44d8e 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -313,7 +313,7 @@
 {
 	properties->subgroupSize = sw::SIMD::Width;
 	properties->supportedStages = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT;
-	properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
+	properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
 	properties->quadOperationsInAllStages = VK_FALSE;
 }