Implement shaderSubgroupBroadcastDynamicId

OpGroupNonUniformBroadcast can be supplied with either a constant or an
intermediate as its Id operand. I've split this Op into a fast path for
constants, which uses the original code, and a slow path that handles
intermediates.

Tests: dEQP-VK.subgroups.ballot_broadcast.*
Bug: b/169608683
Change-Id: Idc74f3fe7e906315c59cf5b3bf3a450046e37375
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/50671
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Sean Risser <srisser@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShaderGroup.cpp b/src/Pipeline/SpirvShaderGroup.cpp
index 51db930..19e997f 100644
--- a/src/Pipeline/SpirvShaderGroup.cpp
+++ b/src/Pipeline/SpirvShaderGroup.cpp
@@ -139,12 +139,39 @@
 		case spv::OpGroupNonUniformBroadcast:
 		{
 			auto valueId = Object::ID(insn.word(4));
-			auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
+			auto idId = Object::ID(insn.word(5));
 			Operand value(this, state, valueId);
-			auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
-			for(auto i = 0u; i < type.componentCount; i++)
+
+			// Decide between the fast path for constants and the slow path for
+			// intermediates.
+			if(getObject(idId).kind == SpirvShader::Object::Kind::Constant)
 			{
-				dst.move(i, OrAll(value.Int(i) & mask));
+				auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
+				auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
+				for(auto i = 0u; i < type.componentCount; i++)
+				{
+					dst.move(i, OrAll(value.Int(i) & mask));
+				}
+			}
+			else
+			{
+				Operand id(this, state, idId);
+
+				SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());  // Considers helper invocations active. See b/151137030
+				SIMD::UInt inactive = ~active;
+				SIMD::UInt filled = id.UInt(0) & active;
+
+				for(int j = 0; j < SIMD::Width - 1; j++)
+				{
+					filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
+				}
+
+				auto mask = CmpEQ(filled, SIMD::UInt(0, 1, 2, 3));
+
+				for(uint32_t i = 0u; i < type.componentCount; i++)
+				{
+					dst.move(i, OrAll(value.UInt(i) & mask));
+				}
 			}
 			break;
 		}
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 225adf7..9ddc243 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -271,7 +271,7 @@
 	features->vulkanMemoryModelAvailabilityVisibilityChains = VK_FALSE;
 	features->shaderOutputViewportIndex = VK_FALSE;
 	features->shaderOutputLayer = VK_FALSE;
-	features->subgroupBroadcastDynamicId = VK_FALSE;
+	features->subgroupBroadcastDynamicId = VK_TRUE;
 }
 
 void PhysicalDevice::getFeatures2(VkPhysicalDeviceFeatures2 *features) const