SpirvShader: Implement workgroup storage.

No tests are included: every test I could find for this also relies on another feature that is not yet implemented.

Bug: b/131667866
Change-Id: Ic872c06865b9c7a2bafffbe733f79718ba1505e5
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30209
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index f0c7d61..d02f340 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -51,6 +51,7 @@
 		routine.descriptorDynamicOffsets = data + OFFSET(Data, descriptorDynamicOffsets);
 		routine.pushConstants = data + OFFSET(Data, pushConstants);
 		routine.constants = *Pointer<Pointer<Byte>>(data + OFFSET(Data, constants));
+		routine.workgroupMemory = *Pointer<Pointer<Byte>>(data + OFFSET(Data, workgroupMemory));
 
 		auto &modes = shader->getModes();
 
@@ -175,7 +176,7 @@
 	}
 
 	void ComputeProgram::run(
-		Routine *routine,
+		Routine *routine, SpirvShader const *shader,
 		vk::DescriptorSet::Bindings const &descriptorSets,
 		vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
 		PushConstantStorage const &pushConstants,
@@ -183,6 +184,11 @@
 	{
 		auto runWorkgroup = (void(*)(void*))(routine->getEntry());
 
+		// We're sharing a buffer here across all workgroups.
+		// We can only do this because we know workgroups are executed
+		// serially.
+		std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
+
 		Data data;
 		data.descriptorSets = descriptorSets;
 		data.descriptorDynamicOffsets = descriptorDynamicOffsets;
@@ -192,6 +198,7 @@
 		data.numWorkgroups[3] = 0;
 		data.pushConstants = pushConstants;
 		data.constants = &sw::constants;
+		data.workgroupMemory = workgroupMemory.data();
 
 		// TODO(bclayton): Split work across threads.
 		for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index 61892b4..59a2315 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -50,7 +50,7 @@
 		// run executes the compute shader routine for all workgroups.
 		// TODO(bclayton): This probably does not belong here. Consider moving.
 		static void run(
-			Routine *routine,
+			Routine *routine, SpirvShader const *shader,
 			vk::DescriptorSet::Bindings const &descriptorSetBindings,
 			vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
 			PushConstantStorage const &pushConstants,
@@ -71,6 +71,7 @@
 			uint4 workgroupID;
 			PushConstantStorage pushConstants;
 			const Constants *constants;
+			uint8_t* workgroupMemory;
 		};
 
 		SpirvRoutine routine;
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index b3baaba..2008e8e 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -540,6 +540,7 @@
 				object.definition = insn;
 				object.type = typeId;
 
+				ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
 				ASSERT(getType(typeId).storageClass == storageClass);
 
 				switch (storageClass)
@@ -561,6 +562,13 @@
 					break; // Correctly handled.
 
 				case spv::StorageClassWorkgroup:
+				{
+					auto &elTy = getType(getType(typeId).element);
+					auto sizeInBytes = elTy.sizeInComponents * sizeof(float);
+					workgroupMemory.allocate(resultId, sizeInBytes);
+					object.kind = Object::Kind::Pointer;
+					break;
+				}
 				case spv::StorageClassAtomicCounter:
 				case spv::StorageClassImage:
 					UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
@@ -1147,6 +1155,7 @@
 		case spv::StorageClassUniform:
 		case spv::StorageClassStorageBuffer:
 		case spv::StorageClassPushConstant:
+		case spv::StorageClassWorkgroup:
 			return false;
 		default:
 			return true;
@@ -2479,6 +2488,14 @@
 			routine->createPointer(resultId, SIMD::Pointer(base, size));
 			break;
 		}
+		case spv::StorageClassWorkgroup:
+		{
+			ASSERT(objectTy.opcode() == spv::OpTypePointer);
+			auto base = &routine->workgroupMemory[0];
+			auto size = workgroupMemory.size();
+			routine->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
+			break;
+		}
 		case spv::StorageClassInput:
 		{
 			if (object.kind == Object::Kind::InterfaceVariable)
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index de4c03b..9b2b045 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -656,6 +656,30 @@
 			uint32_t SizeInComponents;
 		};
 
+		struct WorkgroupMemory // Assigns each allocated object id a byte offset within one contiguous region.
+		{
+			// allocate() reserves size bytes for id at the current end of the region; no padding is inserted, and id must not already have an allocation (asserted).
+			inline void allocate(Object::ID id, uint32_t size)
+			{
+				uint32_t offset = totalSize;
+				auto it = offsets.emplace(id, offset);
+				ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
+				totalSize += size;
+			}
+			// offsetOf() returns the byte offset that allocate() assigned to id; id must have an allocation (asserted).
+			inline uint32_t offsetOf(Object::ID id) const
+			{
+				auto it = offsets.find(id);
+				ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
+				return it->second;
+			}
+			// size() returns the sum of the sizes of all allocations made so far, in bytes.
+			inline uint32_t size() const { return totalSize; }
+		private:
+			uint32_t totalSize = 0; // in bytes; also the offset the next allocation will receive
+			std::unordered_map<Object::ID, uint32_t> offsets; // object id -> byte offset of its allocation
+		};
+
 		std::vector<InterfaceComponent> inputs;
 		std::vector<InterfaceComponent> outputs;
 
@@ -666,6 +690,7 @@
 		using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
 		std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
 		std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
+		WorkgroupMemory workgroupMemory;
 
 		Type const &getType(Type::ID id) const
 		{
@@ -956,6 +981,7 @@
 		Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
 		Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
 
+		Pointer<Byte> workgroupMemory;
 		Pointer<Pointer<Byte>> descriptorSets;
 		Pointer<Int> descriptorDynamicOffsets;
 		Pointer<Byte> pushConstants;
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index dc65704..3d0a4ac 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -568,7 +568,8 @@
 {
 	ASSERT_OR_RETURN(routine != nullptr);
 	sw::ComputeProgram::run(
-		routine, descriptorSets, descriptorDynamicOffsets, pushConstants,
+		routine, shader,
+		descriptorSets, descriptorDynamicOffsets, pushConstants,
 		groupCountX, groupCountY, groupCountZ);
 }