SpirvShader: Implement workgroup storage.
I couldn't find any tests that did not rely on another feature being implemented.
Bug: b/131667866
Change-Id: Ic872c06865b9c7a2bafffbe733f79718ba1505e5
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30209
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index f0c7d61..d02f340 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -51,6 +51,7 @@
routine.descriptorDynamicOffsets = data + OFFSET(Data, descriptorDynamicOffsets);
routine.pushConstants = data + OFFSET(Data, pushConstants);
routine.constants = *Pointer<Pointer<Byte>>(data + OFFSET(Data, constants));
+ routine.workgroupMemory = *Pointer<Pointer<Byte>>(data + OFFSET(Data, workgroupMemory));
auto &modes = shader->getModes();
@@ -175,7 +176,7 @@
}
void ComputeProgram::run(
- Routine *routine,
+ Routine *routine, SpirvShader const *shader,
vk::DescriptorSet::Bindings const &descriptorSets,
vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
PushConstantStorage const &pushConstants,
@@ -183,6 +184,11 @@
{
auto runWorkgroup = (void(*)(void*))(routine->getEntry());
+ // We're sharing a buffer here across all workgroups.
+ // We can only do this because we know workgroups are executed
+ // serially.
+ std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
+
Data data;
data.descriptorSets = descriptorSets;
data.descriptorDynamicOffsets = descriptorDynamicOffsets;
@@ -192,6 +198,7 @@
data.numWorkgroups[3] = 0;
data.pushConstants = pushConstants;
data.constants = &sw::constants;
+ data.workgroupMemory = workgroupMemory.data();
// TODO(bclayton): Split work across threads.
for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index 61892b4..59a2315 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -50,7 +50,7 @@
// run executes the compute shader routine for all workgroups.
// TODO(bclayton): This probably does not belong here. Consider moving.
static void run(
- Routine *routine,
+ Routine *routine, SpirvShader const *shader,
vk::DescriptorSet::Bindings const &descriptorSetBindings,
vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
PushConstantStorage const &pushConstants,
@@ -71,6 +71,7 @@
uint4 workgroupID;
PushConstantStorage pushConstants;
const Constants *constants;
+ uint8_t* workgroupMemory;
};
SpirvRoutine routine;
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index b3baaba..2008e8e 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -540,6 +540,7 @@
object.definition = insn;
object.type = typeId;
+ ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
ASSERT(getType(typeId).storageClass == storageClass);
switch (storageClass)
@@ -561,6 +562,13 @@
break; // Correctly handled.
case spv::StorageClassWorkgroup:
+ {
+ auto &elTy = getType(getType(typeId).element);
+ auto sizeInBytes = elTy.sizeInComponents * sizeof(float);
+ workgroupMemory.allocate(resultId, sizeInBytes);
+ object.kind = Object::Kind::Pointer;
+ break;
+ }
case spv::StorageClassAtomicCounter:
case spv::StorageClassImage:
UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
@@ -1147,6 +1155,7 @@
case spv::StorageClassUniform:
case spv::StorageClassStorageBuffer:
case spv::StorageClassPushConstant:
+ case spv::StorageClassWorkgroup:
return false;
default:
return true;
@@ -2479,6 +2488,14 @@
routine->createPointer(resultId, SIMD::Pointer(base, size));
break;
}
+ case spv::StorageClassWorkgroup:
+ {
+ ASSERT(objectTy.opcode() == spv::OpTypePointer);
+ auto base = &routine->workgroupMemory[0];
+ auto size = workgroupMemory.size();
+ routine->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
+ break;
+ }
case spv::StorageClassInput:
{
if (object.kind == Object::Kind::InterfaceVariable)
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index de4c03b..9b2b045 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -656,6 +656,30 @@
uint32_t SizeInComponents;
};
+ struct WorkgroupMemory // Byte-layout bookkeeping: maps object IDs to byte offsets and tracks the running total size.
+ {
+ // Records a new variable of `size` bytes for `id`, placed at the current end of the layout (no padding is inserted). Asserts that `id` has no prior allocation.
+ inline void allocate(Object::ID id, uint32_t size)
+ {
+ uint32_t offset = totalSize; // the new variable starts where the previous allocations end
+ auto it = offsets.emplace(id, offset); // emplace() yields {iterator, inserted}; inserted is false if `id` was already present
+ ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
+ totalSize += size; // grow the layout by the size of the new variable
+ }
+ // Returns the byte offset recorded for `id` by allocate(). Asserts that `id` has an allocation.
+ inline uint32_t offsetOf(Object::ID id) const
+ {
+ auto it = offsets.find(id);
+ ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
+ return it->second; // mapped value is the byte offset stored by allocate()
+ }
+ // Returns the total allocated size in bytes, i.e. the running sum of every `size` passed to allocate().
+ inline uint32_t size() const { return totalSize; }
+ private:
+ uint32_t totalSize = 0; // in bytes; also the offset that the next allocate() call will hand out
+ std::unordered_map<Object::ID, uint32_t> offsets; // object ID -> start offset in bytes
+ };
+
std::vector<InterfaceComponent> inputs;
std::vector<InterfaceComponent> outputs;
@@ -666,6 +690,7 @@
using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
+ WorkgroupMemory workgroupMemory;
Type const &getType(Type::ID id) const
{
@@ -956,6 +981,7 @@
Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};
+ Pointer<Byte> workgroupMemory;
Pointer<Pointer<Byte>> descriptorSets;
Pointer<Int> descriptorDynamicOffsets;
Pointer<Byte> pushConstants;
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index dc65704..3d0a4ac 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -568,7 +568,8 @@
{
ASSERT_OR_RETURN(routine != nullptr);
sw::ComputeProgram::run(
- routine, descriptorSets, descriptorDynamicOffsets, pushConstants,
+ routine, shader,
+ descriptorSets, descriptorDynamicOffsets, pushConstants,
groupCountX, groupCountY, groupCountZ);
}