Vulkan: Very rough first implementation of compute shaders
This is by no means finished:
* ComputeProgram holds the codegen and the invoke logic. ComputeProgram::run() should probably be moved somewhere else (maybe Renderer?).
* ComputeProgram::run() is currently fully single-threaded.
* The compute routines are currently not cached.
With that said, this is enough to start passing a whole bunch of dEQP compute tests.
Test: *.compute.*
Bug: b/126871859
Change-Id: Ic6a76826b2fec0d34d54e0bed564b360ea0610c0
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/26551
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj b/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj
index 9052ac6..707b7d6 100644
--- a/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj
+++ b/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj
@@ -300,6 +300,7 @@
<ClInclude Include="$(SolutionDir)src\Device\Vector.hpp" />
<ClInclude Include="$(SolutionDir)src\Device\Vertex.hpp" />
<ClInclude Include="$(SolutionDir)src\Device\VertexProcessor.hpp" />
+ <ClCompile Include="$(SolutionDir)src\Pipeline\ComputeProgram.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\Constants.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\PixelProgram.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\PixelRoutine.cpp" />
@@ -309,6 +310,7 @@
<ClCompile Include="$(SolutionDir)src\Pipeline\SpirvShader.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\VertexProgram.cpp" />
<ClCompile Include="$(SolutionDir)src\Pipeline\VertexRoutine.cpp" />
+ <ClInclude Include="$(SolutionDir)src\Pipeline\ComputeProgram.hpp" />
<ClInclude Include="$(SolutionDir)src\Pipeline\Constants.hpp" />
<ClInclude Include="$(SolutionDir)src\Pipeline\PixelProgram.hpp" />
<ClInclude Include="$(SolutionDir)src\Pipeline\PixelRoutine.hpp" />
diff --git a/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj.filters b/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj.filters
index 77e70da..63ccc64 100644
--- a/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj.filters
+++ b/build/Visual Studio 15 2017 Win64/libvk_swiftshader.vcxproj.filters
@@ -157,6 +157,9 @@
<ClCompile Include="$(SolutionDir)src\Device\VertexProcessor.cpp">
<Filter>src\Device</Filter>
</ClCompile>
+ <ClCompile Include="$(SolutionDir)src\Pipeline\ComputeProgram.cpp">
+ <Filter>src\Pipeline</Filter>
+ </ClCompile>
<ClCompile Include="$(SolutionDir)src\Pipeline\Constants.cpp">
<Filter>src\Pipeline</Filter>
</ClCompile>
@@ -390,6 +393,9 @@
<ClInclude Include="$(SolutionDir)src\Device\VertexProcessor.hpp">
<Filter>src\Device</Filter>
</ClInclude>
+ <ClInclude Include="$(SolutionDir)src\Pipeline\ComputeProgram.hpp">
+ <Filter>src\Pipeline</Filter>
+ </ClInclude>
<ClInclude Include="$(SolutionDir)src\Pipeline\Constants.hpp">
<Filter>src\Pipeline</Filter>
</ClInclude>
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
new file mode 100644
index 0000000..2892ed7
--- /dev/null
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -0,0 +1,168 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ComputeProgram.hpp"
+
+#include "Vulkan/VkDebug.hpp"
+#include "Vulkan/VkPipelineLayout.hpp"
+
+namespace sw
+{
+ ComputeProgram::ComputeProgram(SpirvShader const *shader, vk::PipelineLayout const *pipelineLayout)
+ : data(Arg<0>()), // the generated function's only argument; run() passes it a pointer to a Data struct
+ routine(pipelineLayout),
+ shader(shader), // stored as a raw pointer; the (empty) destructor does not release it
+ pipelineLayout(pipelineLayout)
+ {
+ }
+
+ ComputeProgram::~ComputeProgram()
+ {
+ } // Empty body: no explicit cleanup is performed here.
+
+ void ComputeProgram::generate() // Emits, in order: the shader prolog, the workgroup body (emit()), the shader epilog.
+ {
+ shader->emitProlog(&routine);
+ emit();
+ shader->emitEpilog(&routine);
+ }
+
+ void ComputeProgram::emit() // Emits the routine body: the work of one workgroup (run() calls the routine once per workgroup).
+ {
+ Pointer<Pointer<Byte>> descriptorSetsIn = *Pointer<Pointer<Pointer<Byte>>>(data + OFFSET(Data, descriptorSets));
+ size_t numDescriptorSets = routine.pipelineLayout->getNumDescriptorSets();
+ for(unsigned int i = 0; i < numDescriptorSets; i++) // ordinary C++ loop: emits one pointer copy per descriptor set
+ {
+ routine.descriptorSets[i] = descriptorSetsIn[i];
+ }
+
+ auto &modes = shader->getModes();
+
+ Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups)); // written once per dispatch by run()
+ Int4 workgroupID = *Pointer<Int4>(data + OFFSET(Data, workgroupID)); // rewritten by run() before each routine call
+ Int4 workgroupSize = Int4(modes.LocalSizeX, modes.LocalSizeY, modes.LocalSizeZ, 0); // constants taken from modes; 4th lane is 0
+
+ setInputBuiltin(spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
+ {
+ for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
+ {
+ value[builtin.FirstComponent + component] =
+ As<Float4>(Int4(Extract(numWorkgroups, component))); // same scalar in every lane, stored as raw bits
+ }
+ });
+
+ setInputBuiltin(spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
+ {
+ for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
+ {
+ value[builtin.FirstComponent + component] =
+ As<Float4>(Int4(Extract(workgroupSize, component))); // same scalar in every lane, stored as raw bits
+ }
+ });
+
+ // Total number of invocations required to execute this workgroup.
+ const int numInvocations = modes.LocalSizeX * modes.LocalSizeY * modes.LocalSizeZ;
+
+ enum { XXXX, YYYY, ZZZZ }; // component indices (0, 1, 2) into localInvocationID[]
+
+ For(Int invocationIndex = 0, invocationIndex < numInvocations, invocationIndex += SIMD::Width) // emitted loop: SIMD::Width invocations per iteration
+ {
+ Int4 localInvocationIndex = Int4(invocationIndex) + Int4(0, 1, 2, 3); // lane i handles invocationIndex + i (assumes 4 lanes)
+
+ Int4 localInvocationID[3];
+ { // Unflatten the linear index: index = x + LocalSizeX * (y + LocalSizeY * z)
+ Int4 idx = localInvocationIndex;
+ localInvocationID[ZZZZ] = idx / Int4(modes.LocalSizeX * modes.LocalSizeY);
+ idx -= localInvocationID[ZZZZ] * Int4(modes.LocalSizeX * modes.LocalSizeY); // modulo
+ localInvocationID[YYYY] = idx / Int4(modes.LocalSizeX);
+ idx -= localInvocationID[YYYY] * Int4(modes.LocalSizeX); // modulo
+ localInvocationID[XXXX] = idx;
+ }
+
+ setInputBuiltin(spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
+ {
+ ASSERT(builtin.SizeInComponents == 1); // the index is a single scalar per invocation
+ value[builtin.FirstComponent] = As<Float4>(localInvocationIndex);
+ });
+
+ setInputBuiltin(spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
+ {
+ for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
+ {
+ value[builtin.FirstComponent + component] = As<Float4>(localInvocationID[component]); // assumes SizeInComponents <= 3 (array has 3 entries)
+ }
+ });
+
+ setInputBuiltin(spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)
+ {
+ for (uint32_t component = 0; component < builtin.SizeInComponents; component++)
+ {
+ Int4 globalInvocationID = // global = workgroupID * workgroupSize + local, per component
+ Int4(Extract(workgroupID, component)) *
+ Int4(Extract(workgroupSize, component)) +
+ localInvocationID[component];
+ value[builtin.FirstComponent + component] = As<Float4>(globalInvocationID);
+ // RR_WATCH(component, globalInvocationID);
+ }
+ });
+
+ // TODO(bclayton): Disable lanes where (localInvocationIndex >= numInvocations)
+ // Int4 enabledLanes = localInvocationIndex < Int4(numInvocations);
+
+ // Process SIMD::Width invocations of the workgroup. Until the TODO above is done, lanes past numInvocations in the last iteration run too.
+ shader->emit(&routine);
+ }
+ }
+
+	void ComputeProgram::setInputBuiltin(spv::BuiltIn id, std::function<void(const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)> cb)
+	{
+		// Hands cb the mapping and storage of builtin 'id'; does nothing if shader->inputBuiltins has no entry for it.
+		auto found = shader->inputBuiltins.find(id);
+		if (found != shader->inputBuiltins.end())
+		{
+			const auto& mapping = found->second;
+			cb(mapping, routine.getValue(mapping.Id));
+		}
+	}
+
+	void ComputeProgram::run(
+		Routine *routine,
+		uint32_t numDescriptorSets, void** descriptorSets,
+		uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
+	{
+		auto runWorkgroup = (void(*)(void*))(routine->getEntry()); // entry point of the routine built by generate()
+
+		Data data = {}; // value-initialized: workgroupID[3] is never assigned below, yet emit() loads all four lanes
+		data.descriptorSets = descriptorSets; // note: numDescriptorSets is currently unused
+		data.numWorkgroups[0] = groupCountX;
+		data.numWorkgroups[1] = groupCountY;
+		data.numWorkgroups[2] = groupCountZ;
+		data.numWorkgroups[3] = 0;
+
+		// TODO(bclayton): Split work across threads. For now workgroups run one after another, X innermost, reusing 'data'.
+		for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
+		{
+			data.workgroupID[2] = groupZ;
+			for (uint32_t groupY = 0; groupY < groupCountY; groupY++)
+			{
+				data.workgroupID[1] = groupY;
+				for (uint32_t groupX = 0; groupX < groupCountX; groupX++)
+				{
+					data.workgroupID[0] = groupX;
+					runWorkgroup(&data); // one routine call executes one whole workgroup
+				}
+			}
+		}
+	}
+}
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
new file mode 100644
index 0000000..d547a91
--- /dev/null
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -0,0 +1,75 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef sw_ComputeProgram_hpp
+#define sw_ComputeProgram_hpp
+
+#include "SpirvShader.hpp"
+
+#include "Reactor/Reactor.hpp"
+
+#include <functional>
+
+namespace vk
+{
+ class PipelineLayout;
+} // namespace vk
+
+namespace sw
+{
+
+ using namespace rr;
+
+ class DescriptorSetsLayout;
+
+ // ComputeProgram generates the routine that executes one workgroup of a SPIR-V compute shader.
+ class ComputeProgram : public Function<Void(Pointer<Byte>)>
+ {
+ public:
+ ComputeProgram(SpirvShader const *spirvShader, vk::PipelineLayout const *pipelineLayout);
+
+ virtual ~ComputeProgram();
+
+ // generate emits the shader prolog, the per-workgroup body and the shader epilog.
+ void generate();
+
+ // run executes the compute shader routine for all workgroups, calling it once per workgroup.
+ // TODO(bclayton): This probably does not belong here. Consider moving.
+ static void run(
+ Routine *routine,
+ uint32_t numDescriptorSets, void** descriptorSets,
+ uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+
+ protected:
+ void emit(); // emits the per-workgroup body; called by generate()
+ // setInputBuiltin calls cb with the mapping and storage of builtin 'id' if the shader lists it in inputBuiltins.
+ void setInputBuiltin(spv::BuiltIn id, std::function<void(const SpirvShader::BuiltinMapping& builtin, Array<Float4>& value)> cb);
+
+ Pointer<Byte> data; // argument 0: points to the Data struct filled in by run()
+
+ struct Data // block passed to the routine; emit() reads its fields via OFFSET(Data, ...)
+ {
+ void** descriptorSets; // copied into routine.descriptorSets by emit()
+ uint4 numWorkgroups; // dispatch size (x, y, z); run() sets component 3 to 0
+ uint4 workgroupID; // workgroup to execute; run() assigns components 0-2
+ };
+
+ SpirvRoutine routine;
+ SpirvShader const * const shader;
+ vk::PipelineLayout const * const pipelineLayout;
+ };
+
+} // namespace sw
+
+#endif // sw_ComputeProgram_hpp
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 9634c10..c80483d 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -19,6 +19,7 @@
#include "VkImage.hpp"
#include "VkImageView.hpp"
#include "VkPipeline.hpp"
+#include "VkPipelineLayout.hpp"
#include "VkRenderPass.hpp"
#include "Device/Renderer.hpp"
@@ -123,6 +124,30 @@
VkPipeline pipeline;
};
+class Dispatch : public CommandBuffer::Command // Recorded by CommandBuffer::dispatch(); play() runs the compute pipeline.
+{
+public:
+ Dispatch(uint32_t pGroupCountX, uint32_t pGroupCountY, uint32_t pGroupCountZ) :
+ groupCountX(pGroupCountX), groupCountY(pGroupCountY), groupCountZ(pGroupCountZ)
+ {
+ }
+
+protected:
+ void play(CommandBuffer::ExecutionState& executionState) override
+ {
+ ComputePipeline* pipeline = static_cast<ComputePipeline*>( // NOTE(review): no null check; assumes a compute pipeline was bound - confirm
+ executionState.pipelines[VK_PIPELINE_BIND_POINT_COMPUTE]);
+ pipeline->run(groupCountX, groupCountY, groupCountZ,
+ MAX_BOUND_DESCRIPTOR_SETS, // presumably the array size, not the count of sets actually bound - verify
+ executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE]);
+ }
+
+private:
+ uint32_t groupCountX; // number of workgroups to run in X
+ uint32_t groupCountY; // number of workgroups to run in Y
+ uint32_t groupCountZ; // number of workgroups to run in Z
+};
+
struct VertexBufferBind : public CommandBuffer::Command
{
VertexBufferBind(uint32_t pBinding, const VkBuffer pBuffer, const VkDeviceSize pOffset) :
@@ -666,12 +691,15 @@
void CommandBuffer::bindPipeline(VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline)
{
- if(pipelineBindPoint != VK_PIPELINE_BIND_POINT_GRAPHICS)
+ switch(pipelineBindPoint)
{
- UNIMPLEMENTED();
+ case VK_PIPELINE_BIND_POINT_COMPUTE:
+ case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ addCommand<PipelineBind>(pipelineBindPoint, pipeline);
+ break;
+ default:
+ UNIMPLEMENTED();
}
-
- addCommand<PipelineBind>(pipelineBindPoint, pipeline);
}
void CommandBuffer::bindVertexBuffers(uint32_t firstBinding, uint32_t bindingCount,
@@ -822,7 +850,7 @@
void CommandBuffer::dispatch(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{
- UNIMPLEMENTED();
+ addCommand<Dispatch>(groupCountX, groupCountY, groupCountZ);
}
void CommandBuffer::dispatchIndirect(VkBuffer buffer, VkDeviceSize offset)
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index 3e512a5..0b5f0c8 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -15,6 +15,7 @@
#include "VkPipeline.hpp"
#include "VkPipelineLayout.hpp"
#include "VkShaderModule.hpp"
+#include "Pipeline/ComputeProgram.hpp"
#include "Pipeline/SpirvShader.hpp"
#include "spirv-tools/optimizer.hpp"
@@ -538,6 +539,7 @@
void ComputePipeline::destroyPipeline(const VkAllocationCallbacks* pAllocator)
{
+ delete shader; // allocated with new in compileShaders(). NOTE(review): 'routine' is not released here - confirm who owns it
}
size_t ComputePipeline::ComputeRequiredAllocationSize(const VkComputePipelineCreateInfo* pCreateInfo)
@@ -545,4 +547,35 @@
return 0;
}
+void ComputePipeline::compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo) // Builds 'shader' and 'routine' from the create info's stage.
+{
+ auto module = Cast(pCreateInfo->stage.module);
+
+ auto code = preprocessSpirv(module->getCode(), pCreateInfo->stage.pSpecializationInfo);
+
+ ASSERT_OR_RETURN(code.size() > 0);
+
+ ASSERT(shader == nullptr); // a second call would overwrite (and leak) the previous shader
+
+ // FIXME (b/119409619): use allocator. (pAllocator is currently unused.)
+ shader = new sw::SpirvShader(code); // deleted in destroyPipeline()
+
+ sw::ComputeProgram program(shader, layout);
+
+ program.generate();
+
+ // TODO(bclayton): Cache program
+ routine = program("ComputeRoutine"); // yields the rr::Routine that run() later executes
+}
+
+void ComputePipeline::run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
+	size_t numDescriptorSets, VkDescriptorSet *descriptorSets)
+{
+	// Forwards the dispatch to sw::ComputeProgram::run() using the routine built by compileShaders().
+	ASSERT_OR_RETURN(routine != nullptr);
+	auto sets = reinterpret_cast<void**>(descriptorSets);
+	auto setCount = static_cast<uint32_t>(numDescriptorSets);
+	sw::ComputeProgram::run(routine, setCount, sets, groupCountX, groupCountY, groupCountZ);
+}
+
} // namespace vk
diff --git a/src/Vulkan/VkPipeline.hpp b/src/Vulkan/VkPipeline.hpp
index 7b4d591..d3a92d2 100644
--- a/src/Vulkan/VkPipeline.hpp
+++ b/src/Vulkan/VkPipeline.hpp
@@ -100,6 +100,15 @@
#endif
static size_t ComputeRequiredAllocationSize(const VkComputePipelineCreateInfo* pCreateInfo);
+
+ void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo);
+
+ void run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
+ size_t numDescriptorSets, VkDescriptorSet *descriptorSets);
+
+protected:
+ sw::SpirvShader *shader = nullptr;
+ rr::Routine *routine = nullptr;
};
static inline Pipeline* Cast(VkPipeline object)
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index 9f23143..a22a3ef 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -1019,7 +1019,11 @@
for(uint32_t i = 0; i < createInfoCount; i++)
{
VkResult result = vk::GraphicsPipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
- if(result != VK_SUCCESS)
+ if(result == VK_SUCCESS)
+ {
+ static_cast<vk::GraphicsPipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
+ }
+ else
{
// According to the Vulkan spec, section 9.4. Multiple Pipeline Creation
// "When an application attempts to create many pipelines in a single command,
@@ -1032,10 +1036,6 @@
pPipelines[i] = VK_NULL_HANDLE;
errorResult = result;
}
- else
- {
- static_cast<vk::GraphicsPipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
- }
}
return errorResult;
@@ -1052,7 +1052,11 @@
for(uint32_t i = 0; i < createInfoCount; i++)
{
VkResult result = vk::ComputePipeline::Create(pAllocator, &pCreateInfos[i], &pPipelines[i]);
- if(result != VK_SUCCESS)
+ if(result == VK_SUCCESS)
+ {
+ static_cast<vk::ComputePipeline*>(vk::Cast(pPipelines[i]))->compileShaders(pAllocator, &pCreateInfos[i]);
+ }
+ else
{
// According to the Vulkan spec, section 9.4. Multiple Pipeline Creation
// "When an application attempts to create many pipelines in a single command,
diff --git a/src/Vulkan/vulkan.vcxproj b/src/Vulkan/vulkan.vcxproj
index 0328234..7eaae3f 100644
--- a/src/Vulkan/vulkan.vcxproj
+++ b/src/Vulkan/vulkan.vcxproj
@@ -141,6 +141,7 @@
<ClCompile Include="..\Device\SwiftConfig.cpp" />
<ClCompile Include="..\Device\Vector.cpp" />
<ClCompile Include="..\Device\VertexProcessor.cpp" />
+ <ClCompile Include="..\Pipeline\ComputeProgram.cpp" />
<ClCompile Include="..\Pipeline\Constants.cpp" />
<ClCompile Include="..\Pipeline\PixelProgram.cpp" />
<ClCompile Include="..\Pipeline\PixelRoutine.cpp" />
@@ -252,6 +253,7 @@
<ClInclude Include="..\Device\Vector.hpp" />
<ClInclude Include="..\Device\Vertex.hpp" />
<ClInclude Include="..\Device\VertexProcessor.hpp" />
+ <ClInclude Include="..\Pipeline\ComputeProgram.hpp" />
<ClInclude Include="..\Pipeline\Constants.hpp" />
<ClInclude Include="..\Pipeline\PixelProgram.hpp" />
<ClInclude Include="..\Pipeline\PixelRoutine.hpp" />
diff --git a/src/Vulkan/vulkan.vcxproj.filters b/src/Vulkan/vulkan.vcxproj.filters
index 06eef79..2139b9e 100644
--- a/src/Vulkan/vulkan.vcxproj.filters
+++ b/src/Vulkan/vulkan.vcxproj.filters
@@ -120,6 +120,9 @@
<ClCompile Include="..\Pipeline\PixelProgram.cpp">
<Filter>Source Files\Pipeline</Filter>
</ClCompile>
+ <ClCompile Include="..\Pipeline\ComputeProgram.cpp">
+ <Filter>Source Files\Pipeline</Filter>
+ </ClCompile>
<ClCompile Include="..\Pipeline\Constants.cpp">
<Filter>Source Files\Pipeline</Filter>
</ClCompile>
@@ -491,6 +494,9 @@
<ClInclude Include="..\Pipeline\PixelProgram.hpp">
<Filter>Header Files\Pipeline</Filter>
</ClInclude>
+ <ClInclude Include="..\Pipeline\ComputeProgram.hpp">
+ <Filter>Header Files\Pipeline</Filter>
+ </ClInclude>
<ClInclude Include="..\Pipeline\Constants.hpp">
<Filter>Header Files\Pipeline</Filter>
</ClInclude>