Add support for push constants
- Proper support for calculating offsets in explicit-layout storage
classes (push constant, uniform, and storage buffer) according to the
Offset, ArrayStride and MatrixStride decorations.
- Plumb a block of push constant data throughout the pipeline
- Implement push constant update commands
Bug: b/128690261
Bug: b/128872954
Test: dEQP-VK.*push_constant*
Test: dEQP-VK.glsl.*
Test: dEQP-VK.spirv_assembly.*
Change-Id: I7d5a66ac4aafd6b637b4693eb6ce96a327b4904e
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/27528
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Device/Context.hpp b/src/Device/Context.hpp
index 2405224..8645a1a 100644
--- a/src/Device/Context.hpp
+++ b/src/Device/Context.hpp
@@ -15,6 +15,7 @@
#ifndef sw_Context_hpp
#define sw_Context_hpp
+#include "Vulkan/VkConfig.h"
#include "Sampler.hpp"
#include "Stream.hpp"
#include "Point.hpp"
@@ -107,6 +108,11 @@
TRANSPARENCY_LAST = TRANSPARENCY_ALPHA_TO_COVERAGE
};
+ struct PushConstantStorage // Plain byte block holding push constant data; assigned by value wherever it is handed on.
+ {
+ unsigned char data[vk::MAX_PUSH_CONSTANT_SIZE]; // Raw bytes; capacity is the vk::MAX_PUSH_CONSTANT_SIZE configuration constant.
+ };
+
class Context
{
public:
@@ -220,6 +226,8 @@
unsigned int sampleMask;
unsigned int multiSampleMask;
int sampleCount;
+
+ PushConstantStorage pushConstants;
};
}
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index 2038d50..042d68d 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -444,6 +444,11 @@
data->scissorY1 = scissor.offset.y + scissor.extent.height;
}
+ // Push constants
+ {
+ data->pushConstants = context->pushConstants;
+ }
+
draw->primitive = 0;
draw->count = count;
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index c31676c..eb93485 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -187,6 +187,8 @@
float4 a2c1;
float4 a2c2;
float4 a2c3;
+
+ PushConstantStorage pushConstants;
};
class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index a119a3c..d15b8d4 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -52,6 +52,8 @@
routine.descriptorSets[i] = descriptorSetsIn[i];
}
+ routine.pushConstants = Pointer<Byte>(data + OFFSET(Data, pushConstants));
+
auto &modes = shader->getModes();
int localSize[3] = {modes.WorkgroupSizeX, modes.WorkgroupSizeY, modes.WorkgroupSizeZ};
@@ -167,7 +169,7 @@
}
void ComputeProgram::run(
- Routine *routine, void** descriptorSets,
+ Routine *routine, void** descriptorSets, PushConstantStorage const &pushConstants,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
{
auto runWorkgroup = (void(*)(void*))(routine->getEntry());
@@ -178,6 +180,7 @@
data.numWorkgroups[Y] = groupCountY;
data.numWorkgroups[Z] = groupCountZ;
data.numWorkgroups[3] = 0;
+ data.pushConstants = pushConstants;
// TODO(bclayton): Split work across threads.
for (uint32_t groupZ = 0; groupZ < groupCountZ; groupZ++)
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index ef3a4de..6b63233 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -18,6 +18,7 @@
#include "SpirvShader.hpp"
#include "Reactor/Reactor.hpp"
+#include "Device/Context.hpp"
#include <functional>
@@ -47,7 +48,7 @@
// run executes the compute shader routine for all workgroups.
// TODO(bclayton): This probably does not belong here. Consider moving.
static void run(
- Routine *routine, void** descriptorSets,
+ Routine *routine, void** descriptorSets, PushConstantStorage const &pushConstants,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
protected:
@@ -62,6 +63,7 @@
void** descriptorSets;
uint4 numWorkgroups;
uint4 workgroupID;
+ PushConstantStorage pushConstants;
};
SpirvRoutine routine;
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 57df1cd..7d78e69 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -31,6 +31,8 @@
{
enableIndex = 0;
+ routine.pushConstants = data + OFFSET(DrawData, pushConstants);
+
spirvShader->emit(&routine);
spirvShader->emitEpilog(&routine);
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 7a858b0..46a8fd5 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -181,6 +181,7 @@
break;
case spv::StorageClassUniform:
case spv::StorageClassStorageBuffer:
+ case spv::StorageClassPushConstant:
object.kind = Object::Kind::PhysicalPointer;
break;
@@ -192,7 +193,6 @@
case spv::StorageClassWorkgroup:
case spv::StorageClassCrossWorkgroup:
case spv::StorageClassGeneric:
- case spv::StorageClassPushConstant:
case spv::StorageClassAtomicCounter:
case spv::StorageClassImage:
UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
@@ -653,6 +653,7 @@
{
case spv::StorageClassUniform:
case spv::StorageClassStorageBuffer:
+ case spv::StorageClassPushConstant:
return false;
default:
return true;
@@ -742,10 +743,88 @@
VisitInterfaceInner<F>(def.word(1), d, f);
}
+ SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
+ {
+ // Produce an offset into external memory in sizeof(float) units, by applying the numIndexes indices in indexIds, starting from the element type of <id>'s type.
+
+ int constantOffset = 0; // Accumulates every contribution that is known while emitting (constant indices, struct member offsets).
+ SIMD::Int dynamicOffset = SIMD::Int(0); // Accumulates contributions that come from non-constant index values.
+ auto &baseObject = getObject(id);
+ Type::ID typeId = getType(baseObject.type).element;
+
+ // The <base> operand is an intermediate value itself, i.e. produced by a previous OpAccessChain.
+ // Start with its offset (stored in component 0 of the intermediate) and build from there.
+ if (baseObject.kind == Object::Kind::Value)
+ {
+ dynamicOffset += routine->getIntermediate(id).Int(0);
+ }
+
+ for (auto i = 0u; i < numIndexes; i++) // Each index selects into the current type; typeId is advanced to the selected sub-type.
+ {
+ auto & type = getType(typeId);
+ switch (type.definition.opcode())
+ {
+ case spv::OpTypeStruct: // Member selection: the index is read as a constant and the member's Offset decoration is added.
+ {
+ int memberIndex = GetConstantInt(indexIds[i]);
+ Decorations d{};
+ ApplyDecorationsForIdMember(&d, typeId, memberIndex);
+ ASSERT(d.HasOffset);
+ constantOffset += d.Offset / sizeof(float); // NOTE(review): presumably Offset is in bytes and a multiple of sizeof(float); the division would drop any remainder - confirm.
+ typeId = type.definition.word(2u + memberIndex); // Member type ids follow the first two words of the struct type definition.
+ break;
+ }
+ case spv::OpTypeArray:
+ case spv::OpTypeRuntimeArray: // Element selection: index scaled by the ArrayStride decoration of the array type.
+ {
+ // TODO: b/127950082: Check bounds.
+ Decorations d{};
+ ApplyDecorationsForId(&d, typeId);
+ ASSERT(d.HasArrayStride);
+ auto & obj = getObject(indexIds[i]);
+ if (obj.kind == Object::Kind::Constant)
+ constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
+ else
+ dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
+ typeId = type.element;
+ break;
+ }
+ case spv::OpTypeMatrix: // Selection within a matrix: index scaled by the MatrixStride decoration looked up on the matrix type id.
+ {
+ // TODO: b/127950082: Check bounds.
+ Decorations d{};
+ ApplyDecorationsForId(&d, typeId);
+ ASSERT(d.HasMatrixStride);
+ auto & obj = getObject(indexIds[i]);
+ if (obj.kind == Object::Kind::Constant)
+ constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
+ else
+ dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
+ typeId = type.element;
+ break;
+ }
+ case spv::OpTypeVector: // Component selection: the index is added unscaled, i.e. one offset unit per component.
+ {
+ auto & obj = getObject(indexIds[i]);
+ if (obj.kind == Object::Kind::Constant)
+ constantOffset += GetConstantInt(indexIds[i]);
+ else
+ dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
+ typeId = type.element;
+ break;
+ }
+ default: // Any other type opcode is not handled by this walk.
+ UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
+ }
+ }
+
+ return dynamicOffset + SIMD::Int(constantOffset); // Result is the sum of the dynamic part and the accumulated constant part.
+ }
+
SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
{
- // TODO: think about explicit layout (UBO/SSBO) storage classes
// TODO: avoid doing per-lane work in some cases if we can?
+ // Produce a *component* offset into location-oriented memory
int constantOffset = 0;
SIMD::Int dynamicOffset = SIMD::Int(0);
@@ -1275,6 +1354,11 @@
routine->physicalPointers[resultId] = address;
break;
}
+ case spv::StorageClassPushConstant:
+ {
+ routine->physicalPointers[resultId] = routine->pushConstants;
+ break;
+ }
default:
break;
}
@@ -1372,7 +1456,17 @@
ASSERT(getObject(baseId).pointerBase == getObject(objectId).pointerBase);
auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
- dst.emplace(0, WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
+
+ if (type.storageClass == spv::StorageClassPushConstant ||
+ type.storageClass == spv::StorageClassUniform ||
+ type.storageClass == spv::StorageClassStorageBuffer)
+ {
+ dst.emplace(0, WalkExplicitLayoutAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
+ }
+ else
+ {
+ dst.emplace(0, WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine));
+ }
}
void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 1f5e075..c3bd154 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -471,6 +471,7 @@
void ProcessInterfaceVariable(Object &object);
+ SIMD::Int WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
SIMD::Int WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;
@@ -524,6 +525,7 @@
SIMD::Int activeLaneMask = SIMD::Int(0xFFFFFFFF);
std::array<Pointer<Byte>, vk::MAX_BOUND_DESCRIPTOR_SETS> descriptorSets;
+ Pointer<Byte> pushConstants;
void createLvalue(SpirvShader::Object::ID id, uint32_t size)
{
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index 182e7bd..9a40f79 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -43,6 +43,8 @@
routine.getValue(it->second.Id)[it->second.FirstComponent] =
As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, instanceID)))));
}
+
+ routine.pushConstants = data + OFFSET(DrawData, pushConstants);
}
VertexProgram::~VertexProgram()
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 0421780..2bf215d 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -139,7 +139,8 @@
executionState.pipelines[VK_PIPELINE_BIND_POINT_COMPUTE]);
pipeline->run(groupCountX, groupCountY, groupCountZ,
MAX_BOUND_DESCRIPTOR_SETS,
- executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE]);
+ executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_COMPUTE],
+ executionState.pushConstants);
}
private:
@@ -241,6 +242,8 @@
}
}
+ context.pushConstants = executionState.pushConstants;
+
executionState.renderer->setContext(context);
executionState.renderer->setScissor(pipeline->getScissor());
executionState.renderer->setViewport(pipeline->getViewport());
@@ -288,6 +291,8 @@
}
}
+ context.pushConstants = executionState.pushConstants;
+
context.indexBuffer = Cast(executionState.indexBufferBinding.buffer)->getOffsetPointer(
executionState.indexBufferBinding.offset + firstIndex * (executionState.indexType == VK_INDEX_TYPE_UINT16 ? 2 : 4));
@@ -571,6 +576,28 @@
const VkDescriptorSet descriptorSet;
};
+struct SetPushConstants : public CommandBuffer::Command // Recorded command: writes a byte range of push constant data into the execution state when played.
+{
+ SetPushConstants(uint32_t offset, uint32_t size, void const *pValues) // offset/size are in bytes; pValues supplies `size` bytes to record.
+ : offset(offset), size(size)
+ {
+ ASSERT(offset < MAX_PUSH_CONSTANT_SIZE);
+ ASSERT(offset + size <= MAX_PUSH_CONSTANT_SIZE); // NOTE(review): uint32_t addition can wrap for a very large size; `size <= MAX_PUSH_CONSTANT_SIZE - offset` would not - confirm whether callers already guarantee the range.
+
+ memcpy(data, pValues, size); // Snapshot the bytes now; pValues itself is not kept, so it is not read after construction.
+ }
+
+ void play(CommandBuffer::ExecutionState& executionState) // Applies the recorded bytes to executionState.pushConstants.
+ {
+ memcpy(&executionState.pushConstants.data[offset], data, size); // Only the [offset, offset + size) range of the destination is overwritten.
+ }
+
+private:
+ uint32_t offset; // Destination byte offset within the push constant block.
+ uint32_t size; // Number of bytes recorded in `data`.
+ unsigned char data[MAX_PUSH_CONSTANT_SIZE]; // Recorded bytes, stored from index 0; only the first `size` bytes are written by the constructor.
+};
+
CommandBuffer::CommandBuffer(VkCommandBufferLevel pLevel) : level(pLevel)
{
// FIXME (b/119409619): replace this vector by an allocator so we can control all memory allocations
@@ -740,7 +767,7 @@
void CommandBuffer::pushConstants(VkPipelineLayout layout, VkShaderStageFlags stageFlags,
uint32_t offset, uint32_t size, const void* pValues)
{
- UNIMPLEMENTED("pushConstants");
+ addCommand<SetPushConstants>(offset, size, pValues);
}
void CommandBuffer::setViewport(uint32_t firstViewport, uint32_t viewportCount, const VkViewport* pViewports)
diff --git a/src/Vulkan/VkCommandBuffer.hpp b/src/Vulkan/VkCommandBuffer.hpp
index 38d8e59..4f86071 100644
--- a/src/Vulkan/VkCommandBuffer.hpp
+++ b/src/Vulkan/VkCommandBuffer.hpp
@@ -17,6 +17,7 @@
#include "VkConfig.h"
#include "VkObject.hpp"
+#include "Device/Context.hpp"
#include <memory>
#include <vector>
@@ -126,6 +127,7 @@
Framebuffer* renderPassFramebuffer = nullptr;
Pipeline* pipelines[VK_PIPELINE_BIND_POINT_RANGE_SIZE] = {};
VkDescriptorSet boundDescriptorSets[VK_PIPELINE_BIND_POINT_RANGE_SIZE][MAX_BOUND_DESCRIPTOR_SETS] = { { VK_NULL_HANDLE } };
+ sw::PushConstantStorage pushConstants;
struct VertexInputBinding
{
diff --git a/src/Vulkan/VkConfig.h b/src/Vulkan/VkConfig.h
index 0e4f10c..772f772 100644
--- a/src/Vulkan/VkConfig.h
+++ b/src/Vulkan/VkConfig.h
@@ -57,6 +57,7 @@
{
MAX_BOUND_DESCRIPTOR_SETS = 4,
MAX_VERTEX_INPUT_BINDINGS = 16,
+ MAX_PUSH_CONSTANT_SIZE = 128,
};
enum
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 95cf71d..7dddd76 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -147,7 +147,7 @@
65536, // maxTexelBufferElements
16384, // maxUniformBufferRange
(1ul << 27), // maxStorageBufferRange
- 128, // maxPushConstantsSize
+ vk::MAX_PUSH_CONSTANT_SIZE, // maxPushConstantsSize
4096, // maxMemoryAllocationCount
4000, // maxSamplerAllocationCount
131072, // bufferImageGranularity
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index e619a03..1a28242 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -546,11 +546,11 @@
}
void ComputePipeline::run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
- size_t numDescriptorSets, VkDescriptorSet *descriptorSets)
+ size_t numDescriptorSets, VkDescriptorSet *descriptorSets, sw::PushConstantStorage const &pushConstants)
{
ASSERT_OR_RETURN(routine != nullptr);
sw::ComputeProgram::run(
- routine, reinterpret_cast<void**>(descriptorSets),
+ routine, reinterpret_cast<void**>(descriptorSets), pushConstants,
groupCountX, groupCountY, groupCountZ);
}
diff --git a/src/Vulkan/VkPipeline.hpp b/src/Vulkan/VkPipeline.hpp
index 84b6ef5..c43a8e9 100644
--- a/src/Vulkan/VkPipeline.hpp
+++ b/src/Vulkan/VkPipeline.hpp
@@ -104,7 +104,7 @@
void compileShaders(const VkAllocationCallbacks* pAllocator, const VkComputePipelineCreateInfo* pCreateInfo);
void run(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
- size_t numDescriptorSets, VkDescriptorSet *descriptorSets);
+ size_t numDescriptorSets, VkDescriptorSet *descriptorSets, sw::PushConstantStorage const &pushConstants);
protected:
sw::SpirvShader *shader = nullptr;