Implement descriptor indexing for storage buffers Descriptor indexing is a part of the 2022 Vulkan roadmap. It allows shaders to use non-constant, non-uniform indices for descriptor arrays with various types. This CL allows the use of non-uniform indices for storage buffers. Tests: dEQP-VK.descriptor_indexing.* Bug: b/206633340 Change-Id: Ice05f8ff8e5a12298a0c42806204e2c9379ac9aa Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/64928 Reviewed-by: Nicolas Capens <nicolascapens@google.com> Commit-Queue: Sean Risser <srisser@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com> Kokoro-Result: kokoro <noreply+kokoro@google.com> Tested-by: Sean Risser <srisser@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp index 99a2d44..1a7dff9 100644 --- a/src/Pipeline/SpirvShader.cpp +++ b/src/Pipeline/SpirvShader.cpp
@@ -447,6 +447,9 @@ case spv::CapabilityStencilExportEXT: capabilities.StencilExportEXT = true; break; case spv::CapabilityVulkanMemoryModel: capabilities.VulkanMemoryModel = true; break; case spv::CapabilityVulkanMemoryModelDeviceScope: capabilities.VulkanMemoryModelDeviceScope = true; break; + case spv::CapabilityShaderNonUniform: capabilities.ShaderNonUniform = true; break; + case spv::CapabilityRuntimeDescriptorArray: capabilities.RuntimeDescriptorArray = true; break; + case spv::CapabilityStorageBufferArrayNonUniformIndexing: capabilities.StorageBufferArrayNonUniformIndexing = true; break; default: UNSUPPORTED("Unsupported capability %u", insn.word(1)); } @@ -810,6 +813,7 @@ if(!strcmp(ext, "SPV_GOOGLE_decorate_string")) break; if(!strcmp(ext, "SPV_GOOGLE_hlsl_functionality1")) break; if(!strcmp(ext, "SPV_GOOGLE_user_type")) break; + if(!strcmp(ext, "SPV_EXT_descriptor_indexing")) break; UNSUPPORTED("SPIR-V Extension: %s", ext); } break; @@ -1243,15 +1247,15 @@ } } -SIMD::Pointer SpirvShader::WalkExplicitLayoutAccessChain(Object::ID baseId, Object::ID elementId, const Span &indexIds, const EmitState *state) const +SIMD::Pointer SpirvShader::WalkExplicitLayoutAccessChain(Object::ID baseId, Object::ID elementId, const Span &indexIds, bool nonUniform, const EmitState *state) const { // Produce a offset into external memory in sizeof(float) units auto &baseObject = getObject(baseId); Type::ID typeId = getType(baseObject).element; Decorations d = GetDecorationsForId(baseObject.typeId()); + SIMD::Int arrayIndex = 0; - Int arrayIndex = 0; uint32_t start = 0; if(baseObject.kind == Object::Kind::DescriptorSet) { @@ -1266,8 +1270,8 @@ } else { - // Note: the value of indexIds[0] must be dynamically uniform. 
- arrayIndex = Extract(state->getIntermediate(indexIds[0]).Int(0), 0); + nonUniform |= GetDecorationsForId(indexIds[0]).NonUniform; + arrayIndex = state->getIntermediate(indexIds[0]).Int(0); } start = 1; @@ -1275,7 +1279,7 @@ } } - auto ptr = GetPointerToData(baseId, arrayIndex, state); + auto ptr = GetPointerToData(baseId, arrayIndex, nonUniform, state); OffsetToElement(ptr, elementId, d.ArrayStride, state); int constantOffset = 0; @@ -1536,6 +1540,10 @@ case spv::DecorationColMajor: HasRowMajor = true; RowMajor = false; + break; + case spv::DecorationNonUniform: + NonUniform = true; + break; default: // Intentionally partial, there are many decorations we just don't care about. break; @@ -1594,6 +1602,7 @@ BufferBlock |= src.BufferBlock; RelaxedPrecision |= src.RelaxedPrecision; InsideMatrix |= src.InsideMatrix; + NonUniform |= src.NonUniform; } void SpirvShader::DescriptorDecorations::Apply(const sw::SpirvShader::DescriptorDecorations &src) @@ -2206,6 +2215,7 @@ { Type::ID typeId = insn.word(1); Object::ID resultId = insn.word(2); + bool nonUniform = GetDecorationsForId(resultId).NonUniform; Object::ID baseId = insn.word(3); auto &type = getType(typeId); ASSERT(type.componentCount == 1); @@ -2218,7 +2228,7 @@ type.storageClass == spv::StorageClassUniform || type.storageClass == spv::StorageClassStorageBuffer) { - auto ptr = WalkExplicitLayoutAccessChain(baseId, elementId, Span(insn, indexId, insn.wordCount() - indexId), state); + auto ptr = WalkExplicitLayoutAccessChain(baseId, elementId, Span(insn, indexId, insn.wordCount() - indexId), nonUniform, state); state->createPointer(resultId, ptr); } else @@ -2564,7 +2574,7 @@ auto arrayId = Type::ID(structTy.definition.word(2 + arrayFieldIdx)); auto &result = state->createIntermediate(insn.resultId(), 1); - auto structBase = GetPointerToData(structPtrId, 0, state); + auto structBase = GetPointerToData(structPtrId, 0, false, state); Decorations structDecorations = {}; ApplyDecorationsForIdMember(&structDecorations, 
structPtrTy.element, arrayFieldIdx);
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp index 8cc7bc8..ebc34cb 100644 --- a/src/Pipeline/SpirvShader.hpp +++ b/src/Pipeline/SpirvShader.hpp
@@ -723,6 +723,9 @@ bool StencilExportEXT : 1; bool VulkanMemoryModel : 1; bool VulkanMemoryModelDeviceScope : 1; + bool ShaderNonUniform : 1; + bool RuntimeDescriptorArray : 1; + bool StorageBufferArrayNonUniformIndexing : 1; }; const Capabilities &getUsedCapabilities() const @@ -805,6 +808,7 @@ bool RelaxedPrecision : 1; bool RowMajor : 1; // RowMajor if true; ColMajor if false bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. + bool NonUniform : 1; Decorations() : Location{ -1 } @@ -828,6 +832,7 @@ , RelaxedPrecision{ false } , RowMajor{ false } , InsideMatrix{ false } + , NonUniform{ false } { } @@ -1280,12 +1285,12 @@ // - Pointer // - InterfaceVariable // Calling GetPointerToData with objects of any other kind will assert. - SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const; + SIMD::Pointer GetPointerToData(Object::ID id, SIMD::Int arrayIndex, bool nonUniform, EmitState const *state) const; void OffsetToElement(SIMD::Pointer &ptr, Object::ID elementId, int32_t arrayStride, EmitState const *state) const; OutOfBoundsBehavior getOutOfBoundsBehavior(Object::ID pointerId, EmitState const *state) const; - SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, Object::ID elementId, const Span &indexIds, const EmitState *state) const; + SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, Object::ID elementId, const Span &indexIds, bool nonUniform, const EmitState *state) const; SIMD::Pointer WalkAccessChain(Object::ID id, Object::ID elementId, const Span &indexIds, const EmitState *state) const; // Returns the *component* offset in the literal for the given access chain. 
@@ -1569,9 +1574,9 @@ std::array<SIMD::Float, 4> fragCoord; std::array<SIMD::Float, 4> pointCoord; SIMD::Int helperInvocation; - Int4 numWorkgroups; - Int4 workgroupID; - Int4 workgroupSize; + SIMD::Int numWorkgroups; + SIMD::Int workgroupID; + SIMD::Int workgroupSize; Int subgroupsPerWorkgroup; Int invocationsPerSubgroup; Int subgroupIndex;
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp index c99c86c..0fec9c9 100644 --- a/src/Pipeline/SpirvShaderMemory.cpp +++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -53,7 +53,7 @@ memoryOrder = MemoryOrder(memorySemantics); } - auto ptr = GetPointerToData(pointerId, 0, state); + auto ptr = GetPointerToData(pointerId, 0, false, state); bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass); auto &dst = state->createIntermediate(resultId, resultTy.componentCount); auto robustness = getOutOfBoundsBehavior(pointerId, state); @@ -98,7 +98,7 @@ ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer." - auto ptr = GetPointerToData(pointerId, 0, state); + auto ptr = GetPointerToData(pointerId, 0, false, state); bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass); auto robustness = getOutOfBoundsBehavior(pointerId, state); @@ -222,7 +222,7 @@ case spv::StorageClassWorkgroup: { bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass); - auto ptr = GetPointerToData(resultId, 0, state); + auto ptr = GetPointerToData(resultId, 0, false, state); Operand initialValue(this, state, initializerId); VisitMemoryObject(resultId, [&](const MemoryElement &el) { auto p = ptr + el.offset; @@ -256,8 +256,8 @@ bool dstInterleavedByLane = IsStorageInterleavedByLane(dstPtrTy.storageClass); bool srcInterleavedByLane = IsStorageInterleavedByLane(srcPtrTy.storageClass); - auto dstPtr = GetPointerToData(dstPtrId, 0, state); - auto srcPtr = GetPointerToData(srcPtrId, 0, state); + auto dstPtr = GetPointerToData(dstPtrId, 0, false, state); + auto srcPtr = GetPointerToData(srcPtrId, 0, false, state); std::unordered_map<uint32_t, uint32_t> srcOffsets; @@ -378,7 +378,7 @@ } } -SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const +SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, SIMD::Int arrayIndices, bool nonUniform, EmitState const *state) const { auto routine = state->routine; auto &object = getObject(id); @@ -397,37 
+397,64 @@ uint32_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding); uint32_t descriptorSize = routine->pipelineLayout->getDescriptorSize(d.DescriptorSet, d.Binding); - Int descriptorOffset = bindingOffset + descriptorSize * arrayIndex; auto set = state->getPointer(id); - Pointer<Byte> descriptor = set.getUniformPointer() + descriptorOffset; // BufferDescriptor* or inline uniform block + if(nonUniform) + { + SIMD::Int descriptorOffset = bindingOffset + descriptorSize * arrayIndices; + auto robustness = getOutOfBoundsBehavior(id, state); + ASSERT(routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding) != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT); - auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding); - if(descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) - { - // Note: there is no bounds checking for inline uniform blocks. - // MAX_INLINE_UNIFORM_BLOCK_SIZE represents the maximum size of - // an inline uniform block, but this value should remain unused. 
- return SIMD::Pointer(descriptor, vk::MAX_INLINE_UNIFORM_BLOCK_SIZE); - } - else - { - Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void* - Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes)); + std::array<Pointer<Byte>, 4> pointers; + for(int i = 0; i < SIMD::Width; i++) + { + pointers[i] = *Pointer<Pointer<Byte>>(set.getPointerForLane(i) + Extract(descriptorOffset, i) + OFFSET(vk::BufferDescriptor, ptr)); + } + + SIMD::Pointer ptr(pointers); if(routine->pipelineLayout->isDescriptorDynamic(d.DescriptorSet, d.Binding)) { - Int dynamicOffsetIndex = - routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) + - arrayIndex; - Int offset = routine->descriptorDynamicOffsets[dynamicOffsetIndex]; - Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize)); + SIMD::Int dynamicOffsetIndex = SIMD::Int(routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) + arrayIndices); + SIMD::Pointer routineDynamicOffsets = SIMD::Pointer(routine->descriptorDynamicOffsets, 0, sizeof(int) * dynamicOffsetIndex); + SIMD::Int dynamicOffsets = routineDynamicOffsets.Load<SIMD::Int>(robustness, state->activeLaneMask()); + ptr += dynamicOffsets; + } + return ptr; + } + else + { + rr::Int arrayIdx = Extract(arrayIndices, 0); + rr::Int descriptorOffset = bindingOffset + descriptorSize * arrayIdx; + Pointer<Byte> descriptor = set.getUniformPointer() + descriptorOffset; // BufferDescriptor* or inline uniform block - return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset)); + auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding); + if(descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) + { + // Note: there is no bounds checking for inline uniform blocks. + // MAX_INLINE_UNIFORM_BLOCK_SIZE represents the maximum size of + // an inline uniform block, but this value should remain unused. 
+ return SIMD::Pointer(descriptor, vk::MAX_INLINE_UNIFORM_BLOCK_SIZE); } else { - return SIMD::Pointer(data, size); + Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void* + rr::Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes)); + + if(routine->pipelineLayout->isDescriptorDynamic(d.DescriptorSet, d.Binding)) + { + rr::Int dynamicOffsetIndex = + routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) + + arrayIdx; + rr::Int offset = routine->descriptorDynamicOffsets[dynamicOffsetIndex]; + rr::Int robustnessSize = *Pointer<rr::Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize)); + + return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset)); + } + else + { + return SIMD::Pointer(data, size); + } } } }