Implement descriptor indexing for storage buffers

Descriptor indexing is a part of the 2022 Vulkan roadmap. It allows
shaders to use non-constant, non-uniform indexes for descriptor arrays
with various types. This CL allows the use of non-uniform indices for
storage buffers.

Tests: dEQP-VK.descriptor_indexing.*
Bug: b/206633340
Change-Id: Ice05f8ff8e5a12298a0c42806204e2c9379ac9aa
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/64928
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Commit-Queue: Sean Risser <srisser@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Sean Risser <srisser@google.com>
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp
index c99c86c..0fec9c9 100644
--- a/src/Pipeline/SpirvShaderMemory.cpp
+++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -53,7 +53,7 @@
 		memoryOrder = MemoryOrder(memorySemantics);
 	}
 
-	auto ptr = GetPointerToData(pointerId, 0, state);
+	auto ptr = GetPointerToData(pointerId, 0, false, state);
 	bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
 	auto &dst = state->createIntermediate(resultId, resultTy.componentCount);
 	auto robustness = getOutOfBoundsBehavior(pointerId, state);
@@ -98,7 +98,7 @@
 
 	ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
 
-	auto ptr = GetPointerToData(pointerId, 0, state);
+	auto ptr = GetPointerToData(pointerId, 0, false, state);
 	bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
 	auto robustness = getOutOfBoundsBehavior(pointerId, state);
 
@@ -222,7 +222,7 @@
 		case spv::StorageClassWorkgroup:
 			{
 				bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
-				auto ptr = GetPointerToData(resultId, 0, state);
+				auto ptr = GetPointerToData(resultId, 0, false, state);
 				Operand initialValue(this, state, initializerId);
 				VisitMemoryObject(resultId, [&](const MemoryElement &el) {
 					auto p = ptr + el.offset;
@@ -256,8 +256,8 @@
 
 	bool dstInterleavedByLane = IsStorageInterleavedByLane(dstPtrTy.storageClass);
 	bool srcInterleavedByLane = IsStorageInterleavedByLane(srcPtrTy.storageClass);
-	auto dstPtr = GetPointerToData(dstPtrId, 0, state);
-	auto srcPtr = GetPointerToData(srcPtrId, 0, state);
+	auto dstPtr = GetPointerToData(dstPtrId, 0, false, state);
+	auto srcPtr = GetPointerToData(srcPtrId, 0, false, state);
 
 	std::unordered_map<uint32_t, uint32_t> srcOffsets;
 
@@ -378,7 +378,7 @@
 	}
 }
 
-SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const
+SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, SIMD::Int arrayIndices, bool nonUniform, EmitState const *state) const
 {
 	auto routine = state->routine;
 	auto &object = getObject(id);
@@ -397,37 +397,64 @@
 
 			uint32_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
 			uint32_t descriptorSize = routine->pipelineLayout->getDescriptorSize(d.DescriptorSet, d.Binding);
-			Int descriptorOffset = bindingOffset + descriptorSize * arrayIndex;
 
 			auto set = state->getPointer(id);
-			Pointer<Byte> descriptor = set.getUniformPointer() + descriptorOffset;  // BufferDescriptor* or inline uniform block
+			if(nonUniform)
+			{
+				SIMD::Int descriptorOffset = bindingOffset + descriptorSize * arrayIndices;
+				auto robustness = getOutOfBoundsBehavior(id, state);
+				ASSERT(routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding) != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
 
-			auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
-			if(descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
-			{
-				// Note: there is no bounds checking for inline uniform blocks.
-				// MAX_INLINE_UNIFORM_BLOCK_SIZE represents the maximum size of
-				// an inline uniform block, but this value should remain unused.
-				return SIMD::Pointer(descriptor, vk::MAX_INLINE_UNIFORM_BLOCK_SIZE);
-			}
-			else
-			{
-				Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr));  // void*
-				Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
+				std::array<Pointer<Byte>, 4> pointers;
+				for(int i = 0; i < SIMD::Width; i++)
+				{
+					pointers[i] = *Pointer<Pointer<Byte>>(set.getPointerForLane(i) + Extract(descriptorOffset, i) + OFFSET(vk::BufferDescriptor, ptr));
+				}
+
+				SIMD::Pointer ptr(pointers);
 
 				if(routine->pipelineLayout->isDescriptorDynamic(d.DescriptorSet, d.Binding))
 				{
-					Int dynamicOffsetIndex =
-					    routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) +
-					    arrayIndex;
-					Int offset = routine->descriptorDynamicOffsets[dynamicOffsetIndex];
-					Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
+					SIMD::Int dynamicOffsetIndex = SIMD::Int(routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) + arrayIndices);
+					SIMD::Pointer routineDynamicOffsets = SIMD::Pointer(routine->descriptorDynamicOffsets, 0, sizeof(int) * dynamicOffsetIndex);
+					SIMD::Int dynamicOffsets = routineDynamicOffsets.Load<SIMD::Int>(robustness, state->activeLaneMask());
+					ptr += dynamicOffsets;
+				}
+				return ptr;
+			}
+			else
+			{
+				rr::Int arrayIdx = Extract(arrayIndices, 0);
+				rr::Int descriptorOffset = bindingOffset + descriptorSize * arrayIdx;
+				Pointer<Byte> descriptor = set.getUniformPointer() + descriptorOffset;  // BufferDescriptor* or inline uniform block
 
-					return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
+				auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
+				if(descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+				{
+					// Note: there is no bounds checking for inline uniform blocks.
+					// MAX_INLINE_UNIFORM_BLOCK_SIZE represents the maximum size of
+					// an inline uniform block, but this value should remain unused.
+					return SIMD::Pointer(descriptor, vk::MAX_INLINE_UNIFORM_BLOCK_SIZE);
 				}
 				else
 				{
-					return SIMD::Pointer(data, size);
+					Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr));  // void*
+					rr::Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
+
+					if(routine->pipelineLayout->isDescriptorDynamic(d.DescriptorSet, d.Binding))
+					{
+						rr::Int dynamicOffsetIndex =
+						    routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) +
+						    arrayIdx;
+						rr::Int offset = routine->descriptorDynamicOffsets[dynamicOffsetIndex];
+						rr::Int robustnessSize = *Pointer<rr::Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
+
+						return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
+					}
+					else
+					{
+						return SIMD::Pointer(data, size);
+					}
 				}
 			}
 		}