Fix various issues in vertex fetch setup

There are still some minor sins in here -- we should really unfuse
attributes from buffers completely -- but this is enough to have vertex
fetch work for all per-vertex attribute scenarios.

Bug: b/124177079

Change-Id: I2a7a1a6f049aa80c1a527e9fa9643bb33701d165
Reviewed-on: https://swiftshader-review.googlesource.com/c/25448
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Device/Stream.hpp b/src/Device/Stream.hpp
index 1bc9b96..31a80ab 100644
--- a/src/Device/Stream.hpp
+++ b/src/Device/Stream.hpp
@@ -87,6 +87,8 @@
 			type = STREAMTYPE_FLOAT;
 			count = 0;
 			normalized = false;
+			offset = 0;
+			binding = 0;
 
 			return *this;
 		}
@@ -99,6 +101,8 @@
 		StreamType type;
 		unsigned char count;
 		bool normalized;
+		unsigned int offset;
+		unsigned int binding;
 	};
 }
 
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index b21186c..613ede3 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -88,10 +88,10 @@
 				spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
 			{
 
-				Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * i);
-				UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(unsigned int) * i);
+				Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i/4));
+				UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(unsigned int) * (i/4));
 
-				auto value = readStream(input, stride, state.input[i], index);
+				auto value = readStream(input, stride, state.input[i/4], index);
 				routine.inputs[i] = value.x;
 				routine.inputs[i+1] = value.y;
 				routine.inputs[i+2] = value.z;
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 4578fe5..5113021 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -155,9 +155,14 @@
 		sw::Context context = pipeline->getContext();
 		for(uint32_t i = 0; i < MAX_VERTEX_INPUT_BINDINGS; i++)
 		{
-			const auto& vertexInput = executionState.vertexInputBindings[i];
-			Buffer* buffer = Cast(vertexInput.buffer);
-			context.input[i].buffer = buffer ? buffer->getOffsetPointer(vertexInput.offset + context.input[i].stride * firstVertex) : nullptr;
+			auto &attrib = context.input[i];
+			if (attrib.count)
+			{
+				const auto &vertexInput = executionState.vertexInputBindings[attrib.binding];
+				Buffer *buffer = Cast(vertexInput.buffer);
+				attrib.buffer = buffer ? buffer->getOffsetPointer(
+						attrib.offset + vertexInput.offset + attrib.stride * firstVertex) : nullptr;
+			}
 		}
 
 		executionState.renderer->setContext(context);
@@ -567,9 +572,9 @@
 void CommandBuffer::bindVertexBuffers(uint32_t firstBinding, uint32_t bindingCount,
                                       const VkBuffer* pBuffers, const VkDeviceSize* pOffsets)
 {
-	for(uint32_t i = firstBinding; i < (firstBinding + bindingCount); ++i)
+	for(uint32_t i = 0; i < bindingCount; ++i)
 	{
-		addCommand<VertexBufferBind>(i, pBuffers[i], pOffsets[i]);
+		addCommand<VertexBufferBind>(i + firstBinding, pBuffers[i], pOffsets[i]);
 	}
 }
 
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index 82591f4..b6d2ec7 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -229,11 +229,14 @@
 		UNIMPLEMENTED();
 	}
 
+	// Temporary in-binding-order representation of buffer strides, to be consumed below
+	// when considering attributes. TODO: unfuse buffers from attributes in the backend; fusing them is the old GL model.
+	uint32_t bufferStrides[MAX_VERTEX_INPUT_BINDINGS];
 	for(uint32_t i = 0; i < vertexInputState->vertexBindingDescriptionCount; i++)
 	{
-		const VkVertexInputBindingDescription* vertexBindingDescription = vertexInputState->pVertexBindingDescriptions;
-		context.input[vertexBindingDescription->binding].stride = vertexBindingDescription->stride;
-		if(vertexBindingDescription->inputRate != VK_VERTEX_INPUT_RATE_VERTEX)
+		auto const & desc = vertexInputState->pVertexBindingDescriptions[i];
+		bufferStrides[desc.binding] = desc.stride;
+		if(desc.inputRate != VK_VERTEX_INPUT_RATE_VERTEX)
 		{
 			UNIMPLEMENTED();
 		}
@@ -241,20 +244,14 @@
 
 	for(uint32_t i = 0; i < vertexInputState->vertexAttributeDescriptionCount; i++)
 	{
-		const VkVertexInputAttributeDescription* vertexAttributeDescriptions = vertexInputState->pVertexAttributeDescriptions;
-		sw::Stream& input = context.input[vertexAttributeDescriptions->binding];
-		input.count = getNumberOfChannels(vertexAttributeDescriptions->format);
-		input.type = getStreamType(vertexAttributeDescriptions->format);
-		input.normalized = !sw::Surface::isNonNormalizedInteger(vertexAttributeDescriptions->format);
-
-		if(vertexAttributeDescriptions->location != vertexAttributeDescriptions->binding)
-		{
-			UNIMPLEMENTED();
-		}
-		if(vertexAttributeDescriptions->offset != 0)
-		{
-			UNIMPLEMENTED();
-		}
+		auto const & desc = vertexInputState->pVertexAttributeDescriptions[i];
+		sw::Stream& input = context.input[desc.location];
+		input.count = getNumberOfChannels(desc.format);
+		input.type = getStreamType(desc.format);
+		input.normalized = !sw::Surface::isNonNormalizedInteger(desc.format);
+		input.offset = desc.offset;
+		input.binding = desc.binding;
+		input.stride = bufferStrides[desc.binding];
 	}
 
 	const VkPipelineInputAssemblyStateCreateInfo* assemblyState = pCreateInfo->pInputAssemblyState;