Add support for per-instance (instance-rate) vertex attributes

Bug: b/129149966
Test: dEQP-VK.*instance*
Change-Id: I51642d32a8390495f5d9ecd0bb64bca1db33a03d
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/26689
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index de4075d..765e594 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -314,7 +314,7 @@
 		for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
 		{
 			data->input[i] = context->input[i].buffer;
-			data->stride[i] = context->input[i].stride;
+			data->stride[i] = context->input[i].vertexStride;
 		}
 
 		if(context->indexBuffer)
@@ -322,7 +322,7 @@
 			data->indices = context->indexBuffer;
 		}
 
-		if(context->vertexShader->hasBuiltinInput(spv::BuiltInInstanceId))
+		if(context->vertexShader->hasBuiltinInput(spv::BuiltInInstanceIndex))
 		{
 			data->instanceID = context->instanceID;
 		}
@@ -1550,6 +1550,19 @@
 		queries.remove(query);
 	}
 
+	void Renderer::advanceInstanceAttributes()
+	{
+		for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
+		{
+			auto &attrib = context->input[i];
+			if (attrib.count && attrib.instanceStride)
+			{
+				// Under the casts: attrib.buffer += attrib.instanceStride
+				attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
+			}
+		}
+	}
+
 	#if PERF_HUD
 		int Renderer::getThreadCount()
 		{
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index e51b788..b65cea4 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -250,6 +250,8 @@
 		void addQuery(Query *query);
 		void removeQuery(Query *query);
 
+		void advanceInstanceAttributes();
+
 		void synchronize();
 
 		#if PERF_HUD
diff --git a/src/Device/Stream.hpp b/src/Device/Stream.hpp
index 54841e3..b9ff604 100644
--- a/src/Device/Stream.hpp
+++ b/src/Device/Stream.hpp
@@ -39,15 +39,17 @@
 	struct StreamResource
 	{
 		const void *buffer;
-		unsigned int stride;
+		unsigned int vertexStride;
+		unsigned int instanceStride;
 	};
 
 	struct Stream : public StreamResource
 	{
-		Stream(const void *buffer = nullptr, unsigned int stride = 0)
+		Stream(const void *buffer = nullptr, unsigned int vertexStride = 0)
 		{
 			this->buffer = buffer;
-			this->stride = stride;
+			this->vertexStride = vertexStride;
+			this->instanceStride = 0;
 		}
 
 		Stream &define(StreamType type, unsigned int count, bool normalized = false)
@@ -74,7 +76,8 @@
 			static const float4 null = {0, 0, 0, 1};
 
 			buffer = &null;
-			stride = 0;
+			vertexStride = 0;
+			instanceStride = 0;
 			type = STREAMTYPE_FLOAT;
 			count = 0;
 			normalized = false;
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 06e1c1b..cbf9287 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -206,7 +206,7 @@
 	const VkIndexType indexType;
 };
 
-void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int firstVertex)
+void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int firstVertex, int firstInstance)
 {
 	for(uint32_t i = 0; i < MAX_VERTEX_INPUT_BINDINGS; i++)
 	{
@@ -216,7 +216,7 @@
 			const auto &vertexInput = vertexInputBindings[attrib.binding];
 			Buffer *buffer = Cast(vertexInput.buffer);
 			attrib.buffer = buffer ? buffer->getOffsetPointer(
-					attrib.offset + vertexInput.offset + attrib.stride * firstVertex) : nullptr;
+					attrib.offset + vertexInput.offset + attrib.vertexStride * firstVertex + attrib.instanceStride * firstInstance) : nullptr;
 		}
 	}
 }
@@ -266,7 +266,7 @@
 			executionState.pipelines[VK_PIPELINE_BIND_POINT_GRAPHICS]);
 
 		sw::Context context = pipeline->getContext();
-		executionState.bindVertexInputs(context, firstVertex);
+		executionState.bindVertexInputs(context, firstVertex, firstInstance);
 
 		const auto& boundDescriptorSets = executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_GRAPHICS];
 		for(int i = 0; i < vk::MAX_BOUND_DESCRIPTOR_SETS; i++)
@@ -284,11 +284,11 @@
 		executionState.bindAttachments();
 
 		const uint32_t primitiveCount = pipeline->computePrimitiveCount(vertexCount);
-		const uint32_t lastInstance = firstInstance + instanceCount - 1;
-		for(uint32_t instance = firstInstance; instance <= lastInstance; instance++)
+		for(uint32_t instance = firstInstance; instance != firstInstance + instanceCount; instance++)
 		{
 			executionState.renderer->setInstanceID(instance);
 			executionState.renderer->draw(context.drawType, primitiveCount);
+			executionState.renderer->advanceInstanceAttributes();
 		}
 	}
 
@@ -311,7 +311,8 @@
 				executionState.pipelines[VK_PIPELINE_BIND_POINT_GRAPHICS]);
 
 		sw::Context context = pipeline->getContext();
-		executionState.bindVertexInputs(context, vertexOffset);
+
+		executionState.bindVertexInputs(context, vertexOffset, firstInstance);
 
 		const auto& boundDescriptorSets = executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_GRAPHICS];
 		for(int i = 0; i < vk::MAX_BOUND_DESCRIPTOR_SETS; i++)
@@ -335,11 +336,11 @@
 				? (context.drawType | sw::DRAW_INDEXED16) : (context.drawType | sw::DRAW_INDEXED32);
 
 		const uint32_t primitiveCount = pipeline->computePrimitiveCount(indexCount);
-		const uint32_t lastInstance = firstInstance + instanceCount - 1;
-		for(uint32_t instance = firstInstance; instance <= lastInstance; instance++)
+		for(uint32_t instance = firstInstance; instance != firstInstance + instanceCount; instance++)
 		{
 			executionState.renderer->setInstanceID(instance);
 			executionState.renderer->draw(static_cast<sw::DrawType>(drawType), primitiveCount);
+			executionState.renderer->advanceInstanceAttributes();
 		}
 	}
 
diff --git a/src/Vulkan/VkCommandBuffer.hpp b/src/Vulkan/VkCommandBuffer.hpp
index 4fbbe35..03a0b20 100644
--- a/src/Vulkan/VkCommandBuffer.hpp
+++ b/src/Vulkan/VkCommandBuffer.hpp
@@ -140,7 +140,7 @@
 		VkIndexType indexType;
 
 		void bindAttachments();
-		void bindVertexInputs(sw::Context& context, int firstVertex);
+		void bindVertexInputs(sw::Context& context, int firstVertex, int firstInstance);
 	};
 
 	void submit(CommandBuffer::ExecutionState& executionState);
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index a0b6f2c..d3ec02b 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -276,15 +276,13 @@
 
 	// Temporary in-binding-order representation of buffer strides, to be consumed below
 	// when considering attributes. TODO: unfuse buffers from attributes in backend, is old GL model.
-	uint32_t bufferStrides[MAX_VERTEX_INPUT_BINDINGS];
+	uint32_t vertexStrides[MAX_VERTEX_INPUT_BINDINGS];
+	uint32_t instanceStrides[MAX_VERTEX_INPUT_BINDINGS];
 	for(uint32_t i = 0; i < vertexInputState->vertexBindingDescriptionCount; i++)
 	{
 		auto const & desc = vertexInputState->pVertexBindingDescriptions[i];
-		bufferStrides[desc.binding] = desc.stride;
-		if(desc.inputRate != VK_VERTEX_INPUT_RATE_VERTEX)
-		{
-			UNIMPLEMENTED("vertexInputState->pVertexBindingDescriptions[%d]", i);
-		}
+		vertexStrides[desc.binding] = desc.inputRate == VK_VERTEX_INPUT_RATE_VERTEX ? desc.stride : 0;
+		instanceStrides[desc.binding] = desc.inputRate == VK_VERTEX_INPUT_RATE_INSTANCE ? desc.stride : 0;
 	}
 
 	for(uint32_t i = 0; i < vertexInputState->vertexAttributeDescriptionCount; i++)
@@ -296,7 +294,8 @@
 		input.normalized = !vk::Format(desc.format).isNonNormalizedInteger();
 		input.offset = desc.offset;
 		input.binding = desc.binding;
-		input.stride = bufferStrides[desc.binding];
+		input.vertexStride = vertexStrides[desc.binding];
+		input.instanceStride = instanceStrides[desc.binding];
 	}
 
 	const VkPipelineInputAssemblyStateCreateInfo* assemblyState = pCreateInfo->pInputAssemblyState;