Negative baseVertex support

baseVertex is a signed integer that is allowed to have a negative value.
In order to support it, the offset was moved from being applied
directly to the vertex buffer memory pointer to being added to the
indices read from the index buffer inside VertexRoutine.
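
The key detail is that the signed baseVertex can be reinterpreted as
unsigned and added to the fetched indices, because unsigned arithmetic
wraps modulo 2^32. A minimal sketch of the new offset computation in
plain C++ (illustrative names, not the Reactor code from this patch):

    #include <cstdint>
    #include <cstdio>

    // Per-lane byte offset into the vertex buffer. The signed baseVertex
    // is reinterpreted as unsigned, so a negative value wraps modulo 2^32,
    // mirroring As<UInt4>(Int4(baseVertex)) in VertexRoutine::readStream.
    uint32_t vertexOffset(uint32_t index, int32_t baseVertex, uint32_t stride)
    {
        return (index + static_cast<uint32_t>(baseVertex)) * stride;
    }

    int main()
    {
        // Index 5 drawn with baseVertex -3 reads vertex 2: 2 * 16 = 32.
        std::printf("%u\n", vertexOffset(5, -3, 16));
        return 0;
    }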

Fixes all failures in the following test category when running with
SwANGLE:
dEQP-GLES31.functional.draw_indirect.*
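
These are indirect indexed draws, where the (possibly negative)
baseVertex arrives as the vertexOffset field of the indirect command.
For illustration, a hypothetical command that exercises the fixed path
(the struct is from the Vulkan headers; the values are made up):

    #include <vulkan/vulkan_core.h>

    // An indirect indexed draw whose baseVertex is negative: every index
    // read from the index buffer is rebased by -16 before vertex fetch.
    VkDrawIndexedIndirectCommand cmd = {
        /* indexCount    */ 36,
        /* instanceCount */ 1,
        /* firstIndex    */ 0,
        /* vertexOffset  */ -16,
        /* firstInstance */ 0,
    };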

Bug: b/144353667
Change-Id: Ia70c258eab25c274553c4955dc2336a42d43f005
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/38148
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index 5bb3918..55c7b41 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -70,13 +70,9 @@
 		{
 			assert(it->second.SizeInComponents == 1);
 
-			Int4 indices;
-			indices = Insert(indices, As<Int>(batch[0]), 0);
-			indices = Insert(indices, As<Int>(batch[1]), 1);
-			indices = Insert(indices, As<Int>(batch[2]), 2);
-			indices = Insert(indices, As<Int>(batch[3]), 3);
 			routine.getVariable(it->second.Id)[it->second.FirstComponent] =
-					As<Float4>(indices + Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex))));
+					As<Float4>(*Pointer<Int4>(As<Pointer<Int4>>(batch)) +
+					           Int4(*Pointer<Int>(data + OFFSET(DrawData, baseVertex))));
 		}
 
 		auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index baa69e4..1c90a6e 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -94,13 +94,14 @@
 			{
 				Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4));
 				UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
+				Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex));
 				UInt robustnessSize(0);
 				if(state.robustBufferAccess)
 				{
 					robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4));
 				}
 
-				auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize);
+				auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize, baseVertex);
 				routine.inputs[i + 0] = value.x;
 				routine.inputs[i + 1] = value.y;
 				routine.inputs[i + 2] = value.z;
@@ -143,10 +144,15 @@
 	}
 
 	Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
-	                                   bool robustBufferAccess, UInt & robustnessSize)
+	                                   bool robustBufferAccess, UInt & robustnessSize, Int baseVertex)
 	{
 		Vector4f v;
-		UInt4 offsets = *Pointer<UInt4>(As<Pointer<UInt4>>(batch)) * UInt4(stride);
+		// Because of the following rule in the Vulkan spec, we do not care if a very large negative
+		// baseVertex would overflow all the way back into a valid region of the vertex buffer:
+		// "Out-of-bounds buffer loads will return any of the following values:
+		//  - Values from anywhere within the memory range(s) bound to the buffer (possibly including
+		//    bytes of memory past the end of the buffer, up to the end of the bound range)."
+		UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride);
 
 		Pointer<Byte> source0 = buffer + offsets.x;
 		Pointer<Byte> source1 = buffer + offsets.y;
diff --git a/src/Pipeline/VertexRoutine.hpp b/src/Pipeline/VertexRoutine.hpp
index a7d3103..69dfa7c 100644
--- a/src/Pipeline/VertexRoutine.hpp
+++ b/src/Pipeline/VertexRoutine.hpp
@@ -67,7 +67,7 @@
 		typedef VertexProcessor::State::Input Stream;
 
 		Vector4f readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
-		                    bool robustBufferAccess, UInt& robustnessSize);
+		                    bool robustBufferAccess, UInt& robustnessSize, Int baseVertex);
 		void readInput(Pointer<UInt> &batch);
 		void computeClipFlags();
 		void writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch);
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index da411cd..de7057d 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -411,7 +411,7 @@
 	uint32_t reference;
 };
 
-void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int firstVertex, int firstInstance)
+void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int firstInstance)
 {
 	for(uint32_t i = 0; i < MAX_VERTEX_INPUT_BINDINGS; i++)
 	{
@@ -420,7 +420,6 @@
 		{
 			const auto &vertexInput = vertexInputBindings[attrib.binding];
 			VkDeviceSize offset = attrib.offset + vertexInput.offset +
-			                      attrib.vertexStride * firstVertex +
 			                      attrib.instanceStride * firstInstance;
 			attrib.buffer = vertexInput.buffer ? vertexInput.buffer->getOffsetPointer(offset) : nullptr;
 
@@ -524,7 +523,7 @@
 
 		sw::Context context = pipeline->getContext();
 
-		executionState.bindVertexInputs(context, vertexOffset, firstInstance);
+		executionState.bindVertexInputs(context, firstInstance);
 
 		context.descriptorSets = pipelineState.descriptorSets;
 		context.descriptorDynamicOffsets = pipelineState.descriptorDynamicOffsets;
diff --git a/src/Vulkan/VkCommandBuffer.hpp b/src/Vulkan/VkCommandBuffer.hpp
index b592add..01664e4 100644
--- a/src/Vulkan/VkCommandBuffer.hpp
+++ b/src/Vulkan/VkCommandBuffer.hpp
@@ -180,7 +180,7 @@
 		uint32_t subpassIndex = 0;
 
 		void bindAttachments(sw::Context& context);
-		void bindVertexInputs(sw::Context& context, int firstVertex, int firstInstance);
+		void bindVertexInputs(sw::Context& context, int firstInstance);
 	};
 
 	void submit(CommandBuffer::ExecutionState& executionState);