Refactor Queue::Submit to use internal structure

Before this CL, Queue::submit() would internally use the VkSubmitInfo
structure to store the queue submit related information. This had the
downside of having to re-parse the pNext chain in submitQueue(),
which is now no longer the case in this CL, since the internal
structure no longer includes a pNext pointer.

Also, pointers in the internal structure with a size of 0 are no
longer assigned a value and are set to nullptr instead.

This CL paves the way for the VK_KHR_synchronization2 implementation,
which will be able to use the same internal structure as the one used
by Queue::submit().

Bug: b/204110005
Change-Id: Ib99261ef101680519bed57892a284ae20b229661
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/58591
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
Commit-Queue: Alexis Hétu <sugoi@google.com>
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index 419658d..f664fea 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -29,11 +29,11 @@
 
 #include <cstring>
 
-namespace {
+namespace vk {
 
-VkSubmitInfo *DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits)
+Queue::SubmitInfo *Queue::DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits)
 {
-	size_t submitSize = sizeof(VkSubmitInfo) * submitCount;
+	size_t submitSize = sizeof(SubmitInfo) * submitCount;
 	size_t totalSize = submitSize;
 	for(uint32_t i = 0; i < submitCount; i++)
 	{
@@ -50,7 +50,6 @@
 			case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
 				{
 					const auto *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
-					totalSize += sizeof(VkTimelineSemaphoreSubmitInfo);
 					totalSize += tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
 					totalSize += tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
 				}
@@ -72,31 +71,53 @@
 	uint8_t *mem = static_cast<uint8_t *>(
 	    vk::allocateHostMemory(totalSize, vk::REQUIRED_MEMORY_ALIGNMENT, vk::NULL_ALLOCATION_CALLBACKS, vk::Fence::GetAllocationScope()));
 
-	auto submits = new(mem) VkSubmitInfo[submitCount];
-	memcpy(mem, pSubmits, submitSize);
+	auto submits = new(mem) SubmitInfo[submitCount];
 	mem += submitSize;
 
 	for(uint32_t i = 0; i < submitCount; i++)
 	{
-		size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
-		submits[i].pWaitSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
-		memcpy(mem, pSubmits[i].pWaitSemaphores, size);
-		mem += size;
+		submits[i].commandBufferCount = pSubmits[i].commandBufferCount;
+		submits[i].signalSemaphoreCount = pSubmits[i].signalSemaphoreCount;
+		submits[i].waitSemaphoreCount = pSubmits[i].waitSemaphoreCount;
 
-		size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
-		submits[i].pWaitDstStageMask = reinterpret_cast<const VkPipelineStageFlags *>(mem);
-		memcpy(mem, pSubmits[i].pWaitDstStageMask, size);
-		mem += size;
+		submits[i].pWaitSemaphores = nullptr;
+		submits[i].pWaitDstStageMask = nullptr;
+		submits[i].pSignalSemaphores = nullptr;
+		submits[i].pCommandBuffers = nullptr;
 
-		size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
-		submits[i].pSignalSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
-		memcpy(mem, pSubmits[i].pSignalSemaphores, size);
-		mem += size;
+		if(pSubmits[i].waitSemaphoreCount > 0)
+		{
+			size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
+			submits[i].pWaitSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
+			memcpy(mem, pSubmits[i].pWaitSemaphores, size);
+			mem += size;
 
-		size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
-		submits[i].pCommandBuffers = reinterpret_cast<const VkCommandBuffer *>(mem);
-		memcpy(mem, pSubmits[i].pCommandBuffers, size);
-		mem += size;
+			size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
+			submits[i].pWaitDstStageMask = reinterpret_cast<const VkPipelineStageFlags *>(mem);
+			memcpy(mem, pSubmits[i].pWaitDstStageMask, size);
+			mem += size;
+		}
+
+		if(pSubmits[i].signalSemaphoreCount > 0)
+		{
+			size_t size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
+			submits[i].pSignalSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
+			memcpy(mem, pSubmits[i].pSignalSemaphores, size);
+			mem += size;
+		}
+
+		if(pSubmits[i].commandBufferCount > 0)
+		{
+			size_t size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
+			submits[i].pCommandBuffers = reinterpret_cast<const VkCommandBuffer *>(mem);
+			memcpy(mem, pSubmits[i].pCommandBuffers, size);
+			mem += size;
+		}
+
+		submits[i].waitSemaphoreValueCount = 0;
+		submits[i].pWaitSemaphoreValues = nullptr;
+		submits[i].signalSemaphoreValueCount = 0;
+		submits[i].pSignalSemaphoreValues = nullptr;
 
 		for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
 		    extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
@@ -107,24 +128,23 @@
 				{
 					const VkTimelineSemaphoreSubmitInfo *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
 
-					size = sizeof(VkTimelineSemaphoreSubmitInfo);
-					VkTimelineSemaphoreSubmitInfo *tlsSubmitInfoCopy = reinterpret_cast<VkTimelineSemaphoreSubmitInfo *>(mem);
-					memcpy(mem, extension, size);
-					// Don't copy the pNext pointer at all.
-					tlsSubmitInfoCopy->pNext = nullptr;
-					mem += size;
+					if(tlsSubmitInfo->waitSemaphoreValueCount > 0)
+					{
+						submits[i].waitSemaphoreValueCount = tlsSubmitInfo->waitSemaphoreValueCount;
+						size_t size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
+						submits[i].pWaitSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
+						memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size);
+						mem += size;
+					}
 
-					size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
-					tlsSubmitInfoCopy->pWaitSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
-					memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size);
-					mem += size;
-
-					size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
-					tlsSubmitInfoCopy->pSignalSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
-					memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size);
-					mem += size;
-
-					submits[i].pNext = tlsSubmitInfoCopy;
+					if(tlsSubmitInfo->signalSemaphoreValueCount > 0)
+					{
+						submits[i].signalSemaphoreValueCount = tlsSubmitInfo->signalSemaphoreValueCount;
+						size_t size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
+						submits[i].pSignalSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
+						memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size);
+						mem += size;
+					}
 				}
 				break;
 			case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO:
@@ -144,10 +164,6 @@
 	return submits;
 }
 
-}  // anonymous namespace
-
-namespace vk {
-
 Queue::Queue(Device *device, marl::Scheduler *scheduler)
     : device(device)
 {
@@ -193,36 +209,13 @@
 
 	for(uint32_t i = 0; i < task.submitCount; i++)
 	{
-		VkSubmitInfo &submitInfo = task.pSubmits[i];
-		const VkTimelineSemaphoreSubmitInfo *timelineInfo = nullptr;
-		for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(submitInfo.pNext);
-		    nextInfo != nullptr; nextInfo = nextInfo->pNext)
-		{
-			switch(nextInfo->sType)
-			{
-			case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
-				timelineInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(submitInfo.pNext);
-				break;
-			case VK_STRUCTURE_TYPE_MAX_ENUM:
-				// dEQP tests that this value is ignored.
-				break;
-			case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO:
-				// SwiftShader doesn't use device group submit info because it only supports a single physical device.
-				// However, this extension is core in Vulkan 1.1, so we must treat it as a valid structure type.
-				break;
-			default:
-				UNSUPPORTED("submitInfo.pNext->sType = %s", vk::Stringify(nextInfo->sType).c_str());
-				break;
-			}
-		}
-
+		SubmitInfo &submitInfo = task.pSubmits[i];
 		for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++)
 		{
 			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pWaitSemaphores[j]))
 			{
-				ASSERT_MSG(timelineInfo != nullptr,
-				           "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
-				sem->wait(timelineInfo->pWaitSemaphoreValues[j]);
+				ASSERT(j < submitInfo.waitSemaphoreValueCount);
+				sem->wait(submitInfo.pWaitSemaphoreValues[j]);
 			}
 			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pWaitSemaphores[j]))
 			{
@@ -248,9 +241,8 @@
 		{
 			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pSignalSemaphores[j]))
 			{
-				ASSERT_MSG(timelineInfo != nullptr,
-				           "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
-				sem->signal(timelineInfo->pSignalSemaphoreValues[j]);
+				ASSERT(j < submitInfo.signalSemaphoreValueCount);
+				sem->signal(submitInfo.pSignalSemaphoreValues[j]);
 			}
 			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pSignalSemaphores[j]))
 			{
diff --git a/src/Vulkan/VkQueue.hpp b/src/Vulkan/VkQueue.hpp
index 08ff88c..f21b1fe 100644
--- a/src/Vulkan/VkQueue.hpp
+++ b/src/Vulkan/VkQueue.hpp
@@ -61,10 +61,25 @@
 	void insertDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo);
 
 private:
+	struct SubmitInfo
+	{
+		uint32_t waitSemaphoreCount;
+		const VkSemaphore *pWaitSemaphores;
+		const VkPipelineStageFlags *pWaitDstStageMask;
+		uint32_t commandBufferCount;
+		const VkCommandBuffer *pCommandBuffers;
+		uint32_t signalSemaphoreCount;
+		const VkSemaphore *pSignalSemaphores;
+		uint32_t waitSemaphoreValueCount;
+		const uint64_t *pWaitSemaphoreValues;
+		uint32_t signalSemaphoreValueCount;
+		const uint64_t *pSignalSemaphoreValues;
+	};
+
 	struct Task
 	{
 		uint32_t submitCount = 0;
-		VkSubmitInfo *pSubmits = nullptr;
+		SubmitInfo *pSubmits = nullptr;
 		std::shared_ptr<sw::CountedEvent> events;
 
 		enum Type
@@ -78,11 +93,12 @@
 	void taskLoop(marl::Scheduler *scheduler);
 	void garbageCollect();
 	void submitQueue(const Task &task);
+	static SubmitInfo *DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits);
 
 	Device *device;
 	std::unique_ptr<sw::Renderer> renderer;
 	sw::Chan<Task> pending;
-	sw::Chan<VkSubmitInfo *> toDelete;
+	sw::Chan<SubmitInfo *> toDelete;
 	std::thread queueThread;
 };