Threaded Queue submit with events and fences

This CL does three main things:
- It pushes the queue submit operation to its own thread
- It implements events
- It implements fences

Some details:
- Because we can have N async draw operations and the fence must be
  signaled only after all of them are completed, fences have an
  add/done mechanism: each operation is added to the fence, and the
  fence is signaled once every added operation is done.
- Device::waitForFences() detects large timeouts to avoid integer
  overflow: if now+timeout would exceed the largest nanosecond count
  that fits in a long long, the wait is treated as infinite.

Bug b/117835459

Change-Id: I2f02c3b4bb9d9ac9037909b02b0601e1bae15d21
Tests: dEQP-VK.synchronization.*
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29769
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index ca91985..6b082e6 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -17,11 +17,22 @@
 #include "VkConfig.h"
 #include "VkDebug.hpp"
 #include "VkDescriptorSetLayout.hpp"
+#include "VkFence.hpp"
 #include "VkQueue.hpp"
 #include "Device/Blitter.hpp"
 
+#include <chrono>
+#include <climits>
 #include <new> // Must #include this to use "placement new"
 
+namespace
+{
+	std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> now()
+	{
+		return std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now());
+	}
+}
+
 namespace vk
 {
 
@@ -42,7 +53,7 @@
 
 		for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
 		{
-			new (&queues[queueID]) Queue(queueCreateInfo.queueFamilyIndex, queueCreateInfo.pQueuePriorities[j]);
+			new (&queues[queueID]) Queue();
 		}
 	}
 
@@ -86,17 +97,85 @@
 	return queues[queueIndex];
 }
 
-void Device::waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout)
+VkResult Device::waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout)
 {
-	// FIXME(b/117835459) : noop
+	const time_point start = now();
+	const uint64_t max_timeout = (LLONG_MAX - start.time_since_epoch().count());
+	bool infiniteTimeout = (timeout > max_timeout);
+	const time_point end_ns = start + std::chrono::nanoseconds(std::min(max_timeout, timeout));
+	if(waitAll) // All fences must be signaled
+	{
+		for(uint32_t i = 0; i < fenceCount; i++)
+		{
+			if(timeout == 0)
+			{
+				if(Cast(pFences[i])->getStatus() != VK_SUCCESS) // At least one fence is not signaled
+				{
+					return VK_TIMEOUT;
+				}
+			}
+			else if(infiniteTimeout)
+			{
+				if(Cast(pFences[i])->wait() != VK_SUCCESS) // At least one fence is not signaled
+				{
+					return VK_TIMEOUT;
+				}
+			}
+			else
+			{
+				if(Cast(pFences[i])->waitUntil(end_ns) != VK_SUCCESS) // At least one fence is not signaled
+				{
+					return VK_TIMEOUT;
+				}
+			}
+		}
+
+		return VK_SUCCESS;
+	}
+	else // At least one fence must be signaled
+	{
+		// Start by quickly checking the status of all fences, as only one is required
+		for(uint32_t i = 0; i < fenceCount; i++)
+		{
+			if(Cast(pFences[i])->getStatus() == VK_SUCCESS) // At least one fence is signaled
+			{
+				return VK_SUCCESS;
+			}
+		}
+
+		if(timeout > 0)
+		{
+			for(uint32_t i = 0; i < fenceCount; i++)
+			{
+				if(infiniteTimeout)
+				{
+					if(Cast(pFences[i])->wait() == VK_SUCCESS) // At least one fence is signaled
+					{
+						return VK_SUCCESS;
+					}
+				}
+				else
+				{
+					if(Cast(pFences[i])->waitUntil(end_ns) == VK_SUCCESS) // At least one fence is signaled
+					{
+						return VK_SUCCESS;
+					}
+				}
+			}
+		}
+
+		return VK_TIMEOUT;
+	}
 }
 
-void Device::waitIdle()
+VkResult Device::waitIdle()
 {
 	for(uint32_t i = 0; i < queueCount; i++)
 	{
 		queues[i].waitIdle();
 	}
+
+	return VK_SUCCESS;
 }
 
 void Device::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,