Reimplement VkFence using sw::WaitGroup and sw::Event.

This change fixes the ASAN issue as described in b/133135427.

Reproduction case:
./build/vk-unittests --gtest_repeat=-1 --gtest_filter=ComputeParams/SwiftShaderVulkanBufferToBufferComputeTest.Memcpy/0

Bug: b/133127573
Bug: b/133135427
Bug: swiftshader:130
Change-Id: I06fbf10ab042160e8ca481f2afaa30d4f676dc75
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31681
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index 15f13fb..c4d4177 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -405,7 +405,7 @@
 
 		if(fence)
 		{
-			fence->add();
+			fence->start();
 		}
 		ASSERT(!draw->fence);
 		draw->fence = fence;
@@ -891,7 +891,7 @@
 
 				if(draw.fence)
 				{
-					draw.fence->done();
+					draw.fence->finish();
 					draw.fence = nullptr;
 				}
 
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index 9871872..e51cf07 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -119,6 +119,7 @@
 
 VkResult Device::waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout)
 {
+	using time_point = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
 	const time_point start = now();
 	const uint64_t max_timeout = (LLONG_MAX - start.time_since_epoch().count());
 	bool infiniteTimeout = (timeout > max_timeout);
@@ -143,7 +144,7 @@
 			}
 			else
 			{
-				if(Cast(pFences[i])->waitUntil(end_ns) != VK_SUCCESS) // At least one fence is not signaled
+				if(Cast(pFences[i])->wait(end_ns) != VK_SUCCESS) // At least one fence is not signaled
 				{
 					return VK_TIMEOUT;
 				}
@@ -176,7 +177,7 @@
 				}
 				else
 				{
-					if(Cast(pFences[i])->waitUntil(end_ns) == VK_SUCCESS) // At least one fence is signaled
+					if(Cast(pFences[i])->wait(end_ns) == VK_SUCCESS) // At least one fence is signaled
 					{
 						return VK_SUCCESS;
 					}
diff --git a/src/Vulkan/VkFence.hpp b/src/Vulkan/VkFence.hpp
index 7e8761c..d3951c4 100644
--- a/src/Vulkan/VkFence.hpp
+++ b/src/Vulkan/VkFence.hpp
@@ -16,86 +16,68 @@
 #define VK_FENCE_HPP_
 
 #include "VkObject.hpp"
-#include <chrono>
-#include <condition_variable>
-#include <mutex>
+#include "System/Synchronization.hpp"
 
 namespace vk
 {
 
-using time_point = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
-
 class Fence : public Object<Fence, VkFence>
 {
 public:
-	Fence() : status(VK_NOT_READY) {}
+	Fence() : signaled(sw::Event::ClearMode::Manual, false) {}
 
 	Fence(const VkFenceCreateInfo* pCreateInfo, void* mem) :
-		status((pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) ? VK_SUCCESS : VK_NOT_READY)
-	{
-	}
+		signaled(sw::Event::ClearMode::Manual, (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) != 0) {}
 
 	static size_t ComputeRequiredAllocationSize(const VkFenceCreateInfo* pCreateInfo)
 	{
 		return 0;
 	}
 
-	void add()
-	{
-		std::unique_lock<std::mutex> lock(mutex);
-		++count;
-	}
-
-	void done()
-	{
-		std::unique_lock<std::mutex> lock(mutex);
-		ASSERT(count > 0);
-		--count;
-		if(count == 0)
-		{
-			// signal the fence, without the unlock/lock required to call signal() here
-			status = VK_SUCCESS;
-			lock.unlock();
-			condition.notify_all();
-		}
-	}
-
 	void reset()
 	{
-		std::unique_lock<std::mutex> lock(mutex);
-		ASSERT(count == 0);
-		status = VK_NOT_READY;
+		ASSERT_MSG(wg.count() == 0, "Fence::reset() called when work is in flight");
+		signaled.clear();
 	}
 
 	VkResult getStatus()
 	{
-		std::unique_lock<std::mutex> lock(mutex);
-		auto out = status;
-		lock.unlock();
-		return out;
+		return signaled ? VK_SUCCESS : VK_NOT_READY;
 	}
 
 	VkResult wait()
 	{
-		std::unique_lock<std::mutex> lock(mutex);
-		condition.wait(lock, [this] { return status == VK_SUCCESS; });
-		auto out = status;
-		lock.unlock();
-		return out;
+		signaled.wait();
+		return VK_SUCCESS;
 	}
 
-	VkResult waitUntil(const time_point& timeout_ns)
+    template <class CLOCK, class DURATION>
+	VkResult wait(const std::chrono::time_point<CLOCK, DURATION>& timeout)
 	{
-		std::unique_lock<std::mutex> lock(mutex);
-		return condition.wait_until(lock, timeout_ns, [this] { return status == VK_SUCCESS; }) ?
-		       VK_SUCCESS : VK_TIMEOUT;
+		return signaled.wait(timeout) ? VK_SUCCESS : VK_TIMEOUT;
+	}
+
+	void start()
+	{
+		ASSERT(!signaled);
+		wg.add();
+	}
+
+	void finish()
+	{
+		ASSERT(!signaled);
+		if (wg.done())
+		{
+			signaled.signal();
+		}
 	}
 
 private:
-	VkResult status = VK_NOT_READY; // guarded by mutex
-	int32_t count = 0; // guarded by mutex
-	std::mutex mutex;
-	std::condition_variable condition;
+	Fence(const Fence&) = delete;
+	Fence& operator = (const Fence&) = delete;
+
+	sw::WaitGroup wg;
+	sw::Event signaled;
 };
 
 static inline Fence* Cast(VkFence object)
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index 2e47ba0..de19f0e 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -106,7 +106,7 @@
 
 	if(task.fence)
 	{
-		task.fence->add();
+		task.fence->start();
 	}
 
 	pending.put(task);
@@ -155,7 +155,7 @@
 		// TODO: fix renderer signaling so that work submitted separately from (but before) a fence
 		// is guaranteed complete by the time the fence signals.
 		renderer->synchronize();
-		task.fence->done();
+		task.fence->finish();
 	}
 }
 
@@ -184,7 +184,7 @@
 {
 	// Wait for task queue to flush.
 	vk::Fence fence;
-	fence.add();
+	fence.start();
 
 	Task task;
 	task.fence = &fence;
diff --git a/src/WSI/VkSwapchainKHR.cpp b/src/WSI/VkSwapchainKHR.cpp
index 30c72a9..54cea09 100644
--- a/src/WSI/VkSwapchainKHR.cpp
+++ b/src/WSI/VkSwapchainKHR.cpp
@@ -193,8 +193,8 @@
 
 			if(fence)
 			{
-				vk::Cast(fence)->add();
-				vk::Cast(fence)->done();
+				vk::Cast(fence)->start();
+				vk::Cast(fence)->finish();
 			}
 
 			return VK_SUCCESS;