Reimplement vk::Query using sw sync primitives.

Hide all the error-prone synchronization logic in the class, and expose a documented API.

This should have no impact on behavior - it was authored to make it harder to break things in the future.
That said, it appears to fix a whole bunch of flakes with the dEQP query tests.

Bug: b/133127573
Change-Id: I5c30b79b9b1cd36dba1fa2d3c34af0f5bd62772a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31816
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index 85eb7f4..6ed66ff 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -289,7 +289,7 @@
 	{
 		for(auto query : queries)
 		{
-			if(query->type == type)
+			if(query->getType() == type)
 			{
 				return true;
 			}
@@ -377,7 +377,7 @@
 			draw->queries = new std::list<vk::Query*>();
 			for(auto &query : queries)
 			{
-				++query->reference; // Atomic
+				query->start();
 				draw->queries->push_back(query);
 			}
 		}
@@ -855,34 +855,23 @@
 				{
 					for(auto &query : *(draw.queries))
 					{
-						std::unique_lock<std::mutex> mutexLock(query->mutex);
-
-						switch(query->type)
+						switch(query->getType())
 						{
 						case VK_QUERY_TYPE_OCCLUSION:
 							for(int cluster = 0; cluster < clusterCount; cluster++)
 							{
-								query->data += data.occlusion[cluster];
+								query->add(data.occlusion[cluster]);
 							}
 							break;
 						default:
 							break;
 						}
 
-						int queryRef = --query->reference; // Atomic
-						if(queryRef == 0)
-						{
-							query->state = vk::Query::FINISHED;
-						}
-
-						// Manual unlocking is done before notifying, to avoid
-						// waking up the waiting thread only to block again
-						mutexLock.unlock();
-						query->condition.notify_one();
+						query->finish();
 					}
 
 					delete draw.queries;
-					draw.queries = 0;
+					draw.queries = nullptr;
 				}
 
 				draw.vertexRoutine->unbind();
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index 0d01f45..e8242e0 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -32,7 +32,7 @@
 namespace vk
 {
 	class DescriptorSet;
-	struct Query;
+	class Query;
 }
 
 namespace sw
diff --git a/src/Vulkan/VkQueryPool.cpp b/src/Vulkan/VkQueryPool.cpp
index f8b34f4..7211d0d 100644
--- a/src/Vulkan/VkQueryPool.cpp
+++ b/src/Vulkan/VkQueryPool.cpp
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 #include "VkQueryPool.hpp"
-#include "Common/Thread.hpp"
 
 #include <chrono>
 #include <cstring>
@@ -21,6 +20,69 @@
 
 namespace vk
 {
+	Query::Query() : finished(sw::Event::ClearMode::Manual), state(UNAVAILABLE), type(INVALID_TYPE), value(0) {}
+
+	void Query::reset()
+	{
+		ASSERT(wg.count() == 0);
+		finished.clear();
+		auto prevState = state.exchange(UNAVAILABLE);
+		ASSERT(prevState != ACTIVE);
+		type = INVALID_TYPE;
+		value = 0;
+	}
+
+	void Query::prepare(VkQueryType ty)
+	{
+		auto prevState = state.exchange(ACTIVE);
+		ASSERT(prevState == UNAVAILABLE);
+		type = ty;
+	}
+
+	void Query::start()
+	{
+		ASSERT(state == ACTIVE);
+		wg.add();
+	}
+
+	void Query::finish()
+	{
+		if (wg.done())
+		{
+			auto prevState = state.exchange(FINISHED);
+			ASSERT(prevState == ACTIVE);
+			finished.signal();
+		}
+	}
+
+	Query::Data Query::getData() const
+	{
+		Data out;
+		out.state = state;
+		out.value = value;
+		return out;
+	}
+
+	VkQueryType Query::getType() const
+	{
+		return type;
+	}
+
+	void Query::wait()
+	{
+		finished.wait();
+	}
+
+	void Query::set(int64_t v)
+	{
+		value = v;
+	}
+
+	void Query::add(int64_t v)
+	{
+		value += v;
+	}
+
 	QueryPool::QueryPool(const VkQueryPoolCreateInfo* pCreateInfo, void* mem) :
 		pool(reinterpret_cast<Query*>(mem)), type(pCreateInfo->queryType),
 		count(pCreateInfo->queryCount)
@@ -61,7 +123,7 @@
 
 		// The sum of firstQuery and queryCount must be less than or equal to the number of queries
 		ASSERT((firstQuery + queryCount) <= count);
-		
+
 		VkResult result = VK_SUCCESS;
 		uint8_t* data = static_cast<uint8_t*>(pData);
 		for(uint32_t i = firstQuery; i < (firstQuery + queryCount); i++, data += stride)
@@ -72,14 +134,16 @@
 			// VK_NOT_READY. However, availability state is still written to pData for those
 			// queries if VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set.
 			auto &query = pool[i];
-			std::unique_lock<std::mutex> mutexLock(query.mutex);
+
 			if(flags & VK_QUERY_RESULT_WAIT_BIT) // Must wait for query to finish
 			{
-				query.condition.wait(mutexLock, [&query] { return query.state == Query::FINISHED; });
+				query.wait();
 			}
 
+			const auto current = query.getData();
+
 			bool writeResult = true;
-			if(pool[i].state == Query::ACTIVE)
+			if(current.state == Query::ACTIVE)
 			{
 				result = VK_NOT_READY;
 				writeResult = (flags & VK_QUERY_RESULT_PARTIAL_BIT); // Allow writing partial results
@@ -90,11 +154,11 @@
 				uint64_t* result64 = reinterpret_cast<uint64_t*>(data);
 				if(writeResult)
 				{
-					result64[0] = pool[i].data;
+					result64[0] = current.value;
 				}
 				if(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) // Output query availablity
 				{
-					result64[1] = pool[i].state;
+					result64[1] = current.state;
 				}
 			}
 			else
@@ -102,11 +166,11 @@
 				uint32_t* result32 = reinterpret_cast<uint32_t*>(data);
 				if(writeResult)
 				{
-					result32[0] = static_cast<uint32_t>(pool[i].data);
+					result32[0] = static_cast<uint32_t>(current.value);
 				}
 				if(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) // Output query availablity
 				{
-					result32[1] = pool[i].state;
+					result32[1] = current.state;
 				}
 			}
 		}
@@ -123,31 +187,14 @@
 			UNIMPLEMENTED("flags");
 		}
 
-		std::unique_lock<std::mutex> lock(pool[query].mutex);
-		ASSERT(pool[query].state == Query::UNAVAILABLE);
-		pool[query].state = Query::ACTIVE;
-		pool[query].data = 0;
-		pool[query].reference = 1;
-		pool[query].type = type;
+		pool[query].prepare(type);
+		pool[query].start();
 	}
 
 	void QueryPool::end(uint32_t query)
 	{
 		ASSERT(query < count);
-
-		#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
-		{
-			std::unique_lock<std::mutex> mutexLock(pool[query].mutex);
-			ASSERT(pool[query].state == Query::ACTIVE);
-		}
-		#endif
-
-		int ref = --pool[query].reference;
-		if(ref == 0)
-		{
-			std::unique_lock<std::mutex> mutexLock(pool[query].mutex);
-			pool[query].state = Query::FINISHED;
-		}
+		pool[query].finish();
 	}
 
 	void QueryPool::reset(uint32_t firstQuery, uint32_t queryCount)
@@ -157,10 +204,7 @@
 
 		for(uint32_t i = firstQuery; i < (firstQuery + queryCount); i++)
 		{
-			std::unique_lock<std::mutex> mutexLock(pool[i].mutex);
-
-			pool[i].state = Query::UNAVAILABLE;
-			pool[i].data = 0;
+			pool[i].reset();
 		}
 	}
 
@@ -169,8 +213,7 @@
 		ASSERT(query < count);
 		ASSERT(type == VK_QUERY_TYPE_TIMESTAMP);
 
-		std::unique_lock<std::mutex> mutexLock(pool[query].mutex);
-		pool[query].data = std::chrono::time_point_cast<std::chrono::nanoseconds>(
-			std::chrono::system_clock::now()).time_since_epoch().count();
+		pool[query].set(std::chrono::time_point_cast<std::chrono::nanoseconds>(
+			std::chrono::system_clock::now()).time_since_epoch().count());
 	}
 } // namespace vk
diff --git a/src/Vulkan/VkQueryPool.hpp b/src/Vulkan/VkQueryPool.hpp
index d85bf85..88ca21b 100644
--- a/src/Vulkan/VkQueryPool.hpp
+++ b/src/Vulkan/VkQueryPool.hpp
@@ -15,6 +15,8 @@
 #ifndef VK_QUERY_POOL_HPP_
 #define VK_QUERY_POOL_HPP_
 
+#include "System/Synchronization.hpp"
+
 #include "VkObject.hpp"
 #include <atomic>
 #include <condition_variable>
@@ -23,8 +25,13 @@
 namespace vk
 {
 
-struct Query
+class Query
 {
+public:
+	static auto constexpr INVALID_TYPE = VK_QUERY_TYPE_MAX_ENUM;
+
+	Query();
+
 	enum State
 	{
 		UNAVAILABLE,
@@ -32,12 +39,53 @@
 		FINISHED
 	};
 
-	std::mutex mutex;
-	std::condition_variable condition;
-	State state;  // guarded by mutex
-	int64_t data; // guarded by mutex
-	std::atomic<int> reference;
-	VkQueryType type;
+	struct Data
+	{
+		State state;   // The current query state.
+		int64_t value; // The current query value.
+	};
+
+	// reset() sets the state of the Query to UNAVAILABLE, sets the type to
+	// INVALID_TYPE and clears the query value.
+	// reset() must not be called while the query is in the ACTIVE state.
+	void reset();
+
+	// prepare() sets the Query type to ty, and sets the state to ACTIVE.
+	// prepare() must not be called when the query is already ACTIVE.
+	void prepare(VkQueryType ty);
+
+	// start() begins a query task which is closed with a call to finish().
+	// Query tasks can be nested.
+	// start() must only be called when in the ACTIVE state.
+	void start();
+
+	// finish() ends a query task begun with a call to start().
+	// Once all query tasks are complete the query will transition to the
+	// FINISHED state.
+	// finish() must only be called when in the ACTIVE state.
+	void finish();
+
+	// wait() blocks until the query reaches the FINISHED state.
+	void wait();
+
+	// getData() returns the current query state and value.
+	Data getData() const;
+
+	// getType() returns the type of query.
+	VkQueryType getType() const;
+
+	// set() replaces the current query value with val.
+	void set(int64_t val);
+
+	// add() adds val to the current query value.
+	void add(int64_t val);
+
+private:
+	sw::WaitGroup wg;
+	sw::Event finished;
+	std::atomic<State> state;
+	std::atomic<VkQueryType> type;
+	std::atomic<int64_t> value;
 };
 
 class QueryPool : public Object<QueryPool, VkQueryPool>