Implement timestamp query support

This change sets VkQueueFamilyProperties::timestampValidBits to non-zero
(64) to indicate support for timestamps.

It also enables the timestampComputeAndGraphics feature to indicate that
all graphics and compute queues (we only have a single one at the
moment) support timestamp queries. Note that applications often check
this aggregate flag instead of the individual queue family properties.

timestampPeriod was lowered from 60 to 1, since we're writing timestamps
with nanosecond granularity. The code that obtains the time was already
in place, see QueryPool::writeTimestamp(), but it didn't put the query in
the 'available' state.

The clock was replaced with std::chrono::steady_clock, which typically
provides higher resolution than system_clock [1]. While it isn't tied to
wall-clock time, that is not a requirement of Vulkan timestamps.

Query::INVALID_TYPE was eliminated. Queries belong to a query pool which
takes a valid type at construction. This also allowed us to eliminate
Query::prepare().

[1] https://www.modernescpp.com/index.php/the-three-clocks

Bug: b/142643809
Tests: dEQP-VK.pipeline.timestamp.*
Change-Id: Icb7c8c5ed78052eba9df630a4c1cb450bda7f85b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/53488
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index a44dde2..3023458 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -28,16 +28,6 @@
 
 namespace vk {
 
-#if VK_USE_PLATFORM_FUCHSIA
-if(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_TEMP_ZIRCON_VMO_BIT_FUCHSIA)
-{
-	properties->compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_TEMP_ZIRCON_VMO_BIT_FUCHSIA;
-	properties->exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_TEMP_ZIRCON_VMO_BIT_FUCHSIA;
-	properties->externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT;
-	return;
-}
-#endif
-
 PhysicalDevice::PhysicalDevice(const void *, void *mem)
 {
 }
@@ -524,8 +514,8 @@
 		sampleCounts,                                     // sampledImageStencilSampleCounts
 		sampleCounts,                                     // storageImageSampleCounts
 		1,                                                // maxSampleMaskWords
-		VK_FALSE,                                         // timestampComputeAndGraphics
-		60,                                               // timestampPeriod
+		VK_TRUE,                                          // timestampComputeAndGraphics
+		1,                                                // timestampPeriod
 		sw::MAX_CLIP_DISTANCES,                           // maxClipDistances
 		sw::MAX_CULL_DISTANCES,                           // maxCullDistances
 		sw::MAX_CLIP_DISTANCES + sw::MAX_CULL_DISTANCES,  // maxCombinedClipAndCullDistances
@@ -1523,17 +1513,25 @@
 	return 1;
 }
 
+VkQueueFamilyProperties PhysicalDevice::getQueueFamilyProperties() const
+{
+	VkQueueFamilyProperties properties = {};
+	properties.minImageTransferGranularity.width = 1;
+	properties.minImageTransferGranularity.height = 1;
+	properties.minImageTransferGranularity.depth = 1;
+	properties.queueCount = 1;
+	properties.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
+	properties.timestampValidBits = 64;
+
+	return properties;
+}
+
 void PhysicalDevice::getQueueFamilyProperties(uint32_t pQueueFamilyPropertyCount,
                                               VkQueueFamilyProperties *pQueueFamilyProperties) const
 {
 	for(uint32_t i = 0; i < pQueueFamilyPropertyCount; i++)
 	{
-		pQueueFamilyProperties[i].minImageTransferGranularity.width = 1;
-		pQueueFamilyProperties[i].minImageTransferGranularity.height = 1;
-		pQueueFamilyProperties[i].minImageTransferGranularity.depth = 1;
-		pQueueFamilyProperties[i].queueCount = 1;
-		pQueueFamilyProperties[i].queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
-		pQueueFamilyProperties[i].timestampValidBits = 0;  // No support for time stamps
+		pQueueFamilyProperties[i] = getQueueFamilyProperties();
 	}
 }
 
@@ -1542,12 +1540,7 @@
 {
 	for(uint32_t i = 0; i < pQueueFamilyPropertyCount; i++)
 	{
-		pQueueFamilyProperties[i].queueFamilyProperties.minImageTransferGranularity.width = 1;
-		pQueueFamilyProperties[i].queueFamilyProperties.minImageTransferGranularity.height = 1;
-		pQueueFamilyProperties[i].queueFamilyProperties.minImageTransferGranularity.depth = 1;
-		pQueueFamilyProperties[i].queueFamilyProperties.queueCount = 1;
-		pQueueFamilyProperties[i].queueFamilyProperties.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
-		pQueueFamilyProperties[i].queueFamilyProperties.timestampValidBits = 0;  // No support for time stamps
+		pQueueFamilyProperties[i].queueFamilyProperties = getQueueFamilyProperties();
 	}
 }
 
diff --git a/src/Vulkan/VkPhysicalDevice.hpp b/src/Vulkan/VkPhysicalDevice.hpp
index b3dd2aa..0b4fe59 100644
--- a/src/Vulkan/VkPhysicalDevice.hpp
+++ b/src/Vulkan/VkPhysicalDevice.hpp
@@ -72,6 +72,7 @@
 	                              VkImageUsageFlags usage, VkImageCreateFlags flags,
 	                              VkImageFormatProperties *pImageFormatProperties) const;
 	uint32_t getQueueFamilyPropertyCount() const;
+
 	void getQueueFamilyProperties(uint32_t pQueueFamilyPropertyCount,
 	                              VkQueueFamilyProperties *pQueueFamilyProperties) const;
 	void getQueueFamilyProperties(uint32_t pQueueFamilyPropertyCount,
@@ -81,6 +82,7 @@
 private:
 	const VkPhysicalDeviceLimits &getLimits() const;
 	VkSampleCountFlags getSampleCounts() const;
+	VkQueueFamilyProperties getQueueFamilyProperties() const;
 };
 
 using DispatchablePhysicalDevice = DispatchableObject<PhysicalDevice, VkPhysicalDevice>;
diff --git a/src/Vulkan/VkQueryPool.cpp b/src/Vulkan/VkQueryPool.cpp
index 5d4334e..912afb7 100644
--- a/src/Vulkan/VkQueryPool.cpp
+++ b/src/Vulkan/VkQueryPool.cpp
@@ -20,10 +20,10 @@
 
 namespace vk {
 
-Query::Query()
+Query::Query(VkQueryType type)
     : finished(marl::Event::Mode::Manual)
     , state(UNAVAILABLE)
-    , type(INVALID_TYPE)
+    , type(type)
     , value(0)
 {}
 
@@ -32,20 +32,13 @@
 	finished.clear();
 	auto prevState = state.exchange(UNAVAILABLE);
 	ASSERT(prevState != ACTIVE);
-	type = INVALID_TYPE;
 	value = 0;
 }
 
-void Query::prepare(VkQueryType ty)
-{
-	auto prevState = state.exchange(ACTIVE);
-	ASSERT(prevState == UNAVAILABLE);
-	type = ty;
-}
-
 void Query::start()
 {
-	ASSERT(state == ACTIVE);
+	auto prevState = state.exchange(ACTIVE);
+	ASSERT(prevState != FINISHED);  // Must be reset first
 	wg.add();
 }
 
@@ -106,7 +99,7 @@
 	// Construct all queries
 	for(uint32_t i = 0; i < count; i++)
 	{
-		new(&pool[i]) Query();
+		new(&pool[i]) Query(type);
 	}
 }
 
@@ -138,11 +131,6 @@
 	uint8_t *data = static_cast<uint8_t *>(pData);
 	for(uint32_t i = firstQuery; i < (firstQuery + queryCount); i++, data += stride)
 	{
-		// If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are both not set
-		// then no result values are written to pData for queries that are in the
-		// unavailable state at the time of the call, and vkGetQueryPoolResults returns
-		// VK_NOT_READY. However, availability state is still written to pData for those
-		// queries if VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set.
 		auto &query = pool[i];
 
 		if(flags & VK_QUERY_RESULT_WAIT_BIT)  // Must wait for query to finish
@@ -152,6 +140,11 @@
 
 		const auto current = query.getData();
 
+		// "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are both not set
+		//  then no result values are written to pData for queries that are in the
+		//  unavailable state at the time of the call, and vkGetQueryPoolResults returns
+		//  VK_NOT_READY. However, availability state is still written to pData for those
+		//  queries if VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
 		bool writeResult = true;
 		if(current.state == Query::ACTIVE || (current.state == Query::UNAVAILABLE && !(flags & VK_QUERY_RESULT_WAIT_BIT)))
 		{
@@ -198,7 +191,6 @@
 		UNSUPPORTED("vkCmdBeginQuery::flags %d", int(flags));
 	}
 
-	pool[query].prepare(type);
 	pool[query].start();
 }
 
@@ -224,10 +216,12 @@
 	ASSERT(query < count);
 	ASSERT(type == VK_QUERY_TYPE_TIMESTAMP);
 
+	pool[query].start();
 	pool[query].set(std::chrono::time_point_cast<std::chrono::nanoseconds>(
-	                    std::chrono::system_clock::now())
+	                    std::chrono::steady_clock::now())
 	                    .time_since_epoch()
 	                    .count());
+	pool[query].finish();
 }
 
 }  // namespace vk
diff --git a/src/Vulkan/VkQueryPool.hpp b/src/Vulkan/VkQueryPool.hpp
index 9c7d246..1816cc6 100644
--- a/src/Vulkan/VkQueryPool.hpp
+++ b/src/Vulkan/VkQueryPool.hpp
@@ -29,9 +29,7 @@
 class Query
 {
 public:
-	static auto constexpr INVALID_TYPE = VK_QUERY_TYPE_MAX_ENUM;
-
-	Query();
+	Query(VkQueryType type);
 
 	enum State
 	{
@@ -51,13 +49,9 @@
 	// reset() must not be called while the query is in the ACTIVE state.
 	void reset();
 
-	// prepare() sets the Query type to ty, and sets the state to ACTIVE.
-	// prepare() must not be called when the query is already ACTIVE.
-	void prepare(VkQueryType ty);
-
 	// start() begins a query task which is closed with a call to finish().
 	// Query tasks can be nested.
-	// start() must only be called when in the ACTIVE state.
+	// start() sets the state to ACTIVE.
 	void start();
 
 	// finish() ends a query task begun with a call to start().
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index 7995eff..c1afd6f 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -317,14 +317,16 @@
 
 	for(uint32_t i = 0; i < presentInfo->waitSemaphoreCount; i++)
 	{
-		vk::DynamicCast<BinarySemaphore>(presentInfo->pWaitSemaphores[i])->wait();
+		auto *semaphore = vk::DynamicCast<BinarySemaphore>(presentInfo->pWaitSemaphores[i]);
+		semaphore->wait();
 	}
 
 	VkResult commandResult = VK_SUCCESS;
 
 	for(uint32_t i = 0; i < presentInfo->swapchainCount; i++)
 	{
-		VkResult perSwapchainResult = vk::Cast(presentInfo->pSwapchains[i])->present(presentInfo->pImageIndices[i]);
+		auto *swapchain = vk::Cast(presentInfo->pSwapchains[i]);
+		VkResult perSwapchainResult = swapchain->present(presentInfo->pImageIndices[i]);
 
 		if(presentInfo->pResults)
 		{
diff --git a/src/Vulkan/VkSemaphore.cpp b/src/Vulkan/VkSemaphore.cpp
index c82543a..5f9e7d1 100644
--- a/src/Vulkan/VkSemaphore.cpp
+++ b/src/Vulkan/VkSemaphore.cpp
@@ -21,8 +21,6 @@
 #include "marl/blockingcall.h"
 #include "marl/conditionvariable.h"
 
-#include <chrono>
-#include <climits>
 #include <functional>
 #include <memory>
 #include <utility>