Order timestamp writes correctly with respect to renderer work

Bug: 139469122
Change-Id: I0f319952b00fcca4165bf3f59221ebaf1ba12045
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35189
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 88e0bd9..92f03e5 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -1087,19 +1087,31 @@
 
 struct WriteTimeStamp : public CommandBuffer::Command
 {
-	WriteTimeStamp(QueryPool* queryPool, uint32_t query)
-		: queryPool(queryPool), query(query)
+	WriteTimeStamp(QueryPool* queryPool, uint32_t query, VkPipelineStageFlagBits stage)
+		: queryPool(queryPool), query(query), stage(stage)
 	{
 	}
 
 	void play(CommandBuffer::ExecutionState& executionState)
 	{
+		if (stage & ~(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT))
+		{
+			// The `top of pipe` and `draw indirect` stages are handled in command buffer processing so a timestamp write
+			// done in those stages can just be done here without any additional synchronization.
+			// Everything else is deferred to the Renderer; we will treat those stages all as if they were
+			// `bottom of pipe`.
+			//
+			// FIXME(chrisforbes): once Yarn is integrated, do this in a task so we don't have to stall here.
+			executionState.renderer->synchronize();
+		}
+
 		queryPool->writeTimestamp(query);
 	}
 
 private:
 	QueryPool* queryPool;
 	uint32_t query;
+	VkPipelineStageFlagBits stage;
 };
 
 struct CopyQueryPoolResults : public CommandBuffer::Command
@@ -1282,7 +1294,7 @@
 
 void CommandBuffer::writeTimestamp(VkPipelineStageFlagBits pipelineStage, QueryPool* queryPool, uint32_t query)
 {
-	addCommand<WriteTimeStamp>(queryPool, query);
+	addCommand<WriteTimeStamp>(queryPool, query, pipelineStage);
 }
 
 void CommandBuffer::copyQueryPoolResults(const QueryPool* queryPool, uint32_t firstQuery, uint32_t queryCount,