Fix multiview renderpass queries

The spec states that "If queries are used while executing a render pass
instance that has multiview enabled, the query uses N consecutive query
indices in the query pool (starting at query) where N is the number of
bits set in the view mask in the subpass the query is used in. How the
numerical results of the query are distributed among the queries is
implementation-dependent. For example, some implementations may write
each view’s results to a distinct query, while other implementations
may write the total result to the first query and write zero to the
other queries."

This wasn't working properly for occlusion queries, but we missed it
because dEQP doesn't provide coverage for this case if timestamp queries
are not supported. This change fixes it for both occlusion and timestamp
queries.

Note that for occlusion queries we write the total result to the first
query, and set the remaining query results to 0, as explicitly allowed
by the spec.

For timestamp queries each query result is a valid timestamp (note that
the spec also allows for only the first query to be a timestamp, while
the remaining ones are zero).

Bug: b/142643809
Change-Id: I531248c2822c5f68d8636d4a3d153082c48cac1d
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/53668
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 030487e..7ec4db7 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -34,6 +34,7 @@
 
 #include "marl/defer.h"
 
+#include <bitset>
 #include <cstring>
 
 namespace {
@@ -1059,16 +1060,24 @@
 
 	void play(vk::CommandBuffer::ExecutionState &executionState) override
 	{
-		queryPool->begin(query, flags);
+		// "If queries are used while executing a render pass instance that has multiview enabled, the query uses
+		//  N consecutive query indices in the query pool (starting at `query`)"
+		for(uint32_t i = 0; i < executionState.viewCount(); i++)
+		{
+			queryPool->begin(query + i, flags);
+		}
+
+		// The renderer accumulates the result into a single query.
+		ASSERT(queryPool->getType() == VK_QUERY_TYPE_OCCLUSION);
 		executionState.renderer->addQuery(queryPool->getQuery(query));
 	}
 
 	std::string description() override { return "vkCmdBeginQuery()"; }
 
 private:
-	vk::QueryPool *queryPool;
-	uint32_t query;
-	VkQueryControlFlags flags;
+	vk::QueryPool *const queryPool;
+	const uint32_t query;
+	const VkQueryControlFlags flags;
 };
 
 class CmdEndQuery : public vk::CommandBuffer::Command
@@ -1082,15 +1091,27 @@
 
 	void play(vk::CommandBuffer::ExecutionState &executionState) override
 	{
+		// The renderer accumulates the result into a single query.
+		ASSERT(queryPool->getType() == VK_QUERY_TYPE_OCCLUSION);
 		executionState.renderer->removeQuery(queryPool->getQuery(query));
-		queryPool->end(query);
+
+		// "implementations may write the total result to the first query and write zero to the other queries."
+		for(uint32_t i = 1; i < executionState.viewCount(); i++)
+		{
+			queryPool->getQuery(query + i)->set(0);
+		}
+
+		for(uint32_t i = 0; i < executionState.viewCount(); i++)
+		{
+			queryPool->end(query + i);
+		}
 	}
 
 	std::string description() override { return "vkCmdEndQuery()"; }
 
 private:
-	vk::QueryPool *queryPool;
-	uint32_t query;
+	vk::QueryPool *const queryPool;
+	const uint32_t query;
 };
 
 class CmdResetQueryPool : public vk::CommandBuffer::Command
@@ -1139,15 +1160,20 @@
 			executionState.renderer->synchronize();
 		}
 
-		queryPool->writeTimestamp(query);
+		// "the timestamp uses N consecutive query indices in the query pool (starting at `query`) where
+		//  N is the number of bits set in the view mask of the subpass the command is executed in."
+		for(uint32_t i = 0; i < executionState.viewCount(); i++)
+		{
+			queryPool->writeTimestamp(query + i);
+		}
 	}
 
 	std::string description() override { return "vkCmdWriteTimeStamp()"; }
 
 private:
-	vk::QueryPool *queryPool;
-	uint32_t query;
-	VkPipelineStageFlagBits stage;
+	vk::QueryPool *const queryPool;
+	const uint32_t query;
+	const VkPipelineStageFlagBits stage;
 };
 
 class CmdCopyQueryPoolResults : public vk::CommandBuffer::Command
@@ -1757,4 +1783,17 @@
 	}
 }
 
+// Returns the number of bits set in the current subpass's view mask, or 1 if multiview is disabled or renderPass is null.
+uint32_t CommandBuffer::ExecutionState::viewCount() const
+{
+	uint32_t viewMask = 1;
+
+	if(renderPass)
+	{
+		viewMask = renderPass->getViewMask(subpassIndex);
+	}
+
+	return static_cast<uint32_t>(std::bitset<32>(viewMask).count());
+}
+
 }  // namespace vk
diff --git a/src/Vulkan/VkCommandBuffer.hpp b/src/Vulkan/VkCommandBuffer.hpp
index 5671150..1f29ac1 100644
--- a/src/Vulkan/VkCommandBuffer.hpp
+++ b/src/Vulkan/VkCommandBuffer.hpp
@@ -165,6 +165,8 @@
 		uint32_t subpassIndex = 0;
 
 		void bindAttachments(Attachments *attachments);
+
+		uint32_t viewCount() const;
 	};
 
 	void submit(CommandBuffer::ExecutionState &executionState);
diff --git a/src/Vulkan/VkQueryPool.hpp b/src/Vulkan/VkQueryPool.hpp
index 1816cc6..f5aad98 100644
--- a/src/Vulkan/VkQueryPool.hpp
+++ b/src/Vulkan/VkQueryPool.hpp
@@ -99,12 +99,13 @@
 
 	void writeTimestamp(uint32_t query);
 
-	inline Query *getQuery(uint32_t query) const { return &(pool[query]); }
+	inline Query *getQuery(uint32_t query) const { return &pool[query]; }
+	inline VkQueryType getType() const { return type; }
 
 private:
-	Query *pool;
-	VkQueryType type;
-	uint32_t count;
+	Query *const pool;
+	const VkQueryType type;
+	const uint32_t count;
 };
 
 static inline QueryPool *Cast(VkQueryPool object)