Threaded Queue submit with events and fences This CL does 3 main things: - It pushes the queue submit operation to its own thread - It implements events - It implements fences Some details: - Because we can have N async draw operations and we need to signal the fence only after all operations are completed, fences have an add/done mechanism to allow signaling the fence only when all draw operations are completed. - Device::waitForFences() detects large timeouts to avoid integer overflow: if the timeout is bigger than the number of nanoseconds remaining between now and the maximum value of a long long, it is treated as an infinite timeout. Bug: b/117835459 Change-Id: I2f02c3b4bb9d9ac9037909b02b0601e1bae15d21 Tests: dEQP-VK.synchronization.* Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29769 Presubmit-Ready: Alexis Hétu <sugoi@google.com> Reviewed-by: Ben Clayton <bclayton@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp index ca91985..6b082e6 100644 --- a/src/Vulkan/VkDevice.cpp +++ b/src/Vulkan/VkDevice.cpp
@@ -17,11 +17,22 @@ #include "VkConfig.h" #include "VkDebug.hpp" #include "VkDescriptorSetLayout.hpp" +#include "VkFence.hpp" #include "VkQueue.hpp" #include "Device/Blitter.hpp" +#include <chrono> +#include <climits> #include <new> // Must #include this to use "placement new" +namespace +{ + std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> now() + { + return std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now()); + } +} + namespace vk { @@ -42,7 +53,7 @@ for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++) { - new (&queues[queueID]) Queue(queueCreateInfo.queueFamilyIndex, queueCreateInfo.pQueuePriorities[j]); + new (&queues[queueID]) Queue(); } } @@ -86,17 +97,85 @@ return queues[queueIndex]; } -void Device::waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout) +VkResult Device::waitForFences(uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout) { - // FIXME(b/117835459) : noop + const time_point start = now(); + const uint64_t max_timeout = (LLONG_MAX - start.time_since_epoch().count()); + bool infiniteTimeout = (timeout > max_timeout); + const time_point end_ns = start + std::chrono::nanoseconds(std::min(max_timeout, timeout)); + if(waitAll) // All fences must be signaled + { + for(uint32_t i = 0; i < fenceCount; i++) + { + if(timeout == 0) + { + if(Cast(pFences[i])->getStatus() != VK_SUCCESS) // At least one fence is not signaled + { + return VK_TIMEOUT; + } + } + else if(infiniteTimeout) + { + if(Cast(pFences[i])->wait() != VK_SUCCESS) // At least one fence is not signaled + { + return VK_TIMEOUT; + } + } + else + { + if(Cast(pFences[i])->waitUntil(end_ns) != VK_SUCCESS) // At least one fence is not signaled + { + return VK_TIMEOUT; + } + } + } + + return VK_SUCCESS; + } + else // At least one fence must be signaled + { + // Start by quickly checking the status of all fences, as only one is required + for(uint32_t i = 
0; i < fenceCount; i++) + { + if(Cast(pFences[i])->getStatus() == VK_SUCCESS) // At least one fence is signaled + { + return VK_SUCCESS; + } + } + + if(timeout > 0) + { + for(uint32_t i = 0; i < fenceCount; i++) + { + if(infiniteTimeout) + { + if(Cast(pFences[i])->wait() == VK_SUCCESS) // At least one fence is signaled + { + return VK_SUCCESS; + } + } + else + { + if(Cast(pFences[i])->waitUntil(end_ns) == VK_SUCCESS) // At least one fence is signaled + { + return VK_SUCCESS; + } + } + } + } + + return VK_TIMEOUT; + } } -void Device::waitIdle() +VkResult Device::waitIdle() { for(uint32_t i = 0; i < queueCount; i++) { queues[i].waitIdle(); } + + return VK_SUCCESS; } void Device::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,