Refactor Queue::Submit to use internal structure
Before this CL, Queue::submit() would internally use the VkSubmitInfo
structure to store the queue submit related information. This had the
downside of having to re-parse the pNext chain in submitQueue(),
which is now no longer the case in this CL, since the internal
structure no longer includes a pNext pointer.
Also, pointers in the internal structure with a size of 0 are no
longer assigned a value and are set to nullptr instead.
This CL paves the way for the VK_KHR_synchronization2 implementation,
which will be able to use the same internal structure as the one used
by Queue::submit().
Bug: b/204110005
Change-Id: Ib99261ef101680519bed57892a284ae20b229661
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/58591
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
Commit-Queue: Alexis Hétu <sugoi@google.com>
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index 419658d..f664fea 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -29,11 +29,11 @@
#include <cstring>
-namespace {
+namespace vk {
-VkSubmitInfo *DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits)
+Queue::SubmitInfo *Queue::DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits)
{
- size_t submitSize = sizeof(VkSubmitInfo) * submitCount;
+ size_t submitSize = sizeof(SubmitInfo) * submitCount;
size_t totalSize = submitSize;
for(uint32_t i = 0; i < submitCount; i++)
{
@@ -50,7 +50,6 @@
case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
{
const auto *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
- totalSize += sizeof(VkTimelineSemaphoreSubmitInfo);
totalSize += tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
totalSize += tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
}
@@ -72,31 +71,53 @@
uint8_t *mem = static_cast<uint8_t *>(
vk::allocateHostMemory(totalSize, vk::REQUIRED_MEMORY_ALIGNMENT, vk::NULL_ALLOCATION_CALLBACKS, vk::Fence::GetAllocationScope()));
- auto submits = new(mem) VkSubmitInfo[submitCount];
- memcpy(mem, pSubmits, submitSize);
+ auto submits = new(mem) SubmitInfo[submitCount];
mem += submitSize;
for(uint32_t i = 0; i < submitCount; i++)
{
- size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
- submits[i].pWaitSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
- memcpy(mem, pSubmits[i].pWaitSemaphores, size);
- mem += size;
+ submits[i].commandBufferCount = pSubmits[i].commandBufferCount;
+ submits[i].signalSemaphoreCount = pSubmits[i].signalSemaphoreCount;
+ submits[i].waitSemaphoreCount = pSubmits[i].waitSemaphoreCount;
- size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
- submits[i].pWaitDstStageMask = reinterpret_cast<const VkPipelineStageFlags *>(mem);
- memcpy(mem, pSubmits[i].pWaitDstStageMask, size);
- mem += size;
+ submits[i].pWaitSemaphores = nullptr;
+ submits[i].pWaitDstStageMask = nullptr;
+ submits[i].pSignalSemaphores = nullptr;
+ submits[i].pCommandBuffers = nullptr;
- size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
- submits[i].pSignalSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
- memcpy(mem, pSubmits[i].pSignalSemaphores, size);
- mem += size;
+ if(pSubmits[i].waitSemaphoreCount > 0)
+ {
+ size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
+ submits[i].pWaitSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
+ memcpy(mem, pSubmits[i].pWaitSemaphores, size);
+ mem += size;
- size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
- submits[i].pCommandBuffers = reinterpret_cast<const VkCommandBuffer *>(mem);
- memcpy(mem, pSubmits[i].pCommandBuffers, size);
- mem += size;
+ size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
+ submits[i].pWaitDstStageMask = reinterpret_cast<const VkPipelineStageFlags *>(mem);
+ memcpy(mem, pSubmits[i].pWaitDstStageMask, size);
+ mem += size;
+ }
+
+ if(pSubmits[i].signalSemaphoreCount > 0)
+ {
+ size_t size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
+ submits[i].pSignalSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
+ memcpy(mem, pSubmits[i].pSignalSemaphores, size);
+ mem += size;
+ }
+
+ if(pSubmits[i].commandBufferCount > 0)
+ {
+ size_t size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
+ submits[i].pCommandBuffers = reinterpret_cast<const VkCommandBuffer *>(mem);
+ memcpy(mem, pSubmits[i].pCommandBuffers, size);
+ mem += size;
+ }
+
+ submits[i].waitSemaphoreValueCount = 0;
+ submits[i].pWaitSemaphoreValues = nullptr;
+ submits[i].signalSemaphoreValueCount = 0;
+ submits[i].pSignalSemaphoreValues = nullptr;
for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
@@ -107,24 +128,23 @@
{
const VkTimelineSemaphoreSubmitInfo *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
- size = sizeof(VkTimelineSemaphoreSubmitInfo);
- VkTimelineSemaphoreSubmitInfo *tlsSubmitInfoCopy = reinterpret_cast<VkTimelineSemaphoreSubmitInfo *>(mem);
- memcpy(mem, extension, size);
- // Don't copy the pNext pointer at all.
- tlsSubmitInfoCopy->pNext = nullptr;
- mem += size;
+ if(tlsSubmitInfo->waitSemaphoreValueCount > 0)
+ {
+ submits[i].waitSemaphoreValueCount = tlsSubmitInfo->waitSemaphoreValueCount;
+ size_t size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
+ submits[i].pWaitSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
+ memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size);
+ mem += size;
+ }
- size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
- tlsSubmitInfoCopy->pWaitSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
- memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size);
- mem += size;
-
- size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
- tlsSubmitInfoCopy->pSignalSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
- memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size);
- mem += size;
-
- submits[i].pNext = tlsSubmitInfoCopy;
+ if(tlsSubmitInfo->signalSemaphoreValueCount > 0)
+ {
+ submits[i].signalSemaphoreValueCount = tlsSubmitInfo->signalSemaphoreValueCount;
+ size_t size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
+ submits[i].pSignalSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
+ memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size);
+ mem += size;
+ }
}
break;
case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO:
@@ -144,10 +164,6 @@
return submits;
}
-} // anonymous namespace
-
-namespace vk {
-
Queue::Queue(Device *device, marl::Scheduler *scheduler)
: device(device)
{
@@ -193,36 +209,13 @@
for(uint32_t i = 0; i < task.submitCount; i++)
{
- VkSubmitInfo &submitInfo = task.pSubmits[i];
- const VkTimelineSemaphoreSubmitInfo *timelineInfo = nullptr;
- for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(submitInfo.pNext);
- nextInfo != nullptr; nextInfo = nextInfo->pNext)
- {
- switch(nextInfo->sType)
- {
- case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
- timelineInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(submitInfo.pNext);
- break;
- case VK_STRUCTURE_TYPE_MAX_ENUM:
- // dEQP tests that this value is ignored.
- break;
- case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO:
- // SwiftShader doesn't use device group submit info because it only supports a single physical device.
- // However, this extension is core in Vulkan 1.1, so we must treat it as a valid structure type.
- break;
- default:
- UNSUPPORTED("submitInfo.pNext->sType = %s", vk::Stringify(nextInfo->sType).c_str());
- break;
- }
- }
-
+ SubmitInfo &submitInfo = task.pSubmits[i];
for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++)
{
if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pWaitSemaphores[j]))
{
- ASSERT_MSG(timelineInfo != nullptr,
- "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
- sem->wait(timelineInfo->pWaitSemaphoreValues[j]);
+ ASSERT(j < submitInfo.waitSemaphoreValueCount);
+ sem->wait(submitInfo.pWaitSemaphoreValues[j]);
}
else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pWaitSemaphores[j]))
{
@@ -248,9 +241,8 @@
{
if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pSignalSemaphores[j]))
{
- ASSERT_MSG(timelineInfo != nullptr,
- "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
- sem->signal(timelineInfo->pSignalSemaphoreValues[j]);
+ ASSERT(j < submitInfo.signalSemaphoreValueCount);
+ sem->signal(submitInfo.pSignalSemaphoreValues[j]);
}
else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pSignalSemaphores[j]))
{
diff --git a/src/Vulkan/VkQueue.hpp b/src/Vulkan/VkQueue.hpp
index 08ff88c..f21b1fe 100644
--- a/src/Vulkan/VkQueue.hpp
+++ b/src/Vulkan/VkQueue.hpp
@@ -61,10 +61,25 @@
void insertDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo);
private:
+ struct SubmitInfo
+ {
+ uint32_t waitSemaphoreCount;
+ const VkSemaphore *pWaitSemaphores;
+ const VkPipelineStageFlags *pWaitDstStageMask;
+ uint32_t commandBufferCount;
+ const VkCommandBuffer *pCommandBuffers;
+ uint32_t signalSemaphoreCount;
+ const VkSemaphore *pSignalSemaphores;
+ uint32_t waitSemaphoreValueCount;
+ const uint64_t *pWaitSemaphoreValues;
+ uint32_t signalSemaphoreValueCount;
+ const uint64_t *pSignalSemaphoreValues;
+ };
+
struct Task
{
uint32_t submitCount = 0;
- VkSubmitInfo *pSubmits = nullptr;
+ SubmitInfo *pSubmits = nullptr;
std::shared_ptr<sw::CountedEvent> events;
enum Type
@@ -78,11 +93,12 @@
void taskLoop(marl::Scheduler *scheduler);
void garbageCollect();
void submitQueue(const Task &task);
+ static SubmitInfo *DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits);
Device *device;
std::unique_ptr<sw::Renderer> renderer;
sw::Chan<Task> pending;
- sw::Chan<VkSubmitInfo *> toDelete;
+ sw::Chan<SubmitInfo *> toDelete;
std::thread queueThread;
};