Merge changes I2b7adc3c,I5873dfa8
* changes:
Update Marl to ca8408f68
Squashed 'third_party/marl/' changes from 64d123947..ca8408f68
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index fff67ad..83f8126 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -611,7 +611,8 @@
UInt r11g11b10Pack(const Float4 &value)
{
- auto halfBits = floatToHalfBits(As<UInt4>(value), true);
+ // 10 and 11 bit floats are unsigned, so their minimal value is 0
+ auto halfBits = floatToHalfBits(As<UInt4>(Max(value, Float4(0.0f))), true);
// Truncates instead of rounding. See b/147900455
UInt4 truncBits = halfBits & UInt4(0x7FF00000, 0x7FF00000, 0x7FE00000, 0);
return (UInt(truncBits.x) >> 20) | (UInt(truncBits.y) >> 9) | (UInt(truncBits.z) << 1);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 9a83795..df72f66 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -58,37 +58,6 @@
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
namespace {
namespace sz {
-void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
-{
- ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
-
- if(function->getEntryNode() == newEntryNode)
- {
- return;
- }
-
- // Make this the new entry node
- function->setEntryNode(newEntryNode);
-
- // Reorder nodes so that new entry block comes first. This is required
- // by Cfg::renumberInstructions, which expects the first node in the list
- // to be the entry node.
- {
- auto nodes = function->getNodes();
-
- // TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
-
- auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
- ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
-
- nodes.erase(iter);
- nodes.insert(nodes.begin(), newEntryNode);
-
- // swapNodes replaces its nodes with the input one, and renumbers them,
- // so our new entry node will be 0, and the previous will be 1.
- function->swapNodes(nodes);
- }
-}
Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> ¶mTypes)
{
@@ -194,6 +163,8 @@
template<typename Return, typename... CArgs, typename... RArgs>
Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
{
+ static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
+
Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
@@ -256,6 +227,8 @@
Ice::GlobalContext *context = nullptr;
Ice::Cfg *function = nullptr;
+Ice::CfgNode *entryBlock = nullptr;
+Ice::CfgNode *basicBlockTop = nullptr;
Ice::CfgNode *basicBlock = nullptr;
Ice::CfgLocalAllocatorScope *allocator = nullptr;
rr::ELFMemoryStreamer *routine = nullptr;
@@ -489,12 +462,17 @@
return Ice::typeWidthInBytes(T(type));
}
-static void createRetVoidIfNoRet()
+static void finalizeFunction()
{
+ // Create a return if none was added
if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
{
Nucleus::createRetVoid();
}
+
+ // Connect the entry block to the top of the initial basic block
+ auto br = Ice::InstBr::create(::function, ::basicBlockTop);
+ ::entryBlock->appendInst(br);
}
using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
@@ -656,13 +634,23 @@
return symbolValue;
}
-void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
+struct EntryPoint
{
+ const void *entry;
+ size_t codeSize = 0;
+};
+
+std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
+{
+ ASSERT(functionNames.size() > 0);
+ std::vector<EntryPoint> entryPoints(functionNames.size());
+
ElfHeader *elfHeader = (ElfHeader *)elfImage;
+ // TODO: assert?
if(!elfHeader->checkMagic())
{
- return nullptr;
+ return {};
}
// Expect ELF bitness to match platform
@@ -682,7 +670,6 @@
#endif
SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
- void *entry = nullptr;
for(int i = 0; i < elfHeader->e_shnum; i++)
{
@@ -690,17 +677,25 @@
{
if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
{
- auto getCurrSectionName = [&]() {
+ auto findSectionNameEntryIndex = [&]() -> size_t {
auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
- return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
- };
- if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
- {
- continue;
- }
+ const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
- entry = elfImage + sectionHeader[i].sh_offset;
- codeSize = sectionHeader[i].sh_size;
+ for(size_t j = 0; j < functionNames.size(); ++j)
+ {
+ if(strstr(sectionName, functionNames[j]) != nullptr)
+ {
+ return j;
+ }
+ }
+
+ UNREACHABLE("Failed to find executable section that matches input function names");
+ return static_cast<size_t>(-1);
+ };
+
+ size_t index = findSectionNameEntryIndex();
+ entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
+ entryPoints[index].codeSize = sectionHeader[i].sh_size;
}
}
else if(sectionHeader[i].sh_type == SHT_REL)
@@ -725,7 +720,7 @@
}
}
- return entry;
+ return entryPoints;
}
template<typename T>
@@ -796,18 +791,20 @@
void seek(uint64_t Off) override { position = Off; }
- const void *getEntryByName(const char *name)
+ std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
{
- size_t codeSize = 0;
- const void *entry = loadImage(&buffer[0], codeSize, name);
+ auto entryPoints = loadImage(&buffer[0], functionNames);
#if defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), NULL, 0);
#else
- __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
+ for(auto &entryPoint : entryPoints)
+ {
+ __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
+ }
#endif
- return entry;
+ return entryPoints;
}
void finalize()
@@ -926,7 +923,9 @@
delete ::out;
::out = nullptr;
+ ::entryBlock = nullptr;
::basicBlock = nullptr;
+ ::basicBlockTop = nullptr;
::codegenMutex.unlock();
}
@@ -1026,10 +1025,11 @@
objectWriter->writeNonUserSections();
// Done compiling functions, get entry pointers to each of them
- for(size_t i = 0; i < Count; ++i)
+ auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
+ ASSERT(entryPoints.size() == Count);
+ for(size_t i = 0; i < entryPoints.size(); ++i)
{
- const void *entry = ::routine->getEntryByName(names[i]);
- ::routine->setEntry(i, entry);
+ ::routine->setEntry(i, entryPoints[i].entry);
}
::routine->finalize();
@@ -1042,7 +1042,7 @@
std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
{
- createRetVoidIfNoRet();
+ finalizeFunction();
return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
}
@@ -1083,7 +1083,9 @@
{
ASSERT(::function == nullptr);
ASSERT(::allocator == nullptr);
+ ASSERT(::entryBlock == nullptr);
ASSERT(::basicBlock == nullptr);
+ ASSERT(::basicBlockTop == nullptr);
::function = sz::createFunction(::context, T(returnType), T(paramTypes));
@@ -1093,7 +1095,9 @@
// TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
::allocator = new Ice::CfgLocalAllocatorScope(::function);
- ::basicBlock = ::function->getEntryNode();
+ ::entryBlock = ::function->getEntryNode();
+ ::basicBlock = ::function->makeNode();
+ ::basicBlockTop = ::basicBlock;
}
Value *Nucleus::getArgument(unsigned int index)
@@ -4595,29 +4599,13 @@
// ... <REACTOR CODE> ...
//
- // Save original entry block and current block, and create a new entry block and make it current.
- // This new block will be used to inject code above the begin routine's existing code. We make
- // this block branch to the original entry block as the last instruction.
- auto origEntryBB = ::function->getEntryNode();
- auto origCurrBB = ::basicBlock;
- auto newBB = ::function->makeNode();
- sz::replaceEntryNode(::function, newBB);
- ::basicBlock = newBB;
-
// this->handle = coro::getHandleParam();
- this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);
+ this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
// YieldType promise;
// coro::setPromisePtr(handle, &promise); // For await
this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
- sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);
-
- // Branch to original entry block
- auto br = Ice::InstBr::create(::function, origEntryBB);
- ::basicBlock->appendInst(br);
-
- // Restore current block for future instructions
- ::basicBlock = origCurrBB;
+ sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
}
// Adds instructions for Yield() calls at the current location of the main coroutine function.
@@ -4719,7 +4707,7 @@
// <resumeBlock>
// }
Ice::CfgNode *bb = awaitFunc->getEntryNode();
- Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone);
+ Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
bb->appendInst(br);
@@ -4831,7 +4819,7 @@
// Finish generating coroutine functions
{
Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
- createRetVoidIfNoRet();
+ finalizeFunction();
}
auto awaitFunc = ::coroGen->generateAwaitFunction();
@@ -4851,7 +4839,7 @@
{
{
Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
- createRetVoidIfNoRet();
+ finalizeFunction();
}
::coroYieldType = nullptr;
diff --git a/src/Vulkan/BUILD.gn b/src/Vulkan/BUILD.gn
index 9ea2d28..19b7e79 100644
--- a/src/Vulkan/BUILD.gn
+++ b/src/Vulkan/BUILD.gn
@@ -101,8 +101,6 @@
]
} else if (is_fuchsia) {
sources += [ "VkSemaphoreExternalFuchsia.hpp" ]
- } else {
- sources += [ "VkSemaphoreExternalNone.hpp" ]
}
}
diff --git a/src/Vulkan/VkSemaphore.cpp b/src/Vulkan/VkSemaphore.cpp
index 72bee8b..8daef84 100644
--- a/src/Vulkan/VkSemaphore.cpp
+++ b/src/Vulkan/VkSemaphore.cpp
@@ -17,18 +17,6 @@
#include "VkConfig.h"
#include "VkStringify.hpp"
-#if SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
-# if defined(__linux__) || defined(__ANDROID__)
-# include "VkSemaphoreExternalLinux.hpp"
-# else
-# error "Missing VK_KHR_external_semaphore_fd implementation for this platform!"
-# endif
-#elif VK_USE_PLATFORM_FUCHSIA
-# include "VkSemaphoreExternalFuchsia.hpp"
-#else
-# include "VkSemaphoreExternalNone.hpp"
-#endif
-
#include "marl/blockingcall.h"
#include "marl/conditionvariable.h"
@@ -38,11 +26,76 @@
namespace vk {
+// This is a base abstract class for all external semaphore implementations
+// used in this source file.
+class Semaphore::External
+{
+public:
+ virtual ~External() = default;
+
+ // Initialize new instance with a given initial state.
+ virtual VkResult init(bool initialState) = 0;
+
+ virtual bool tryWait() = 0;
+ virtual void wait() = 0;
+ virtual void signal() = 0;
+
+ // For VK_KHR_external_semaphore_fd
+ virtual VkResult importOpaqueFd(int fd)
+ {
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+
+ virtual VkResult exportOpaqueFd(int *pFd)
+ {
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+
+#if VK_USE_PLATFORM_FUCHSIA
+ // For VK_FUCHSIA_external_semaphore
+ virtual VkResult importHandle(zx_handle_t handle)
+ {
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+ virtual VkResult exportHandle(zx_handle_t *pHandle)
+ {
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+#endif
+ // Pointer to previous temporary external instance, used for |tempExternal| only.
+ External *previous = nullptr;
+};
+
+} // namespace vk
+
+#if SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
+# if defined(__linux__) || defined(__ANDROID__)
+# include "VkSemaphoreExternalLinux.hpp"
+# else
+# error "Missing VK_KHR_external_semaphore_fd implementation for this platform!"
+# endif
+#elif VK_USE_PLATFORM_FUCHSIA
+# include "VkSemaphoreExternalFuchsia.hpp"
+#endif
+
+namespace vk {
+
+// The bitmask of all external semaphore handle types supported by this source file.
+static const VkExternalSemaphoreHandleTypeFlags kSupportedTypes =
+#if SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+#endif
+#if VK_USE_PLATFORM_FUCHSIA
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TEMP_ZIRCON_EVENT_BIT_FUCHSIA |
+#endif
+ 0;
+
namespace {
struct SemaphoreCreateInfo
{
bool exportSemaphore = false;
+ VkExternalSemaphoreHandleTypeFlags exportHandleTypes = 0;
// Create a new instance. The external instance will be allocated only
// the pCreateInfo->pNext chain indicates it needs to be exported.
@@ -57,9 +110,12 @@
{
const auto *exportInfo = reinterpret_cast<const VkExportSemaphoreCreateInfo *>(nextInfo);
exportSemaphore = true;
- if(exportInfo->handleTypes != Semaphore::External::kExternalSemaphoreHandleType)
+ exportHandleTypes = exportInfo->handleTypes;
+ if((exportHandleTypes & ~kSupportedTypes) != 0)
{
- UNSUPPORTED("exportInfo->handleTypes %d", int(exportInfo->handleTypes));
+ UNSUPPORTED("exportInfo->handleTypes 0x%X (supports 0x%X)",
+ int(exportHandleTypes),
+ int(kSupportedTypes));
}
}
break;
@@ -74,27 +130,30 @@
void Semaphore::wait()
{
- if(external)
+ std::unique_lock<std::mutex> lock(mutex);
+ External *ext = tempExternal ? tempExternal : external;
+ if(ext)
{
- if(!external->tryWait())
+ if(!ext->tryWait())
{
- // Dispatch the external wait to a background thread.
+ // Dispatch the external wait to a background thread.
// Even if this creates a new thread on each
// call, it is assumed that this is negligible
// compared with the actual semaphore wait()
// operation.
- marl::blocking_call([this]() {
- external->wait();
+ marl::blocking_call([ext, &lock]() {
+ lock.unlock();
+ ext->wait();
+ lock.lock();
});
}
- // If the import was temporary, reset the semaphore to its
- // permanent state by getting rid of |external|.
+ // If the import was temporary, reset the semaphore to its previous state.
// See "6.4.5. Importing Semaphore Payloads" in Vulkan 1.1 spec.
- if(temporaryImport)
+ if(ext == tempExternal)
{
- deallocateExternal();
- temporaryImport = false;
+ tempExternal = ext->previous;
+ deallocateExternal(ext);
}
}
else
@@ -105,11 +164,13 @@
void Semaphore::signal()
{
- if(external)
+ std::unique_lock<std::mutex> lock(mutex);
+ External *ext = tempExternal ? tempExternal : external;
+ if(ext)
{
// Assumes that signalling an external semaphore is non-blocking,
// so it can be performed directly either from a fiber or thread.
- external->signal();
+ ext->signal();
}
else
{
@@ -121,16 +182,22 @@
: allocator(pAllocator)
{
SemaphoreCreateInfo info(pCreateInfo);
- if(info.exportSemaphore)
- {
- allocateExternal();
- external->init();
- }
+ exportableHandleTypes = info.exportHandleTypes;
}
void Semaphore::destroy(const VkAllocationCallbacks *pAllocator)
{
- deallocateExternal();
+ while(tempExternal)
+ {
+ External *ext = tempExternal;
+ tempExternal = ext->previous;
+ deallocateExternal(ext);
+ }
+ if(external)
+ {
+ deallocateExternal(external);
+ external = nullptr;
+ }
}
size_t Semaphore::ComputeRequiredAllocationSize(const VkSemaphoreCreateInfo *pCreateInfo)
@@ -139,78 +206,140 @@
return 0;
}
-void Semaphore::allocateExternal()
+template<class EXTERNAL>
+Semaphore::External *Semaphore::allocateExternal()
{
- ASSERT(external == nullptr);
- external = reinterpret_cast<Semaphore::External *>(
- vk::allocate(sizeof(Semaphore::External), vk::REQUIRED_MEMORY_ALIGNMENT, allocator));
- new(external) Semaphore::External();
+ auto *ext = reinterpret_cast<Semaphore::External *>(
+ vk::allocate(sizeof(EXTERNAL), alignof(EXTERNAL), allocator));
+ new(ext) EXTERNAL();
+ return ext;
}
-void Semaphore::deallocateExternal()
+void Semaphore::deallocateExternal(Semaphore::External *ext)
{
- if(external)
+ ext->~External();
+ vk::deallocate(ext, allocator);
+}
+
+template<typename ALLOC_FUNC, typename IMPORT_FUNC>
+VkResult Semaphore::importPayload(bool temporaryImport,
+ ALLOC_FUNC alloc_func,
+ IMPORT_FUNC import_func)
+{
+ std::unique_lock<std::mutex> lock(mutex);
+
+ // Create new External instance if needed.
+ External *ext = external;
+ if(temporaryImport || !ext)
{
- vk::deallocate(external, allocator);
- external = nullptr;
+ ext = alloc_func();
}
+ VkResult result = import_func(ext);
+ if(result != VK_SUCCESS)
+ {
+ if(temporaryImport || !external)
+ {
+ deallocateExternal(ext);
+ }
+ return result;
+ }
+
+ if(temporaryImport)
+ {
+ ext->previous = tempExternal;
+ tempExternal = ext;
+ }
+ else if(!external)
+ {
+ external = ext;
+ }
+ return VK_SUCCESS;
+}
+
+template<typename ALLOC_FUNC, typename EXPORT_FUNC>
+VkResult Semaphore::exportPayload(ALLOC_FUNC alloc_func, EXPORT_FUNC export_func)
+{
+ std::unique_lock<std::mutex> lock(mutex);
+ // Sanity check, do not try to export a semaphore that has a temporary import.
+ if(tempExternal != nullptr)
+ {
+ TRACE("Cannot export semaphore with a temporary import!");
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+ // Allocate |external| if it doesn't exist yet.
+ if(!external)
+ {
+ External *ext = alloc_func();
+ VkResult result = ext->init(internal.isSignalled());
+ if(result != VK_SUCCESS)
+ {
+ deallocateExternal(ext);
+ return result;
+ }
+ external = ext;
+ }
+ return export_func(external);
}
#if SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
-VkResult Semaphore::importFd(int fd, bool tempImport)
+VkResult Semaphore::importFd(int fd, bool temporaryImport)
{
- std::unique_lock<std::mutex> lock(mutex);
- if(!external)
- {
- allocateExternal();
- }
- VkResult result = external->importFd(fd);
- if(result != VK_SUCCESS)
- {
- deallocateExternal();
- }
- else
- {
- temporaryImport = tempImport;
- }
- return result;
+ return importPayload(
+ temporaryImport,
+ [this]() {
+ return allocateExternal<OpaqueFdExternalSemaphore>();
+ },
+ [fd](External *ext) {
+ return ext->importOpaqueFd(fd);
+ });
}
VkResult Semaphore::exportFd(int *pFd)
{
- std::unique_lock<std::mutex> lock(mutex);
- if(!external)
+ if((exportableHandleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) == 0)
{
- TRACE("Cannot export non-external semaphore");
+ TRACE("Cannot export semaphore as opaque FD (exportableHandleType = 0x%X, want 0x%X)",
+ exportableHandleTypes,
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
}
- return external->exportFd(pFd);
+
+ return exportPayload([this]() { return allocateExternal<OpaqueFdExternalSemaphore>(); },
+ [pFd](External *ext) {
+ return ext->exportOpaqueFd(pFd);
+ });
}
#endif // SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
#if VK_USE_PLATFORM_FUCHSIA
-VkResult Semaphore::importHandle(zx_handle_t handle, bool tempImport)
+VkResult Semaphore::importHandle(zx_handle_t handle, bool temporaryImport)
{
- std::unique_lock<std::mutex> lock(mutex);
- if(!external)
- {
- allocateExternal();
- }
- // NOTE: Imports are just moving a handle so cannot fail.
- external->importHandle(handle);
- temporaryImport = tempImport;
- return VK_SUCCESS;
+ return importPayload(
+ temporaryImport,
+ [this]() {
+ return allocateExternal<ZirconEventExternalSemaphore>();
+ },
+ [handle](External *ext) {
+ return ext->importHandle(handle);
+ });
}
VkResult Semaphore::exportHandle(zx_handle_t *pHandle)
{
- std::unique_lock<std::mutex> lock(mutex);
- if(!external)
+ if((exportableHandleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TEMP_ZIRCON_EVENT_BIT_FUCHSIA) == 0)
{
- TRACE("Cannot export non-external semaphore");
+ TRACE("Cannot export semaphore as Zircon handle (exportableHandleType = 0x%X, want 0x%X)",
+ exportableHandleTypes,
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TEMP_ZIRCON_EVENT_BIT_FUCHSIA);
+
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
}
- return external->exportHandle(pHandle);
+
+ return exportPayload([this]() { return allocateExternal<ZirconEventExternalSemaphore>(); },
+ [pHandle](External *ext) {
+ return ext->exportHandle(pHandle);
+ });
}
#endif // VK_USE_PLATFORM_FUCHSIA
diff --git a/src/Vulkan/VkSemaphore.hpp b/src/Vulkan/VkSemaphore.hpp
index 54004d8..b67d4d9 100644
--- a/src/Vulkan/VkSemaphore.hpp
+++ b/src/Vulkan/VkSemaphore.hpp
@@ -58,14 +58,99 @@
class External;
private:
- void allocateExternal();
- void deallocateExternal();
+ // Small technical note on how semaphores are imported/exported with Vulkan:
+ //
+ // - A Vulkan Semaphore objects has a "payload", corresponding to a
+ // simple atomic boolean flag.
+ //
+ // - A Vulkan Semaphore object can be "exported": this creates a
+ // platform-specific handle / descriptor (which can be passed to other
+ // processes), and is linked in some way to the original semaphore's
+ // payload.
+ //
+ // - Similarly, said handle / descriptor can be "imported" into a Vulkan
+ // Semaphore object. By default, that semaphore loses its payload, and
+ // instead uses the one referenced / shared through the descriptor.
+ //
+ // Hence if semaphore A exports its payload through a descriptor that
+ // is later imported into semaphore B, then both A and B will use/share
+ // the same payload (i.e. signal flag), making cross-process
+ // synchronization possible.
+ //
+ // - There are also "temporary imports", where the target semaphore's
+ // payload is not lost, but is simply hidden/stashed. But the next wait()
+ // operation on the same semaphore should remove the temporary import,
+ // and restore the previous payload.
+ //
+ // - There are many handle / descriptor types, which are listed through
+ // the VkExternalSemaphoreHandleTypeFlagBits. A given Vulkan
+ // implementation might support only one or several at the same time
+ // (e.g. on Linux or Android, it could support both OPAQUE_FD_BIT and
+ // SYNC_FD_BIT, while on Windows, it would be OPAQUE_WIN32_BIT +
+ // OPAQUE_WIN32_KMT_BIT + D3D12_FENCE_BIT).
+ //
+ // - To be able to export a semaphore, VkCreateSemaphore() must be called
+ // with a VkSemaphoreCreateInfo that lists the types of all possible
+ // platform-specific handles the semaphore could be exported to
+ // (e.g. on Linux, it is possible to specify that a semaphore might be
+ // exported as an opaque FD, or as a Linux Sync FD).
+ //
+ // However, the exact type is only determined later by the
+ // export operation itself (e.g. vkGetSemaphoreFdKHR() could be called to export
+ // either a VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT or a
+ // VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT).
+ //
+ // Once a semaphore has been exported as one type, it is not possible
+ // to export the same payload with a different type (though the spec
+ // doesn't seem to be explicit about this, it's simply impossible in
+ // general).
+ //
+ // This leads to the following design:
+ //
+ // - |internal| is a simple marl::Event that represents the semaphore's
+ // payload when it is not exported, or imported non-temporarily.
+ //
+ // - |external| points to an external semaphore payload. It is created
+ // on demand if the semaphore is exported or imported non-temporarily.
+ // Note that once |external| is created, |internal| is ignored.
+ //
+ // - |tempExternal| points to a linked-list of temporary external
+ // semaphore payloads. The list head corresponds to the most recent
+ // temporary import.
+ //
+
+ // Internal template to allocate a new External implementation.
+ template<class EXTERNAL>
+ External *allocateExternal();
+
+ void deallocateExternal(External *ext);
+
+ // Used internally to import an external payload.
+ // |temporaryImport| is true iff the import is temporary.
+ // |alloc_func| is a callable that allocates a new External instance of the
+ // appropriate type.
+ // |import_func| is a callable that takes a single parameter, which
+ // corresponds to the external handle/descriptor, and returns a VkResult
+ // value.
+ template<typename ALLOC_FUNC, typename IMPORT_FUNC>
+ VkResult importPayload(bool temporaryImport,
+ ALLOC_FUNC alloc_func,
+ IMPORT_FUNC import_func);
+
+ // Used internally to export a given payload.
+ // |alloc_func| is a callable that allocates a new External instance of
+ // the appropriate type.
+ // |export_func| is a callable that takes a pointer to an External instance,
+ // and a pointer to a handle/descriptor, and returns a VkResult.
+ template<typename ALLOC_FUNC, typename EXPORT_FUNC>
+ VkResult exportPayload(ALLOC_FUNC alloc_func, EXPORT_FUNC export_func);
const VkAllocationCallbacks *allocator = nullptr;
- marl::Event internal;
+ VkExternalSemaphoreHandleTypeFlags exportableHandleTypes = (VkExternalSemaphoreHandleTypeFlags)0;
std::mutex mutex;
+ marl::Event internal;
External *external = nullptr;
- bool temporaryImport = false;
+ External *tempExternal = nullptr;
};
static inline Semaphore *Cast(VkSemaphore object)
diff --git a/src/Vulkan/VkSemaphoreExternalFuchsia.hpp b/src/Vulkan/VkSemaphoreExternalFuchsia.hpp
index a45f0b8..90f68d4 100644
--- a/src/Vulkan/VkSemaphoreExternalFuchsia.hpp
+++ b/src/Vulkan/VkSemaphoreExternalFuchsia.hpp
@@ -26,38 +26,44 @@
namespace vk {
-class Semaphore::External
+class ZirconEventExternalSemaphore : public Semaphore::External
{
public:
- // The type of external semaphore handle types supported by this implementation.
- static const VkExternalSemaphoreHandleTypeFlags kExternalSemaphoreHandleType =
- VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TEMP_ZIRCON_EVENT_BIT_FUCHSIA;
-
- // Default constructor. Note that one should call either init() or
- // importFd() before any call to wait() or signal().
- External() = default;
-
- ~External()
+ ~ZirconEventExternalSemaphore()
{
zx_handle_close(handle);
}
- void init()
+ VkResult init(bool initialValue) override
{
zx_status_t status = zx_event_create(0, &handle);
if(status != ZX_OK)
{
- ABORT("zx_event_create() returned %d", status);
+ TRACE("zx_event_create() returned %d", status);
+ return VK_ERROR_INITIALIZATION_FAILED;
}
+ if(initialValue)
+ {
+ status = zx_object_signal(handle, 0, ZX_EVENT_SIGNALED);
+ if(status != ZX_OK)
+ {
+ TRACE("zx_object_signal() returned %d", status);
+ zx_handle_close(handle);
+ handle = ZX_HANDLE_INVALID;
+ return VK_ERROR_INITIALIZATION_FAILED;
+ }
+ }
+ return VK_SUCCESS;
}
- void importHandle(zx_handle_t new_handle)
+ VkResult importHandle(zx_handle_t new_handle) override
{
zx_handle_close(handle);
handle = new_handle;
+ return VK_SUCCESS;
}
- VkResult exportHandle(zx_handle_t *pHandle) const
+ VkResult exportHandle(zx_handle_t *pHandle) override
{
zx_handle_t new_handle = ZX_HANDLE_INVALID;
zx_status_t status = zx_handle_duplicate(handle, ZX_RIGHT_SAME_RIGHTS, &new_handle);
@@ -70,7 +76,7 @@
return VK_SUCCESS;
}
- void wait()
+ void wait() override
{
zx_signals_t observed = 0;
zx_status_t status = zx_object_wait_one(
@@ -91,7 +97,7 @@
}
}
- bool tryWait()
+ bool tryWait() override
{
zx_signals_t observed = 0;
zx_status_t status = zx_object_wait_one(
@@ -113,7 +119,7 @@
return true;
}
- void signal()
+ void signal() override
{
zx_status_t status = zx_object_signal(handle, 0, ZX_EVENT_SIGNALED);
if(status != ZX_OK)
diff --git a/src/Vulkan/VkSemaphoreExternalLinux.hpp b/src/Vulkan/VkSemaphoreExternalLinux.hpp
index 36bac26..d4e4cd2 100644
--- a/src/Vulkan/VkSemaphoreExternalLinux.hpp
+++ b/src/Vulkan/VkSemaphoreExternalLinux.hpp
@@ -44,7 +44,8 @@
class SharedSemaphore
{
public:
- SharedSemaphore()
+ SharedSemaphore(bool initialValue)
+ : signaled(initialValue)
{
pthread_mutexattr_t mattr;
pthread_mutexattr_init(&mattr);
@@ -129,20 +130,13 @@
namespace vk {
-class Semaphore::External
+class OpaqueFdExternalSemaphore : public Semaphore::External
{
public:
- // The type of external semaphore handle types supported by this implementation.
- static const VkExternalSemaphoreHandleTypeFlags kExternalSemaphoreHandleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
-
- // Default constructor. Note that one should call either init() or
- // importFd() before any call to wait() or signal().
- External() = default;
-
- ~External() { close(); }
+ ~OpaqueFdExternalSemaphore() { unmapRegion(); }
// Initialize instance by creating a new shared memory region.
- void init()
+ VkResult init(bool initialState) override
{
// Allocate or import the region's file descriptor.
const size_t size = sw::memoryPageSize();
@@ -153,24 +147,30 @@
snprintf(name, sizeof(name), "SwiftShader.Semaphore.%d", ++counter);
if(!memfd.allocate(name, size))
{
- ABORT("memfd.allocate() returned %s", strerror(errno));
+ TRACE("memfd.allocate() returned %s", strerror(errno));
+ return VK_ERROR_INITIALIZATION_FAILED;
}
- mapRegion(size, true);
+ if(!mapRegion(size, true, initialState))
+ return VK_ERROR_INITIALIZATION_FAILED;
+
+ return VK_SUCCESS;
}
// Import an existing semaphore through its file descriptor.
- VkResult importFd(int fd)
+ VkResult importOpaqueFd(int fd) override
{
- close();
+ unmapRegion();
memfd.importFd(fd);
- mapRegion(sw::memoryPageSize(), false);
+ if(!mapRegion(sw::memoryPageSize(), false, false))
+ return VK_ERROR_INITIALIZATION_FAILED;
+
return VK_SUCCESS;
}
// Export the current semaphore as a duplicated file descriptor to the same
// region. This can be consumed by importFd() running in a different
// process.
- VkResult exportFd(int *pFd) const
+ VkResult exportOpaqueFd(int *pFd) override
{
int fd = memfd.exportFd();
if(fd < 0)
@@ -181,24 +181,23 @@
return VK_SUCCESS;
}
- void wait()
+ void wait() override
{
semaphore->wait();
}
- bool tryWait()
+ bool tryWait() override
{
return semaphore->tryWait();
}
- void signal()
+ void signal() override
{
semaphore->signal();
}
private:
- // Unmap the semaphore if needed and close its file descriptor.
- void close()
+ void unmapRegion()
{
if(semaphore)
{
@@ -213,23 +212,25 @@
}
// Remap the shared region and setup the semaphore or increment its reference count.
- void mapRegion(size_t size, bool needInitialization)
+ bool mapRegion(size_t size, bool needsInitialization, bool initialValue)
{
// Map the region into memory and point the semaphore to it.
void *addr = memfd.mapReadWrite(0, size);
if(!addr)
{
- ABORT("mmap() failed: %s", strerror(errno));
+ TRACE("mmap() failed: %s", strerror(errno));
+ return false;
}
semaphore = reinterpret_cast<SharedSemaphore *>(addr);
- if(needInitialization)
+ if(needsInitialization)
{
- new(semaphore) SharedSemaphore();
+ new(semaphore) SharedSemaphore(initialValue);
}
else
{
semaphore->addRef();
}
+ return true;
}
LinuxMemFd memfd;
diff --git a/src/Vulkan/VkSemaphoreExternalNone.hpp b/src/Vulkan/VkSemaphoreExternalNone.hpp
deleted file mode 100644
index 9592e3f..0000000
--- a/src/Vulkan/VkSemaphoreExternalNone.hpp
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef VK_SEMAPHORE_EXTERNAL_NONE_H_
-#define VK_SEMAPHORE_EXTERNAL_NONE_H_
-
-namespace vk {
-
-// Empty external sempahore implementation.
-class Semaphore::External
-{
-public:
- // The type of external semaphore handle types supported by this implementation.
- static const VkExternalSemaphoreHandleTypeFlags kExternalSemaphoreHandleType = 0;
-
- void init() {}
-
- void wait() {}
-
- bool tryWait() { return true; }
-
- void signal() {}
-
-private:
- int dummy;
-};
-
-} // namespace vk
-
-#endif // VK_SEMAPHORE_EXTERNAL_NONE_H_
diff --git a/third_party/llvm-subzero/include/llvm/Support/Compiler.h b/third_party/llvm-subzero/include/llvm/Support/Compiler.h
index 55148a4..803fd48 100644
--- a/third_party/llvm-subzero/include/llvm/Support/Compiler.h
+++ b/third_party/llvm-subzero/include/llvm/Support/Compiler.h
@@ -243,6 +243,15 @@
#define LLVM_FALLTHROUGH
#endif
+/// LLVM_REQUIRE_CONSTANT_INITIALIZATION - Apply this to globals to ensure that
+/// they are constant initialized.
+#if __has_cpp_attribute(clang::require_constant_initialization)
+#define LLVM_REQUIRE_CONSTANT_INITIALIZATION \
+ [[clang::require_constant_initialization]]
+#else
+#define LLVM_REQUIRE_CONSTANT_INITIALIZATION
+#endif
+
/// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
/// pedantic diagnostics.
#ifdef __GNUC__
diff --git a/third_party/llvm-subzero/include/llvm/Support/ManagedStatic.h b/third_party/llvm-subzero/include/llvm/Support/ManagedStatic.h
index 7ce86ee..e4ebd7c 100644
--- a/third_party/llvm-subzero/include/llvm/Support/ManagedStatic.h
+++ b/third_party/llvm-subzero/include/llvm/Support/ManagedStatic.h
@@ -36,18 +36,37 @@
static void call(void *Ptr) { delete[](T *)Ptr; }
};
+// If the current compiler is MSVC 2017 or earlier, then we have to work around
+// a bug where MSVC emits code to perform dynamic initialization even if the
+// class has a constexpr constructor. Instead, fall back to the C++98 strategy
+// where there are no constructors or member initializers. We can remove this
+// when MSVC 2019 (19.20+) is our minimum supported version.
+#if !defined(__clang__) && defined(_MSC_VER) && _MSC_VER < 1920
+#define LLVM_AVOID_CONSTEXPR_CTOR
+#endif
+
/// ManagedStaticBase - Common base class for ManagedStatic instances.
class ManagedStaticBase {
protected:
+#ifndef LLVM_AVOID_CONSTEXPR_CTOR
+ mutable std::atomic<void *> Ptr{};
+ mutable void (*DeleterFn)(void *) = nullptr;
+ mutable const ManagedStaticBase *Next = nullptr;
+#else
// This should only be used as a static variable, which guarantees that this
// will be zero initialized.
mutable std::atomic<void *> Ptr;
- mutable void (*DeleterFn)(void*);
+ mutable void (*DeleterFn)(void *);
mutable const ManagedStaticBase *Next;
+#endif
void RegisterManagedStatic(void *(*creator)(), void (*deleter)(void*)) const;
public:
+#ifndef LLVM_AVOID_CONSTEXPR_CTOR
+ constexpr ManagedStaticBase() = default;
+#endif
+
/// isConstructed - Return true if this object has not been created yet.
bool isConstructed() const { return Ptr != nullptr; }
diff --git a/third_party/llvm-subzero/lib/Support/CommandLine.cpp b/third_party/llvm-subzero/lib/Support/CommandLine.cpp
index fa1782c..3e77cd0 100644
--- a/third_party/llvm-subzero/lib/Support/CommandLine.cpp
+++ b/third_party/llvm-subzero/lib/Support/CommandLine.cpp
@@ -383,11 +383,16 @@
GlobalParser->registerCategory(this);
}
-// A special subcommand representing no subcommand
-ManagedStatic<SubCommand> llvm::cl::TopLevelSubCommand;
+// A special subcommand representing no subcommand. It is particularly important
+// that this ManagedStatic uses constant initialization and not dynamic
+// initialization because it is referenced from cl::opt constructors, which run
+// dynamically in an arbitrary order.
+LLVM_REQUIRE_CONSTANT_INITIALIZATION ManagedStatic<SubCommand>
+llvm::cl::TopLevelSubCommand;
// A special subcommand that can be used to put an option into all subcommands.
-ManagedStatic<SubCommand> llvm::cl::AllSubCommands;
+LLVM_REQUIRE_CONSTANT_INITIALIZATION ManagedStatic<SubCommand>
+llvm::cl::AllSubCommands;
void SubCommand::registerSubCommand() {
GlobalParser->registerSubCommand(this);
diff --git a/third_party/marl/src/osfiber_x86.c b/third_party/marl/src/osfiber_x86.c
index 6c486aa..cac72cb 100644
--- a/third_party/marl/src/osfiber_x86.c
+++ b/third_party/marl/src/osfiber_x86.c
@@ -25,12 +25,19 @@
uint32_t stack_size,
void (*target)(void*),
void* arg) {
+ // The stack pointer needs to be 16-byte aligned when making a 'call'.
+  // The 'call' instruction automatically pushes the return address onto the
+  // stack (4-bytes), before making the jump.
+ // The marl_fiber_swap() assembly function does not use 'call', instead it
+ // uses 'jmp', so we need to offset the ESP pointer by 4 bytes so that the
+ // stack is still 16-byte aligned when the return target is stack-popped by
+ // the callee.
uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size);
ctx->EIP = (uintptr_t)&marl_fiber_trampoline;
- ctx->ESP = (uintptr_t)&stack_top[-3];
- stack_top[-1] = (uintptr_t)arg;
- stack_top[-2] = (uintptr_t)target;
- stack_top[-3] = 0; // No return target.
+ ctx->ESP = (uintptr_t)&stack_top[-5];
+ stack_top[-3] = (uintptr_t)arg;
+ stack_top[-4] = (uintptr_t)target;
+ stack_top[-5] = 0; // No return target.
}
#endif // defined(__i386__)