libVulkan: Wrap the marl scheduler in a shared_ptr.

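Hold a reference to the scheduler in each Vulkan device, so that the scheduler is destroyed once the last reference is released instead of during static destruction at process exit.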

This attempts to work around Windows-specific teardown issues, possibly caused by atexit() logic in our dependencies.

Bug: b/141380274
Change-Id: I5a34c18f8e3738f02221f1784419bc21a6430067
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36990
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index b817b31..5816683 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -57,11 +57,12 @@
 	cache.updateConstCache();
 }
 
-Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, marl::Scheduler *scheduler)
+Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, const std::shared_ptr<marl::Scheduler>& scheduler)
 	: physicalDevice(physicalDevice),
 	  queues(reinterpret_cast<Queue*>(mem)),
 	  enabledExtensionCount(pCreateInfo->enabledExtensionCount),
-	  enabledFeatures(enabledFeatures ? *enabledFeatures : VkPhysicalDeviceFeatures{})  // "Setting pEnabledFeatures to NULL and not including a VkPhysicalDeviceFeatures2 in the pNext member of VkDeviceCreateInfo is equivalent to setting all members of the structure to VK_FALSE."
+	  enabledFeatures(enabledFeatures ? *enabledFeatures : VkPhysicalDeviceFeatures{}),  // "Setting pEnabledFeatures to NULL and not including a VkPhysicalDeviceFeatures2 in the pNext member of VkDeviceCreateInfo is equivalent to setting all members of the structure to VK_FALSE."
+	  scheduler(scheduler)
 {
 	for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
 	{
@@ -76,7 +77,7 @@
 
 		for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
 		{
-			new (&queues[queueID]) Queue(this, scheduler);
+			new (&queues[queueID]) Queue(this, scheduler.get());
 		}
 	}
 
diff --git a/src/Vulkan/VkDevice.hpp b/src/Vulkan/VkDevice.hpp
index 7dac84b..b27fa50 100644
--- a/src/Vulkan/VkDevice.hpp
+++ b/src/Vulkan/VkDevice.hpp
@@ -42,7 +42,7 @@
 public:
 	static constexpr VkSystemAllocationScope GetAllocationScope() { return VK_SYSTEM_ALLOCATION_SCOPE_DEVICE; }
 
-	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, marl::Scheduler *scheduler);
+	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, const std::shared_ptr<marl::Scheduler>& scheduler);
 	void destroy(const VkAllocationCallbacks* pAllocator);
 
 	static size_t ComputeRequiredAllocationSize(const VkDeviceCreateInfo* pCreateInfo);
@@ -106,6 +106,7 @@
 	typedef char ExtensionName[VK_MAX_EXTENSION_NAME_SIZE];
 	ExtensionName* extensions = nullptr;
 	const VkPhysicalDeviceFeatures enabledFeatures = {};
+	std::shared_ptr<marl::Scheduler> scheduler;
 };
 
 using DispatchableDevice = DispatchableObject<Device, VkDevice>;
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index 7201fdd..11bac00 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -120,15 +120,23 @@
 	sw::CPUID::setEnableSSE(true);
 }
 
-marl::Scheduler* getOrCreateScheduler()
+std::shared_ptr<marl::Scheduler> getOrCreateScheduler()
 {
-	static auto scheduler = std::unique_ptr<marl::Scheduler>(new marl::Scheduler());
-	scheduler->setThreadInitializer([] {
-		sw::CPUID::setFlushToZero(true);
-		sw::CPUID::setDenormalsAreZero(true);
-	});
-	scheduler->setWorkerThreadCount(std::min<size_t>(marl::Thread::numLogicalCPUs(), 16));
-	return scheduler.get();
+	static std::mutex mutex;
+	static std::weak_ptr<marl::Scheduler> schedulerWeak;
+	std::unique_lock<std::mutex> lock(mutex);
+	auto scheduler = schedulerWeak.lock();
+	if (!scheduler)
+	{
+		scheduler = std::make_shared<marl::Scheduler>();
+		scheduler->setThreadInitializer([] {
+			sw::CPUID::setFlushToZero(true);
+			sw::CPUID::setDenormalsAreZero(true);
+		});
+		scheduler->setWorkerThreadCount(std::min<size_t>(marl::Thread::numLogicalCPUs(), 16));
+		schedulerWeak = scheduler;
+	}
+	return scheduler;
 }
 
 // initializeLibrary() is called by vkCreateInstance() to perform one-off global