Vulkan: Build a yarn::Scheduler and pass it down to the vk::Queue.

Nothing is actually scheduled yet, but this is a first step towards using yarn.

As yarn is now actually used by SwiftShader, drop the BUILD_YARN build
flags, as failing to build yarn would result in linker errors.

Bug: b/139142453
Change-Id: Ibd9a69f72a248f58a62bb41eeb196c4647876e82
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/35153
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index 692db75..4ef1ee5 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -57,7 +57,7 @@
 	cache.updateConstCache();
 }
 
-Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures)
+Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, yarn::Scheduler *scheduler)
 	: physicalDevice(physicalDevice),
 	  queues(reinterpret_cast<Queue*>(mem)),
 	  enabledExtensionCount(pCreateInfo->enabledExtensionCount),
@@ -76,7 +76,7 @@
 
 		for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
 		{
-			new (&queues[queueID]) Queue(this);
+			new (&queues[queueID]) Queue(this, scheduler);
 		}
 	}
 
diff --git a/src/Vulkan/VkDevice.hpp b/src/Vulkan/VkDevice.hpp
index 809ef2f..24ed2c2 100644
--- a/src/Vulkan/VkDevice.hpp
+++ b/src/Vulkan/VkDevice.hpp
@@ -21,6 +21,11 @@
 #include <memory>
 #include <mutex>
 
+namespace yarn
+{
+	class Scheduler;  // Forward declaration; the Device constructor in this header takes it as a yarn::Scheduler* parameter.
+}
+
 namespace sw
 {
 	class Blitter;
@@ -37,7 +42,7 @@
 public:
 	static constexpr VkSystemAllocationScope GetAllocationScope() { return VK_SYSTEM_ALLOCATION_SCOPE_DEVICE; }
 
-	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures);
+	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, yarn::Scheduler *scheduler);
 	void destroy(const VkAllocationCallbacks* pAllocator);
 
 	static size_t ComputeRequiredAllocationSize(const VkDeviceCreateInfo* pCreateInfo);
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index 31c0ebf..0efde4c 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -22,6 +22,8 @@
 #include "Pipeline/ComputeProgram.hpp"
 #include "Pipeline/SpirvShader.hpp"
 
+#include "Yarn/Trace.hpp"
+
 #include "spirv-tools/optimizer.hpp"
 
 #include <iostream>
@@ -257,6 +259,8 @@
 
 std::shared_ptr<sw::ComputeProgram> createProgram(const vk::PipelineCache::ComputeProgramKey& key)
 {
+	YARN_SCOPED_EVENT("createProgram");
+
 	vk::DescriptorSet::Bindings descriptorSets;  // FIXME(b/129523279): Delay code generation until invoke time.
 	// TODO(b/119409619): use allocator.
 	auto program = std::make_shared<sw::ComputeProgram>(key.getShader(), key.getLayout(), descriptorSets);
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index e654ca0..329847d 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -19,6 +19,11 @@
 #include "WSI/VkSwapchainKHR.hpp"
 #include "Device/Renderer.hpp"
 
+#include "Yarn/Defer.hpp"
+#include "Yarn/Scheduler.hpp"
+#include "Yarn/Thread.hpp"
+#include "Yarn/Trace.hpp"
+
 #include <cstring>
 
 namespace
@@ -74,9 +79,9 @@
 namespace vk
 {
 
-Queue::Queue(Device* device) : device(device)
+Queue::Queue(Device* device, yarn::Scheduler *scheduler) : device(device)
 {
-	queueThread = std::thread(&Queue::taskLoop, this);
+	queueThread = std::thread(&Queue::taskLoop, this, scheduler);
 }
 
 Queue::~Queue()
@@ -155,8 +160,12 @@
 	}
 }
 
-void Queue::taskLoop()
+void Queue::taskLoop(yarn::Scheduler* scheduler)
 {
+	yarn::Thread::setName("Queue<%p>", this);
+	scheduler->bind();
+	defer(scheduler->unbind());
+
 	while(true)
 	{
 		Task task = pending.take();
diff --git a/src/Vulkan/VkQueue.hpp b/src/Vulkan/VkQueue.hpp
index 6d5913e..c137d15 100644
--- a/src/Vulkan/VkQueue.hpp
+++ b/src/Vulkan/VkQueue.hpp
@@ -22,6 +22,11 @@
 
 #include "System/Synchronization.hpp"
 
+namespace yarn
+{
+	class Scheduler;  // Forward declaration; Queue's constructor and taskLoop() in this header take it as a yarn::Scheduler* parameter.
+}
+
 namespace sw
 {
 	class Context;
@@ -39,7 +44,7 @@
 	VK_LOADER_DATA loaderData = { ICD_LOADER_MAGIC };
 
 public:
-	Queue(Device* device);
+	Queue(Device* device, yarn::Scheduler *scheduler);
 	~Queue();
 
 	operator VkQueue()
@@ -64,8 +69,7 @@
 		Type type = SUBMIT_QUEUE;
 	};
 
-	static void TaskLoop(vk::Queue* queue);
-	void taskLoop();
+	void taskLoop(yarn::Scheduler* scheduler);
 	void garbageCollect();
 	void submitQueue(const Task& task);
 
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index 2e6879c..4a71cc5 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -64,6 +64,11 @@
 
 #include "Reactor/Nucleus.hpp"
 
+#include "Yarn/Scheduler.hpp"
+#include "Yarn/Thread.hpp"
+
+#include "System/CPUID.hpp"
+
 #include <algorithm>
 #include <cstring>
 #include <string>
@@ -102,12 +107,33 @@
 	rr::Nucleus::adjustDefaultConfig(cfg);
 }
 
+void setCPUDefaults()  // Sets every SSE-family enable flag on sw::CPUID (SSE4.1, SSSE3, SSE3, SSE2, SSE) to true.
+{
+	sw::CPUID::setEnableSSE4_1(true);  // NOTE(review): presumably these only permit use of a feature the CPU reports — confirm in System/CPUID.
+	sw::CPUID::setEnableSSSE3(true);
+	sw::CPUID::setEnableSSE3(true);
+	sw::CPUID::setEnableSSE2(true);
+	sw::CPUID::setEnableSSE(true);
+}
+
+yarn::Scheduler* getOrCreateScheduler()  // Returns the single shared scheduler, creating and configuring it on the first call only.
+{
+	static const std::unique_ptr<yarn::Scheduler> scheduler = [] {  // One-time init: later calls must not reconfigure a scheduler that may already be in use.
+		std::unique_ptr<yarn::Scheduler> created(new yarn::Scheduler());
+		created->setThreadInitializer([] { sw::CPUID::setFlushToZero(true); sw::CPUID::setDenormalsAreZero(true); });
+		created->setWorkerThreadCount(std::min<size_t>(yarn::Thread::numLogicalCPUs(), 16));  // One worker per logical CPU, at most 16.
+		return created;
+	}();
+	return scheduler.get();  // Non-owning pointer; the function-local static keeps ownership.
+}
+
 // initializeLibrary() is called by vkCreateInstance() to perform one-off global
 // initialization of the swiftshader driver.
 void initializeLibrary()
 {
 	static bool doOnce = [] {
 		setReactorDefaultConfig();
+		setCPUDefaults();
 		return true;
 	}();
 	(void)doOnce;
@@ -563,7 +589,8 @@
 		(void)queueFamilyPropertyCount; // Silence unused variable warning
 	}
 
-	return vk::DispatchableDevice::Create(pAllocator, pCreateInfo, pDevice, vk::Cast(physicalDevice), enabledFeatures);
+	auto scheduler = getOrCreateScheduler();
+	return vk::DispatchableDevice::Create(pAllocator, pCreateInfo, pDevice, vk::Cast(physicalDevice), enabledFeatures, scheduler);
 }
 
 VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator)