Subzero: hide dependency on Marl

Subzero now maintains an internal Marl scheduler, which it binds only
when no external scheduler is already bound on the current thread.
ReactorUnitTests can rely on this internal scheduler being used, so it
no longer creates and binds its own, which removes its direct dependency
on Marl.

Bug: b/145754674
Change-Id: Iddbaa299ccd904a87b8aa86e82e5517c9d72ef59
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/41789
Tested-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d9e9f1..783d14a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2329,9 +2329,9 @@
     )
 
     if(NOT WIN32 AND ${REACTOR_BACKEND} STREQUAL "Subzero")
-        target_link_libraries(ReactorUnitTests ${Reactor} marl pthread dl)
+        target_link_libraries(ReactorUnitTests ${Reactor} pthread dl)
     else()
-        target_link_libraries(ReactorUnitTests ${Reactor} marl)
+        target_link_libraries(ReactorUnitTests ${Reactor})
     endif()
 
     set(GLES_UNITTESTS_LIST
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 7e38d93..d5aafa9 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -18,9 +18,6 @@
 
 #include "gtest/gtest.h"
 
-#include "marl/defer.h"
-#include "marl/scheduler.h"
-
 #include <array>
 #include <cmath>
 #include <thread>
@@ -2137,11 +2134,6 @@
 		return;
 	}
 
-	marl::Scheduler scheduler;
-	scheduler.setWorkerThreadCount(8);
-	scheduler.bind();
-	defer(scheduler.unbind());
-
 	Coroutine<int()> function;
 	{
 		Yield(Int(0));
@@ -2175,11 +2167,6 @@
 		return;
 	}
 
-	marl::Scheduler scheduler;
-	scheduler.setWorkerThreadCount(8);
-	scheduler.bind();
-	defer(scheduler.unbind());
-
 	Coroutine<uint8_t(uint8_t * data, int count)> function;
 	{
 		Pointer<Byte> data = function.Arg<0>();
@@ -2221,11 +2208,6 @@
 		return;
 	}
 
-	marl::Scheduler scheduler;
-	scheduler.setWorkerThreadCount(8);
-	scheduler.bind();
-	defer(scheduler.unbind());
-
 	Coroutine<int()> function;
 	{
 		Int4 a{ 1, 2, 3, 4 };
@@ -2260,11 +2242,6 @@
 		return;
 	}
 
-	marl::Scheduler scheduler;
-	scheduler.setWorkerThreadCount(8);
-	scheduler.bind();
-	defer(scheduler.unbind());
-
 	for(int i = 0; i < 2; ++i)
 	{
 		Coroutine<int()> function;
@@ -2289,11 +2266,6 @@
 		return;
 	}
 
-	marl::Scheduler scheduler;
-	scheduler.setWorkerThreadCount(8);
-	//scheduler.bind();
-	//defer(scheduler.unbind());
-
 	Coroutine<int()> function;
 	{
 		Yield(Int(0));
@@ -2318,9 +2290,6 @@
 	for(size_t t = 0; t < numThreads; ++t)
 	{
 		threads.emplace_back([&] {
-			scheduler.bind();
-			defer(scheduler.unbind());
-
 			auto coroutine = function();
 
 			for(size_t i = 0; i < fibonacci.size(); i++)
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index df12cb1..9a83795 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -268,7 +268,16 @@
 // Coroutine globals
 rr::Type *coroYieldType = nullptr;
 std::shared_ptr<rr::CoroutineGenerator> coroGen;
+marl::Scheduler &getOrCreateScheduler()
+{
+	static auto scheduler = [] {
+		auto s = std::make_unique<marl::Scheduler>();
+		s->setWorkerThreadCount(8);
+		return s;
+	}();
 
+	return *scheduler;
+}
 }  // Anonymous namespace
 
 namespace {
@@ -4467,6 +4476,7 @@
 // Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
 struct CoroutineData
 {
+	bool useInternalScheduler = false;
 	marl::Event suspended;                                // the coroutine is suspended on a yield()
 	marl::Event resumed;                                  // the caller is suspended on an await()
 	marl::Event done{ marl::Event::Mode::Manual };        // the coroutine should stop at the next yield()
@@ -4510,9 +4520,13 @@
 void stop(Nucleus::CoroutineHandle handle)
 {
 	auto *coroData = reinterpret_cast<CoroutineData *>(handle);
-	coroData->done.signal();               // signal that the coroutine should stop at next (or current) yield.
-	coroData->resumed.signal();            // wake the coroutine if blocked on a yield.
-	coroData->terminated.wait();           // wait for the coroutine to return.
+	coroData->done.signal();      // signal that the coroutine should stop at next (or current) yield.
+	coroData->resumed.signal();   // wake the coroutine if blocked on a yield.
+	coroData->terminated.wait();  // wait for the coroutine to return.
+	if(coroData->useInternalScheduler)
+	{
+		::getOrCreateScheduler().unbind();
+	}
 	coro::destroyCoroutineData(coroData);  // free the coroutine data.
 }
 
@@ -4752,6 +4766,12 @@
 	// This doubles up as our coroutine handle
 	auto coroData = coro::createCoroutineData();
 
+	coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
+	if(coroData->useInternalScheduler)
+	{
+		::getOrCreateScheduler().bind();
+	}
+
 	marl::schedule([=] {
 		// Store handle in TLS so that the coroutine can grab it right away, before
 		// any fiber switch occurs.