Subzero Coroutines: Use ultra-low-level marl APIs
Instead of using the `marl::Event` synchronization primitives, drop down to the `marl::Scheduler::Fiber` APIs directly, and use the new `marl::Scheduler::Fiber::wait()` function that does not take a lock.
This provides roughly another 2x speedup on the coroutine benchmarks:
```
go run ./third_party/marl/tools/cmd/benchdiff/main.go pre.json post.json
Delta | Test name | (A) pre.json | (B) post.json
-2.08x -81.186µs | Coroutines/Fibonacci/iterations:512 | 156.202µs | 75.016µs
-2.10x -668.201µs | Coroutines/Fibonacci/iterations:4096 | 1.275069ms | 606.868µs
-2.11x -5.359326ms | Coroutines/Fibonacci/iterations:32768 | 10.166126ms | 4.8068ms
-2.12x -10.342µs | Coroutines/Fibonacci/iterations:64 | 19.585µs | 9.243µs
-2.13x -2.784542745s | Coroutines/Fibonacci/iterations:16777216 | 5.251299045s | 2.4667563s
-2.13x -347.220746ms | Coroutines/Fibonacci/iterations:2097152 | 653.812928ms | 306.592182ms
-2.14x -43.615678ms | Coroutines/Fibonacci/iterations:262144 | 82.017312ms | 38.401634ms
```
Bug: b/145754674
Change-Id: I5f6b0c8c92af645cc2a825c6f1e2769b2440638e
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/42850
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 3221a7e..522b566 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -4481,10 +4481,11 @@
struct CoroutineData
{
bool useInternalScheduler = false;
- marl::Event suspended; // the coroutine is suspended on a yield()
- marl::Event resumed; // the caller is suspended on an await()
- marl::Event done{ marl::Event::Mode::Manual }; // the coroutine should stop at the next yield()
- marl::Event terminated{ marl::Event::Mode::Manual }; // the coroutine has finished.
+ bool done = false; // the coroutine should stop at the next yield(); set by stop() and when the coroutine body returns
+ bool terminated = false; // the coroutine body has returned; stop() waits until this is set
+ bool inRoutine = false; // true while control belongs to the coroutine fiber, false while it belongs to the main fiber. NOTE(review): plain bool, not atomic - presumably relies on both fibers sharing one thread (SameThread task); confirm
+ marl::Scheduler::Fiber *mainFiber = nullptr; // fiber that started the coroutine; resume() and stop() assert they are called on it
+ marl::Scheduler::Fiber *routineFiber = nullptr; // fiber running the coroutine body; suspend() asserts it is called on it
void *promisePtr = nullptr;
};
@@ -4504,19 +4505,31 @@
// is called.
bool suspend(Nucleus::CoroutineHandle handle)
{
- auto *data = reinterpret_cast<CoroutineData *>(handle);
- data->suspended.signal();
- data->resumed.wait();
- return !data->done.test();
+ auto *coroData = reinterpret_cast<CoroutineData *>(handle);
+ ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber); // must be called from the coroutine's own fiber
+ ASSERT(coroData->inRoutine);
+ coroData->inRoutine = false; // hand control back to the main fiber
+ coroData->mainFiber->notify(); // wake the main fiber, which waits in a loop while inRoutine is true
+ while(!coroData->inRoutine) // re-check the flag after every wake-up; resume() and stop() set inRoutine before notifying
+ {
+ coroData->routineFiber->wait();
+ }
+ return !coroData->done; // false once done has been set (stop() sets it before waking this fiber)
}
// resume() is called by await(), blocking until the coroutine calls yield()
// or the coroutine terminates.
void resume(Nucleus::CoroutineHandle handle)
{
- auto *data = reinterpret_cast<CoroutineData *>(handle);
- data->resumed.signal();
- data->suspended.wait();
+ auto *coroData = reinterpret_cast<CoroutineData *>(handle);
+ ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber); // must be called from the fiber that started the coroutine
+ ASSERT(!coroData->inRoutine);
+ coroData->inRoutine = true; // hand control to the coroutine fiber
+ coroData->routineFiber->notify(); // wake the coroutine fiber, which waits in suspend() until inRoutine is true
+ while(coroData->inRoutine) // re-check the flag after every wake-up; it is cleared by suspend() or when the coroutine body returns
+ {
+ coroData->mainFiber->wait();
+ }
}
// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
@@ -4524,9 +4537,18 @@
void stop(Nucleus::CoroutineHandle handle)
{
auto *coroData = reinterpret_cast<CoroutineData *>(handle);
- coroData->done.signal(); // signal that the coroutine should stop at next (or current) yield.
- coroData->resumed.signal(); // wake the coroutine if blocked on a yield.
- coroData->terminated.wait(); // wait for the coroutine to return.
+ ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
+ ASSERT(!coroData->inRoutine);
+ if(!coroData->terminated)
+ {
+ coroData->done = true;
+ coroData->inRoutine = true;
+ coroData->routineFiber->notify();
+ while(!coroData->terminated)
+ {
+ coroData->mainFiber->wait();
+ }
+ }
if(coroData->useInternalScheduler)
{
::getOrCreateScheduler().unbind();
@@ -4555,7 +4577,7 @@
bool isDone(Nucleus::CoroutineHandle handle)
{
auto *coroData = reinterpret_cast<CoroutineData *>(handle);
- return coroData->done.test();
+ return coroData->done; // plain flag read; done is set by stop() and when the coroutine body returns
}
void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
@@ -4765,15 +4787,29 @@
// any fiber switch occurs.
coro::setHandleParam(coroData);
+ ASSERT(!coroData->routineFiber);
+ coroData->routineFiber = marl::Scheduler::Fiber::current();
+
beginFunc();
- coroData->done.signal(); // coroutine is done.
- coroData->suspended.signal(); // resume any blocking await() call.
- coroData->terminated.signal(); // signal that the coroutine data is ready for freeing.
+ ASSERT(coroData->inRoutine);
+ coroData->done = true; // coroutine is done.
+ coroData->terminated = true; // signal that the coroutine data is ready for freeing.
+ coroData->inRoutine = false;
+ coroData->mainFiber->notify();
};
- marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
- coroData->suspended.wait(); // block until the first yield or coroutine end
+ ASSERT(!coroData->mainFiber);
+ coroData->mainFiber = marl::Scheduler::Fiber::current();
+
+ // block until the first yield or coroutine end
+ ASSERT(!coroData->inRoutine);
+ coroData->inRoutine = true;
+ marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
+ while(coroData->inRoutine)
+ {
+ coroData->mainFiber->wait();
+ }
return coroData;
}