LLVMReactor: Drop codegenMutex, now thread safe. LLVMReactor used to have a Big Fat Global Mutex over the entire lifetime of the Nucleus object. This was required as LLVMReactor used global variables for storing builder state. Over the past year, there has been significant code cleanup and global state has been reduced to a couple of globals that can now be marked thread_local. With all state now being immutable global or thread local, we are now able to remove the mutex. ASAN and TSAN checks for our unittests are clean. Bug: b/153803432 Change-Id: Ibe4019fb783f86e734387db431539e915369b488 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33484 Tested-by: Ben Clayton <bclayton@google.com> Reviewed-by: Antonio Maiorano <amaiorano@google.com>

commit: 20cf5c5cbb314df3c0a4bb76953e62a8f1f3a362 [log] [tgz]
author: Ben Clayton <bclayton@google.com> Mon Jul 01 11:13:27 2019 +0100
committer: Ben Clayton <bclayton@google.com> Fri Apr 17 14:18:52 2020 +0000
tree: c58b5b7d420a37ebec520310ce8c0e84fd378bf7
parent: 7ccdeedd33479982664ccae3ad71920bd30c9257 [diff]
diff --git a/src/Reactor/Coroutine.hpp b/src/Reactor/Coroutine.hpp
index d70ecf0..cd8763c 100644
--- a/src/Reactor/Coroutine.hpp
+++ b/src/Reactor/Coroutine.hpp

@@ -135,12 +135,10 @@
 	// executable code. After calling, no more reactor functions may be
 	// called without building a new rr::Function or rr::Coroutine.
 	// While automatically called by operator(), finalize() should be called
-	// as early as possible to release the global Reactor mutex lock.
-	// It must also be called explicitly on the same thread that instantiates
-	// the Coroutine instance if operator() is invoked on separate threads.
-	// This is because presently, Reactor backends use a global mutex scoped
-	// to the generation of routines, and these must be locked/unlocked on the
-	// same thread.
+	// as soon as possible once the coroutine has been fully built.
+	// finalize() *must* be called explicitly on the same thread that
+	// instantiates the Coroutine instance if operator() is to be invoked on
+	// different threads.
 	inline void finalize(const Config::Edit &cfg = Config::Edit::None);
 
 	// Starts execution of the coroutine and returns a unique_ptr to a

diff --git a/src/Reactor/LLVMJIT.cpp b/src/Reactor/LLVMJIT.cpp
index bbd91e3..68153a7 100644
--- a/src/Reactor/LLVMJIT.cpp
+++ b/src/Reactor/LLVMJIT.cpp

@@ -57,6 +57,7 @@
     __pragma(warning(pop))
 #endif
 
+#include <atomic>
 #include <unordered_map>
 
 #if defined(_WIN64)
@@ -610,7 +611,7 @@
 		for(size_t i = 0; i < count; i++)
 		{
 			auto func = funcs[i];
-			static size_t numEmittedFunctions = 0;
+			static std::atomic<size_t> numEmittedFunctions = { 0 };
 			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
 			func->setName(name);
 			func->setLinkage(llvm::GlobalValue::ExternalLinkage);

diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 6cce3f2..48861e9 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp

@@ -59,10 +59,13 @@
 }
 #endif
 
+#if !LLVM_ENABLE_THREADS
+#	error "LLVM_ENABLE_THREADS needs to be enabled"
+#endif
+
 namespace {
 
-std::unique_ptr<rr::JITBuilder> jit;
-std::mutex codegenMutex;
+thread_local std::unique_ptr<rr::JITBuilder> jit;
 
 // Default configuration settings. Must be accessed under mutex lock.
 std::mutex defaultConfigLock;
@@ -599,8 +602,6 @@
 
 Nucleus::Nucleus()
 {
-	::codegenMutex.lock();  // Reactor and LLVM are currently not thread safe
-
 	ASSERT(jit == nullptr);
 	jit.reset(new JITBuilder(Nucleus::getDefaultConfig()));
 }
@@ -608,7 +609,6 @@
 Nucleus::~Nucleus()
 {
 	jit.reset();
-	::codegenMutex.unlock();
 }
 
 void Nucleus::setDefaultConfig(const Config &cfg)

diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index dff1a29..2bf5324 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp

@@ -63,7 +63,7 @@
 }
 
 // Set of variables that do not have a stack location yet.
-std::unordered_set<Variable *> Variable::unmaterializedVariables;
+thread_local std::unordered_set<Variable *> Variable::unmaterializedVariables;
 
 Variable::Variable(Type *type, int arraySize)
     : arraySize(arraySize)

diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 488c0be..33e8b44 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp

@@ -133,7 +133,7 @@
 	static void materializeAll();
 	static void killUnmaterialized();
 
-	static std::unordered_set<Variable *> unmaterializedVariables;
+	static thread_local std::unordered_set<Variable *> unmaterializedVariables;
 
 	Type *const type;
 	mutable Value *rvalue = nullptr;

diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index d5aafa9..adb0cb4 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp

@@ -2949,6 +2949,114 @@
 	EXPECT_EQ(result, value);
 }
 
+TEST(ReactorUnitTests, Multithreaded_Function)  // Builds and calls routines from many threads at once, then checks every result.
+{
+	constexpr int numThreads = 32;  // threads started by this test
+	constexpr int numLoops = 64;  // routines built and called by each thread
+
+	auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+	auto results = std::unique_ptr<int[]>(new int[numThreads * numLoops]);  // one slot per (thread, loop) pair
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		auto threadFunc = [&](int t) {  // parameter t (the thread index) shadows the loop variable
+			for(int l = 0; l < numLoops; l++)
+			{
+				FunctionT<int(int, int)> function;
+				{
+					Int a = function.Arg<0>();
+					Int b = function.Arg<1>();
+					Return((a << 16) | b);  // pack both arguments into a single int
+				}
+
+				auto f = function("thread%d_loop%d", t, l);  // presumably a printf-style routine name - confirm against FunctionT
+				results[t * numLoops + l] = f(t, l);  // each thread writes only its own slots
+			}
+		};
+		threads[t] = std::thread(threadFunc, t);  // the thread index is passed by value
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		threads[t].join();  // every thread finishes before results are read
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		for(int l = 0; l < numLoops; l++)
+		{
+			auto expect = (t << 16) | l;  // same expression the generated routine evaluates
+			auto result = results[t * numLoops + l];
+			EXPECT_EQ(result, expect);
+		}
+	}
+}
+
+TEST(ReactorUnitTests, Multithreaded_Coroutine)  // Builds and drives coroutines from many threads at once, then checks every yield.
+{
+	if(!rr::Caps.CoroutinesSupported)  // nothing to exercise when coroutines are unavailable
+	{
+		SUCCEED() << "Coroutines not supported";
+		return;
+	}
+
+	constexpr int numThreads = 32;  // threads started by this test
+	constexpr int numLoops = 64;  // coroutines built and driven by each thread
+
+	struct Result  // what a single coroutine run observed
+	{
+		bool yieldReturns[3];  // return value of each of the three await() calls
+		int yieldValues[3];  // argument handed to each await() call; expected to receive the yielded value
+	};
+
+	auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+	auto results = std::unique_ptr<Result[]>(new Result[numThreads * numLoops]);  // one slot per (thread, loop) pair
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		auto threadFunc = [&](int t) {  // parameter t (the thread index) shadows the loop variable
+			for(int l = 0; l < numLoops; l++)
+			{
+				Coroutine<int(int, int)> function;
+				{
+					Int a = function.Arg<0>();
+					Int b = function.Arg<1>();
+					Yield(a);  // first yield: argument 0 (called with t below)
+					Yield(b);  // second yield: argument 1 (called with l below)
+				}
+
+				auto coroutine = function(t, l);  // instance for this (thread, loop) pair
+
+				auto &result = results[t * numLoops + l];  // each thread writes only its own slots
+				result = {};  // new[] leaves Result uninitialised; zero it so an unwritten yieldValues[2] reads as 0
+				result.yieldReturns[0] = coroutine->await(result.yieldValues[0]);
+				result.yieldReturns[1] = coroutine->await(result.yieldValues[1]);
+				result.yieldReturns[2] = coroutine->await(result.yieldValues[2]);  // no third Yield exists; checked below to return false
+			}
+		};
+		threads[t] = std::thread(threadFunc, t);  // the thread index is passed by value
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		threads[t].join();  // every thread finishes before results are read
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		for(int l = 0; l < numLoops; l++)
+		{
+			auto const &result = results[t * numLoops + l];
+			EXPECT_EQ(result.yieldReturns[0], true);
+			EXPECT_EQ(result.yieldValues[0], t);
+			EXPECT_EQ(result.yieldReturns[1], true);
+			EXPECT_EQ(result.yieldValues[1], l);
+			EXPECT_EQ(result.yieldReturns[2], false);  // the coroutine is done after two yields
+			EXPECT_EQ(result.yieldValues[2], 0);  // still the zero written by result = {}
+		}
+	}
+}
+
 int main(int argc, char **argv)
 {
 	::testing::InitGoogleTest(&argc, argv);
commit	20cf5c5cbb314df3c0a4bb76953e62a8f1f3a362	[log] [tgz]
author	Ben Clayton <bclayton@google.com>	Mon Jul 01 11:13:27 2019 +0100
committer	Ben Clayton <bclayton@google.com>	Fri Apr 17 14:18:52 2020 +0000
tree	c58b5b7d420a37ebec520310ce8c0e84fd378bf7
parent	7ccdeedd33479982664ccae3ad71920bd30c9257 [diff]