Merge changes Ia8226c11,If413b9d6
* changes:
Update Marl to 748d3c161
Squashed 'third_party/marl/' changes from 539094011..748d3c161
diff --git a/src/Pipeline/SpirvShaderDebugger.cpp b/src/Pipeline/SpirvShaderDebugger.cpp
index be57e54..214ef1c 100644
--- a/src/Pipeline/SpirvShaderDebugger.cpp
+++ b/src/Pipeline/SpirvShaderDebugger.cpp
@@ -1288,21 +1288,23 @@
}
// No debug type information. Derive from SPIR-V.
- Operand val(shader, state, id);
switch(shader->getType(obj).opcode())
{
case spv::OpTypeInt:
{
+ Operand val(shader, state, id);
group.put<Key, int>(key, Extract(val.Int(0), l));
}
break;
case spv::OpTypeFloat:
{
+ Operand val(shader, state, id);
group.put<Key, float>(key, Extract(val.Float(0), l));
}
break;
case spv::OpTypeVector:
{
+ Operand val(shader, state, id);
auto count = shader->getType(obj).definition.word(3);
switch(count)
{
diff --git a/src/Reactor/Coroutine.hpp b/src/Reactor/Coroutine.hpp
index d70ecf0..cd8763c 100644
--- a/src/Reactor/Coroutine.hpp
+++ b/src/Reactor/Coroutine.hpp
@@ -135,12 +135,10 @@
// executable code. After calling, no more reactor functions may be
// called without building a new rr::Function or rr::Coroutine.
// While automatically called by operator(), finalize() should be called
- // as early as possible to release the global Reactor mutex lock.
- // It must also be called explicitly on the same thread that instantiates
- // the Coroutine instance if operator() is invoked on separate threads.
- // This is because presently, Reactor backends use a global mutex scoped
- // to the generation of routines, and these must be locked/unlocked on the
- // same thread.
+ // as soon as possible once the coroutine has been fully built.
+ // finalize() *must* be called explicitly on the same thread that
+ // instantiates the Coroutine instance if operator() is to be invoked on
+ // different threads.
inline void finalize(const Config::Edit &cfg = Config::Edit::None);
// Starts execution of the coroutine and returns a unique_ptr to a
diff --git a/src/Reactor/LLVMJIT.cpp b/src/Reactor/LLVMJIT.cpp
index 6dd5c48..68153a7 100644
--- a/src/Reactor/LLVMJIT.cpp
+++ b/src/Reactor/LLVMJIT.cpp
@@ -57,6 +57,7 @@
__pragma(warning(pop))
#endif
+#include <atomic>
#include <unordered_map>
#if defined(_WIN64)
@@ -75,41 +76,6 @@
namespace {
-// Cache provides a simple, thread-safe key-value store.
-template<typename KEY, typename VALUE>
-class Cache
-{
-public:
- Cache() = default;
- Cache(const Cache &other);
- VALUE getOrCreate(KEY key, std::function<VALUE()> create);
-
-private:
- mutable std::mutex mutex; // mutable required for copy constructor.
- std::unordered_map<KEY, VALUE> map;
-};
-
-template<typename KEY, typename VALUE>
-Cache<KEY, VALUE>::Cache(const Cache &other)
-{
- std::unique_lock<std::mutex> lock(other.mutex);
- map = other.map;
-}
-
-template<typename KEY, typename VALUE>
-VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
-{
- std::unique_lock<std::mutex> lock(mutex);
- auto it = map.find(key);
- if(it != map.end())
- {
- return it->second;
- }
- auto value = create();
- map.emplace(key, value);
- return value;
-}
-
// JITGlobals is a singleton that holds all the immutable machine specific
// information for the host device.
class JITGlobals
@@ -125,7 +91,7 @@
const llvm::TargetOptions targetOptions;
const llvm::DataLayout dataLayout;
- TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);
+ TargetMachineSPtr createTargetMachine(rr::Optimization::Level optlevel);
private:
static JITGlobals create();
@@ -136,8 +102,6 @@
const llvm::TargetOptions &targetOptions,
const llvm::DataLayout &dataLayout);
JITGlobals(const JITGlobals &) = default;
-
- Cache<rr::Optimization::Level, TargetMachineSPtr> targetMachines;
};
JITGlobals *JITGlobals::get()
@@ -146,7 +110,7 @@
return &instance;
}
-JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
+JITGlobals::TargetMachineSPtr JITGlobals::createTargetMachine(rr::Optimization::Level optlevel)
{
#ifdef ENABLE_RR_DEBUG_INFO
auto llvmOptLevel = toLLVM(rr::Optimization::Level::None);
@@ -154,15 +118,13 @@
auto llvmOptLevel = toLLVM(optlevel);
#endif // ENABLE_RR_DEBUG_INFO
- return targetMachines.getOrCreate(optlevel, [&]() {
- return TargetMachineSPtr(llvm::EngineBuilder()
- .setOptLevel(llvmOptLevel)
- .setMCPU(mcpu)
- .setMArch(march)
- .setMAttrs(mattrs)
- .setTargetOptions(targetOptions)
- .selectTarget());
- });
+ return TargetMachineSPtr(llvm::EngineBuilder()
+ .setOptLevel(llvmOptLevel)
+ .setMCPU(mcpu)
+ .setMArch(march)
+ .setMAttrs(mattrs)
+ .setTargetOptions(targetOptions)
+ .selectTarget());
}
JITGlobals JITGlobals::create()
@@ -618,7 +580,7 @@
return;
}
}))
- , targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel()))
+ , targetMachine(JITGlobals::get()->createTargetMachine(config.getOptimization().getLevel()))
, compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine))
, objLayer(
session,
@@ -649,7 +611,7 @@
for(size_t i = 0; i < count; i++)
{
auto func = funcs[i];
- static size_t numEmittedFunctions = 0;
+ static std::atomic<size_t> numEmittedFunctions = { 0 };
std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
func->setName(name);
func->setLinkage(llvm::GlobalValue::ExternalLinkage);
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 6cce3f2..48861e9 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -59,10 +59,13 @@
}
#endif
+#if !LLVM_ENABLE_THREADS
+# error "LLVM_ENABLE_THREADS needs to be enabled"
+#endif
+
namespace {
-std::unique_ptr<rr::JITBuilder> jit;
-std::mutex codegenMutex;
+thread_local std::unique_ptr<rr::JITBuilder> jit;
// Default configuration settings. Must be accessed under mutex lock.
std::mutex defaultConfigLock;
@@ -599,8 +602,6 @@
Nucleus::Nucleus()
{
- ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
-
ASSERT(jit == nullptr);
jit.reset(new JITBuilder(Nucleus::getDefaultConfig()));
}
@@ -608,7 +609,6 @@
Nucleus::~Nucleus()
{
jit.reset();
- ::codegenMutex.unlock();
}
void Nucleus::setDefaultConfig(const Config &cfg)
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index dff1a29..2bf5324 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -63,7 +63,7 @@
}
// Set of variables that do not have a stack location yet.
-std::unordered_set<Variable *> Variable::unmaterializedVariables;
+thread_local std::unordered_set<Variable *> Variable::unmaterializedVariables;
Variable::Variable(Type *type, int arraySize)
: arraySize(arraySize)
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 488c0be..33e8b44 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -133,7 +133,7 @@
static void materializeAll();
static void killUnmaterialized();
- static std::unordered_set<Variable *> unmaterializedVariables;
+ static thread_local std::unordered_set<Variable *> unmaterializedVariables;
Type *const type;
mutable Value *rvalue = nullptr;
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index d5aafa9..adb0cb4 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -2949,6 +2949,114 @@
EXPECT_EQ(result, value);
}
+TEST(ReactorUnitTests, Multithreaded_Function)  // Builds and calls Reactor functions from many threads at once.
+{
+ constexpr int numThreads = 32;  // number of std::thread workers launched below
+ constexpr int numLoops = 64;  // functions built and called by each worker
+ // results holds one slot per (thread, loop) pair, so no two workers write the same element.
+ auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+ auto results = std::unique_ptr<int[]>(new int[numThreads * numLoops]);
+ // Launch the workers.
+ for(int t = 0; t < numThreads; t++)
+ {
+ auto threadFunc = [&](int t) {  // results is captured by reference; the thread index is passed by value
+ for(int l = 0; l < numLoops; l++)
+ {
+ FunctionT<int(int, int)> function;  // a fresh function is built on every iteration
+ {
+ Int a = function.Arg<0>();
+ Int b = function.Arg<1>();
+ Return((a << 16) | b);  // called with (t, l) below, so the value identifies its producer
+ }
+ // NOTE(review): the arguments look like a printf-style routine name - confirm against FunctionT::operator().
+ auto f = function("thread%d_loop%d", t, l);
+ results[t * numLoops + l] = f(t, l);
+ }
+ };
+ threads[t] = std::thread(threadFunc, t);
+ }
+ // Wait for every worker to finish before reading results.
+ for(int t = 0; t < numThreads; t++)
+ {
+ threads[t].join();
+ }
+ // Every slot must hold the value derived from its own thread and loop index.
+ for(int t = 0; t < numThreads; t++)
+ {
+ for(int l = 0; l < numLoops; l++)
+ {
+ auto expect = (t << 16) | l;  // same expression the generated function evaluates
+ auto result = results[t * numLoops + l];
+ EXPECT_EQ(result, expect);
+ }
+ }
+}
+
+TEST(ReactorUnitTests, Multithreaded_Coroutine)  // Builds and resumes Reactor coroutines from many threads at once.
+{
+ if(!rr::Caps.CoroutinesSupported)  // nothing to exercise without coroutine support
+ {
+ SUCCEED() << "Coroutines not supported";
+ return;
+ }
+ // Workload size: numThreads workers, each building numLoops coroutines.
+ constexpr int numThreads = 32;
+ constexpr int numLoops = 64;
+ // Outcome of the three await() calls made on one coroutine.
+ struct Result
+ {
+ bool yieldReturns[3];  // return value of each await() call
+ int yieldValues[3];  // variable passed to each await() call
+ };
+ // results holds one slot per (thread, loop) pair, so no two workers write the same element.
+ auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+ auto results = std::unique_ptr<Result[]>(new Result[numThreads * numLoops]);
+ // Launch the workers.
+ for(int t = 0; t < numThreads; t++)
+ {
+ auto threadFunc = [&](int t) {  // results is captured by reference; the thread index is passed by value
+ for(int l = 0; l < numLoops; l++)
+ {
+ Coroutine<int(int, int)> function;  // a fresh coroutine is built on every iteration
+ {
+ Int a = function.Arg<0>();
+ Int b = function.Arg<1>();
+ Yield(a);  // first yield: argument 0
+ Yield(b);  // second yield: argument 1
+ }
+ // Start the coroutine with this worker's thread and loop indices as arguments.
+ auto coroutine = function(t, l);
+ // Zero the slot first: the checks below expect yieldValues[2] to still be 0 afterwards.
+ auto &result = results[t * numLoops + l];
+ result = {};
+ result.yieldReturns[0] = coroutine->await(result.yieldValues[0]);
+ result.yieldReturns[1] = coroutine->await(result.yieldValues[1]);
+ result.yieldReturns[2] = coroutine->await(result.yieldValues[2]);  // a third await, after both Yield() calls
+ }
+ };
+ threads[t] = std::thread(threadFunc, t);
+ }
+ // Wait for every worker to finish before reading results.
+ for(int t = 0; t < numThreads; t++)
+ {
+ threads[t].join();
+ }
+ // Expect two successful awaits yielding t then l, followed by a third await that returns false.
+ for(int t = 0; t < numThreads; t++)
+ {
+ for(int l = 0; l < numLoops; l++)
+ {
+ auto const &result = results[t * numLoops + l];
+ EXPECT_EQ(result.yieldReturns[0], true);
+ EXPECT_EQ(result.yieldValues[0], t);
+ EXPECT_EQ(result.yieldReturns[1], true);
+ EXPECT_EQ(result.yieldValues[1], l);
+ EXPECT_EQ(result.yieldReturns[2], false);
+ EXPECT_EQ(result.yieldValues[2], 0);  // still the value set by result = {}
+ }
+ }
+}
+
int main(int argc, char **argv)
{
::testing::InitGoogleTest(&argc, argv);
diff --git a/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h
index f7d3c9b..9219b63 100644
--- a/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h
@@ -34,7 +34,7 @@
#endif
/* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h
index 831460c..54d348b 100644
--- a/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h
@@ -28,7 +28,7 @@
#endif
/* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h
index f906dc5..ec30fec 100644
--- a/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h
@@ -30,7 +30,7 @@
#endif
/* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h
index 1eaa4f9..2e740b9 100644
--- a/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h
@@ -50,7 +50,7 @@
#endif
/* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h
index 6c922b0..7bc36cb 100644
--- a/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h
@@ -48,7 +48,7 @@
#endif
/* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 1
diff --git a/third_party/subzero/CMakeLists.txt b/third_party/subzero/CMakeLists.txt
index 70eb251..031a6d3 100644
--- a/third_party/subzero/CMakeLists.txt
+++ b/third_party/subzero/CMakeLists.txt
@@ -76,7 +76,7 @@
)
set(SUBZERO_TARGET_CPU MIPS32)
else()
- message(FATAL_ERROR "Architecture '${ARCH}' not supported by Subzero")
+ message(WARNING "Architecture '${ARCH}' not supported by Subzero")
endif()
if(WIN32)