Merge changes Ia8226c11,If413b9d6

* changes:
  Update Marl to 748d3c161
  Squashed 'third_party/marl/' changes from 539094011..748d3c161
diff --git a/src/Pipeline/SpirvShaderDebugger.cpp b/src/Pipeline/SpirvShaderDebugger.cpp
index be57e54..214ef1c 100644
--- a/src/Pipeline/SpirvShaderDebugger.cpp
+++ b/src/Pipeline/SpirvShaderDebugger.cpp
@@ -1288,21 +1288,23 @@
 	}
 
 	// No debug type information. Derive from SPIR-V.
-	Operand val(shader, state, id);
 	switch(shader->getType(obj).opcode())
 	{
 		case spv::OpTypeInt:
 		{
+			Operand val(shader, state, id);
 			group.put<Key, int>(key, Extract(val.Int(0), l));
 		}
 		break;
 		case spv::OpTypeFloat:
 		{
+			Operand val(shader, state, id);
 			group.put<Key, float>(key, Extract(val.Float(0), l));
 		}
 		break;
 		case spv::OpTypeVector:
 		{
+			Operand val(shader, state, id);
 			auto count = shader->getType(obj).definition.word(3);
 			switch(count)
 			{
diff --git a/src/Reactor/Coroutine.hpp b/src/Reactor/Coroutine.hpp
index d70ecf0..cd8763c 100644
--- a/src/Reactor/Coroutine.hpp
+++ b/src/Reactor/Coroutine.hpp
@@ -135,12 +135,10 @@
 	// executable code. After calling, no more reactor functions may be
 	// called without building a new rr::Function or rr::Coroutine.
 	// While automatically called by operator(), finalize() should be called
-	// as early as possible to release the global Reactor mutex lock.
-	// It must also be called explicitly on the same thread that instantiates
-	// the Coroutine instance if operator() is invoked on separate threads.
-	// This is because presently, Reactor backends use a global mutex scoped
-	// to the generation of routines, and these must be locked/unlocked on the
-	// same thread.
+	// as soon as possible once the coroutine has been fully built.
+	// finalize() *must* be called explicitly on the same thread that
+	// instantiates the Coroutine instance if operator() is to be invoked on
+	// different threads.
 	inline void finalize(const Config::Edit &cfg = Config::Edit::None);
 
 	// Starts execution of the coroutine and returns a unique_ptr to a
diff --git a/src/Reactor/LLVMJIT.cpp b/src/Reactor/LLVMJIT.cpp
index 6dd5c48..68153a7 100644
--- a/src/Reactor/LLVMJIT.cpp
+++ b/src/Reactor/LLVMJIT.cpp
@@ -57,6 +57,7 @@
     __pragma(warning(pop))
 #endif
 
+#include <atomic>
 #include <unordered_map>
 
 #if defined(_WIN64)
@@ -75,41 +76,6 @@
 
 namespace {
 
-// Cache provides a simple, thread-safe key-value store.
-template<typename KEY, typename VALUE>
-class Cache
-{
-public:
-	Cache() = default;
-	Cache(const Cache &other);
-	VALUE getOrCreate(KEY key, std::function<VALUE()> create);
-
-private:
-	mutable std::mutex mutex;  // mutable required for copy constructor.
-	std::unordered_map<KEY, VALUE> map;
-};
-
-template<typename KEY, typename VALUE>
-Cache<KEY, VALUE>::Cache(const Cache &other)
-{
-	std::unique_lock<std::mutex> lock(other.mutex);
-	map = other.map;
-}
-
-template<typename KEY, typename VALUE>
-VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
-{
-	std::unique_lock<std::mutex> lock(mutex);
-	auto it = map.find(key);
-	if(it != map.end())
-	{
-		return it->second;
-	}
-	auto value = create();
-	map.emplace(key, value);
-	return value;
-}
-
 // JITGlobals is a singleton that holds all the immutable machine specific
 // information for the host device.
 class JITGlobals
@@ -125,7 +91,7 @@
 	const llvm::TargetOptions targetOptions;
 	const llvm::DataLayout dataLayout;
 
-	TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);
+	TargetMachineSPtr createTargetMachine(rr::Optimization::Level optlevel);
 
 private:
 	static JITGlobals create();
@@ -136,8 +102,6 @@
 	           const llvm::TargetOptions &targetOptions,
 	           const llvm::DataLayout &dataLayout);
 	JITGlobals(const JITGlobals &) = default;
-
-	Cache<rr::Optimization::Level, TargetMachineSPtr> targetMachines;
 };
 
 JITGlobals *JITGlobals::get()
@@ -146,7 +110,7 @@
 	return &instance;
 }
 
-JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
+JITGlobals::TargetMachineSPtr JITGlobals::createTargetMachine(rr::Optimization::Level optlevel)
 {
 #ifdef ENABLE_RR_DEBUG_INFO
 	auto llvmOptLevel = toLLVM(rr::Optimization::Level::None);
@@ -154,15 +118,13 @@
 	auto llvmOptLevel = toLLVM(optlevel);
 #endif  // ENABLE_RR_DEBUG_INFO
 
-	return targetMachines.getOrCreate(optlevel, [&]() {
-		return TargetMachineSPtr(llvm::EngineBuilder()
-		                             .setOptLevel(llvmOptLevel)
-		                             .setMCPU(mcpu)
-		                             .setMArch(march)
-		                             .setMAttrs(mattrs)
-		                             .setTargetOptions(targetOptions)
-		                             .selectTarget());
-	});
+	return TargetMachineSPtr(llvm::EngineBuilder()
+	                             .setOptLevel(llvmOptLevel)
+	                             .setMCPU(mcpu)
+	                             .setMArch(march)
+	                             .setMAttrs(mattrs)
+	                             .setTargetOptions(targetOptions)
+	                             .selectTarget());
 }
 
 JITGlobals JITGlobals::create()
@@ -618,7 +580,7 @@
 			          return;
 		          }
 	          }))
-	    , targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel()))
+	    , targetMachine(JITGlobals::get()->createTargetMachine(config.getOptimization().getLevel()))
 	    , compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine))
 	    , objLayer(
 	          session,
@@ -649,7 +611,7 @@
 		for(size_t i = 0; i < count; i++)
 		{
 			auto func = funcs[i];
-			static size_t numEmittedFunctions = 0;
+			static std::atomic<size_t> numEmittedFunctions = { 0 };
 			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
 			func->setName(name);
 			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 6cce3f2..48861e9 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -59,10 +59,13 @@
 }
 #endif
 
+#if !LLVM_ENABLE_THREADS
+#	error "LLVM_ENABLE_THREADS needs to be enabled"
+#endif
+
 namespace {
 
-std::unique_ptr<rr::JITBuilder> jit;
-std::mutex codegenMutex;
+thread_local std::unique_ptr<rr::JITBuilder> jit;
 
 // Default configuration settings. Must be accessed under mutex lock.
 std::mutex defaultConfigLock;
@@ -599,8 +602,6 @@
 
 Nucleus::Nucleus()
 {
-	::codegenMutex.lock();  // Reactor and LLVM are currently not thread safe
-
 	ASSERT(jit == nullptr);
 	jit.reset(new JITBuilder(Nucleus::getDefaultConfig()));
 }
@@ -608,7 +609,6 @@
 Nucleus::~Nucleus()
 {
 	jit.reset();
-	::codegenMutex.unlock();
 }
 
 void Nucleus::setDefaultConfig(const Config &cfg)
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index dff1a29..2bf5324 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -63,7 +63,7 @@
 }
 
 // Set of variables that do not have a stack location yet.
-std::unordered_set<Variable *> Variable::unmaterializedVariables;
+thread_local std::unordered_set<Variable *> Variable::unmaterializedVariables;
 
 Variable::Variable(Type *type, int arraySize)
     : arraySize(arraySize)
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 488c0be..33e8b44 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -133,7 +133,7 @@
 	static void materializeAll();
 	static void killUnmaterialized();
 
-	static std::unordered_set<Variable *> unmaterializedVariables;
+	static thread_local std::unordered_set<Variable *> unmaterializedVariables;
 
 	Type *const type;
 	mutable Value *rvalue = nullptr;
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index d5aafa9..adb0cb4 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -2949,6 +2949,114 @@
 	EXPECT_EQ(result, value);
 }
 
+TEST(ReactorUnitTests, Multithreaded_Function)
+{
+	constexpr int numThreads = 32;
+	constexpr int numLoops = 64;
+
+	auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+	auto results = std::unique_ptr<int[]>(new int[numThreads * numLoops]);
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		auto threadFunc = [&](int t) {
+			for(int l = 0; l < numLoops; l++)
+			{
+				FunctionT<int(int, int)> function;
+				{
+					Int a = function.Arg<0>();
+					Int b = function.Arg<1>();
+					Return((a << 16) | b);
+				}
+
+				auto f = function("thread%d_loop%d", t, l);
+				results[t * numLoops + l] = f(t, l);
+			}
+		};
+		threads[t] = std::thread(threadFunc, t);
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		threads[t].join();
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		for(int l = 0; l < numLoops; l++)
+		{
+			auto expect = (t << 16) | l;
+			auto result = results[t * numLoops + l];
+			EXPECT_EQ(result, expect);
+		}
+	}
+}
+
+TEST(ReactorUnitTests, Multithreaded_Coroutine)
+{
+	if(!rr::Caps.CoroutinesSupported)
+	{
+		SUCCEED() << "Coroutines not supported";
+		return;
+	}
+
+	constexpr int numThreads = 32;
+	constexpr int numLoops = 64;
+
+	struct Result
+	{
+		bool yieldReturns[3];
+		int yieldValues[3];
+	};
+
+	auto threads = std::unique_ptr<std::thread[]>(new std::thread[numThreads]);
+	auto results = std::unique_ptr<Result[]>(new Result[numThreads * numLoops]);
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		auto threadFunc = [&](int t) {
+			for(int l = 0; l < numLoops; l++)
+			{
+				Coroutine<int(int, int)> function;
+				{
+					Int a = function.Arg<0>();
+					Int b = function.Arg<1>();
+					Yield(a);
+					Yield(b);
+				}
+
+				auto coroutine = function(t, l);
+
+				auto &result = results[t * numLoops + l];
+				result = {};
+				result.yieldReturns[0] = coroutine->await(result.yieldValues[0]);
+				result.yieldReturns[1] = coroutine->await(result.yieldValues[1]);
+				result.yieldReturns[2] = coroutine->await(result.yieldValues[2]);
+			}
+		};
+		threads[t] = std::thread(threadFunc, t);
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		threads[t].join();
+	}
+
+	for(int t = 0; t < numThreads; t++)
+	{
+		for(int l = 0; l < numLoops; l++)
+		{
+			auto const &result = results[t * numLoops + l];
+			EXPECT_EQ(result.yieldReturns[0], true);
+			EXPECT_EQ(result.yieldValues[0], t);
+			EXPECT_EQ(result.yieldReturns[1], true);
+			EXPECT_EQ(result.yieldValues[1], l);
+			EXPECT_EQ(result.yieldReturns[2], false);
+			EXPECT_EQ(result.yieldValues[2], 0);
+		}
+	}
+}
+
 int main(int argc, char **argv)
 {
 	::testing::InitGoogleTest(&argc, argv);
diff --git a/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h
index f7d3c9b..9219b63 100644
--- a/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/android/include/llvm/Config/llvm-config.h
@@ -34,7 +34,7 @@
 #endif
 
 /* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
 
 /* Has gcc/MSVC atomic intrinsics */
 #define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h
index 831460c..54d348b 100644
--- a/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/darwin/include/llvm/Config/llvm-config.h
@@ -28,7 +28,7 @@
 #endif
 
 /* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
 
 /* Has gcc/MSVC atomic intrinsics */
 #define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h
index f906dc5..ec30fec 100644
--- a/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/fuchsia/include/llvm/Config/llvm-config.h
@@ -30,7 +30,7 @@
 #endif
 
 /* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
 
 /* Has gcc/MSVC atomic intrinsics */
 #define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h
index 1eaa4f9..2e740b9 100644
--- a/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/linux/include/llvm/Config/llvm-config.h
@@ -50,7 +50,7 @@
 #endif
 
 /* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
 
 /* Has gcc/MSVC atomic intrinsics */
 #define LLVM_HAS_ATOMICS 1
diff --git a/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h b/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h
index 6c922b0..7bc36cb 100644
--- a/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h
+++ b/third_party/llvm-7.0/configs/windows/include/llvm/Config/llvm-config.h
@@ -48,7 +48,7 @@
 #endif
 
 /* Define if threads enabled */
-#define LLVM_ENABLE_THREADS 0
+#define LLVM_ENABLE_THREADS 1
 
 /* Has gcc/MSVC atomic intrinsics */
 #define LLVM_HAS_ATOMICS 1
diff --git a/third_party/subzero/CMakeLists.txt b/third_party/subzero/CMakeLists.txt
index 70eb251..031a6d3 100644
--- a/third_party/subzero/CMakeLists.txt
+++ b/third_party/subzero/CMakeLists.txt
@@ -76,7 +76,7 @@
     )
     set(SUBZERO_TARGET_CPU MIPS32)
 else()
-    message(FATAL_ERROR "Architecture '${ARCH}' not supported by Subzero")
+    message(WARNING "Architecture '${ARCH}' not supported by Subzero")
 endif()
 
 if(WIN32)