Split JIT creation from LLVMReactor.cpp into LLVMJIT.cpp

This draws a rough distinction between code that primarily interacts
with LLVM's IRBuilder interface, and code that sets up the JIT and
manages module and function creation, memory management, and external
symbol resolution.

The latter requires changes far more often than the former when LLVM is
upgraded, so it's useful to keep them apart now that we'll have multiple
versions of LLVM in different build environments.
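
LLVMReactor.cpp now builds routines through the JITBuilder interface
implemented in LLVMJIT.cpp. A simplified sketch of that surface:

    JITBuilder(const rr::Config &config);
    void optimize(const rr::Config &cfg);
    std::shared_ptr<rr::Routine> acquireRoutine(
        llvm::Function **funcs, size_t count, const rr::Config &cfg);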

Bug: b/148561024
Change-Id: I2ac2d00b4f3bcd277289a4ec6856c9eb7bb6aa24
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40688
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index caf9097..15ff72a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1745,6 +1745,7 @@
     ${SOURCE_DIR}/Reactor/EmulatedReactor.cpp
     ${SOURCE_DIR}/Reactor/ExecutableMemory.cpp
     ${SOURCE_DIR}/Reactor/ExecutableMemory.hpp
+    ${SOURCE_DIR}/Reactor/LLVMJIT.cpp
     ${SOURCE_DIR}/Reactor/LLVMReactor.cpp
     ${SOURCE_DIR}/Reactor/LLVMReactor.hpp
     ${SOURCE_DIR}/Reactor/LLVMReactorDebugInfo.cpp
diff --git a/src/Android.bp b/src/Android.bp
index 3d957d6..f512026 100644
--- a/src/Android.bp
+++ b/src/Android.bp
@@ -179,6 +179,7 @@
 
     srcs: [
         "Reactor/Reactor.cpp",
+        "Reactor/LLVMJIT.cpp",
         "Reactor/LLVMReactor.cpp",
         "Reactor/Debug.cpp",
         "Reactor/CPUID.cpp",
@@ -200,6 +201,7 @@
 
     srcs: [
         "Reactor/Reactor.cpp",
+        "Reactor/LLVMJIT.cpp",
         "Reactor/LLVMReactor.cpp",
         "Reactor/Debug.cpp",
         "Reactor/CPUID.cpp",
diff --git a/src/Reactor/BUILD.gn b/src/Reactor/BUILD.gn
index 9cb4b1e..4d51c08 100644
--- a/src/Reactor/BUILD.gn
+++ b/src/Reactor/BUILD.gn
@@ -330,6 +330,7 @@
 
     sources = [
       "CPUID.cpp",
+      "LLVMJIT.cpp",
       "LLVMReactor.cpp",
     ]
 
diff --git a/src/Reactor/LLVMJIT.cpp b/src/Reactor/LLVMJIT.cpp
new file mode 100644
index 0000000..1718957
--- /dev/null
+++ b/src/Reactor/LLVMJIT.cpp
@@ -0,0 +1,746 @@
+// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "LLVMReactor.hpp"
+
+#include "Debug.hpp"
+#include "ExecutableMemory.hpp"
+#include "Routine.hpp"
+
+#if defined(__clang__)
+// LLVM has occurrences of the extra-semi warning in its headers, which will be
+// treated as an error in SwiftShader targets.
+#	pragma clang diagnostic push
+#	pragma clang diagnostic ignored "-Wextra-semi"
+#endif  // defined(__clang__)
+
+// TODO(b/143539525): Eliminate when warning has been fixed.
+#ifdef _MSC_VER
+__pragma(warning(push))
+    __pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Coroutines.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+
+#if defined(__clang__)
+#	pragma clang diagnostic pop
+#endif  // defined(__clang__)
+
+#ifdef _MSC_VER
+    __pragma(warning(pop))
+#endif
+
+#include <unordered_map>
+
+#if defined(_WIN64)
+extern "C" void __chkstk();
+#elif defined(_WIN32)
+extern "C" void _chkstk();
+#endif
+
+namespace {
+
+// Cache provides a simple, thread-safe key-value store.
+template<typename KEY, typename VALUE>
+class Cache
+{
+public:
+	Cache() = default;
+	Cache(const Cache &other);
+	VALUE getOrCreate(KEY key, std::function<VALUE()> create);
+
+private:
+	mutable std::mutex mutex;  // mutable required for copy constructor.
+	std::unordered_map<KEY, VALUE> map;
+};
+
+template<typename KEY, typename VALUE>
+Cache<KEY, VALUE>::Cache(const Cache &other)
+{
+	std::unique_lock<std::mutex> lock(other.mutex);
+	map = other.map;
+}
+
+template<typename KEY, typename VALUE>
+VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
+{
+	std::unique_lock<std::mutex> lock(mutex);
+	auto it = map.find(key);
+	if(it != map.end())
+	{
+		return it->second;
+	}
+	auto value = create();
+	map.emplace(key, value);
+	return value;
+}
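+
+// Example usage (illustrative only):
+//
+//   Cache<int, std::string> cache;
+//   auto v = cache.getOrCreate(42, [] { return std::string("forty-two"); });
+//
+// The lambda runs only on the first call for a given key; later lookups of
+// key 42 return the cached value without invoking it again.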
+
+// JITGlobals is a singleton that holds all the immutable machine-specific
+// information for the host device.
+class JITGlobals
+{
+public:
+	using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;
+
+	static JITGlobals *get();
+
+	const std::string mcpu;
+	const std::vector<std::string> mattrs;
+	const char *const march;
+	const llvm::TargetOptions targetOptions;
+	const llvm::DataLayout dataLayout;
+
+	TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);
+
+private:
+	static JITGlobals create();
+	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
+	JITGlobals(const char *mcpu,
+	           const std::vector<std::string> &mattrs,
+	           const char *march,
+	           const llvm::TargetOptions &targetOptions,
+	           const llvm::DataLayout &dataLayout);
+	JITGlobals(const JITGlobals &) = default;
+
+	Cache<rr::Optimization::Level, TargetMachineSPtr> targetMachines;
+};
+
+JITGlobals *JITGlobals::get()
+{
+	static JITGlobals instance = create();
+	return &instance;
+}
+
+JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
+{
+#ifdef ENABLE_RR_DEBUG_INFO
+	auto llvmOptLevel = toLLVM(rr::Optimization::Level::None);
+#else   // ENABLE_RR_DEBUG_INFO
+	auto llvmOptLevel = toLLVM(optlevel);
+#endif  // ENABLE_RR_DEBUG_INFO
+
+	return targetMachines.getOrCreate(optlevel, [&]() {
+		return TargetMachineSPtr(llvm::EngineBuilder()
+		                             .setOptLevel(llvmOptLevel)
+		                             .setMCPU(mcpu)
+		                             .setMArch(march)
+		                             .setMAttrs(mattrs)
+		                             .setTargetOptions(targetOptions)
+		                             .selectTarget());
+	});
+}
+
+JITGlobals JITGlobals::create()
+{
+	struct LLVMInitializer
+	{
+		LLVMInitializer()
+		{
+			llvm::InitializeNativeTarget();
+			llvm::InitializeNativeTargetAsmPrinter();
+			llvm::InitializeNativeTargetAsmParser();
+		}
+	};
+	static LLVMInitializer initializeLLVM;
+
+	auto mcpu = llvm::sys::getHostCPUName();
+
+	llvm::StringMap<bool> features;
+	bool ok = llvm::sys::getHostCPUFeatures(features);
+
+#if defined(__i386__) || defined(__x86_64__) || \
+    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
+	ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
+#else
+	(void)ok;  // getHostCPUFeatures always returns false on other platforms
+#endif
+
+	std::vector<std::string> mattrs;
+	for(auto &feature : features)
+	{
+		if(feature.second) { mattrs.push_back(feature.first().str()); }
+	}
+
+	const char *march = nullptr;
+#if defined(__x86_64__)
+	march = "x86-64";
+#elif defined(__i386__)
+	march = "x86";
+#elif defined(__aarch64__)
+	march = "arm64";
+#elif defined(__arm__)
+	march = "arm";
+#elif defined(__mips__)
+#	if defined(__mips64)
+	march = "mips64el";
+#	else
+	march = "mipsel";
+#	endif
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	march = "ppc64le";
+#else
+#	error "unknown architecture"
+#endif
+
+	llvm::TargetOptions targetOptions;
+	targetOptions.UnsafeFPMath = false;
+
+	auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
+	    llvm::EngineBuilder()
+	        .setOptLevel(llvm::CodeGenOpt::None)
+	        .setMCPU(mcpu)
+	        .setMArch(march)
+	        .setMAttrs(mattrs)
+	        .setTargetOptions(targetOptions)
+	        .selectTarget());
+
+	auto dataLayout = targetMachine->createDataLayout();
+
+	return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
+}
+
+llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
+{
+	switch(level)
+	{
+		case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
+		case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
+		case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
+		case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
+		default: UNREACHABLE("Unknown Optimization Level %d", int(level));
+	}
+	return ::llvm::CodeGenOpt::Default;
+}
+
+JITGlobals::JITGlobals(const char *mcpu,
+                       const std::vector<std::string> &mattrs,
+                       const char *march,
+                       const llvm::TargetOptions &targetOptions,
+                       const llvm::DataLayout &dataLayout)
+    : mcpu(mcpu)
+    , mattrs(mattrs)
+    , march(march)
+    , targetOptions(targetOptions)
+    , dataLayout(dataLayout)
+{
+}
+
+class MemoryMapper : public llvm::SectionMemoryManager::MemoryMapper
+{
+public:
+	MemoryMapper() {}
+	~MemoryMapper() final {}
+
+	llvm::sys::MemoryBlock allocateMappedMemory(
+	    llvm::SectionMemoryManager::AllocationPurpose purpose,
+	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
+	    unsigned flags, std::error_code &errorCode) final
+	{
+		errorCode = std::error_code();
+
+		// Round up numBytes to page size.
+		size_t pageSize = rr::memoryPageSize();
+		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);
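+		// e.g. with pageSize == 4096: 5000 -> 8192, and 4096 stays 4096.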
+
+		bool need_exec =
+		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
+		void *addr = rr::allocateMemoryPages(
+		    numBytes, flagsToPermissions(flags), need_exec);
+		if(!addr)
+			return llvm::sys::MemoryBlock();
+		return llvm::sys::MemoryBlock(addr, numBytes);
+	}
+
+	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
+	                                    unsigned flags)
+	{
+		// Round down base address to align with a page boundary. This matches
+		// DefaultMMapper behavior.
+		void *addr = block.base();
+#if LLVM_VERSION_MAJOR >= 9
+		size_t size = block.allocatedSize();
+#else
+		size_t size = block.size();
+#endif
+		size_t pageSize = rr::memoryPageSize();
+		addr = reinterpret_cast<void *>(
+		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
+		size += reinterpret_cast<uintptr_t>(block.base()) -
+		        reinterpret_cast<uintptr_t>(addr);
+
+		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
+		return std::error_code();
+	}
+
+	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
+	{
+#if LLVM_VERSION_MAJOR >= 9
+		size_t size = block.allocatedSize();
+#else
+		size_t size = block.size();
+#endif
+
+		rr::deallocateMemoryPages(block.base(), size);
+		return std::error_code();
+	}
+
+private:
+	int flagsToPermissions(unsigned flags)
+	{
+		int result = 0;
+		if(flags & llvm::sys::Memory::MF_READ)
+		{
+			result |= rr::PERMISSION_READ;
+		}
+		if(flags & llvm::sys::Memory::MF_WRITE)
+		{
+			result |= rr::PERMISSION_WRITE;
+		}
+		if(flags & llvm::sys::Memory::MF_EXEC)
+		{
+			result |= rr::PERMISSION_EXECUTE;
+		}
+		return result;
+	}
+};
+
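+// Rounds val up to the next multiple of alignment (alignment must be > 0),
+// e.g. alignUp(13, 8) == 16 and alignUp(16, 8) == 16.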
+template<typename T>
+T alignUp(T val, T alignment)
+{
+	return alignment * ((val + alignment - 1) / alignment);
+}
+
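+// Allocates size bytes aligned to alignment (which must be < 256). The byte
+// immediately before the returned pointer stores the offset back to the
+// start of the underlying allocation, which alignedFree() reads to free it.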
+void *alignedAlloc(size_t size, size_t alignment)
+{
+	ASSERT(alignment < 256);
+	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
+	auto aligned = allocation;
+	aligned += sizeof(uint8_t);                                                                       // Make space for the base-address offset.
+	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
+	auto offset = static_cast<uint8_t>(aligned - allocation);
+	aligned[-1] = offset;
+	return aligned;
+}
+
+void alignedFree(void *ptr)
+{
+	auto aligned = reinterpret_cast<uint8_t *>(ptr);
+	auto offset = aligned[-1];
+	auto allocation = aligned - offset;
+	delete[] allocation;
+}
+
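+// Host-side implementations behind the "atomic_load" / "atomic_store"
+// externals resolved below: ptr is treated as a std::atomic<T> and accessed
+// with the ordering mapped from LLVM's AtomicOrdering by rr::atomicOrdering().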
+template<typename T>
+static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
+{
+	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
+}
+
+template<typename T>
+static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
+{
+	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
+}
+
+#ifdef __ANDROID__
+template<typename F>
+static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
+{
+	// Build an arbitrary op out of looped CAS
+	for(;;)
+	{
+		uint32_t expected = *ptr;
+		uint32_t desired = f(expected, val);
+
+		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
+		{
+			return expected;
+		}
+	}
+}
+#endif
+
+void *resolveExternalSymbol(const char *name)
+{
+	struct Atomic
+	{
+		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
+		{
+			switch(size)
+			{
+				case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
+				case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
+				case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
+				case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
+				default:
+					UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
+			}
+		}
+		static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
+		{
+			switch(size)
+			{
+				case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
+				case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
+				case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
+				case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
+				default:
+					UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
+			}
+		}
+	};
+
+	struct F
+	{
+		static void nop() {}
+		static void neverCalled() { UNREACHABLE("Should never be called"); }
+
+		static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
+		static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }
+
+#ifdef __ANDROID__
+		// forwarders since we can't take address of builtins
+		static void sync_synchronize() { __sync_synchronize(); }
+		static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
+		static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
+		static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
+		static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
+		static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
+		static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
+		static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }
+
+		static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
+		{
+			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
+		}
+		static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
+		{
+			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
+		}
+		static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
+		{
+			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
+		}
+		static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
+		{
+			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
+		}
+#endif
+	};
+
+	class Resolver
+	{
+	public:
+		using FunctionMap = std::unordered_map<std::string, void *>;
+
+		FunctionMap functions;
+
+		Resolver()
+		{
+			functions.emplace("nop", reinterpret_cast<void *>(F::nop));
+			functions.emplace("floorf", reinterpret_cast<void *>(floorf));
+			functions.emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
+			functions.emplace("truncf", reinterpret_cast<void *>(truncf));
+			functions.emplace("printf", reinterpret_cast<void *>(printf));
+			functions.emplace("puts", reinterpret_cast<void *>(puts));
+			functions.emplace("fmodf", reinterpret_cast<void *>(fmodf));
+
+			functions.emplace("sinf", reinterpret_cast<void *>(sinf));
+			functions.emplace("cosf", reinterpret_cast<void *>(cosf));
+			functions.emplace("asinf", reinterpret_cast<void *>(asinf));
+			functions.emplace("acosf", reinterpret_cast<void *>(acosf));
+			functions.emplace("atanf", reinterpret_cast<void *>(atanf));
+			functions.emplace("sinhf", reinterpret_cast<void *>(sinhf));
+			functions.emplace("coshf", reinterpret_cast<void *>(coshf));
+			functions.emplace("tanhf", reinterpret_cast<void *>(tanhf));
+			functions.emplace("asinhf", reinterpret_cast<void *>(asinhf));
+			functions.emplace("acoshf", reinterpret_cast<void *>(acoshf));
+			functions.emplace("atanhf", reinterpret_cast<void *>(atanhf));
+			functions.emplace("atan2f", reinterpret_cast<void *>(atan2f));
+			functions.emplace("powf", reinterpret_cast<void *>(powf));
+			functions.emplace("expf", reinterpret_cast<void *>(expf));
+			functions.emplace("logf", reinterpret_cast<void *>(logf));
+			functions.emplace("exp2f", reinterpret_cast<void *>(exp2f));
+			functions.emplace("log2f", reinterpret_cast<void *>(log2f));
+
+			functions.emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
+			functions.emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
+			functions.emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
+			functions.emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
+			functions.emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
+			functions.emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
+			functions.emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
+			functions.emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
+			functions.emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
+			functions.emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
+			functions.emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
+			functions.emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
+			functions.emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
+			functions.emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
+			functions.emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
+			functions.emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
+			functions.emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));
+
+			functions.emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
+			functions.emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));
+
+			// FIXME(b/119409619): use an allocator here so we can control all memory allocations
+			functions.emplace("coroutine_alloc_frame", reinterpret_cast<void *>(F::coroutine_alloc_frame));
+			functions.emplace("coroutine_free_frame", reinterpret_cast<void *>(F::coroutine_free_frame));
+
+#ifdef __APPLE__
+			functions.emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
+#elif defined(__linux__)
+			functions.emplace("sincosf", reinterpret_cast<void *>(sincosf));
+#elif defined(_WIN64)
+			functions.emplace("chkstk", reinterpret_cast<void *>(__chkstk));
+#elif defined(_WIN32)
+			functions.emplace("chkstk", reinterpret_cast<void *>(_chkstk));
+#endif
+
+#ifdef __ANDROID__
+			functions.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(F::neverCalled));
+			functions.emplace("sync_synchronize", reinterpret_cast<void *>(F::sync_synchronize));
+			functions.emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(F::sync_fetch_and_add_4));
+			functions.emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(F::sync_fetch_and_and_4));
+			functions.emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(F::sync_fetch_and_or_4));
+			functions.emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(F::sync_fetch_and_xor_4));
+			functions.emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(F::sync_fetch_and_sub_4));
+			functions.emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(F::sync_lock_test_and_set_4));
+			functions.emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(F::sync_val_compare_and_swap_4));
+			functions.emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(F::sync_fetch_and_max_4));
+			functions.emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(F::sync_fetch_and_min_4));
+			functions.emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(F::sync_fetch_and_umax_4));
+			functions.emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(F::sync_fetch_and_umin_4));
+#endif
+		}
+	};
+
+	static Resolver resolver;
+
+	// Trim off any underscores from the start of the symbol. LLVM likes
+	// to prepend these on macOS.
+	const char *trimmed = name;
+	while(trimmed[0] == '_') { trimmed++; }
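+	// e.g. on Windows, "__chkstk" matches the entry registered as "chkstk".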
+
+	auto it = resolver.functions.find(trimmed);
+	// Missing functions will likely make the module fail in exciting non-obvious ways.
+	ASSERT_MSG(it != resolver.functions.end(), "Missing external function: '%s'", name);
+	return it->second;
+}
+
+// JITRoutine is an rr::Routine that holds an LLVM JIT session, compiler, and
+// object layer as each routine may require different target machine
+// settings and no Reactor routine directly links against another.
+class JITRoutine : public rr::Routine
+{
+#if LLVM_VERSION_MAJOR >= 8
+	using ObjLayer = llvm::orc::LegacyRTDyldObjectLinkingLayer;
+	using CompileLayer = llvm::orc::LegacyIRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
+#else
+	using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
+	using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
+#endif
+
+public:
+	JITRoutine(
+	    std::unique_ptr<llvm::Module> module,
+	    llvm::Function **funcs,
+	    size_t count,
+	    const rr::Config &config)
+	    : resolver(createLegacyLookupResolver(
+	          session,
+	          [&](const llvm::StringRef &name) {
+		          void *func = resolveExternalSymbol(name.str().c_str());
+		          if(func != nullptr)
+		          {
+			          return llvm::JITSymbol(
+			              reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
+		          }
+		          return objLayer.findSymbol(name, true);
+	          },
+	          [](llvm::Error err) {
+		          if(err)
+		          {
+			          // TODO: Log the symbol resolution errors.
+			          return;
+		          }
+	          }))
+	    , targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel()))
+	    , compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine))
+	    , objLayer(
+	          session,
+	          [this](llvm::orc::VModuleKey) {
+		          return ObjLayer::Resources{ std::make_shared<llvm::SectionMemoryManager>(&memoryMapper), resolver };
+	          },
+	          ObjLayer::NotifyLoadedFtor(),
+	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
+#ifdef ENABLE_RR_DEBUG_INFO
+		          rr::DebugInfo::NotifyObjectEmitted(Obj, L);
+#endif  // ENABLE_RR_DEBUG_INFO
+	          },
+	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
+#ifdef ENABLE_RR_DEBUG_INFO
+		          rr::DebugInfo::NotifyFreeingObject(Obj);
+#endif  // ENABLE_RR_DEBUG_INFO
+	          })
+	    , addresses(count)
+	{
+		std::vector<std::string> mangledNames(count);
+		for(size_t i = 0; i < count; i++)
+		{
+			auto func = funcs[i];
+			static size_t numEmittedFunctions = 0;
+			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
+			func->setName(name);
+			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
+			func->setDoesNotThrow();
+
+			llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
+			llvm::Mangler::getNameWithPrefix(mangledNameStream, name, JITGlobals::get()->dataLayout);
+		}
+
+		auto moduleKey = session.allocateVModule();
+
+		// Once the module is passed to the compileLayer, the
+		// llvm::Functions are freed. Make sure funcs are not referenced
+		// after this point.
+		funcs = nullptr;
+
+		llvm::cantFail(compileLayer.addModule(moduleKey, std::move(module)));
+
+		// Resolve the function addresses.
+		for(size_t i = 0; i < count; i++)
+		{
+			auto symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
+			if(auto address = symbol.getAddress())
+			{
+				addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(address.get()));
+			}
+		}
+	}
+
+	const void *getEntry(int index) const override
+	{
+		return addresses[index];
+	}
+
+private:
+	std::shared_ptr<llvm::orc::SymbolResolver> resolver;
+	std::shared_ptr<llvm::TargetMachine> targetMachine;
+	llvm::orc::ExecutionSession session;
+	CompileLayer compileLayer;
+	MemoryMapper memoryMapper;
+	ObjLayer objLayer;
+	std::vector<const void *> addresses;
+};
+
+}  // anonymous namespace
+
+namespace rr {
+
+JITBuilder::JITBuilder(const rr::Config &config)
+    : config(config)
+    , module(new llvm::Module("", context))
+    , builder(new llvm::IRBuilder<>(context))
+{
+	module->setDataLayout(JITGlobals::get()->dataLayout);
+}
+
+void JITBuilder::optimize(const rr::Config &cfg)
+{
+
+#ifdef ENABLE_RR_DEBUG_INFO
+	if(debugInfo != nullptr)
+	{
+		return;  // Don't optimize if we're generating debug info.
+	}
+#endif  // ENABLE_RR_DEBUG_INFO
+
+	std::unique_ptr<llvm::legacy::PassManager> passManager(
+	    new llvm::legacy::PassManager());
+
+	for(auto pass : cfg.getOptimization().getPasses())
+	{
+		switch(pass)
+		{
+			case rr::Optimization::Pass::Disabled: break;
+			case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
+			case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
+			case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
+			case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
+			case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
+			case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
+			case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
+			case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
+			case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
+			case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
+			default:
+				UNREACHABLE("pass: %d", int(pass));
+		}
+	}
+
+	passManager->run(*module);
+}
+
+std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
+{
+	ASSERT(module);
+	return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
+}
+
+}  // namespace rr
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 9df9eb9..d3d021d 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -13,70 +13,22 @@
 // limitations under the License.
 
 #include "LLVMReactor.hpp"
+
+#include "CPUID.hpp"
 #include "Debug.hpp"
 #include "LLVMReactorDebugInfo.hpp"
 #include "Reactor.hpp"
-
-#include "CPUID.hpp"
-#include "ExecutableMemory.hpp"
-#include "Thread.hpp"
 #include "x86.hpp"
 
-#undef min
-#undef max
-
-#if defined(__clang__)
-// LLVM has occurances of the extra-semi warning in its headers, which will be
-// treated as an error in SwiftShader targets.
-#	pragma clang diagnostic push
-#	pragma clang diagnostic ignored "-Wextra-semi"
-#endif  // defined(__clang__)
-
-#ifdef _MSC_VER
-__pragma(warning(push))
-    __pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
-#endif
-
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
-#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
-#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Intrinsics.h"
 #if LLVM_VERSION_MAJOR >= 9
 #	include "llvm/IR/IntrinsicsX86.h"
 #endif
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Coroutines.h"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-
-#if defined(__clang__)
-#	pragma clang diagnostic pop
-#endif  // defined(__clang__)
-
-#ifdef _MSC_VER
-    __pragma(warning(pop))
-#endif
 
 #define ARGS(...)   \
 	{               \
@@ -85,13 +37,12 @@
 #define CreateCall2 CreateCall
 #define CreateCall3 CreateCall
 
-#include <unordered_map>
-
 #include <fstream>
 #include <iostream>
 #include <mutex>
 #include <numeric>
 #include <thread>
+#include <unordered_map>
 
 #if defined(__i386__) || defined(__x86_64__)
 #	include <xmmintrin.h>
@@ -100,26 +51,17 @@
 #include <math.h>
 
 #if defined(__x86_64__) && defined(_WIN32)
-        extern "C" void X86CompilationCallback()
+extern "C" void X86CompilationCallback()
 {
 	UNIMPLEMENTED("X86CompilationCallback");
 }
 #endif
 
-#if defined(_WIN64)
-extern "C" void __chkstk();
-#elif defined(_WIN32)
-extern "C" void _chkstk();
-#endif
-
-namespace rr {
-
-void *resolveExternalSymbol(const char *);
-
-}  // namespace rr
-
 namespace {
 
+std::unique_ptr<rr::JITBuilder> jit;
+std::mutex codegenMutex;
+
 // Default configuration settings. Must be accessed under mutex lock.
 std::mutex defaultConfigLock;
 rr::Config &defaultConfig()
@@ -133,460 +75,6 @@
 	return config;
 }
 
-// Cache provides a simple, thread-safe key-value store.
-template<typename KEY, typename VALUE>
-class Cache
-{
-public:
-	Cache() = default;
-	Cache(const Cache &other);
-	VALUE getOrCreate(KEY key, std::function<VALUE()> create);
-
-private:
-	mutable std::mutex mutex;  // mutable required for copy constructor.
-	std::unordered_map<KEY, VALUE> map;
-};
-
-template<typename KEY, typename VALUE>
-Cache<KEY, VALUE>::Cache(const Cache &other)
-{
-	std::unique_lock<std::mutex> lock(other.mutex);
-	map = other.map;
-}
-
-template<typename KEY, typename VALUE>
-VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
-{
-	std::unique_lock<std::mutex> lock(mutex);
-	auto it = map.find(key);
-	if(it != map.end())
-	{
-		return it->second;
-	}
-	auto value = create();
-	map.emplace(key, value);
-	return value;
-}
-
-// JITGlobals is a singleton that holds all the immutable machine specific
-// information for the host device.
-class JITGlobals
-{
-public:
-	using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;
-
-	static JITGlobals *get();
-
-	const std::string mcpu;
-	const std::vector<std::string> mattrs;
-	const char *const march;
-	const llvm::TargetOptions targetOptions;
-	const llvm::DataLayout dataLayout;
-
-	TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);
-
-private:
-	static JITGlobals create();
-	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
-	JITGlobals(const char *mcpu,
-	           const std::vector<std::string> &mattrs,
-	           const char *march,
-	           const llvm::TargetOptions &targetOptions,
-	           const llvm::DataLayout &dataLayout);
-	JITGlobals(const JITGlobals &) = default;
-
-	Cache<rr::Optimization::Level, TargetMachineSPtr> targetMachines;
-};
-
-JITGlobals *JITGlobals::get()
-{
-	static JITGlobals instance = create();
-	return &instance;
-}
-
-JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
-{
-#ifdef ENABLE_RR_DEBUG_INFO
-	auto llvmOptLevel = toLLVM(rr::Optimization::Level::None);
-#else   // ENABLE_RR_DEBUG_INFO
-	auto llvmOptLevel = toLLVM(optlevel);
-#endif  // ENABLE_RR_DEBUG_INFO
-
-	return targetMachines.getOrCreate(optlevel, [&]() {
-		return TargetMachineSPtr(llvm::EngineBuilder()
-		                             .setOptLevel(llvmOptLevel)
-		                             .setMCPU(mcpu)
-		                             .setMArch(march)
-		                             .setMAttrs(mattrs)
-		                             .setTargetOptions(targetOptions)
-		                             .selectTarget());
-	});
-}
-
-JITGlobals JITGlobals::create()
-{
-	struct LLVMInitializer
-	{
-		LLVMInitializer()
-		{
-			llvm::InitializeNativeTarget();
-			llvm::InitializeNativeTargetAsmPrinter();
-			llvm::InitializeNativeTargetAsmParser();
-		}
-	};
-	static LLVMInitializer initializeLLVM;
-
-	auto mcpu = llvm::sys::getHostCPUName();
-
-	llvm::StringMap<bool> features;
-	bool ok = llvm::sys::getHostCPUFeatures(features);
-
-#if defined(__i386__) || defined(__x86_64__) || \
-    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
-	ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
-#else
-	(void)ok;  // getHostCPUFeatures always returns false on other platforms
-#endif
-
-	std::vector<std::string> mattrs;
-
-	for(auto &feature : features)
-	{
-		if(feature.second) { mattrs.push_back(feature.first().str()); }
-	}
-
-	const char *march = nullptr;
-#if defined(__x86_64__)
-	march = "x86-64";
-#elif defined(__i386__)
-	march = "x86";
-#elif defined(__aarch64__)
-	march = "arm64";
-#elif defined(__arm__)
-	march = "arm";
-#elif defined(__mips__)
-#	if defined(__mips64)
-	march = "mips64el";
-#	else
-	march = "mipsel";
-#	endif
-#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-	march = "ppc64le";
-#else
-#	error "unknown architecture"
-#endif
-
-	llvm::TargetOptions targetOptions;
-	targetOptions.UnsafeFPMath = false;
-
-	auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
-	    llvm::EngineBuilder()
-	        .setOptLevel(llvm::CodeGenOpt::None)
-	        .setMCPU(mcpu)
-	        .setMArch(march)
-	        .setMAttrs(mattrs)
-	        .setTargetOptions(targetOptions)
-	        .selectTarget());
-
-	auto dataLayout = targetMachine->createDataLayout();
-
-	return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
-}
-
-llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
-{
-	switch(level)
-	{
-		case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
-		case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
-		case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
-		case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
-		default: UNREACHABLE("Unknown Optimization Level %d", int(level));
-	}
-	return ::llvm::CodeGenOpt::Default;
-}
-
-JITGlobals::JITGlobals(const char *mcpu,
-                       const std::vector<std::string> &mattrs,
-                       const char *march,
-                       const llvm::TargetOptions &targetOptions,
-                       const llvm::DataLayout &dataLayout)
-    : mcpu(mcpu)
-    , mattrs(mattrs)
-    , march(march)
-    , targetOptions(targetOptions)
-    , dataLayout(dataLayout)
-{
-}
-
-class MemoryMapper : public llvm::SectionMemoryManager::MemoryMapper
-{
-public:
-	MemoryMapper() {}
-	~MemoryMapper() final {}
-
-	llvm::sys::MemoryBlock allocateMappedMemory(
-	    llvm::SectionMemoryManager::AllocationPurpose purpose,
-	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
-	    unsigned flags, std::error_code &errorCode) final
-	{
-		errorCode = std::error_code();
-
-		// Round up numBytes to page size.
-		size_t pageSize = rr::memoryPageSize();
-		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);
-
-		bool need_exec =
-		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
-		void *addr = rr::allocateMemoryPages(
-		    numBytes, flagsToPermissions(flags), need_exec);
-		if(!addr)
-			return llvm::sys::MemoryBlock();
-		return llvm::sys::MemoryBlock(addr, numBytes);
-	}
-
-	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
-	                                    unsigned flags)
-	{
-		// Round down base address to align with a page boundary. This matches
-		// DefaultMMapper behavior.
-		void *addr = block.base();
-#if LLVM_VERSION_MAJOR >= 9
-		size_t size = block.allocatedSize();
-#else
-		size_t size = block.size();
-#endif
-		size_t pageSize = rr::memoryPageSize();
-		addr = reinterpret_cast<void *>(
-		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
-		size += reinterpret_cast<uintptr_t>(block.base()) -
-		        reinterpret_cast<uintptr_t>(addr);
-
-		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
-		return std::error_code();
-	}
-
-	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
-	{
-#if LLVM_VERSION_MAJOR >= 9
-		size_t size = block.allocatedSize();
-#else
-		size_t size = block.size();
-#endif
-
-		rr::deallocateMemoryPages(block.base(), size);
-		return std::error_code();
-	}
-
-private:
-	int flagsToPermissions(unsigned flags)
-	{
-		int result = 0;
-		if(flags & llvm::sys::Memory::MF_READ)
-		{
-			result |= rr::PERMISSION_READ;
-		}
-		if(flags & llvm::sys::Memory::MF_WRITE)
-		{
-			result |= rr::PERMISSION_WRITE;
-		}
-		if(flags & llvm::sys::Memory::MF_EXEC)
-		{
-			result |= rr::PERMISSION_EXECUTE;
-		}
-		return result;
-	}
-};
-
-// JITRoutine is a rr::Routine that holds a LLVM JIT session, compiler and
-// object layer as each routine may require different target machine
-// settings and no Reactor routine directly links against another.
-class JITRoutine : public rr::Routine
-{
-#if LLVM_VERSION_MAJOR >= 8
-	using ObjLayer = llvm::orc::LegacyRTDyldObjectLinkingLayer;
-	using CompileLayer = llvm::orc::LegacyIRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
-#else
-	using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
-	using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
-#endif
-
-public:
-	JITRoutine(
-	    std::unique_ptr<llvm::Module> module,
-	    llvm::Function **funcs,
-	    size_t count,
-	    const rr::Config &config)
-	    : resolver(createLegacyLookupResolver(
-	          session,
-	          [&](const llvm::StringRef &name) {
-		          void *func = rr::resolveExternalSymbol(name.str().c_str());
-		          if(func != nullptr)
-		          {
-			          return llvm::JITSymbol(
-			              reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
-		          }
-		          return objLayer.findSymbol(name, true);
-	          },
-	          [](llvm::Error err) {
-		          if(err)
-		          {
-			          // TODO: Log the symbol resolution errors.
-			          return;
-		          }
-	          }))
-	    , targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel()))
-	    , compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine))
-	    , objLayer(
-	          session,
-	          [this](llvm::orc::VModuleKey) {
-		          return ObjLayer::Resources{ std::make_shared<llvm::SectionMemoryManager>(&memoryMapper), resolver };
-	          },
-	          ObjLayer::NotifyLoadedFtor(),
-	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
-#ifdef ENABLE_RR_DEBUG_INFO
-		          rr::DebugInfo::NotifyObjectEmitted(Obj, L);
-#endif  // ENABLE_RR_DEBUG_INFO
-	          },
-	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
-#ifdef ENABLE_RR_DEBUG_INFO
-		          rr::DebugInfo::NotifyFreeingObject(Obj);
-#endif  // ENABLE_RR_DEBUG_INFO
-	          })
-	    , addresses(count)
-	{
-		std::vector<std::string> mangledNames(count);
-		for(size_t i = 0; i < count; i++)
-		{
-			auto func = funcs[i];
-			static size_t numEmittedFunctions = 0;
-			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
-			func->setName(name);
-			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
-			func->setDoesNotThrow();
-
-			llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
-			llvm::Mangler::getNameWithPrefix(mangledNameStream, name, JITGlobals::get()->dataLayout);
-		}
-
-		auto moduleKey = session.allocateVModule();
-
-		// Once the module is passed to the compileLayer, the
-		// llvm::Functions are freed. Make sure funcs are not referenced
-		// after this point.
-		funcs = nullptr;
-
-		llvm::cantFail(compileLayer.addModule(moduleKey, std::move(module)));
-
-		// Resolve the function addresses.
-		for(size_t i = 0; i < count; i++)
-		{
-			auto symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
-			if(auto address = symbol.getAddress())
-			{
-				addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(address.get()));
-			}
-		}
-	}
-
-	const void *getEntry(int index) const override
-	{
-		return addresses[index];
-	}
-
-private:
-	std::shared_ptr<llvm::orc::SymbolResolver> resolver;
-	std::shared_ptr<llvm::TargetMachine> targetMachine;
-	llvm::orc::ExecutionSession session;
-	CompileLayer compileLayer;
-	MemoryMapper memoryMapper;
-	ObjLayer objLayer;
-	std::vector<const void *> addresses;
-};
-
-// JITBuilder holds all the LLVM state for building routines.
-class JITBuilder
-{
-public:
-	JITBuilder(const rr::Config &config)
-	    : config(config)
-	    , module(new llvm::Module("", context))
-	    , builder(new llvm::IRBuilder<>(context))
-	{
-		module->setDataLayout(JITGlobals::get()->dataLayout);
-	}
-
-	void optimize(const rr::Config &cfg)
-	{
-
-#ifdef ENABLE_RR_DEBUG_INFO
-		if(debugInfo != nullptr)
-		{
-			return;  // Don't optimize if we're generating debug info.
-		}
-#endif  // ENABLE_RR_DEBUG_INFO
-
-		std::unique_ptr<llvm::legacy::PassManager> passManager(
-		    new llvm::legacy::PassManager());
-
-		for(auto pass : cfg.getOptimization().getPasses())
-		{
-			switch(pass)
-			{
-				case rr::Optimization::Pass::Disabled: break;
-				case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
-				case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
-				case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
-				case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
-				case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
-				case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
-				case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
-				case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
-				case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
-				case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
-				default:
-					UNREACHABLE("pass: %d", int(pass));
-			}
-		}
-
-		passManager->run(*module);
-	}
-
-	std::shared_ptr<rr::Routine> acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
-	{
-		ASSERT(module);
-		return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
-	}
-
-	const rr::Config config;
-	llvm::LLVMContext context;
-	std::unique_ptr<llvm::Module> module;
-	std::unique_ptr<llvm::IRBuilder<>> builder;
-	llvm::Function *function = nullptr;
-
-	struct CoroutineState
-	{
-		llvm::Function *await = nullptr;
-		llvm::Function *destroy = nullptr;
-		llvm::Value *handle = nullptr;
-		llvm::Value *id = nullptr;
-		llvm::Value *promise = nullptr;
-		llvm::Type *yieldType = nullptr;
-		llvm::BasicBlock *entryBlock = nullptr;
-		llvm::BasicBlock *suspendBlock = nullptr;
-		llvm::BasicBlock *endBlock = nullptr;
-		llvm::BasicBlock *destroyBlock = nullptr;
-	};
-	CoroutineState coroutine;
-
-#ifdef ENABLE_RR_DEBUG_INFO
-	std::unique_ptr<rr::DebugInfo> debugInfo;
-#endif
-};
-
-std::unique_ptr<JITBuilder> jit;
-std::mutex codegenMutex;
-
 #ifdef ENABLE_RR_PRINT
 std::string replace(std::string str, const std::string &substr, const std::string &replacement)
 {
@@ -600,32 +88,6 @@
 }
 #endif  // ENABLE_RR_PRINT
 
-template<typename T>
-T alignUp(T val, T alignment)
-{
-	return alignment * ((val + alignment - 1) / alignment);
-}
-
-void *alignedAlloc(size_t size, size_t alignment)
-{
-	ASSERT(alignment < 256);
-	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
-	auto aligned = allocation;
-	aligned += sizeof(uint8_t);                                                                       // Make space for the base-address offset.
-	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
-	auto offset = static_cast<uint8_t>(aligned - allocation);
-	aligned[-1] = offset;
-	return aligned;
-}
-
-void alignedFree(void *ptr)
-{
-	auto aligned = reinterpret_cast<uint8_t *>(ptr);
-	auto offset = aligned[-1];
-	auto allocation = aligned - offset;
-	delete[] allocation;
-}
-
 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
 {
 	llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
@@ -1021,239 +483,6 @@
 	true,  // CoroutinesSupported
 };
 
-static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
-{
-	switch(memoryOrder)
-	{
-		case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed;  // https://llvm.org/docs/Atomics.html#monotonic
-		case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
-		case llvm::AtomicOrdering::Release: return std::memory_order_release;
-		case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
-		case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
-		default:
-			UNREACHABLE("memoryOrder: %d", int(memoryOrder));
-			return std::memory_order_acq_rel;
-	}
-}
-
-static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
-{
-	if(!atomic)
-	{
-		return llvm::AtomicOrdering::NotAtomic;
-	}
-
-	switch(memoryOrder)
-	{
-		case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic;  // https://llvm.org/docs/Atomics.html#monotonic
-		case std::memory_order_consume: return llvm::AtomicOrdering::Acquire;    // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
-		case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
-		case std::memory_order_release: return llvm::AtomicOrdering::Release;
-		case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
-		case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
-		default:
-			UNREACHABLE("memoryOrder: %d", int(memoryOrder));
-			return llvm::AtomicOrdering::AcquireRelease;
-	}
-}
-
-template<typename T>
-static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
-{
-	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), atomicOrdering(ordering));
-}
-
-template<typename T>
-static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
-{
-	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), atomicOrdering(ordering));
-}
-
-#ifdef __ANDROID__
-template<typename F>
-static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
-{
-	// Build an arbitrary op out of looped CAS
-	for(;;)
-	{
-		uint32_t expected = *ptr;
-		uint32_t desired = f(expected, val);
-
-		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
-			return expected;
-	}
-}
-#endif
-
-void *resolveExternalSymbol(const char *name)
-{
-	struct Atomic
-	{
-		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
-		{
-			switch(size)
-			{
-				case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
-				case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
-				case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
-				case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
-				default:
-					UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
-			}
-		}
-		static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
-		{
-			switch(size)
-			{
-				case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
-				case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
-				case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
-				case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
-				default:
-					UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
-			}
-		}
-	};
-
-	struct F
-	{
-		static void nop() {}
-		static void neverCalled() { UNREACHABLE("Should never be called"); }
-
-		static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
-		static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }
-
-#ifdef __ANDROID__
-		// forwarders since we can't take address of builtins
-		static void sync_synchronize() { __sync_synchronize(); }
-		static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
-		static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
-		static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
-		static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
-		static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
-		static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
-		static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }
-
-		static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
-		{
-			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
-		}
-		static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
-		{
-			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
-		}
-		static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
-		{
-			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
-		}
-		static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
-		{
-			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
-		}
-#endif
-	};
-
-	class Resolver
-	{
-	public:
-		using FunctionMap = std::unordered_map<std::string, void *>;
-
-		FunctionMap functions;
-
-		Resolver()
-		{
-			functions.emplace("nop", reinterpret_cast<void *>(F::nop));
-			functions.emplace("floorf", reinterpret_cast<void *>(floorf));
-			functions.emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
-			functions.emplace("truncf", reinterpret_cast<void *>(truncf));
-			functions.emplace("printf", reinterpret_cast<void *>(printf));
-			functions.emplace("puts", reinterpret_cast<void *>(puts));
-			functions.emplace("fmodf", reinterpret_cast<void *>(fmodf));
-
-			functions.emplace("sinf", reinterpret_cast<void *>(sinf));
-			functions.emplace("cosf", reinterpret_cast<void *>(cosf));
-			functions.emplace("asinf", reinterpret_cast<void *>(asinf));
-			functions.emplace("acosf", reinterpret_cast<void *>(acosf));
-			functions.emplace("atanf", reinterpret_cast<void *>(atanf));
-			functions.emplace("sinhf", reinterpret_cast<void *>(sinhf));
-			functions.emplace("coshf", reinterpret_cast<void *>(coshf));
-			functions.emplace("tanhf", reinterpret_cast<void *>(tanhf));
-			functions.emplace("asinhf", reinterpret_cast<void *>(asinhf));
-			functions.emplace("acoshf", reinterpret_cast<void *>(acoshf));
-			functions.emplace("atanhf", reinterpret_cast<void *>(atanhf));
-			functions.emplace("atan2f", reinterpret_cast<void *>(atan2f));
-			functions.emplace("powf", reinterpret_cast<void *>(powf));
-			functions.emplace("expf", reinterpret_cast<void *>(expf));
-			functions.emplace("logf", reinterpret_cast<void *>(logf));
-			functions.emplace("exp2f", reinterpret_cast<void *>(exp2f));
-			functions.emplace("log2f", reinterpret_cast<void *>(log2f));
-
-			functions.emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
-			functions.emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
-			functions.emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
-			functions.emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
-			functions.emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
-			functions.emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
-			functions.emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
-			functions.emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
-			functions.emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
-			functions.emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
-			functions.emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
-			functions.emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
-			functions.emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
-			functions.emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
-			functions.emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
-			functions.emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
-			functions.emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));
-
-			functions.emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
-			functions.emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));
-
-			// FIXME (b/119409619): use an allocator here so we can control all memory allocations
-			functions.emplace("coroutine_alloc_frame", reinterpret_cast<void *>(F::coroutine_alloc_frame));
-			functions.emplace("coroutine_free_frame", reinterpret_cast<void *>(F::coroutine_free_frame));
-
-#ifdef __APPLE__
-			functions.emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
-#elif defined(__linux__)
-			functions.emplace("sincosf", reinterpret_cast<void *>(sincosf));
-#elif defined(_WIN64)
-			functions.emplace("chkstk", reinterpret_cast<void *>(__chkstk));
-#elif defined(_WIN32)
-			functions.emplace("chkstk", reinterpret_cast<void *>(_chkstk));
-#endif
-
-#ifdef __ANDROID__
-			functions.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(F::neverCalled));
-			functions.emplace("sync_synchronize", reinterpret_cast<void *>(F::sync_synchronize));
-			functions.emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(F::sync_fetch_and_add_4));
-			functions.emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(F::sync_fetch_and_and_4));
-			functions.emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(F::sync_fetch_and_or_4));
-			functions.emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(F::sync_fetch_and_xor_4));
-			functions.emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(F::sync_fetch_and_sub_4));
-			functions.emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(F::sync_lock_test_and_set_4));
-			functions.emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(F::sync_val_compare_and_swap_4));
-			functions.emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(F::sync_fetch_and_max_4));
-			functions.emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(F::sync_fetch_and_min_4));
-			functions.emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(F::sync_fetch_and_umax_4));
-			functions.emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(F::sync_fetch_and_umin_4));
-#endif
-		}
-	};
-
-	static Resolver resolver;
-
-	// Trim off any underscores from the start of the symbol. LLVM likes
-	// to append these on macOS.
-	const char *trimmed = name;
-	while(trimmed[0] == '_') { trimmed++; }
-
-	auto it = resolver.functions.find(trimmed);
-	// Missing functions will likely make the module fail in exciting non-obvious ways.
-	ASSERT_MSG(it != resolver.functions.end(), "Missing external function: '%s'", name);
-	return it->second;
-}
-
 // The abstract Type* types are implemented as LLVM types, except that
 // 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86
 // and VFP in ARM, and eliminate the overhead of converting them to explicit
@@ -4632,58 +3861,72 @@
 {
 	return toInt({ v.value }, false);
 }
+
 std::vector<Value *> PrintValue::Ty<Byte4>::val(const RValue<Byte4> &v)
 {
 	return toInt(extractAll(v.value, 4), false);
 }
+
 std::vector<Value *> PrintValue::Ty<Int>::val(const RValue<Int> &v)
 {
 	return toInt({ v.value }, true);
 }
+
 std::vector<Value *> PrintValue::Ty<Int2>::val(const RValue<Int2> &v)
 {
 	return toInt(extractAll(v.value, 2), true);
 }
+
 std::vector<Value *> PrintValue::Ty<Int4>::val(const RValue<Int4> &v)
 {
 	return toInt(extractAll(v.value, 4), true);
 }
+
 std::vector<Value *> PrintValue::Ty<UInt>::val(const RValue<UInt> &v)
 {
 	return toInt({ v.value }, false);
 }
+
 std::vector<Value *> PrintValue::Ty<UInt2>::val(const RValue<UInt2> &v)
 {
 	return toInt(extractAll(v.value, 2), false);
 }
+
 std::vector<Value *> PrintValue::Ty<UInt4>::val(const RValue<UInt4> &v)
 {
 	return toInt(extractAll(v.value, 4), false);
 }
+
 std::vector<Value *> PrintValue::Ty<Short>::val(const RValue<Short> &v)
 {
 	return toInt({ v.value }, true);
 }
+
 std::vector<Value *> PrintValue::Ty<Short4>::val(const RValue<Short4> &v)
 {
 	return toInt(extractAll(v.value, 4), true);
 }
+
 std::vector<Value *> PrintValue::Ty<UShort>::val(const RValue<UShort> &v)
 {
 	return toInt({ v.value }, false);
 }
+
 std::vector<Value *> PrintValue::Ty<UShort4>::val(const RValue<UShort4> &v)
 {
 	return toInt(extractAll(v.value, 4), false);
 }
+
 std::vector<Value *> PrintValue::Ty<Float>::val(const RValue<Float> &v)
 {
 	return toDouble({ v.value });
 }
+
 std::vector<Value *> PrintValue::Ty<Float4>::val(const RValue<Float4> &v)
 {
 	return toDouble(extractAll(v.value, 4));
 }
+
 std::vector<Value *> PrintValue::Ty<const char *>::val(const char *v)
 {
 	return { V(jit->builder->CreateGlobalStringPtr(v)) };
diff --git a/src/Reactor/LLVMReactor.hpp b/src/Reactor/LLVMReactor.hpp
index 0b91ef8..db7df10 100644
--- a/src/Reactor/LLVMReactor.hpp
+++ b/src/Reactor/LLVMReactor.hpp
@@ -15,10 +15,32 @@
 #ifndef rr_LLVMReactor_hpp
 #define rr_LLVMReactor_hpp
 
-namespace llvm {
+#include "Nucleus.hpp"
 
-class Type;
-class Value;
+#include "Debug.hpp"
+#include "LLVMReactorDebugInfo.hpp"
+#include "Print.hpp"
+
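+// TODO(b/143539525): Eliminate when warning has been fixed.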
+#ifdef _MSC_VER
+__pragma(warning(push))
+    __pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+
+#ifdef _MSC_VER
+    __pragma(warning(pop))
+#endif
+
+#include <memory>
+
+namespace llvm
+{
+
+class Type;
+class Value;
 
 }  // namespace llvm
 
@@ -49,6 +70,88 @@
 // effect.
 void Nop();
 
+class Routine;
+class Config;
+
+// JITBuilder holds all the LLVM state for building routines.
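+// It owns the LLVMContext, the module under construction, and the IRBuilder
+// used to emit IR into it. acquireRoutine() compiles the module and returns
+// the given functions as an executable rr::Routine.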
+class JITBuilder
+{
+public:
+	JITBuilder(const rr::Config &config);
+
+	void optimize(const rr::Config &cfg);
+
+	std::shared_ptr<rr::Routine> acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg);
+
+	const Config config;
+	llvm::LLVMContext context;
+	std::unique_ptr<llvm::Module> module;
+	std::unique_ptr<llvm::IRBuilder<>> builder;
+	llvm::Function *function = nullptr;
+
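+	// IR state for the coroutine currently being generated, if any.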
+	struct CoroutineState
+	{
+		llvm::Function *await = nullptr;
+		llvm::Function *destroy = nullptr;
+		llvm::Value *handle = nullptr;
+		llvm::Value *id = nullptr;
+		llvm::Value *promise = nullptr;
+		llvm::Type *yieldType = nullptr;
+		llvm::BasicBlock *entryBlock = nullptr;
+		llvm::BasicBlock *suspendBlock = nullptr;
+		llvm::BasicBlock *endBlock = nullptr;
+		llvm::BasicBlock *destroyBlock = nullptr;
+	};
+	CoroutineState coroutine;
+
+#ifdef ENABLE_RR_DEBUG_INFO
+	std::unique_ptr<rr::DebugInfo> debugInfo;
+#endif
+};
+
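+// Translates LLVM's atomic memory ordering to the closest C++11 equivalent.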
+inline std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
+{
+	switch(memoryOrder)
+	{
+		case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed;  // https://llvm.org/docs/Atomics.html#monotonic
+		case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
+		case llvm::AtomicOrdering::Release: return std::memory_order_release;
+		case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
+		case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
+		default:
+			UNREACHABLE("memoryOrder: %d", int(memoryOrder));
+			return std::memory_order_acq_rel;
+	}
+}
+
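+// Translates a C++11 memory order to LLVM's atomic ordering; non-atomic
+// accesses map to llvm::AtomicOrdering::NotAtomic.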
+inline llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
+{
+	if(!atomic)
+	{
+		return llvm::AtomicOrdering::NotAtomic;
+	}
+
+	switch(memoryOrder)
+	{
+		case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic;  // https://llvm.org/docs/Atomics.html#monotonic
+		case std::memory_order_consume: return llvm::AtomicOrdering::Acquire;    // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
+		case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
+		case std::memory_order_release: return llvm::AtomicOrdering::Release;
+		case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
+		case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
+		default:
+			UNREACHABLE("memoryOrder: %d", int(memoryOrder));
+			return llvm::AtomicOrdering::AcquireRelease;
+	}
+}
+
 }  // namespace rr
 
 #endif  // rr_LLVMReactor_hpp
diff --git a/src/Reactor/LLVMReactorDebugInfo.cpp b/src/Reactor/LLVMReactorDebugInfo.cpp
index f36f9c9..21a759c 100644
--- a/src/Reactor/LLVMReactorDebugInfo.cpp
+++ b/src/Reactor/LLVMReactorDebugInfo.cpp
@@ -21,12 +21,22 @@
 
 #	include "boost/stacktrace.hpp"
 
+// TODO(b/143539525): Eliminate when warning has been fixed.
+#	ifdef _MSC_VER
+__pragma(warning(push))
+    __pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
+#	endif
+
 #	include "llvm/Demangle/Demangle.h"
 #	include "llvm/ExecutionEngine/JITEventListener.h"
 #	include "llvm/IR/DIBuilder.h"
 #	include "llvm/IR/IRBuilder.h"
 #	include "llvm/IR/Intrinsics.h"
 
+#	ifdef _MSC_VER
+    __pragma(warning(pop))
+#	endif
+
 #	include <cctype>
 #	include <fstream>
 #	include <mutex>
@@ -40,17 +50,18 @@
 #		define LOG(msg, ...)
 #	endif
 
-namespace {
+namespace
+{
 
 std::pair<llvm::StringRef, llvm::StringRef> splitPath(const char *path)
 {
 	return llvm::StringRef(path).rsplit('/');
 }
 
 // Note: createGDBRegistrationListener() returns a pointer to a singleton.
 // Nothing is actually created.
 auto jitEventListener = llvm::JITEventListener::createGDBRegistrationListener();  // guarded by jitEventListenerMutex
 std::mutex jitEventListenerMutex;
 
 }  // namespace
 
diff --git a/src/Reactor/Reactor.vcxproj b/src/Reactor/Reactor.vcxproj
index 31723c3..c2c4299 100644
--- a/src/Reactor/Reactor.vcxproj
+++ b/src/Reactor/Reactor.vcxproj
@@ -287,6 +287,7 @@
     <ClCompile Include="CPUID.cpp" />

     <ClCompile Include="Debug.cpp" />

     <ClCompile Include="LLVMReactor.cpp" />

+    <ClCompile Include="LLVMJIT.cpp" />

     <ClCompile Include="LLVMReactorDebugInfo.cpp" />

     <ClCompile Include="ExecutableMemory.cpp" />

     <ClCompile Include="Reactor.cpp" />

diff --git a/src/Reactor/Reactor.vcxproj.filters b/src/Reactor/Reactor.vcxproj.filters
index 09796d9..15d7c4a 100644
--- a/src/Reactor/Reactor.vcxproj.filters
+++ b/src/Reactor/Reactor.vcxproj.filters
@@ -18,6 +18,9 @@
     <ClCompile Include="LLVMReactor.cpp">

       <Filter>Source Files</Filter>

     </ClCompile>

+    <ClCompile Include="LLVMJIT.cpp">

+      <Filter>Source Files</Filter>

+    </ClCompile>

     <ClCompile Include="LLVMReactorDebugInfo.cpp">

       <Filter>Source Files</Filter>

     </ClCompile>