// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "LLVMReactor.hpp"
#include "Debug.hpp"
#include "ExecutableMemory.hpp"
#include "LLVMAsm.hpp"
#include "PragmaInternals.hpp"
#include "Routine.hpp"
// TODO(b/143539525): Eliminate when warning has been fixed.
#ifdef _MSC_VER
__pragma(warning(push))
__pragma(warning(disable : 4146)) // unary minus operator applied to unsigned type, result still unsigned
#endif
// See https://groups.google.com/g/llvm-dev/c/CAE7Va57h2c/m/74ITeXFEAQAJ
// for information about `RTDyldObjectLinkingLayer` vs `ObjectLinkingLayer`.
// On RISC-V, only `ObjectLinkingLayer` is supported.
#if defined(__riscv)
#define USE_LEGACY_OBJECT_LINKING_LAYER 0
#else
#define USE_LEGACY_OBJECT_LINKING_LAYER 1
#endif
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#if USE_LEGACY_OBJECT_LINKING_LAYER
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#else
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#endif
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#if LLVM_VERSION_MAJOR >= 13 // New pass manager
# include "llvm/IR/PassManager.h"
# include "llvm/Passes/PassBuilder.h"
# include "llvm/Transforms/Scalar/ADCE.h"
# include "llvm/Transforms/Scalar/DeadStoreElimination.h"
# include "llvm/Transforms/Scalar/EarlyCSE.h"
# include "llvm/Transforms/Scalar/LICM.h"
# include "llvm/Transforms/Scalar/Reassociate.h"
# include "llvm/Transforms/Scalar/SCCP.h"
# include "llvm/Transforms/Scalar/SROA.h"
# include "llvm/Transforms/Scalar/SimplifyCFG.h"
#else // Legacy pass manager
# include "llvm/IR/LegacyPassManager.h"
# include "llvm/Pass.h"
# include "llvm/Transforms/Coroutines.h"
# include "llvm/Transforms/IPO.h"
#endif
#ifdef _MSC_VER
__pragma(warning(pop))
#endif
#if defined(__unix__) || defined(__APPLE__) || defined(__Fuchsia__)
# define ADDRESS_SANITIZER_INSTRUMENTATION_SUPPORTED true
# if __has_feature(memory_sanitizer) || __has_feature(address_sanitizer)
# include <dlfcn.h> // dlsym()
# endif
#else
# define ADDRESS_SANITIZER_INSTRUMENTATION_SUPPORTED false
#endif
#ifndef REACTOR_ASM_EMIT_DIR
# define REACTOR_ASM_EMIT_DIR "./"
#endif
#if defined(_WIN64)
extern "C" void __chkstk();
#elif defined(_WIN32)
extern "C" void _chkstk();
#endif
#ifdef __ARM_EABI__
extern "C" signed __aeabi_idivmod();
#endif
#if __has_feature(memory_sanitizer)
# include "sanitizer/msan_interface.h"
// MemorySanitizer uses thread-local storage (TLS) data arrays for passing around
// the 'shadow' values of function arguments and return values. The LLVM JIT can't
// access TLS directly, but it calls __emutls_get_address() to obtain the address.
// Typically, it would be passed a pointer to an __emutls_control structure with a
// name starting with "__emutls_v." that represents the TLS. Both the address of
// __emutls_get_address and the __emutls_v. structures are provided to the JIT by
// the symbol resolver, which can be overridden.
// We take advantage of this by substituting __emutls_get_address() with our own
// implementation, namely rr::getTLSAddress(), and substituting the __emutls_v
// variables with rr::MSanTLS enums. getTLSAddress() can then provide the address
// of the real TLS variable corresponding to the enum, in statically compiled C++.
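// For example, a lookup of "__emutls_v.__msan_retval_tls" resolves to the constant
// (void *)MSanTLS::retval, so JIT-compiled code effectively calls
// __emutls_get_address((void *)MSanTLS::retval), i.e. rr::getTLSAddress(), which
// returns &__msan_retval_tls. (Illustrative sketch of the flow implemented below.)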
// Forward declare the real TLS variables used by MemorySanitizer. These are
// defined in llvm-project/compiler-rt/lib/msan/msan.cpp.
extern __thread unsigned long long __msan_param_tls[];
extern __thread unsigned int __msan_param_origin_tls[];
extern __thread unsigned long long __msan_retval_tls[];
extern __thread unsigned int __msan_retval_origin_tls;
extern __thread unsigned long long __msan_va_arg_tls[];
extern __thread unsigned int __msan_va_arg_origin_tls[];
extern __thread unsigned long long __msan_va_arg_overflow_size_tls;
extern __thread unsigned int __msan_origin_tls;
namespace rr {
enum class MSanTLS
{
param = 1, // __msan_param_tls
param_origin, // __msan_param_origin_tls
retval, // __msan_retval_tls
retval_origin, // __msan_retval_origin_tls
va_arg, // __msan_va_arg_tls
va_arg_origin, // __msan_va_arg_origin_tls
va_arg_overflow_size, // __msan_va_arg_overflow_size_tls
origin, // __msan_origin_tls
};
static void *getTLSAddress(void *control)
{
auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
switch(tlsIndex)
{
case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
case MSanTLS::param_origin: return reinterpret_cast<void *>(&__msan_param_origin_tls);
case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
case MSanTLS::retval_origin: return reinterpret_cast<void *>(&__msan_retval_origin_tls);
case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
case MSanTLS::va_arg_origin: return reinterpret_cast<void *>(&__msan_va_arg_origin_tls);
case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
case MSanTLS::origin: return reinterpret_cast<void *>(&__msan_origin_tls);
default:
UNSUPPORTED("MemorySanitizer used an unrecognized TLS variable: %d", static_cast<int>(tlsIndex));
return nullptr;
}
}
} // namespace rr
#endif
namespace {
// TODO(b/174587935): Eliminate command-line parsing.
bool parseCommandLineOptionsOnce(int argc, const char *const *argv)
{
// Use a static immediately invoked lambda to make this thread-safe: C++11
// guarantees that the initialization of a local static runs exactly once.
static auto initialized = [=]() {
return llvm::cl::ParseCommandLineOptions(argc, argv);
}();
return initialized;
}
// JITGlobals is a singleton that holds all the immutable machine-specific
// information for the host device.
class JITGlobals
{
public:
static JITGlobals *get();
llvm::orc::JITTargetMachineBuilder getTargetMachineBuilder() const;
const llvm::DataLayout &getDataLayout() const;
const llvm::Triple &getTargetTriple() const;
private:
JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout);
static llvm::CodeGenOpt::Level toLLVM(int level);
const llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder;
const llvm::DataLayout dataLayout;
};
JITGlobals *JITGlobals::get()
{
static JITGlobals instance = [] {
const char *argv[] = {
"Reactor",
#if defined(__i386__) || defined(__x86_64__)
"-x86-asm-syntax=intel", // Use Intel syntax rather than the default AT&T
#endif
#if LLVM_VERSION_MAJOR <= 12
"-warn-stack-size=524288", // Warn when a function uses more than 512 KiB of stack memory
#endif
};
parseCommandLineOptionsOnce(sizeof(argv) / sizeof(argv[0]), argv);
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
llvm::InitializeNativeTargetAsmParser();
// TODO(b/171236524): JITTargetMachineBuilder::detectHost() currently uses the target triple of the host,
// rather than a valid triple for the current process. Once fixed, we can use that function instead.
llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder(llvm::Triple(LLVM_DEFAULT_TARGET_TRIPLE));
// Retrieve the host CPU name and sub-target features and add them to the builder.
// The relocation model, code model and codegen opt level are kept at their default values.
llvm::StringMap<bool> cpuFeatures;
bool ok = llvm::sys::getHostCPUFeatures(cpuFeatures);
#if defined(__i386__) || defined(__x86_64__) || \
(defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
#else
(void)ok; // getHostCPUFeatures always returns false on other platforms
#endif
for(auto &feature : cpuFeatures)
{
jitTargetMachineBuilder.getFeatures().AddFeature(feature.first(), feature.second);
}
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
#if defined(__riscv) && __riscv_xlen == 64
// jitTargetMachineBuilder.getFeatures() on RISC-V does
// not return the RISC-V CPU extensions, so they are
// manually added.
jitTargetMachineBuilder.getFeatures().AddFeature("+m");
jitTargetMachineBuilder.getFeatures().AddFeature("+a");
jitTargetMachineBuilder.getFeatures().AddFeature("+f");
jitTargetMachineBuilder.getFeatures().AddFeature("+d");
jitTargetMachineBuilder.getFeatures().AddFeature("+c");
// The default code model is "Small".
// On RISC-V, using the default code model results in an
// "Unsupported riscv relocation" error.
jitTargetMachineBuilder.setCodeModel(llvm::CodeModel::Medium);
#endif
jitTargetMachineBuilder.setCPU(std::string(llvm::sys::getHostCPUName()));
#else
jitTargetMachineBuilder.setCPU(llvm::sys::getHostCPUName());
#endif
// Reactor's MemorySanitizer support depends on intercepting __emutls_get_address calls.
ASSERT(!__has_feature(memory_sanitizer) || (jitTargetMachineBuilder.getOptions().ExplicitEmulatedTLS &&
jitTargetMachineBuilder.getOptions().EmulatedTLS));
auto dataLayout = jitTargetMachineBuilder.getDefaultDataLayoutForTarget();
ASSERT_MSG(dataLayout, "JITTargetMachineBuilder::getDefaultDataLayoutForTarget() failed");
return JITGlobals(std::move(jitTargetMachineBuilder), std::move(dataLayout.get()));
}();
return &instance;
}
llvm::orc::JITTargetMachineBuilder JITGlobals::getTargetMachineBuilder() const
{
llvm::orc::JITTargetMachineBuilder out = jitTargetMachineBuilder;
out.setCodeGenOptLevel(toLLVM(rr::getPragmaState(rr::OptimizationLevel)));
return out;
}
const llvm::DataLayout &JITGlobals::getDataLayout() const
{
return dataLayout;
}
const llvm::Triple &JITGlobals::getTargetTriple() const
{
return jitTargetMachineBuilder.getTargetTriple();
}
JITGlobals::JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout)
: jitTargetMachineBuilder(jitTargetMachineBuilder)
, dataLayout(dataLayout)
{
}
llvm::CodeGenOpt::Level JITGlobals::toLLVM(int level)
{
// TODO(b/173257647): MemorySanitizer instrumentation produces IR which takes the
// machine code optimization passes a lot longer to process. Disabling them has a
// negligible effect on code quality but compiles much faster.
if(__has_feature(memory_sanitizer))
{
return llvm::CodeGenOpt::None;
}
switch(level)
{
case 0: return llvm::CodeGenOpt::None;
case 1: return llvm::CodeGenOpt::Less;
case 2: return llvm::CodeGenOpt::Default;
case 3: return llvm::CodeGenOpt::Aggressive;
default: UNREACHABLE("Unknown Optimization Level %d", int(level));
}
return llvm::CodeGenOpt::Default;
}
class MemoryMapper final : public llvm::SectionMemoryManager::MemoryMapper
{
public:
MemoryMapper() {}
~MemoryMapper() final {}
llvm::sys::MemoryBlock allocateMappedMemory(
llvm::SectionMemoryManager::AllocationPurpose purpose,
size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
unsigned flags, std::error_code &errorCode) final
{
errorCode = std::error_code();
// Round up numBytes to page size.
size_t pageSize = rr::memoryPageSize();
numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);
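// Note: this bitmask rounding assumes pageSize is a power of two; e.g. with
// 4096-byte pages, a 5000-byte request rounds up to 8192 bytes.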
bool need_exec =
purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
void *addr = rr::allocateMemoryPages(
numBytes, flagsToPermissions(flags), need_exec);
if(!addr)
return llvm::sys::MemoryBlock();
return llvm::sys::MemoryBlock(addr, numBytes);
}
std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
unsigned flags)
{
// Round down base address to align with a page boundary. This matches
// DefaultMMapper behavior.
void *addr = block.base();
size_t size = block.allocatedSize();
size_t pageSize = rr::memoryPageSize();
addr = reinterpret_cast<void *>(
reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
size += reinterpret_cast<uintptr_t>(block.base()) -
reinterpret_cast<uintptr_t>(addr);
rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
return std::error_code();
}
std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
{
size_t size = block.allocatedSize();
rr::deallocateMemoryPages(block.base(), size);
return std::error_code();
}
private:
int flagsToPermissions(unsigned flags)
{
int result = 0;
if(flags & llvm::sys::Memory::MF_READ)
{
result |= rr::PERMISSION_READ;
}
if(flags & llvm::sys::Memory::MF_WRITE)
{
result |= rr::PERMISSION_WRITE;
}
if(flags & llvm::sys::Memory::MF_EXEC)
{
result |= rr::PERMISSION_EXECUTE;
}
return result;
}
};
template<typename T>
T alignUp(T val, T alignment)
{
return alignment * ((val + alignment - 1) / alignment);
}
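// Example: alignUp(13, 8) == 16 and alignUp(16, 8) == 16.
// alignedAlloc below over-allocates and stores the distance back to the start of
// the underlying allocation in the byte immediately preceding the returned pointer,
// so that alignedFree can recover the original allocation and delete it. This is
// also why the alignment must fit in a uint8_t (i.e. be less than 256).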
void *alignedAlloc(size_t size, size_t alignment)
{
ASSERT(alignment < 256);
auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
auto aligned = allocation;
aligned += sizeof(uint8_t); // Make space for the base-address offset.
aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment)); // align
auto offset = static_cast<uint8_t>(aligned - allocation);
aligned[-1] = offset;
return aligned;
}
void alignedFree(void *ptr)
{
auto aligned = reinterpret_cast<uint8_t *>(ptr);
auto offset = aligned[-1];
auto allocation = aligned - offset;
delete[] allocation;
}
template<typename T>
static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
}
template<typename T>
static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
{
std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
}
#ifdef __ANDROID__
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
// Build an arbitrary op out of looped CAS
for(;;)
{
uint32_t expected = *ptr;
uint32_t desired = f(expected, val);
if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
{
return expected;
}
}
}
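// Note: there are no __sync builtins for atomic max/min, which is presumably why the
// sync_fetch_and_{max,min,umax,umin}_4 forwarders below build them from this CAS loop.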
#endif
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
class ExternalSymbolGenerator : public llvm::orc::DefinitionGenerator
#else
class ExternalSymbolGenerator : public llvm::orc::JITDylib::DefinitionGenerator
#endif
{
struct Atomic
{
static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
switch(size)
{
case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
default:
UNIMPLEMENTED_NO_BUG("Atomic::load(size: %d)", int(size));
}
}
static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
switch(size)
{
case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
default:
UNIMPLEMENTED_NO_BUG("Atomic::store(size: %d)", int(size));
}
}
};
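// These helpers back the "atomic_load" and "atomic_store" symbols registered with
// the Resolver below; they dispatch on the access size to the matching std::atomic
// operation.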
static void nop() {}
static void neverCalled() { UNREACHABLE("Should never be called"); }
static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }
#ifdef __ANDROID__
// Forwarders, since we can't take the address of builtins.
static void sync_synchronize() { __sync_synchronize(); }
static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }
static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
{
return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
}
static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
{
return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
}
static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
{
return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
}
static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
{
return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
}
#endif
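// Resolver maps the unmangled names of symbols referenced by JIT-compiled code to
// the addresses of their statically compiled implementations (libm functions,
// coroutine frame helpers, sanitizer TLS hooks, etc.).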
class Resolver
{
public:
using FunctionMap = llvm::StringMap<void *>;
FunctionMap functions;
Resolver()
{
#ifdef ENABLE_RR_PRINT
functions.try_emplace("rr::DebugPrintf", reinterpret_cast<void *>(rr::DebugPrintf));
#endif
functions.try_emplace("nop", reinterpret_cast<void *>(nop));
functions.try_emplace("floorf", reinterpret_cast<void *>(floorf));
functions.try_emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
functions.try_emplace("truncf", reinterpret_cast<void *>(truncf));
functions.try_emplace("printf", reinterpret_cast<void *>(printf));
functions.try_emplace("puts", reinterpret_cast<void *>(puts));
functions.try_emplace("fmodf", reinterpret_cast<void *>(fmodf));
functions.try_emplace("sinf", reinterpret_cast<void *>(sinf));
functions.try_emplace("cosf", reinterpret_cast<void *>(cosf));
functions.try_emplace("asinf", reinterpret_cast<void *>(asinf));
functions.try_emplace("acosf", reinterpret_cast<void *>(acosf));
functions.try_emplace("atanf", reinterpret_cast<void *>(atanf));
functions.try_emplace("sinhf", reinterpret_cast<void *>(sinhf));
functions.try_emplace("coshf", reinterpret_cast<void *>(coshf));
functions.try_emplace("tanhf", reinterpret_cast<void *>(tanhf));
functions.try_emplace("asinhf", reinterpret_cast<void *>(asinhf));
functions.try_emplace("acoshf", reinterpret_cast<void *>(acoshf));
functions.try_emplace("atanhf", reinterpret_cast<void *>(atanhf));
functions.try_emplace("atan2f", reinterpret_cast<void *>(atan2f));
functions.try_emplace("powf", reinterpret_cast<void *>(powf));
functions.try_emplace("expf", reinterpret_cast<void *>(expf));
functions.try_emplace("logf", reinterpret_cast<void *>(logf));
functions.try_emplace("exp2f", reinterpret_cast<void *>(exp2f));
functions.try_emplace("log2f", reinterpret_cast<void *>(log2f));
functions.try_emplace("fmaf", reinterpret_cast<void *>(fmaf));
functions.try_emplace("fmod", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(fmod)));
functions.try_emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
functions.try_emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
functions.try_emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
functions.try_emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
functions.try_emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
functions.try_emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
functions.try_emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
functions.try_emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
functions.try_emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
functions.try_emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
functions.try_emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
functions.try_emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
functions.try_emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
functions.try_emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
functions.try_emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
functions.try_emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
functions.try_emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));
functions.try_emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
functions.try_emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));
// FIXME(b/119409619): use an allocator here so we can control all memory allocations
functions.try_emplace("coroutine_alloc_frame", reinterpret_cast<void *>(coroutine_alloc_frame));
functions.try_emplace("coroutine_free_frame", reinterpret_cast<void *>(coroutine_free_frame));
functions.try_emplace("memset", reinterpret_cast<void *>(memset));
#ifdef __APPLE__
functions.try_emplace("__sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
#elif defined(__linux__)
functions.try_emplace("sincosf", reinterpret_cast<void *>(sincosf));
#elif defined(_WIN64)
functions.try_emplace("__chkstk", reinterpret_cast<void *>(__chkstk));
#elif defined(_WIN32)
functions.try_emplace("_chkstk", reinterpret_cast<void *>(_chkstk));
#endif
#ifdef __ARM_EABI__
functions.try_emplace("__aeabi_idivmod", reinterpret_cast<void *>(__aeabi_idivmod));
#endif
#ifdef __ANDROID__
functions.try_emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(neverCalled));
functions.try_emplace("sync_synchronize", reinterpret_cast<void *>(sync_synchronize));
functions.try_emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(sync_fetch_and_add_4));
functions.try_emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(sync_fetch_and_and_4));
functions.try_emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(sync_fetch_and_or_4));
functions.try_emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(sync_fetch_and_xor_4));
functions.try_emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(sync_fetch_and_sub_4));
functions.try_emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(sync_lock_test_and_set_4));
functions.try_emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(sync_val_compare_and_swap_4));
functions.try_emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(sync_fetch_and_max_4));
functions.try_emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(sync_fetch_and_min_4));
functions.try_emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(sync_fetch_and_umax_4));
functions.try_emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(sync_fetch_and_umin_4));
# if defined(__i386__)
// TODO(b/172974501): Workaround for an x86-32 issue where an R_386_PC32 relocation is used
// when calling a C function from Reactor code whose address is not associated with any symbol
// (since it's an absolute constant), but which still invokes the symbol resolver for "".
functions.try_emplace("", nullptr);
# endif
#endif
#if __has_feature(memory_sanitizer)
functions.try_emplace("__emutls_get_address", reinterpret_cast<void *>(rr::getTLSAddress));
functions.try_emplace("__emutls_v.__msan_param_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param)));
functions.try_emplace("__emutls_v.__msan_param_origin_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param_origin)));
functions.try_emplace("__emutls_v.__msan_retval_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::retval)));
functions.try_emplace("__emutls_v.__msan_retval_origin_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::retval_origin)));
functions.try_emplace("__emutls_v.__msan_va_arg_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg)));
functions.try_emplace("__emutls_v.__msan_va_arg_origin_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg_origin)));
functions.try_emplace("__emutls_v.__msan_va_arg_overflow_size_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg_overflow_size)));
functions.try_emplace("__emutls_v.__msan_origin_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::origin)));
functions.try_emplace("__msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));
functions.try_emplace("__msan_unpoison_param", reinterpret_cast<void *>(__msan_unpoison_param));
#endif
}
};
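// Invoked by ORC for symbols not already defined in the JITDylib. Defines each
// requested symbol found in the Resolver (or, under sanitizers, via dlsym()) as an
// absolute address in the dylib.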
llvm::Error tryToGenerate(
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
llvm::orc::LookupState &state,
#endif
llvm::orc::LookupKind kind,
llvm::orc::JITDylib &dylib,
llvm::orc::JITDylibLookupFlags flags,
const llvm::orc::SymbolLookupSet &set) override
{
static Resolver resolver;
llvm::orc::SymbolMap symbols;
#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
std::string missing;
#endif // !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
for(auto symbol : set)
{
auto name = symbol.first;
#if defined(__APPLE__)
// Trim the underscore from the start of the symbol. LLVM adds it for the Mach-O mangling convention.
ASSERT((*name)[0] == '_');
auto unmangled = (*name).drop_front(1);
#else
auto unmangled = *name;
#endif
auto it = resolver.functions.find(unmangled.str());
if(it != resolver.functions.end())
{
symbols[name] = llvm::JITEvaluatedSymbol(
static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(it->second)),
llvm::JITSymbolFlags::Exported);
continue;
}
#if __has_feature(memory_sanitizer) || (__has_feature(address_sanitizer) && ADDRESS_SANITIZER_INSTRUMENTATION_SUPPORTED)
// Sanitizers use a dynamically linked runtime. Instrumented routines reference some
// symbols from this library. Look them up dynamically in the default namespace.
// Note that this approach should not be used for other symbols: they might not be
// visible (e.g. due to static linking), we may wish to provide an alternate
// implementation, and it could be a security vulnerability.
void *address = dlsym(RTLD_DEFAULT, unmangled.data());
if(address)
{
symbols[name] = llvm::JITEvaluatedSymbol(
static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(address)),
llvm::JITSymbolFlags::Exported);
continue;
}
#endif
#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
missing += (missing.empty() ? "'" : ", '") + unmangled.str() + "'";
#endif
}
#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
// Missing functions will likely make the module fail in non-obvious ways.
if(!missing.empty())
{
WARN("Missing external functions: %s", missing.c_str());
}
#endif
if(symbols.empty())
{
return llvm::Error::success();
}
return dylib.define(llvm::orc::absoluteSymbols(std::move(symbols)));
}
};
// As we must support different LLVM versions, add a generic Unwrap for functions that return Expected<T> or the actual T.
// TODO(b/165000222): Remove after LLVM 11 upgrade
template<typename T>
T &Unwrap(llvm::Expected<T> &&v)
{
assert(v);
return v.get();
}
template<typename T>
T &Unwrap(T &&v)
{
return v;
}
// Sets *fatal to true if a diagnostic is received which makes a routine invalid or unusable.
struct FatalDiagnosticsHandler : public llvm::DiagnosticHandler
{
FatalDiagnosticsHandler(bool *fatal)
: fatal(fatal)
{}
bool handleDiagnostics(const llvm::DiagnosticInfo &info) override
{
switch(info.getSeverity())
{
case llvm::DS_Error:
ASSERT_MSG(false, "LLVM JIT compilation failure");
*fatal = true;
break;
case llvm::DS_Warning:
if(info.getKind() == llvm::DK_StackSize)
{
// Stack size limit exceeded
*fatal = true;
}
break;
case llvm::DS_Remark:
break;
case llvm::DS_Note:
break;
}
return true; // Diagnostic handled, don't let LLVM print it.
}
bool *fatal;
};
// JITRoutine is a rr::Routine that holds an LLVM JIT session, compiler and
// object layer, as each routine may require different target machine
// settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine
{
public:
JITRoutine(
std::unique_ptr<llvm::Module> module,
std::unique_ptr<llvm::LLVMContext> context,
const char *name,
llvm::Function **funcs,
size_t count)
: name(name)
#if LLVM_VERSION_MAJOR >= 13
, session(std::move(Unwrap(llvm::orc::SelfExecutorProcessControl::Create())))
#endif
#if USE_LEGACY_OBJECT_LINKING_LAYER
, objectLayer(session, [this]() {
return std::make_unique<llvm::SectionMemoryManager>(&memoryMapper);
})
#else
, objectLayer(session, llvm::cantFail(llvm::jitlink::InProcessMemoryManager::Create()))
#endif
, addresses(count)
{
bool fatalCompileIssue = false;
context->setDiagnosticHandler(std::make_unique<FatalDiagnosticsHandler>(&fatalCompileIssue), true);
#ifdef ENABLE_RR_DEBUG_INFO
// TODO(b/165000222): Update this on next LLVM roll.
// https://github.com/llvm/llvm-project/commit/98f2bb4461072347dcca7d2b1b9571b3a6525801
// introduces RTDyldObjectLinkingLayer::registerJITEventListener().
// The current API does not appear to have any way to bind the
// rr::DebugInfo::NotifyFreeingObject event.
# if LLVM_VERSION_MAJOR >= 12
objectLayer.setNotifyLoaded([](llvm::orc::MaterializationResponsibility &R,
const llvm::object::ObjectFile &obj,
const llvm::RuntimeDyld::LoadedObjectInfo &l) {
static std::atomic<uint64_t> unique_key{ 0 };
rr::DebugInfo::NotifyObjectEmitted(unique_key++, obj, l);
});
# else
objectLayer.setNotifyLoaded([](llvm::orc::VModuleKey,
const llvm::object::ObjectFile &obj,
const llvm::RuntimeDyld::LoadedObjectInfo &l) {
static std::atomic<uint64_t> unique_key{ 0 };
rr::DebugInfo::NotifyObjectEmitted(unique_key++, obj, l);
});
# endif
#endif // ENABLE_RR_DEBUG_INFO
if(JITGlobals::get()->getTargetTriple().isOSBinFormatCOFF())
{
// Hack to support symbol visibility in COFF.
// Matches hack in llvm::orc::LLJIT::createObjectLinkingLayer().
// See documentation on these functions for more detail.
objectLayer.setOverrideObjectFlagsWithResponsibilityFlags(true);
objectLayer.setAutoClaimResponsibilityForObjectSymbols(true);
}
llvm::SmallVector<llvm::orc::SymbolStringPtr, 8> functionNames(count);
llvm::orc::MangleAndInterner mangle(session, JITGlobals::get()->getDataLayout());
for(size_t i = 0; i < count; i++)
{
llvm::Function *func = funcs[i];
if(!func->hasName())
{
func->setName("f" + llvm::Twine(i).str());
}
functionNames[i] = mangle(func->getName());
}
#ifdef ENABLE_RR_EMIT_ASM_FILE
const auto asmFilename = rr::AsmFile::generateFilename(REACTOR_ASM_EMIT_DIR, name);
rr::AsmFile::emitAsmFile(asmFilename, JITGlobals::get()->getTargetMachineBuilder(), *module);
#endif
// Once the module is passed to the compileLayer, the llvm::Functions are freed.
// Make sure funcs are not referenced after this point.
funcs = nullptr;
llvm::orc::IRCompileLayer compileLayer(session, objectLayer, std::make_unique<llvm::orc::ConcurrentIRCompiler>(JITGlobals::get()->getTargetMachineBuilder()));
llvm::orc::JITDylib &dylib(Unwrap(session.createJITDylib("<routine>")));
dylib.addGenerator(std::make_unique<ExternalSymbolGenerator>());
llvm::cantFail(compileLayer.add(dylib, llvm::orc::ThreadSafeModule(std::move(module), std::move(context))));
// Resolve the function addresses.
for(size_t i = 0; i < count; i++)
{
fatalCompileIssue = false; // May be set to true by session.lookup()
// This is where the actual compilation happens.
auto symbol = session.lookup({ &dylib }, functionNames[i]);
ASSERT_MSG(symbol, "Failed to lookup address of routine function %d: %s",
(int)i, llvm::toString(symbol.takeError()).c_str());
if(fatalCompileIssue)
{
addresses[i] = nullptr;
}
else // Successful compilation
{
addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(symbol->getAddress()));
}
}
#ifdef ENABLE_RR_EMIT_ASM_FILE
rr::AsmFile::fixupAsmFile(asmFilename, addresses);
#endif
}
~JITRoutine()
{
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
if(auto err = session.endSession())
{
session.reportError(std::move(err));
}
#endif
}
const void *getEntry(int index) const override
{
return addresses[index];
}
private:
std::string name;
llvm::orc::ExecutionSession session;
MemoryMapper memoryMapper;
#if USE_LEGACY_OBJECT_LINKING_LAYER
llvm::orc::RTDyldObjectLinkingLayer objectLayer;
#else
llvm::orc::ObjectLinkingLayer objectLayer;
#endif
std::vector<const void *> addresses;
};
} // anonymous namespace
namespace rr {
JITBuilder::JITBuilder()
: context(new llvm::LLVMContext())
, module(new llvm::Module("", *context))
, builder(new llvm::IRBuilder<>(*context))
{
module->setTargetTriple(LLVM_DEFAULT_TARGET_TRIPLE);
module->setDataLayout(JITGlobals::get()->getDataLayout());
msanInstrumentation = getPragmaState(MemorySanitizerInstrumentation);
}
void JITBuilder::runPasses()
{
#if defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
if(llvm::verifyModule(*module, &llvm::errs()))
{
llvm::report_fatal_error("Invalid LLVM module");
}
#endif
int optimizationLevel = getPragmaState(OptimizationLevel);
#ifdef ENABLE_RR_DEBUG_INFO
if(debugInfo != nullptr)
{
optimizationLevel = 0; // Don't optimize if we're generating debug info.
}
#endif // ENABLE_RR_DEBUG_INFO
#if LLVM_VERSION_MAJOR >= 13 // New pass manager
llvm::LoopAnalysisManager lam;
llvm::FunctionAnalysisManager fam;
llvm::CGSCCAnalysisManager cgam;
llvm::ModuleAnalysisManager mam;
llvm::PassBuilder pb;
pb.registerModuleAnalyses(mam);
pb.registerCGSCCAnalyses(cgam);
pb.registerFunctionAnalyses(fam);
pb.registerLoopAnalyses(lam);
pb.crossRegisterProxies(lam, fam, cgam, mam);
llvm::ModulePassManager pm;
llvm::FunctionPassManager fpm;
if(coroutine.id)
{
// Adds mandatory coroutine transforms.
pm = pb.buildO0DefaultPipeline(llvm::OptimizationLevel::O0);
}
if(optimizationLevel > 0)
{
fpm.addPass(llvm::SROAPass(llvm::SROAOptions::PreserveCFG));
fpm.addPass(llvm::InstCombinePass());
}
if(!fpm.isEmpty())
{
pm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(fpm)));
}
if(__has_feature(memory_sanitizer) && msanInstrumentation)
{
llvm::MemorySanitizerOptions msanOpts(0 /* TrackOrigins */, false /* Recover */, false /* Kernel */, true /* EagerChecks */);
pm.addPass(llvm::MemorySanitizerPass(msanOpts));
}
if(__has_feature(address_sanitizer) && ADDRESS_SANITIZER_INSTRUMENTATION_SUPPORTED)
{
pm.addPass(llvm::AddressSanitizerPass(llvm::AddressSanitizerOptions{}));
}
pm.run(*module, mam);
#else // Legacy pass manager
llvm::legacy::PassManager passManager;
if(coroutine.id)
{
// Run mandatory coroutine transforms.
passManager.add(llvm::createCoroEarlyLegacyPass());
passManager.add(llvm::createCoroSplitLegacyPass());
passManager.add(llvm::createCoroElideLegacyPass());
passManager.add(llvm::createBarrierNoopPass());
passManager.add(llvm::createCoroCleanupLegacyPass());
}
if(optimizationLevel > 0)
{
passManager.add(llvm::createSROAPass());
passManager.add(llvm::createInstructionCombiningPass());
}
if(__has_feature(memory_sanitizer) && msanInstrumentation)
{
llvm::MemorySanitizerOptions msanOpts(0 /* TrackOrigins */, false /* Recover */, false /* Kernel */);
passManager.add(llvm::createMemorySanitizerLegacyPassPass(msanOpts));
}
if(__has_feature(address_sanitizer) && ADDRESS_SANITIZER_INSTRUMENTATION_SUPPORTED)
{
passManager.add(llvm::createAddressSanitizerFunctionPass());
}
passManager.run(*module);
#endif
}
std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(const char *name, llvm::Function **funcs, size_t count)
{
ASSERT(module);
return std::make_shared<JITRoutine>(std::move(module), std::move(context), name, funcs, count);
}
} // namespace rr