Implement MemorySanitizer instrumentation of Reactor routines

MemorySanitizer checks for uninitialized memory and variable usage. It
instruments each instruction to update 'shadow' memory which indicates
which bits are 'poisoned', and inserts tests for dereferencing pointers
and conditional branching which makes use of not fully initialized data.

The instrumentation is done by the llvm::MemorySanitizerLegacyPass.
Functions must opt-in to the instrumentation with the 'SanitizeMemory'
attribute.

MemorySanitizer relies on several TLS variables for storing the shadow
value of function parameters and return values. The JIT makes calls to
__emutls_get_address() to obtain the address of these variables,
passing it the address of a __emutls_v.* control structure unique to
the TLS variable. We replace the former with our own function through
the symbol resolver, and the latter are represented by enum values, to
allow obtaining the real TLS variable's address in C++ code.

This is enabled behind a REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION
flag. For now, it is only enabled for CMake builds.

Bug: b/155148722
Change-Id: I6d755244589c9b0de19a283f9dff5d8a3bf6f24b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/49829
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c50fc2b..49dadc5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,6 +166,10 @@
 set(REACTOR_DEFAULT_OPT_LEVEL "Default" CACHE STRING "Reactor default optimization level")
 set_property(CACHE REACTOR_DEFAULT_OPT_LEVEL PROPERTY STRINGS "None" "Less" "Default" "Aggressive")
 
+# Enable instrumentation of Reactor routines for MemorySanitizer builds (LLVM backend). On by default; pass -DREACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION=OFF to disable.
+# TODO(b/155148722): Remove when unconditionally instrumenting for all build systems.
+option(REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION "Instrument Reactor routines for MemorySanitizer builds (LLVM backend)" TRUE)
+
 if(NOT DEFINED SWIFTSHADER_LOGGING_LEVEL)
     set(SWIFTSHADER_LOGGING_LEVEL "Info" CACHE STRING "SwiftShader logging level")
     set_property(CACHE SWIFTSHADER_LOGGING_LEVEL PROPERTY STRINGS "Verbose" "Debug" "Info" "Warn" "Error" "Fatal" "Disabled")
@@ -511,7 +515,6 @@
 
     if(SWIFTSHADER_MSAN)
         if(NOT DEFINED ENV{SWIFTSHADER_MSAN_INSTRUMENTED_LIBCXX_PATH})
-
             message(FATAL_ERROR " \n"
                     " MemorySanitizer usage requires an instrumented build of libc++.\n"
                     " Set the SWIFTSHADER_MSAN_INSTRUMENTED_LIBCXX_PATH environment variable to the\n"
diff --git a/src/Reactor/CMakeLists.txt b/src/Reactor/CMakeLists.txt
index be96c61..ea450d7 100644
--- a/src/Reactor/CMakeLists.txt
+++ b/src/Reactor/CMakeLists.txt
@@ -64,6 +64,11 @@
     list(APPEND REACTOR_PRIVATE_LINK_LIBRARIES Boost::boost)
 endif(REACTOR_EMIT_DEBUG_INFO)
 
+# Publicly define REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION so Reactor and the code using it agree on it.
+# TODO(b/155148722): Remove when unconditionally instrumenting for all build systems.
+if(REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION)
+    list(APPEND REACTOR_PUBLIC_COMPILE_DEFINITIONS "REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION")
+endif()
 
 # SubzeroReactor library
 
@@ -83,6 +88,8 @@
 )
 
 target_compile_definitions(ReactorSubzero
+    PUBLIC
+        ${REACTOR_PUBLIC_COMPILE_DEFINITIONS}
     PRIVATE
         ${REACTOR_PRIVATE_COMPILE_DEFINITIONS}
 )
@@ -118,6 +125,8 @@
 )
 
 target_compile_definitions(ReactorLLVM
+    PUBLIC
+        ${REACTOR_PUBLIC_COMPILE_DEFINITIONS}
     PRIVATE
         ${REACTOR_PRIVATE_COMPILE_DEFINITIONS}
 )
diff --git a/src/Reactor/LLVMJIT.cpp b/src/Reactor/LLVMJIT.cpp
index 567f11d..fc5a74d 100644
--- a/src/Reactor/LLVMJIT.cpp
+++ b/src/Reactor/LLVMJIT.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/Host.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 
@@ -53,6 +54,53 @@
 #	include "sanitizer/msan_interface.h"  // TODO(b/155148722): Remove when we no longer unpoison all writes.
 
 #	include <dlfcn.h>  // dlsym()
+
+// MemorySanitizer uses thread-local storage (TLS) data arrays for passing around
+// the 'shadow' values of function arguments and return values. The LLVM JIT can't
+// access TLS directly, but it calls __emutls_get_address() to obtain the address.
+// Typically, it would be passed a pointer to an __emutls_control structure with a
+// name starting with "__emutls_v." that represents the TLS. Both the address of
+// __emutls_get_address and the __emutls_v. structures are provided to the JIT by
+// the symbol resolver, which can be overridden.
+// We take advantage of this by substituting __emutls_get_address() with our own
+// implementation, namely rr::getTLSAddress(), and substituting the __emutls_v
+// variables with rr::MSanTLS enums. getTLSAddress() can then provide the address
+// of the real TLS variable corresponding to the enum, in statically compiled C++.
+
+// Forward declare the real TLS variables used by MemorySanitizer. These are
+// defined in llvm-project/compiler-rt/lib/msan/msan.cpp.
+extern __thread unsigned long long __msan_param_tls[];
+extern __thread unsigned long long __msan_retval_tls[];
+extern __thread unsigned long long __msan_va_arg_tls[];
+extern __thread unsigned long long __msan_va_arg_overflow_size_tls;
+
+namespace rr {
+// Values handed to the JIT in place of the __emutls_v.* control structures (see the symbol resolver).
+enum class MSanTLS
+{
+	param = 1,            // __msan_param_tls
+	retval,               // __msan_retval_tls
+	va_arg,               // __msan_va_arg_tls
+	va_arg_overflow_size  // __msan_va_arg_overflow_size_tls
+};
+
+// Substitute for __emutls_get_address(): 'control' holds an MSanTLS value, not a real pointer.
+static void *getTLSAddress(void *control)
+{
+	auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
+	switch(tlsIndex)
+	{
+		case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
+		case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
+		case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
+		case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
+		default:
+			UNSUPPORTED("MemorySanitizer used an unrecognized TLS variable: %d", static_cast<int>(tlsIndex));  // "%d" needs an int, not a scoped enum.
+			return nullptr;
+	}
+}
+
+}  // namespace rr
 #endif
 
 namespace {
@@ -111,6 +159,10 @@
 		jitTargetMachineBuilder.setCPU(llvm::sys::getHostCPUName());
 #endif
 
+		// Reactor's MemorySanitizer support depends on intercepting __emutls_get_address calls.
+		ASSERT(!__has_feature(memory_sanitizer) || (jitTargetMachineBuilder.getOptions().ExplicitEmulatedTLS &&
+		                                            jitTargetMachineBuilder.getOptions().EmulatedTLS));
+
 		auto dataLayout = jitTargetMachineBuilder.getDefaultDataLayoutForTarget();
 		ASSERT_MSG(dataLayout, "JITTargetMachineBuilder::getDefaultDataLayoutForTarget() failed");
 
@@ -416,6 +468,8 @@
 			functions.try_emplace("coroutine_alloc_frame", reinterpret_cast<void *>(coroutine_alloc_frame));
 			functions.try_emplace("coroutine_free_frame", reinterpret_cast<void *>(coroutine_free_frame));
 
+			functions.try_emplace("memset", reinterpret_cast<void *>(memset));
+
 #ifdef __APPLE__
 			functions.try_emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
 #elif defined(__linux__)
@@ -446,6 +500,12 @@
 #endif
 #if __has_feature(memory_sanitizer)
 			functions.try_emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));  // TODO(b/155148722): Remove when we no longer unpoison all writes.
+
+			functions.try_emplace("emutls_get_address", reinterpret_cast<void *>(rr::getTLSAddress));
+			functions.try_emplace("emutls_v.__msan_retval_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::retval)));
+			functions.try_emplace("emutls_v.__msan_param_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param)));
+			functions.try_emplace("emutls_v.__msan_va_arg_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg)));
+			functions.try_emplace("emutls_v.__msan_va_arg_overflow_size_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg_overflow_size)));
 #endif
 		}
 	};
@@ -664,6 +724,13 @@
 
 	llvm::legacy::PassManager passManager;
 
+#if REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION  // Test the value, not definedness: Reactor.hpp defaults the macro to 0.
+	if(__has_feature(memory_sanitizer))
+	{
+		passManager.add(llvm::createMemorySanitizerLegacyPassPass());
+	}
+#endif
+
 	for(auto pass : cfg.getOptimization().getPasses())
 	{
 		switch(pass)
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 31491c3..5f55ca8 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -490,6 +490,11 @@
 	auto func = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, name, jit->module.get());
 	func->setDoesNotThrow();
 	func->setCallingConv(llvm::CallingConv::C);
+	if(__has_feature(memory_sanitizer))
+	{
+		func->addFnAttr(llvm::Attribute::SanitizeMemory);
+	}
+
 	return func;
 }
 
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 4b07ce9..05d5925 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -52,6 +52,18 @@
 }
 #endif
 
+// A Clang extension to determine compiler features.
+// We use it to detect Sanitizer builds (e.g. -fsanitize=memory).
+#ifndef __has_feature
+#	define __has_feature(x) 0
+#endif
+
+// Whether Reactor routine instrumentation is enabled for MSan builds.
+// TODO(b/155148722): Remove when unconditionally instrumenting for all build systems.
+#if !defined REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION
+#	define REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION 0
+#endif
+
 namespace rr {
 
 std::string BackendName();
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index f1deb2f..5482f82 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -93,8 +93,24 @@
 
 	auto routine = function("one");
 
-	int result = routine();
-	EXPECT_EQ(result, result);  // Anything is fine, just don't crash
+	if(!__has_feature(memory_sanitizer) || !REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION)
+	{
+		int result = routine();
+		EXPECT_EQ(result, result);  // Anything is fine, just don't crash
+	}
+	else
+	{
+		// Optimizations may turn the conditional If() in the Reactor code
+		// into a conditional move or arithmetic operations, which would not
+		// trigger a MemorySanitizer error. However, in that case the equals
+		// operator below should trigger it before the abort is reached.
+		EXPECT_DEATH(
+		    {
+			    int result = routine();
+			    if(result == 0) abort();
+		    },
+		    "MemorySanitizer: use-of-uninitialized-value");
+	}
 }
 
 TEST(ReactorUnitTests, Unreachable)
diff --git a/third_party/llvm-10.0/CMakeLists.txt b/third_party/llvm-10.0/CMakeLists.txt
index e0a4416..6018a12 100644
--- a/third_party/llvm-10.0/CMakeLists.txt
+++ b/third_party/llvm-10.0/CMakeLists.txt
@@ -688,6 +688,7 @@
     ${LLVM_DIR}/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
     ${LLVM_DIR}/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
     ${LLVM_DIR}/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
+    ${LLVM_DIR}/lib/Transforms/Instrumentation/MemorySanitizer.cpp
     ${LLVM_DIR}/lib/Transforms/IPO/ArgumentPromotion.cpp
     ${LLVM_DIR}/lib/Transforms/IPO/Attributor.cpp
     ${LLVM_DIR}/lib/Transforms/IPO/BarrierNoopPass.cpp