Fix undefined behavior from unaligned accesses.

Unaligned accesses are undefined behavior, but they're common in our
ELF binary patching code for variable length instruction sets (namely
x86).

We now use memcpy() instead, relying on target-specific compiler
optimizations to make it just as efficient. Utility functions
(unaligned_read(), unaligned_write()) and classes (unaligned_ref,
unaligned_ptr) were added to aid readability.

Bug b/119823623

Change-Id: I8a82672a0d18d1e1783f580eb629f8cc09a009cd
Reviewed-on: https://swiftshader-review.googlesource.com/c/22828
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/ExecutableMemory.hpp b/src/Reactor/ExecutableMemory.hpp
index 1a2b4bd..95dac5e 100644
--- a/src/Reactor/ExecutableMemory.hpp
+++ b/src/Reactor/ExecutableMemory.hpp
@@ -15,8 +15,9 @@
 #ifndef rr_ExecutableMemory_hpp
 #define rr_ExecutableMemory_hpp
 
-#include <stddef.h>
-#include <stdint.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
 
 namespace rr
 {
@@ -25,6 +26,67 @@
 void *allocateExecutable(size_t bytes);   // Allocates memory that can be made executable using markExecutable()
 void markExecutable(void *memory, size_t bytes);
 void deallocateExecutable(void *memory, size_t bytes);
+
+template<typename P>
+P unaligned_read(P *address)
+{
+	P value;
+	memcpy(&value, address, sizeof(P));
+	return value;
+}
+
+template<typename P, typename V>
+void unaligned_write(P *address, V value)
+{
+	static_assert(sizeof(V) == sizeof(P), "value size must match pointee size");
+	memcpy(address, &value, sizeof(P));
+}
+
+template<typename P>
+class unaligned_ref
+{
+public:
+	explicit unaligned_ref(void *ptr) : ptr((P*)ptr) {}
+
+	template<typename V>
+	P operator=(V value)
+	{
+		unaligned_write(ptr, value);
+		return value;
+	}
+
+	operator P()
+	{
+		return unaligned_read((P*)ptr);
+	}
+
+private:
+	P *ptr;
+};
+
+template<typename P>
+class unaligned_ptr
+{
+	template<typename S>
+	friend class unaligned_ptr;
+
+public:
+	unaligned_ptr(P *ptr) : ptr(ptr) {}
+
+	unaligned_ref<P> operator*()
+	{
+		return unaligned_ref<P>(ptr);
+	}
+
+	template<typename S>
+	operator S()
+	{
+		return S(ptr);
+	}
+
+private:
+	void *ptr;
+};
 }
 
 #endif   // rr_ExecutableMemory_hpp
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 76f5a72..08fc013 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -217,8 +217,6 @@
 	{
 		const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 
-		intptr_t address = (intptr_t)elfHeader + target->sh_offset;
-		int32_t *patchSite = (int*)(address + relocation.r_offset);
 		uint32_t index = relocation.getSymbol();
 		int table = relocationTable.sh_link;
 		void *symbolValue = nullptr;
@@ -250,6 +248,9 @@
 			}
 		}
 
+		intptr_t address = (intptr_t)elfHeader + target->sh_offset;
+		unaligned_ptr<int32_t> patchSite = (int32_t*)(address + relocation.r_offset);
+
 		if(CPUID::ARM)
 		{
 			switch(relocation.getType())
@@ -301,8 +302,6 @@
 	{
 		const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 
-		intptr_t address = (intptr_t)elfHeader + target->sh_offset;
-		int32_t *patchSite = (int*)(address + relocation.r_offset);
 		uint32_t index = relocation.getSymbol();
 		int table = relocationTable.sh_link;
 		void *symbolValue = nullptr;
@@ -334,19 +333,23 @@
 			}
 		}
 
+		intptr_t address = (intptr_t)elfHeader + target->sh_offset;
+		unaligned_ptr<int32_t> patchSite32 = (int32_t*)(address + relocation.r_offset);
+		unaligned_ptr<int64_t> patchSite64 = (int64_t*)(address + relocation.r_offset);
+
 		switch(relocation.getType())
 		{
 		case R_X86_64_NONE:
 			// No relocation
 			break;
 		case R_X86_64_64:
-			*(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
+			*patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
 			break;
 		case R_X86_64_PC32:
-			*patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
+			*patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
 			break;
 		case R_X86_64_32S:
-			*patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
+			*patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
 			break;
 		default:
 			assert(false && "Unsupported relocation type");