LLVMReactor: Align the coroutine stack frame.

The coroutine stack frame was previously allocated (via malloc) with no particular alignment guarantee, causing spurious crashes when vectors were loaded from or stored to the stack frame.

Bug: b/135691587
Change-Id: I0240fdeb08df17dbbc156e508013376e15c514b4
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33211
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index a65f843..cc70e91 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -125,6 +125,32 @@
 	}
 #endif // ENABLE_RR_PRINT
 
+	template <typename T> // Rounds val up to the nearest multiple of alignment (val itself if it is already a multiple).
+	T alignUp(T val, T alignment)
+	{
+		return alignment * ((val + alignment - 1) / alignment); // Relies on truncating division: assumes T is an integer type and alignment != 0 - TODO confirm for all callers.
+	}
+
+	void* alignedAlloc(size_t size, size_t alignment) // Returns size bytes whose address is a multiple of alignment; release with alignedFree().
+	{
+		ASSERT(alignment > 0 && alignment < 256); // Zero would divide by zero in alignUp(); the upper bound keeps the offset within one byte.
+		auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment]; // Payload + one offset byte + worst-case padding.
+		auto aligned = allocation;
+		aligned += sizeof(uint8_t); // Make space for the base-address offset.
+		aligned = reinterpret_cast<uint8_t*>(alignUp<uintptr_t>(reinterpret_cast<uintptr_t>(aligned), alignment)); // Explicit T: size_t and uintptr_t need not be the same type.
+		auto offset = static_cast<uint8_t>(aligned - allocation); // Always in [1, alignment], so it fits in a uint8_t.
+		aligned[-1] = offset; // Stored in the byte immediately before the returned pointer.
+		return aligned;
+	}
+
+	void alignedFree(void* ptr) // Releases memory returned by alignedAlloc(); a null ptr is ignored.
+	{
+		if (ptr == nullptr) { return; } // free(), which this replaces, tolerated null; reading ptr[-1] would not.
+		auto aligned = reinterpret_cast<uint8_t*>(ptr);
+		auto allocation = aligned - aligned[-1]; // The preceding byte holds the distance back to the start of the new[] block.
+		delete[] allocation;
+	}
+
 	llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
 	{
 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
@@ -539,6 +565,9 @@
 				static void nop() {}
 				static void neverCalled() { UNREACHABLE("Should never be called"); }
 
+				static void* coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); } // Allocates a coroutine frame of size bytes with 16-byte alignment.
+				static void coroutine_free_frame(void* ptr) { alignedFree(ptr); } // Releases a frame obtained from coroutine_alloc_frame().
+
 #ifdef __ANDROID__
 				// forwarders since we can't take address of builtins
 				static void sync_synchronize() { __sync_synchronize(); }
@@ -574,8 +603,8 @@
 			func_.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
 
 			// FIXME (b/119409619): use an allocator here so we can control all memory allocations
-			func_.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(malloc));
-			func_.emplace("coroutine_free_frame", reinterpret_cast<void*>(free));
+			func_.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(F::coroutine_alloc_frame));
+			func_.emplace("coroutine_free_frame", reinterpret_cast<void*>(F::coroutine_free_frame));
 
 #ifdef __APPLE__
 			func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));