marl: manually apply partial patch to fix x86 crashes

Original patch: https://github.com/google/marl/commit/69797fcf044d7c4b077a38c587e5dd168b530d0b

Bug: angleproject:4482
Change-Id: I1aea95267e63bd776479c74bc2160ecf83f9e816
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/42549
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/third_party/marl/src/osfiber_x86.c b/third_party/marl/src/osfiber_x86.c
index 6c486aa..cac72cb 100644
--- a/third_party/marl/src/osfiber_x86.c
+++ b/third_party/marl/src/osfiber_x86.c
@@ -25,12 +25,19 @@
                            uint32_t stack_size,
                            void (*target)(void*),
                            void* arg) {
+  // The stack pointer needs to be 16-byte aligned when making a 'call'.
+  // The 'call' instruction automatically pushes the return address onto the
+  // stack (4 bytes) before making the jump.
+  // The marl_fiber_swap() assembly function does not use 'call', instead it
+  // uses 'jmp', so we need to offset the ESP pointer by 4 bytes so that the
+  // stack is still 16-byte aligned when the return target is stack-popped by
+  // the callee.
   uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size);
   ctx->EIP = (uintptr_t)&marl_fiber_trampoline;
-  ctx->ESP = (uintptr_t)&stack_top[-3];
-  stack_top[-1] = (uintptr_t)arg;
-  stack_top[-2] = (uintptr_t)target;
-  stack_top[-3] = 0;  // No return target.
+  ctx->ESP = (uintptr_t)&stack_top[-5];
+  stack_top[-3] = (uintptr_t)arg;
+  stack_top[-4] = (uintptr_t)target;
+  stack_top[-5] = 0;  // No return target.
 }
 
 #endif  // defined(__i386__)