Subzero Win64: fix Call for ptr and spillover args

When lowering Call to external functions on x64, Subzero was not
correctly allocating 32 bytes for the shadow store. This became
apparent when trying to pass pointers to local variables to Call, in
which case the address of the stack variable would be passed via
register. However, on Win64 the called external function is allowed to
use the first 32 bytes on the stack - the shadow store - as scratch
space. In my use-case, the called function ended up overwriting the
local stack variable that the caller passed a pointer to.

Another use-case where this error was apparent was passing more than
four arguments to a Call, in which case the callee expects the spilled
args to be located after the shadow store.

This fixes these problems by correctly allocating the 32 byte shadow
store when lowering Call on x64.

Note that this is similar to e81e8b3c3e15bb727a959eb8d184ef3b02e8f912
where I fixed the prolog to the generated entry point to take the shadow
store into account.

Bug: b/144688789
Change-Id: I9d7960bb9c520f5cc3c0ad885305ca5a3c06b5dc
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/38473
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 75702fb..584bd8b 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -1252,6 +1252,147 @@
 	EXPECT_EQ(c.str, "hello world");
 }
 
+TEST(ReactorUnitTests, Call_Args4)
+{
+	struct Class
+	{
+		static int Func(int a, int b, int c, int d)
+		{
+			return a + b + c + d;
+		}
+	};
+
+	{
+		FunctionT<int()> function;
+		{
+			auto res = Call(Class::Func, 1, 2, 3, 4);
+			Return(res);
+		}
+
+		auto routine = function("one");
+
+		if(routine)
+		{
+			int res = routine();
+			EXPECT_EQ(res, 1 + 2 + 3 + 4);
+		}
+	}
+}
+
+TEST(ReactorUnitTests, Call_Args5)
+{
+	struct Class
+	{
+		static int Func(int a, int b, int c, int d, int e)
+		{
+			return a + b + c + d + e;
+		}
+	};
+
+	{
+		FunctionT<int()> function;
+		{
+			auto res = Call(Class::Func, 1, 2, 3, 4, 5);
+			Return(res);
+		}
+
+		auto routine = function("one");
+
+		if(routine)
+		{
+			int res = routine();
+			EXPECT_EQ(res, 1 + 2 + 3 + 4 + 5);
+		}
+	}
+}
+
+TEST(ReactorUnitTests, Call_ArgsMany)
+{
+	struct Class
+	{
+		static int Func(int a, int b, int c, int d, int e, int f, int g, int h)
+		{
+			return a + b + c + d + e + f + g + h;
+		}
+	};
+
+	{
+		FunctionT<int()> function;
+		{
+			auto res = Call(Class::Func, 1, 2, 3, 4, 5, 6, 7, 8);
+			Return(res);
+		}
+
+		auto routine = function("one");
+
+		if(routine)
+		{
+			int res = routine();
+			EXPECT_EQ(res, 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8);
+		}
+	}
+}
+
+TEST(ReactorUnitTests, Call_ArgsMixed)
+{
+	struct Class
+	{
+		static int Func(int a, float b, int* c, float* d, int e, float f, int* g, float* h)
+		{
+			return a + b + *c + *d + e + f + *g + *h;
+		}
+	};
+
+	{
+		FunctionT<int()> function;
+		{
+			Int c(3);
+			Float d(4);
+			Int g(7);
+			Float h(8);
+			auto res = Call(Class::Func, 1, 2.f, &c, &d, 5, 6.f, &g, &h);
+			Return(res);
+		}
+
+		auto routine = function("one");
+
+		if(routine)
+		{
+			int res = routine();
+			EXPECT_EQ(res, 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8);
+		}
+	}
+}
+
+TEST(ReactorUnitTests, Call_ArgsPointer)
+{
+	struct Class
+	{
+		static int Func(int *a)
+		{
+			return *a;
+		}
+	};
+
+	{
+		FunctionT<int()> function;
+		{
+			Int a(12345);
+			auto res = Call(Class::Func, &a);
+			Return(res);
+		}
+
+		auto routine = function("one");
+
+		if(routine)
+		{
+			int res = routine();
+			EXPECT_EQ(res, 12345);
+		}
+	}
+}
+
+
 TEST(ReactorUnitTests, CallExternalCallRoutine)
 {
 	// routine1 calls Class::Func, passing it a pointer to routine2, and Class::Func calls routine2
diff --git a/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h b/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
index 4731b5d..e7d04f6 100644
--- a/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
+++ b/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
@@ -89,6 +89,19 @@
 
 namespace X86NAMESPACE {
 
+// The Microsoft x64 ABI requires the caller to allocate a minimum 32-byte
+// "shadow store" (aka "home space") so that the callee may copy the 4
+// register args to it. Returns 0 for other ABIs and non-64-bit targets.
+template <typename Traits> SizeT getShadowStoreSize() {
+#if defined(SUBZERO_USE_MICROSOFT_ABI)
+  static const SizeT ShadowStoreSize =
+      Traits::Is64Bit ? 4 * typeWidthInBytes(Traits::WordType) : 0;
+  return ShadowStoreSize;
+#else
+  return 0;
+#endif
+}
+
 using Utils::BoolFlagSaver;
 
 template <typename Traits> class BoolFoldingEntry {
@@ -1051,14 +1064,7 @@
   // space on the frame for globals (variables with multi-block lifetime), and
   // one block to share for locals (single-block lifetime).
 
-  // The Microsoft x64 ABI requires the caller to allocate a minimum 32 byte
-  // "shadow store" (aka "home space") so that the callee may copy the 4
-  // register args to it.
-#if defined(SUBZERO_USE_MICROSOFT_ABI)
-	const SizeT ShadowStoreSize = Traits::Is64Bit ? 4 * typeWidthInBytes(Traits::WordType) : 0;
-#else
-	const SizeT ShadowStoreSize = 0;
-#endif
+  const SizeT ShadowStoreSize = getShadowStoreSize<Traits>();
 
   // StackPointer: points just past return address of calling function
 
@@ -2673,6 +2679,8 @@
   OperandList StackArgs, StackArgLocations;
   uint32_t ParameterAreaSizeBytes = 0;
 
+  ParameterAreaSizeBytes += getShadowStoreSize<Traits>();
+
   // Classify each argument operand according to the location where the argument
   // is passed.
   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
@@ -7656,16 +7664,23 @@
   uint32_t OutArgumentsSizeBytes = 0;
   uint32_t XmmArgCount = 0;
   uint32_t GprArgCount = 0;
-  for (Type Ty : ArgTypes) {
+  for (SizeT i = 0, NumArgTypes = ArgTypes.size(); i < NumArgTypes; ++i) {
+    Type Ty = ArgTypes[i];
     // The PNaCl ABI requires the width of arguments to be at least 32 bits.
     assert(typeWidthInBytes(Ty) >= 4);
-    if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
+    if (isVectorType(Ty) &&
+        Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, XmmArgCount))
+            .hasValue()) {
       ++XmmArgCount;
     } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
-               XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
+               Traits::getRegisterForXmmArgNum(
+                   Traits::getArgIndex(i, XmmArgCount))
+                   .hasValue()) {
       ++XmmArgCount;
     } else if (isScalarIntegerType(Ty) &&
-               GprArgCount < Traits::X86_MAX_GPR_ARGS) {
+               Traits::getRegisterForGprArgNum(
+                   Ty, Traits::getArgIndex(i, GprArgCount))
+                   .hasValue()) {
       // The 64 bit ABI allows some integers to be passed in GPRs.
       ++GprArgCount;
     } else {
@@ -7704,7 +7719,7 @@
   Variable *Dest = Instr->getDest();
   if (Dest != nullptr)
     ReturnType = Dest->getType();
-  return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
+  return getShadowStoreSize<Traits>() + getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
 }
 
 template <typename TraitsType>