Lower stacksave and stackrestore intrinsics.

The lowering just copies the current stack pointer (esp) into a variable for stacksave, and from a variable back into esp for stackrestore.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/396993009
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh
index 2073fb5..a5dc31f 100755
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -71,6 +71,13 @@
         --driver=test_icmp_main.cpp \
         --output=test_icmp_O${optlevel}
 
+    ./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
+        --dir="${OUTDIR}" \
+        --llvm-bin-path="${LLVM_BIN_PATH}" \
+        --test=test_stacksave.c \
+        --driver=test_stacksave_main.c \
+        --output=test_stacksave_O${optlevel}
+
     # Compile the non-subzero object files straight from source
     # since the native LLVM backend does not understand how to
     # lower NaCl-specific intrinsics.
@@ -100,6 +107,7 @@
     "${OUTDIR}"/test_fcmp_O${optlevel}
     "${OUTDIR}"/test_global_O${optlevel}
     "${OUTDIR}"/test_icmp_O${optlevel}
+    "${OUTDIR}"/test_stacksave_O${optlevel}
     "${OUTDIR}"/test_sync_atomic_O${optlevel}
     "${OUTDIR}"/test_vector_ops_O${optlevel}
 done
diff --git a/crosstest/test_stacksave.c b/crosstest/test_stacksave.c
new file mode 100644
index 0000000..b0e9b5e
--- /dev/null
+++ b/crosstest/test_stacksave.c
@@ -0,0 +1,76 @@
+//===- subzero/crosstest/test_stacksave.c - Implementation for tests ------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This aims to test that C99's VLAs (which use stacksave/stackrestore
+// intrinsics) work fine.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#include "test_stacksave.h"
+DECLARE_TESTS()
+
+/* Fills vla[start..size) with i + inc, then mixes three of its elements
+ * (requires start < size). NOTE: This has 0 stacksaves, because the vla isn't
+ * in a loop, so the vla can just be freed by the epilogue. */
+uint32_t test_basic_vla(uint32_t size, uint32_t start, uint32_t inc) {
+  uint32_t vla[size];
+  uint32_t mid = start + ((size - start) / 2);
+  for (uint32_t i = start; i < size; ++i) {
+    vla[i] = i + inc;
+  }
+  return (vla[start] << 2) + (vla[mid] << 1) + vla[size - 1];
+}
+
+static uint32_t __attribute__((noinline)) foo(uint32_t x) {
+  return x * x; /* noinline, so the VLA loops below make a real call. */
+}
+
+/* Sums foo(start * j + inc) over a fresh vla of (size - i) elements for each
+ * i in [start, size). NOTE: This has 1 stacksave, because the vla is in a loop
+ * and should be freed before the next iteration. */
+uint32_t test_vla_in_loop(uint32_t size, uint32_t start, uint32_t inc) {
+  uint32_t sum = 0;
+  for (uint32_t i = start; i < size; ++i) {
+    uint32_t size1 = size - i;
+    uint32_t vla[size1];
+    for (uint32_t j = 0; j < size1; ++j) {
+      /* Adjust stack again with a function call. */
+      vla[j] = foo(start * j + inc);
+    }
+    for (uint32_t j = 0; j < size1; ++j) {
+      sum += vla[j];
+    }
+  }
+  return sum;
+}
+
+uint32_t test_two_vlas_in_loops(uint32_t size, uint32_t start, uint32_t inc) {
+  uint32_t sum = 0; /* Total of every vla1 element over all i iterations. */
+  for (uint32_t i = start; i < size; ++i) {
+    uint32_t size1 = size - i;
+    uint32_t vla1[size1]; /* Outer vla, re-created on each i iteration. */
+    for (uint32_t j = 0; j < size1; ++j) {
+      uint32_t size2 = size - j;
+      uint32_t start2 = 0;
+      uint32_t mid2 = size2 / 2;
+      uint32_t vla2[size2]; /* Inner vla, declared while vla1 is live. */
+      for (uint32_t k = start2; k < size2; ++k) {
+        /* Adjust stack again with a function call. */
+        vla2[k] = foo(start * k + inc);
+      }
+      vla1[j] = (vla2[start2] << 2) + (vla2[mid2] << 1) + vla2[size2 - 1];
+    }
+    for (uint32_t j = 0; j < size1; ++j) {
+      sum += vla1[j];
+    }
+  }
+  return sum;
+}
diff --git a/crosstest/test_stacksave.h b/crosstest/test_stacksave.h
new file mode 100644
index 0000000..8d0af01
--- /dev/null
+++ b/crosstest/test_stacksave.h
@@ -0,0 +1,26 @@
+//===- subzero/crosstest/test_stacksave.h - Test prototypes -----*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function prototypes for cross testing
+// stacksave and stackrestore intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_STACKSAVE_H
+#define TEST_STACKSAVE_H
+/* Declares the three tests with PREFIX; needs <stdint.h> included first. */
+#define DECLARE_TESTS(PREFIX)                                            \
+  uint32_t PREFIX##test_basic_vla(uint32_t size, uint32_t start,         \
+                                  uint32_t inc);                         \
+  uint32_t PREFIX##test_vla_in_loop(uint32_t size, uint32_t start,       \
+                                    uint32_t inc);                       \
+  uint32_t PREFIX##test_two_vlas_in_loops(uint32_t size, uint32_t start, \
+                                          uint32_t inc);
+
+#endif
diff --git a/crosstest/test_stacksave_main.c b/crosstest/test_stacksave_main.c
new file mode 100644
index 0000000..a5ab248
--- /dev/null
+++ b/crosstest/test_stacksave_main.c
@@ -0,0 +1,61 @@
+//===- subzero/crosstest/test_stacksave_main.c - Driver for tests ---------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Driver for cross testing stacksave/stackrestore intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+/* crosstest.py --test=test_stacksave.c --driver=test_stacksave_main.c \
+   --prefix=Subzero_ --output=test_stacksave */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "test_stacksave.h"
+DECLARE_TESTS()
+DECLARE_TESTS(Subzero_)
+/* Runs every test in its plain and Subzero_ builds and counts mismatches. */
+int main(int argc, char **argv) {
+  size_t TotalTests = 0;
+  size_t Passes = 0;
+  size_t Failures = 0;
+  typedef uint32_t (*FuncType)(uint32_t, uint32_t, uint32_t);
+  static struct {
+    const char *Name;
+    FuncType FuncLlc;
+    FuncType FuncSz;
+  } Funcs[] = {
+    { "test_basic_vla", test_basic_vla, Subzero_test_basic_vla },
+    { "test_vla_in_loop", test_vla_in_loop, Subzero_test_vla_in_loop },
+    { "test_two_vlas_in_loops", test_two_vlas_in_loops,
+      Subzero_test_two_vlas_in_loops }
+  };
+  static const size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
+  const uint32_t size_to_test = 128;
+  for (size_t f = 0; f < NumFuncs; ++f) {
+    for (uint32_t start = 0; start < size_to_test / 2; ++start) {
+      ++TotalTests;
+      uint32_t inc = (start / 10) + 1; /* Grows by one every 10 starts. */
+      uint32_t llc_result = Funcs[f].FuncLlc(size_to_test, start, inc);
+      uint32_t sz_result = Funcs[f].FuncSz(size_to_test, start, inc);
+      if (llc_result == sz_result) {
+        ++Passes;
+      } else {
+        ++Failures;
+        printf("Failure %s: start=%" PRIu32 ", "
+               "llc=%" PRIu32 ", sz=%" PRIu32 "\n",
+               Funcs[f].Name, start, llc_result, sz_result);
+      }
+    }
+  }
+  printf("TotalTests=%zu Passes=%zu Failures=%zu\n",
+         TotalTests, Passes, Failures);
+  return Failures; /* Failure count as exit status; at most 192 tests run. */
+}
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index b38481b..b572578 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2705,7 +2705,7 @@
   }
   case Intrinsics::Memset: {
     // The value operand needs to be extended to a stack slot size
-    // because we "push" only works for a specific operand size.
+    // because "push" only works for a specific operand size.
     Operand *ValOp = Instr->getArg(1);
     assert(ValOp->getType() == IceType_i8);
     Variable *ValExt = makeReg(stackSlotType());
@@ -2741,11 +2741,17 @@
     _mov(Dest, T);
     return;
   }
-  case Intrinsics::Stacksave:
-  case Intrinsics::Stackrestore:
-    // TODO(jvoung): fill it in.
-    Func->setError("Unhandled intrinsic");
+  case Intrinsics::Stacksave: {
+    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
+    Variable *Dest = Instr->getDest();
+    _mov(Dest, esp); // Stacksave: copy the current esp into Dest.
     return;
+  }
+  case Intrinsics::Stackrestore: {
+    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
+    _mov(esp, Instr->getArg(0)); // Stackrestore: reload esp from the argument.
+    return;
+  }
   case Intrinsics::Trap:
     _ud2();
     return;
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index dbebf11..8a11a84 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -31,6 +31,8 @@
 declare i64 @llvm.cttz.i64(i64, i1)
 declare i32 @llvm.ctpop.i32(i32)
 declare i64 @llvm.ctpop.i64(i64)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
 
 define i32 @test_nacl_read_tp() {
 entry:
@@ -394,6 +396,50 @@
 ; CHECKO2REM: call __popcountdi2
 ; CHECKO2REM-NOT: mov {{.*}}, 0
 
+define void @test_stacksave_noalloca() {
+entry:
+  %sp = call i8* @llvm.stacksave()      ; save with no alloca in between
+  call void @llvm.stackrestore(i8* %sp) ; restore the value just saved
+  ret void
+}
+; CHECK-LABEL: test_stacksave_noalloca
+; CHECK: mov {{.*}}, esp
+; CHECK: mov esp, {{.*}}
+
+declare i32 @foo(i32 %x)
+
+define void @test_stacksave_multiple(i32 %x) {
+entry:
+  %x_4 = mul i32 %x, 4
+  %sp1 = call i8* @llvm.stacksave()
+  %tmp1 = alloca i8, i32 %x_4, align 4
+
+  %sp2 = call i8* @llvm.stacksave()
+  %tmp2 = alloca i8, i32 %x_4, align 4
+
+  %y = call i32 @foo(i32 %x)
+
+  %sp3 = call i8* @llvm.stacksave()
+  %tmp3 = alloca i8, i32 %x_4, align 4
+
+  %__9 = bitcast i8* %tmp1 to i32*
+  store i32 %y, i32* %__9, align 1
+
+  %__10 = bitcast i8* %tmp2 to i32*
+  store i32 %x, i32* %__10, align 1
+
+  %__11 = bitcast i8* %tmp3 to i32*
+  store i32 %x, i32* %__11, align 1
+
+  call void @llvm.stackrestore(i8* %sp1) ; only the first save is restored
+  ret void
+}
+; CHECK-LABEL: test_stacksave_multiple
+; At least 3 copies of esp (one per stacksave), probably more from the allocas.
+; CHECK: mov {{.*}}, esp
+; CHECK: mov {{.*}}, esp
+; CHECK: mov {{.*}}, esp
+; CHECK: mov esp, {{.*}}
 
 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ