[Subzero][MIPS32] Implements lowering of alloca instruction

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/2067183002 .

Patch from Sagar Thakur <sagar.thakur@imgtec.com>.
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 21262f6..ccb1676 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -163,10 +163,28 @@
                           RegMIPS32::getRegName, getRegClassName);
 }
 
+void TargetMIPS32::unsetIfNonLeafFunc() { // Clear MaybeLeafFunc on any call.
+  for (CfgNode *Node : Func->getNodes()) {
+    for (Inst &Instr : Node->getInsts()) {
+      if (llvm::isa<InstCall>(&Instr)) {
+        // First InstCall found: not a leaf function, no need to scan further.
+        MaybeLeafFunc = false;
+        return;
+      }
+    }
+  }
+}
+
+uint32_t TargetMIPS32::getStackAlignment() const {
+  return MIPS32_STACK_ALIGNMENT_BYTES; // Also the minimum alloca alignment.
+}
+
 void TargetMIPS32::findMaxStackOutArgsSize() {
   // MinNeededOutArgsBytes should be updated if the Target ever creates a
   // high-level InstCall that requires more stack bytes.
-  constexpr size_t MinNeededOutArgsBytes = 16;
+  size_t MinNeededOutArgsBytes = 0;
+  if (!MaybeLeafFunc)
+    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
   MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
   for (CfgNode *Node : Func->getNodes()) {
     Context.init(Node);
@@ -188,10 +206,12 @@
   // https://code.google.com/p/nativeclient/issues/detail?id=4094
   genTargetHelperCalls();
 
+  unsetIfNonLeafFunc();
+
   findMaxStackOutArgsSize();
 
   // Merge Alloca instructions, and lay out the stack.
-  static constexpr bool SortAndCombineAllocas = false;
+  static constexpr bool SortAndCombineAllocas = true;
   Func->processAllocas(SortAndCombineAllocas);
   Func->dump("After Alloca processing");
 
@@ -291,6 +311,8 @@
   // TODO: share passes with X86?
   genTargetHelperCalls();
 
+  unsetIfNonLeafFunc();
+
   findMaxStackOutArgsSize();
 
   // Do not merge Alloca instructions, and lay out the stack.
@@ -441,8 +463,8 @@
   // hold the operand.
   auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg));
   return OperandMIPS32Mem::create(
-      Func, Ty, Base,
-      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
+      Func, Ty, Base, llvm::cast<ConstantInteger32>(
+                          Ctx->getConstantInt32(Base->getStackOffset())));
 }
 
 void TargetMIPS32::emitVariable(const Variable *Var) const {
@@ -808,15 +830,9 @@
   uint32_t GlobalsAndSubsequentPaddingSize =
       GlobalsSize + LocalsSlotsPaddingBytes;
 
-  if (MaybeLeafFunc)
-    MaxOutArgsSizeBytes = 0;
-
   // Adds the out args space to the stack, and align SP if necessary.
-  uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;
-
-  // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
-  // TotalStackSizeBytes once lowerAlloca is implemented and leaf function
-  // information is generated by lowerCall.
+  uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes +
+                                 FixedAllocaSizeBytes + MaxOutArgsSizeBytes;
 
   // Generate "addiu sp, sp, -TotalStackSizeBytes"
   if (TotalStackSizeBytes) {
@@ -854,7 +870,7 @@
   // those that were register-allocated. Args are pushed right to left, so
   // Arg[0] is closest to the stack/frame pointer.
   const VarList &Args = Func->getArgs();
-  size_t InArgsSizeBytes = 0;
+  size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
   TargetMIPS32::CallingConv CC;
   uint32_t ArgNo = 0;
 
@@ -1002,14 +1018,64 @@
 }
 
 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
-  UsesFramePointer = true;
   // Conservatively require the stack to be aligned. Some stack adjustment
   // operations implemented below assume that the stack is aligned before the
   // alloca. All the alloca code ensures that the stack alignment is preserved
   // after the alloca. The stack alignment restriction can be relaxed in some
   // cases.
   NeedsStackAlignment = true;
-  UnimplementedLoweringError(this, Instr);
+
+  // For default align=0, set it to the real value 1, to avoid any
+  // bit-manipulation problems below.
+  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
+
+  // LLVM enforces power of 2 alignment.
+  assert(llvm::isPowerOf2_32(AlignmentParam));
+  assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
+
+  const uint32_t Alignment =
+      std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
+  const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
+  const bool OptM1 = getFlags().getOptLevel() == Opt_m1;
+  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
+  const bool UseFramePointer =
+      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
+
+  if (UseFramePointer)
+    setHasFramePointer();
+
+  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
+
+  Variable *Dest = Instr->getDest();
+  Operand *TotalSize = Instr->getSizeInBytes();
+
+  if (const auto *ConstantTotalSize =
+          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
+    const uint32_t Value =
+        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
+    FixedAllocaSizeBytes += Value;
+    // Constant size alloca.
+    if (!UseFramePointer) {
+      // If we don't need a frame pointer, this alloca has a known offset from
+      // the stack pointer. We don't need to adjust the stack pointer, nor
+      // assign any value to Dest, as Dest is rematerializable.
+      assert(Dest->isRematerializable());
+      Context.insert<InstFakeDef>(Dest);
+      return;
+    }
+  } else {
+    UnimplementedLoweringError(this, Instr); // Size is not a ConstantInteger32.
+    return;
+  }
+
+  // Add enough to the returned address to account for the out args area.
+  if (MaxOutArgsSizeBytes > 0) {
+    Variable *T = makeReg(getPointerType());
+    _addiu(T, SP, MaxOutArgsSizeBytes);
+    _mov(Dest, T);
+  } else {
+    _mov(Dest, SP);
+  }
 }
 
 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index cfbaa6f..155b608 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -89,15 +89,12 @@
     // are rounded up to 4 bytes.
     return (typeWidthInBytes(Ty) + 3) & ~3;
   }
-  uint32_t getStackAlignment() const override {
-    // TODO(sehr): what is the stack alignment?
-    return 1;
-  }
+  uint32_t getStackAlignment() const override;
   void reserveFixedAllocaArea(size_t Size, size_t Align) override {
-    // TODO(sehr): Implement fixed stack layout.
-    (void)Size;
-    (void)Align;
-    llvm::report_fatal_error("Not yet implemented");
+    FixedAllocaSizeBytes = Size; // Counted in the frame's TotalStackSizeBytes.
+    assert(llvm::isPowerOf2_32(Align));
+    FixedAllocaAlignBytes = Align;
+    PrologEmitsFixedAllocas = true;
   }
   int32_t getFrameFixedAllocaOffset() const override {
     // TODO(sehr): Implement fixed stack layout.
@@ -105,6 +102,8 @@
     return 0;
   }
 
+  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
+
   bool shouldSplitToVariable64On32(Type Ty) const override {
     return Ty == IceType_i64;
   }
@@ -447,6 +446,8 @@
   static Type stackSlotType();
   Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
 
+  void unsetIfNonLeafFunc();
+
   // Iterates over the CFG and determines the maximum outgoing stack arguments
   // bytes. This information is later used during addProlog() to pre-allocate
   // the outargs area
@@ -563,6 +564,8 @@
   static constexpr uint32_t CHAR_BITS = 8;
   static constexpr uint32_t INT32_BITS = 32;
   size_t SpillAreaSizeBytes = 0;
+  size_t FixedAllocaSizeBytes = 0;
+  size_t FixedAllocaAlignBytes = 0;
 
 private:
   ENABLE_MAKE_UNIQUE;
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index cf0926c..3be09f6 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -26,6 +26,20 @@
 ; RUN:   | %if --need=target_ARM32 --need=allow_dump \
 ; RUN:   --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
 
+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target mips32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   -allow-externally-defined-symbols \
+; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPT2 %s
+
+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target mips32 -i %s --args -Om1 --skip-unimplemented \
+; RUN:   -allow-externally-defined-symbols \
+; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
+; RUN:   --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPTM1 %s
+
 define internal void @fixed_416_align_16(i32 %n) {
 entry:
   %array = alloca i8, i32 416, align 16
@@ -50,6 +64,10 @@
 ; ARM32-OPTM1: sub sp, sp, #416
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
+; MIPS32-LABEL: fixed_416_align_16
+; MIPS32-OPT2: addiu sp,sp,-440
+; MIPS32-OPTM1: addiu sp,sp,-448
+
 define internal void @fixed_416_align_32(i32 %n) {
 entry:
   %array = alloca i8, i32 400, align 32
@@ -72,6 +90,10 @@
 ; ARM32:       bic sp, sp, #31
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
+; MIPS32-LABEL: fixed_416_align_32
+; MIPS32-OPT2: addiu sp,sp,-440
+; MIPS32-OPTM1: addiu sp,sp,-448
+
 ; Show that the amount to allocate will be rounded up.
 define internal void @fixed_351_align_16(i32 %n) {
 entry:
@@ -97,6 +119,10 @@
 ; ARM32-OPTM1: sub sp, sp, #352
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
+; MIPS32-LABEL: fixed_351_align_16
+; MIPS32-OPT2: addiu sp,sp,-376
+; MIPS32-OPTM1: addiu sp,sp,-384
+
 define internal void @fixed_351_align_32(i32 %n) {
 entry:
   %array = alloca i8, i32 351, align 32
@@ -119,6 +145,10 @@
 ; ARM32:       bic sp, sp, #31
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
+; MIPS32-LABEL: fixed_351_align_32
+; MIPS32-OPT2: addiu sp,sp,-376
+; MIPS32-OPTM1: addiu sp,sp,-384
+
 declare void @f1(i32 %ignored)
 
 declare void @f2(i32 %ignored)