[Subzero][MIPS32] Adds prolog instructions for MIPS32

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/2051713002 .

Patch from Sagar Thakur <sagar.thakur@imgtec.com>.
diff --git a/src/IceInstMIPS32.def b/src/IceInstMIPS32.def
index b2fed3f..0115bc4 100644
--- a/src/IceInstMIPS32.def
+++ b/src/IceInstMIPS32.def
@@ -101,7 +101,7 @@
     ALIASES1(Reg_SP))                                                          \
   X(Reg_FP,       30,  "fp",    0, 0, 0, 1, 0, 0, 0, 0, 0,                     \
     ALIASES1(Reg_FP))                                                          \
-  X(Reg_RA,       31,  "ra",    0, 1, 0, 0, 0, 0, 0, 0, 0,                     \
+  X(Reg_RA,       31,  "ra",    0, 0, 0, 0, 0, 0, 0, 0, 0,                     \
     ALIASES1(Reg_RA))                                                          \
   X(Reg_LO,       0,   "lo",    0, 0, 0, 0, 0, 0, 0, 0, 0,                     \
     ALIASES2(Reg_LO, Reg_LOHI))                                                \
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 68655de..0c0f05c 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -83,6 +83,18 @@
   }
 }
 
+// Stack alignment
+constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 8;
+
+// Value is in bytes. Return Value adjusted to the next highest multiple of the
+// stack alignment required for the given type.
+uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
+  size_t typeAlignInBytes = typeWidthInBytes(Ty);
+  if (isVectorType(Ty))
+    UnimplementedError(getFlags());
+  return Utils::applyAlignment(Value, typeAlignInBytes);
+}
+
 } // end of anonymous namespace
 
 TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {}
@@ -151,6 +163,24 @@
                           RegMIPS32::getRegName, getRegClassName);
 }
 
+void TargetMIPS32::findMaxStackOutArgsSize() {
+  // MinNeededOutArgsBytes should be updated if the Target ever creates a
+  // high-level InstCall that requires more stack bytes.
+  constexpr size_t MinNeededOutArgsBytes = 16;
+  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
+  for (CfgNode *Node : Func->getNodes()) {
+    Context.init(Node);
+    while (!Context.atEnd()) {
+      PostIncrLoweringContext PostIncrement(Context);
+      Inst *CurInstr = iteratorToInst(Context.getCur());
+      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
+        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
+        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
+      }
+    }
+  }
+}
+
 void TargetMIPS32::translateO2() {
   TimerMarker T(TimerStack::TT_O2, Func);
 
@@ -158,6 +188,8 @@
   // https://code.google.com/p/nativeclient/issues/detail?id=4094
   genTargetHelperCalls();
 
+  findMaxStackOutArgsSize();
+
   // Merge Alloca instructions, and lay out the stack.
   static constexpr bool SortAndCombineAllocas = false;
   Func->processAllocas(SortAndCombineAllocas);
@@ -259,6 +291,8 @@
   // TODO: share passes with X86?
   genTargetHelperCalls();
 
+  findMaxStackOutArgsSize();
+
   // Do not merge Alloca instructions, and lay out the stack.
   static constexpr bool SortAndCombineAllocas = false;
   Func->processAllocas(SortAndCombineAllocas);
@@ -632,10 +666,277 @@
 
 Type TargetMIPS32::stackSlotType() { return IceType_i32; }
 
+// Helper function for addProlog().
+//
+// This assumes Arg is an argument passed on the stack. This sets the frame
+// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
+// I64 arg that has been split into Lo and Hi components, it calls itself
+// recursively on the components, taking care to handle Lo first because of the
+// little-endian architecture. Lastly, this function generates an instruction
+// to copy Arg into its assigned register if applicable.
+void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
+                                          size_t BasicFrameOffset,
+                                          size_t *InArgsSizeBytes) {
+  const Type Ty = Arg->getType();
+  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
+
+  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
+    Variable *const Lo = Arg64On32->getLo();
+    Variable *const Hi = Arg64On32->getHi();
+    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
+    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
+    return;
+  }
+  assert(Ty != IceType_i64);
+
+  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
+  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
+
+  if (!Arg->hasReg()) {
+    Arg->setStackOffset(ArgStackOffset);
+    return;
+  }
+
+  // If the argument variable has been assigned a register, we need to copy the
+  // value from the stack slot.
+  Variable *Parameter = Func->makeVariable(Ty);
+  Parameter->setMustNotHaveReg();
+  Parameter->setStackOffset(ArgStackOffset);
+  _mov(Arg, Parameter);
+}
+
 void TargetMIPS32::addProlog(CfgNode *Node) {
-  (void)Node;
+  // Stack frame layout:
+  //
+  // +------------------------+
+  // | 1. preserved registers |
+  // +------------------------+
+  // | 2. padding             |
+  // +------------------------+
+  // | 3. global spill area   |
+  // +------------------------+
+  // | 4. padding             |
+  // +------------------------+
+  // | 5. local spill area    |
+  // +------------------------+
+  // | 6. padding             |
+  // +------------------------+
+  // | 7. allocas             |
+  // +------------------------+
+  // | 8. padding             |
+  // +------------------------+
+  // | 9. out args           |
+  // +------------------------+ <--- StackPointer
+  //
+  // The following variables record the size in bytes of the given areas:
+  //  * PreservedRegsSizeBytes: area 1
+  //  * SpillAreaPaddingBytes:  area 2
+  //  * GlobalsSize:            area 3
+  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
+  //  * LocalsSpillAreaSize:    area 5
+  //  * SpillAreaSizeBytes:     areas 2 - 9
+  //  * maxOutArgsSizeBytes():  area 9
+
+  Context.init(Node);
+  Context.setInsertPoint(Context.getCur());
+
+  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  RegsUsed = SmallBitVector(CalleeSaves.size());
+
+  VarList SortedSpilledVariables;
+
+  size_t GlobalsSize = 0;
+  // If there is a separate locals area, this represents that area. Otherwise
+  // it counts any variable not counted by GlobalsSize.
+  SpillAreaSizeBytes = 0;
+  // If there is a separate locals area, this specifies the alignment for it.
+  uint32_t LocalsSlotsAlignmentBytes = 0;
+  // The entire spill locations area gets aligned to largest natural alignment
+  // of the variables that have a spill slot.
+  uint32_t SpillAreaAlignmentBytes = 0;
+  // For now, we don't have target-specific variables that need special
+  // treatment (no stack-slot-linked SpillVariable type).
+  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
+    static constexpr bool AssignStackSlot = false;
+    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
+    if (llvm::isa<Variable64On32>(Var)) {
+      return DontAssignStackSlot;
+    }
+    return AssignStackSlot;
+  };
+
+  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
+  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
+                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
+                        &LocalsSlotsAlignmentBytes, TargetVarHook);
+  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
+  SpillAreaSizeBytes += GlobalsSize;
+
+  PreservedGPRs.reserve(CalleeSaves.size());
+
+  // Consider FP and RA as callee-save / used as needed.
+  if (UsesFramePointer) {
+    if (RegsUsed[RegMIPS32::Reg_FP]) {
+      llvm::report_fatal_error("Frame pointer has been used.");
+    }
+    CalleeSaves[RegMIPS32::Reg_FP] = true;
+    RegsUsed[RegMIPS32::Reg_FP] = true;
+  }
+  if (!MaybeLeafFunc) {
+    CalleeSaves[RegMIPS32::Reg_RA] = true;
+    RegsUsed[RegMIPS32::Reg_RA] = true;
+  }
+
+  // Make two passes over the used registers. The first pass records all the
+  // used registers -- and their aliases. Then, we figure out which GPR
+  // registers should be saved.
+  SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
+  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+    if (CalleeSaves[i] && RegsUsed[i]) {
+      ToPreserve |= RegisterAliases[i];
+    }
+  }
+
+  uint32_t NumCallee = 0;
+  size_t PreservedRegsSizeBytes = 0;
+
+  // RegClasses is a tuple of
+  //
+  // <First Register in Class, Last Register in Class, Vector of Save Registers>
+  //
+  // We use this tuple to figure out which register we should save/restore
+  // during
+  // prolog/epilog.
+  using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
+  const RegClassType RegClass = RegClassType(
+      RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_GPR_Last, &PreservedGPRs);
+  const uint32_t FirstRegInClass = std::get<0>(RegClass);
+  const uint32_t LastRegInClass = std::get<1>(RegClass);
+  VarList *const PreservedRegsInClass = std::get<2>(RegClass);
+  for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
+    if (!ToPreserve[Reg]) {
+      continue;
+    }
+    ++NumCallee;
+    Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
+    PreservedRegsSizeBytes +=
+        typeWidthInBytesOnStack(PhysicalRegister->getType());
+    PreservedRegsInClass->push_back(PhysicalRegister);
+  }
+
+  Ctx->statsUpdateRegistersSaved(NumCallee);
+
+  // Align the variables area. SpillAreaPaddingBytes is the size of the region
+  // after the preserved registers and before the spill areas.
+  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+  // locals area if they are separate.
+  assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
+  (void)MIPS32_STACK_ALIGNMENT_BYTES;
+  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
+  uint32_t SpillAreaPaddingBytes = 0;
+  uint32_t LocalsSlotsPaddingBytes = 0;
+  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
+                       GlobalsSize, LocalsSlotsAlignmentBytes,
+                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
+  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
+  uint32_t GlobalsAndSubsequentPaddingSize =
+      GlobalsSize + LocalsSlotsPaddingBytes;
+
+  if (MaybeLeafFunc)
+    MaxOutArgsSizeBytes = 0;
+
+  // Adds the out args space to the stack, and align SP if necessary.
+  uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;
+
+  // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
+  // TotalStackSizeBytes once lowerAlloca is implemented and leaf function
+  // information is generated by lowerCall.
+
+  // Generate "addiu sp, sp, -TotalStackSizeBytes"
+  if (TotalStackSizeBytes) {
+    // Use the scratch register if needed to legalize the immediate.
+    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
+    _addiu(SP, SP, -(TotalStackSizeBytes));
+  }
+
+  Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
+
+  if (!PreservedGPRs.empty()) {
+    uint32_t StackOffset = TotalStackSizeBytes;
+    for (Variable *Var : *PreservedRegsInClass) {
+      Variable *PhysicalRegister = getPhysicalRegister(Var->getRegNum());
+      StackOffset -= typeWidthInBytesOnStack(PhysicalRegister->getType());
+      Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
+      OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
+          Func, IceType_i32, SP,
+          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
+      _sw(PhysicalRegister, MemoryLocation);
+    }
+  }
+
+  Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
+
+  // Generate "mov FP, SP" if needed.
+  if (UsesFramePointer) {
+    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
+    _mov(FP, SP);
+    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
+    Context.insert<InstFakeUse>(FP);
+  }
+
+  // Fill in stack offsets for stack args, and copy args into registers for
+  // those that were register-allocated. Args are pushed right to left, so
+  // Arg[0] is closest to the stack/frame pointer.
+  const VarList &Args = Func->getArgs();
+  size_t InArgsSizeBytes = 0;
+  TargetMIPS32::CallingConv CC;
+  uint32_t ArgNo = 0;
+
+  for (Variable *Arg : Args) {
+    RegNumT DummyReg;
+    const Type Ty = Arg->getType();
+    // Skip arguments passed in registers.
+    if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
+      ArgNo++;
+      continue;
+    } else {
+      finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes);
+    }
+  }
+
+  // Fill in stack offsets for locals.
+  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
+                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
+                      UsesFramePointer);
+  this->HasComputedFrame = true;
+
+  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
+    OstreamLocker _(Func->getContext());
+    Ostream &Str = Func->getContext()->getStrDump();
+
+    Str << "Stack layout:\n";
+    uint32_t SPAdjustmentPaddingSize =
+        SpillAreaSizeBytes - LocalsSpillAreaSize -
+        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
+        MaxOutArgsSizeBytes;
+    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
+        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
+        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
+        << " globals spill area = " << GlobalsSize << " bytes\n"
+        << " globals-locals spill areas intermediate padding = "
+        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
+        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
+        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
+
+    Str << "Stack details:\n"
+        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
+        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
+        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
+        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
+        << " bytes\n"
+        << " is FP based = " << 1 << "\n";
+  }
   return;
-  UnimplementedError(getFlags());
 }
 
 void TargetMIPS32::addEpilog(CfgNode *Node) {
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 4348a7c..cfbaa6f 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -447,6 +447,11 @@
   static Type stackSlotType();
   Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
 
+  // Iterates over the CFG and determines the maximum outgoing stack arguments
+  // bytes. This information is later used during addProlog() to pre-allocate
+  // the outargs area
+  void findMaxStackOutArgsSize();
+
   void addProlog(CfgNode *Node) override;
   void addEpilog(CfgNode *Node) override;
 
@@ -457,6 +462,9 @@
   Operand *loOperand(Operand *Operand);
   Operand *hiOperand(Operand *Operand);
 
+  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
+                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);
+
   Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
 
   /// Helper class that understands the Calling Convention and register
@@ -543,13 +551,18 @@
 
   bool UsesFramePointer = false;
   bool NeedsStackAlignment = false;
+  bool MaybeLeafFunc = true;
+  bool PrologEmitsFixedAllocas = false;
+  uint32_t MaxOutArgsSizeBytes = 0;
   static SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
   static SmallBitVector TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
   static SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
   SmallBitVector RegsUsed;
   VarList PhysicalRegisters[IceType_NUM];
+  VarList PreservedGPRs;
   static constexpr uint32_t CHAR_BITS = 8;
   static constexpr uint32_t INT32_BITS = 32;
+  size_t SpillAreaSizeBytes = 0;
 
 private:
   ENABLE_MAKE_UNIQUE;
diff --git a/tests_lit/llvm2ice_tests/uncond_br.ll b/tests_lit/llvm2ice_tests/uncond_br.ll
index 1e5bcae..bc1d8b8 100644
--- a/tests_lit/llvm2ice_tests/uncond_br.ll
+++ b/tests_lit/llvm2ice_tests/uncond_br.ll
@@ -23,8 +23,8 @@
 }
 
 ; MIPS32-LABEL: uncond1
-; MIPS32:   b  {{[0-9]+}} <.Luncond1$target>
+; MIPS32:   b  {{[0-9a-f]+}} <.Luncond1$target>
 ; MIPS32: <.Luncond1$target>:
 ; MIPS32: li
 ; MIPS32: addu
-; MIPS32: b {{[0-9]+}} <.Luncond1$target>
+; MIPS32: b {{[0-9a-f]+}} <.Luncond1$target>