Subzero. Moves code around in preparation for 64-bit lowering.

Specifically, it moves

lowerArguments
lowerRet
addProlog
addEpilog

from the x86 lowering template to the concrete lowering implementations.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1261383002.
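
For illustration, a deliberately simplified, self-contained sketch of the
resulting dispatch structure (the real classes live in Ice::X86Internal,
take Cfg/CfgNode/Inst parameters, and are declared in
IceTargetLoweringX86Base.h and the per-target headers; the stand-in types
and the printed strings below are hypothetical):

    #include <cstdio>

    // Simplified stand-in for Ice::TargetLowering: the ABI-specific hooks
    // are pure virtual, so each concrete target must provide them.
    struct TargetLowering {
      virtual ~TargetLowering() = default;
      virtual void lowerArguments() = 0; // moved out of the shared template
      virtual void addProlog() = 0;      // moved out of the shared template
    };

    // The shared CRTP template keeps only target-independent x86 lowering.
    template <class Machine> struct TargetX86Base : TargetLowering {};

    struct TargetX8632 final : TargetX86Base<TargetX8632> {
      void lowerArguments() override {
        std::puts("x86-32: first 4 vector args in xmm0-xmm3");
      }
      void addProlog() override { std::puts("x86-32: 4-byte register saves"); }
    };

    struct TargetX8664 final : TargetX86Base<TargetX8664> {
      void lowerArguments() override {
        std::puts("x86-64: xmm0-xmm7 plus rdi/rsi/rdx/rcx/r8/r9");
      }
      void addProlog() override { std::puts("x86-64: 8-byte register saves"); }
    };

    int main() {
      TargetX8632 X32;
      X32.lowerArguments();
      X32.addProlog();
      TargetX8664 X64;
      X64.lowerArguments();
      X64.addProlog();
      return 0;
    }
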
diff --git a/src/IceInstX8664.h b/src/IceInstX8664.h
index 75e3719..2520b75 100644
--- a/src/IceInstX8664.h
+++ b/src/IceInstX8664.h
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file used to house all the X8664 instructions. Subzero has been
-/// modified to use templates for X86 instructions, so all those definitions are
-/// are in IceInstX86Base.h
+/// (Note: x86 instructions are templates, and they are defined in
+///  src/IceInstX86Base.)
 ///
 /// When interacting with the X8664 target (which should only happen in the
 /// X8664 TargetLowering) clients should use the Ice::X8664::Traits::Insts
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 8adfab2..6724a61 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -89,6 +89,563 @@
 
 } // end of namespace X86Internal
 
+//------------------------------------------------------------------------------
+//     __      ______  __     __  ______  ______  __  __   __  ______
+//    /\ \    /\  __ \/\ \  _ \ \/\  ___\/\  == \/\ \/\ "-.\ \/\  ___\
+//    \ \ \___\ \ \/\ \ \ \/ ".\ \ \  __\\ \  __<\ \ \ \ \-.  \ \ \__ \
+//     \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
+//      \/_____/\/_____/\/_/   \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
+//
+//------------------------------------------------------------------------------
+void TargetX8632::lowerCall(const InstCall *Instr) {
+  // x86-32 calling convention:
+  //
+  // * At the point before the call, the stack must be aligned to 16
+  // bytes.
+  //
+  // * The first four arguments of vector type, regardless of their
+  // position relative to the other arguments in the argument list, are
+  // placed in registers xmm0 - xmm3.
+  //
+  // * Other arguments are pushed onto the stack in right-to-left order,
+  // such that the left-most argument ends up on the top of the stack at
+  // the lowest memory address.
+  //
+  // * Stack arguments of vector type are aligned to start at the next
+  // highest multiple of 16 bytes.  Other stack arguments are aligned to
+  // 4 bytes.
+  //
+  // This intends to match the section "IA-32 Function Calling
+  // Convention" of the document "OS X ABI Function Call Guide" by
+  // Apple.
+  NeedsStackAlignment = true;
+
+  typedef std::vector<Operand *> OperandList;
+  OperandList XmmArgs;
+  OperandList StackArgs, StackArgLocations;
+  uint32_t ParameterAreaSizeBytes = 0;
+
+  // Classify each argument operand according to the location where the
+  // argument is passed.
+  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+    Operand *Arg = Instr->getArg(i);
+    Type Ty = Arg->getType();
+    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+    assert(typeWidthInBytes(Ty) >= 4);
+    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
+      XmmArgs.push_back(Arg);
+    } else {
+      StackArgs.push_back(Arg);
+      if (isVectorType(Arg->getType())) {
+        ParameterAreaSizeBytes =
+            Traits::applyStackAlignment(ParameterAreaSizeBytes);
+      }
+      Variable *esp =
+          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
+      StackArgLocations.push_back(
+          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+    }
+  }
+
+  // Adjust the parameter area so that the stack is aligned.  It is
+  // assumed that the stack is already aligned at the start of the
+  // calling sequence.
+  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
+
+  // Subtract the appropriate amount for the argument area.  This also
+  // takes care of setting the stack adjustment during emission.
+  //
+  // TODO: If for some reason the call instruction gets dead-code
+  // eliminated after lowering, we would need to ensure that the
+  // pre-call and the post-call esp adjustment get eliminated as well.
+  if (ParameterAreaSizeBytes) {
+    _adjust_stack(ParameterAreaSizeBytes);
+  }
+
+  // Copy arguments that are passed on the stack to the appropriate
+  // stack locations.
+  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+  }
+
+  // Copy arguments to be passed in registers to the appropriate
+  // registers.
+  // TODO: Investigate the impact of lowering arguments passed in
+  // registers after lowering stack arguments as opposed to the other
+  // way around.  Lowering register arguments after stack arguments may
+  // reduce register pressure.  On the other hand, lowering register
+  // arguments first (before stack arguments) may result in more compact
+  // code, as the memory operand displacements may end up being smaller
+  // before any stack adjustment is done.
+  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
+    Variable *Reg =
+        legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
+    // Generate a FakeUse of register arguments so that they do not get
+    // dead code eliminated as a result of the FakeKill of scratch
+    // registers after the call.
+    Context.insert(InstFakeUse::create(Func, Reg));
+  }
+  // Generate the call instruction.  Assign its result to a temporary
+  // with high register allocation weight.
+  Variable *Dest = Instr->getDest();
+  // ReturnReg doubles as ReturnRegLo as necessary.
+  Variable *ReturnReg = nullptr;
+  Variable *ReturnRegHi = nullptr;
+  if (Dest) {
+    switch (Dest->getType()) {
+    case IceType_NUM:
+    case IceType_void:
+      llvm::report_fatal_error("Invalid Call dest type");
+      break;
+    case IceType_i1:
+    case IceType_i8:
+    case IceType_i16:
+    case IceType_i32:
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
+      break;
+    case IceType_i64:
+      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+      break;
+    case IceType_f32:
+    case IceType_f64:
+      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
+      // the fstp instruction.
+      break;
+    case IceType_v4i1:
+    case IceType_v8i1:
+    case IceType_v16i1:
+    case IceType_v16i8:
+    case IceType_v8i16:
+    case IceType_v4i32:
+    case IceType_v4f32:
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
+      break;
+    }
+  }
+  Operand *CallTarget = legalize(Instr->getCallTarget());
+  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+  if (NeedSandboxing) {
+    if (llvm::isa<Constant>(CallTarget)) {
+      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+    } else {
+      Variable *CallTargetVar = nullptr;
+      _mov(CallTargetVar, CallTarget);
+      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+      const SizeT BundleSize =
+          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
+      CallTarget = CallTargetVar;
+    }
+  }
+  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
+  Context.insert(NewCall);
+  if (NeedSandboxing)
+    _bundle_unlock();
+  if (ReturnRegHi)
+    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+
+  // Add the appropriate offset to esp.  The call instruction takes care
+  // of resetting the stack offset during emission.
+  if (ParameterAreaSizeBytes) {
+    Variable *esp =
+        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
+  }
+
+  // Insert a register-kill pseudo instruction.
+  Context.insert(InstFakeKill::create(Func, NewCall));
+
+  // Generate a FakeUse to keep the call live if necessary.
+  if (Instr->hasSideEffects() && ReturnReg) {
+    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
+    Context.insert(FakeUse);
+  }
+
+  if (!Dest)
+    return;
+
+  // Assign the result of the call to Dest.
+  if (ReturnReg) {
+    if (ReturnRegHi) {
+      assert(Dest->getType() == IceType_i64);
+      split64(Dest);
+      Variable *DestLo = Dest->getLo();
+      Variable *DestHi = Dest->getHi();
+      _mov(DestLo, ReturnReg);
+      _mov(DestHi, ReturnRegHi);
+    } else {
+      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
+             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
+             isVectorType(Dest->getType()));
+      if (isVectorType(Dest->getType())) {
+        _movp(Dest, ReturnReg);
+      } else {
+        _mov(Dest, ReturnReg);
+      }
+    }
+  } else if (isScalarFloatingType(Dest->getType())) {
+    // Special treatment for an FP function which returns its result in
+    // st(0).
+    // If Dest ends up being a physical xmm register, the fstp emit code
+    // will route st(0) through a temporary stack slot.
+    _fstp(Dest);
+    // Create a fake use of Dest in case it actually isn't used,
+    // because st(0) still needs to be popped.
+    Context.insert(InstFakeUse::create(Func, Dest));
+  }
+}
+
+void TargetX8632::lowerArguments() {
+  VarList &Args = Func->getArgs();
+  // The first four arguments of vector type, regardless of their
+  // position relative to the other arguments in the argument list, are
+  // passed in registers xmm0 - xmm3.
+  unsigned NumXmmArgs = 0;
+
+  Context.init(Func->getEntryNode());
+  Context.setInsertPoint(Context.getCur());
+
+  for (SizeT I = 0, E = Args.size();
+       I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
+    Variable *Arg = Args[I];
+    Type Ty = Arg->getType();
+    if (!isVectorType(Ty))
+      continue;
+    // Replace Arg in the argument list with the home register.  Then
+    // generate an instruction in the prolog to copy the home register
+    // to the assigned location of Arg.
+    int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
+    ++NumXmmArgs;
+    Variable *RegisterArg = Func->makeVariable(Ty);
+    if (BuildDefs::dump())
+      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
+    RegisterArg->setRegNum(RegNum);
+    RegisterArg->setIsArg();
+    Arg->setIsArg(false);
+
+    Args[I] = RegisterArg;
+    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+  }
+}
+
+void TargetX8632::lowerRet(const InstRet *Inst) {
+  Variable *Reg = nullptr;
+  if (Inst->hasRetValue()) {
+    Operand *Src0 = legalize(Inst->getRetValue());
+    // TODO(jpp): this is not needed.
+    if (Src0->getType() == IceType_i64) {
+      Variable *eax =
+          legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
+      Variable *edx =
+          legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
+      Reg = eax;
+      Context.insert(InstFakeUse::create(Func, edx));
+    } else if (isScalarFloatingType(Src0->getType())) {
+      _fld(Src0);
+    } else if (isVectorType(Src0->getType())) {
+      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
+    } else {
+      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
+    }
+  }
+  // Add a ret instruction even if sandboxing is enabled, because
+  // addEpilog explicitly looks for a ret instruction as a marker for
+  // where to insert the frame removal instructions.
+  _ret(Reg);
+  // Add a fake use of esp to make sure esp stays alive for the entire
+  // function.  Otherwise post-call esp adjustments get dead-code
+  // eliminated.  TODO: Are there more places where the fake use
+  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
+  // have a ret instruction.
+  Variable *esp =
+      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+  Context.insert(InstFakeUse::create(Func, esp));
+}
+
+void TargetX8632::addProlog(CfgNode *Node) {
+  // Stack frame layout:
+  //
+  // +------------------------+
+  // | 1. return address      |
+  // +------------------------+
+  // | 2. preserved registers |
+  // +------------------------+
+  // | 3. padding             |
+  // +------------------------+
+  // | 4. global spill area   |
+  // +------------------------+
+  // | 5. padding             |
+  // +------------------------+
+  // | 6. local spill area    |
+  // +------------------------+
+  // | 7. padding             |
+  // +------------------------+
+  // | 8. allocas             |
+  // +------------------------+
+  //
+  // The following variables record the size in bytes of the given areas:
+  //  * X86_RET_IP_SIZE_BYTES:  area 1
+  //  * PreservedRegsSizeBytes: area 2
+  //  * SpillAreaPaddingBytes:  area 3
+  //  * GlobalsSize:            area 4
+  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
+  //  * LocalsSpillAreaSize:    area 6
+  //  * SpillAreaSizeBytes:     areas 3 - 7
+
+  // Determine stack frame offsets for each Variable without a
+  // register assignment.  This can be done as one variable per stack
+  // slot.  Or, do coalescing by running the register allocator again
+  // with an infinite set of registers (as a side effect, this gives
+  // variables a second chance at physical register assignment).
+  //
+  // A middle ground approach is to leverage sparsity and allocate one
+  // block of space on the frame for globals (variables with
+  // multi-block lifetime), and one block to share for locals
+  // (single-block lifetime).
+
+  Context.init(Node);
+  Context.setInsertPoint(Context.getCur());
+
+  llvm::SmallBitVector CalleeSaves =
+      getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
+  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
+  size_t GlobalsSize = 0;
+  // If there is a separate locals area, this represents that area.
+  // Otherwise it counts any variable not counted by GlobalsSize.
+  SpillAreaSizeBytes = 0;
+  // If there is a separate locals area, this specifies the alignment
+  // for it.
+  uint32_t LocalsSlotsAlignmentBytes = 0;
+  // The entire spill locations area gets aligned to largest natural
+  // alignment of the variables that have a spill slot.
+  uint32_t SpillAreaAlignmentBytes = 0;
+  // A spill slot linked to a variable with a stack slot should reuse
+  // that stack slot.
+  std::function<bool(Variable *)> TargetVarHook =
+      [&VariablesLinkedToSpillSlots](Variable *Var) {
+        if (auto *SpillVar =
+                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
+          assert(Var->getWeight().isZero());
+          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
+            VariablesLinkedToSpillSlots.push_back(Var);
+            return true;
+          }
+        }
+        return false;
+      };
+
+  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
+  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
+                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
+                        &LocalsSlotsAlignmentBytes, TargetVarHook);
+  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
+  SpillAreaSizeBytes += GlobalsSize;
+
+  // Add push instructions for preserved registers.
+  uint32_t NumCallee = 0;
+  size_t PreservedRegsSizeBytes = 0;
+  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+    if (CalleeSaves[i] && RegsUsed[i]) {
+      ++NumCallee;
+      PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
+      _push(getPhysicalRegister(i));
+    }
+  }
+  Ctx->statsUpdateRegistersSaved(NumCallee);
+
+  // Generate "push ebp; mov ebp, esp"
+  if (IsEbpBasedFrame) {
+    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
+               .count() == 0);
+    PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
+    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
+    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+    _push(ebp);
+    _mov(ebp, esp);
+    // Keep ebp live for late-stage liveness analysis
+    // (e.g. asm-verbose mode).
+    Context.insert(InstFakeUse::create(Func, ebp));
+  }
+
+  // Align the variables area. SpillAreaPaddingBytes is the size of
+  // the region after the preserved registers and before the spill areas.
+  // LocalsSlotsPaddingBytes is the amount of padding between the globals
+  // and locals area if they are separate.
+  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
+  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
+  uint32_t SpillAreaPaddingBytes = 0;
+  uint32_t LocalsSlotsPaddingBytes = 0;
+  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
+                       SpillAreaAlignmentBytes, GlobalsSize,
+                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
+                       &LocalsSlotsPaddingBytes);
+  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
+  uint32_t GlobalsAndSubsequentPaddingSize =
+      GlobalsSize + LocalsSlotsPaddingBytes;
+
+  // Align esp if necessary.
+  if (NeedsStackAlignment) {
+    uint32_t StackOffset =
+        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
+    uint32_t StackSize =
+        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
+    SpillAreaSizeBytes = StackSize - StackOffset;
+  }
+
+  // Generate "sub esp, SpillAreaSizeBytes"
+  if (SpillAreaSizeBytes)
+    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
+         Ctx->getConstantInt32(SpillAreaSizeBytes));
+  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
+
+  resetStackAdjustment();
+
+  // Fill in stack offsets for stack args, and copy args into registers
+  // for those that were register-allocated.  Args are pushed right to
+  // left, so Arg[0] is closest to the stack/frame pointer.
+  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
+  size_t BasicFrameOffset =
+      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
+  if (!IsEbpBasedFrame)
+    BasicFrameOffset += SpillAreaSizeBytes;
+
+  const VarList &Args = Func->getArgs();
+  size_t InArgsSizeBytes = 0;
+  unsigned NumXmmArgs = 0;
+  for (Variable *Arg : Args) {
+    // Skip arguments passed in registers.
+    if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
+      ++NumXmmArgs;
+      continue;
+    }
+    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
+  }
+
+  // Fill in stack offsets for locals.
+  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
+                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
+                      IsEbpBasedFrame);
+  // Assign stack offsets to variables that have been linked to spilled
+  // variables.
+  for (Variable *Var : VariablesLinkedToSpillSlots) {
+    Variable *Linked =
+        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
+    Var->setStackOffset(Linked->getStackOffset());
+  }
+  this->HasComputedFrame = true;
+
+  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
+    OstreamLocker L(Func->getContext());
+    Ostream &Str = Func->getContext()->getStrDump();
+
+    Str << "Stack layout:\n";
+    uint32_t EspAdjustmentPaddingSize =
+        SpillAreaSizeBytes - LocalsSpillAreaSize -
+        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
+    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
+        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
+        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
+        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
+        << " globals spill area = " << GlobalsSize << " bytes\n"
+        << " globals-locals spill areas intermediate padding = "
+        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
+        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
+        << " esp alignment padding = " << EspAdjustmentPaddingSize
+        << " bytes\n";
+
+    Str << "Stack details:\n"
+        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
+        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
+        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
+        << " bytes\n"
+        << " is ebp based = " << IsEbpBasedFrame << "\n";
+  }
+}
+
+void TargetX8632::addEpilog(CfgNode *Node) {
+  InstList &Insts = Node->getInsts();
+  InstList::reverse_iterator RI, E;
+  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
+    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
+      break;
+  }
+  if (RI == E)
+    return;
+
+  // Convert the reverse_iterator position into its corresponding
+  // (forward) iterator position.
+  InstList::iterator InsertPoint = RI.base();
+  --InsertPoint;
+  Context.init(Node);
+  Context.setInsertPoint(InsertPoint);
+
+  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+  if (IsEbpBasedFrame) {
+    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
+    // For late-stage liveness analysis (e.g. asm-verbose mode),
+    // adding a fake use of esp before the assignment of esp=ebp keeps
+    // previous esp adjustments from being dead-code eliminated.
+    Context.insert(InstFakeUse::create(Func, esp));
+    _mov(esp, ebp);
+    _pop(ebp);
+  } else {
+    // add esp, SpillAreaSizeBytes
+    if (SpillAreaSizeBytes)
+      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
+  }
+
+  // Add pop instructions for preserved registers.
+  llvm::SmallBitVector CalleeSaves =
+      getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+    SizeT j = CalleeSaves.size() - i - 1;
+    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
+      continue;
+    if (CalleeSaves[j] && RegsUsed[j]) {
+      _pop(getPhysicalRegister(j));
+    }
+  }
+
+  if (!Ctx->getFlags().getUseSandboxing())
+    return;
+  // Change the original ret instruction into a sandboxed return sequence.
+  // t:ecx = pop
+  // bundle_lock
+  // and t, ~31
+  // jmp *t
+  // bundle_unlock
+  // FakeUse <original_ret_operand>
+  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
+  _pop(T_ecx);
+  lowerIndirectJump(T_ecx);
+  if (RI->getSrcSize()) {
+    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
+    Context.insert(InstFakeUse::create(Func, RetValue));
+  }
+  RI->setDeleted();
+}
+
+void TargetX8632::emitJumpTable(const Cfg *Func,
+                                const InstJumpTable *JumpTable) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Ctx->getStrEmit();
+  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
+  Str << "\t.section\t.rodata." << MangledName
+      << "$jumptable,\"a\",@progbits\n";
+  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
+  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
+
+  // On X8632 pointers are 32-bit hence the use of .long
+  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
+    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
+  Str << "\n";
+}
+
 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
@@ -159,23 +716,6 @@
 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
 } // end of anonymous namespace
 
-void TargetX8632::emitJumpTable(const Cfg *Func,
-                                const InstJumpTable *JumpTable) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Ctx->getStrEmit();
-  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
-  Str << "\t.section\t.rodata." << MangledName
-      << "$jumptable,\"a\",@progbits\n";
-  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
-  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
-
-  // On X8632 pointers are 32-bit hence the use of .long
-  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
-    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
-  Str << "\n";
-}
-
 template <typename T>
 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
   if (!BuildDefs::dump())
@@ -407,214 +947,4 @@
 } // end of namespace dummy3
 } // end of anonymous namespace
 
-//------------------------------------------------------------------------------
-//     __      ______  __     __  ______  ______  __  __   __  ______
-//    /\ \    /\  __ \/\ \  _ \ \/\  ___\/\  == \/\ \/\ "-.\ \/\  ___\
-//    \ \ \___\ \ \/\ \ \ \/ ".\ \ \  __\\ \  __<\ \ \ \ \-.  \ \ \__ \
-//     \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
-//      \/_____/\/_____/\/_/   \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
-//
-//------------------------------------------------------------------------------
-void TargetX8632::lowerCall(const InstCall *Instr) {
-  // x86-32 calling convention:
-  //
-  // * At the point before the call, the stack must be aligned to 16
-  // bytes.
-  //
-  // * The first four arguments of vector type, regardless of their
-  // position relative to the other arguments in the argument list, are
-  // placed in registers xmm0 - xmm3.
-  //
-  // * Other arguments are pushed onto the stack in right-to-left order,
-  // such that the left-most argument ends up on the top of the stack at
-  // the lowest memory address.
-  //
-  // * Stack arguments of vector type are aligned to start at the next
-  // highest multiple of 16 bytes.  Other stack arguments are aligned to
-  // 4 bytes.
-  //
-  // This intends to match the section "IA-32 Function Calling
-  // Convention" of the document "OS X ABI Function Call Guide" by
-  // Apple.
-  NeedsStackAlignment = true;
-
-  typedef std::vector<Operand *> OperandList;
-  OperandList XmmArgs;
-  OperandList StackArgs, StackArgLocations;
-  uint32_t ParameterAreaSizeBytes = 0;
-
-  // Classify each argument operand according to the location where the
-  // argument is passed.
-  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
-    Operand *Arg = Instr->getArg(i);
-    Type Ty = Arg->getType();
-    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
-    assert(typeWidthInBytes(Ty) >= 4);
-    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
-      XmmArgs.push_back(Arg);
-    } else {
-      StackArgs.push_back(Arg);
-      if (isVectorType(Arg->getType())) {
-        ParameterAreaSizeBytes =
-            Traits::applyStackAlignment(ParameterAreaSizeBytes);
-      }
-      Variable *esp =
-          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
-      StackArgLocations.push_back(
-          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
-      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
-    }
-  }
-
-  // Adjust the parameter area so that the stack is aligned.  It is
-  // assumed that the stack is already aligned at the start of the
-  // calling sequence.
-  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
-
-  // Subtract the appropriate amount for the argument area.  This also
-  // takes care of setting the stack adjustment during emission.
-  //
-  // TODO: If for some reason the call instruction gets dead-code
-  // eliminated after lowering, we would need to ensure that the
-  // pre-call and the post-call esp adjustment get eliminated as well.
-  if (ParameterAreaSizeBytes) {
-    _adjust_stack(ParameterAreaSizeBytes);
-  }
-
-  // Copy arguments that are passed on the stack to the appropriate
-  // stack locations.
-  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
-    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
-  }
-
-  // Copy arguments to be passed in registers to the appropriate
-  // registers.
-  // TODO: Investigate the impact of lowering arguments passed in
-  // registers after lowering stack arguments as opposed to the other
-  // way around.  Lowering register arguments after stack arguments may
-  // reduce register pressure.  On the other hand, lowering register
-  // arguments first (before stack arguments) may result in more compact
-  // code, as the memory operand displacements may end up being smaller
-  // before any stack adjustment is done.
-  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
-    Variable *Reg =
-        legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
-    // Generate a FakeUse of register arguments so that they do not get
-    // dead code eliminated as a result of the FakeKill of scratch
-    // registers after the call.
-    Context.insert(InstFakeUse::create(Func, Reg));
-  }
-  // Generate the call instruction.  Assign its result to a temporary
-  // with high register allocation weight.
-  Variable *Dest = Instr->getDest();
-  // ReturnReg doubles as ReturnRegLo as necessary.
-  Variable *ReturnReg = nullptr;
-  Variable *ReturnRegHi = nullptr;
-  if (Dest) {
-    switch (Dest->getType()) {
-    case IceType_NUM:
-      llvm_unreachable("Invalid Call dest type");
-      break;
-    case IceType_void:
-      break;
-    case IceType_i1:
-    case IceType_i8:
-    case IceType_i16:
-    case IceType_i32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
-      break;
-    case IceType_i64:
-      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
-      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
-      break;
-    case IceType_f32:
-    case IceType_f64:
-      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
-      // the fstp instruction.
-      break;
-    case IceType_v4i1:
-    case IceType_v8i1:
-    case IceType_v16i1:
-    case IceType_v16i8:
-    case IceType_v8i16:
-    case IceType_v4i32:
-    case IceType_v4f32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
-      break;
-    }
-  }
-  Operand *CallTarget = legalize(Instr->getCallTarget());
-  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
-  if (NeedSandboxing) {
-    if (llvm::isa<Constant>(CallTarget)) {
-      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
-    } else {
-      Variable *CallTargetVar = nullptr;
-      _mov(CallTargetVar, CallTarget);
-      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
-      const SizeT BundleSize =
-          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
-      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
-      CallTarget = CallTargetVar;
-    }
-  }
-  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
-  Context.insert(NewCall);
-  if (NeedSandboxing)
-    _bundle_unlock();
-  if (ReturnRegHi)
-    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
-
-  // Add the appropriate offset to esp.  The call instruction takes care
-  // of resetting the stack offset during emission.
-  if (ParameterAreaSizeBytes) {
-    Variable *esp =
-        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
-  }
-
-  // Insert a register-kill pseudo instruction.
-  Context.insert(InstFakeKill::create(Func, NewCall));
-
-  // Generate a FakeUse to keep the call live if necessary.
-  if (Instr->hasSideEffects() && ReturnReg) {
-    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
-    Context.insert(FakeUse);
-  }
-
-  if (!Dest)
-    return;
-
-  // Assign the result of the call to Dest.
-  if (ReturnReg) {
-    if (ReturnRegHi) {
-      assert(Dest->getType() == IceType_i64);
-      split64(Dest);
-      Variable *DestLo = Dest->getLo();
-      Variable *DestHi = Dest->getHi();
-      _mov(DestLo, ReturnReg);
-      _mov(DestHi, ReturnRegHi);
-    } else {
-      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
-             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
-             isVectorType(Dest->getType()));
-      if (isVectorType(Dest->getType())) {
-        _movp(Dest, ReturnReg);
-      } else {
-        _mov(Dest, ReturnReg);
-      }
-    }
-  } else if (isScalarFloatingType(Dest->getType())) {
-    // Special treatment for an FP function which returns its result in
-    // st(0).
-    // If Dest ends up being a physical xmm register, the fstp emit code
-    // will route st(0) through a temporary stack slot.
-    _fstp(Dest);
-    // Create a fake use of Dest in case it actually isn't used,
-    // because st(0) still needs to be popped.
-    Context.insert(InstFakeUse::create(Func, Dest));
-  }
-}
-
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index d086135..6187809 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -43,6 +43,10 @@
 
 protected:
   void lowerCall(const InstCall *Instr) override;
+  void lowerArguments() override;
+  void lowerRet(const InstRet *Inst) override;
+  void addProlog(CfgNode *Node) override;
+  void addEpilog(CfgNode *Node) override;
 
 private:
   friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>;
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index f5d4ead..9056648 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -243,9 +243,8 @@
   if (Dest) {
     switch (Dest->getType()) {
     case IceType_NUM:
-      llvm_unreachable("Invalid Call dest type");
-      break;
     case IceType_void:
+      llvm::report_fatal_error("Invalid Call dest type");
       break;
     case IceType_i1:
     case IceType_i8:
@@ -339,36 +338,383 @@
   }
 }
 
-void TargetDataX8664::lowerJumpTables() {
-  switch (Ctx->getFlags().getOutFileType()) {
-  case FT_Elf: {
-    ELFObjectWriter *Writer = Ctx->getObjectWriter();
-    for (const JumpTableData &JumpTable : Ctx->getJumpTables())
-      // TODO(jpp): not 386.
-      Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
-  } break;
-  case FT_Asm:
-    // Already emitted from Cfg
-    break;
-  case FT_Iasm: {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Ctx->getStrEmit();
-    for (const JumpTableData &JT : Ctx->getJumpTables()) {
-      Str << "\t.section\t.rodata." << JT.getFunctionName()
-          << "$jumptable,\"a\",@progbits\n";
-      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
-      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
+void TargetX8664::lowerArguments() {
+  VarList &Args = Func->getArgs();
+  // The first eight vector typed arguments (as well as fp arguments) are passed
+  // in %xmm0 through %xmm7 regardless of their position in the argument list.
+  unsigned NumXmmArgs = 0;
+  // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, %rcx,
+  // %r8, and %r9 regardless of their position in the argument list.
+  unsigned NumGprArgs = 0;
 
-      // On X8664 ILP32 pointers are 32-bit hence the use of .long
-      for (intptr_t TargetOffset : JT.getTargetOffsets())
-        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
-      Str << "\n";
+  Context.init(Func->getEntryNode());
+  Context.setInsertPoint(Context.getCur());
+
+  for (SizeT i = 0, End = Args.size();
+       i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS ||
+                   NumGprArgs < Traits::X86_MAX_GPR_ARGS);
+       ++i) {
+    Variable *Arg = Args[i];
+    Type Ty = Arg->getType();
+    if ((isVectorType(Ty) || isScalarFloatingType(Ty)) &&
+        NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
+      // Replace Arg in the argument list with the home register.  Then
+      // generate an instruction in the prolog to copy the home register
+      // to the assigned location of Arg.
+      int32_t RegNum = getRegisterForXmmArgNum(NumXmmArgs);
+      ++NumXmmArgs;
+      Variable *RegisterArg = Func->makeVariable(Ty);
+      if (BuildDefs::dump())
+        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
+      RegisterArg->setRegNum(RegNum);
+      RegisterArg->setIsArg();
+      Arg->setIsArg(false);
+
+      Args[i] = RegisterArg;
+      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+    } else if (isScalarIntegerType(Ty) &&
+               NumGprArgs < Traits::X86_MAX_GPR_ARGS) {
+      int32_t RegNum = getRegisterForGprArgNum(NumGprArgs);
+      ++NumGprArgs;
+      Variable *RegisterArg = Func->makeVariable(Ty);
+      if (BuildDefs::dump())
+        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
+      RegisterArg->setRegNum(RegNum);
+      RegisterArg->setIsArg();
+      Arg->setIsArg(false);
+
+      Args[i] = RegisterArg;
+      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
     }
-  } break;
   }
 }
 
+void TargetX8664::lowerRet(const InstRet *Inst) {
+  Variable *Reg = nullptr;
+  if (Inst->hasRetValue()) {
+    Operand *Src0 = legalize(Inst->getRetValue());
+    // TODO(jpp): this is not needed.
+    if (Src0->getType() == IceType_i64) {
+      Variable *eax =
+          legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
+      Variable *edx =
+          legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
+      Reg = eax;
+      Context.insert(InstFakeUse::create(Func, edx));
+    } else if (isScalarFloatingType(Src0->getType())) {
+      _fld(Src0);
+    } else if (isVectorType(Src0->getType())) {
+      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
+    } else {
+      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
+    }
+  }
+  // Add a ret instruction even if sandboxing is enabled, because
+  // addEpilog explicitly looks for a ret instruction as a marker for
+  // where to insert the frame removal instructions.
+  _ret(Reg);
+  // Add a fake use of esp to make sure esp stays alive for the entire
+  // function.  Otherwise post-call esp adjustments get dead-code
+  // eliminated.  TODO: Are there more places where the fake use
+  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
+  // have a ret instruction.
+  Variable *esp =
+      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+  Context.insert(InstFakeUse::create(Func, esp));
+}
+
+void TargetX8664::addProlog(CfgNode *Node) {
+  // Stack frame layout:
+  //
+  // +------------------------+
+  // | 1. return address      |
+  // +------------------------+
+  // | 2. preserved registers |
+  // +------------------------+
+  // | 3. padding             |
+  // +------------------------+
+  // | 4. global spill area   |
+  // +------------------------+
+  // | 5. padding             |
+  // +------------------------+
+  // | 6. local spill area    |
+  // +------------------------+
+  // | 7. padding             |
+  // +------------------------+
+  // | 8. allocas             |
+  // +------------------------+
+  //
+  // The following variables record the size in bytes of the given areas:
+  //  * X86_RET_IP_SIZE_BYTES:  area 1
+  //  * PreservedRegsSizeBytes: area 2
+  //  * SpillAreaPaddingBytes:  area 3
+  //  * GlobalsSize:            area 4
+  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
+  //  * LocalsSpillAreaSize:    area 6
+  //  * SpillAreaSizeBytes:     areas 3 - 7
+
+  // Determine stack frame offsets for each Variable without a
+  // register assignment.  This can be done as one variable per stack
+  // slot.  Or, do coalescing by running the register allocator again
+  // with an infinite set of registers (as a side effect, this gives
+  // variables a second chance at physical register assignment).
+  //
+  // A middle ground approach is to leverage sparsity and allocate one
+  // block of space on the frame for globals (variables with
+  // multi-block lifetime), and one block to share for locals
+  // (single-block lifetime).
+
+  Context.init(Node);
+  Context.setInsertPoint(Context.getCur());
+
+  llvm::SmallBitVector CalleeSaves =
+      getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
+  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
+  size_t GlobalsSize = 0;
+  // If there is a separate locals area, this represents that area.
+  // Otherwise it counts any variable not counted by GlobalsSize.
+  SpillAreaSizeBytes = 0;
+  // If there is a separate locals area, this specifies the alignment
+  // for it.
+  uint32_t LocalsSlotsAlignmentBytes = 0;
+  // The entire spill locations area gets aligned to largest natural
+  // alignment of the variables that have a spill slot.
+  uint32_t SpillAreaAlignmentBytes = 0;
+  // A spill slot linked to a variable with a stack slot should reuse
+  // that stack slot.
+  std::function<bool(Variable *)> TargetVarHook =
+      [&VariablesLinkedToSpillSlots](Variable *Var) {
+        if (auto *SpillVar =
+                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
+          assert(Var->getWeight().isZero());
+          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
+            VariablesLinkedToSpillSlots.push_back(Var);
+            return true;
+          }
+        }
+        return false;
+      };
+
+  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
+  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
+                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
+                        &LocalsSlotsAlignmentBytes, TargetVarHook);
+  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
+  SpillAreaSizeBytes += GlobalsSize;
+
+  // Add push instructions for preserved registers.
+  uint32_t NumCallee = 0;
+  size_t PreservedRegsSizeBytes = 0;
+  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+    if (CalleeSaves[i] && RegsUsed[i]) {
+      ++NumCallee;
+      PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
+      _push(getPhysicalRegister(i));
+    }
+  }
+  Ctx->statsUpdateRegistersSaved(NumCallee);
+
+  // Generate "push ebp; mov ebp, esp"
+  if (IsEbpBasedFrame) {
+    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
+               .count() == 0);
+    PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
+    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
+    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+    _push(ebp);
+    _mov(ebp, esp);
+    // Keep ebp live for late-stage liveness analysis
+    // (e.g. asm-verbose mode).
+    Context.insert(InstFakeUse::create(Func, ebp));
+  }
+
+  // Align the variables area. SpillAreaPaddingBytes is the size of
+  // the region after the preserved registers and before the spill areas.
+  // LocalsSlotsPaddingBytes is the amount of padding between the globals
+  // and locals area if they are separate.
+  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
+  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
+  uint32_t SpillAreaPaddingBytes = 0;
+  uint32_t LocalsSlotsPaddingBytes = 0;
+  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
+                       SpillAreaAlignmentBytes, GlobalsSize,
+                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
+                       &LocalsSlotsPaddingBytes);
+  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
+  uint32_t GlobalsAndSubsequentPaddingSize =
+      GlobalsSize + LocalsSlotsPaddingBytes;
+
+  // Align esp if necessary.
+  if (NeedsStackAlignment) {
+    uint32_t StackOffset =
+        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
+    uint32_t StackSize =
+        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
+    SpillAreaSizeBytes = StackSize - StackOffset;
+  }
+
+  // Generate "sub esp, SpillAreaSizeBytes"
+  if (SpillAreaSizeBytes)
+    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
+         Ctx->getConstantInt32(SpillAreaSizeBytes));
+  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
+
+  resetStackAdjustment();
+
+  // Fill in stack offsets for stack args, and copy args into registers
+  // for those that were register-allocated.  Args are pushed right to
+  // left, so Arg[0] is closest to the stack/frame pointer.
+  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
+  size_t BasicFrameOffset =
+      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
+  if (!IsEbpBasedFrame)
+    BasicFrameOffset += SpillAreaSizeBytes;
+
+  const VarList &Args = Func->getArgs();
+  size_t InArgsSizeBytes = 0;
+  unsigned NumXmmArgs = 0;
+  unsigned NumGPRArgs = 0;
+  for (Variable *Arg : Args) {
+    // Skip arguments passed in registers.
+    if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
+      ++NumXmmArgs;
+      continue;
+    }
+    if (isScalarFloatingType(Arg->getType()) &&
+        NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
+      ++NumXmmArgs;
+      continue;
+    }
+    if (isScalarIntegerType(Arg->getType()) &&
+        NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
+      ++NumGPRArgs;
+      continue;
+    }
+    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
+  }
+
+  // Fill in stack offsets for locals.
+  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
+                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
+                      IsEbpBasedFrame);
+  // Assign stack offsets to variables that have been linked to spilled
+  // variables.
+  for (Variable *Var : VariablesLinkedToSpillSlots) {
+    Variable *Linked =
+        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
+    Var->setStackOffset(Linked->getStackOffset());
+  }
+  this->HasComputedFrame = true;
+
+  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
+    OstreamLocker L(Func->getContext());
+    Ostream &Str = Func->getContext()->getStrDump();
+
+    Str << "Stack layout:\n";
+    uint32_t EspAdjustmentPaddingSize =
+        SpillAreaSizeBytes - LocalsSpillAreaSize -
+        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
+    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
+        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
+        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
+        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
+        << " globals spill area = " << GlobalsSize << " bytes\n"
+        << " globals-locals spill areas intermediate padding = "
+        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
+        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
+        << " esp alignment padding = " << EspAdjustmentPaddingSize
+        << " bytes\n";
+
+    Str << "Stack details:\n"
+        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
+        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
+        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
+        << " bytes\n"
+        << " is ebp based = " << IsEbpBasedFrame << "\n";
+  }
+}
+
+void TargetX8664::addEpilog(CfgNode *Node) {
+  InstList &Insts = Node->getInsts();
+  InstList::reverse_iterator RI, E;
+  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
+    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
+      break;
+  }
+  if (RI == E)
+    return;
+
+  // Convert the reverse_iterator position into its corresponding
+  // (forward) iterator position.
+  InstList::iterator InsertPoint = RI.base();
+  --InsertPoint;
+  Context.init(Node);
+  Context.setInsertPoint(InsertPoint);
+
+  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+  if (IsEbpBasedFrame) {
+    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
+    // For late-stage liveness analysis (e.g. asm-verbose mode),
+    // adding a fake use of esp before the assignment of esp=ebp keeps
+    // previous esp adjustments from being dead-code eliminated.
+    Context.insert(InstFakeUse::create(Func, esp));
+    _mov(esp, ebp);
+    _pop(ebp);
+  } else {
+    // add esp, SpillAreaSizeBytes
+    if (SpillAreaSizeBytes)
+      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
+  }
+
+  // Add pop instructions for preserved registers.
+  llvm::SmallBitVector CalleeSaves =
+      getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+    SizeT j = CalleeSaves.size() - i - 1;
+    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
+      continue;
+    if (CalleeSaves[j] && RegsUsed[j]) {
+      _pop(getPhysicalRegister(j));
+    }
+  }
+
+  if (!Ctx->getFlags().getUseSandboxing())
+    return;
+  // Change the original ret instruction into a sandboxed return sequence.
+  // t:ecx = pop
+  // bundle_lock
+  // and t, ~31
+  // jmp *t
+  // bundle_unlock
+  // FakeUse <original_ret_operand>
+  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
+  _pop(T_ecx);
+  lowerIndirectJump(T_ecx);
+  if (RI->getSrcSize()) {
+    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
+    Context.insert(InstFakeUse::create(Func, RetValue));
+  }
+  RI->setDeleted();
+}
+
+void TargetX8664::emitJumpTable(const Cfg *Func,
+                                const InstJumpTable *JumpTable) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Ctx->getStrEmit();
+  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
+  Str << "\t.section\t.rodata." << MangledName
+      << "$jumptable,\"a\",@progbits\n";
+  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
+  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
+
+  // On X8664 ILP32 pointers are 32-bit hence the use of .long
+  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
+    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
+  Str << "\n";
+}
+
 namespace {
 template <typename T> struct PoolTypeConverter {};
 
@@ -507,21 +853,34 @@
   }
 }
 
-void TargetX8664::emitJumpTable(const Cfg *Func,
-                                const InstJumpTable *JumpTable) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Ctx->getStrEmit();
-  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
-  Str << "\t.section\t.rodata." << MangledName
-      << "$jumptable,\"a\",@progbits\n";
-  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
-  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
+void TargetDataX8664::lowerJumpTables() {
+  switch (Ctx->getFlags().getOutFileType()) {
+  case FT_Elf: {
+    ELFObjectWriter *Writer = Ctx->getObjectWriter();
+    for (const JumpTableData &JumpTable : Ctx->getJumpTables())
+      // TODO(jpp): not 386.
+      Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
+  } break;
+  case FT_Asm:
+    // Already emitted from Cfg
+    break;
+  case FT_Iasm: {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Ctx->getStrEmit();
+    for (const JumpTableData &JT : Ctx->getJumpTables()) {
+      Str << "\t.section\t.rodata." << JT.getFunctionName()
+          << "$jumptable,\"a\",@progbits\n";
+      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
+      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
 
-  // On X8664 ILP32 pointers are 32-bit hence the use of .long
-  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
-    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
-  Str << "\n";
+      // On X8664 ILP32 pointers are 32-bit hence the use of .long
+      for (intptr_t TargetOffset : JT.getTargetOffsets())
+        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
+      Str << "\n";
+    }
+  } break;
+  }
 }
 
 void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
diff --git a/src/IceTargetLoweringX8664.h b/src/IceTargetLoweringX8664.h
index 1e012b5..b26db59 100644
--- a/src/IceTargetLoweringX8664.h
+++ b/src/IceTargetLoweringX8664.h
@@ -40,6 +40,10 @@
 
 protected:
   void lowerCall(const InstCall *Instr) override;
+  void lowerArguments() override;
+  void lowerRet(const InstRet *Inst) override;
+  void addProlog(CfgNode *Node) override;
+  void addEpilog(CfgNode *Node) override;
 
 private:
   friend class ::Ice::X86Internal::TargetX86Base<TargetX8664>;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 1e33a96..dbbb747 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -98,10 +98,7 @@
   void emit(const ConstantFloat *C) const final;
   void emit(const ConstantDouble *C) const final;
 
-  void lowerArguments() override;
   void initNodeForLowering(CfgNode *Node) override;
-  void addProlog(CfgNode *Node) override;
-  void addEpilog(CfgNode *Node) override;
   /// Ensure that a 64-bit Variable has been split into 2 32-bit
   /// Variables, creating them if necessary.  This is needed for all
   /// I64 operations, and it is needed for pushing F64 arguments for
@@ -136,7 +133,6 @@
   void lowerInsertElement(const InstInsertElement *Inst) override;
   void lowerLoad(const InstLoad *Inst) override;
   void lowerPhi(const InstPhi *Inst) override;
-  void lowerRet(const InstRet *Inst) override;
   void lowerSelect(const InstSelect *Inst) override;
   void lowerStore(const InstStore *Inst) override;
   void lowerSwitch(const InstSwitch *Inst) override;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 73c0075..74fa5d7 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -793,39 +793,6 @@
       Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
 }
 
-template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
-  VarList &Args = Func->getArgs();
-  // The first four arguments of vector type, regardless of their
-  // position relative to the other arguments in the argument list, are
-  // passed in registers xmm0 - xmm3.
-  unsigned NumXmmArgs = 0;
-
-  Context.init(Func->getEntryNode());
-  Context.setInsertPoint(Context.getCur());
-
-  for (SizeT I = 0, E = Args.size();
-       I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
-    Variable *Arg = Args[I];
-    Type Ty = Arg->getType();
-    if (!isVectorType(Ty))
-      continue;
-    // Replace Arg in the argument list with the home register.  Then
-    // generate an instruction in the prolog to copy the home register
-    // to the assigned location of Arg.
-    int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
-    ++NumXmmArgs;
-    Variable *RegisterArg = Func->makeVariable(Ty);
-    if (BuildDefs::dump())
-      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
-    RegisterArg->setRegNum(RegNum);
-    RegisterArg->setIsArg();
-    Arg->setIsArg(false);
-
-    Args[I] = RegisterArg;
-    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
-  }
-}
-
 /// Helper function for addProlog().
 ///
 /// This assumes Arg is an argument passed on the stack.  This sets the
@@ -844,6 +811,7 @@
   Variable *Hi = Arg->getHi();
   Type Ty = Arg->getType();
   if (Lo && Hi && Ty == IceType_i64) {
+    // TODO(jpp): This special case is not needed for x86-64.
     assert(Lo->getType() != IceType_i64); // don't want infinite recursion
     assert(Hi->getType() != IceType_i64); // don't want infinite recursion
     finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
@@ -872,273 +840,10 @@
 }
 
 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
+  // TODO(jpp): this is wrong for x86-64.
   return IceType_i32;
 }
 
-template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
-  // Stack frame layout:
-  //
-  // +------------------------+
-  // | 1. return address      |
-  // +------------------------+
-  // | 2. preserved registers |
-  // +------------------------+
-  // | 3. padding             |
-  // +------------------------+
-  // | 4. global spill area   |
-  // +------------------------+
-  // | 5. padding             |
-  // +------------------------+
-  // | 6. local spill area    |
-  // +------------------------+
-  // | 7. padding             |
-  // +------------------------+
-  // | 8. allocas             |
-  // +------------------------+
-  //
-  // The following variables record the size in bytes of the given areas:
-  //  * X86_RET_IP_SIZE_BYTES:  area 1
-  //  * PreservedRegsSizeBytes: area 2
-  //  * SpillAreaPaddingBytes:  area 3
-  //  * GlobalsSize:            area 4
-  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
-  //  * LocalsSpillAreaSize:    area 6
-  //  * SpillAreaSizeBytes:     areas 3 - 7
-
-  // Determine stack frame offsets for each Variable without a
-  // register assignment.  This can be done as one variable per stack
-  // slot.  Or, do coalescing by running the register allocator again
-  // with an infinite set of registers (as a side effect, this gives
-  // variables a second chance at physical register assignment).
-  //
-  // A middle ground approach is to leverage sparsity and allocate one
-  // block of space on the frame for globals (variables with
-  // multi-block lifetime), and one block to share for locals
-  // (single-block lifetime).
-
-  Context.init(Node);
-  Context.setInsertPoint(Context.getCur());
-
-  llvm::SmallBitVector CalleeSaves =
-      getRegisterSet(RegSet_CalleeSave, RegSet_None);
-  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
-  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
-  size_t GlobalsSize = 0;
-  // If there is a separate locals area, this represents that area.
-  // Otherwise it counts any variable not counted by GlobalsSize.
-  SpillAreaSizeBytes = 0;
-  // If there is a separate locals area, this specifies the alignment
-  // for it.
-  uint32_t LocalsSlotsAlignmentBytes = 0;
-  // The entire spill locations area gets aligned to largest natural
-  // alignment of the variables that have a spill slot.
-  uint32_t SpillAreaAlignmentBytes = 0;
-  // A spill slot linked to a variable with a stack slot should reuse
-  // that stack slot.
-  std::function<bool(Variable *)> TargetVarHook =
-      [&VariablesLinkedToSpillSlots](Variable *Var) {
-        if (auto *SpillVar =
-                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
-          assert(Var->getWeight().isZero());
-          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
-            VariablesLinkedToSpillSlots.push_back(Var);
-            return true;
-          }
-        }
-        return false;
-      };
-
-  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
-  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
-                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
-                        &LocalsSlotsAlignmentBytes, TargetVarHook);
-  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
-  SpillAreaSizeBytes += GlobalsSize;
-
-  // Add push instructions for preserved registers.
-  uint32_t NumCallee = 0;
-  size_t PreservedRegsSizeBytes = 0;
-  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
-    if (CalleeSaves[i] && RegsUsed[i]) {
-      ++NumCallee;
-      PreservedRegsSizeBytes += 4;
-      _push(getPhysicalRegister(i));
-    }
-  }
-  Ctx->statsUpdateRegistersSaved(NumCallee);
-
-  // Generate "push ebp; mov ebp, esp"
-  if (IsEbpBasedFrame) {
-    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
-               .count() == 0);
-    PreservedRegsSizeBytes += 4;
-    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
-    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-    _push(ebp);
-    _mov(ebp, esp);
-    // Keep ebp live for late-stage liveness analysis
-    // (e.g. asm-verbose mode).
-    Context.insert(InstFakeUse::create(Func, ebp));
-  }
-
-  // Align the variables area. SpillAreaPaddingBytes is the size of
-  // the region after the preserved registers and before the spill areas.
-  // LocalsSlotsPaddingBytes is the amount of padding between the globals
-  // and locals area if they are separate.
-  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
-  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
-  uint32_t SpillAreaPaddingBytes = 0;
-  uint32_t LocalsSlotsPaddingBytes = 0;
-  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
-                       SpillAreaAlignmentBytes, GlobalsSize,
-                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
-                       &LocalsSlotsPaddingBytes);
-  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
-  uint32_t GlobalsAndSubsequentPaddingSize =
-      GlobalsSize + LocalsSlotsPaddingBytes;
-
-  // Align esp if necessary.
-  if (NeedsStackAlignment) {
-    uint32_t StackOffset =
-        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
-    uint32_t StackSize =
-        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
-    SpillAreaSizeBytes = StackSize - StackOffset;
-  }
-
-  // Generate "sub esp, SpillAreaSizeBytes"
-  if (SpillAreaSizeBytes)
-    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
-         Ctx->getConstantInt32(SpillAreaSizeBytes));
-  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
-
-  resetStackAdjustment();
-
-  // Fill in stack offsets for stack args, and copy args into registers
-  // for those that were register-allocated.  Args are pushed right to
-  // left, so Arg[0] is closest to the stack/frame pointer.
-  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
-  size_t BasicFrameOffset =
-      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
-  if (!IsEbpBasedFrame)
-    BasicFrameOffset += SpillAreaSizeBytes;
-
-  const VarList &Args = Func->getArgs();
-  size_t InArgsSizeBytes = 0;
-  unsigned NumXmmArgs = 0;
-  for (Variable *Arg : Args) {
-    // Skip arguments passed in registers.
-    if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
-      ++NumXmmArgs;
-      continue;
-    }
-    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
-  }
-
-  // Fill in stack offsets for locals.
-  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
-                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
-                      IsEbpBasedFrame);
-  // Assign stack offsets to variables that have been linked to spilled
-  // variables.
-  for (Variable *Var : VariablesLinkedToSpillSlots) {
-    Variable *Linked =
-        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
-    Var->setStackOffset(Linked->getStackOffset());
-  }
-  this->HasComputedFrame = true;
-
-  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
-    OstreamLocker L(Func->getContext());
-    Ostream &Str = Func->getContext()->getStrDump();
-
-    Str << "Stack layout:\n";
-    uint32_t EspAdjustmentPaddingSize =
-        SpillAreaSizeBytes - LocalsSpillAreaSize -
-        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
-    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
-        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
-        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
-        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
-        << " globals spill area = " << GlobalsSize << " bytes\n"
-        << " globals-locals spill areas intermediate padding = "
-        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
-        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
-        << " esp alignment padding = " << EspAdjustmentPaddingSize
-        << " bytes\n";
-
-    Str << "Stack details:\n"
-        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
-        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
-        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
-        << " bytes\n"
-        << " is ebp based = " << IsEbpBasedFrame << "\n";
-  }
-}
-
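A minimal sketch of the esp-alignment arithmetic addProlog performs when NeedsStackAlignment is set; applyStackAlignment here is an assumed helper that simply rounds up to the 16-byte x86 stack alignment, and the sizes are made up.

    // Sketch of the "align esp" computation: grow the spill area so that
    // return address + preserved registers + spill area is 16-byte aligned.
    #include <cstdint>
    #include <cstdio>

    static uint32_t applyStackAlignment(uint32_t Value) {
      const uint32_t Alignment = 16; // Traits::X86_STACK_ALIGNMENT_BYTES
      return (Value + Alignment - 1) & ~(Alignment - 1);
    }

    int main() {
      const uint32_t RetIpSizeBytes = 4;   // Traits::X86_RET_IP_SIZE_BYTES
      uint32_t PreservedRegsSizeBytes = 8; // e.g. ebp + one callee-save push
      uint32_t SpillAreaSizeBytes = 24;    // spill slots before alignment
      uint32_t StackOffset = RetIpSizeBytes + PreservedRegsSizeBytes;
      uint32_t StackSize =
          applyStackAlignment(StackOffset + SpillAreaSizeBytes);
      SpillAreaSizeBytes = StackSize - StackOffset; // what "sub esp, N" uses
      std::printf("sub esp, %u\n", SpillAreaSizeBytes); // prints 36 here
      return 0;
    }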
-template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
-  InstList &Insts = Node->getInsts();
-  InstList::reverse_iterator RI, E;
-  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
-    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
-      break;
-  }
-  if (RI == E)
-    return;
-
-  // Convert the reverse_iterator position into its corresponding
-  // (forward) iterator position.
-  InstList::iterator InsertPoint = RI.base();
-  --InsertPoint;
-  Context.init(Node);
-  Context.setInsertPoint(InsertPoint);
-
-  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-  if (IsEbpBasedFrame) {
-    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
-    // For late-stage liveness analysis (e.g. asm-verbose mode),
-    // adding a fake use of esp before the assignment of esp=ebp keeps
-    // previous esp adjustments from being dead-code eliminated.
-    Context.insert(InstFakeUse::create(Func, esp));
-    _mov(esp, ebp);
-    _pop(ebp);
-  } else {
-    // add esp, SpillAreaSizeBytes
-    if (SpillAreaSizeBytes)
-      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
-  }
-
-  // Add pop instructions for preserved registers.
-  llvm::SmallBitVector CalleeSaves =
-      getRegisterSet(RegSet_CalleeSave, RegSet_None);
-  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
-    SizeT j = CalleeSaves.size() - i - 1;
-    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
-      continue;
-    if (CalleeSaves[j] && RegsUsed[j]) {
-      _pop(getPhysicalRegister(j));
-    }
-  }
-
-  if (!Ctx->getFlags().getUseSandboxing())
-    return;
-  // Change the original ret instruction into a sandboxed return sequence.
-  // t:ecx = pop
-  // bundle_lock
-  // and t, ~31
-  // jmp *t
-  // bundle_unlock
-  // FakeUse <original_ret_operand>
-  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
-  _pop(T_ecx);
-  lowerIndirectJump(T_ecx);
-  if (RI->getSrcSize()) {
-    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
-    Context.insert(InstFakeUse::create(Func, RetValue));
-  }
-  RI->setDeleted();
-}
-
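The reverse-to-forward iterator conversion used above to locate the ret and insert the frame-removal code just before it, shown on a plain std::list standing in for InstList.

    // rbegin()/rend() scan backwards; RI.base() points one past the found
    // element, so stepping back once yields the insertion point at the "ret".
    #include <cstdio>
    #include <list>

    int main() {
      std::list<int> Insts = {1, 2, 3, 4, 5};
      auto RI = Insts.rbegin();
      while (RI != Insts.rend() && *RI != 4) // pretend 4 is the ret
        ++RI;
      if (RI == Insts.rend())
        return 0;
      auto InsertPoint = RI.base();
      --InsertPoint;
      std::printf("insert before element %d\n", *InsertPoint); // prints 4
      return 0;
    }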
 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
   switch (Var->getType()) {
   default:
@@ -4236,40 +3941,6 @@
 }
 
 template <class Machine>
-void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
-  Variable *Reg = nullptr;
-  if (Inst->hasRetValue()) {
-    Operand *Src0 = legalize(Inst->getRetValue());
-    if (Src0->getType() == IceType_i64) {
-      Variable *eax =
-          legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
-      Variable *edx =
-          legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
-      Reg = eax;
-      Context.insert(InstFakeUse::create(Func, edx));
-    } else if (isScalarFloatingType(Src0->getType())) {
-      _fld(Src0);
-    } else if (isVectorType(Src0->getType())) {
-      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
-    } else {
-      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
-    }
-  }
-  // Add a ret instruction even if sandboxing is enabled, because
-  // addEpilog explicitly looks for a ret instruction as a marker for
-  // where to insert the frame removal instructions.
-  _ret(Reg);
-  // Add a fake use of esp to make sure esp stays alive for the entire
-  // function.  Otherwise post-call esp adjustments get dead-code
-  // eliminated.  TODO: Are there more places where the fake use
-  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
-  // have a ret instruction.
-  Variable *esp =
-      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-  Context.insert(InstFakeUse::create(Func, esp));
-}
-
-template <class Machine>
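A sketch of the eax/edx split lowerRet applies to an i64 return value: the low 32 bits travel in eax and the high 32 bits in edx. The shifts mirror what loOperand/hiOperand select; the register names below are just labels.

    // Standalone illustration of the i64 return-value split on x86-32.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t RetValue = 0x1122334455667788ULL;
      uint32_t eax = static_cast<uint32_t>(RetValue);       // loOperand
      uint32_t edx = static_cast<uint32_t>(RetValue >> 32); // hiOperand
      std::printf("eax=0x%08x edx=0x%08x\n", static_cast<unsigned>(eax),
                  static_cast<unsigned>(edx));
      return 0;
    }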
 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
   Variable *Dest = Inst->getDest();
   Type DestTy = Dest->getType();