Eliminate stack adjustment for float-returning functions This involves changing AdjustStack to grow/shrink the stack, and to use that operation exclusively to move the StackAdjustment variable in lowering, rather than in call emission as before. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1449523002 .

commit: 21fd103c847f7968e8df731d7bf4fa5733bd0f3f [log] [tgz]
author: David Sehr <sehr@chromium.org> Fri Nov 13 16:32:37 2015 -0800
committer: David Sehr <sehr@chromium.org> Fri Nov 13 16:32:37 2015 -0800
tree: 7496d8e3137b6cfe2d0e050acab85e04d03147a9
parent: 3607b6c924922d261cbc75b68e07a1e2a5aec5dc [diff]
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index c19ec9d..96dc1a7 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h

@@ -157,7 +157,7 @@
                   const Operand *Src,
                   const typename Traits::Assembler::GPREmitterShiftOp &Emitter);
 
-  static X86TargetLowering *getTarget(const Cfg* Func) {
+  static X86TargetLowering *getTarget(const Cfg *Func) {
     return static_cast<X86TargetLowering *>(Func->getTarget());
   }
 
@@ -405,8 +405,9 @@
   InstX86Jmp(Cfg *Func, Operand *Target);
 };
 
-/// AdjustStack instruction - subtracts esp by the given amount and updates the
-/// stack offset during code emission.
+/// AdjustStack instruction - grows the stack (moves esp down) by the given
+/// amount.  If the amount is negative, it shrinks the stack (moves esp up).
+/// It also updates the target lowering StackAdjustment during code emission.
 template <class Machine>
 class InstX86AdjustStack final : public InstX86Base<Machine> {
   InstX86AdjustStack() = delete;
@@ -414,7 +415,7 @@
   InstX86AdjustStack &operator=(const InstX86AdjustStack &) = delete;
 
 public:
-  static InstX86AdjustStack *create(Cfg *Func, SizeT Amount, Variable *Esp) {
+  static InstX86AdjustStack *create(Cfg *Func, int32_t Amount, Variable *Esp) {
     return new (Func->allocate<InstX86AdjustStack>())
         InstX86AdjustStack(Func, Amount, Esp);
   }
@@ -427,8 +428,8 @@
   }
 
 private:
-  InstX86AdjustStack(Cfg *Func, SizeT Amount, Variable *Esp);
-  SizeT Amount;
+  InstX86AdjustStack(Cfg *Func, int32_t Amount, Variable *Esp);
+  const int32_t Amount;
 };
 
 /// Call instruction. Arguments should have already been pushed.

diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index de9e566..07c2625 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h

@@ -57,7 +57,7 @@
 }
 
 template <class Machine>
-InstX86AdjustStack<Machine>::InstX86AdjustStack(Cfg *Func, SizeT Amount,
+InstX86AdjustStack<Machine>::InstX86AdjustStack(Cfg *Func, int32_t Amount,
                                                 Variable *Esp)
     : InstX86Base<Machine>(Func, InstX86Base<Machine>::Adjuststack, 1, Esp),
       Amount(Amount) {
@@ -581,7 +581,6 @@
     Str << "*";
     CallTarget->emit(Func);
   }
-  Target->resetStackAdjustment();
 }
 
 template <class Machine>
@@ -610,7 +609,6 @@
   } else {
     llvm_unreachable("Unexpected operand type");
   }
-  Target->resetStackAdjustment();
 }
 
 template <class Machine>
@@ -1597,10 +1595,9 @@
                                 this->getDest()->getRegNum()),
           InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum()));
     } else {
-      Asm->cmov(
-          SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
-                                this->getDest()->getRegNum()),
-          Target->stackVarToAsmOperand(SrcVar));
+      Asm->cmov(SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
+                                      this->getDest()->getRegNum()),
+                Target->stackVarToAsmOperand(SrcVar));
     }
   } else if (const auto *Mem = llvm::dyn_cast<
                  typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
@@ -2635,16 +2632,14 @@
     return;
   }
   Type Ty = this->getDest()->getType();
-  size_t Width = typeWidthInBytes(Ty);
   if (!this->getDest()->hasReg()) {
     Str << "\tfstp" << this->getFldString(Ty) << "\t";
     this->getDest()->emit(Func);
     return;
   }
   // Dest is a physical (xmm) register, so st(0) needs to go through memory.
-  // Hack this by creating a temporary stack slot, spilling st(0) there,
-  // loading it into the xmm register, and deallocating the stack slot.
-  Str << "\tsubl\t$" << Width << ", %esp\n";
+  // Hack this by using caller-reserved memory at the top of stack, spilling
+  // st(0) there, and loading it into the xmm register.
   Str << "\tfstp" << this->getFldString(Ty) << "\t"
       << "(%esp)\n";
   Str << "\tmov" << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString
@@ -2652,7 +2647,6 @@
       << "(%esp), ";
   this->getDest()->emit(Func);
   Str << "\n";
-  Str << "\taddl\t$" << Width << ", %esp";
 }
 
 template <class Machine>
@@ -2676,11 +2670,8 @@
     Asm->fstp(Ty, StackAddr);
   } else {
     // Dest is a physical (xmm) register, so st(0) needs to go through memory.
-    // Hack this by creating a temporary stack slot, spilling st(0) there,
-    // loading it into the xmm register, and deallocating the stack slot.
-    Immediate Width(typeWidthInBytes(Ty));
-    Asm->sub(IceType_i32,
-             InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
+    // Hack this by using caller-reserved memory at the top of stack, spilling
+    // st(0) there, and loading it into the xmm register.
     typename InstX86Base<Machine>::Traits::Address StackSlot =
         typename InstX86Base<Machine>::Traits::Address(
             InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, 0,
@@ -2689,8 +2680,6 @@
     Asm->movss(Ty,
                InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum()),
                StackSlot);
-    Asm->add(IceType_i32,
-             InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
   }
 }
 
@@ -2932,7 +2921,10 @@
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
-  Str << "\tsubl\t$" << Amount << ", %esp";
+  if (Amount > 0)
+    Str << "\tsubl\t$" << Amount << ", %esp";
+  else
+    Str << "\taddl\t$" << -Amount << ", %esp";
   auto *Target = InstX86Base<Machine>::getTarget(Func);
   Target->updateStackAdjustment(Amount);
 }
@@ -2941,9 +2933,14 @@
 void InstX86AdjustStack<Machine>::emitIAS(const Cfg *Func) const {
   typename InstX86Base<Machine>::Traits::Assembler *Asm =
       Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  Asm->sub(IceType_i32,
-           InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
-           Immediate(Amount));
+  if (Amount > 0)
+    Asm->sub(IceType_i32,
+             InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
+             Immediate(Amount));
+  else
+    Asm->add(IceType_i32,
+             InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
+             Immediate(-Amount));
   auto *Target = InstX86Base<Machine>::getTarget(Func);
   Target->updateStackAdjustment(Amount);
 }
@@ -2953,7 +2950,10 @@
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
-  Str << "esp = sub.i32 esp, " << Amount;
+  if (Amount > 0)
+    Str << "esp = sub.i32 esp, " << Amount;
+  else
+    Str << "esp = add.i32 esp, " << -Amount;
 }
 
 template <class Machine>

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 52ba7b5..27f9ae0 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp

@@ -131,7 +131,7 @@
 
   OperandList XmmArgs;
   OperandList StackArgs, StackArgLocations;
-  uint32_t ParameterAreaSizeBytes = 0;
+  int32_t ParameterAreaSizeBytes = 0;
 
   // Classify each argument operand according to the location where the
   // argument is passed.
@@ -158,6 +158,13 @@
       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
     }
   }
+  // Ensure there is enough space for the fstp/movs for floating returns.
+  Variable *Dest = Instr->getDest();
+  if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
+    ParameterAreaSizeBytes =
+        std::max(static_cast<size_t>(ParameterAreaSizeBytes),
+                 typeWidthInBytesOnStack(Dest->getType()));
+  }
 
   // Adjust the parameter area so that the stack is aligned. It is assumed that
   // the stack is already aligned at the start of the calling sequence.
@@ -197,7 +204,6 @@
   }
   // Generate the call instruction. Assign its result to a temporary with high
   // register allocation weight.
-  Variable *Dest = Instr->getDest();
   // ReturnReg doubles as ReturnRegLo as necessary.
   Variable *ReturnReg = nullptr;
   Variable *ReturnRegHi = nullptr;
@@ -255,17 +261,24 @@
   if (ReturnRegHi)
     Context.insert(InstFakeDef::create(Func, ReturnRegHi));
 
-  // Add the appropriate offset to esp. The call instruction takes care of
-  // resetting the stack offset during emission.
-  if (ParameterAreaSizeBytes) {
-    Variable *esp =
-        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
-  }
-
   // Insert a register-kill pseudo instruction.
   Context.insert(InstFakeKill::create(Func, NewCall));
 
+  if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
+    // Special treatment for an FP function which returns its result in st(0).
+    // If Dest ends up being a physical xmm register, the fstp emit code will
+    // route st(0) through the space reserved in the function argument area
+    // we allocated.
+    _fstp(Dest);
+    // Create a fake use of Dest in case it actually isn't used, because st(0)
+    // still needs to be popped.
+    Context.insert(InstFakeUse::create(Func, Dest));
+  }
+
+  // Add the appropriate offset to esp.
+  if (ParameterAreaSizeBytes)
+    _adjust_stack(-ParameterAreaSizeBytes);
+
   // Generate a FakeUse to keep the call live if necessary.
   if (Instr->hasSideEffects() && ReturnReg) {
     Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
@@ -293,14 +306,6 @@
         _mov(Dest, ReturnReg);
       }
     }
-  } else if (isScalarFloatingType(Dest->getType())) {
-    // Special treatment for an FP function which returns its result in st(0).
-    // If Dest ends up being a physical xmm register, the fstp emit code will
-    // route st(0) through a temporary stack slot.
-    _fstp(Dest);
-    // Create a fake use of Dest in case it actually isn't used, because st(0)
-    // still needs to be popped.
-    Context.insert(InstFakeUse::create(Func, Dest));
   }
 }
 
@@ -363,11 +368,7 @@
   _ret(Reg);
   // Add a fake use of esp to make sure esp stays alive for the entire
   // function. Otherwise post-call esp adjustments get dead-code eliminated.
-  // TODO: Are there more places where the fake use should be inserted? E.g.
-  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
-  Variable *esp =
-      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-  Context.insert(InstFakeUse::create(Func, esp));
+  keepEspLiveAtExit();
 }
 
 void TargetX8632::addProlog(CfgNode *Node) {

diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index bdebfbe..4987646 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp

@@ -386,11 +386,7 @@
   _ret(Reg);
   // Add a fake use of esp to make sure esp stays alive for the entire
   // function. Otherwise post-call esp adjustments get dead-code eliminated.
-  // TODO: Are there more places where the fake use should be inserted? E.g.
-  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
-  Variable *esp =
-      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-  Context.insert(InstFakeUse::create(Func, esp));
+  keepEspLiveAtExit();
 }
 
 void TargetX8664::addProlog(CfgNode *Node) {

diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index f004d6e..d23e35b 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h

@@ -226,6 +226,13 @@
   void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                            Operand *Src0, Operand *Src1);
 
+  /// Emit a fake use of esp to make sure esp stays alive for the entire
+  /// function. Otherwise some esp adjustments get dead-code eliminated.
+  void keepEspLiveAtExit() {
+    Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
+    Context.insert(InstFakeUse::create(Func, esp));
+  }
+
   /// Operand legalization helpers. To deal with address mode constraints, the
   /// helpers will create a new Operand and emit instructions that guarantee
   /// that the Operand kind is one of those indicated by the LegalMask (a

diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 6b08db8..729bc91 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h

@@ -5207,6 +5207,9 @@
 void TargetX86Base<Machine>::lowerUnreachable(
     const InstUnreachable * /*Inst*/) {
   _ud2();
+  // Add a fake use of esp to make sure esp adjustments after the unreachable
+  // do not get dead-code eliminated.
+  keepEspLiveAtExit();
 }
 
 template <class Machine>
commit	21fd103c847f7968e8df731d7bf4fa5733bd0f3f	[log] [tgz]
author	David Sehr <sehr@chromium.org>	Fri Nov 13 16:32:37 2015 -0800
committer	David Sehr <sehr@chromium.org>	Fri Nov 13 16:32:37 2015 -0800
tree	7496d8e3137b6cfe2d0e050acab85e04d03147a9
parent	3607b6c924922d261cbc75b68e07a1e2a5aec5dc [diff]