Eliminate stack adjustment for float-returning functions
This involves changing AdjustStack to grow/shrink the stack, and to use that
operation exclusively to move the StackAdjustment variable in lowering, rather
than in call emission as before.
BUG=
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1449523002 .
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index c19ec9d..96dc1a7 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -157,7 +157,7 @@
const Operand *Src,
const typename Traits::Assembler::GPREmitterShiftOp &Emitter);
- static X86TargetLowering *getTarget(const Cfg* Func) {
+ static X86TargetLowering *getTarget(const Cfg *Func) {
return static_cast<X86TargetLowering *>(Func->getTarget());
}
@@ -405,8 +405,9 @@
InstX86Jmp(Cfg *Func, Operand *Target);
};
-/// AdjustStack instruction - subtracts esp by the given amount and updates the
-/// stack offset during code emission.
+/// AdjustStack instruction - grows the stack (moves esp down) by the given
+/// amount. If the amount is negative, it shrinks the stack (moves esp up).
+/// It also updates the target lowering StackAdjustment during code emission.
template <class Machine>
class InstX86AdjustStack final : public InstX86Base<Machine> {
InstX86AdjustStack() = delete;
@@ -414,7 +415,7 @@
InstX86AdjustStack &operator=(const InstX86AdjustStack &) = delete;
public:
- static InstX86AdjustStack *create(Cfg *Func, SizeT Amount, Variable *Esp) {
+ static InstX86AdjustStack *create(Cfg *Func, int32_t Amount, Variable *Esp) {
return new (Func->allocate<InstX86AdjustStack>())
InstX86AdjustStack(Func, Amount, Esp);
}
@@ -427,8 +428,8 @@
}
private:
- InstX86AdjustStack(Cfg *Func, SizeT Amount, Variable *Esp);
- SizeT Amount;
+ InstX86AdjustStack(Cfg *Func, int32_t Amount, Variable *Esp);
+ const int32_t Amount;
};
/// Call instruction. Arguments should have already been pushed.
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index de9e566..07c2625 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -57,7 +57,7 @@
}
template <class Machine>
-InstX86AdjustStack<Machine>::InstX86AdjustStack(Cfg *Func, SizeT Amount,
+InstX86AdjustStack<Machine>::InstX86AdjustStack(Cfg *Func, int32_t Amount,
Variable *Esp)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::Adjuststack, 1, Esp),
Amount(Amount) {
@@ -581,7 +581,6 @@
Str << "*";
CallTarget->emit(Func);
}
- Target->resetStackAdjustment();
}
template <class Machine>
@@ -610,7 +609,6 @@
} else {
llvm_unreachable("Unexpected operand type");
}
- Target->resetStackAdjustment();
}
template <class Machine>
@@ -1597,10 +1595,9 @@
this->getDest()->getRegNum()),
InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum()));
} else {
- Asm->cmov(
- SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
- this->getDest()->getRegNum()),
- Target->stackVarToAsmOperand(SrcVar));
+ Asm->cmov(SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
+ this->getDest()->getRegNum()),
+ Target->stackVarToAsmOperand(SrcVar));
}
} else if (const auto *Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
@@ -2635,16 +2632,14 @@
return;
}
Type Ty = this->getDest()->getType();
- size_t Width = typeWidthInBytes(Ty);
if (!this->getDest()->hasReg()) {
Str << "\tfstp" << this->getFldString(Ty) << "\t";
this->getDest()->emit(Func);
return;
}
// Dest is a physical (xmm) register, so st(0) needs to go through memory.
- // Hack this by creating a temporary stack slot, spilling st(0) there,
- // loading it into the xmm register, and deallocating the stack slot.
- Str << "\tsubl\t$" << Width << ", %esp\n";
+ // Hack this by using caller-reserved memory at the top of stack, spilling
+ // st(0) there, and loading it into the xmm register.
Str << "\tfstp" << this->getFldString(Ty) << "\t"
<< "(%esp)\n";
Str << "\tmov" << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString
@@ -2652,7 +2647,6 @@
<< "(%esp), ";
this->getDest()->emit(Func);
Str << "\n";
- Str << "\taddl\t$" << Width << ", %esp";
}
template <class Machine>
@@ -2676,11 +2670,8 @@
Asm->fstp(Ty, StackAddr);
} else {
// Dest is a physical (xmm) register, so st(0) needs to go through memory.
- // Hack this by creating a temporary stack slot, spilling st(0) there,
- // loading it into the xmm register, and deallocating the stack slot.
- Immediate Width(typeWidthInBytes(Ty));
- Asm->sub(IceType_i32,
- InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
+ // Hack this by using caller-reserved memory at the top of stack, spilling
+ // st(0) there, and loading it into the xmm register.
typename InstX86Base<Machine>::Traits::Address StackSlot =
typename InstX86Base<Machine>::Traits::Address(
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, 0,
@@ -2689,8 +2680,6 @@
Asm->movss(Ty,
InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum()),
StackSlot);
- Asm->add(IceType_i32,
- InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
}
}
@@ -2932,7 +2921,10 @@
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
- Str << "\tsubl\t$" << Amount << ", %esp";
+ if (Amount > 0)
+ Str << "\tsubl\t$" << Amount << ", %esp";
+ else
+ Str << "\taddl\t$" << -Amount << ", %esp";
auto *Target = InstX86Base<Machine>::getTarget(Func);
Target->updateStackAdjustment(Amount);
}
@@ -2941,9 +2933,14 @@
void InstX86AdjustStack<Machine>::emitIAS(const Cfg *Func) const {
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
- Asm->sub(IceType_i32,
- InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
- Immediate(Amount));
+ if (Amount > 0)
+ Asm->sub(IceType_i32,
+ InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
+ Immediate(Amount));
+ else
+ Asm->add(IceType_i32,
+ InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
+ Immediate(-Amount));
auto *Target = InstX86Base<Machine>::getTarget(Func);
Target->updateStackAdjustment(Amount);
}
@@ -2953,7 +2950,10 @@
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
- Str << "esp = sub.i32 esp, " << Amount;
+ if (Amount > 0)
+ Str << "esp = sub.i32 esp, " << Amount;
+ else
+ Str << "esp = add.i32 esp, " << -Amount;
}
template <class Machine>
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 52ba7b5..27f9ae0 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -131,7 +131,7 @@
OperandList XmmArgs;
OperandList StackArgs, StackArgLocations;
- uint32_t ParameterAreaSizeBytes = 0;
+ int32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the
// argument is passed.
@@ -158,6 +158,13 @@
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
+ // Ensure there is enough space for the fstp/movs for floating returns.
+ Variable *Dest = Instr->getDest();
+ if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
+ ParameterAreaSizeBytes =
+ std::max(static_cast<size_t>(ParameterAreaSizeBytes),
+ typeWidthInBytesOnStack(Dest->getType()));
+ }
// Adjust the parameter area so that the stack is aligned. It is assumed that
// the stack is already aligned at the start of the calling sequence.
@@ -197,7 +204,6 @@
}
// Generate the call instruction. Assign its result to a temporary with high
// register allocation weight.
- Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
@@ -255,17 +261,24 @@
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
- // Add the appropriate offset to esp. The call instruction takes care of
- // resetting the stack offset during emission.
- if (ParameterAreaSizeBytes) {
- Variable *esp =
- Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
- _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
- }
-
// Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall));
+ if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
+ // Special treatment for an FP function which returns its result in st(0).
+ // If Dest ends up being a physical xmm register, the fstp emit code will
+ // route st(0) through the space reserved in the function argument area
+ // we allocated.
+ _fstp(Dest);
+ // Create a fake use of Dest in case it actually isn't used, because st(0)
+ // still needs to be popped.
+ Context.insert(InstFakeUse::create(Func, Dest));
+ }
+
+ // Add the appropriate offset to esp.
+ if (ParameterAreaSizeBytes)
+ _adjust_stack(-ParameterAreaSizeBytes);
+
// Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
@@ -293,14 +306,6 @@
_mov(Dest, ReturnReg);
}
}
- } else if (isScalarFloatingType(Dest->getType())) {
- // Special treatment for an FP function which returns its result in st(0).
- // If Dest ends up being a physical xmm register, the fstp emit code will
- // route st(0) through a temporary stack slot.
- _fstp(Dest);
- // Create a fake use of Dest in case it actually isn't used, because st(0)
- // still needs to be popped.
- Context.insert(InstFakeUse::create(Func, Dest));
}
}
@@ -363,11 +368,7 @@
_ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire
// function. Otherwise post-call esp adjustments get dead-code eliminated.
- // TODO: Are there more places where the fake use should be inserted? E.g.
- // "void f(int n){while(1) g(n);}" may not have a ret instruction.
- Variable *esp =
- Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
- Context.insert(InstFakeUse::create(Func, esp));
+ keepEspLiveAtExit();
}
void TargetX8632::addProlog(CfgNode *Node) {
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index bdebfbe..4987646 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -386,11 +386,7 @@
_ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire
// function. Otherwise post-call esp adjustments get dead-code eliminated.
- // TODO: Are there more places where the fake use should be inserted? E.g.
- // "void f(int n){while(1) g(n);}" may not have a ret instruction.
- Variable *esp =
- Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
- Context.insert(InstFakeUse::create(Func, esp));
+ keepEspLiveAtExit();
}
void TargetX8664::addProlog(CfgNode *Node) {
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index f004d6e..d23e35b 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -226,6 +226,13 @@
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1);
+ /// Emit a fake use of esp to make sure esp stays alive for the entire
+ /// function. Otherwise some esp adjustments get dead-code eliminated.
+ void keepEspLiveAtExit() {
+ Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
+ Context.insert(InstFakeUse::create(Func, esp));
+ }
+
/// Operand legalization helpers. To deal with address mode constraints, the
/// helpers will create a new Operand and emit instructions that guarantee
/// that the Operand kind is one of those indicated by the LegalMask (a
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 6b08db8..729bc91 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -5207,6 +5207,9 @@
void TargetX86Base<Machine>::lowerUnreachable(
const InstUnreachable * /*Inst*/) {
_ud2();
+ // Add a fake use of esp to make sure esp adjustments after the unreachable
+ // do not get dead-code eliminated.
+ keepEspLiveAtExit();
}
template <class Machine>