Subzero: Improve effectiveness of local register availability peephole.
X86 only. The register availability peephole optimization during lowering disallows available register substitution when the variable is pre-colored. This is for good reasons (too complex to be discussed here). However, that leaves some potential substitutions on the table.
Specifically, this happens a lot around register arguments to function calls, both at the call site and in the prolog.
The simplest solution seems to be to launder the pre-colored variable through a separate infinite-weight variable, as implemented in this CL through a combination of such copies and extra legalize() calls.
There are other situations where this technique can also work, which may be handled in a separate CL.
This CL also fixes a problem where the stack pointer adjustment in the prolog is subject to dead-code elimination if the function has no epilog. This would only happen in asm-verbose mode, in the final liveness analysis pass prior to code emission.
BUG= none
R=eholk@chromium.org
Review URL: https://codereview.chromium.org/2052683003 .
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index a612268..02dd7fa 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -195,6 +195,9 @@
void TargetX8632::_sub_sp(Operand *Adjustment) {
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_sub(esp, Adjustment);
+ // Add a fake use of the stack pointer, to prevent the stack pointer adjustment
+ // from being dead-code eliminated in a function that doesn't return.
+ Context.insert<InstFakeUse>(esp);
}
void TargetX8632::_link_bp() {
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 73ad386..37b7b36 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -485,25 +485,29 @@
void TargetX8664::_sub_sp(Operand *Adjustment) {
Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
- if (!NeedSandboxing) {
+
+ if (NeedSandboxing) {
+ Variable *esp =
+ getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
+ Variable *r15 =
+ getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
+
+ // .bundle_start
+ // sub Adjustment, %esp
+ // add %r15, %rsp
+ // .bundle_end
+ AutoBundle _(this);
+ _redefined(Context.insert<InstFakeDef>(esp, rsp));
+ _sub(esp, Adjustment);
+ _redefined(Context.insert<InstFakeDef>(rsp, esp));
+ _add(rsp, r15);
+ } else {
_sub(rsp, Adjustment);
- return;
}
- Variable *esp =
- getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
- Variable *r15 =
- getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
-
- // .bundle_start
- // sub Adjustment, %esp
- // add %r15, %rsp
- // .bundle_end
- AutoBundle _(this);
- _redefined(Context.insert<InstFakeDef>(esp, rsp));
- _sub(esp, Adjustment);
- _redefined(Context.insert<InstFakeDef>(rsp, esp));
- _add(rsp, r15);
+ // Add a fake use of the stack pointer, to prevent the stack pointer adjustment
+ // from being dead-code eliminated in a function that doesn't return.
+ Context.insert<InstFakeUse>(rsp);
}
void TargetX8664::initRebasePtr() {
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index af05b7b..6b25823 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1506,6 +1506,7 @@
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerArguments() {
+ const bool OptM1 = Func->getOptLevel() == Opt_m1;
VarList &Args = Func->getArgs();
unsigned NumXmmArgs = 0;
bool XmmSlotsRemain = true;
@@ -1561,8 +1562,20 @@
Arg->setIsArg(false);
Args[i] = RegisterArg;
- Context.insert<InstAssign>(Arg, RegisterArg);
+ // When not Om1, do the assignment through a temporary, instead of directly
+ // from the pre-colored variable, so that a subsequent availabilityGet()
+ // call has a chance to work. (In Om1, don't bother creating extra
+ // instructions with extra variables to register-allocate.)
+ if (OptM1) {
+ Context.insert<InstAssign>(Arg, RegisterArg);
+ } else {
+ Variable *Tmp = makeReg(RegisterArg->getType());
+ Context.insert<InstAssign>(Tmp, RegisterArg);
+ Context.insert<InstAssign>(Arg, Tmp);
+ }
}
+ if (!OptM1)
+ Context.availabilityUpdate();
}
/// Strength-reduce scalar integer multiplication by a constant (for i32 or
@@ -2588,29 +2601,35 @@
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
// Copy arguments that are passed on the stack to the appropriate stack
- // locations.
+ // locations. We make sure legalize() is called on each argument at this
+ // point, to allow availabilityGet() to work.
for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
- lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+ lowerStore(
+ InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i]));
}
// Copy arguments to be passed in registers to the appropriate registers.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
- Variable *Reg =
- legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i));
- // Generate a FakeUse of register arguments so that they do not get dead
- // code eliminated as a result of the FakeKill of scratch registers after
- // the call.
- Context.insert<InstFakeUse>(Reg);
+ XmmArgs[i] =
+ legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i));
}
// Materialize moves for arguments passed in GPRs.
for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
const Type SignatureTy = GprArgs[i].first;
- Operand *Arg = GprArgs[i].second;
- Variable *Reg =
+ Operand *Arg = legalize(GprArgs[i].second);
+ GprArgs[i].second =
legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
assert(SignatureTy == Arg->getType());
(void)SignatureTy;
- Context.insert<InstFakeUse>(Reg);
+ }
+ // Generate a FakeUse of register arguments so that they do not get dead code
+ // eliminated as a result of the FakeKill of scratch registers after the call.
+ // These need to be right before the call instruction.
+ for (auto *Arg : XmmArgs) {
+ Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg));
+ }
+ for (auto &ArgPair : GprArgs) {
+ Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second));
}
// Generate the call instruction. Assign its result to a temporary with high
// register allocation weight.