Subzero: Improve effectiveness of local register availability peephole.

X86 only.  The register availability peephole optimization during lowering disallows available register substitution when the variable is pre-colored.  This is for good reasons (too complex to be discussed here).  However, that leaves some potential substitutions on the table.

Specifically, this happens a lot around register arguments to function calls, both at the call site and in the prolog.

The simplest solution seems to be to launder the pre-colored variable through a separate infinite-weight variable, as implemented in this CL through a combination of such copies and extra legalize() calls.

There are other situations where this technique can also work, which may be handled in a separate CL.

This CL also fixes a problem where the stack pointer adjustment in the prolog is subject to dead-code elimination if the function has no epilog.  This would only happen in asm-verbose mode, in the final liveness analysis pass prior to code emission.

BUG= none
R=eholk@chromium.org

Review URL: https://codereview.chromium.org/2052683003 .
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index a612268..02dd7fa 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -195,6 +195,9 @@
 void TargetX8632::_sub_sp(Operand *Adjustment) {
   Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   _sub(esp, Adjustment);
+  // Add a fake use of the stack pointer to prevent the stack pointer adjustment
+  // from being dead-code eliminated in a function that doesn't return.
+  Context.insert<InstFakeUse>(esp);
 }
 
 void TargetX8632::_link_bp() {
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 73ad386..37b7b36 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -485,25 +485,29 @@
 void TargetX8664::_sub_sp(Operand *Adjustment) {
   Variable *rsp =
       getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
-  if (!NeedSandboxing) {
+
+  if (NeedSandboxing) {
+    Variable *esp =
+        getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
+    Variable *r15 =
+        getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
+
+    // .bundle_start
+    // sub Adjustment, %esp
+    // add %r15, %rsp
+    // .bundle_end
+    AutoBundle _(this);
+    _redefined(Context.insert<InstFakeDef>(esp, rsp));
+    _sub(esp, Adjustment);
+    _redefined(Context.insert<InstFakeDef>(rsp, esp));
+    _add(rsp, r15);
+  } else {
     _sub(rsp, Adjustment);
-    return;
   }
 
-  Variable *esp =
-      getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
-  Variable *r15 =
-      getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
-
-  // .bundle_start
-  // sub Adjustment, %esp
-  // add %r15, %rsp
-  // .bundle_end
-  AutoBundle _(this);
-  _redefined(Context.insert<InstFakeDef>(esp, rsp));
-  _sub(esp, Adjustment);
-  _redefined(Context.insert<InstFakeDef>(rsp, esp));
-  _add(rsp, r15);
+  // Add a fake use of the stack pointer to prevent the stack pointer adjustment
+  // from being dead-code eliminated in a function that doesn't return.
+  Context.insert<InstFakeUse>(rsp);
 }
 
 void TargetX8664::initRebasePtr() {
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index af05b7b..6b25823 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1506,6 +1506,7 @@
 
 template <typename TraitsType>
 void TargetX86Base<TraitsType>::lowerArguments() {
+  const bool OptM1 = Func->getOptLevel() == Opt_m1;
   VarList &Args = Func->getArgs();
   unsigned NumXmmArgs = 0;
   bool XmmSlotsRemain = true;
@@ -1561,8 +1562,20 @@
     Arg->setIsArg(false);
 
     Args[i] = RegisterArg;
-    Context.insert<InstAssign>(Arg, RegisterArg);
+    // When not Om1, do the assignment through a temporary, instead of directly
+    // from the pre-colored variable, so that a subsequent availabilityGet()
+    // call has a chance to work.  (In Om1, don't bother creating extra
+    // instructions with extra variables to register-allocate.)
+    if (OptM1) {
+      Context.insert<InstAssign>(Arg, RegisterArg);
+    } else {
+      Variable *Tmp = makeReg(RegisterArg->getType());
+      Context.insert<InstAssign>(Tmp, RegisterArg);
+      Context.insert<InstAssign>(Arg, Tmp);
+    }
   }
+  if (!OptM1)
+    Context.availabilityUpdate();
 }
 
 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
@@ -2588,29 +2601,35 @@
   ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
   assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
   // Copy arguments that are passed on the stack to the appropriate stack
-  // locations.
+  // locations.  We make sure legalize() is called on each argument at this
+  // point, to allow availabilityGet() to work.
   for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
-    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+    lowerStore(
+        InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i]));
   }
   // Copy arguments to be passed in registers to the appropriate registers.
   for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
-    Variable *Reg =
-        legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i));
-    // Generate a FakeUse of register arguments so that they do not get dead
-    // code eliminated as a result of the FakeKill of scratch registers after
-    // the call.
-    Context.insert<InstFakeUse>(Reg);
+    XmmArgs[i] =
+        legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i));
   }
   // Materialize moves for arguments passed in GPRs.
   for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
     const Type SignatureTy = GprArgs[i].first;
-    Operand *Arg = GprArgs[i].second;
-    Variable *Reg =
+    Operand *Arg = legalize(GprArgs[i].second);
+    GprArgs[i].second =
         legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
     assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
     assert(SignatureTy == Arg->getType());
     (void)SignatureTy;
-    Context.insert<InstFakeUse>(Reg);
+  }
+  // Generate a FakeUse of register arguments so that they do not get dead code
+  // eliminated as a result of the FakeKill of scratch registers after the call.
+  // These need to be right before the call instruction.
+  for (auto *Arg : XmmArgs) {
+    Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg));
+  }
+  for (auto &ArgPair : GprArgs) {
+    Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second));
   }
   // Generate the call instruction. Assign its result to a temporary with high
   // register allocation weight.