Subzero: Automatically infer regalloc preferences and overlap.

Originally, register preference and overlap for a given Variable were specified manually.  That is, it was stated by hand which related Variable (if any) would be a good source for register selection when choosing a free register for the Variable, all things being equal.  The manual scheme also permitted the rather dangerous "AllowOverlap" specification, which let the Variable use its preferred Variable's register even if their live ranges overlapped.

Now, all this selection is automatic, and the machinery for manual specification is removed.

A few other changes in this CL:

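- Address mode inference leverages the more precise definition tracking in VariablesMetadata (getSingleDefinition() instead of the old getDefinition())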

- Better regalloc dump messages to follow the logic

- "-verbose most" enables all verbose options except regalloc and time

- "-ias" is an alias for "-integrated-as"

- Bug fix: prevent 8-bit register ah from being used in register allocation, unless it is pre-colored

- Bug fix: the _mov helper where Dest is NULL wasn't always actually creating a new Variable

- A few tests are updated based on slightly different O2 register allocation decisions

The static stats actually improve slightly across the board (around 1%), except that frame size improves by 6-10%.  This is probably from smarter register allocation decisions, particularly involving phi lowering temporaries, where the manual hints weren't too good to start with.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/597003004
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index d2548a6..8f51f43 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -377,13 +377,6 @@
         Str << "?";
       Str << " weight=" << Var->getWeight() << " ";
       Var->dump(this);
-      if (Variable *Pref = Var->getPreferredRegister()) {
-        Str << " pref=";
-        Pref->dump(this);
-        if (Var->getRegisterOverlap())
-          Str << ",overlap";
-        Str << " ";
-      }
       Str << " LIVE=" << Var->getLiveRange() << "\n";
     }
   }
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index eff4e97..4efec93 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -200,13 +200,6 @@
       Variable *Dest = (*I2)->getDest();
       assert(Dest);
       InstAssign *NewInst = InstAssign::create(Func, Dest, Operand);
-      // If Src is a variable, set the Src and Dest variables to
-      // prefer each other for register allocation.
-      if (Variable *Src = llvm::dyn_cast<Variable>(Operand)) {
-        bool AllowOverlap = false;
-        Dest->setPreferredRegister(Src, AllowOverlap);
-        Src->setPreferredRegister(Dest, AllowOverlap);
-      }
       if (CmpInstDest == Operand)
         Insts.insert(SafeInsertionPoint, NewInst);
       else
diff --git a/src/IceDefs.h b/src/IceDefs.h
index 322e9b4..4aa99e2 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -105,7 +105,8 @@
   IceV_Frame = 1 << 9,
   IceV_Timing = 1 << 10,
   IceV_AddrOpt = 1 << 11,
-  IceV_All = ~IceV_None
+  IceV_All = ~IceV_None,
+  IceV_Most = IceV_All & ~(IceV_Timing | IceV_LinearScan)
 };
 typedef uint32_t VerboseMask;
 
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index a88194e..9706ce9 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -346,12 +346,7 @@
   IceString PhiName = Dest->getName() + "_phi";
   Variable *NewSrc = Func->makeVariable(Dest->getType(), PhiName);
   this->Dest = NewSrc;
-  InstAssign *NewInst = InstAssign::create(Func, Dest, NewSrc);
-  // Set Dest and NewSrc to have affinity with each other, as a hint
-  // for register allocation.
-  Dest->setPreferredRegister(NewSrc, false);
-  NewSrc->setPreferredRegister(Dest, false);
-  return NewInst;
+  return InstAssign::create(Func, Dest, NewSrc);
 }
 
 InstRet::InstRet(Cfg *Func, Operand *RetValue)
diff --git a/src/IceInst.h b/src/IceInst.h
index 5c07dc0..10caa8b 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -92,6 +92,8 @@
     return NodeList();
   }
 
+  virtual bool isSimpleAssign() const { return false; }
+
   void livenessLightweight(Cfg *Func, llvm::BitVector &Live);
   void liveness(InstNumberT InstNumber, llvm::BitVector &Live,
                 Liveness *Liveness, const CfgNode *Node);
@@ -233,6 +235,7 @@
     return new (Func->allocateInst<InstAssign>())
         InstAssign(Func, Dest, Source);
   }
+  virtual bool isSimpleAssign() const { return true; }
   virtual void dump(const Cfg *Func) const;
   static bool classof(const Inst *Inst) { return Inst->getKind() == Assign; }
 
diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index e8ba59a..8e7df05 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def
@@ -44,11 +44,16 @@
 // all of the registers are considered and have distinct numberings.
 // This is in contrast to the above, where the "encode" is based on how
 // the register numbers will be encoded in binaries and values can overlap.
+// Note that the isI8 attribute of Reg_ah is not set.  In general we
+// don't want the register allocator choosing Reg_ah, in particular
+// for lowering insertelement to pinsrb where internally we use an
+// 8-bit operand but externally pinsrb uses a 32-bit register, in
+// which Reg_ah doesn't map to eax.
 #define REGX8632_TABLE                                                  \
   /* val, encode, name, name16, name8, scratch, preserved, stackptr,    \
      frameptr, isI8, isInt, isFP */                                     \
   REGX8632_GPR_TABLE                                                    \
-  X(Reg_ah,  = Reg_eax + 4,   "???",  ""  , "ah", 0, 0, 0, 0, 1, 0, 0)  \
+  X(Reg_ah,  = Reg_eax + 4,   "???",  ""  , "ah", 0, 0, 0, 0, 0, 0, 0)  \
   REGX8632_XMM_TABLE
 //#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,
 //          frameptr, isI8, isInt, isFP)
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index ac30870..f7447c3 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -744,6 +744,7 @@
   virtual bool isRedundantAssign() const {
     return checkForRedundantAssign(getDest(), getSrc(0));
   }
+  virtual bool isSimpleAssign() const { return true; }
   virtual void emit(const Cfg *Func) const;
   virtual void dump(const Cfg *Func) const {
     Ostream &Str = Func->getContext()->getStrDump();
@@ -1199,7 +1200,7 @@
   virtual ~InstX8632Movsx() {}
 };
 
-// Movsx - copy from a narrower integer type to a wider integer
+// Movzx - copy from a narrower integer type to a wider integer
 // type, with zero extension.
 class InstX8632Movzx : public InstX8632 {
 public:
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index 9e523be..d1854f4 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -165,13 +165,13 @@
   // of the block that actually uses a Variable.
   assert(Node);
   bool MakeMulti = false;
+  if (IsImplicit)
+    MakeMulti = true;
   // A phi source variable conservatively needs to be marked as
   // multi-block, even if its definition is in the same block.  This
   // is because there can be additional control flow before branching
   // back to this node, and the variable is live throughout those
   // nodes.
-  if (IsImplicit)
-    MakeMulti = true;
   if (!IsFromDef && Instr && llvm::isa<InstPhi>(Instr))
     MakeMulti = true;
 
@@ -201,20 +201,60 @@
   // of the block, consider not marking this as a separate use.  But
   // be careful not to omit all uses of the variable if markDef() and
   // markUse() both use this optimization.
+  assert(Node);
+  Definitions.push_back(Instr);
   const bool IsFromDef = true;
   const bool IsImplicit = false;
   markUse(Instr, Node, IsFromDef, IsImplicit);
   switch (MultiDef) {
   case MDS_Unknown:
+    assert(SingleDefNode == NULL);
     MultiDef = MDS_SingleDef;
-    SingleDefInst = Instr;
+    SingleDefNode = Node;
     break;
   case MDS_SingleDef:
-    MultiDef = MDS_MultiDef;
-    SingleDefInst = NULL;
+    assert(SingleDefNode);
+    if (Node == SingleDefNode) {
+      MultiDef = MDS_MultiDefSingleBlock;
+    } else {
+      MultiDef = MDS_MultiDefMultiBlock;
+      SingleDefNode = NULL;
+    }
     break;
-  case MDS_MultiDef:
+  case MDS_MultiDefSingleBlock:
+    assert(SingleDefNode);
+    if (Node != SingleDefNode) {
+      MultiDef = MDS_MultiDefMultiBlock;
+      SingleDefNode = NULL;
+    }
     break;
+  case MDS_MultiDefMultiBlock:
+    assert(SingleDefNode == NULL);
+    break;
+  }
+}
+
+const Inst *VariableTracking::getFirstDefinition() const {
+  switch (MultiDef) {
+  case MDS_Unknown:
+  case MDS_MultiDefMultiBlock:
+    return NULL;
+  case MDS_SingleDef:
+  case MDS_MultiDefSingleBlock:
+    assert(!Definitions.empty());
+    return Definitions[0];
+  }
+}
+
+const Inst *VariableTracking::getSingleDefinition() const {
+  switch (MultiDef) {
+  case MDS_Unknown:
+  case MDS_MultiDefMultiBlock:
+  case MDS_MultiDefSingleBlock:
+    return NULL;
+  case MDS_SingleDef:
+    assert(!Definitions.empty());
+    return Definitions[0];
   }
 }
 
@@ -243,6 +283,18 @@
          ++I) {
       if ((*I)->isDeleted())
         continue;
+      if (InstFakeKill *Kill = llvm::dyn_cast<InstFakeKill>(*I)) {
+        // A FakeKill instruction indicates certain Variables (usually
+        // physical scratch registers) are redefined, so we register
+        // them as defs.
+        for (SizeT SrcNum = 0; SrcNum < (*I)->getSrcSize(); ++SrcNum) {
+          Variable *Var = llvm::cast<Variable>((*I)->getSrc(SrcNum));
+          SizeT VarNum = Var->getIndex();
+          assert(VarNum < Metadata.size());
+          Metadata[VarNum].markDef(Kill, Node);
+        }
+        continue; // no point in executing the rest
+      }
       if (Variable *Dest = (*I)->getDest()) {
         SizeT DestNum = Dest->getIndex();
         assert(DestNum < Metadata.size());
@@ -275,18 +327,35 @@
 }
 
 bool VariablesMetadata::isMultiBlock(const Variable *Var) const {
-  if (getDefinition(Var) == NULL)
+  if (Var->getIsArg())
     return true;
+  if (!isTracked(Var))
+    return true; // conservative answer
   SizeT VarNum = Var->getIndex();
   // Conservatively return true if the state is unknown.
   return Metadata[VarNum].getMultiBlock() != VariableTracking::MBS_SingleBlock;
 }
 
-const Inst *VariablesMetadata::getDefinition(const Variable *Var) const {
+const Inst *VariablesMetadata::getFirstDefinition(const Variable *Var) const {
   if (!isTracked(Var))
     return NULL; // conservative answer
   SizeT VarNum = Var->getIndex();
-  return Metadata[VarNum].getDefinition();
+  return Metadata[VarNum].getFirstDefinition();
+}
+
+const Inst *VariablesMetadata::getSingleDefinition(const Variable *Var) const {
+  if (!isTracked(Var))
+    return NULL; // conservative answer
+  SizeT VarNum = Var->getIndex();
+  return Metadata[VarNum].getSingleDefinition();
+}
+
+const InstDefList &
+VariablesMetadata::getDefinitions(const Variable *Var) const {
+  if (!isTracked(Var))
+    return NoDefinitions;
+  SizeT VarNum = Var->getIndex();
+  return Metadata[VarNum].getDefinitions();
 }
 
 const CfgNode *VariablesMetadata::getLocalUseNode(const Variable *Var) const {
@@ -296,6 +365,8 @@
   return Metadata[VarNum].getNode();
 }
 
+const InstDefList VariablesMetadata::NoDefinitions;
+
 // ======================== dump routines ======================== //
 
 void Variable::emit(const Cfg *Func) const {
diff --git a/src/IceOperand.h b/src/IceOperand.h
index 727ea3d..6a1560f 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -385,13 +385,6 @@
   void setWeight(uint32_t NewWeight) { Weight = NewWeight; }
   void setWeightInfinite() { Weight = RegWeight::Inf; }
 
-  Variable *getPreferredRegister() const { return RegisterPreference; }
-  bool getRegisterOverlap() const { return AllowRegisterOverlap; }
-  void setPreferredRegister(Variable *Prefer, bool Overlap) {
-    RegisterPreference = Prefer;
-    AllowRegisterOverlap = Overlap;
-  }
-
   const LiveRange &getLiveRange() const { return Live; }
   void setLiveRange(const LiveRange &Range) { Live = Range; }
   void resetLiveRange() { Live.reset(); }
@@ -416,8 +409,8 @@
   // Creates a temporary copy of the variable with a different type.
   // Used primarily for syntactic correctness of textual assembly
   // emission.  Note that only basic information is copied, in
-  // particular not DefInst, IsArgument, Weight, RegisterPreference,
-  // AllowRegisterOverlap, LoVar, HiVar, VarsReal.
+  // particular not DefInst, IsArgument, Weight, LoVar, HiVar,
+  // VarsReal.
   Variable asType(Type Ty);
 
   virtual void emit(const Cfg *Func) const;
@@ -436,8 +429,7 @@
   Variable(OperandKind K, Type Ty, SizeT Index, const IceString &Name)
       : Operand(K, Ty), Number(Index), Name(Name), IsArgument(false),
         IsImplicitArgument(false), StackOffset(0), RegNum(NoRegister),
-        RegNumTmp(NoRegister), Weight(1), RegisterPreference(NULL),
-        AllowRegisterOverlap(false), LoVar(NULL), HiVar(NULL) {
+        RegNumTmp(NoRegister), Weight(1), LoVar(NULL), HiVar(NULL) {
     Vars = VarsReal;
     Vars[0] = this;
     NumVars = 1;
@@ -450,7 +442,7 @@
   bool IsArgument;
   bool IsImplicitArgument;
   // StackOffset is the canonical location on stack (only if
-  // RegNum<0 || IsArgument).
+  // RegNum==NoRegister || IsArgument).
   int32_t StackOffset;
   // RegNum is the allocated register, or NoRegister if it isn't
   // register-allocated.
@@ -458,16 +450,6 @@
   // RegNumTmp is the tentative assignment during register allocation.
   int32_t RegNumTmp;
   RegWeight Weight; // Register allocation priority
-  // RegisterPreference says that if possible, the register allocator
-  // should prefer the register that was assigned to this linked
-  // variable.  It also allows a spill slot to share its stack
-  // location with another variable, if that variable does not get
-  // register-allocated and therefore has a stack location.
-  Variable *RegisterPreference;
-  // AllowRegisterOverlap says that it is OK to honor
-  // RegisterPreference and "share" a register even if the two live
-  // ranges overlap.
-  bool AllowRegisterOverlap;
   LiveRange Live;
   // LoVar and HiVar are needed for lowering from 64 to 32 bits.  When
   // lowering from I64 to I32 on a 32-bit architecture, we split the
@@ -483,14 +465,18 @@
   Variable *VarsReal[1];
 };
 
-// VariableTracking tracks the metadata for a single variable.
+typedef std::vector<const Inst *> InstDefList;
+
+// VariableTracking tracks the metadata for a single variable.  It is
+// only meant to be used internally by VariablesMetadata.
 class VariableTracking {
 public:
   enum MultiDefState {
     // TODO(stichnot): Consider using just a simple counter.
     MDS_Unknown,
     MDS_SingleDef,
-    MDS_MultiDef
+    MDS_MultiDefSingleBlock,
+    MDS_MultiDefMultiBlock
   };
   enum MultiBlockState {
     MBS_Unknown,
@@ -499,10 +485,12 @@
   };
   VariableTracking()
       : MultiDef(MDS_Unknown), MultiBlock(MBS_Unknown), SingleUseNode(NULL),
-        SingleDefInst(NULL) {}
+        SingleDefNode(NULL) {}
   MultiDefState getMultiDef() const { return MultiDef; }
   MultiBlockState getMultiBlock() const { return MultiBlock; }
-  const Inst *getDefinition() const { return SingleDefInst; }
+  const Inst *getFirstDefinition() const;
+  const Inst *getSingleDefinition() const;
+  const InstDefList &getDefinitions() const { return Definitions; }
   const CfgNode *getNode() const { return SingleUseNode; }
   void markUse(const Inst *Instr, const CfgNode *Node, bool IsFromDef,
                bool IsImplicit);
@@ -513,7 +501,12 @@
   MultiDefState MultiDef;
   MultiBlockState MultiBlock;
   const CfgNode *SingleUseNode;
-  const Inst *SingleDefInst;
+  const CfgNode *SingleDefNode;
+  // All definitions of the variable are collected here, in the order
+  // encountered.  Definitions in the same basic block are in
+  // instruction order, but there's no guarantee for the basic block
+  // order.
+  InstDefList Definitions;
 };
 
 // VariablesMetadata analyzes and summarizes the metadata for the
@@ -521,18 +514,50 @@
 class VariablesMetadata {
 public:
   VariablesMetadata(const Cfg *Func) : Func(Func) {}
+  // Initialize the state by traversing all instructions/variables in
+  // the CFG.
   void init();
+  // Returns whether the given Variable is tracked in this object.  It
+  // should only return false if changes were made to the CFG after
+  // running init(), in which case the state is stale and the results
+  // shouldn't be trusted (but it may be OK e.g. for dumping).
   bool isTracked(const Variable *Var) const {
     return Var->getIndex() < Metadata.size();
   }
+
+  // Returns whether the given Variable has multiple definitions.
   bool isMultiDef(const Variable *Var) const;
-  const Inst *getDefinition(const Variable *Var) const;
+  // Returns the first definition instruction of the given Variable.
+  // This is only valid for variables whose definitions are all within
+  // the same block, e.g. T after the lowered sequence "T=B; T+=C;
+  // A=T", for which getFirstDefinition(T) would return the "T=B"
+  // instruction.  For variables whose definitions span multiple
+  // blocks, NULL is returned.
+  const Inst *getFirstDefinition(const Variable *Var) const;
+  // Returns the definition instruction of the given Variable, when
+  // the variable has exactly one definition.  Otherwise, NULL is
+  // returned.
+  const Inst *getSingleDefinition(const Variable *Var) const;
+  // Returns the list of all definition instructions of the given
+  // Variable.
+  const InstDefList &getDefinitions(const Variable *Var) const;
+
+  // Returns whether the given Variable is live across multiple
+  // blocks.  Mainly, this is used to partition Variables into
+  // single-block versus multi-block sets for leveraging sparsity in
+  // liveness analysis, and for implementing simple stack slot
+  // coalescing.  As a special case, function arguments are always
+  // considered multi-block because they are live coming into the
+  // entry block.
   bool isMultiBlock(const Variable *Var) const;
+  // Returns the node that the given Variable is used in, assuming
+  // isMultiBlock() returns false.  Otherwise, NULL is returned.
   const CfgNode *getLocalUseNode(const Variable *Var) const;
 
 private:
   const Cfg *Func;
   std::vector<VariableTracking> Metadata;
+  const static InstDefList NoDefinitions;
   VariablesMetadata(const VariablesMetadata &) LLVM_DELETED_FUNCTION;
   VariablesMetadata &operator=(const VariablesMetadata &) LLVM_DELETED_FUNCTION;
 };
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index f79e8dc..36ada12 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -21,6 +21,37 @@
 
 namespace Ice {
 
+namespace {
+
+// Returns true if Var has any definitions within Item's live range.
+bool overlapsDefs(const Cfg *Func, const LiveRangeWrapper &Item,
+                  const Variable *Var) {
+  const InstDefList &Defs = Func->getVMetadata()->getDefinitions(Var);
+  for (size_t i = 0; i < Defs.size(); ++i) {
+    if (Item.range().overlaps(Defs[i]->getNumber()))
+      return true;
+  }
+  return false;
+}
+
+void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
+                        const char *Reason) {
+  if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+    Ostream &Str = Func->getContext()->getStrDump();
+    Str << "Disabling Overlap due to " << Reason << " " << *Var
+        << " LIVE=" << Var->getLiveRange() << " Defs=";
+    const InstDefList &Defs = Func->getVMetadata()->getDefinitions(Var);
+    for (size_t i = 0; i < Defs.size(); ++i) {
+      if (i > 0)
+        Str << ",";
+      Str << Defs[i]->getNumber();
+    }
+    Str << "\n";
+  }
+}
+
+} // end of anonymous namespace
+
 // Implements the linear-scan algorithm.  Based on "Linear Scan
 // Register Allocation in the Context of SSA Form and Register
 // Constraints" by Hanspeter Mössenböck and Michael Pfeiffer,
@@ -40,6 +71,7 @@
   Active.clear();
   Ostream &Str = Func->getContext()->getStrDump();
   Func->resetCurrentNode();
+  VariablesMetadata *VMetadata = Func->getVMetadata();
 
   // Gather the live ranges of all variables and add them to the
   // Unhandled set.  TODO: Unhandled is a set<> which is based on a
@@ -185,6 +217,58 @@
         Free[i] = false;
     }
 
+    // Infer register preference and allowable overlap.  Only form a
+    // preference when the current Variable has an unambiguous "first"
+    // definition.  The preference is some source Variable of the
+    // defining instruction that either is assigned a register that is
+    // currently free, or that is assigned a register that is not free
+    // but overlap is allowed.  Overlap is allowed when the Variable
+    // under consideration is single-definition, and its definition is
+    // a simple assignment - i.e., the register gets copied/aliased
+    // but is never modified.  Furthermore, overlap is only allowed
+    // when preferred Variable definition instructions do not appear
+    // within the current Variable's live range.
+    Variable *Prefer = NULL;
+    int32_t PreferReg = Variable::NoRegister;
+    bool AllowOverlap = false;
+    if (const Inst *DefInst = VMetadata->getFirstDefinition(Cur.Var)) {
+      assert(DefInst->getDest() == Cur.Var);
+      bool IsAssign = DefInst->isSimpleAssign();
+      bool IsSingleDef = !VMetadata->isMultiDef(Cur.Var);
+      for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) {
+        // TODO(stichnot): Iterate through the actual Variables of the
+        // instruction, not just the source operands.  This could
+        // capture Load instructions, including address mode
+        // optimization, for Prefer (but not for AllowOverlap).
+        if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) {
+          int32_t SrcReg = SrcVar->getRegNumTmp();
+          // Only consider source variables that have (so far) been
+          // assigned a register.  That register must be one in the
+          // RegMask set, e.g. don't try to prefer the stack pointer
+          // as a result of the stacksave intrinsic.
+          if (SrcVar->hasRegTmp() && RegMask[SrcReg]) {
+            if (!Free[SrcReg]) {
+              // Don't bother trying to enable AllowOverlap if the
+              // register is already free.
+              AllowOverlap =
+                  IsSingleDef && IsAssign && !overlapsDefs(Func, Cur, SrcVar);
+            }
+            if (AllowOverlap || Free[SrcReg]) {
+              Prefer = SrcVar;
+              PreferReg = SrcReg;
+            }
+          }
+        }
+      }
+    }
+    if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+      if (Prefer) {
+        Str << "Initial Prefer=" << *Prefer << " R=" << PreferReg
+            << " LIVE=" << Prefer->getLiveRange() << " Overlap=" << AllowOverlap
+            << "\n";
+      }
+    }
+
     // Remove registers from the Free[] list where an Inactive range
     // overlaps with the current range.
     for (UnorderedRanges::const_iterator I = Inactive.begin(),
@@ -198,6 +282,28 @@
         // variables that were allowed marked with
         // AllowRegisterOverlap.
         Free[RegNum] = false;
+        // Disable AllowOverlap if an Inactive variable, which is not
+        // Prefer, shares Prefer's register, and has a definition
+        // within Cur's live range.
+        if (AllowOverlap && Item.Var != Prefer && RegNum == PreferReg &&
+            overlapsDefs(Func, Cur, Item.Var)) {
+          AllowOverlap = false;
+          dumpDisableOverlap(Func, Item.Var, "Inactive");
+        }
+      }
+    }
+
+    // Disable AllowOverlap if an Active variable, which is not
+    // Prefer, shares Prefer's register, and has a definition within
+    // Cur's live range.
+    for (UnorderedRanges::iterator I = Active.begin(), E = Active.end();
+         AllowOverlap && I != E; ++I) {
+      LiveRangeWrapper Item = *I;
+      int32_t RegNum = Item.Var->getRegNumTmp();
+      if (Item.Var != Prefer && RegNum == PreferReg &&
+          overlapsDefs(Func, Cur, Item.Var)) {
+        AllowOverlap = false;
+        dumpDisableOverlap(Func, Item.Var, "Active");
       }
     }
 
@@ -206,13 +312,21 @@
     // Cur.endsBefore(*I) is an early exit check that turns a
     // guaranteed O(N^2) algorithm into expected linear complexity.
     llvm::SmallBitVector PrecoloredUnhandled(RegMask.size());
+    // Note: PrecoloredUnhandled is only used for dumping.
     for (OrderedRanges::const_iterator I = Unhandled.begin(),
                                        E = Unhandled.end();
          I != E && !Cur.endsBefore(*I); ++I) {
       LiveRangeWrapper Item = *I;
       if (Item.Var->hasReg() && Item.overlaps(Cur)) {
-        Free[Item.Var->getRegNum()] = false; // Note: getRegNum not getRegNumTmp
-        PrecoloredUnhandled[Item.Var->getRegNum()] = true;
+        int32_t ItemReg = Item.Var->getRegNum(); // Note: not getRegNumTmp()
+        Free[ItemReg] = false;
+        PrecoloredUnhandled[ItemReg] = true;
+        // Disable AllowOverlap if the preferred register is one of
+        // these precolored unhandled overlapping ranges.
+        if (AllowOverlap && ItemReg == PreferReg) {
+          AllowOverlap = false;
+          dumpDisableOverlap(Func, Item.Var, "PrecoloredUnhandled");
+        }
       }
     }
 
@@ -228,15 +342,7 @@
       Str << "\n";
     }
 
-    Variable *Prefer = Cur.Var->getPreferredRegister();
-    int32_t PreferReg = Prefer && Prefer->hasRegTmp() ? Prefer->getRegNumTmp()
-                                                      : Variable::NoRegister;
-    bool AllowedToOverlap = Cur.Var->getRegisterOverlap() &&
-                            PreferReg != Variable::NoRegister &&
-                            RegMask[PreferReg] &&
-                            !PrecoloredUnhandled[PreferReg];
-    if (PreferReg != Variable::NoRegister &&
-        (AllowedToOverlap || Free[PreferReg])) {
+    if (Prefer && (AllowOverlap || Free[PreferReg])) {
       // First choice: a preferred register that is either free or is
       // allowed to overlap with its linked variable.
       Cur.Var->setRegNumTmp(PreferReg);
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 6828940..8a01f00 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -382,6 +382,7 @@
   // associated cleanup, to make the dump cleaner and more useful.
   Func->dump("After initial x8632 codegen");
   Timer T_regAlloc;
+  Func->getVMetadata()->init();
   regAlloc();
   if (Func->hasError())
     return;
@@ -1590,21 +1591,21 @@
     case InstArithmetic::Shl:
       _mov(T, Src0);
       if (!llvm::isa<Constant>(Src1))
-        Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);
+        Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
       _shl(T, Src1);
       _mov(Dest, T);
       break;
     case InstArithmetic::Lshr:
       _mov(T, Src0);
       if (!llvm::isa<Constant>(Src1))
-        Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);
+        Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
       _shr(T, Src1);
       _mov(Dest, T);
       break;
     case InstArithmetic::Ashr:
       _mov(T, Src0);
       if (!llvm::isa<Constant>(Src1))
-        Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);
+        Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
       _sar(T, Src1);
       _mov(Dest, T);
       break;
@@ -1725,9 +1726,8 @@
     _mov(T_Hi, Src0Hi);
     _mov(DestHi, T_Hi);
   } else {
-    const bool AllowOverlap = true;
     // RI is either a physical register or an immediate.
-    Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
+    Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm);
     if (isVectorType(Dest->getType()))
       _movp(Dest, RI);
     else
@@ -1830,7 +1830,7 @@
   // code, as the memory operand displacements may end up being smaller
   // before any stack adjustment is done.
   for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
-    Variable *Reg = legalizeToVar(XmmArgs[i], false, RegX8632::Reg_xmm0 + i);
+    Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
     // Generate a FakeUse of register arguments so that they do not get
     // dead code eliminated as a result of the FakeKill of scratch
     // registers after the call.
@@ -1914,15 +1914,12 @@
       split64(Dest);
       Variable *DestLo = Dest->getLo();
       Variable *DestHi = Dest->getHi();
-      DestLo->setPreferredRegister(ReturnReg, false);
-      DestHi->setPreferredRegister(ReturnRegHi, false);
       _mov(DestLo, ReturnReg);
       _mov(DestHi, ReturnRegHi);
     } else {
       assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
              Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
              isVectorType(Dest->getType()));
-      Dest->setPreferredRegister(ReturnReg, false);
       if (isVectorType(Dest->getType())) {
         _movp(Dest, ReturnReg);
       } else {
@@ -2137,7 +2134,6 @@
       if (Dest->getType() == IceType_i1)
         _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
       _mov(Dest, T_2);
-      T_2->setPreferredRegister(T_1, true);
     }
     break;
   case InstCast::Fptoui:
@@ -2174,7 +2170,6 @@
       if (Dest->getType() == IceType_i1)
         _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
       _mov(Dest, T_2);
-      T_2->setPreferredRegister(T_1, true);
     }
     break;
   case InstCast::Sitofp:
@@ -2686,8 +2681,8 @@
     if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
         Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
       NextBr->setDeleted();
-      Operand *Src0RM = legalize(
-          Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg, true);
+      Operand *Src0RM =
+          legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
       _cmp(Src0RM, Src1);
       _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
           NextBr->getTargetFalse());
@@ -2736,8 +2731,8 @@
   }
 
   // cmp b, c
-  Operand *Src0RM = legalize(
-      Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg, true);
+  Operand *Src0RM =
+      legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
   InstX8632Label *Label = InstX8632Label::create(Func, this);
   _cmp(Src0RM, Src1);
   _mov(Dest, One);
@@ -3588,7 +3583,8 @@
   //   set Var:=SrcVar
   if (Var == NULL)
     return false;
-  if (const Inst *VarAssign = VMetadata->getDefinition(Var)) {
+  if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
+    assert(!VMetadata->isMultiDef(Var));
     if (llvm::isa<InstAssign>(VarAssign)) {
       Operand *SrcOp = VarAssign->getSrc(0);
       assert(SrcOp);
@@ -3615,9 +3611,10 @@
     return false;
   if (Index != NULL)
     return false;
-  const Inst *BaseInst = VMetadata->getDefinition(Base);
+  const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
   if (BaseInst == NULL)
     return false;
+  assert(!VMetadata->isMultiDef(Base));
   if (BaseInst->getSrcSize() < 2)
     return false;
   if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
@@ -3646,9 +3643,10 @@
   //   Index=Var, Shift+=log2(Const)
   if (Index == NULL)
     return false;
-  const Inst *IndexInst = VMetadata->getDefinition(Index);
+  const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
   if (IndexInst == NULL)
     return false;
+  assert(!VMetadata->isMultiDef(Index));
   if (IndexInst->getSrcSize() < 2)
     return false;
   if (const InstArithmetic *ArithInst =
@@ -3697,9 +3695,10 @@
   //   set Base=Var, Offset-=Const
   if (Base == NULL)
     return false;
-  const Inst *BaseInst = VMetadata->getDefinition(Base);
+  const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
   if (BaseInst == NULL)
     return false;
+  assert(!VMetadata->isMultiDef(Base));
   if (const InstArithmetic *ArithInst =
           llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
     if (ArithInst->getOp() != InstArithmetic::Add &&
@@ -3878,15 +3877,15 @@
   if (Inst->hasRetValue()) {
     Operand *Src0 = legalize(Inst->getRetValue());
     if (Src0->getType() == IceType_i64) {
-      Variable *eax = legalizeToVar(loOperand(Src0), false, RegX8632::Reg_eax);
-      Variable *edx = legalizeToVar(hiOperand(Src0), false, RegX8632::Reg_edx);
+      Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
+      Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
       Reg = eax;
       Context.insert(InstFakeUse::create(Func, edx));
     } else if (Src0->getType() == IceType_f32 ||
                Src0->getType() == IceType_f64) {
       _fld(Src0);
     } else if (isVectorType(Src0->getType())) {
-      Reg = legalizeToVar(Src0, false, RegX8632::Reg_xmm0);
+      Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
     } else {
       _mov(Reg, Src0, RegX8632::Reg_eax);
     }
@@ -3973,8 +3972,8 @@
   if (Dest->getType() == IceType_i64) {
     Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
     Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
-    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
-    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
+    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
+    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
     _cmp(ConditionRM, Zero);
     _mov(DestLo, SrcLoRI);
     _mov(DestHi, SrcHiRI);
@@ -3983,17 +3982,17 @@
     Context.insert(InstFakeUse::create(Func, DestHi));
     Operand *SrcFLo = loOperand(SrcF);
     Operand *SrcFHi = hiOperand(SrcF);
-    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
-    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
+    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
+    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
     _mov(DestLo, SrcLoRI);
     _mov(DestHi, SrcHiRI);
   } else {
     _cmp(ConditionRM, Zero);
-    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
+    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
     _mov(Dest, SrcT);
     _br(CondX86::Br_ne, Label);
     Context.insert(InstFakeUse::create(Func, Dest));
-    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
+    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
     _mov(Dest, SrcF);
   }
 
@@ -4008,14 +4007,14 @@
 
   if (Ty == IceType_i64) {
     Value = legalize(Value);
-    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
-    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
+    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
+    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
     _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
     _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
   } else if (isVectorType(Ty)) {
     _storep(legalizeToVar(Value), NewAddr);
   } else {
-    Value = legalize(Value, Legal_Reg | Legal_Imm, true);
+    Value = legalize(Value, Legal_Reg | Legal_Imm);
     _store(Value, NewAddr);
   }
 }
@@ -4054,7 +4053,7 @@
   if (NumCases >= 2)
-    Src0 = legalizeToVar(Src0, true);
+    Src0 = legalizeToVar(Src0);
   else
-    Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);
+    Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
   for (SizeT I = 0; I < NumCases; ++I) {
     // TODO(stichnot): Correct lowering for IceType_i64.
     Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));
@@ -4209,7 +4208,7 @@
 }
 
 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
-                               bool AllowOverlap, int32_t RegNum) {
+                               int32_t RegNum) {
   // Assert that a physical register is allowed.  To date, all calls
   // to legalize() allow a physical register.  If a physical register
   // needs to be explicitly disallowed, then new code will need to be
@@ -4228,10 +4227,10 @@
     Variable *RegBase = NULL;
     Variable *RegIndex = NULL;
     if (Base) {
-      RegBase = legalizeToVar(Base, true);
+      RegBase = legalizeToVar(Base);
     }
     if (Index) {
-      RegIndex = legalizeToVar(Index, true);
+      RegIndex = legalizeToVar(Index);
     }
     if (Base != RegBase || Index != RegIndex) {
       From = OperandX8632Mem::create(
@@ -4293,11 +4292,7 @@
     //   RegNum is required and Var->getRegNum() doesn't match.
     if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
         (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
-      Variable *Reg = copyToReg(From, RegNum);
-      if (RegNum == Variable::NoRegister) {
-        Reg->setPreferredRegister(Var, AllowOverlap);
-      }
-      From = Reg;
+      From = copyToReg(From, RegNum);
     }
     return From;
   }
@@ -4306,9 +4301,8 @@
 }
 
 // Provide a trivial wrapper to legalize() for this common usage.
-Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
-                                     int32_t RegNum) {
-  return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
+Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
+  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
 }
 
 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index f5bd300..6fe41cd 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -147,10 +147,8 @@
   };
   typedef uint32_t LegalMask;
   Operand *legalize(Operand *From, LegalMask Allowed = Legal_All & ~Legal_Reloc,
-                    bool AllowOverlap = false,
                     int32_t RegNum = Variable::NoRegister);
-  Variable *legalizeToVar(Operand *From, bool AllowOverlap = false,
-                          int32_t RegNum = Variable::NoRegister);
+  Variable *legalizeToVar(Operand *From, int32_t RegNum = Variable::NoRegister);
   // Turn a pointer operand into a memory operand that can be
   // used by a real load/store operation. Legalizes the operand as well.
   // This is a nop if the operand is already a legal memory operand.
@@ -297,11 +295,9 @@
   // in/out Dest argument.
   void _mov(Variable *&Dest, Operand *Src0,
             int32_t RegNum = Variable::NoRegister) {
-    if (Dest == NULL) {
-      Dest = legalizeToVar(Src0, false, RegNum);
-    } else {
-      Context.insert(InstX8632Mov::create(Func, Dest, Src0));
-    }
+    if (Dest == NULL)
+      Dest = makeReg(Src0->getType(), RegNum);
+    Context.insert(InstX8632Mov::create(Func, Dest, Src0));
   }
   void _movd(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Movd::create(Func, Dest, Src0));
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index 3da3f4f..515e2fe 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -47,6 +47,8 @@
         clEnumValN(Ice::IceV_Timing, "time", "Pass timing details"),
         clEnumValN(Ice::IceV_AddrOpt, "addropt", "Address mode optimization"),
         clEnumValN(Ice::IceV_All, "all", "Use all verbose options"),
+        clEnumValN(Ice::IceV_Most, "most",
+                   "Use all verbose options except 'regalloc' and 'time'"),
         clEnumValN(Ice::IceV_None, "none", "No verbosity"), clEnumValEnd));
 static cl::opt<Ice::TargetArch> TargetArch(
     "target", cl::desc("Target architecture:"), cl::init(Ice::Target_X8632),
@@ -134,6 +136,8 @@
     UseIntegratedAssembler("integrated-as",
                            cl::desc("Use integrated assembler (default yes)"),
                            cl::init(true));
+static cl::alias UseIas("ias", cl::desc("Alias for -integrated-as"),
+                        cl::NotHidden, cl::aliasopt(UseIntegratedAssembler));
 
 int main(int argc, char **argv) {
 
diff --git a/tests_lit/llvm2ice_tests/address-mode-opt.ll b/tests_lit/llvm2ice_tests/address-mode-opt.ll
index 997349a..ba42d65 100644
--- a/tests_lit/llvm2ice_tests/address-mode-opt.ll
+++ b/tests_lit/llvm2ice_tests/address-mode-opt.ll
@@ -46,7 +46,7 @@
   %addr.load = load float* %addr.ptr, align 4
   ret float %addr.load
 ; CHECK-LABEL: load_200000_minus_arg:
-; CHECK: movss xmm0, dword ptr [eax]
+; CHECK: movss xmm0, dword ptr [e{{..}}]
 }
 
 define float @address_mode_opt_chaining(float* %arg) {
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
index 4d471d4..4f48f48 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
@@ -205,7 +205,7 @@
 ; CHECK-LABEL: test_atomic_rmw_add_8
 ; CHECK: lock
 ; CHECK-NEXT: xadd byte {{.*}}, [[REG:.*]]
-; CHECK: mov {{.*}}, [[REG]]
+; CHECK: {{mov|movzx}} {{.*}}, [[REG]]
 
 define i32 @test_atomic_rmw_add_16(i32 %iptr, i32 %v) {
 entry:
@@ -218,7 +218,7 @@
 ; CHECK-LABEL: test_atomic_rmw_add_16
 ; CHECK: lock
 ; CHECK-NEXT: xadd word {{.*}}, [[REG:.*]]
-; CHECK: mov {{.*}}, [[REG]]
+; CHECK: {{mov|movzx}} {{.*}}, [[REG]]
 
 define i32 @test_atomic_rmw_add_32(i32 %iptr, i32 %v) {
 entry:
@@ -347,7 +347,7 @@
 ; CHECK: neg [[REG:.*]]
 ; CHECK: lock
 ; CHECK-NEXT: xadd byte {{.*}}, [[REG]]
-; CHECK: mov {{.*}}, [[REG]]
+; CHECK: {{mov|movzx}} {{.*}}, [[REG]]
 
 define i32 @test_atomic_rmw_sub_16(i32 %iptr, i32 %v) {
 entry:
@@ -361,7 +361,7 @@
 ; CHECK: neg [[REG:.*]]
 ; CHECK: lock
 ; CHECK-NEXT: xadd word {{.*}}, [[REG]]
-; CHECK: mov {{.*}}, [[REG]]
+; CHECK: {{mov|movzx}} {{.*}}, [[REG]]
 
 define i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
 entry: