Subzero: Various fixes in preparation for x86-32 register aliasing.

1. Helper function sameVarOrReg() also needs to return true if the two physical registers alias or overlap.  Otherwise advanced phi lowering may pick an incorrect ordering.

2. With -asm-verbose, redundant truncation assignments expressed as _mov instructions, like "mov cl, ecx", need to have their register use counts updated properly, so that the LIVEEND= annotations are correct.

3. The register allocator should consider suitably typed aliases when choosing a register preference.

4. When evicting a variable, the register allocator should decrement the use count of all aliases.

5. When saving/restoring callee-save registers in the prolog/epilog, map each register to its "canonical" register (e.g. %bl --> %ebx) and make sure each canonical register is only considered once.

6. Remove some unnecessary Variable::setMustHaveReg() calls.

7. When assigning bool results as a constant 0 or 1, use an 8-bit constant instead of 32-bit so that only the 8-bit register gets assigned.

BUG= none
TEST= make check, plus spec2k -asm-verbose output is unchanged
R=kschimpf@google.com

Review URL: https://codereview.chromium.org/1405643003 .
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 2bcea9a..f6c6379 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -299,14 +299,29 @@
 namespace {
 
 // Helper function used by advancedPhiLowering().
-bool sameVarOrReg(const Variable *Var, const Operand *Opnd) {
-  if (Var == Opnd)
+bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
+                  const Operand *Opnd) {
+  if (Var1 == Opnd)
     return true;
-  if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) {
-    if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum())
-      return true;
-  }
-  return false;
+  const auto Var2 = llvm::dyn_cast<Variable>(Opnd);
+  if (Var2 == nullptr)
+    return false;
+
+  // If either operand lacks a register, they cannot be the same.
+  if (!Var1->hasReg())
+    return false;
+  if (!Var2->hasReg())
+    return false;
+
+  int32_t RegNum1 = Var1->getRegNum();
+  int32_t RegNum2 = Var2->getRegNum();
+  // Quick common-case check.
+  if (RegNum1 == RegNum2)
+    return true;
+
+  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
+         Target->getAliasesForRegister(RegNum2)[RegNum1]);
+  return Target->getAliasesForRegister(RegNum1)[RegNum2];
 }
 
 } // end of anonymous namespace
@@ -383,6 +398,7 @@
   if (NumPhis == 0)
     return;
 
+  TargetLowering *Target = Func->getTarget();
   SizeT InEdgeIndex = 0;
   for (CfgNode *Pred : InEdges) {
     CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
@@ -397,7 +413,7 @@
       Desc[I].NumPred = 0;
       // Cherry-pick any trivial assignments, so that they don't contribute to
       // the running complexity of the topological sort.
-      if (sameVarOrReg(Dest, Src)) {
+      if (sameVarOrReg(Target, Dest, Src)) {
         Desc[I].Processed = true;
         --Remaining;
         if (Dest != Src)
@@ -420,10 +436,10 @@
         if (I != J) {
           // There shouldn't be two Phis with the same Dest variable or
           // register.
-          assert(!sameVarOrReg(Dest, Desc[J].Dest));
+          assert(!sameVarOrReg(Target, Dest, Desc[J].Dest));
         }
         const Operand *Src = Desc[J].Src;
-        if (sameVarOrReg(Dest, Src))
+        if (sameVarOrReg(Target, Dest, Src))
           ++Desc[I].NumPred;
       }
     }
@@ -473,7 +489,7 @@
       assert(Desc[BestIndex].NumPred <= 1);
       Variable *Dest = Desc[BestIndex].Dest;
       Operand *Src = Desc[BestIndex].Src;
-      assert(!sameVarOrReg(Dest, Src));
+      assert(!sameVarOrReg(Target, Dest, Src));
       // Break a cycle by introducing a temporary.
       if (Desc[BestIndex].NumPred) {
         bool Found = false;
@@ -484,7 +500,7 @@
           if (Desc[J].Processed)
             continue;
           Operand *OtherSrc = Desc[J].Src;
-          if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) {
+          if (Desc[J].NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
             SizeT VarNum = Func->getNumVariables();
             Variable *Tmp = Func->makeVariable(OtherSrc->getType());
             if (BuildDefs::dump())
@@ -505,7 +521,7 @@
         for (size_t I = 0; I < NumPhis; ++I) {
           if (Desc[I].Processed)
             continue;
-          if (sameVarOrReg(Var, Desc[I].Dest)) {
+          if (sameVarOrReg(Target, Var, Desc[I].Dest)) {
             if (--Desc[I].NumPred == 0)
               Desc[I].Weight += WeightNoPreds;
           }
@@ -1030,8 +1046,11 @@
       // That normally would have happened as part of emitLiveRangesEnded(),
       // but that isn't called for redundant assignments.
       Variable *Dest = I.getDest();
-      if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0)))
+      if (DecorateAsm && Dest->hasReg()) {
         ++LiveRegCount[Dest->getRegNum()];
+        if (I.isLastUse(I.getSrc(0)))
+          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
+      }
       continue;
     }
     I.emit(Func);
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 42cebb8..f5084ff 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -947,7 +947,7 @@
 }
 
 bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
-  const auto SrcVar = llvm::dyn_cast<const Variable>(Source);
+  const auto *SrcVar = llvm::dyn_cast<const Variable>(Source);
   if (!SrcVar)
     return false;
   if (Dest->hasReg() && Dest->getRegNum() == SrcVar->getRegNum()) {
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 26b350a..908020f 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -501,9 +501,10 @@
         // try to prefer the stack pointer as a result of the stacksave
         // intrinsic.
         if (SrcVar->hasRegTmp()) {
-          const int32_t SrcReg = SrcVar->getRegNumTmp();
-          const bool IsAliasAvailable =
-              (Iter.RegMask & *RegAliases[SrcReg]).any();
+          const llvm::SmallBitVector &Aliases =
+              *RegAliases[SrcVar->getRegNumTmp()];
+          const int32_t SrcReg = (Iter.RegMask & Aliases).find_first();
+          const bool IsAliasAvailable = (SrcReg != -1);
           if (IsAliasAvailable) {
             if (FindOverlap && !Iter.Free[SrcReg]) {
               // Don't bother trying to enable AllowOverlap if the register is
@@ -694,8 +695,12 @@
       int32_t RegNum = Item->getRegNumTmp();
       if (Aliases[RegNum]) {
         dumpLiveRangeTrace("Evicting A   ", Item);
-        --RegUses[RegNum];
-        assert(RegUses[RegNum] >= 0);
+        const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
+        for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
+             RegAlias = Aliases.find_next(RegAlias)) {
+          --RegUses[RegAlias];
+          assert(RegUses[RegAlias] >= 0);
+        }
         Item->setRegNumTmp(Variable::NoRegister);
         moveItem(Active, Index, Handled);
         Evicted.push_back(Item);
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index efadb85..dfb42d8 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -436,8 +436,16 @@
   // Add push instructions for preserved registers.
   uint32_t NumCallee = 0;
   size_t PreservedRegsSizeBytes = 0;
+  llvm::SmallBitVector Pushed(CalleeSaves.size());
   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+    // SizeT Canonical = RegX8632::getCanonicalReg(i); // TODO(stichnot)
+    SizeT Canonical = i;
     if (CalleeSaves[i] && RegsUsed[i]) {
+      Pushed[Canonical] = true;
+    }
+  }
+  for (SizeT i = 0; i < Pushed.size(); ++i) {
+    if (Pushed[i]) {
       ++NumCallee;
       PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
       _push(getPhysicalRegister(i));
@@ -601,11 +609,19 @@
   // Add pop instructions for preserved registers.
   llvm::SmallBitVector CalleeSaves =
       getRegisterSet(RegSet_CalleeSave, RegSet_None);
+  llvm::SmallBitVector Popped(CalleeSaves.size());
   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
-    SizeT j = CalleeSaves.size() - i - 1;
+    // SizeT Canonical = RegX8632::getCanonicalReg(i); // TODO(stichnot)
+    SizeT Canonical = i;
+    if (CalleeSaves[i] && RegsUsed[i]) {
+      Popped[Canonical] = true;
+    }
+  }
+  for (SizeT i = 0; i < Popped.size(); ++i) {
+    SizeT j = Popped.size() - i - 1;
     if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
       continue;
-    if (CalleeSaves[j] && RegsUsed[j]) {
+    if (Popped[j]) {
       _pop(getPhysicalRegister(j));
     }
   }
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 24ec6ec..a85c7e8 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -2136,7 +2136,6 @@
         T_1 = makeReg(IceType_i32);
       }
       // cvt() requires its integer argument to be a GPR.
-      T_1->setMustHaveReg();
       Variable *T_2 = makeReg(Dest->getType());
       _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
       _mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2185,7 +2184,6 @@
         assert(Dest->getType() != IceType_i32);
         T_1 = makeReg(IceType_i32);
       }
-      T_1->setMustHaveReg();
       Variable *T_2 = makeReg(Dest->getType());
       _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
       _mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2227,7 +2225,6 @@
         assert(Src0RM->getType() != IceType_i64);
         T_1 = makeReg(IceType_i32);
       }
-      T_1->setMustHaveReg();
       Variable *T_2 = makeReg(Dest->getType());
       if (Src0RM->getType() == T_1->getType())
         _mov(T_1, Src0RM);
@@ -2276,7 +2273,6 @@
         assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
         T_1 = makeReg(IceType_i32);
       }
-      T_1->setMustHaveReg();
       Variable *T_2 = makeReg(Dest->getType());
       if (Src0RM->getType() == T_1->getType())
         _mov(T_1, Src0RM);
@@ -2385,7 +2381,6 @@
         Variable *T = makeReg(IceType_f64);
         // Movd requires its fp argument (in this case, the bitcast
         // destination) to be an xmm register.
-        T->setMustHaveReg();
         _movd(T, Src0RM);
         _mov(Dest, T);
       } else {
@@ -2632,7 +2627,8 @@
       return;
     }
   }
-  Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
+  Constant *Default =
+      Ctx->getConstantInt(Dest->getType(), Traits::TableFcmp[Index].Default);
   _mov(Dest, Default);
   if (HasC1) {
     typename Traits::Insts::Label *Label =
@@ -2642,7 +2638,7 @@
       _br(Traits::TableFcmp[Index].C2, Label);
     }
     Constant *NonDefault =
-        Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
+        Ctx->getConstantInt(Dest->getType(), !Traits::TableFcmp[Index].Default);
     _mov_redefined(Dest, NonDefault);
     Context.insert(Label);
   }
@@ -2819,8 +2815,8 @@
     // which needs the upper and lower halves legalized.
     case InstIcmp::Sgt:
     case InstIcmp::Sle:
-    // These four compare after performing an "or" of the high and low half, so they
-    // need the upper and lower halves legalized.
+    // These four compare after performing an "or" of the high and low half, so
+    // they need the upper and lower halves legalized.
     case InstIcmp::Eq:
     case InstIcmp::Ule:
     case InstIcmp::Ne:
@@ -5186,7 +5182,6 @@
     if (Traits::Is64Bit) {
       if (llvm::isa<ConstantInteger64>(Const)) {
         Variable *V = copyToReg(Const, RegNum);
-        V->setMustHaveReg();
         return V;
       }
     }