Subzero: Various fixes in preparation for x86-32 register aliasing. 1. Helper function sameVarOrReg() also needs to return true if the two physical registers alias or overlap. Otherwise advanced phi lowering may pick an incorrect ordering. 2. With -asm-verbose, redundant truncation assignments expressed as _mov instructions, like "mov cl, ecx", need to have their register use counts updated properly, so that the LIVEEND= annotations are correct. 3. The register allocator should consider suitably typed aliases when choosing a register preference. 4. When evicting a variable, the register allocator should decrement the use count of all aliases. 5. When saving/restoring callee-save registers in the prolog/epilog, map each register to its "canonical" register (e.g. %bl --> %ebx) and make sure each canonical register is only considered once. 6. Remove some unnecessary Variable::setMustHaveReg() calls. 7. When assigning bool results as a constant 0 or 1, use an 8-bit constant instead of 32-bit so that only the 8-bit register gets assigned. BUG= none TEST= make check, plus spec2k -asm-verbose output is unchanged R=kschimpf@google.com Review URL: https://codereview.chromium.org/1405643003 .
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp index 2bcea9a..f6c6379 100644 --- a/src/IceCfgNode.cpp +++ b/src/IceCfgNode.cpp
@@ -299,14 +299,29 @@ namespace { // Helper function used by advancedPhiLowering(). -bool sameVarOrReg(const Variable *Var, const Operand *Opnd) { - if (Var == Opnd) +bool sameVarOrReg(TargetLowering *Target, const Variable *Var1, + const Operand *Opnd) { + if (Var1 == Opnd) return true; - if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { - if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) - return true; - } - return false; + const auto Var2 = llvm::dyn_cast<Variable>(Opnd); + if (Var2 == nullptr) + return false; + + // If either operand lacks a register, they cannot be the same. + if (!Var1->hasReg()) + return false; + if (!Var2->hasReg()) + return false; + + int32_t RegNum1 = Var1->getRegNum(); + int32_t RegNum2 = Var2->getRegNum(); + // Quick common-case check. + if (RegNum1 == RegNum2) + return true; + + assert(Target->getAliasesForRegister(RegNum1)[RegNum2] == + Target->getAliasesForRegister(RegNum2)[RegNum1]); + return Target->getAliasesForRegister(RegNum1)[RegNum2]; } } // end of anonymous namespace @@ -383,6 +398,7 @@ if (NumPhis == 0) return; + TargetLowering *Target = Func->getTarget(); SizeT InEdgeIndex = 0; for (CfgNode *Pred : InEdges) { CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); @@ -397,7 +413,7 @@ Desc[I].NumPred = 0; // Cherry-pick any trivial assignments, so that they don't contribute to // the running complexity of the topological sort. - if (sameVarOrReg(Dest, Src)) { + if (sameVarOrReg(Target, Dest, Src)) { Desc[I].Processed = true; --Remaining; if (Dest != Src) @@ -420,10 +436,10 @@ if (I != J) { // There shouldn't be two Phis with the same Dest variable or // register. - assert(!sameVarOrReg(Dest, Desc[J].Dest)); + assert(!sameVarOrReg(Target, Dest, Desc[J].Dest)); } const Operand *Src = Desc[J].Src; - if (sameVarOrReg(Dest, Src)) + if (sameVarOrReg(Target, Dest, Src)) ++Desc[I].NumPred; } } @@ -473,7 +489,7 @@ assert(Desc[BestIndex].NumPred <= 1); Variable *Dest = Desc[BestIndex].Dest; Operand *Src = Desc[BestIndex].Src; - assert(!sameVarOrReg(Dest, Src)); + assert(!sameVarOrReg(Target, Dest, Src)); // Break a cycle by introducing a temporary. if (Desc[BestIndex].NumPred) { bool Found = false; @@ -484,7 +500,7 @@ if (Desc[J].Processed) continue; Operand *OtherSrc = Desc[J].Src; - if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { + if (Desc[J].NumPred && sameVarOrReg(Target, Dest, OtherSrc)) { SizeT VarNum = Func->getNumVariables(); Variable *Tmp = Func->makeVariable(OtherSrc->getType()); if (BuildDefs::dump()) @@ -505,7 +521,7 @@ for (size_t I = 0; I < NumPhis; ++I) { if (Desc[I].Processed) continue; - if (sameVarOrReg(Var, Desc[I].Dest)) { + if (sameVarOrReg(Target, Var, Desc[I].Dest)) { if (--Desc[I].NumPred == 0) Desc[I].Weight += WeightNoPreds; } @@ -1030,8 +1046,11 @@ // That normally would have happened as part of emitLiveRangesEnded(), // but that isn't called for redundant assignments. Variable *Dest = I.getDest(); - if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) + if (DecorateAsm && Dest->hasReg()) { ++LiveRegCount[Dest->getRegNum()]; + if (I.isLastUse(I.getSrc(0))) + --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()]; + } continue; } I.emit(Func);
diff --git a/src/IceInst.cpp b/src/IceInst.cpp index 42cebb8..f5084ff 100644 --- a/src/IceInst.cpp +++ b/src/IceInst.cpp
@@ -947,7 +947,7 @@ } bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) { - const auto SrcVar = llvm::dyn_cast<const Variable>(Source); + const auto *SrcVar = llvm::dyn_cast<const Variable>(Source); if (!SrcVar) return false; if (Dest->hasReg() && Dest->getRegNum() == SrcVar->getRegNum()) {
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp index 26b350a..908020f 100644 --- a/src/IceRegAlloc.cpp +++ b/src/IceRegAlloc.cpp
@@ -501,9 +501,10 @@ // try to prefer the stack pointer as a result of the stacksave // intrinsic. if (SrcVar->hasRegTmp()) { - const int32_t SrcReg = SrcVar->getRegNumTmp(); - const bool IsAliasAvailable = - (Iter.RegMask & *RegAliases[SrcReg]).any(); + const llvm::SmallBitVector &Aliases = + *RegAliases[SrcVar->getRegNumTmp()]; + const int32_t SrcReg = (Iter.RegMask & Aliases).find_first(); + const bool IsAliasAvailable = (SrcReg != -1); if (IsAliasAvailable) { if (FindOverlap && !Iter.Free[SrcReg]) { // Don't bother trying to enable AllowOverlap if the register is @@ -694,8 +695,12 @@ int32_t RegNum = Item->getRegNumTmp(); if (Aliases[RegNum]) { dumpLiveRangeTrace("Evicting A ", Item); - --RegUses[RegNum]; - assert(RegUses[RegNum] >= 0); + const llvm::SmallBitVector &Aliases = *RegAliases[RegNum]; + for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; + RegAlias = Aliases.find_next(RegAlias)) { + --RegUses[RegAlias]; + assert(RegUses[RegAlias] >= 0); + } Item->setRegNumTmp(Variable::NoRegister); moveItem(Active, Index, Handled); Evicted.push_back(Item);
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp index efadb85..dfb42d8 100644 --- a/src/IceTargetLoweringX8632.cpp +++ b/src/IceTargetLoweringX8632.cpp
@@ -436,8 +436,16 @@ // Add push instructions for preserved registers. uint32_t NumCallee = 0; size_t PreservedRegsSizeBytes = 0; + llvm::SmallBitVector Pushed(CalleeSaves.size()); for (SizeT i = 0; i < CalleeSaves.size(); ++i) { + // SizeT Canonical = RegX8632::getCanonicalReg(i); // TODO(stichnot) + SizeT Canonical = i; if (CalleeSaves[i] && RegsUsed[i]) { + Pushed[Canonical] = true; + } + } + for (SizeT i = 0; i < Pushed.size(); ++i) { + if (Pushed[i]) { ++NumCallee; PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32); _push(getPhysicalRegister(i)); @@ -601,11 +609,19 @@ // Add pop instructions for preserved registers. llvm::SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None); + llvm::SmallBitVector Popped(CalleeSaves.size()); for (SizeT i = 0; i < CalleeSaves.size(); ++i) { - SizeT j = CalleeSaves.size() - i - 1; + // SizeT Canonical = RegX8632::getCanonicalReg(i); // TODO(stichnot) + SizeT Canonical = i; + if (CalleeSaves[i] && RegsUsed[i]) { + Popped[Canonical] = true; + } + } + for (SizeT i = 0; i < Popped.size(); ++i) { + SizeT j = Popped.size() - i - 1; if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame) continue; - if (CalleeSaves[j] && RegsUsed[j]) { + if (Popped[j]) { _pop(getPhysicalRegister(j)); } }
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h index 24ec6ec..a85c7e8 100644 --- a/src/IceTargetLoweringX86BaseImpl.h +++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -2136,7 +2136,6 @@ T_1 = makeReg(IceType_i32); } // cvt() requires its integer argument to be a GPR. - T_1->setMustHaveReg(); Variable *T_2 = makeReg(Dest->getType()); _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); _mov(T_2, T_1); // T_1 and T_2 may have different integer types @@ -2185,7 +2184,6 @@ assert(Dest->getType() != IceType_i32); T_1 = makeReg(IceType_i32); } - T_1->setMustHaveReg(); Variable *T_2 = makeReg(Dest->getType()); _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); _mov(T_2, T_1); // T_1 and T_2 may have different integer types @@ -2227,7 +2225,6 @@ assert(Src0RM->getType() != IceType_i64); T_1 = makeReg(IceType_i32); } - T_1->setMustHaveReg(); Variable *T_2 = makeReg(Dest->getType()); if (Src0RM->getType() == T_1->getType()) _mov(T_1, Src0RM); @@ -2276,7 +2273,6 @@ assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); T_1 = makeReg(IceType_i32); } - T_1->setMustHaveReg(); Variable *T_2 = makeReg(Dest->getType()); if (Src0RM->getType() == T_1->getType()) _mov(T_1, Src0RM); @@ -2385,7 +2381,6 @@ Variable *T = makeReg(IceType_f64); // Movd requires its fp argument (in this case, the bitcast // destination) to be an xmm register. - T->setMustHaveReg(); _movd(T, Src0RM); _mov(Dest, T); } else { @@ -2632,7 +2627,8 @@ return; } } - Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default); + Constant *Default = + Ctx->getConstantInt(Dest->getType(), Traits::TableFcmp[Index].Default); _mov(Dest, Default); if (HasC1) { typename Traits::Insts::Label *Label = @@ -2642,7 +2638,7 @@ _br(Traits::TableFcmp[Index].C2, Label); } Constant *NonDefault = - Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default); + Ctx->getConstantInt(Dest->getType(), !Traits::TableFcmp[Index].Default); _mov_redefined(Dest, NonDefault); Context.insert(Label); } @@ -2819,8 +2815,8 @@ // which needs the upper and lower halves legalized. case InstIcmp::Sgt: case InstIcmp::Sle: - // These four compare after performing an "or" of the high and low half, so they - // need the upper and lower halves legalized. + // These four compare after performing an "or" of the high and low half, so + // they need the upper and lower halves legalized. case InstIcmp::Eq: case InstIcmp::Ule: case InstIcmp::Ne: @@ -5186,7 +5182,6 @@ if (Traits::Is64Bit) { if (llvm::isa<ConstantInteger64>(Const)) { Variable *V = copyToReg(Const, RegNum); - V->setMustHaveReg(); return V; } }