Subzero: Use cmov to improve lowering for the select instruction.
This is instead of explicit control flow which may interfere with branch prediction. However, explicit control flow is still needed for types other than i16 and i32, due to cmov limitations.
The assembler for cmov is extended to allow the non-dest operand to be a memory operand.
The select lowering is getting large enough that it was in our best interest to combine the default lowering with the bool-folding optimization.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/1125323004
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index e7863ca..74db41d 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -84,6 +84,10 @@
return TypeX8632Attributes[Ty].FldString;
}
+CondX86::BrCond InstX8632::getOppositeCondition(CondX86::BrCond Cond) {
+ return InstX8632BrAttributes[Cond].Opposite;
+}
+
OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
Constant *Offset, Variable *Index,
uint16_t Shift, SegmentRegisters SegmentReg)
@@ -181,7 +185,7 @@
// condition, swap the targets, and set new fallthrough to nullptr.
if (getTargetTrue() == NextNode) {
assert(Condition != CondX86::Br_None);
- Condition = InstX8632BrAttributes[Condition].Opposite;
+ Condition = getOppositeCondition(Condition);
TargetTrue = getTargetFalse();
TargetFalse = nullptr;
return true;
@@ -1558,13 +1562,28 @@
assert(Condition != CondX86::Br_None);
assert(getDest()->hasReg());
assert(getSrcSize() == 2);
- // Only need the reg/reg form now.
- const auto SrcVar = llvm::cast<Variable>(getSrc(1));
- assert(SrcVar->hasReg());
- assert(SrcVar->getType() == IceType_i32);
+ Operand *Src = getSrc(1);
+ Type SrcTy = Src->getType();
+ assert(SrcTy == IceType_i16 || SrcTy == IceType_i32);
X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
- Asm->cmov(Condition, RegX8632::getEncodedGPR(getDest()->getRegNum()),
- RegX8632::getEncodedGPR(SrcVar->getRegNum()));
+ if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
+ if (SrcVar->hasReg()) {
+ Asm->cmov(SrcTy, Condition,
+ RegX8632::getEncodedGPR(getDest()->getRegNum()),
+ RegX8632::getEncodedGPR(SrcVar->getRegNum()));
+ } else {
+ Asm->cmov(SrcTy, Condition,
+ RegX8632::getEncodedGPR(getDest()->getRegNum()),
+ static_cast<TargetX8632 *>(Func->getTarget())
+ ->stackVarToAsmOperand(SrcVar));
+ }
+ } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
+ assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
+ Asm->cmov(SrcTy, Condition, RegX8632::getEncodedGPR(getDest()->getRegNum()),
+ Mem->toAsmAddress(Asm));
+ } else {
+ llvm_unreachable("Unexpected operand type");
+ }
}
void InstX8632Cmov::dump(const Cfg *Func) const {
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index bee9843..976ccc4 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -265,6 +265,7 @@
static const char *getWidthString(Type Ty);
static const char *getFldString(Type Ty);
+ static CondX86::BrCond getOppositeCondition(CondX86::BrCond Cond);
void dump(const Cfg *Func) const override;
protected:
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 0aaafa2..478a6b6 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -4076,11 +4076,12 @@
void TargetX8632::lowerSelect(const InstSelect *Inst) {
Variable *Dest = Inst->getDest();
+ Type DestTy = Dest->getType();
Operand *SrcT = Inst->getTrueOperand();
Operand *SrcF = Inst->getFalseOperand();
Operand *Condition = Inst->getCondition();
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(DestTy)) {
Type SrcTy = SrcT->getType();
Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
@@ -4138,6 +4139,9 @@
return;
}
+ CondX86::BrCond Cond = CondX86::Br_ne;
+ Operand *CmpOpnd0 = nullptr;
+ Operand *CmpOpnd1 = nullptr;
// Handle folding opportunities.
if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
assert(Producer->isDeleted());
@@ -4145,73 +4149,69 @@
default:
break;
case BoolFolding::PK_Icmp32: {
- // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- Operand *Src0 = Producer->getSrc(0);
- Operand *Src1 = legalize(Producer->getSrc(1));
- Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
- _cmp(Src0RM, Src1);
- // This is the same code as below (for both i64 and non-i64),
- // except without the _cmp instruction and with a different
- // branch condition. TODO(stichnot): refactor.
- if (Dest->getType() == IceType_i64) {
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
- Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
- _mov(DestLo, SrcLoRI);
- _mov(DestHi, SrcHiRI);
- _br(getIcmp32Mapping(Cmp->getCondition()), Label);
- Operand *SrcFLo = loOperand(SrcF);
- Operand *SrcFHi = hiOperand(SrcF);
- SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
- SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
- _mov_nonkillable(DestLo, SrcLoRI);
- _mov_nonkillable(DestHi, SrcHiRI);
- } else {
- SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
- _mov(Dest, SrcT);
- _br(getIcmp32Mapping(Cmp->getCondition()), Label);
- SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
- _mov_nonkillable(Dest, SrcF);
- }
- Context.insert(Label);
- return;
- }
+ Cond = getIcmp32Mapping(Cmp->getCondition());
+ CmpOpnd1 = legalize(Producer->getSrc(1));
+ CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
+ } break;
}
}
+ if (CmpOpnd0 == nullptr) {
+ CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
+ CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
+ }
+ assert(CmpOpnd0);
+ assert(CmpOpnd1);
- // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1:
- Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
-
- if (Dest->getType() == IceType_i64) {
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
- Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
- _cmp(ConditionRM, Zero);
- _mov(DestLo, SrcLoRI);
- _mov(DestHi, SrcHiRI);
- _br(CondX86::Br_ne, Label);
- Operand *SrcFLo = loOperand(SrcF);
- Operand *SrcFHi = hiOperand(SrcF);
- SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
- SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
- _mov_nonkillable(DestLo, SrcLoRI);
- _mov_nonkillable(DestHi, SrcHiRI);
- } else {
- _cmp(ConditionRM, Zero);
+ _cmp(CmpOpnd0, CmpOpnd1);
+ if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
+ // The cmov instruction doesn't allow 8-bit or FP operands, so
+ // we need explicit control flow.
+ // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
+ InstX8632Label *Label = InstX8632Label::create(Func, this);
SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
_mov(Dest, SrcT);
- _br(CondX86::Br_ne, Label);
+ _br(Cond, Label);
SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
_mov_nonkillable(Dest, SrcF);
+ Context.insert(Label);
+ return;
+ }
+ // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
+ // But if SrcT is immediate, we might be able to do better, as
+ // the cmov instruction doesn't allow an immediate operand:
+ // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
+ if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
+ std::swap(SrcT, SrcF);
+ Cond = InstX8632::getOppositeCondition(Cond);
+ }
+ if (DestTy == IceType_i64) {
+ // Set the low portion.
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *TLo = nullptr;
+ Operand *SrcFLo = legalize(loOperand(SrcF));
+ _mov(TLo, SrcFLo);
+ Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
+ _cmov(TLo, SrcTLo, Cond);
+ _mov(DestLo, TLo);
+ // Set the high portion.
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *THi = nullptr;
+ Operand *SrcFHi = legalize(hiOperand(SrcF));
+ _mov(THi, SrcFHi);
+ Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
+ _cmov(THi, SrcTHi, Cond);
+ _mov(DestHi, THi);
+ return;
}
- Context.insert(Label);
+ assert(DestTy == IceType_i16 || DestTy == IceType_i32);
+ Variable *T = nullptr;
+ SrcF = legalize(SrcF);
+ _mov(T, SrcF);
+ SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
+ _cmov(T, SrcT, Cond);
+ _mov(Dest, T);
}
void TargetX8632::lowerStore(const InstStore *Inst) {
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index f14c216..edfbc3e 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -272,14 +272,30 @@
EmitOperand(dst, src);
}
-void AssemblerX8632::cmov(CondX86::BrCond cond, GPRRegister dst,
+void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ if (Ty == IceType_i16)
+ EmitOperandSizeOverride();
+ else
+ assert(Ty == IceType_i32);
EmitUint8(0x0F);
EmitUint8(0x40 + cond);
EmitRegisterOperand(dst, src);
}
+void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
+ const Address &src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ if (Ty == IceType_i16)
+ EmitOperandSizeOverride();
+ else
+ assert(Ty == IceType_i32);
+ EmitUint8(0x0F);
+ EmitUint8(0x40 + cond);
+ EmitOperand(dst, src);
+}
+
void AssemblerX8632::rep_movsb() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index f567516..2a928b0 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -510,7 +510,8 @@
void lea(Type Ty, GPRRegister dst, const Address &src);
- void cmov(CondX86::BrCond cond, GPRRegister dst, GPRRegister src);
+ void cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst, GPRRegister src);
+ void cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst, const Address &src);
void rep_movsb();