Lower the select instruction when the operands are of vector type.
Select of vectors is implemented without branches: the condition
operand is sign-extended (where needed) into a per-element mask, and
the inputs are then combined with bitwise operations (pand, pandn, por).
BUG=none
R=jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/417653004
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 004b555..6893856 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -376,7 +376,7 @@
InstSelect::InstSelect(Cfg *Func, Variable *Dest, Operand *Condition,
Operand *SourceTrue, Operand *SourceFalse)
: Inst(Func, Inst::Select, 3, Dest) {
- assert(Condition->getType() == IceType_i1);
+ assert(typeElementType(Condition->getType()) == IceType_i1);
addSource(Condition);
addSource(SourceTrue);
addSource(SourceFalse);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index f1a68da..93a872c 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -458,6 +458,7 @@
template <> const char *InstX8632Psub::Opcode = "psub";
template <> const char *InstX8632And::Opcode = "and";
template <> const char *InstX8632Pand::Opcode = "pand";
+template <> const char *InstX8632Pandn::Opcode = "pandn";
template <> const char *InstX8632Or::Opcode = "or";
template <> const char *InstX8632Por::Opcode = "por";
template <> const char *InstX8632Xor::Opcode = "xor";
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index b50199b..ce1cc65 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -174,6 +174,7 @@
Or,
Padd,
Pand,
+ Pandn,
Pcmpeq,
Pcmpgt,
Pextrw,
@@ -564,6 +565,7 @@
typedef InstX8632Binop<InstX8632::Psub> InstX8632Psub;
typedef InstX8632Binop<InstX8632::And> InstX8632And;
typedef InstX8632Binop<InstX8632::Pand> InstX8632Pand;
+typedef InstX8632Binop<InstX8632::Pandn> InstX8632Pandn;
typedef InstX8632Binop<InstX8632::Or> InstX8632Or;
typedef InstX8632Binop<InstX8632::Por> InstX8632Por;
typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 71b4c17..c8cf170 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -3410,11 +3410,46 @@
}
void TargetX8632::lowerSelect(const InstSelect *Inst) {
- // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
Variable *Dest = Inst->getDest();
Operand *SrcT = Inst->getTrueOperand();
Operand *SrcF = Inst->getFalseOperand();
- Operand *Condition = legalize(Inst->getCondition());
+ Operand *Condition = Inst->getCondition();
+
+ if (isVectorType(Dest->getType())) {
+ // a=d?b:c ==> d=sext(d); a=(b&d)|(c&~d)
+ // TODO(wala): SSE4.1 has blendvps and pblendvb. SSE4.1 also has
+ // blendps and pblendw for constant condition operands.
+ Type SrcTy = SrcT->getType();
+ Variable *T = makeReg(SrcTy);
+ Variable *T2 = makeReg(SrcTy);
+ // Sign extend the condition operand if applicable.
+ if (SrcTy == IceType_v4f32) {
+ // The sext operation takes only integer arguments.
+ Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
+ lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
+ _movp(T, T3);
+ } else if (typeElementType(SrcTy) != IceType_i1) {
+ lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
+ } else {
+ _movp(T, Condition);
+ }
+ // ALIGNHACK: Until stack alignment support is implemented, the
+ // bitwise vector instructions need to have both operands in
+ // registers. Once there is support for stack alignment, LEGAL_HACK
+ // can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+ _movp(T2, T);
+ _pand(T, LEGAL_HACK(SrcT));
+ _pandn(T2, LEGAL_HACK(SrcF));
+ _por(T, T2);
+ _movp(Dest, T);
+#undef LEGAL_HACK
+
+ return;
+ }
+
+ // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
+ Operand *ConditionRMI = legalize(Condition);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
InstX8632Label *Label = InstX8632Label::create(Func, this);
@@ -3423,7 +3458,7 @@
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
- _cmp(Condition, Zero);
+ _cmp(ConditionRMI, Zero);
_mov(DestLo, SrcLoRI);
_mov(DestHi, SrcHiRI);
_br(InstX8632Br::Br_ne, Label);
@@ -3436,7 +3471,7 @@
_mov(DestLo, SrcLoRI);
_mov(DestHi, SrcHiRI);
} else {
- _cmp(Condition, Zero);
+ _cmp(ConditionRMI, Zero);
SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
_mov(Dest, SrcT);
_br(InstX8632Br::Br_ne, Label);
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 864881f..cbc98ce 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -304,6 +304,9 @@
void _pand(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pand::create(Func, Dest, Src0));
}
+ void _pandn(Variable *Dest, Operand *Src0) {
+ Context.insert(InstX8632Pandn::create(Func, Dest, Src0));
+ }
void _pcmpeq(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Pcmpeq::create(Func, Dest, Src0));
}