Add emitIAS for the two variable blend instructions (blendvps and pblendvb).
BUG=none
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/650573002
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index d51eeca..274d02c 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -1067,7 +1067,6 @@
const Cfg *Func) {
Ostream &Str = Func->getContext()->getStrEmit();
assert(Inst->getSrcSize() == 3);
- assert(llvm::isa<Variable>(Inst->getSrc(2)));
assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
RegX8632::Reg_xmm0);
Str << "\t" << Opcode << "\t";
@@ -1077,6 +1076,17 @@
Str << "\n";
}
+void
+emitIASVariableBlendInst(const Inst *Inst, const Cfg *Func,
+ const x86::AssemblerX86::XmmEmitterRegOp &Emitter) {
+ assert(Inst->getSrcSize() == 3);
+ assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
+ RegX8632::Reg_xmm0);
+ const Variable *Dest = Inst->getDest();
+ const Operand *Src = Inst->getSrc(1);
+ emitIASRegOpTyXMM(Func, Dest->getType(), Dest, Src, Emitter);
+}
+
} // end anonymous namespace
template <> void InstX8632Blendvps::emit(const Cfg *Func) const {
@@ -1085,12 +1095,28 @@
emitVariableBlendInst(Opcode, this, Func);
}
+template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const {
+ assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
+ TargetX8632::SSE4_1);
+ static const x86::AssemblerX86::XmmEmitterRegOp Emitter = {
+ &x86::AssemblerX86::blendvps, &x86::AssemblerX86::blendvps};
+ emitIASVariableBlendInst(this, Func, Emitter);
+}
+
template <> void InstX8632Pblendvb::emit(const Cfg *Func) const {
assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
TargetX8632::SSE4_1);
emitVariableBlendInst(Opcode, this, Func);
}
+template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const {
+ assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
+ TargetX8632::SSE4_1);
+ static const x86::AssemblerX86::XmmEmitterRegOp Emitter = {
+ &x86::AssemblerX86::pblendvb, &x86::AssemblerX86::pblendvb};
+ emitIASVariableBlendInst(this, Func, Emitter);
+}
+
template <> void InstX8632Imul::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 96978d4..02cec79 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -1513,6 +1513,7 @@
template <> void InstX8632Sqrtss::emit(const Cfg *Func) const;
template <> void InstX8632Subss::emit(const Cfg *Func) const;
+template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const;
template <> void InstX8632Div::emitIAS(const Cfg *Func) const;
template <> void InstX8632Idiv::emitIAS(const Cfg *Func) const;
template <> void InstX8632Imul::emitIAS(const Cfg *Func) const;
@@ -1521,6 +1522,7 @@
template <> void InstX8632Movp::emitIAS(const Cfg *Func) const;
template <> void InstX8632Movq::emitIAS(const Cfg *Func) const;
template <> void InstX8632MovssRegs::emitIAS(const Cfg *Func) const;
+template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const;
} // end of namespace Ice
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index 6fc7026..76ff93f 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -803,6 +803,44 @@
EmitXmmRegisterOperand(dst, src);
}
+void AssemblerX86::blendvps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x14);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX86::blendvps(Type /* Ty */, XmmRegister dst,
+ const Address &src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x14);
+ EmitOperand(dst, src);
+}
+
+void AssemblerX86::pblendvb(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x10);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX86::pblendvb(Type /* Ty */, XmmRegister dst,
+ const Address &src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x10);
+ EmitOperand(dst, src);
+}
+
void AssemblerX86::cmpps(XmmRegister dst, XmmRegister src,
CondX86::CmppsCond CmpCondition) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 676f4f6..dc600a2 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -523,6 +523,11 @@
void andps(XmmRegister dst, const Address &src);
void orps(XmmRegister dst, XmmRegister src);
+ void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
+ void blendvps(Type Ty, XmmRegister dst, const Address &src);
+ void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
+ void pblendvb(Type Ty, XmmRegister dst, const Address &src);
+
void cmpps(XmmRegister dst, XmmRegister src, CondX86::CmppsCond CmpCondition);
void cmpps(XmmRegister dst, const Address &src,
CondX86::CmppsCond CmpCondition);
diff --git a/tests_lit/llvm2ice_tests/vector-select.ll b/tests_lit/llvm2ice_tests/vector-select.ll
index 3fb4940..ca954d3 100644
--- a/tests_lit/llvm2ice_tests/vector-select.ll
+++ b/tests_lit/llvm2ice_tests/vector-select.ll
@@ -27,7 +27,7 @@
; CHECK: por
; SSE41-LABEL: test_select_v16i8:
-; SSE41: pblendvb
+; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}}
}
define <16 x i1> @test_select_v16i1(<16 x i1> %cond, <16 x i1> %arg1, <16 x i1> %arg2) {
@@ -40,7 +40,7 @@
; CHECK: por
; SSE41-LABEL: test_select_v16i1:
-; SSE41: pblendvb
+; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}}
}
define <8 x i16> @test_select_v8i16(<8 x i1> %cond, <8 x i16> %arg1, <8 x i16> %arg2) {
@@ -53,7 +53,7 @@
; CHECK: por
; SSE41-LABEL: test_select_v8i16:
-; SSE41: pblendvb
+; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}}
}
define <8 x i1> @test_select_v8i1(<8 x i1> %cond, <8 x i1> %arg1, <8 x i1> %arg2) {
@@ -66,7 +66,7 @@
; CHECK: por
; SSE41-LABEL: test_select_v8i1:
-; SSE41: pblendvb
+; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}}
}
define <4 x i32> @test_select_v4i32(<4 x i1> %cond, <4 x i32> %arg1, <4 x i32> %arg2) {
@@ -80,7 +80,7 @@
; SSE41-LABEL: test_select_v4i32:
; SSE41: pslld xmm0, 31
-; SSE41: blendvps
+; SSE41: blendvps xmm{{[0-7]}}, {{xmm[0-7]|xmmword}}
}
define <4 x float> @test_select_v4f32(<4 x i1> %cond, <4 x float> %arg1, <4 x float> %arg2) {
@@ -94,7 +94,7 @@
; SSE41-LABEL: test_select_v4f32:
; SSE41: pslld xmm0, 31
-; SSE41: blendvps
+; SSE41: blendvps xmm{{[0-7]}}, {{xmm[0-7]|xmmword}}
}
define <4 x i1> @test_select_v4i1(<4 x i1> %cond, <4 x i1> %arg1, <4 x i1> %arg2) {
@@ -108,7 +108,7 @@
; SSE41-LABEL: test_select_v4i1:
; SSE41: pslld xmm0, 31
-; SSE41: blendvps
+; SSE41: blendvps xmm{{[0-7]}}, {{xmm[0-7]|xmmword}}
}
; ERRORS-NOT: ICE translation error