Add emitIAS for the two variable blend instructions, blendvps and pblendvb. BUG=none R=stichnot@chromium.org Review URL: https://codereview.chromium.org/650573002
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp index d51eeca..274d02c 100644 --- a/src/IceInstX8632.cpp +++ b/src/IceInstX8632.cpp
@@ -1067,7 +1067,6 @@ const Cfg *Func) { Ostream &Str = Func->getContext()->getStrEmit(); assert(Inst->getSrcSize() == 3); - assert(llvm::isa<Variable>(Inst->getSrc(2))); assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() == RegX8632::Reg_xmm0); Str << "\t" << Opcode << "\t"; @@ -1077,6 +1076,17 @@ Str << "\n"; } +void +emitIASVariableBlendInst(const Inst *Inst, const Cfg *Func, + const x86::AssemblerX86::XmmEmitterRegOp &Emitter) { + assert(Inst->getSrcSize() == 3); + assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() == + RegX8632::Reg_xmm0); + const Variable *Dest = Inst->getDest(); + const Operand *Src = Inst->getSrc(1); + emitIASRegOpTyXMM(Func, Dest->getType(), Dest, Src, Emitter); +} + } // end anonymous namespace template <> void InstX8632Blendvps::emit(const Cfg *Func) const { @@ -1085,12 +1095,28 @@ emitVariableBlendInst(Opcode, this, Func); } +template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const { + assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >= + TargetX8632::SSE4_1); + static const x86::AssemblerX86::XmmEmitterRegOp Emitter = { + &x86::AssemblerX86::blendvps, &x86::AssemblerX86::blendvps}; + emitIASVariableBlendInst(this, Func, Emitter); +} + template <> void InstX8632Pblendvb::emit(const Cfg *Func) const { assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >= TargetX8632::SSE4_1); emitVariableBlendInst(Opcode, this, Func); } +template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const { + assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >= + TargetX8632::SSE4_1); + static const x86::AssemblerX86::XmmEmitterRegOp Emitter = { + &x86::AssemblerX86::pblendvb, &x86::AssemblerX86::pblendvb}; + emitIASVariableBlendInst(this, Func, Emitter); +} + template <> void InstX8632Imul::emit(const Cfg *Func) const { Ostream &Str = Func->getContext()->getStrEmit(); assert(getSrcSize() == 2);
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h index 96978d4..02cec79 100644 --- a/src/IceInstX8632.h +++ b/src/IceInstX8632.h
@@ -1513,6 +1513,7 @@ template <> void InstX8632Sqrtss::emit(const Cfg *Func) const; template <> void InstX8632Subss::emit(const Cfg *Func) const; +template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const; template <> void InstX8632Div::emitIAS(const Cfg *Func) const; template <> void InstX8632Idiv::emitIAS(const Cfg *Func) const; template <> void InstX8632Imul::emitIAS(const Cfg *Func) const; @@ -1521,6 +1522,7 @@ template <> void InstX8632Movp::emitIAS(const Cfg *Func) const; template <> void InstX8632Movq::emitIAS(const Cfg *Func) const; template <> void InstX8632MovssRegs::emitIAS(const Cfg *Func) const; +template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const; template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const; } // end of namespace Ice
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp index 6fc7026..76ff93f 100644 --- a/src/assembler_ia32.cpp +++ b/src/assembler_ia32.cpp
@@ -803,6 +803,44 @@ EmitXmmRegisterOperand(dst, src); } +void AssemblerX86::blendvps(Type /* Ty */, XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x14); + EmitXmmRegisterOperand(dst, src); +} + +void AssemblerX86::blendvps(Type /* Ty */, XmmRegister dst, + const Address &src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x14); + EmitOperand(dst, src); +} + +void AssemblerX86::pblendvb(Type /* Ty */, XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x10); + EmitXmmRegisterOperand(dst, src); +} + +void AssemblerX86::pblendvb(Type /* Ty */, XmmRegister dst, + const Address &src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x10); + EmitOperand(dst, src); +} + void AssemblerX86::cmpps(XmmRegister dst, XmmRegister src, CondX86::CmppsCond CmpCondition) { AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h index 676f4f6..dc600a2 100644 --- a/src/assembler_ia32.h +++ b/src/assembler_ia32.h
@@ -523,6 +523,11 @@ void andps(XmmRegister dst, const Address &src); void orps(XmmRegister dst, XmmRegister src); + void blendvps(Type Ty, XmmRegister dst, XmmRegister src); + void blendvps(Type Ty, XmmRegister dst, const Address &src); + void pblendvb(Type Ty, XmmRegister dst, XmmRegister src); + void pblendvb(Type Ty, XmmRegister dst, const Address &src); + void cmpps(XmmRegister dst, XmmRegister src, CondX86::CmppsCond CmpCondition); void cmpps(XmmRegister dst, const Address &src, CondX86::CmppsCond CmpCondition);
diff --git a/tests_lit/llvm2ice_tests/vector-select.ll b/tests_lit/llvm2ice_tests/vector-select.ll index 3fb4940..ca954d3 100644 --- a/tests_lit/llvm2ice_tests/vector-select.ll +++ b/tests_lit/llvm2ice_tests/vector-select.ll
@@ -27,7 +27,7 @@ ; CHECK: por ; SSE41-LABEL: test_select_v16i8: -; SSE41: pblendvb +; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}} } define <16 x i1> @test_select_v16i1(<16 x i1> %cond, <16 x i1> %arg1, <16 x i1> %arg2) { @@ -40,7 +40,7 @@ ; CHECK: por ; SSE41-LABEL: test_select_v16i1: -; SSE41: pblendvb +; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}} } define <8 x i16> @test_select_v8i16(<8 x i1> %cond, <8 x i16> %arg1, <8 x i16> %arg2) { @@ -53,7 +53,7 @@ ; CHECK: por ; SSE41-LABEL: test_select_v8i16: -; SSE41: pblendvb +; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}} } define <8 x i1> @test_select_v8i1(<8 x i1> %cond, <8 x i1> %arg1, <8 x i1> %arg2) { @@ -66,7 +66,7 @@ ; CHECK: por ; SSE41-LABEL: test_select_v8i1: -; SSE41: pblendvb +; SSE41: pblendvb xmm{{[0-7]}}, {{xmm[0-7]|xmmword}} } define <4 x i32> @test_select_v4i32(<4 x i1> %cond, <4 x i32> %arg1, <4 x i32> %arg2) { @@ -80,7 +80,7 @@ ; SSE41-LABEL: test_select_v4i32: ; SSE41: pslld xmm0, 31 -; SSE41: blendvps +; SSE41: blendvps xmm{{[0-7]}}, {{xmm[0-7]|xmmword}} } define <4 x float> @test_select_v4f32(<4 x i1> %cond, <4 x float> %arg1, <4 x float> %arg2) { @@ -94,7 +94,7 @@ ; SSE41-LABEL: test_select_v4f32: ; SSE41: pslld xmm0, 31 -; SSE41: blendvps +; SSE41: blendvps xmm{{[0-7]}}, {{xmm[0-7]|xmmword}} } define <4 x i1> @test_select_v4i1(<4 x i1> %cond, <4 x i1> %arg1, <4 x i1> %arg2) { @@ -108,7 +108,7 @@ ; SSE41-LABEL: test_select_v4i1: ; SSE41: pslld xmm0, 31 -; SSE41: blendvps +; SSE41: blendvps xmm{{[0-7]}}, {{xmm[0-7]|xmmword}} } ; ERRORS-NOT: ICE translation error