Implement integer vector multiply intrinsics.
BUG=swiftshader:15
Change-Id: Ib822b50c0a14e5ebc114db9759cbeecbb9f7a3c1
Reviewed-on: https://chromium-review.googlesource.com/403472
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 6633e65..2db0f74 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -363,6 +363,12 @@
void pandn(Type Ty, XmmRegister dst, const Address &src);
void pmull(Type Ty, XmmRegister dst, XmmRegister src);
void pmull(Type Ty, XmmRegister dst, const Address &src);
+ void pmulhw(Type Ty, XmmRegister dst, XmmRegister src); // pmulhw emitter, XMM-register source overload.
+ void pmulhw(Type Ty, XmmRegister dst, const Address &src); // pmulhw emitter, Address (memory) source overload.
+ void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src); // pmulhuw emitter, XMM-register source overload.
+ void pmulhuw(Type Ty, XmmRegister dst, const Address &src); // pmulhuw emitter, Address (memory) source overload.
+ void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src); // pmaddwd emitter, XMM-register source overload.
+ void pmaddwd(Type Ty, XmmRegister dst, const Address &src); // pmaddwd emitter, Address (memory) source overload.
void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
void pmuludq(Type Ty, XmmRegister dst, const Address &src);
void por(Type Ty, XmmRegister dst, XmmRegister src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 347a07e..3674d4a 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -839,6 +839,87 @@
}
template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
+ XmmRegister src) { // Register-source pmulhw: writes 0x66, the emitRexRB output, 0x0F, 0xE5, then the dst/src register operand.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer); // Scoped guard on Buffer, constructed before any byte is emitted.
+ emitUint8(0x66); // NOTE(review): presumably the mandatory prefix selecting the 128-bit SSE2 form -- confirm against the ISA manual.
+ emitRexRB(RexTypeIrrelevant, dst, src);
+ emitUint8(0x0F);
+ assert(Ty == IceType_v8i16); // v8i16 is the only type this emitter accepts.
+ (void)Ty; // Ty is read only by the assert above; the cast keeps it "used" when assert() is compiled out.
+ emitUint8(0xE5); // NOTE(review): 66 0F E5 should be the PMULHW xmm, xmm/m128 encoding -- confirm against the Intel SDM.
+ emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
+ const Address &src) { // Memory-source pmulhw: same 0x66 / 0x0F / 0xE5 bytes as the register overload, but ends with an Address operand.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer); // Scoped guard on Buffer, constructed before any byte is emitted.
+ emitUint8(0x66); // NOTE(review): presumably the mandatory prefix selecting the 128-bit SSE2 form -- confirm against the ISA manual.
+ emitAddrSizeOverridePrefix(); // Only the Address overload makes this call; it sits between 0x66 and the REX emission.
+ emitRex(RexTypeIrrelevant, src, dst); // Note the argument order here is (src, dst): the Address comes first.
+ emitUint8(0x0F);
+ assert(Ty == IceType_v8i16); // v8i16 is the only type this emitter accepts.
+ (void)Ty; // Ty is read only by the assert above; the cast keeps it "used" when assert() is compiled out.
+ emitUint8(0xE5); // NOTE(review): 66 0F E5 should be the PMULHW xmm, xmm/m128 encoding -- confirm against the Intel SDM.
+ emitOperand(gprEncoding(dst), src); // dst is passed as its gprEncoding() value alongside the memory operand.
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
+ XmmRegister src) { // Register-source pmulhuw: writes 0x66, the emitRexRB output, 0x0F, 0xE4, then the dst/src register operand.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer); // Scoped guard on Buffer, constructed before any byte is emitted.
+ emitUint8(0x66); // NOTE(review): presumably the mandatory prefix selecting the 128-bit SSE2 form -- confirm against the ISA manual.
+ emitRexRB(RexTypeIrrelevant, dst, src);
+ emitUint8(0x0F);
+ assert(Ty == IceType_v8i16); // v8i16 is the only type this emitter accepts.
+ (void)Ty; // Ty is read only by the assert above; the cast keeps it "used" when assert() is compiled out.
+ emitUint8(0xE4); // NOTE(review): 66 0F E4 should be the PMULHUW xmm, xmm/m128 encoding -- confirm against the Intel SDM.
+ emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
+ const Address &src) { // Memory-source pmulhuw: same 0x66 / 0x0F / 0xE4 bytes as the register overload, but ends with an Address operand.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer); // Scoped guard on Buffer, constructed before any byte is emitted.
+ emitUint8(0x66); // NOTE(review): presumably the mandatory prefix selecting the 128-bit SSE2 form -- confirm against the ISA manual.
+ emitAddrSizeOverridePrefix(); // Only the Address overload makes this call; it sits between 0x66 and the REX emission.
+ emitRex(RexTypeIrrelevant, src, dst); // Note the argument order here is (src, dst): the Address comes first.
+ emitUint8(0x0F);
+ assert(Ty == IceType_v8i16); // v8i16 is the only type this emitter accepts.
+ (void)Ty; // Ty is read only by the assert above; the cast keeps it "used" when assert() is compiled out.
+ emitUint8(0xE4); // NOTE(review): 66 0F E4 should be the PMULHUW xmm, xmm/m128 encoding -- confirm against the Intel SDM.
+ emitOperand(gprEncoding(dst), src); // dst is passed as its gprEncoding() value alongside the memory operand.
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
+ XmmRegister src) { // Register-source pmaddwd: writes 0x66, the emitRexRB output, 0x0F, 0xF5, then the dst/src register operand.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer); // Scoped guard on Buffer, constructed before any byte is emitted.
+ emitUint8(0x66); // NOTE(review): presumably the mandatory prefix selecting the 128-bit SSE2 form -- confirm against the ISA manual.
+ emitRexRB(RexTypeIrrelevant, dst, src);
+ emitUint8(0x0F);
+ assert(Ty == IceType_v8i16); // v8i16 is the only type this emitter accepts.
+ (void)Ty; // Ty is read only by the assert above; the cast keeps it "used" when assert() is compiled out.
+ emitUint8(0xF5); // NOTE(review): 66 0F F5 should be the PMADDWD xmm, xmm/m128 encoding -- confirm against the Intel SDM.
+ emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
+ const Address &src) { // Memory-source pmaddwd: same 0x66 / 0x0F / 0xF5 bytes as the register overload, but ends with an Address operand.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer); // Scoped guard on Buffer, constructed before any byte is emitted.
+ emitUint8(0x66); // NOTE(review): presumably the mandatory prefix selecting the 128-bit SSE2 form -- confirm against the ISA manual.
+ emitAddrSizeOverridePrefix(); // Only the Address overload makes this call; it sits between 0x66 and the REX emission.
+ emitRex(RexTypeIrrelevant, src, dst); // Note the argument order here is (src, dst): the Address comes first.
+ emitUint8(0x0F);
+ assert(Ty == IceType_v8i16); // v8i16 is the only type this emitter accepts.
+ (void)Ty; // Ty is read only by the assert above; the cast keeps it "used" when assert() is compiled out.
+ emitUint8(0xF5); // NOTE(review): 66 0F F5 should be the PMADDWD xmm, xmm/m128 encoding -- confirm against the Intel SDM.
+ emitOperand(gprEncoding(dst), src); // dst is passed as its gprEncoding() value alongside the memory operand.
+}
+
+template <typename TraitsType>
void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 52d566c..6d74b93 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -140,6 +140,9 @@
Pextr,
Pinsr,
Pmull,
+ Pmulhw,
+ Pmulhuw,
+ Pmaddwd,
Pmuludq,
Pop,
Por,
@@ -1848,6 +1851,60 @@
Source) {}
};
+ class InstX86Pmulhw // XMM binop instruction tagged InstX86Base::Pmulhw; instances come only from create().
+ : public InstX86BaseBinopXmm<InstX86Base::Pmulhw, false,
+ InstX86Base::SseSuffix::None> {
+ public:
+ static InstX86Pmulhw *create(Cfg *Func, Variable *Dest, Operand *Source) { // Factory: placement-constructs the instruction in storage returned by Func->allocate.
+ assert(Dest->getType() == IceType_v8i16 &&
+ Source->getType() == IceType_v8i16); // Both Dest and Source must be v8i16.
+ return new (Func->allocate<InstX86Pmulhw>())
+ InstX86Pmulhw(Func, Dest, Source);
+ }
+
+ private:
+ InstX86Pmulhw(Cfg *Func, Variable *Dest, Operand *Source) // Private constructor: forwards everything to the binop base class.
+ : InstX86BaseBinopXmm<InstX86Base::Pmulhw, false,
+ InstX86Base::SseSuffix::None>(Func, Dest,
+ Source) {}
+ };
+
+ class InstX86Pmulhuw // XMM binop instruction tagged InstX86Base::Pmulhuw; instances come only from create().
+ : public InstX86BaseBinopXmm<InstX86Base::Pmulhuw, false,
+ InstX86Base::SseSuffix::None> {
+ public:
+ static InstX86Pmulhuw *create(Cfg *Func, Variable *Dest, Operand *Source) { // Factory: placement-constructs the instruction in storage returned by Func->allocate.
+ assert(Dest->getType() == IceType_v8i16 &&
+ Source->getType() == IceType_v8i16); // Both Dest and Source must be v8i16.
+ return new (Func->allocate<InstX86Pmulhuw>())
+ InstX86Pmulhuw(Func, Dest, Source);
+ }
+
+ private:
+ InstX86Pmulhuw(Cfg *Func, Variable *Dest, Operand *Source) // Private constructor: forwards everything to the binop base class.
+ : InstX86BaseBinopXmm<InstX86Base::Pmulhuw, false,
+ InstX86Base::SseSuffix::None>(Func, Dest,
+ Source) {}
+ };
+
+ class InstX86Pmaddwd // XMM binop instruction tagged InstX86Base::Pmaddwd; instances come only from create().
+ : public InstX86BaseBinopXmm<InstX86Base::Pmaddwd, false,
+ InstX86Base::SseSuffix::None> {
+ public:
+ static InstX86Pmaddwd *create(Cfg *Func, Variable *Dest, Operand *Source) { // Factory: placement-constructs the instruction in storage returned by Func->allocate.
+ assert(Dest->getType() == IceType_v8i16 &&
+ Source->getType() == IceType_v8i16); // NOTE(review): Dest is required to be v8i16 even though PMADDWD yields 32-bit sums per the ISA -- confirm this typing is intentional.
+ return new (Func->allocate<InstX86Pmaddwd>())
+ InstX86Pmaddwd(Func, Dest, Source);
+ }
+
+ private:
+ InstX86Pmaddwd(Cfg *Func, Variable *Dest, Operand *Source) // Private constructor: forwards everything to the binop base class.
+ : InstX86BaseBinopXmm<InstX86Base::Pmaddwd, false,
+ InstX86Base::SseSuffix::None>(Func, Dest,
+ Source) {}
+ };
+
class InstX86Pmuludq
: public InstX86BaseBinopXmm<InstX86Base::Pmuludq, false,
InstX86Base::SseSuffix::None> {
@@ -3066,6 +3123,9 @@
using Mulps = typename InstImpl<TraitsType>::InstX86Mulps;
using Mulss = typename InstImpl<TraitsType>::InstX86Mulss;
using Pmull = typename InstImpl<TraitsType>::InstX86Pmull;
+ using Pmulhw = typename InstImpl<TraitsType>::InstX86Pmulhw; // Short alias for InstX86Pmulhw; presumably the name lowering code reaches via Traits::Insts -- confirm.
+ using Pmulhuw = typename InstImpl<TraitsType>::InstX86Pmulhuw; // Short alias for InstX86Pmulhuw.
+ using Pmaddwd = typename InstImpl<TraitsType>::InstX86Pmaddwd; // Short alias for InstX86Pmaddwd.
using Pmuludq = typename InstImpl<TraitsType>::InstX86Pmuludq;
using Divps = typename InstImpl<TraitsType>::InstX86Divps;
using Divss = typename InstImpl<TraitsType>::InstX86Divss;
@@ -3291,6 +3351,15 @@
const char *InstImpl<TraitsType>::InstX86Pmull::Base::Opcode = "pmull"; \
template <> \
template <> \
+ const char *InstImpl<TraitsType>::InstX86Pmulhw::Base::Opcode = "pmulhw"; \
+ template <> \
+ template <> \
+ const char *InstImpl<TraitsType>::InstX86Pmulhuw::Base::Opcode = "pmulhuw"; \
+ template <> \
+ template <> \
+ const char *InstImpl<TraitsType>::InstX86Pmaddwd::Base::Opcode = "pmaddwd"; \
+ template <> \
+ template <> \
const char *InstImpl<TraitsType>::InstX86Pmuludq::Base::Opcode = "pmuludq"; \
template <> \
template <> \
@@ -3644,6 +3713,24 @@
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Pmulhw::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::pmulhw, \
+ &InstImpl<TraitsType>::Assembler::pmulhw}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Pmulhuw::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::pmulhuw, \
+ &InstImpl<TraitsType>::Assembler::pmulhuw}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Pmaddwd::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::pmaddwd, \
+ &InstImpl<TraitsType>::Assembler::pmaddwd}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Pmuludq::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::pmuludq, \
&InstImpl<TraitsType>::Assembler::pmuludq}; \
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index c06b256..8eae1b3 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -1082,8 +1082,7 @@
Type Ty = this->getSrc(0)->getType();
assert(isScalarFloatingType(Ty));
Str << "\t"
- "sqrt"
- << Traits::TypeAttributes[Ty].SpSdString << "\t";
+ "sqrt" << Traits::TypeAttributes[Ty].SpSdString << "\t";
this->getSrc(0)->emit(Func);
Str << ", ";
this->getDest()->emit(Func);
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index b20a893..b5cab5e 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -63,10 +63,13 @@
Trap,
// The intrinsics below are not part of the PNaCl specification.
LoadSubVector,
+ MultiplyAddPairs,
+ MultiplyHighSigned,
+ MultiplyHighUnsigned,
+ SignMask,
StoreSubVector,
VectorPackSigned,
- VectorPackUnsigned,
- SignMask
+ VectorPackUnsigned
};
/// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index da1fff4..41b91b8 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -801,6 +801,18 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Pmull>(Dest, Src0);
}
+ void _pmulhw(Variable *Dest, Operand *Src0) { // Lowering helper: inserts a Traits::Insts::Pmulhw(Dest, Src0) through Context.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped object handed pointers to both operands; NOTE(review): presumably it may substitute sandboxed operands -- confirm.
+ Context.insert<typename Traits::Insts::Pmulhw>(Dest, Src0);
+ }
+ void _pmulhuw(Variable *Dest, Operand *Src0) { // Lowering helper: inserts a Traits::Insts::Pmulhuw(Dest, Src0) through Context.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped object handed pointers to both operands; NOTE(review): presumably it may substitute sandboxed operands -- confirm.
+ Context.insert<typename Traits::Insts::Pmulhuw>(Dest, Src0);
+ }
+ void _pmaddwd(Variable *Dest, Operand *Src0) { // Lowering helper: inserts a Traits::Insts::Pmaddwd(Dest, Src0) through Context.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped object handed pointers to both operands; NOTE(review): presumably it may substitute sandboxed operands -- confirm.
+ Context.insert<typename Traits::Insts::Pmaddwd>(Dest, Src0);
+ }
void _pmuludq(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Pmuludq>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index e0739df..d4c8833 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4473,6 +4473,42 @@
_mov(Dest, T);
return;
}
+ case Intrinsics::MultiplyHighSigned: { // Lowered as: T = arg0; pmulhw T, arg1; Dest = T.
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType()); // Temporary created with the destination's type.
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Both sources are legalized to either a register or a memory operand.
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM); // T starts as a copy of the first source, so the instruction below uses T as both its first input and its output.
+ _pmulhw(T, Src1RM);
+ _movp(Dest, T); // Result is copied from the temporary into Dest.
+ return;
+ }
+ case Intrinsics::MultiplyHighUnsigned: { // Lowered as: T = arg0; pmulhuw T, arg1; Dest = T.
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType()); // Temporary created with the destination's type.
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Both sources are legalized to either a register or a memory operand.
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM); // T starts as a copy of the first source, so the instruction below uses T as both its first input and its output.
+ _pmulhuw(T, Src1RM);
+ _movp(Dest, T); // Result is copied from the temporary into Dest.
+ return;
+ }
+ case Intrinsics::MultiplyAddPairs: { // Lowered as: T = arg0; pmaddwd T, arg1; Dest = T.
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType()); // Temporary created with the destination's type.
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Both sources are legalized to either a register or a memory operand.
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM); // T starts as a copy of the first source, so the instruction below uses T as both its first input and its output.
+ _pmaddwd(T, Src1RM);
+ _movp(Dest, T); // Result is copied from the temporary into Dest.
+ return;
+ }
default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic");
return;