Implement saturated vector add/subtract.
BUG=swiftshader:15
Change-Id: Ic120eddd1761e33b7d76bf3ed8ec5ca74634f958
Reviewed-on: https://chromium-review.googlesource.com/403477
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 2db0f74..5cd4faf 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -357,6 +357,10 @@
void padd(Type Ty, XmmRegister dst, XmmRegister src);
void padd(Type Ty, XmmRegister dst, const Address &src);
+ void padds(Type Ty, XmmRegister dst, XmmRegister src); // padds encoder, XMM-register source overload.
+ void padds(Type Ty, XmmRegister dst, const Address &src); // padds encoder, memory (Address) source overload.
+ void paddus(Type Ty, XmmRegister dst, XmmRegister src); // paddus encoder, XMM-register source overload.
+ void paddus(Type Ty, XmmRegister dst, const Address &src); // paddus encoder, memory (Address) source overload.
void pand(Type Ty, XmmRegister dst, XmmRegister src);
void pand(Type Ty, XmmRegister dst, const Address &src);
void pandn(Type Ty, XmmRegister dst, XmmRegister src);
@@ -375,6 +379,10 @@
void por(Type Ty, XmmRegister dst, const Address &src);
void psub(Type Ty, XmmRegister dst, XmmRegister src);
void psub(Type Ty, XmmRegister dst, const Address &src);
+ void psubs(Type Ty, XmmRegister dst, XmmRegister src); // psubs encoder, XMM-register source overload.
+ void psubs(Type Ty, XmmRegister dst, const Address &src); // psubs encoder, memory (Address) source overload.
+ void psubus(Type Ty, XmmRegister dst, XmmRegister src); // psubus encoder, XMM-register source overload.
+ void psubus(Type Ty, XmmRegister dst, const Address &src); // psubus encoder, memory (Address) source overload.
void pxor(Type Ty, XmmRegister dst, XmmRegister src);
void pxor(Type Ty, XmmRegister dst, const Address &src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 3674d4a..0abf587 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -758,6 +758,76 @@
}
template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
+ XmmRegister src) { // Encodes padds with both operands in XMM registers; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the REX prefix.
+ emitRexRB(RexTypeIrrelevant, dst, src);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xEC); // Byte-element form (PADDSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xED); // 16-bit-element form (PADDSW).
+ } else {
+ assert(false && "Unexpected padds operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
+ const Address &src) { // Encodes padds with an XMM destination and a memory (Address) source; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the address-size and REX prefixes.
+ emitAddrSizeOverridePrefix();
+ emitRex(RexTypeIrrelevant, src, dst);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xEC); // Byte-element form (PADDSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xED); // 16-bit-element form (PADDSW).
+ } else {
+ assert(false && "Unexpected padds operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitOperand(gprEncoding(dst), src); // Operand bytes: dst's encoding together with the memory operand.
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
+ XmmRegister src) { // Encodes paddus with both operands in XMM registers; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the REX prefix.
+ emitRexRB(RexTypeIrrelevant, dst, src);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xDC); // Byte-element form (PADDUSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xDD); // 16-bit-element form (PADDUSW).
+ } else {
+ assert(false && "Unexpected paddus operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
+ const Address &src) { // Encodes paddus with an XMM destination and a memory (Address) source; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the address-size and REX prefixes.
+ emitAddrSizeOverridePrefix();
+ emitRex(RexTypeIrrelevant, src, dst);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xDC); // Byte-element form (PADDUSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xDD); // 16-bit-element form (PADDUSW).
+ } else {
+ assert(false && "Unexpected paddus operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitOperand(gprEncoding(dst), src); // Operand bytes: dst's encoding together with the memory operand.
+}
+
+template <typename TraitsType>
void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
@@ -1001,6 +1071,75 @@
}
template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
+ XmmRegister src) { // Encodes psubs with both operands in XMM registers; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the REX prefix.
+ emitRexRB(RexTypeIrrelevant, dst, src);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xE8); // Byte-element form (PSUBSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xE9); // 16-bit-element form (PSUBSW).
+ } else {
+ assert(false && "Unexpected psubs operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
+ const Address &src) { // Encodes psubs with an XMM destination and a memory (Address) source; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the address-size and REX prefixes.
+ emitAddrSizeOverridePrefix();
+ emitRex(RexTypeIrrelevant, src, dst);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xE8); // Byte-element form (PSUBSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xE9); // 16-bit-element form (PSUBSW).
+ } else {
+ assert(false && "Unexpected psubs operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitOperand(gprEncoding(dst), src); // Operand bytes: dst's encoding together with the memory operand.
+}
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
+ XmmRegister src) { // Encodes psubus with both operands in XMM registers; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the REX prefix.
+ emitRexRB(RexTypeIrrelevant, dst, src);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xD8); // Byte-element form (PSUBUSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xD9); // 16-bit-element form (PSUBUSW).
+ } else {
+ assert(false && "Unexpected psubus operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitXmmRegisterOperand(dst, src);
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
+ const Address &src) { // Encodes psubus with an XMM destination and a memory (Address) source; Ty selects the element width.
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66); // Prefix byte; emitted ahead of the address-size and REX prefixes.
+ emitAddrSizeOverridePrefix();
+ emitRex(RexTypeIrrelevant, src, dst);
+ emitUint8(0x0F); // First opcode byte; the second one is chosen by Ty below.
+ if (isByteSizedArithType(Ty)) {
+ emitUint8(0xD8); // Byte-element form (PSUBUSB).
+ } else if (Ty == IceType_i16) { // NOTE(review): compared against scalar i16, so callers presumably pass the element type -- confirm.
+ emitUint8(0xD9); // 16-bit-element form (PSUBUSW).
+ } else {
+ assert(false && "Unexpected psubus operand type"); // NOTE(review): if assert is compiled out (NDEBUG), no second opcode byte is emitted before the operand bytes below.
+ }
+ emitOperand(gprEncoding(dst), src); // Operand bytes: dst's encoding together with the memory operand.
+}
+
+template <typename TraitsType>
void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 6d74b93..489ffea 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -132,6 +132,8 @@
Orps,
OrRMW,
Padd,
+ Padds,
+ Paddus,
Pand,
Pandn,
Pblendvb,
@@ -156,6 +158,8 @@
Psra,
Psrl,
Psub,
+ Psubs,
+ Psubus,
Push,
Pxor,
Ret,
@@ -1435,6 +1439,38 @@
Source) {}
};
+ class InstX86Padds // Instruction class for the Padds kind, built on the shared XMM binary-op base.
+ : public InstX86BaseBinopXmm<InstX86Base::Padds, true, // NOTE(review): the meaning of `true` is defined by InstX86BaseBinopXmm, not visible here.
+ InstX86Base::SseSuffix::Integral> {
+ public:
+ static InstX86Padds *create(Cfg *Func, Variable *Dest, Operand *Source) { // Factory: placement-new into storage obtained from Func->allocate.
+ return new (Func->allocate<InstX86Padds>())
+ InstX86Padds(Func, Dest, Source);
+ }
+
+ private:
+ InstX86Padds(Cfg *Func, Variable *Dest, Operand *Source) // Private constructor; create() above is the only public way to build one.
+ : InstX86BaseBinopXmm<InstX86Base::Padds, true,
+ InstX86Base::SseSuffix::Integral>(Func, Dest,
+ Source) {} // Forwards all arguments to the base; no extra state.
+ };
+
+ class InstX86Paddus // Instruction class for the Paddus kind, built on the shared XMM binary-op base.
+ : public InstX86BaseBinopXmm<InstX86Base::Paddus, true, // NOTE(review): the meaning of `true` is defined by InstX86BaseBinopXmm, not visible here.
+ InstX86Base::SseSuffix::Integral> {
+ public:
+ static InstX86Paddus *create(Cfg *Func, Variable *Dest, Operand *Source) { // Factory: placement-new into storage obtained from Func->allocate.
+ return new (Func->allocate<InstX86Paddus>())
+ InstX86Paddus(Func, Dest, Source);
+ }
+
+ private:
+ InstX86Paddus(Cfg *Func, Variable *Dest, Operand *Source) // Private constructor; create() above is the only public way to build one.
+ : InstX86BaseBinopXmm<InstX86Base::Paddus, true,
+ InstX86Base::SseSuffix::Integral>(Func, Dest,
+ Source) {} // Forwards all arguments to the base; no extra state.
+ };
+
class InstX86Sub : public InstX86BaseBinopGPR<InstX86Base::Sub> {
public:
static InstX86Sub *create(Cfg *Func, Variable *Dest, Operand *Source) {
@@ -1531,6 +1567,38 @@
Source) {}
};
+ class InstX86Psubs // Instruction class for the Psubs kind, built on the shared XMM binary-op base.
+ : public InstX86BaseBinopXmm<InstX86Base::Psubs, true, // NOTE(review): the meaning of `true` is defined by InstX86BaseBinopXmm, not visible here.
+ InstX86Base::SseSuffix::Integral> {
+ public:
+ static InstX86Psubs *create(Cfg *Func, Variable *Dest, Operand *Source) { // Factory: placement-new into storage obtained from Func->allocate.
+ return new (Func->allocate<InstX86Psubs>())
+ InstX86Psubs(Func, Dest, Source);
+ }
+
+ private:
+ InstX86Psubs(Cfg *Func, Variable *Dest, Operand *Source) // Private constructor; create() above is the only public way to build one.
+ : InstX86BaseBinopXmm<InstX86Base::Psubs, true,
+ InstX86Base::SseSuffix::Integral>(Func, Dest,
+ Source) {} // Forwards all arguments to the base; no extra state.
+ };
+
+ class InstX86Psubus // Instruction class for the Psubus kind, built on the shared XMM binary-op base.
+ : public InstX86BaseBinopXmm<InstX86Base::Psubus, true, // NOTE(review): the meaning of `true` is defined by InstX86BaseBinopXmm, not visible here.
+ InstX86Base::SseSuffix::Integral> {
+ public:
+ static InstX86Psubus *create(Cfg *Func, Variable *Dest, Operand *Source) { // Factory: placement-new into storage obtained from Func->allocate.
+ return new (Func->allocate<InstX86Psubus>())
+ InstX86Psubus(Func, Dest, Source);
+ }
+
+ private:
+ InstX86Psubus(Cfg *Func, Variable *Dest, Operand *Source) // Private constructor; create() above is the only public way to build one.
+ : InstX86BaseBinopXmm<InstX86Base::Psubus, true,
+ InstX86Base::SseSuffix::Integral>(Func, Dest,
+ Source) {} // Forwards all arguments to the base; no extra state.
+ };
+
class InstX86And : public InstX86BaseBinopGPR<InstX86Base::And> {
public:
static InstX86And *create(Cfg *Func, Variable *Dest, Operand *Source) {
@@ -3097,6 +3165,8 @@
using Andnps = typename InstImpl<TraitsType>::InstX86Andnps;
using Andps = typename InstImpl<TraitsType>::InstX86Andps;
using Padd = typename InstImpl<TraitsType>::InstX86Padd;
+ using Padds = typename InstImpl<TraitsType>::InstX86Padds; // Short alias for the Padds instruction class.
+ using Paddus = typename InstImpl<TraitsType>::InstX86Paddus; // Short alias for the Paddus instruction class.
using Sub = typename InstImpl<TraitsType>::InstX86Sub;
using SubRMW = typename InstImpl<TraitsType>::InstX86SubRMW;
using Subps = typename InstImpl<TraitsType>::InstX86Subps;
@@ -3104,6 +3174,8 @@
using Sbb = typename InstImpl<TraitsType>::InstX86Sbb;
using SbbRMW = typename InstImpl<TraitsType>::InstX86SbbRMW;
using Psub = typename InstImpl<TraitsType>::InstX86Psub;
+ using Psubs = typename InstImpl<TraitsType>::InstX86Psubs; // Short alias for the Psubs instruction class.
+ using Psubus = typename InstImpl<TraitsType>::InstX86Psubus; // Short alias for the Psubus instruction class.
using And = typename InstImpl<TraitsType>::InstX86And;
using AndRMW = typename InstImpl<TraitsType>::InstX86AndRMW;
using Pand = typename InstImpl<TraitsType>::InstX86Pand;
@@ -3279,6 +3351,12 @@
const char *InstImpl<TraitsType>::InstX86Padd::Base::Opcode = "padd"; \
template <> \
template <> \
+ const char *InstImpl<TraitsType>::InstX86Padds::Base::Opcode = "padds"; \
+ template <> \
+ template <> \
+ const char *InstImpl<TraitsType>::InstX86Paddus::Base::Opcode = "paddus"; \
+ template <> \
+ template <> \
const char *InstImpl<TraitsType>::InstX86Sub::Base::Opcode = "sub"; \
template <> \
template <> \
@@ -3300,6 +3378,12 @@
const char *InstImpl<TraitsType>::InstX86Psub::Base::Opcode = "psub"; \
template <> \
template <> \
+ const char *InstImpl<TraitsType>::InstX86Psubs::Base::Opcode = "psubs"; \
+ template <> \
+ template <> \
+ const char *InstImpl<TraitsType>::InstX86Psubus::Base::Opcode = "psubus"; \
+ template <> \
+ template <> \
const char *InstImpl<TraitsType>::InstX86And::Base::Opcode = "and"; \
template <> \
template <> \
@@ -3683,6 +3767,18 @@
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Padds::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::padds, \
+ &InstImpl<TraitsType>::Assembler::padds}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Paddus::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::paddus, \
+ &InstImpl<TraitsType>::Assembler::paddus}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Pand::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::pand, \
&InstImpl<TraitsType>::Assembler::pand}; \
@@ -3749,6 +3845,18 @@
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Psubs::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::psubs, \
+ &InstImpl<TraitsType>::Assembler::psubs}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Psubus::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::psubus, \
+ &InstImpl<TraitsType>::Assembler::psubus}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Pxor::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::pxor, \
&InstImpl<TraitsType>::Assembler::pxor}; \
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index b5cab5e..3e2a738 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -62,12 +62,16 @@
Stackrestore,
Trap,
// The intrinsics below are not part of the PNaCl specification.
+ AddSaturateSigned,
+ AddSaturateUnsigned,
LoadSubVector,
MultiplyAddPairs,
MultiplyHighSigned,
MultiplyHighUnsigned,
SignMask,
StoreSubVector,
+ SubtractSaturateSigned,
+ SubtractSaturateUnsigned,
VectorPackSigned,
VectorPackUnsigned
};
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 41b91b8..e6276ce 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -767,6 +767,14 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Padd>(Dest, Src0);
}
+ void _padds(Variable *Dest, Operand *Src0) { // Inserts a Padds instruction on (Dest, Src0) into the current lowering Context.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped helper handed the addresses of both operands; NOTE(review): presumably may rewrite them for sandboxing -- confirm.
+ Context.insert<typename Traits::Insts::Padds>(Dest, Src0);
+ }
+ void _paddus(Variable *Dest, Operand *Src0) { // Inserts a Paddus instruction on (Dest, Src0) into the current lowering Context.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped helper handed the addresses of both operands; NOTE(review): presumably may rewrite them for sandboxing -- confirm.
+ Context.insert<typename Traits::Insts::Paddus>(Dest, Src0);
+ }
void _pand(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Pand>(Dest, Src0);
@@ -864,6 +872,14 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Psub>(Dest, Src0);
}
+ void _psubs(Variable *Dest, Operand *Src0) { // Inserts a Psubs instruction on (Dest, Src0) into the current lowering Context.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped helper handed the addresses of both operands; NOTE(review): presumably may rewrite them for sandboxing -- confirm.
+ Context.insert<typename Traits::Insts::Psubs>(Dest, Src0);
+ }
+ void _psubus(Variable *Dest, Operand *Src0) { // Inserts a Psubus instruction on (Dest, Src0) into the current lowering Context.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Scoped helper handed the addresses of both operands; NOTE(review): presumably may rewrite them for sandboxing -- confirm.
+ Context.insert<typename Traits::Insts::Psubus>(Dest, Src0);
+ }
void _push(Operand *Src0) {
Context.insert<typename Traits::Insts::Push>(Src0);
}
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index d4c8833..a5de2ae 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4509,6 +4509,54 @@
_movp(Dest, T);
return;
}
+ case Intrinsics::AddSaturateSigned: { // Lowered as: T = arg0; padds T, arg1; Dest = T.
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType()); // Temporary created with the destination's type.
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Each source is allowed to be a register or a memory operand.
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM); // Seed T with the first operand; padds then updates T in place.
+ _padds(T, Src1RM);
+ _movp(Dest, T);
+ return;
+ }
+ case Intrinsics::SubtractSaturateSigned: { // Lowered as: T = arg0; psubs T, arg1; Dest = T.
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType()); // Temporary created with the destination's type.
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Each source is allowed to be a register or a memory operand.
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM); // Seed T with the first operand; arg1 is the psubs source operand, so order matters.
+ _psubs(T, Src1RM);
+ _movp(Dest, T);
+ return;
+ }
+ case Intrinsics::AddSaturateUnsigned: { // Lowered as: T = arg0; paddus T, arg1; Dest = T.
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType()); // Temporary created with the destination's type.
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Each source is allowed to be a register or a memory operand.
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM); // Seed T with the first operand; paddus then updates T in place.
+ _paddus(T, Src1RM);
+ _movp(Dest, T);
+ return;
+ }
+ case Intrinsics::SubtractSaturateUnsigned: { // Lowered as: T = arg0; psubus T, arg1; Dest = T.
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType()); // Temporary created with the destination's type.
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // Each source is allowed to be a register or a memory operand.
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM); // Seed T with the first operand; arg1 is the psubus source operand, so order matters.
+ _psubus(T, Src1RM);
+ _movp(Dest, T);
+ return;
+ }
default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic");
return;