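Add x86 vector packing instructions. Adds packss/packus (register and memory source forms) to the x86 assembler, the matching InstX86Packss/InstX86Packus instructions and _packss/_packus lowering helpers, and assembler unit tests for x86-32 and x86-64. The existing per-type PackString attribute (b, w, d) is renamed to IntegralString, and PackString now holds the pack suffix (wb, dw). BUG=swiftshader:15 Change-Id: I0d40fab6287130143693e8e4752859b7142a503d Reviewed-on: https://chromium-review.googlesource.com/394007 Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Jim Stichnoth <stichnot@chromium.org>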
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h index c9baf23..be0ff00 100644 --- a/src/IceAssemblerX86Base.h +++ b/src/IceAssemblerX86Base.h
@@ -437,6 +437,10 @@ const Immediate &mask); void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src); void punpckl(Type Ty, XmmRegister Dst, const Address &Src); + void packss(Type Ty, XmmRegister Dst, XmmRegister Src); + void packss(Type Ty, XmmRegister Dst, const Address &Src); + void packus(Type Ty, XmmRegister Dst, XmmRegister Src); + void packus(Type Ty, XmmRegister Dst, const Address &Src); void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask); void shufps(Type Ty, XmmRegister dst, const Address &src, const Immediate &mask);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h index e5819ce..11da946 100644 --- a/src/IceAssemblerX86BaseImpl.h +++ b/src/IceAssemblerX86BaseImpl.h
@@ -1628,6 +1628,78 @@ } template <typename TraitsType> +void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst, + XmmRegister Src) { + AssemblerBuffer::EnsureCapacity ensured(&Buffer); + emitUint8(0x66); + emitRexRB(RexTypeIrrelevant, Dst, Src); + emitUint8(0x0F); + if (Ty == IceType_v4i32 || Ty == IceType_v4f32) { + emitUint8(0x6B); + } else if (Ty == IceType_v8i16) { + emitUint8(0x63); + } else { + assert(false && "Unexpected vector pack operand type"); + } + emitXmmRegisterOperand(Dst, Src); +} + +template <typename TraitsType> +void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst, + const Address &Src) { + AssemblerBuffer::EnsureCapacity ensured(&Buffer); + emitUint8(0x66); + emitAddrSizeOverridePrefix(); + emitRex(RexTypeIrrelevant, Src, Dst); + emitUint8(0x0F); + if (Ty == IceType_v4i32 || Ty == IceType_v4f32) { + emitUint8(0x6B); + } else if (Ty == IceType_v8i16) { + emitUint8(0x63); + } else { + assert(false && "Unexpected vector pack operand type"); + } + emitOperand(gprEncoding(Dst), Src); +} + +template <typename TraitsType> +void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst, + XmmRegister Src) { + AssemblerBuffer::EnsureCapacity ensured(&Buffer); + emitUint8(0x66); + emitRexRB(RexTypeIrrelevant, Dst, Src); + emitUint8(0x0F); + if (Ty == IceType_v4i32 || Ty == IceType_v4f32) { + emitUint8(0x38); + emitUint8(0x2B); + } else if (Ty == IceType_v8i16) { + emitUint8(0x67); + } else { + assert(false && "Unexpected vector pack operand type"); + } + emitXmmRegisterOperand(Dst, Src); +} + +template <typename TraitsType> +void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst, + const Address &Src) { + AssemblerBuffer::EnsureCapacity ensured(&Buffer); + emitUint8(0x66); + emitAddrSizeOverridePrefix(); + emitRex(RexTypeIrrelevant, Src, Dst); + emitUint8(0x0F); + if (Ty == IceType_v4i32 || Ty == IceType_v4f32) { + emitUint8(0x38); + emitUint8(0x2B); + } else if (Ty == IceType_v8i16) { + emitUint8(0x67); + } 
else { + assert(false && "Unexpected vector pack operand type"); + } + emitOperand(gprEncoding(Dst), Src); +} + +template <typename TraitsType> void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst, XmmRegister src, const Immediate &imm) {
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp index a0ff546..00a455c 100644 --- a/src/IceInstX8632.cpp +++ b/src/IceInstX8632.cpp
@@ -50,8 +50,8 @@ const TargetX8632Traits::TypeAttributesType TargetX8632Traits::TypeAttributes[] = { -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ - { cvt, sdss, pdps, spsd, pack, unpack, width, fld } \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ + { cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld } \ , ICETYPEX8632_TABLE #undef X
diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def index 173164f..c71d6df 100644 --- a/src/IceInstX8632.def +++ b/src/IceInstX8632.def
@@ -212,22 +212,22 @@ //#define X(val, emit) #define ICETYPEX8632_TABLE \ - /* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \ - X(void, void, "?", "", "", "", "", "", "", "") \ - X(i1, void, "si", "", "", "", "", "", "b", "") \ - X(i8, void, "si", "", "", "", "", "", "b", "") \ - X(i16, void, "si", "", "", "", "", "", "w", "") \ - X(i32, void, "si", "", "", "", "", "", "l", "") \ - X(i64, void, "si", "", "", "", "", "", "q", "") \ - X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \ - X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \ - X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \ - X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \ - X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \ - X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \ - X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \ - X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \ - X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "") -//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) + /* tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld */ \ + X(void, void, "?", "", "", "", "", "", "", "", "") \ + X(i1, void, "si", "", "", "", "", "", "", "b", "") \ + X(i8, void, "si", "", "", "", "", "", "", "b", "") \ + X(i16, void, "si", "", "", "", "", "", "", "w", "") \ + X(i32, void, "si", "", "", "", "", "", "", "l", "") \ + X(i64, void, "si", "", "", "", "", "", "", "q", "") \ + X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "", "s") \ + X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "", "l") \ + X(v4i1, i32, "?", "", "", "", "d", "dq", "", "", "") \ + X(v8i1, i16, "?", "", "", "", "w", "wd", "", "", "") \ + X(v16i1, i8, "?", "", "", "", "b", "bw", "", "", "") \ + X(v16i8, i8, "?", "", "", "", "b", "bw", "", "", "") \ + X(v8i16, i16, "?", "", "", "", "w", "wd", "wb", "", "") \ + X(v4i32, i32, "dq", "", "", "", "d", "dq", "dw", "", "") \ + X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "", "") +//#define X(tag, 
elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) #endif // SUBZERO_SRC_ICEINSTX8632_DEF
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp index d83c47e..d44f35d 100644 --- a/src/IceInstX8664.cpp +++ b/src/IceInstX8664.cpp
@@ -51,8 +51,8 @@ const TargetX8664Traits::TypeAttributesType TargetX8664Traits::TypeAttributes[] = { -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ - { cvt, sdss, pdps, spsd, pack, unpack, width, fld } \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ + { cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld } \ , ICETYPEX8664_TABLE #undef X
diff --git a/src/IceInstX8664.def b/src/IceInstX8664.def index caee1ad..db97dbf 100644 --- a/src/IceInstX8664.def +++ b/src/IceInstX8664.def
@@ -295,22 +295,22 @@ //#define X(val, emit) #define ICETYPEX8664_TABLE \ - /* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \ - X(void, void, "?", "", "", "", "", "", "", "") \ - X(i1, void, "si", "", "", "", "", "", "b", "") \ - X(i8, void, "si", "", "", "", "", "", "b", "") \ - X(i16, void, "si", "", "", "", "", "", "w", "") \ - X(i32, void, "si", "", "", "", "", "", "l", "") \ - X(i64, void, "si", "", "", "", "", "", "q", "") \ - X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \ - X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \ - X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \ - X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \ - X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \ - X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \ - X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \ - X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \ - X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "") -//#define X(tag, elementty, cvt, sdss, pdps, pack, unpack, width, fld) + /* tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld */ \ + X(void, void, "?", "", "", "", "", "", "", "", "") \ + X(i1, void, "si", "", "", "", "", "", "", "b", "") \ + X(i8, void, "si", "", "", "", "", "", "", "b", "") \ + X(i16, void, "si", "", "", "", "", "", "", "w", "") \ + X(i32, void, "si", "", "", "", "", "", "", "l", "") \ + X(i64, void, "si", "", "", "", "", "", "", "q", "") \ + X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "", "s") \ + X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "", "l") \ + X(v4i1, i32, "?", "", "", "", "d", "dq", "", "", "") \ + X(v8i1, i16, "?", "", "", "", "w", "wd", "", "", "") \ + X(v16i1, i8, "?", "", "", "", "b", "bw", "", "", "") \ + X(v16i8, i8, "?", "", "", "", "b", "bw", "", "", "") \ + X(v8i16, i16, "?", "", "", "", "w", "wd", "wb", "", "") \ + X(v4i32, i32, "dq", "", "", "", "d", "dq", "dw", "", "") \ + X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "", "") +//#define X(tag, elty, 
cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) #endif // SUBZERO_SRC_ICEINSTX8664_DEF
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h index 1ce6d6f..7e96de5 100644 --- a/src/IceInstX86Base.h +++ b/src/IceInstX86Base.h
@@ -145,6 +145,8 @@ Pshufb, Pshufd, Punpckl, + Packss, + Packus, Psll, Psra, Psrl, @@ -186,7 +188,7 @@ IacaEnd }; - enum SseSuffix { None, Packed, Unpack, Scalar, Integral }; + enum SseSuffix { None, Packed, Unpack, Scalar, Integral, Pack }; static const char *getWidthString(Type Ty); static const char *getFldString(Type Ty); @@ -878,6 +880,9 @@ SuffixString = Traits::TypeAttributes[DestTy].SdSsString; break; case InstX86Base::SseSuffix::Integral: + SuffixString = Traits::TypeAttributes[DestTy].IntegralString; + break; + case InstX86Base::SseSuffix::Pack: SuffixString = Traits::TypeAttributes[DestTy].PackString; break; } @@ -934,7 +939,7 @@ // Shift operations are always integral, and hence always need a suffix. const Type DestTy = this->getDest()->getType(); this->emitTwoAddress(Func, this->Opcode, - Traits::TypeAttributes[DestTy].PackString); + Traits::TypeAttributes[DestTy].IntegralString); } void emitIAS(const Cfg *Func) const override { this->validateVectorAddrMode(); @@ -2927,6 +2932,38 @@ Source) {} }; + class InstX86Packss + : public InstX86BaseBinopXmm<InstX86Base::Packss, false, + InstX86Base::SseSuffix::Pack> { + public: + static InstX86Packss *create(Cfg *Func, Variable *Dest, Operand *Source) { + return new (Func->allocate<InstX86Packss>()) + InstX86Packss(Func, Dest, Source); + } + + private: + InstX86Packss(Cfg *Func, Variable *Dest, Operand *Source) + : InstX86BaseBinopXmm<InstX86Base::Packss, false, + InstX86Base::SseSuffix::Pack>(Func, Dest, + Source) {} + }; + + class InstX86Packus + : public InstX86BaseBinopXmm<InstX86Base::Packus, false, + InstX86Base::SseSuffix::Pack> { + public: + static InstX86Packus *create(Cfg *Func, Variable *Dest, Operand *Source) { + return new (Func->allocate<InstX86Packus>()) + InstX86Packus(Func, Dest, Source); + } + + private: + InstX86Packus(Cfg *Func, Variable *Dest, Operand *Source) + : InstX86BaseBinopXmm<InstX86Base::Packus, false, + InstX86Base::SseSuffix::Pack>(Func, Dest, + Source) {} + }; + }; // struct 
InstImpl /// struct Insts is a template that can be used to instantiate all the X86 @@ -3052,6 +3089,8 @@ using Pshufb = typename InstImpl<TraitsType>::InstX86Pshufb; using Punpckl = typename InstImpl<TraitsType>::InstX86Punpckl; + using Packss = typename InstImpl<TraitsType>::InstX86Packss; + using Packus = typename InstImpl<TraitsType>::InstX86Packus; }; /// X86 Instructions have static data (particularly, opcodes and instruction @@ -3287,6 +3326,12 @@ template <> \ template <> \ const char *InstImpl<TraitsType>::InstX86Punpckl::Base::Opcode = "punpckl"; \ + template <> \ + template <> \ + const char *InstImpl<TraitsType>::InstX86Packss::Base::Opcode = "packss"; \ + template <> \ + template <> \ + const char *InstImpl<TraitsType>::InstX86Packus::Base::Opcode = "packus"; \ /* Inplace GPR ops */ \ template <> \ template <> \ @@ -3660,6 +3705,18 @@ InstImpl<TraitsType>::InstX86Punpckl::Base::Emitter = { \ &InstImpl<TraitsType>::Assembler::punpckl, \ &InstImpl<TraitsType>::Assembler::punpckl}; \ + template <> \ + template <> \ + const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \ + InstImpl<TraitsType>::InstX86Packss::Base::Emitter = { \ + &InstImpl<TraitsType>::Assembler::packss, \ + &InstImpl<TraitsType>::Assembler::packss}; \ + template <> \ + template <> \ + const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \ + InstImpl<TraitsType>::InstX86Packus::Base::Emitter = { \ + &InstImpl<TraitsType>::Assembler::packus, \ + &InstImpl<TraitsType>::Assembler::packus}; \ } \ }
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h index 1bc2a0c..5c6c005 100644 --- a/src/IceInstX86BaseImpl.h +++ b/src/IceInstX86BaseImpl.h
@@ -2607,7 +2607,8 @@ assert(this->getSrcSize() == 2); // pextrb and pextrd are SSE4.1 instructions. Str << "\t" << this->Opcode - << Traits::TypeAttributes[this->getSrc(0)->getType()].PackString << "\t"; + << Traits::TypeAttributes[this->getSrc(0)->getType()].IntegralString + << "\t"; this->getSrc(1)->emit(Func); Str << ", "; this->getSrc(0)->emit(Func); @@ -2646,7 +2647,8 @@ Ostream &Str = Func->getContext()->getStrEmit(); assert(this->getSrcSize() == 3); Str << "\t" << this->Opcode - << Traits::TypeAttributes[this->getDest()->getType()].PackString << "\t"; + << Traits::TypeAttributes[this->getDest()->getType()].IntegralString + << "\t"; this->getSrc(2)->emit(Func); Str << ", "; Operand *Src1 = this->getSrc(1);
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp index fd1cf75..36e5964 100644 --- a/src/IceTargetLoweringX8632.cpp +++ b/src/IceTargetLoweringX8632.cpp
@@ -101,8 +101,8 @@ const TargetX8632Traits::TableTypeX8632AttributesType TargetX8632Traits::TableTypeX8632Attributes[] = { -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ - { IceType_##elementty } \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ + { IceType_##elty } \ , ICETYPEX8632_TABLE #undef X @@ -462,7 +462,7 @@ namespace dummy3 { // Define a temporary set of enum values based on low-level table entries. enum _tmp_enum { -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ _tmp_##tag, ICETYPEX8632_TABLE #undef X @@ -475,7 +475,7 @@ #undef X // Define a set of constants based on low-level table entries, and ensure the // table entry keys are consistent. -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ static const int _table2_##tag = _tmp_##tag; \ static_assert(_table1_##tag == _table2_##tag, \ "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h index 334dbdc..8844519 100644 --- a/src/IceTargetLoweringX8632Traits.h +++ b/src/IceTargetLoweringX8632Traits.h
@@ -946,14 +946,15 @@ } InstCmppsAttributes[]; static const struct TypeAttributesType { - const char *CvtString; // i (integer), s (single FP), d (double FP) - const char *SdSsString; // ss, sd, or <blank> - const char *PdPsString; // ps, pd, or <blank> - const char *SpsdString; // ss, sd, ps, pd, or <blank> - const char *PackString; // b, w, d, or <blank> - const char *UnpackString; // bw, wd, dq, or <blank> - const char *WidthString; // b, w, l, q, or <blank> - const char *FldString; // s, l, or <blank> + const char *CvtString; // i (integer), s (single FP), d (double FP) + const char *SdSsString; // ss, sd, or <blank> + const char *PdPsString; // ps, pd, or <blank> + const char *SpsdString; // ss, sd, ps, pd, or <blank> + const char *IntegralString; // b, w, d, or <blank> + const char *UnpackString; // bw, wd, dq, or <blank> + const char *PackString; // wb, dw, or <blank> + const char *WidthString; // b, w, l, q, or <blank> + const char *FldString; // s, l, or <blank> } TypeAttributes[]; static const char *InstSegmentRegNames[];
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp index 66a1581..df454b0 100644 --- a/src/IceTargetLoweringX8664.cpp +++ b/src/IceTargetLoweringX8664.cpp
@@ -94,8 +94,8 @@ const TargetX8664Traits::TableTypeX8664AttributesType TargetX8664Traits::TableTypeX8664Attributes[] = { -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ - { IceType_##elementty } \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ + { IceType_##elty } \ , ICETYPEX8664_TABLE #undef X @@ -801,7 +801,7 @@ namespace dummy3 { // Define a temporary set of enum values based on low-level table entries. enum _tmp_enum { -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ _tmp_##tag, ICETYPEX8664_TABLE #undef X @@ -814,7 +814,7 @@ #undef X // Define a set of constants based on low-level table entries, and ensure the // table entry keys are consistent. -#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \ +#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \ static const int _table2_##tag = _tmp_##tag; \ static_assert(_table1_##tag == _table2_##tag, \ "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h index 27d8b5f..5e3e3c1 100644 --- a/src/IceTargetLoweringX8664Traits.h +++ b/src/IceTargetLoweringX8664Traits.h
@@ -1022,14 +1022,15 @@ } InstCmppsAttributes[]; static const struct TypeAttributesType { - const char *CvtString; // i (integer), s (single FP), d (double FP) - const char *SdSsString; // ss, sd, or <blank> - const char *PdPsString; // ps, pd, or <blank> - const char *SpSdString; // ss, sd, ps, pd, or <blank> - const char *PackString; // b, w, d, or <blank> - const char *UnpackString; // bw, wd, dq, or <blank> - const char *WidthString; // b, w, l, q, or <blank> - const char *FldString; // s, l, or <blank> + const char *CvtString; // i (integer), s (single FP), d (double FP) + const char *SdSsString; // ss, sd, or <blank> + const char *PdPsString; // ps, pd, or <blank> + const char *SpSdString; // ss, sd, ps, pd, or <blank> + const char *IntegralString; // b, w, d, or <blank> + const char *UnpackString; // bw, wd, dq, or <blank> + const char *PackString; // wb, dw, or <blank> + const char *WidthString; // b, w, l, q, or <blank> + const char *FldString; // s, l, or <blank> } TypeAttributes[]; };
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h index a7c89f9..0f31dd4 100644 --- a/src/IceTargetLoweringX86Base.h +++ b/src/IceTargetLoweringX86Base.h
@@ -815,6 +815,14 @@ AutoMemorySandboxer<> _(this, &Dest, &Src0); Context.insert<typename Traits::Insts::Punpckl>(Dest, Src0); } + void _packss(Variable *Dest, Operand *Src0) { + AutoMemorySandboxer<> _(this, &Dest, &Src0); + Context.insert<typename Traits::Insts::Packss>(Dest, Src0); + } + void _packus(Variable *Dest, Operand *Src0) { + AutoMemorySandboxer<> _(this, &Dest, &Src0); + Context.insert<typename Traits::Insts::Packus>(Dest, Src0); + } void _pshufb(Variable *Dest, Operand *Src0) { AutoMemorySandboxer<> _(this, &Dest, &Src0); Context.insert<typename Traits::Insts::Pshufb>(Dest, Src0);
diff --git a/unittest/AssemblerX8632/XmmArith.cpp b/unittest/AssemblerX8632/XmmArith.cpp index 1571874..1c85e2b 100644 --- a/unittest/AssemblerX8632/XmmArith.cpp +++ b/unittest/AssemblerX8632/XmmArith.cpp
@@ -1103,6 +1103,160 @@ #undef TestImplXmmXmm } +TEST_F(AssemblerX8632Test, Packss) { + const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), + uint64_t(0x7FFFFFFF80000000ull)); + const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), + uint64_t(0x0000800100007FFEull)); + const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull), + uint64_t(0x7FFF7FFEFFFEFFFFull)); + + const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), + uint64_t(0xFFFEFFFF7FFF8000ull)); + const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), + uint64_t(0x0088007700660055ull)); + const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull), + uint64_t(0x7F776655057F7F7Eull)); + +#define TestImplXmmXmm(Dst, Src, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ + __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ + XmmRegister::Encoded_Reg_##Src); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImplXmmAddr(Dst, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImpl(Dst, Src) \ + do { \ + TestImplXmmXmm(Dst, Src, 
packss, v4i32); \ + TestImplXmmAddr(Dst, packss, v4i32); \ + TestImplXmmXmm(Dst, Src, packss, v8i16); \ + TestImplXmmAddr(Dst, packss, v8i16); \ + } while (0) + + TestImpl(xmm0, xmm1); + TestImpl(xmm1, xmm2); + TestImpl(xmm2, xmm3); + TestImpl(xmm3, xmm4); + TestImpl(xmm4, xmm5); + TestImpl(xmm5, xmm6); + TestImpl(xmm6, xmm7); + TestImpl(xmm7, xmm0); + +#undef TestImpl +#undef TestImplXmmAddr +#undef TestImplXmmXmm +} + +TEST_F(AssemblerX8632Test, Packus) { + const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), + uint64_t(0x7FFFFFFF80000000ull)); + const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), + uint64_t(0x0000800100007FFEull)); + const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull), + uint64_t(0x80017FFE00000000ull)); + + const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), + uint64_t(0xFFFEFFFF7FFF8000ull)); + const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), + uint64_t(0x0088007700660055ull)); + const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull), + uint64_t(0x8877665505FF817Eull)); + +#define TestImplXmmXmm(Dst, Src, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ + __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ + XmmRegister::Encoded_Reg_##Src); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImplXmmAddr(Dst, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, 
dwordAddress(T0)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImpl(Dst, Src) \ + do { \ + TestImplXmmXmm(Dst, Src, packus, v4i32); \ + TestImplXmmAddr(Dst, packus, v4i32); \ + TestImplXmmXmm(Dst, Src, packus, v8i16); \ + TestImplXmmAddr(Dst, packus, v8i16); \ + } while (0) + + TestImpl(xmm0, xmm1); + TestImpl(xmm1, xmm2); + TestImpl(xmm2, xmm3); + TestImpl(xmm3, xmm4); + TestImpl(xmm4, xmm5); + TestImpl(xmm5, xmm6); + TestImpl(xmm6, xmm7); + TestImpl(xmm7, xmm0); + +#undef TestImpl +#undef TestImplXmmAddr +#undef TestImplXmmXmm +} + TEST_F(AssemblerX8632Test, Pshufb) { const Dqword V0(uint64_t(0x1122334455667788ull), uint64_t(0x99aabbccddeeff32ull));
diff --git a/unittest/AssemblerX8664/XmmArith.cpp b/unittest/AssemblerX8664/XmmArith.cpp index cadb88e..4ceed00 100644 --- a/unittest/AssemblerX8664/XmmArith.cpp +++ b/unittest/AssemblerX8664/XmmArith.cpp
@@ -1169,6 +1169,160 @@ #undef TestImplXmmXmm } +TEST_F(AssemblerX8664Test, Packss) { + const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), + uint64_t(0x7FFFFFFF80000000ull)); + const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), + uint64_t(0x0000800100007FFEull)); + const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull), + uint64_t(0x7FFF7FFEFFFEFFFFull)); + + const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), + uint64_t(0xFFFEFFFF7FFF8000ull)); + const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), + uint64_t(0x0088007700660055ull)); + const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull), + uint64_t(0x7F776655057F7F7Eull)); + +#define TestImplXmmXmm(Dst, Src, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ + __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ + XmmRegister::Encoded_Reg_##Src); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImplXmmAddr(Dst, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImpl(Dst, Src) \ + do { \ + TestImplXmmXmm(Dst, Src, 
packss, v4i32); \ + TestImplXmmAddr(Dst, packss, v4i32); \ + TestImplXmmXmm(Dst, Src, packss, v8i16); \ + TestImplXmmAddr(Dst, packss, v8i16); \ + } while (0) + + TestImpl(xmm0, xmm1); + TestImpl(xmm1, xmm2); + TestImpl(xmm2, xmm3); + TestImpl(xmm3, xmm4); + TestImpl(xmm4, xmm5); + TestImpl(xmm5, xmm6); + TestImpl(xmm6, xmm7); + TestImpl(xmm7, xmm0); + +#undef TestImpl +#undef TestImplXmmAddr +#undef TestImplXmmXmm +} + +TEST_F(AssemblerX8664Test, Packus) { + const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), + uint64_t(0x7FFFFFFF80000000ull)); + const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), + uint64_t(0x0000800100007FFEull)); + const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull), + uint64_t(0x80017FFE00000000ull)); + + const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), + uint64_t(0xFFFEFFFF7FFF8000ull)); + const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), + uint64_t(0x0088007700660055ull)); + const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull), + uint64_t(0x8877665505FF817Eull)); + +#define TestImplXmmXmm(Dst, Src, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ + __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ + XmmRegister::Encoded_Reg_##Src); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImplXmmAddr(Dst, Inst, Ty) \ + do { \ + static constexpr char TestString[] = \ + "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ + const uint32_t T0 = allocateDqword(); \ + const uint32_t T1 = allocateDqword(); \ + \ + __ movups(XmmRegister::Encoded_Reg_##Dst, 
dwordAddress(T0)); \ + __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ + \ + AssembledTest test = assemble(); \ + test.setDqwordTo(T0, V0_##Ty); \ + test.setDqwordTo(T1, V1_##Ty); \ + test.run(); \ + \ + ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ + reset(); \ + } while (0) + +#define TestImpl(Dst, Src) \ + do { \ + TestImplXmmXmm(Dst, Src, packus, v4i32); \ + TestImplXmmAddr(Dst, packus, v4i32); \ + TestImplXmmXmm(Dst, Src, packus, v8i16); \ + TestImplXmmAddr(Dst, packus, v8i16); \ + } while (0) + + TestImpl(xmm0, xmm1); + TestImpl(xmm1, xmm2); + TestImpl(xmm2, xmm3); + TestImpl(xmm3, xmm4); + TestImpl(xmm4, xmm5); + TestImpl(xmm5, xmm6); + TestImpl(xmm6, xmm7); + TestImpl(xmm7, xmm0); + +#undef TestImpl +#undef TestImplXmmAddr +#undef TestImplXmmXmm +} + TEST_F(AssemblerX8664Test, Pshufb) { const Dqword V0(uint64_t(0x1122334455667788ull), uint64_t(0x99aabbccddeeff32ull));