Implement vector sign mask intrinsic. BUG=swiftshader:15 Change-Id: I7fec56518a5b4e07d2189ab01a0a223b038564c1
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h index 7ba22f2..937e997 100644 --- a/src/IceAssemblerX86Base.h +++ b/src/IceAssemblerX86Base.h
@@ -465,8 +465,7 @@ void ucomiss(Type Ty, XmmRegister a, XmmRegister b); void ucomiss(Type Ty, XmmRegister a, const Address &b); - void movmskpd(GPRRegister dst, XmmRegister src); - void movmskps(GPRRegister dst, XmmRegister src); + /* Sign-mask move from an XMM register into a GPR; Ty picks the encoding (v16i8, or v4f32/v4i32) and any other type trips an assert in the implementation. */ void movmsk(Type Ty, GPRRegister dst, XmmRegister src); void sqrtss(Type Ty, XmmRegister dst, const Address &src); void sqrtss(Type Ty, XmmRegister dst, XmmRegister src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h index b28d8e8..9c50ea9 100644 --- a/src/IceAssemblerX86BaseImpl.h +++ b/src/IceAssemblerX86BaseImpl.h
@@ -1916,21 +1916,25 @@ } template <typename TraitsType> -void AssemblerX86Base<TraitsType>::movmskpd(GPRRegister dst, XmmRegister src) { +void AssemblerX86Base<TraitsType>::movmsk(Type Ty, GPRRegister dst, + XmmRegister src) { /* Emits: optional 0x66 prefix, REX (via emitRexRB), 0x0F, a type-dependent opcode byte, then the register operand for dst/src. */ AssemblerBuffer::EnsureCapacity ensured(&Buffer); - emitUint8(0x66); + if (Ty == IceType_v16i8) { + emitUint8(0x66); /* only the v16i8 form carries the 0x66 prefix */ + } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) { + // No operand size prefix + } else { + assert(false && "Unexpected movmsk operand type"); + } emitRexRB(RexTypeIrrelevant, dst, src); emitUint8(0x0F); - emitUint8(0x50); - emitXmmRegisterOperand(dst, src); -} - -template <typename TraitsType> -void AssemblerX86Base<TraitsType>::movmskps(GPRRegister dst, XmmRegister src) { - AssemblerBuffer::EnsureCapacity ensured(&Buffer); - emitRexRB(RexTypeIrrelevant, dst, src); - emitUint8(0x0F); - emitUint8(0x50); + if (Ty == IceType_v16i8) { + emitUint8(0xD7); /* opcode for the v16i8 form; the textual emitter prints pmovmskb for this type */ + } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) { + emitUint8(0x50); /* opcode for the 4-lane form; the textual emitter prints movmskps for these types */ + } else { + assert(false && "Unexpected movmsk operand type"); /* NOTE(review): with asserts compiled out, an unexpected Ty falls through and the register operand follows 0x0F with no opcode byte */ + } emitXmmRegisterOperand(dst, src); }
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h index 09025aa..bd10572 100644 --- a/src/IceInstX86Base.h +++ b/src/IceInstX86Base.h
@@ -117,6 +117,7 @@ Maxss, Mov, Movd, + Movmsk, Movp, Movq, MovssRegs, @@ -1250,6 +1251,27 @@ : InstX86BaseUnaryopXmm<InstX86Base::Movd>(Func, Dest, Src) {} }; + class InstX86Movmsk final : public InstX86Base { /* Movmsk target instruction: created from one destination variable and one source operand (see create()). */ + InstX86Movmsk() = delete; + InstX86Movmsk(const InstX86Movmsk &) = delete; + InstX86Movmsk &operator=(const InstX86Movmsk &) = delete; + + public: + static InstX86Movmsk *create(Cfg *Func, Variable *Dest, Operand *Source) { + return new (Func->allocate<InstX86Movmsk>()) + InstX86Movmsk(Func, Dest, Source); + } + void emit(const Cfg *Func) const override; + void emitIAS(const Cfg *Func) const override; + void dump(const Cfg *Func) const override; + static bool classof(const Inst *Instr) { + return InstX86Base::isClassof(Instr, InstX86Base::Movmsk); /* kind tag is the Movmsk enumerator, the same one the constructor passes to InstX86Base */ + } + + private: + InstX86Movmsk(Cfg *Func, Variable *Dest, Operand *Source); + }; + class InstX86Sqrtss : public InstX86BaseUnaryopXmm<InstX86Base::Sqrtss> { public: static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) { @@ -3005,6 +3027,7 @@ using Movsx = typename InstImpl<TraitsType>::InstX86Movsx; using Movzx = typename InstImpl<TraitsType>::InstX86Movzx; using Movd = typename InstImpl<TraitsType>::InstX86Movd; + using Movmsk = typename InstImpl<TraitsType>::InstX86Movmsk; using Sqrtss = typename InstImpl<TraitsType>::InstX86Sqrtss; using Mov = typename InstImpl<TraitsType>::InstX86Mov; using Movp = typename InstImpl<TraitsType>::InstX86Movp;
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h index 5c6c005..4075bf3 100644 --- a/src/IceInstX86BaseImpl.h +++ b/src/IceInstX86BaseImpl.h
@@ -181,6 +181,13 @@ } template <typename TraitsType> +InstImpl<TraitsType>::InstX86Movmsk::InstX86Movmsk(Cfg *Func, Variable *Dest, + Operand *Source) + : InstX86Base(Func, InstX86Base::Movmsk, 1, Dest) { /* Source is added below as the only source operand */ + this->addSource(Source); +} + +template <typename TraitsType> InstImpl<TraitsType>::InstX86Cmov::InstX86Cmov(Cfg *Func, Variable *Dest, Operand *Source, BrCond Condition) @@ -1008,6 +1015,64 @@ } template <typename TraitsType> +void InstImpl<TraitsType>::InstX86Movmsk::dump(const Cfg *Func) const { /* Writes "<dest> = movmsk.<source type> <sources>" to the dump stream; returns early when BuildDefs::dump() is false. */ + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrDump(); + this->dumpDest(Func); + Str << " = movmsk." << this->getSrc(0)->getType() << " "; + this->dumpSources(Func); +} + +template <typename TraitsType> +void InstImpl<TraitsType>::InstX86Movmsk::emit(const Cfg *Func) const { /* Textual emission: mnemonic chosen by source type (pmovmskb for v16i8, movmskps for v4i32/v4f32), then the source operand, ", ", and the destination. */ + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrEmit(); + assert(this->getSrcSize() == 1); + Type SrcTy = this->getSrc(0)->getType(); + assert(isVectorType(SrcTy)); + switch (SrcTy) { + case IceType_v16i8: + Str << "\t" + "pmovmskb" + "\t"; + break; + case IceType_v4i32: + case IceType_v4f32: + Str << "\t" + "movmskps" + "\t"; + break; + default: + llvm_unreachable("Unexpected operand type"); + } + this->getSrc(0)->emit(Func); + Str << ", "; + this->getDest()->emit(Func); +} + +template <typename TraitsType> +void InstImpl<TraitsType>::InstX86Movmsk::emitIAS(const Cfg *Func) const { /* Binary emission through the assembler. The source is cast to Variable and register numbers are read from both operands, so both are presumably register-allocated by this point — TODO confirm. */ + assert(this->getSrcSize() == 1); + Assembler *Asm = Func->getAssembler<Assembler>(); + const Variable *Dest = this->getDest(); + const Variable *Src = llvm::cast<Variable>(this->getSrc(0)); + const Type DestTy = Dest->getType(); + const Type SrcTy = Src->getType(); + assert(isVectorType(SrcTy)); + assert(isScalarIntegerType(DestTy)); + if (!Traits::Is64Bit) { + assert(typeWidthInBytes(DestTy) <= 4); /* non-64-bit targets: destination at most 4 bytes wide */ + } else { + assert(DestTy == IceType_i32 || DestTy == IceType_i64); + } + XmmRegister SrcReg = Traits::getEncodedXmm(Src->getRegNum()); + GPRRegister 
DestReg = Traits::getEncodedGPR(Dest->getRegNum()); + Asm->movmsk(SrcTy, DestReg, SrcReg); /* the source vector type is what selects the encoding in the assembler */ +} + +template <typename TraitsType> void InstImpl<TraitsType>::InstX86Sqrtss::emit(const Cfg *Func) const { if (!BuildDefs::dump()) return;
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h index e61fb1d..b20a893 100644 --- a/src/IceIntrinsics.h +++ b/src/IceIntrinsics.h
@@ -65,7 +65,8 @@ LoadSubVector, StoreSubVector, VectorPackSigned, - VectorPackUnsigned + VectorPackUnsigned, + SignMask /* vector sign mask intrinsic; the x86 lowering accepts v4f32, v4i32 and v16i8 sources */ }; /// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h index 97576ee..fa0b9f1 100644 --- a/src/IceTargetLoweringX86Base.h +++ b/src/IceTargetLoweringX86Base.h
@@ -905,6 +905,10 @@ AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1); Context.insert<typename Traits::Insts::Shufps>(Dest, Src0, Src1); } + void _movmsk(Variable *Dest, Operand *Src0) { /* Inserts a Movmsk instruction via Context.insert; an AutoMemorySandboxer guard is constructed over Dest and Src0 first, mirroring the neighbouring helpers. */ + AutoMemorySandboxer<> _(this, &Dest, &Src0); + Context.insert<typename Traits::Insts::Movmsk>(Dest, Src0); + } void _sqrtss(Variable *Dest, Operand *Src0) { AutoMemorySandboxer<> _(this, &Dest, &Src0); Context.insert<typename Traits::Insts::Sqrtss>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h index a038c43..38dfba7 100644 --- a/src/IceTargetLoweringX86BaseImpl.h +++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4455,6 +4455,21 @@ _movp(Dest, T); return; } + case Intrinsics::SignMask: { /* Lowering: the argument is legalized into a register, movmsk writes an i32 temporary, and the temporary is then moved into the instruction's Dest. Only v4f32, v4i32 and v16i8 sources are handled; any other type is a fatal error. */ + Operand *SrcReg = legalizeToReg(Instr->getArg(0)); + Variable *Dest = Instr->getDest(); + Variable *T = makeReg(IceType_i32); + if (SrcReg->getType() == IceType_v4f32 || + SrcReg->getType() == IceType_v4i32 || + SrcReg->getType() == IceType_v16i8) { + _movmsk(T, SrcReg); + } else { + // TODO(capn): We could implement v8i16 sign mask using packsswb/pmovmskb + llvm::report_fatal_error("Invalid type for SignMask intrinsic"); + } + _mov(Dest, T); + return; + } default: // UnknownIntrinsic Func->setError("Unexpected intrinsic"); return;