Implement vector sign mask intrinsic.
BUG=swiftshader:15
Change-Id: I7fec56518a5b4e07d2189ab01a0a223b038564c1
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 7ba22f2..937e997 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -465,8 +465,7 @@
void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
void ucomiss(Type Ty, XmmRegister a, const Address &b);
- void movmskpd(GPRRegister dst, XmmRegister src);
- void movmskps(GPRRegister dst, XmmRegister src);
+ void movmsk(Type Ty, GPRRegister dst, XmmRegister src); // Ty = src type
void sqrtss(Type Ty, XmmRegister dst, const Address &src);
void sqrtss(Type Ty, XmmRegister dst, XmmRegister src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index b28d8e8..9c50ea9 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1916,21 +1916,25 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movmskpd(GPRRegister dst, XmmRegister src) {
+void AssemblerX86Base<TraitsType>::movmsk(Type Ty, GPRRegister dst,
+ XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- emitUint8(0x66);
+ if (Ty == IceType_v16i8) {
+ emitUint8(0x66); // Prefix byte, emitted ahead of the REX prefix.
+ } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
+ // No operand size prefix: the 0x0F 0x50 form is emitted without one.
+ } else {
+ assert(false && "Unexpected movmsk operand type");
+ }
emitRexRB(RexTypeIrrelevant, dst, src);
emitUint8(0x0F);
- emitUint8(0x50);
- emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movmskps(GPRRegister dst, XmmRegister src) {
- AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- emitRexRB(RexTypeIrrelevant, dst, src);
- emitUint8(0x0F);
- emitUint8(0x50);
+ if (Ty == IceType_v16i8) {
+ emitUint8(0xD7); // Byte-lane form (pmovmskb in the text emitter).
+ } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
+ emitUint8(0x50); // 32-bit-lane form (movmskps in the text emitter).
+ } else {
+ assert(false && "Unexpected movmsk operand type"); // No opcode emitted.
+ }
emitXmmRegisterOperand(dst, src);
}
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 09025aa..bd10572 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -117,6 +117,7 @@
Maxss,
Mov,
Movd,
+ Movmsk,
Movp,
Movq,
MovssRegs,
@@ -1250,6 +1251,27 @@
: InstX86BaseUnaryopXmm<InstX86Base::Movd>(Func, Dest, Src) {}
};
+ class InstX86Movmsk final : public InstX86Base { // Vector -> integer mask.
+ InstX86Movmsk() = delete;
+ InstX86Movmsk(const InstX86Movmsk &) = delete;
+ InstX86Movmsk &operator=(const InstX86Movmsk &) = delete;
+
+ public:
+ static InstX86Movmsk *create(Cfg *Func, Variable *Dest, Operand *Source) {
+ return new (Func->allocate<InstX86Movmsk>()) // Storage comes from Func.
+ InstX86Movmsk(Func, Dest, Source);
+ }
+ void emit(const Cfg *Func) const override;
+ void emitIAS(const Cfg *Func) const override;
+ void dump(const Cfg *Func) const override;
+ static bool classof(const Inst *Instr) {
+ return InstX86Base::isClassof(Instr, InstX86Base::Movmsk); // Kind tag.
+ }
+
+ private:
+ InstX86Movmsk(Cfg *Func, Variable *Dest, Operand *Source);
+ };
+
class InstX86Sqrtss : public InstX86BaseUnaryopXmm<InstX86Base::Sqrtss> {
public:
static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) {
@@ -3005,6 +3027,7 @@
using Movsx = typename InstImpl<TraitsType>::InstX86Movsx;
using Movzx = typename InstImpl<TraitsType>::InstX86Movzx;
using Movd = typename InstImpl<TraitsType>::InstX86Movd;
+ using Movmsk = typename InstImpl<TraitsType>::InstX86Movmsk;
using Sqrtss = typename InstImpl<TraitsType>::InstX86Sqrtss;
using Mov = typename InstImpl<TraitsType>::InstX86Mov;
using Movp = typename InstImpl<TraitsType>::InstX86Movp;
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 5c6c005..4075bf3 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -181,6 +181,13 @@
}
template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Movmsk::InstX86Movmsk(Cfg *Func, Variable *Dest,
+ Operand *Source)
+ : InstX86Base(Func, InstX86Base::Movmsk, 1, Dest) { // Kind tag is Movmsk.
+ this->addSource(Source); // The single source operand.
+}
+
+template <typename TraitsType>
InstImpl<TraitsType>::InstX86Cmov::InstX86Cmov(Cfg *Func, Variable *Dest,
Operand *Source,
BrCond Condition)
@@ -1008,6 +1015,64 @@
}
template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movmsk::dump(const Cfg *Func) const {
+ if (!BuildDefs::dump()) // Nothing is printed when dump support is off.
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ this->dumpDest(Func);
+ Str << " = movmsk." << this->getSrc(0)->getType() << " "; // Src-type suffix.
+ this->dumpSources(Func);
+}
+
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movmsk::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Out = Func->getContext()->getStrEmit();
+ assert(this->getSrcSize() == 1);
+ const Type VecTy = this->getSrc(0)->getType();
+ assert(isVectorType(VecTy));
+ // Pick the mnemonic from the source type; both 32-bit lane types share one.
+ const char *Mnemonic = nullptr;
+ switch (VecTy) {
+ case IceType_v16i8:
+ Mnemonic = "pmovmskb";
+ break;
+ case IceType_v4i32:
+ case IceType_v4f32:
+ Mnemonic = "movmskps";
+ break;
+ default:
+ llvm_unreachable("Unexpected operand type");
+ }
+ // Textual operand order is source first, then destination.
+ Out << "\t" << Mnemonic << "\t";
+ this->getSrc(0)->emit(Func);
+ Out << ", ";
+ this->getDest()->emit(Func);
+}
+
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movmsk::emitIAS(const Cfg *Func) const {
+ assert(this->getSrcSize() == 1);
+ Assembler *Asm = Func->getAssembler<Assembler>();
+ const Variable *Dest = this->getDest();
+ const Variable *Src = llvm::cast<Variable>(this->getSrc(0));
+ const Type DestTy = Dest->getType();
+ (void)DestTy; // Only read by asserts; avoids an unused warning under NDEBUG.
+ const Type SrcTy = Src->getType();
+ assert(isVectorType(SrcTy));
+ assert(isScalarIntegerType(DestTy));
+ // Destination must be i32/i64 on 64-bit targets, else at most 4 bytes wide.
+ assert(Traits::Is64Bit ? (DestTy == IceType_i32 || DestTy == IceType_i64)
+ : typeWidthInBytes(DestTy) <= 4);
+ // Both operands are addressed by their assigned register numbers.
+ XmmRegister SrcReg = Traits::getEncodedXmm(Src->getRegNum());
+ GPRRegister DestReg = Traits::getEncodedGPR(Dest->getRegNum());
+ Asm->movmsk(SrcTy, DestReg, SrcReg);
+}
+
+template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Sqrtss::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index e61fb1d..b20a893 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -65,7 +65,8 @@
LoadSubVector,
StoreSubVector,
VectorPackSigned,
- VectorPackUnsigned
+ VectorPackUnsigned,
+ SignMask
};
/// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 97576ee..fa0b9f1 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -905,6 +905,10 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Shufps>(Dest, Src0, Src1);
}
+ void _movmsk(Variable *Dest, Operand *Src0) {
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Same guard as _sqrtss.
+ Context.insert<typename Traits::Insts::Movmsk>(Dest, Src0); // Adds Movmsk.
+ }
void _sqrtss(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Sqrtss>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index a038c43..38dfba7 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4455,6 +4455,21 @@
_movp(Dest, T);
return;
}
+ case Intrinsics::SignMask: {
+ // Lower the sign mask intrinsic: movmsk into an i32 temporary, then copy.
+ Operand *Vec = legalizeToReg(Instr->getArg(0));
+ Variable *Mask = makeReg(IceType_i32);
+ const Type VecTy = Vec->getType();
+ const bool Supported = VecTy == IceType_v4f32 || VecTy == IceType_v4i32 ||
+ VecTy == IceType_v16i8;
+ if (!Supported) {
+ // TODO(capn): We could implement v8i16 sign mask using packsswb/pmovmskb
+ llvm::report_fatal_error("Invalid type for SignMask intrinsic");
+ }
+ _movmsk(Mask, Vec);
+ _mov(Instr->getDest(), Mask);
+ return;
+ }
default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic");
return;