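Implement vector packing intrinsics.

Add the VectorPackSigned and VectorPackUnsigned intrinsic IDs and lower
them on x86 to packss/packus: the first argument is moved into a
temporary register, packed with the second argument, and the result is
moved into the destination.

The assembler's packss/packus now select the opcode from the destination
vector type (v8i16 or v16i8) instead of the source type (previously
v4i32/v4f32 or v8i16).
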
BUG=swiftshader:15
Change-Id: Id95a08f82c47ec20bb958358c01f389b6fb5565b
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index adf9105..b28d8e8 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1667,15 +1667,15 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packss(Type DestTy, XmmRegister Dst,
XmmRegister Src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitRexRB(RexTypeIrrelevant, Dst, Src);
emitUint8(0x0F);
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+ if (DestTy == IceType_v8i16) {
emitUint8(0x6B);
- } else if (Ty == IceType_v8i16) {
+ } else if (DestTy == IceType_v16i8) {
emitUint8(0x63);
} else {
assert(false && "Unexpected vector pack operand type");
@@ -1684,16 +1684,16 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packss(Type DestTy, XmmRegister Dst,
const Address &Src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitAddrSizeOverridePrefix();
emitRex(RexTypeIrrelevant, Src, Dst);
emitUint8(0x0F);
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+ if (DestTy == IceType_v8i16) {
emitUint8(0x6B);
- } else if (Ty == IceType_v8i16) {
+ } else if (DestTy == IceType_v16i8) {
emitUint8(0x63);
} else {
assert(false && "Unexpected vector pack operand type");
@@ -1702,16 +1702,16 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packus(Type DestTy, XmmRegister Dst,
XmmRegister Src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitRexRB(RexTypeIrrelevant, Dst, Src);
emitUint8(0x0F);
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+ if (DestTy == IceType_v8i16) {
emitUint8(0x38);
emitUint8(0x2B);
- } else if (Ty == IceType_v8i16) {
+ } else if (DestTy == IceType_v16i8) {
emitUint8(0x67);
} else {
assert(false && "Unexpected vector pack operand type");
@@ -1720,17 +1720,17 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packus(Type DestTy, XmmRegister Dst,
const Address &Src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitAddrSizeOverridePrefix();
emitRex(RexTypeIrrelevant, Src, Dst);
emitUint8(0x0F);
- if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+ if (DestTy == IceType_v8i16) {
emitUint8(0x38);
emitUint8(0x2B);
- } else if (Ty == IceType_v8i16) {
+ } else if (DestTy == IceType_v16i8) {
emitUint8(0x67);
} else {
assert(false && "Unexpected vector pack operand type");
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 3acfbf4..e61fb1d 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -63,7 +63,9 @@
Trap,
// The intrinsics below are not part of the PNaCl specification.
LoadSubVector,
- StoreSubVector
+ StoreSubVector,
+ VectorPackSigned,
+ VectorPackUnsigned
};
/// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index e9fee53..e7d3100 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4424,6 +4424,30 @@
}
return;
}
+ case Intrinsics::VectorPackSigned: {
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType());
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM);
+ _packss(T, Src1RM);
+ _movp(Dest, T);
+ return;
+ }
+ case Intrinsics::VectorPackUnsigned: {
+ Operand *Src0 = Instr->getArg(0);
+ Operand *Src1 = Instr->getArg(1);
+ Variable *Dest = Instr->getDest();
+ auto *T = makeReg(Dest->getType());
+ auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ _movp(T, Src0RM);
+ _packus(T, Src1RM);
+ _movp(Dest, T);
+ return;
+ }
default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic");
return;