Implement vector packing intrinsics.

BUG=swiftshader:15

Change-Id: Id95a08f82c47ec20bb958358c01f389b6fb5565b
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index adf9105..b28d8e8 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1667,15 +1667,15 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packss(Type DestTy, XmmRegister Dst,
                                           XmmRegister Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, Dst, Src);
   emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+  if (DestTy == IceType_v8i16) {
     emitUint8(0x6B);
-  } else if (Ty == IceType_v8i16) {
+  } else if (DestTy == IceType_v16i8) {
     emitUint8(0x63);
   } else {
     assert(false && "Unexpected vector pack operand type");
@@ -1684,16 +1684,16 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packss(Type DestTy, XmmRegister Dst,
                                           const Address &Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, Src, Dst);
   emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+  if (DestTy == IceType_v8i16) {
     emitUint8(0x6B);
-  } else if (Ty == IceType_v8i16) {
+  } else if (DestTy == IceType_v16i8) {
     emitUint8(0x63);
   } else {
     assert(false && "Unexpected vector pack operand type");
@@ -1702,16 +1702,16 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packus(Type DestTy, XmmRegister Dst,
                                           XmmRegister Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, Dst, Src);
   emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+  if (DestTy == IceType_v8i16) {
     emitUint8(0x38);
     emitUint8(0x2B);
-  } else if (Ty == IceType_v8i16) {
+  } else if (DestTy == IceType_v16i8) {
     emitUint8(0x67);
   } else {
     assert(false && "Unexpected vector pack operand type");
@@ -1720,17 +1720,17 @@
 }
 
 template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
+void AssemblerX86Base<TraitsType>::packus(Type DestTy, XmmRegister Dst,
                                           const Address &Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, Src, Dst);
   emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
+  if (DestTy == IceType_v8i16) {
     emitUint8(0x38);
     emitUint8(0x2B);
-  } else if (Ty == IceType_v8i16) {
+  } else if (DestTy == IceType_v16i8) {
     emitUint8(0x67);
   } else {
     assert(false && "Unexpected vector pack operand type");
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 3acfbf4..e61fb1d 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -63,7 +63,9 @@
     Trap,
     // The intrinsics below are not part of the PNaCl specification.
     LoadSubVector,
-    StoreSubVector
+    StoreSubVector,
+    VectorPackSigned,
+    VectorPackUnsigned
   };
 
   /// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index e9fee53..e7d3100 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4424,6 +4424,30 @@
     }
     return;
   }
+  case Intrinsics::VectorPackSigned: { // lower the signed vector-pack call
+    Operand *Src0 = Instr->getArg(0); // first call argument
+    Operand *Src1 = Instr->getArg(1); // second call argument
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // temporary typed like Dest
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // reg or mem
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); // reg or mem
+    _movp(T, Src0RM); // seed T with the first operand
+    _packss(T, Src1RM); // assumes two-address form: T is read and written
+    _movp(Dest, T); // copy the temporary into Dest
+    return;
+  }
+  case Intrinsics::VectorPackUnsigned: { // lower the unsigned vector-pack call
+    Operand *Src0 = Instr->getArg(0); // first call argument
+    Operand *Src1 = Instr->getArg(1); // second call argument
+    Variable *Dest = Instr->getDest();
+    auto *T = makeReg(Dest->getType()); // temporary typed like Dest
+    auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); // reg or mem
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); // reg or mem
+    _movp(T, Src0RM); // seed T with the first operand
+    _packus(T, Src1RM); // assumes two-address form: T is read and written
+    _movp(Dest, T); // copy the temporary into Dest
+    return;
+  }
   default: // UnknownIntrinsic
     Func->setError("Unexpected intrinsic");
     return;