Optimize single-vector shuffling: when every shuffle index selects from the first source, skip the second pshufb and the por.

Change-Id: Id3d40a72cb74c75ef4431e6af8855e08bde2bb5c
Reviewed-on: https://chromium-review.googlesource.com/433329
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index c2bce04..841218c 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -6196,28 +6196,36 @@
           IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0),
           IDX_IN_SRC(Idx15, 0)),
       NotRebased);
-  auto *Mask1M = X86OperandMem::create(
-      Func, MaskType, NoBase,
-      lowerShuffleVector_CreatePshufbMask(
-          IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1),
-          IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1),
-          IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1),
-          IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1),
-          IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1),
-          IDX_IN_SRC(Idx15, 1)),
-      NotRebased);
-#undef IDX_IN_SRC
+
   auto *T0 = makeReg(DestTy);
-  auto *T1 = makeReg(DestTy);
   auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
   _movp(T0, Src0RM);
-  auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
-  _movp(T1, Src1RM);
 
-  _pshufb(T1, Mask1M);
   _pshufb(T0, Mask0M);
-  _por(T1, T0);
-  _movp(Dest, T1);
+
+  if (Idx0 > 16 || Idx1 > 16 || Idx2 > 16 || Idx3 > 16 || Idx4 > 16 ||
+      Idx5 > 16 || Idx6 > 16 || Idx7 > 16 || Idx8 > 16 || Idx9 > 16 ||
+      Idx10 > 16 || Idx11 > 16 || Idx12 > 16 || Idx13 > 16 || Idx14 > 16 ||
+      Idx15 > 16) {
+    auto *Mask1M = X86OperandMem::create(
+        Func, MaskType, NoBase,
+        lowerShuffleVector_CreatePshufbMask(
+            IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1),
+            IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1),
+            IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1),
+            IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1),
+            IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1),
+            IDX_IN_SRC(Idx15, 1)),
+        NotRebased);
+#undef IDX_IN_SRC
+    auto *T1 = makeReg(DestTy);
+    auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+    _movp(T1, Src1RM);
+    _pshufb(T1, Mask1M);
+    _por(T0, T1);
+  }
+
+  _movp(Dest, T0);
 }
 
 template <typename TraitsType>