Subzero. X86. Lowers shufflevector using xmm instructions.
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4136
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1909013002 .
diff --git a/crosstest/crosstest.cfg b/crosstest/crosstest.cfg
index afc75b4..a7d30b6 100644
--- a/crosstest/crosstest.cfg
+++ b/crosstest/crosstest.cfg
@@ -56,4 +56,4 @@
[test_vector_ops]
driver: test_vector_ops_main.cpp
-test: test_vector_ops.ll
+test: test_vector_ops.cpp test_vector_ops_ll.ll
diff --git a/crosstest/test_vector_ops.cpp b/crosstest/test_vector_ops.cpp
new file mode 100644
index 0000000..3d55bc2
--- /dev/null
+++ b/crosstest/test_vector_ops.cpp
@@ -0,0 +1,753 @@
+//===- subzero/crosstest/test_vector_ops.cpp - Vector tests -----*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the vector shuffle (shufflevector) test routines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "test_vector_ops.h"
+
+#include <algorithm>
+#include <type_traits>
+
+namespace {
+// SHUFFLETESTS_TABLE declares several shufflevector test cases. Each test case
+// has 16 indexes because 16 is the max number of elements in a vector type in
+// PNaCl bitcode. For vector types with fewer than 16 elements, the additional
+// indexes are ignored. This strategy allows a single test table definition.
+#define SHUFFLETESTS_TABLE \
+ /* Indexes... */ \
+ /* Simple tests splatting elements. */ \
+ X(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) \
+ X(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \
+ X(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) \
+ X(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3) \
+ X(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4) \
+ X(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5) \
+ X(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6) \
+ X(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7) \
+ X(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8) \
+ X(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9) \
+ X(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10) \
+ X(11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11) \
+ X(12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12) \
+ X(13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13) \
+ X(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14) \
+ X(15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15) \
+ X(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16) \
+ X(17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17) \
+ X(18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18) \
+ X(19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19) \
+ X(20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20) \
+ X(21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21) \
+ X(22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22) \
+ X(23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23) \
+ X(24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24) \
+ X(25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25) \
+ X(26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26) \
+ X(27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27) \
+ X(28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28) \
+ X(29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29) \
+ X(30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30) \
+ X(31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31) \
+ /* Rotating vectors. */ \
+ X(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) \
+ X(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) \
+ X(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18) \
+ X(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) \
+ X(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20) \
+ X(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21) \
+ X(7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22) \
+ X(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23) \
+ X(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24) \
+ X(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25) \
+ X(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26) \
+ X(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) \
+ X(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28) \
+ X(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29) \
+ X(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30) \
+ X(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) \
+ X(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0) \
+ X(18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1) \
+ X(19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2) \
+ X(20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3) \
+ X(21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4) \
+ X(22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5) \
+ X(23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6) \
+ X(24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7) \
+ X(25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8) \
+ X(26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) \
+ X(27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) \
+ X(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) \
+ X(29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) \
+ X(30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13) \
+ X(31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) \
+ /* Swapping elements. */ \
+ X(1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(19, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(21, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(25, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(26, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(27, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(28, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(29, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(30, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(31, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 4, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 5, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 6, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 7, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 10, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 11, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 12, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 13, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 15, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 17, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 18, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 19, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 20, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 21, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 22, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 23, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 24, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 25, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 26, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 27, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 28, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 29, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 30, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 31, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 4, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 5, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 6, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 7, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 8, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 9, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 10, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 12, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 13, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 14, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 15, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 16, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 17, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 18, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 19, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 20, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 21, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 22, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 23, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 24, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 25, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 26, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 27, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 28, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 29, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 30, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 31, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 5, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 6, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 9, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 10, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 11, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 12, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 13, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 14, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 15, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 16, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 17, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 18, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 20, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 21, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 22, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 23, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 24, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 25, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 26, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 27, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 28, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 29, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 30, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 31, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 6, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 7, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 8, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 9, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 10, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 11, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 12, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 13, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 14, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 15, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 16, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 17, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 18, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 19, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 21, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 22, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 22, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 23, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 24, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 25, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 26, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 27, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 28, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 29, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 30, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 31, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 6, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 7, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 8, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 9, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 11, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 12, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 13, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 14, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 15, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 17, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 18, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 19, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 20, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 21, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 22, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 23, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 24, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 25, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 26, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 27, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 28, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 29, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 30, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 31, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 2, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 3, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 8, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 9, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 11, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 12, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 13, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 14, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 15, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 17, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 18, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 19, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 20, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 21, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 22, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 23, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 24, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 25, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 26, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 27, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 28, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 29, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 30, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 31, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 2, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 3, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 4, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 5, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 6, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 8, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 9, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 10, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 11, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 12, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 13, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 14, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 15, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 16, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 18, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 19, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 20, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 21, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 22, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 23, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 24, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 25, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 26, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 27, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 28, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 29, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 30, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 31, 0, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 1, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 2, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 3, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 4, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 5, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 6, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 7, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 9, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 10, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 11, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 12, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 13, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 14, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 15, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 16, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 17, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 18, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 19, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 20, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 21, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 22, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 23, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 24, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 25, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 26, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 27, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 28, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 29, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 30, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 2, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 3, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 4, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 6, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 7, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 13, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 15, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 17, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 19, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 20, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 21, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 22, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 23, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 24, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 25, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 26, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 27, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 28, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 29, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 30, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 31, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 6, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 7, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 17, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 19, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 22, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 23, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 24, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 25, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 26, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 27, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 28, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 29, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 30, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 3, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 4, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 5, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 6, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 7, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 8, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 21, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 22, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 23, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 24, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 25, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 26, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 27, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 28, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 29, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 31, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 4, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 5, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 6, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 8, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 9, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 17, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 18, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 19, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 21, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 22, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 23, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 25, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 26, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 27, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 29, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 31, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 2, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 3, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 4, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 6, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 7, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 11, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 19, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 21, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 22, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 23, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 24, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 25, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 26, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 27, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 28, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 29, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 30, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 31, 14, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 3, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 4, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 5, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 6, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 7, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 8, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 9, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 10, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 11, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 12, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 18, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 19, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 21, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 22, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 23, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 24, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 25, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 26, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 27, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 28, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 29, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 31, 15) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 2) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 3) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 4) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 5) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 6) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 7) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 8) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 9) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 10) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 11) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 12) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 18) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 20) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 21) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 22) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 23) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 24) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 25) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 26) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 27) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 28) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 29) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 30) \
+ X(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31) \
+ /* Testing the optimized shufflevectors for x86. */ \
+ /* (Src0, Src0, Src0, Src0) */ \
+ X(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0) \
+ X(2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3) \
+ /* (Src0, Src0, Src0, Src1) */ \
+ X(0, 1, 3, 7, 0, 1, 3, 7, 0, 1, 3, 7, 0, 1, 3, 7) \
+ X(2, 2, 2, 5, 2, 2, 2, 5, 2, 2, 2, 5, 2, 2, 2, 5) \
+ /* (Src0, Src0, Src1, Src0) */ \
+ X(1, 2, 6, 0, 1, 2, 6, 0, 1, 2, 6, 0, 1, 2, 6, 0) \
+ X(3, 2, 3, 5, 3, 2, 3, 5, 3, 2, 3, 5, 3, 2, 3, 5) \
+ /* (Src0, Src0, Src1, Src1) */ \
+ X(2, 3, 5, 7, 2, 3, 5, 7, 2, 3, 5, 7, 2, 3, 5, 7) \
+ X(3, 3, 7, 6, 3, 3, 7, 6, 3, 3, 7, 6, 3, 3, 7, 6) \
+ /* (Src0, Src1, Src0, Src0) */ \
+ X(3, 7, 0, 0, 3, 7, 0, 0, 3, 7, 0, 0, 3, 7, 0, 0) \
+ X(1, 7, 3, 3, 1, 7, 3, 3, 1, 7, 3, 3, 1, 7, 3, 3) \
+ /* (Src0, Src1, Src0, Src1) */ \
+ X(0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5) \
+ X(0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7) \
+ X(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4) \
+ X(1, 7, 3, 6, 1, 7, 3, 6, 1, 7, 3, 6, 1, 7, 3, 6) \
+ X(0, 6, 3, 7, 0, 6, 3, 7, 0, 6, 3, 7, 0, 6, 3, 7) \
+ /* (Src0, Src1, Src1, Src0) */ \
+ X(0, 7, 7, 0, 0, 7, 7, 0, 0, 7, 7, 0, 0, 7, 7, 0) \
+ X(3, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3) \
+ X(1, 5, 3, 6, 1, 5, 3, 6, 1, 5, 3, 6, 1, 5, 3, 6) \
+ X(0, 6, 2, 6, 0, 6, 2, 6, 0, 6, 2, 6, 0, 6, 2, 6) \
+ /* (Src0, Src1, Src1, Src1) */ \
+ X(0, 7, 7, 7, 0, 7, 7, 7, 0, 7, 7, 7, 0, 7, 7, 7) \
+ X(3, 6, 7, 4, 3, 6, 7, 4, 3, 6, 7, 4, 3, 6, 7, 4) \
+ /* (Src1, Src0, Src0, Src0) */ \
+ X(4, 3, 3, 0, 4, 3, 3, 0, 4, 3, 3, 0, 4, 3, 3, 0) \
+ X(6, 0, 0, 3, 6, 0, 0, 3, 6, 0, 0, 3, 6, 0, 0, 3) \
+ /* (Src1, Src0, Src0, Src1) */ \
+ X(4, 3, 2, 6, 4, 3, 2, 6, 4, 3, 2, 6, 4, 3, 2, 6) \
+ X(5, 2, 1, 4, 5, 2, 1, 4, 5, 2, 1, 4, 5, 2, 1, 4) \
+ X(6, 0, 0, 4, 6, 0, 0, 4, 6, 0, 0, 4, 6, 0, 0, 4) \
+ X(5, 2, 2, 6, 5, 2, 2, 6, 5, 2, 2, 6, 5, 2, 2, 6) \
+ /* (Src1, Src0, Src1, Src0) */ \
+ X(4, 0, 5, 1, 4, 0, 5, 1, 4, 0, 5, 1, 4, 0, 5, 1) \
+ X(7, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7, 2) \
+ X(4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3) \
+ X(7, 1, 5, 0, 7, 1, 5, 0, 7, 1, 5, 0, 7, 1, 5, 0) \
+ X(4, 3, 6, 2, 4, 3, 6, 2, 4, 3, 6, 2, 4, 3, 6, 2) \
+ /* (Src1, Src0, Src1, Src1) */ \
+ X(6, 0, 3, 2, 6, 0, 3, 2, 6, 0, 3, 2, 6, 0, 3, 2) \
+ X(4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7) \
+ /* (Src1, Src1, Src0, Src0) */ \
+ X(6, 5, 2, 3, 6, 5, 2, 3, 6, 5, 2, 3, 6, 5, 2, 3) \
+ X(7, 7, 0, 1, 7, 7, 0, 1, 7, 7, 0, 1, 7, 7, 0, 1) \
+ /* (Src1, Src1, Src0, Src1) */ \
+ X(7, 6, 0, 5, 7, 6, 0, 5, 7, 6, 0, 5, 7, 6, 0, 5) \
+ X(4, 5, 3, 7, 4, 5, 3, 7, 4, 5, 3, 7, 4, 5, 3, 7) \
+ /* (Src1, Src1, Src1, Src0) */ \
+ X(6, 6, 4, 0, 6, 6, 4, 0, 6, 6, 4, 0, 6, 6, 4, 0) \
+ X(7, 4, 6, 1, 7, 4, 6, 1, 7, 4, 6, 1, 7, 4, 6, 1) \
+ /* (Src1, Src1, Src1, Src1) */ \
+ X(7, 6, 4, 4, 7, 6, 4, 4, 7, 6, 4, 4, 7, 6, 4, 4) \
+ X(5, 7, 7, 6, 5, 7, 7, 6, 5, 7, 7, 6, 5, 7, 7, 6)
+/* End of x86-optimized shufflevectors. */
+//#define X(...)
+
+// ShuffleVectorTest declares the template functions that are used to shuffle
+// the test vectors. It has specific template methods depending on how many
+// elements VecTy has.
+template <typename VecTy> class ShuffleVectorTest {
+  // Every shufflevector overload below accepts the full 16-index list of a
+  // SHUFFLETESTS_TABLE row; std::enable_if keeps only the overload whose
+  // element count matches Ty, and the trailing indexes that a narrower type
+  // does not need are left unnamed. Each index that is used is reduced modulo
+  // 2 * NumElements before it is handed to __builtin_shufflevector.
+
+  //----------------------------------------------------------------------------
+  //
+  // V4??? Shuffles.
+  //
+  //----------------------------------------------------------------------------
+  template <typename Ty, uint8_t Idx0, uint8_t Idx1, uint8_t Idx2, uint8_t Idx3,
+            uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
+            uint8_t, uint8_t, uint8_t, uint8_t, uint8_t>
+  static typename std::enable_if<Vectors<Ty>::NumElements == 4, Ty>::type
+  shufflevector(Ty V1, Ty V2) {
+    const uint8_t NumElements = 4;
+    return __builtin_shufflevector(
+        V1, V2, Idx0 % (NumElements * 2), Idx1 % (NumElements * 2),
+        Idx2 % (NumElements * 2), Idx3 % (NumElements * 2));
+  }
+
+  //----------------------------------------------------------------------------
+  //
+  // V8??? Shuffles.
+  //
+  //----------------------------------------------------------------------------
+  template <typename Ty, uint8_t Idx0, uint8_t Idx1, uint8_t Idx2, uint8_t Idx3,
+            uint8_t Idx4, uint8_t Idx5, uint8_t Idx6, uint8_t Idx7, uint8_t,
+            uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t>
+  static typename std::enable_if<Vectors<Ty>::NumElements == 8, Ty>::type
+  shufflevector(Ty V1, Ty V2) {
+    const uint8_t NumElements = 8;
+    return __builtin_shufflevector(
+        V1, V2, Idx0 % (NumElements * 2), Idx1 % (NumElements * 2),
+        Idx2 % (NumElements * 2), Idx3 % (NumElements * 2),
+        Idx4 % (NumElements * 2), Idx5 % (NumElements * 2),
+        Idx6 % (NumElements * 2), Idx7 % (NumElements * 2));
+  }
+
+  //----------------------------------------------------------------------------
+  //
+  // V16??? Shuffles.
+  //
+  //----------------------------------------------------------------------------
+  template <typename Ty, uint8_t Idx0, uint8_t Idx1, uint8_t Idx2, uint8_t Idx3,
+            uint8_t Idx4, uint8_t Idx5, uint8_t Idx6, uint8_t Idx7,
+            uint8_t Idx8, uint8_t Idx9, uint8_t Idx10, uint8_t Idx11,
+            uint8_t Idx12, uint8_t Idx13, uint8_t Idx14, uint8_t Idx15>
+  static typename std::enable_if<Vectors<Ty>::NumElements == 16, Ty>::type
+  shufflevector(Ty V1, Ty V2) {
+    const uint8_t NumElements = 16;
+    // Idx0 is reduced like every other index so that all three overloads
+    // treat their indexes uniformly.
+    return __builtin_shufflevector(
+        V1, V2, Idx0 % (NumElements * 2), Idx1 % (NumElements * 2),
+        Idx2 % (NumElements * 2), Idx3 % (NumElements * 2),
+        Idx4 % (NumElements * 2), Idx5 % (NumElements * 2),
+        Idx6 % (NumElements * 2), Idx7 % (NumElements * 2),
+        Idx8 % (NumElements * 2), Idx9 % (NumElements * 2),
+        Idx10 % (NumElements * 2), Idx11 % (NumElements * 2),
+        Idx12 % (NumElements * 2), Idx13 % (NumElements * 2),
+        Idx14 % (NumElements * 2), Idx15 % (NumElements * 2));
+  }
+
+public:
+  // Signature shared by every instantiated shuffle.
+  typedef VecTy (*TestFn)(VecTy V1, VecTy V2);
+  // One function pointer per SHUFFLETESTS_TABLE row, and the row count.
+  static TestFn Tests[];
+  static const uint32_t NumTests;
+};
+
+// Table of shuffle functions for vector type Ty: the X macro turns each row of
+// SHUFFLETESTS_TABLE into a pointer to the shufflevector instantiation that
+// takes that row's indexes as template arguments.
+template <typename Ty>
+typename ShuffleVectorTest<Ty>::TestFn ShuffleVectorTest<Ty>::Tests[] = {
+#define X(...) &ShuffleVectorTest<Ty>::shufflevector<Ty, __VA_ARGS__>,
+ SHUFFLETESTS_TABLE
+#undef X
+};
+
+// Number of rows in SHUFFLETESTS_TABLE: the X macro expands every row to "+1",
+// so the initializer below is 0 plus one per row.
+template <typename Ty>
+const uint32_t ShuffleVectorTest<Ty>::NumTests = 0
+#define X(...) +1
+ SHUFFLETESTS_TABLE
+#undef X
+ ;
+} // end of anonymous namespace
+
+// C-linkage entry points generated from the vector type tables. For each type
+// name Ty in a table, shufflevector_<Ty> runs shuffle number Which on (V1, V2)
+// and shufflevector_count_<Ty> returns how many shuffles exist. Tests[Which]
+// is indexed without a bounds check, so callers must keep Which below the
+// count.
+extern "C" {
+// Expanded once per VECTOR_TYPE_TABLE row; only the first column (Ty) is used.
+#define X(Ty, ElmtTy, CastTy) \
+ TY(Ty) shufflevector_##Ty(TY(Ty) V1, TY(Ty) V2, uint32_t Which) { \
+ return (*ShuffleVectorTest<TY(Ty)>::Tests[Which])(V1, V2); \
+ } \
+ uint32_t shufflevector_count_##Ty() { \
+ return ShuffleVectorTest<TY(Ty)>::NumTests; \
+ }
+VECTOR_TYPE_TABLE
+#undef X
+
+// Same pair of functions for each I1_VECTOR_TYPE_TABLE row; only the first
+// column (I1Ty) is used.
+#define X(I1Ty, Ty, numelements) \
+ TY(I1Ty) shufflevector_##I1Ty(TY(I1Ty) V1, TY(I1Ty) V2, uint32_t Which) { \
+ return (*ShuffleVectorTest<TY(I1Ty)>::Tests[Which])(V1, V2); \
+ } \
+ uint32_t shufflevector_count_##I1Ty() { \
+ return ShuffleVectorTest<TY(I1Ty)>::NumTests; \
+ }
+I1_VECTOR_TYPE_TABLE
+#undef X
+} // end of extern "C"
diff --git a/crosstest/test_vector_ops.h b/crosstest/test_vector_ops.h
index 32903a9..e9e8737 100644
--- a/crosstest/test_vector_ops.h
+++ b/crosstest/test_vector_ops.h
@@ -18,8 +18,8 @@
#include "vectors.h"
// The VectorOps<> class acts like Vectors<> but also has insertelement,
-// Subzero_insertelement, extractelement, and Subzero_extractelement
-// fields.
+// Subzero_insertelement, extractelement, Subzero_extractelement,
+// shufflevector, Subzero_shufflevector, and shufflevector_count fields.
template <typename T> struct VectorOps;
#define FIELD(TYNAME, FIELDNAME) VectorOps<TYNAME>::FIELDNAME
@@ -28,15 +28,21 @@
#define DECLARE_VECTOR_OPS(NAME) \
template <> struct VectorOps<NAME> : public Vectors<NAME> { \
static Ty (*insertelement)(Ty, CastTy, int32_t); \
+ static Ty (*shufflevector)(Ty, Ty, uint32_t); \
static CastTy (*extractelement)(Ty, int32_t); \
static Ty (*Subzero_insertelement)(Ty, CastTy, int32_t); \
+ static Ty (*Subzero_shufflevector)(Ty, Ty, uint32_t); \
static CastTy (*Subzero_extractelement)(Ty, int32_t); \
+ static uint32_t (*shufflevector_count)(); \
}; \
extern "C" { \
TY(NAME) insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \
TY(NAME) Subzero_insertelement_##NAME(TY(NAME), CASTTY(NAME), int32_t); \
CASTTY(NAME) extractelement_##NAME(TY(NAME), int32_t); \
CASTTY(NAME) Subzero_extractelement_##NAME(TY(NAME), int32_t); \
+ TY(NAME) shufflevector_##NAME(TY(NAME), TY(NAME), uint32_t); \
+ TY(NAME) Subzero_shufflevector_##NAME(TY(NAME), TY(NAME), uint32_t); \
+ uint32_t shufflevector_count_##NAME(); \
} \
TY(NAME) (*FIELD(NAME, insertelement))(TY(NAME), CASTTY(NAME), int32_t) = \
&insertelement_##NAME; \
@@ -45,7 +51,12 @@
CASTTY(NAME) (*FIELD(NAME, extractelement))(TY(NAME), int32_t) = \
&extractelement_##NAME; \
CASTTY(NAME) (*FIELD(NAME, Subzero_extractelement))(TY(NAME), int32_t) = \
- &Subzero_extractelement_##NAME;
+ &Subzero_extractelement_##NAME; \
+ TY(NAME) (*FIELD(NAME, shufflevector))(TY(NAME), TY(NAME), uint32_t) = \
+ &shufflevector_##NAME; \
+ TY(NAME) (*FIELD(NAME, Subzero_shufflevector))( \
+ TY(NAME), TY(NAME), uint32_t) = &Subzero_shufflevector_##NAME; \
+ uint32_t (*FIELD(NAME, shufflevector_count))() = &shufflevector_count_##NAME;
#define X(ty, eltty, castty) DECLARE_VECTOR_OPS(ty)
VECTOR_TYPE_TABLE
diff --git a/crosstest/test_vector_ops.ll b/crosstest/test_vector_ops_ll.ll
similarity index 100%
rename from crosstest/test_vector_ops.ll
rename to crosstest/test_vector_ops_ll.ll
diff --git a/crosstest/test_vector_ops_main.cpp b/crosstest/test_vector_ops_main.cpp
index 1232799..9f27417 100644
--- a/crosstest/test_vector_ops_main.cpp
+++ b/crosstest/test_vector_ops_main.cpp
@@ -130,6 +130,41 @@
free(TestVectors);
}
+/// Cross-checks the shufflevector implementations for vector type T. For
+/// every ordered pair of test vectors and every shuffle number below
+/// VectorOps<T>::shufflevector_count(), the result of
+/// VectorOps<T>::shufflevector is compared bytewise with the result of
+/// VectorOps<T>::Subzero_shufflevector.
+///
+/// TotalTests is incremented once per comparison, and Passes or Failures once
+/// per outcome. Each mismatch is reported on std::cout together with both
+/// input vectors and the shuffle number.
+template <typename T>
+void testShuffleVector(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+  typedef typename VectorOps<T>::Ty Ty;
+
+  size_t NumTestVectors;
+  Ty *TestVectors = getTestVectors<T>(NumTestVectors);
+
+  for (size_t VI = 0; VI < NumTestVectors; ++VI) {
+    Ty Vect0 = TestVectors[VI];
+    for (size_t VJ = 0; VJ < NumTestVectors; ++VJ) {
+      Ty Vect1 = TestVectors[VJ];
+      for (uint32_t Which = 0; Which < VectorOps<T>::shufflevector_count();
+           ++Which) {
+        Ty ResultLlc = VectorOps<T>::shufflevector(Vect0, Vect1, Which);
+        Ty ResultSz = VectorOps<T>::Subzero_shufflevector(Vect0, Vect1, Which);
+        ++TotalTests;
+        if (!memcmp(&ResultLlc, &ResultSz, sizeof(ResultLlc))) {
+          ++Passes;
+        } else {
+          ++Failures;
+          std::cout << "shufflevector<" << VectorOps<T>::TypeName << ">(Vect0=";
+          // Print the shuffle number itself (not the Vect1 loop index) so the
+          // failing table entry can be identified from the log.
+          std::cout << vectAsString<T>(Vect0)
+                    << ", Vect1=" << vectAsString<T>(Vect1)
+                    << ", Which=" << Which << ")\n";
+          std::cout << "llc=" << vectAsString<T>(ResultLlc) << "\n";
+          std::cout << "sz =" << vectAsString<T>(ResultSz) << "\n";
+        }
+      }
+    }
+  }
+
+  free(TestVectors);
+}
+
int main(int argc, char *argv[]) {
size_t TotalTests = 0;
size_t Passes = 0;
@@ -157,6 +192,17 @@
testExtractElement<v4ui32>(TotalTests, Passes, Failures);
testExtractElement<v4f32>(TotalTests, Passes, Failures);
+ testShuffleVector<v4i1>(TotalTests, Passes, Failures);
+ testShuffleVector<v8i1>(TotalTests, Passes, Failures);
+ testShuffleVector<v16i1>(TotalTests, Passes, Failures);
+ testShuffleVector<v16si8>(TotalTests, Passes, Failures);
+ testShuffleVector<v16ui8>(TotalTests, Passes, Failures);
+ testShuffleVector<v8si16>(TotalTests, Passes, Failures);
+ testShuffleVector<v8ui16>(TotalTests, Passes, Failures);
+ testShuffleVector<v4si32>(TotalTests, Passes, Failures);
+ testShuffleVector<v4ui32>(TotalTests, Passes, Failures);
+ testShuffleVector<v4f32>(TotalTests, Passes, Failures);
+
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n";
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 265085d..bd56dbc 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -432,6 +432,8 @@
void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void pshufd(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask);
+ void punpckldq(Type, XmmRegister Dst, XmmRegister Src);
+ void punpckldq(Type, XmmRegister Dst, const Address &Src);
void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
void shufps(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mask);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 1bf1550..a986515 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1565,6 +1565,29 @@
}
+/// Emits punpckldq Dst, Src with both operands in XMM registers: the 0x66
+/// prefix, whatever emitRexRB produces for Dst/Src, the opcode bytes 0x0F
+/// 0x62, and the register-register operand encoding.
+///
+/// The Type parameter is unnamed and unused, so the same opcode is emitted
+/// for every element type.
+/// NOTE(review): the textual suffix for this instruction comes from the
+/// per-type UnpackString (bw/wd/dq); confirm callers only use this emitter
+/// for types whose suffix is "dq".
template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::punpckldq(Type, XmmRegister Dst,
+ XmmRegister Src) {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66);
+ emitRexRB(RexTypeIrrelevant, Dst, Src);
+ emitUint8(0x0F);
+ emitUint8(0x62);
+ emitXmmRegisterOperand(Dst, Src);
+}
+
+/// Emits punpckldq Dst, Src with Src as a memory operand: the 0x66 prefix,
+/// the address-size override and REX bytes produced by
+/// emitAddrSizeOverridePrefix/emitRex, the opcode bytes 0x0F 0x62, and the
+/// operand encoding for (Dst, Src).
+///
+/// The Type parameter is unnamed and unused, so the same opcode is emitted
+/// for every element type.
+/// NOTE(review): confirm callers only use this emitter for types whose
+/// UnpackString suffix is "dq".
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::punpckldq(Type, XmmRegister Dst,
+ const Address &Src) {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66);
+ emitAddrSizeOverridePrefix();
+ emitRex(RexTypeIrrelevant, Src, Dst);
+ emitUint8(0x0F);
+ emitUint8(0x62);
+ emitOperand(gprEncoding(Dst), Src);
+}
+
+template <typename TraitsType>
void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
XmmRegister src,
const Immediate &imm) {
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 006d781..a0ff546 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -50,8 +50,8 @@
const TargetX8632Traits::TypeAttributesType
TargetX8632Traits::TypeAttributes[] = {
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
- { cvt, sdss, pdps, spsd, pack, width, fld } \
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
+ { cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
,
ICETYPEX8632_TABLE
#undef X
diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index 0ed4b80..173164f 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def
@@ -212,22 +212,22 @@
//#define X(val, emit)
#define ICETYPEX8632_TABLE \
- /* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \
- X(void, void, "?", "", "", "", "", "", "") \
- X(i1, void, "si", "", "", "", "", "b", "") \
- X(i8, void, "si", "", "", "", "", "b", "") \
- X(i16, void, "si", "", "", "", "", "w", "") \
- X(i32, void, "si", "", "", "", "", "l", "") \
- X(i64, void, "si", "", "", "", "", "q", "") \
- X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \
- X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \
- X(v4i1, i32, "?", "", "", "", "d", "", "") \
- X(v8i1, i16, "?", "", "", "", "w", "", "") \
- X(v16i1, i8, "?", "", "", "", "b", "", "") \
- X(v16i8, i8, "?", "", "", "", "b", "", "") \
- X(v8i16, i16, "?", "", "", "", "w", "", "") \
- X(v4i32, i32, "dq", "", "", "", "d", "", "") \
- X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "")
-//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)
+ /* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
+ X(void, void, "?", "", "", "", "", "", "", "") \
+ X(i1, void, "si", "", "", "", "", "", "b", "") \
+ X(i8, void, "si", "", "", "", "", "", "b", "") \
+ X(i16, void, "si", "", "", "", "", "", "w", "") \
+ X(i32, void, "si", "", "", "", "", "", "l", "") \
+ X(i64, void, "si", "", "", "", "", "", "q", "") \
+ X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
+ X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
+ X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
+ X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
+ X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
+ X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
+ X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
+ X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
+ X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
+//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp
index 0915bc8..afb4580 100644
--- a/src/IceInstX8664.cpp
+++ b/src/IceInstX8664.cpp
@@ -51,8 +51,8 @@
const TargetX8664Traits::TypeAttributesType
TargetX8664Traits::TypeAttributes[] = {
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
- { cvt, sdss, pdps, spsd, pack, width, fld } \
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
+ { cvt, sdss, pdps, spsd, pack, unpack, width, fld } \
,
ICETYPEX8664_TABLE
#undef X
diff --git a/src/IceInstX8664.def b/src/IceInstX8664.def
index 8ed221d..ad686c7 100644
--- a/src/IceInstX8664.def
+++ b/src/IceInstX8664.def
@@ -293,22 +293,22 @@
//#define X(val, emit)
#define ICETYPEX8664_TABLE \
- /* tag, element type, cvt , sdss, pdps, spsd, pack, width, fld */ \
- X(void, void, "?", "", "", "", "", "", "") \
- X(i1, void, "si", "", "", "", "", "b", "") \
- X(i8, void, "si", "", "", "", "", "b", "") \
- X(i16, void, "si", "", "", "", "", "w", "") \
- X(i32, void, "si", "", "", "", "", "l", "") \
- X(i64, void, "si", "", "", "", "", "q", "") \
- X(f32, void, "ss", "ss", "ps", "ss", "d", "", "s") \
- X(f64, void, "sd", "sd", "pd", "sd", "q", "", "l") \
- X(v4i1, i32, "?", "", "", "", "d", "", "") \
- X(v8i1, i16, "?", "", "", "", "w", "", "") \
- X(v16i1, i8, "?", "", "", "", "b", "", "") \
- X(v16i8, i8, "?", "", "", "", "b", "", "") \
- X(v8i16, i16, "?", "", "", "", "w", "", "") \
- X(v4i32, i32, "dq", "", "", "", "d", "", "") \
- X(v4f32, f32, "ps", "", "ps", "ps", "d", "", "")
-//#define X(tag, elementty, cvt, sdss, pdps, pack, width, fld)
+ /* tag, element type, cvt , sdss, pdps, spsd, pack, unpack, width, fld */ \
+ X(void, void, "?", "", "", "", "", "", "", "") \
+ X(i1, void, "si", "", "", "", "", "", "b", "") \
+ X(i8, void, "si", "", "", "", "", "", "b", "") \
+ X(i16, void, "si", "", "", "", "", "", "w", "") \
+ X(i32, void, "si", "", "", "", "", "", "l", "") \
+ X(i64, void, "si", "", "", "", "", "", "q", "") \
+ X(f32, void, "ss", "ss", "ps", "ss", "d", "", "", "s") \
+ X(f64, void, "sd", "sd", "pd", "sd", "q", "", "", "l") \
+ X(v4i1, i32, "?", "", "", "", "d", "dq", "", "") \
+ X(v8i1, i16, "?", "", "", "", "w", "wd", "", "") \
+ X(v16i1, i8, "?", "", "", "", "b", "bw", "", "") \
+ X(v16i8, i8, "?", "", "", "", "b", "bw", "", "") \
+ X(v8i16, i16, "?", "", "", "", "w", "wd", "", "") \
+ X(v4i32, i32, "dq", "", "", "", "d", "dq", "", "") \
+ X(v4f32, f32, "ps", "", "ps", "ps", "d", "dq", "", "")
+//#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8664_DEF
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 1c2e8e6..c29538a 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -143,6 +143,7 @@
Pop,
Por,
Pshufd,
+ Punpckl,
Psll,
Psra,
Psrl,
@@ -183,7 +184,7 @@
IacaEnd
};
- enum SseSuffix { None, Packed, Scalar, Integral };
+ enum SseSuffix { None, Packed, Unpack, Scalar, Integral };
static const char *getWidthString(Type Ty);
static const char *getFldString(Type Ty);
@@ -841,6 +842,9 @@
case InstX86Base::SseSuffix::Packed:
SuffixString = Traits::TypeAttributes[DestTy].PdPsString;
break;
+ case InstX86Base::SseSuffix::Unpack:
+ SuffixString = Traits::TypeAttributes[DestTy].UnpackString;
+ break;
case InstX86Base::SseSuffix::Scalar:
SuffixString = Traits::TypeAttributes[DestTy].SdSsString;
break;
@@ -2839,6 +2843,23 @@
private:
InstX86IacaEnd(Cfg *Func);
};
+
+ /// Binary XMM instruction "punpckl" (Dest, Source). It instantiates
+ /// InstX86BaseBinopXmm with the Punpckl instruction kind and
+ /// SseSuffix::Unpack; the meaning of the `false` template argument is
+ /// defined by InstX86BaseBinopXmm, which is not shown here.
+ /// Instances are created only through create(), which placement-news the
+ /// object into storage obtained from Func->allocate.
+ class InstX86Punpckl
+ : public InstX86BaseBinopXmm<InstX86Base::Punpckl, false,
+ InstX86Base::SseSuffix::Unpack> {
+ public:
+ /// Allocates and constructs a Punpckl instruction owned by Func.
+ static InstX86Punpckl *create(Cfg *Func, Variable *Dest, Operand *Source) {
+ return new (Func->allocate<InstX86Punpckl>())
+ InstX86Punpckl(Func, Dest, Source);
+ }
+
+ private:
+ // Private so that create() is the only way to build an instance.
+ InstX86Punpckl(Cfg *Func, Variable *Dest, Operand *Source)
+ : InstX86BaseBinopXmm<InstX86Base::Punpckl, false,
+ InstX86Base::SseSuffix::Unpack>(Func, Dest,
+ Source) {}
+ };
+
}; // struct InstImpl
/// struct Insts is a template that can be used to instantiate all the X86
@@ -2960,6 +2981,8 @@
using IacaStart = typename InstImpl<TraitsType>::InstX86IacaStart;
using IacaEnd = typename InstImpl<TraitsType>::InstX86IacaEnd;
+
+ using Punpckl = typename InstImpl<TraitsType>::InstX86Punpckl;
};
/// X86 Instructions have static data (particularly, opcodes and instruction
@@ -3189,6 +3212,9 @@
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Pshufd::Base::Opcode = "pshufd"; \
+ template <> \
+ template <> \
+ const char *InstImpl<TraitsType>::InstX86Punpckl::Base::Opcode = "punpckl"; \
/* Inplace GPR ops */ \
template <> \
template <> \
@@ -3550,6 +3576,12 @@
&InstImpl<TraitsType>::Assembler::psrl, \
&InstImpl<TraitsType>::Assembler::psrl, \
&InstImpl<TraitsType>::Assembler::psrl}; \
+ template <> \
+ template <> \
+ const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
+ InstImpl<TraitsType>::InstX86Punpckl::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::punpckldq, \
+ &InstImpl<TraitsType>::Assembler::punpckldq}; \
} \
}
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index f69d19a..a612268 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -101,7 +101,7 @@
const TargetX8632Traits::TableTypeX8632AttributesType
TargetX8632Traits::TableTypeX8632Attributes[] = {
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \
,
ICETYPEX8632_TABLE
@@ -459,7 +459,8 @@
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag,
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
+ _tmp_##tag,
ICETYPEX8632_TABLE
#undef X
_num
@@ -471,7 +472,7 @@
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 554b916..2716a34 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -976,13 +976,14 @@
} InstCmppsAttributes[];
static const struct TypeAttributesType {
- const char *CvtString; // i (integer), s (single FP), d (double FP)
- const char *SdSsString; // ss, sd, or <blank>
- const char *PdPsString; // ps, pd, or <blank>
- const char *SpsdString; // ss, sd, ps, pd, or <blank>
- const char *PackString; // b, w, d, or <blank>
- const char *WidthString; // b, w, l, q, or <blank>
- const char *FldString; // s, l, or <blank>
+ const char *CvtString; // i (integer), s (single FP), d (double FP)
+ const char *SdSsString; // ss, sd, or <blank>
+ const char *PdPsString; // ps, pd, or <blank>
+ const char *SpsdString; // ss, sd, ps, pd, or <blank>
+ const char *PackString; // b, w, d, or <blank>
+ const char *UnpackString; // bw, wd, dq, or <blank>
+ const char *WidthString; // b, w, l, q, or <blank>
+ const char *FldString; // s, l, or <blank>
} TypeAttributes[];
static const char *InstSegmentRegNames[];
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 33606d4..73ad386 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -94,7 +94,7 @@
const TargetX8664Traits::TableTypeX8664AttributesType
TargetX8664Traits::TableTypeX8664Attributes[] = {
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
{ IceType_##elementty } \
,
ICETYPEX8664_TABLE
@@ -787,7 +787,8 @@
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) _tmp_##tag,
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
+ _tmp_##tag,
ICETYPEX8664_TABLE
#undef X
_num
@@ -799,7 +800,7 @@
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
-#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld) \
+#define X(tag, elementty, cvt, sdss, pdps, spsd, pack, unpack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 0fc4800..4e9173a 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -1021,13 +1021,14 @@
} InstCmppsAttributes[];
static const struct TypeAttributesType {
- const char *CvtString; // i (integer), s (single FP), d (double FP)
- const char *SdSsString; // ss, sd, or <blank>
- const char *PdPsString; // ps, pd, or <blank>
- const char *SpSdString; // ss, sd, ps, pd, or <blank>
- const char *PackString; // b, w, d, or <blank>
- const char *WidthString; // b, w, l, q, or <blank>
- const char *FldString; // s, l, or <blank>
+ const char *CvtString; // i (integer), s (single FP), d (double FP)
+ const char *SdSsString; // ss, sd, or <blank>
+ const char *PdPsString; // ps, pd, or <blank>
+ const char *SpSdString; // ss, sd, ps, pd, or <blank>
+ const char *PackString; // b, w, d, or <blank>
+ const char *UnpackString; // bw, wd, dq, or <blank>
+ const char *WidthString; // b, w, l, q, or <blank>
+ const char *FldString; // s, l, or <blank>
} TypeAttributes[];
};
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index f84c6df..71b824f 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -801,6 +801,10 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Por>(Dest, Src0);
}
+ /// Inserts a Punpckl instruction with operands (Dest, Src0) via
+ /// Context.insert. As in the neighboring helpers, an AutoMemorySandboxer is
+ /// constructed over the operands first and stays alive across the insert.
+ void _punpckl(Variable *Dest, Operand *Src0) {
+ AutoMemorySandboxer<> _(this, &Dest, &Src0);
+ Context.insert<typename Traits::Insts::Punpckl>(Dest, Src0);
+ }
void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Pshufd>(Dest, Src0, Src1);
@@ -1082,6 +1086,23 @@
BoolFolding<Traits> FoldingInfo;
+ /// Helpers for lowering ShuffleVector
+ /// @{
+ Variable *lowerShuffleVector_AllFromSameSrc(Variable *Src, SizeT Index0,
+ SizeT Index1, SizeT Index2,
+ SizeT Index3);
+ static constexpr SizeT IGNORE_INDEX = 0x80000000u;
+ Variable *lowerShuffleVector_TwoFromSameSrc(Variable *Src0, SizeT Index0,
+ SizeT Index1, Variable *Src1,
+ SizeT Index2, SizeT Index3);
+ static constexpr SizeT UNIFIED_INDEX_0 = 0;
+ static constexpr SizeT UNIFIED_INDEX_1 = 2;
+ Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Variable *Src0,
+ SizeT Index0,
+ Variable *Src1,
+ SizeT Index1);
+ /// @}
+
static FixupKind PcRelFixup;
static FixupKind AbsFixup;
};
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index d96c0ca..35d7ea0 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -5610,25 +5610,295 @@
keepEspLiveAtExit();
}
+/// Encodes four lane selectors as the 8-bit immediate taken by pshufd/shufps.
+/// Only the low two bits of each index are kept, so the source-select bit is
+/// dropped and IGNORE_INDEX is encoded as lane 0.
+inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2,
+                               SizeT Index3) {
+  constexpr SizeT LaneMask = 0x3;
+  SizeT Imm8 = Index0 & LaneMask;
+  Imm8 |= (Index1 & LaneMask) << 2;
+  Imm8 |= (Index2 & LaneMask) << 4;
+  Imm8 |= (Index3 & LaneMask) << 6;
+  assert(Imm8 < 256); // Four 2-bit fields always fit in a byte.
+  return Imm8;
+}
+
+/// Lowers a shuffle whose four result lanes all come from \p Src: a single
+/// pshufd writes {Src[Index0], Src[Index1], Src[Index2], Src[Index3]} into a
+/// register obtained from makeReg, which is returned. All indexes must agree
+/// on the source-select bit (bit 2).
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc(
+    Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) {
+  constexpr SizeT SrcSelectBit = 4;
+  const SizeT SrcOfIndex0 = Index0 & SrcSelectBit;
+  assert((Index1 & SrcSelectBit) == SrcOfIndex0);
+  assert((Index2 & SrcSelectBit) == SrcOfIndex0);
+  assert((Index3 & SrcSelectBit) == SrcOfIndex0);
+  // Only the asserts read these; keep release builds warning-free.
+  (void)SrcSelectBit;
+  (void)SrcOfIndex0;
+
+  auto *Shuffled = makeReg(Src->getType());
+  auto *SrcOperand = legalize(Src, Legal_Reg | Legal_Mem);
+  const uint32_t Imm8 = makePshufdMask(Index0, Index1, Index2, Index3);
+  auto *Selector = Ctx->getConstantInt32(Imm8);
+  _pshufd(Shuffled, SrcOperand, Selector);
+  return Shuffled;
+}
+
+/// Lowers a shuffle whose lanes 0-1 come from \p Src0 and lanes 2-3 from
+/// \p Src1. \p Src0 is copied into a register obtained from makeReg and a
+/// shufps with \p Src1 then yields
+/// {Src0[Index0], Src0[Index1], Src1[Index2], Src1[Index3]}.
+/// Index1 and Index3 may be IGNORE_INDEX; otherwise each pair must agree on
+/// the source-select bit (bit 2).
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc(
+    Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2,
+    SizeT Index3) {
+  constexpr SizeT SrcSelectBit = 4;
+  assert((Index1 == IGNORE_INDEX) ||
+         (Index0 & SrcSelectBit) == (Index1 & SrcSelectBit));
+  assert((Index3 == IGNORE_INDEX) ||
+         (Index2 & SrcSelectBit) == (Index3 & SrcSelectBit));
+  (void)SrcSelectBit;
+
+  const Type VecTy = Src0->getType();
+  assert(Src1->getType() == VecTy);
+  auto *Result = makeReg(VecTy);
+  auto *LowSrcReg = legalizeToReg(Src0);
+  auto *HighSrcOperand = legalize(Src1, Legal_Reg | Legal_Mem);
+  const uint32_t Imm8 = makePshufdMask(Index0, Index1, Index2, Index3);
+  auto *Selector = Ctx->getConstantInt32(Imm8);
+  // shufps reads its low two lanes from the destination, hence the copy.
+  _movp(Result, LowSrcReg);
+  _shufps(Result, HighSrcOperand, Selector);
+  return Result;
+}
+
+/// Gathers one element from each source into a single vector: lane
+/// UNIFIED_INDEX_0 receives Src0[Index0] and lane UNIFIED_INDEX_1 receives
+/// Src1[Index1]. The remaining two lanes are requested with IGNORE_INDEX and
+/// callers must not rely on their contents.
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs(
+    Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) {
+  Variable *Unified = lowerShuffleVector_TwoFromSameSrc(
+      Src0, Index0, IGNORE_INDEX, Src1, Index1, IGNORE_INDEX);
+  return Unified;
+}
+
+/// Summarizes which source each index refers to: bit I of the result is set
+/// exactly when IndexI has its source-select bit (bit 2) set. The result is
+/// the switch value matched by CASE_SRCS_IN in lowerShuffleVector.
+inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
+                               SizeT Index3) {
+  constexpr SizeT SrcBitPos = 2;
+  SizeT SwitchMask = 0;
+  SwitchMask |= ((Index0 >> SrcBitPos) & 1) << 0;
+  SwitchMask |= ((Index1 >> SrcBitPos) & 1) << 1;
+  SwitchMask |= ((Index2 >> SrcBitPos) & 1) << 2;
+  SwitchMask |= ((Index3 >> SrcBitPos) & 1) << 3;
+  return SwitchMask;
+}
+
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShuffleVector(
const InstShuffleVector *Instr) {
auto *Dest = Instr->getDest();
const Type DestTy = Dest->getType();
+ auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
+ auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
+ const SizeT NumElements = typeNumElements(DestTy);
auto *T = makeReg(DestTy);
switch (DestTy) {
default:
break;
- // TODO(jpp): figure out how to properly lower this without scalarization.
+ // TODO(jpp): figure out how to properly lower the remaining cases without
+ // scalarization.
+ case IceType_v4i1:
+ case IceType_v4i32:
+ case IceType_v4f32: {
+ static constexpr SizeT ExpectedNumElements = 4;
+ assert(ExpectedNumElements == Instr->getNumIndexes());
+ const SizeT Index0 = Instr->getIndex(0)->getValue();
+ const SizeT Index1 = Instr->getIndex(1)->getValue();
+ const SizeT Index2 = Instr->getIndex(2)->getValue();
+ const SizeT Index3 = Instr->getIndex(3)->getValue();
+ Variable *T = nullptr;
+ switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) {
+#define CASE_SRCS_IN(S0, S1, S2, S3) \
+ case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3))
+ CASE_SRCS_IN(0, 0, 0, 0) : {
+ T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2,
+ Index3);
+ }
+ break;
+ CASE_SRCS_IN(0, 0, 0, 1) : {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
+ Src1, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ break;
+ CASE_SRCS_IN(0, 0, 1, 0) : {
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
+ Src0, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ break;
+ CASE_SRCS_IN(0, 0, 1, 1) : {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1,
+ Index2, Index3);
+ }
+ break;
+ CASE_SRCS_IN(0, 1, 0, 0) : {
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
+ Src1, Index1);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
+ }
+ break;
+ CASE_SRCS_IN(0, 1, 0, 1) : {
+ if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 &&
+ (Index3 - ExpectedNumElements) == 1) {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+ auto *Src0R = legalizeToReg(Src0);
+ T = makeReg(DestTy);
+ _movp(T, Src0R);
+ _punpckl(T, Src1RM);
+ } else if (Index0 == Index2 && Index1 == Index3) {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src0, Index0, Src1, Index1);
+ T = lowerShuffleVector_AllFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
+ UNIFIED_INDEX_1);
+ } else {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src0, Index0, Src1, Index1);
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src0, Index2, Src1, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ }
+ break;
+ CASE_SRCS_IN(0, 1, 1, 0) : {
+ if (Index0 == Index3 && Index1 == Index2) {
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src0, Index0, Src1, Index1);
+ T = lowerShuffleVector_AllFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
+ UNIFIED_INDEX_0);
+ } else {
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src0, Index0, Src1, Index1);
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src1, Index2, Src0, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ }
+ break;
+ CASE_SRCS_IN(0, 1, 1, 1) : {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
+ Src1, Index1);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
+ }
+ break;
+ CASE_SRCS_IN(1, 0, 0, 0) : {
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
+ Src0, Index1);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
+ }
+ break;
+ CASE_SRCS_IN(1, 0, 0, 1) : {
+ if (Index0 == Index3 && Index1 == Index2) {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src1, Index0, Src0, Index1);
+ T = lowerShuffleVector_AllFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
+ UNIFIED_INDEX_0);
+ } else {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src1, Index0, Src0, Index1);
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src0, Index2, Src1, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ }
+ break;
+ CASE_SRCS_IN(1, 0, 1, 0) : {
+ if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 &&
+ (Index2 - ExpectedNumElements) == 1 && Index3 == 1) {
+ auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ auto *Src0R = legalizeToReg(Src1);
+ T = makeReg(DestTy);
+ _movp(T, Src0R);
+ _punpckl(T, Src1RM);
+ } else if (Index0 == Index2 && Index1 == Index3) {
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src1, Index0, Src0, Index1);
+ T = lowerShuffleVector_AllFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
+ UNIFIED_INDEX_1);
+ } else {
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src1, Index0, Src0, Index1);
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
+ Src1, Index2, Src0, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ }
+ break;
+ CASE_SRCS_IN(1, 0, 1, 1) : {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
+ Src0, Index1);
+ T = lowerShuffleVector_TwoFromSameSrc(
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
+ }
+ break;
+ CASE_SRCS_IN(1, 1, 0, 0) : {
+ T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0,
+ Index2, Index3);
+ }
+ break;
+ CASE_SRCS_IN(1, 1, 0, 1) : {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
+ Src1, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ break;
+ CASE_SRCS_IN(1, 1, 1, 0) : {
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
+ Src0, Index3);
+ T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1);
+ }
+ break;
+ CASE_SRCS_IN(1, 1, 1, 1) : {
+ assert(false && "Following code is untested but likely correct; test "
+ "and remove assert.");
+ T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2,
+ Index3);
+ }
+ break;
+#undef CASE_SRCS_IN
+ }
+
+ assert(T != nullptr);
+ assert(T->getType() == DestTy);
+ _movp(Dest, T);
+ return;
+ } break;
}
// Unoptimized shuffle. Perform a series of inserts and extracts.
Context.insert<InstFakeDef>(T);
- auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
- auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
- const SizeT NumElements = typeNumElements(DestTy);
const Type ElementType = typeElementType(DestTy);
for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
auto *Index = Instr->getIndex(I);
diff --git a/tests_lit/llvm2ice_tests/vector-shuffle.ll b/tests_lit/llvm2ice_tests/vector-shuffle.ll
new file mode 100644
index 0000000..62fbc9d
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/vector-shuffle.ll
@@ -0,0 +1,53 @@
+; Some shufflevector optimized lowering. This list is by no means exhaustive. It
+; is only a **basic** smoke test. The vector_ops crosstest has a broader range
+; of test cases.
+
+; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble -a -O2 \
+; RUN: --allow-externally-defined-symbols | FileCheck %s --check-prefix=X86
+
+declare void @useV4I32(<4 x i32> %t);
+
+define internal void @shuffleV4I32(<4 x i32> %a, <4 x i32> %b) {
+; X86-LABEL: shuffleV4I32
+; Split both inputs into scalars so that each result vector below can be
+; assembled lane by lane from a chosen mix of %a and %b elements.
+ %a_0 = extractelement <4 x i32> %a, i32 0
+ %a_1 = extractelement <4 x i32> %a, i32 1
+ %a_2 = extractelement <4 x i32> %a, i32 2
+ %a_3 = extractelement <4 x i32> %a, i32 3
+
+ %b_0 = extractelement <4 x i32> %b, i32 0
+ %b_1 = extractelement <4 x i32> %b, i32 1
+ %b_2 = extractelement <4 x i32> %b, i32 2
+ %b_3 = extractelement <4 x i32> %b, i32 3
+
+; t0 = <a0, b0, a1, b1>: the low halves of %a and %b interleaved, which is
+; expected to become a single punpckldq.
+ %t0_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
+ %t0_1 = insertelement <4 x i32> %t0_0, i32 %b_0, i32 1
+ %t0_2 = insertelement <4 x i32> %t0_1, i32 %a_1, i32 2
+ %t0 = insertelement <4 x i32> %t0_2, i32 %b_1, i32 3
+; X86: punpckldq {{.*}}
+
+ call void @useV4I32(<4 x i32> %t0)
+; X86: call
+
+; t1 = <a0, b1, b1, a0>: only two distinct elements, expected to be gathered
+; by one shufps and then put in place by a pshufd of that result.
+ %t1_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
+ %t1_1 = insertelement <4 x i32> %t1_0, i32 %b_1, i32 1
+ %t1_2 = insertelement <4 x i32> %t1_1, i32 %b_1, i32 2
+ %t1 = insertelement <4 x i32> %t1_2, i32 %a_0, i32 3
+; X86: shufps [[T:xmm[0-9]+]],{{.*}},0x10
+; X86: pshufd {{.*}},[[T]],0x28
+
+ call void @useV4I32(<4 x i32> %t1)
+; X86: call
+
+; t2 = <a0, b3, a2, b2>: four distinct elements alternating between the
+; inputs, expected to need two gathering shufps and a final combining shufps.
+ %t2_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0
+ %t2_1 = insertelement <4 x i32> %t2_0, i32 %b_3, i32 1
+ %t2_2 = insertelement <4 x i32> %t2_1, i32 %a_2, i32 2
+ %t2 = insertelement <4 x i32> %t2_2, i32 %b_2, i32 3
+; X86: shufps {{.*}},0x30
+; X86: shufps {{.*}},0x22
+; X86: shufps {{.*}},0x88
+
+ call void @useV4I32(<4 x i32> %t2)
+; X86: call
+
+ ret void
+}
diff --git a/unittest/AssemblerX8632/XmmArith.cpp b/unittest/AssemblerX8632/XmmArith.cpp
index cf05b79..d19fcb8 100644
--- a/unittest/AssemblerX8632/XmmArith.cpp
+++ b/unittest/AssemblerX8632/XmmArith.cpp
@@ -995,27 +995,6 @@
reset(); \
} while (0)
-#define TestImplSingleXmmXmmUntyped(Dst, Src, Inst) \
- do { \
- static constexpr char TestString[] = \
- "(" #Dst ", " #Src ", " #Inst ", Untyped)"; \
- const uint32_t T0 = allocateDqword(); \
- const uint32_t T1 = allocateDqword(); \
- \
- __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
- __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
- __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src, \
- Immediate(Inst##Imm)); \
- \
- AssembledTest test = assemble(); \
- test.setDqwordTo(T0, V0); \
- test.setDqwordTo(T1, V1); \
- test.run(); \
- \
- ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString; \
- reset(); \
- } while (0)
-
#define TestImpl(Dst, Src) \
do { \
TestImplSingleXmmXmm(Dst, Src, pshufd); \
@@ -1034,11 +1013,77 @@
TestImpl(xmm7, xmm0);
#undef TestImpl
-#undef TestImplSingleXmmXmmUntyped
#undef TestImplSingleXmmAddr
#undef TestImplSingleXmmXmm
}
+// Checks punpckldq in its register-register and register-memory forms for
+// each destination register listed at the bottom of the test.
+TEST_F(AssemblerX8632Test, Punpckldq) {
+ // V0 is stored where the destination register is loaded from (T0); V1 is
+ // stored where the source operand comes from (T1).
+ const Dqword V0(uint64_t(0x1111111122222222ull),
+ uint64_t(0x5555555577777777ull));
+ const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
+ uint64_t(0xCCCCCCCCDDDDDDDDull));
+
+ // punpckldq interleaves the two low dwords of destination and source. This
+ // value matches {V0.d0, V1.d0, V0.d1, V1.d1} if Dqword's constructor takes
+ // (low qword, high qword) -- NOTE(review): confirm against Dqword.
+ const Dqword Expected(uint64_t(0xBBBBBBBB22222222ull),
+ uint64_t(0xAAAAAAAA11111111ull));
+
+// Register-register form: Dst is loaded from T0 and Src from T1 before Inst.
+#define TestImplXmmXmm(Dst, Src, Inst) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
+ __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
+ __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
+ XmmRegister::Encoded_Reg_##Src); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Register-memory form: the source operand is the dqword at T1 itself.
+#define TestImplXmmAddr(Dst, Inst) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
+ __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Runs both forms for one destination; Src is only used by the register form.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplXmmXmm(Dst, Src, punpckldq); \
+ TestImplXmmAddr(Dst, punpckldq); \
+ } while (0)
+
+ // xmm0 through xmm7 each appear once as destination; the last pair wraps
+ // around to xmm0 as the source.
+ TestImpl(xmm0, xmm1);
+ TestImpl(xmm1, xmm2);
+ TestImpl(xmm2, xmm3);
+ TestImpl(xmm3, xmm4);
+ TestImpl(xmm4, xmm5);
+ TestImpl(xmm5, xmm6);
+ TestImpl(xmm6, xmm7);
+ TestImpl(xmm7, xmm0);
+
+#undef TestImpl
+#undef TestImplXmmAddr
+#undef TestImplXmmXmm
+}
+
TEST_F(AssemblerX8632Test, Cvt) {
const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
diff --git a/unittest/AssemblerX8664/XmmArith.cpp b/unittest/AssemblerX8664/XmmArith.cpp
index 6a0d9f5..c037520 100644
--- a/unittest/AssemblerX8664/XmmArith.cpp
+++ b/unittest/AssemblerX8664/XmmArith.cpp
@@ -1083,6 +1083,81 @@
#undef TestImplSingleXmmXmm
}
+// Checks punpckldq in its register-register and register-memory forms for
+// each destination register listed at the bottom of the test.
+TEST_F(AssemblerX8664Test, Punpckldq) {
+ // V0 is stored where the destination register is loaded from (T0); V1 is
+ // stored where the source operand comes from (T1).
+ const Dqword V0(uint64_t(0x1111111122222222ull),
+ uint64_t(0x5555555577777777ull));
+ const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
+ uint64_t(0xCCCCCCCCDDDDDDDDull));
+
+ // punpckldq interleaves the two low dwords of destination and source. This
+ // value matches {V0.d0, V1.d0, V0.d1, V1.d1} if Dqword's constructor takes
+ // (low qword, high qword) -- NOTE(review): confirm against Dqword.
+ const Dqword Expected(uint64_t(0xBBBBBBBB22222222ull),
+ uint64_t(0xAAAAAAAA11111111ull));
+
+// Register-register form: Dst is loaded from T0 and Src from T1 before Inst.
+#define TestImplXmmXmm(Dst, Src, Inst) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
+ __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
+ __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
+ XmmRegister::Encoded_Reg_##Src); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Register-memory form: the source operand is the dqword at T1 itself.
+#define TestImplXmmAddr(Dst, Inst) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
+ __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Runs both forms for one destination; Src is only used by the register form.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplXmmXmm(Dst, Src, punpckldq); \
+ TestImplXmmAddr(Dst, punpckldq); \
+ } while (0)
+
+ // xmm0 through xmm15 each appear once as destination; the last pair wraps
+ // around to xmm0 as the source.
+ TestImpl(xmm0, xmm1);
+ TestImpl(xmm1, xmm2);
+ TestImpl(xmm2, xmm3);
+ TestImpl(xmm3, xmm4);
+ TestImpl(xmm4, xmm5);
+ TestImpl(xmm5, xmm6);
+ TestImpl(xmm6, xmm7);
+ TestImpl(xmm7, xmm8);
+ TestImpl(xmm8, xmm9);
+ TestImpl(xmm9, xmm10);
+ TestImpl(xmm10, xmm11);
+ TestImpl(xmm11, xmm12);
+ TestImpl(xmm12, xmm13);
+ TestImpl(xmm13, xmm14);
+ TestImpl(xmm14, xmm15);
+ TestImpl(xmm15, xmm0);
+
+#undef TestImpl
+#undef TestImplXmmAddr
+#undef TestImplXmmXmm
+}
+
TEST_F(AssemblerX8664Test, Cvt) {
const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
const Dqword dq2ps32SrcValue(-5, 3, 100, 200);