Implement byte swizzle operations
Add Swizzle() intrinsics for Byte16, Byte8, and Byte4, and add Byte4
constructors and assignment operators. Also move the LLVM-specific
UShort8 Swizzle() implementation to the generic Reactor.cpp source
file, replacing its eight char selectors with a single packed uint32_t
selector.
On x86 these all translate to a pshufb instruction, which is very
efficient.
Bug: b/148295813
Change-Id: Icf88fe1621623f8104c4a642d560643a01b9ef55
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40549
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/docs/Reactor.md b/docs/Reactor.md
index df016d7..7824a14 100644
--- a/docs/Reactor.md
+++ b/docs/Reactor.md
@@ -141,6 +141,8 @@
Note that this is a bitwise cast. Unlike C++'s ```reinterpret_cast<>```, it does not allow casting between different sized types. Think of it as storing the value in memory and then loading from that same address into the casted type.
+An important exception is that 16-, 8-, and 4-byte vectors can be cast to one another. Casting to a longer vector leaves its upper bytes undefined.
+
### Pointers
Pointers also use a template class:
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 47144df..2600112 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -2906,35 +2906,6 @@
#endif
}
-RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
- int pshufb[16] = {
- select0 + 0,
- select0 + 1,
- select1 + 0,
- select1 + 1,
- select2 + 0,
- select2 + 1,
- select3 + 0,
- select3 + 1,
- select4 + 0,
- select4 + 1,
- select5 + 0,
- select5 + 1,
- select6 + 0,
- select6 + 1,
- select7 + 0,
- select7 + 1,
- };
-
- Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
- Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
- Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
-
- return RValue<UShort8>(short8);
-}
-
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
{
RR_DEBUG_INFO_UPDATE_LOC();
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index b3ce607..8801f2a 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -1250,12 +1250,62 @@
storeValue(Nucleus::createBitCast(cast.value, getType()));
}
+Byte4::Byte4(RValue<UShort4> cast)
+{
+ // TODO(b/148379603): Optimize narrowing swizzle.
+ *this = As<Byte4>(Swizzle(As<Byte8>(cast), 0x0246'0246));
+}
+
+Byte4::Byte4(RValue<Short4> cast)
+{
+ // TODO(b/148379603): Optimize narrowing swizzle.
+ *this = As<Byte4>(Swizzle(As<Byte8>(cast), 0x0246'0246));
+}
+
+Byte4::Byte4(RValue<UInt4> cast)
+{
+ // TODO(b/148379603): Optimize narrowing swizzle.
+ *this = As<Byte4>(Swizzle(As<Byte16>(cast), 0x048C'048C'048C'048C));
+}
+
+Byte4::Byte4(RValue<Int4> cast)
+{
+ // TODO(b/148379603): Optimize narrowing swizzle.
+ *this = As<Byte4>(Swizzle(As<Byte16>(cast), 0x048C'048C'048C'048C));
+}
+
+Byte4::Byte4(RValue<Byte4> rhs)
+{
+ storeValue(rhs.value);
+}
+
+Byte4::Byte4(const Byte4 &rhs)
+{
+ Value *value = rhs.loadValue();
+ storeValue(value);
+}
+
Byte4::Byte4(const Reference<Byte4> &rhs)
{
Value *value = rhs.loadValue();
storeValue(value);
}
+RValue<Byte4> Byte4::operator=(RValue<Byte4> rhs)
+{
+ storeValue(rhs.value);
+
+ return rhs;
+}
+
+RValue<Byte4> Byte4::operator=(const Byte4 &rhs)
+{
+ Value *value = rhs.loadValue();
+ storeValue(value);
+
+ return RValue<Byte4>(value);
+}
+
Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{
int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
@@ -1417,8 +1467,35 @@
return RValue<Byte8>(Nucleus::createNot(val.value));
}
+RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select)
+{
+ // Real type is v16i8
+ // TODO(b/148379603): Optimize narrowing swizzle.
+ int shuffle[16] = {
+ static_cast<int>((select >> 28) & 0x07),
+ static_cast<int>((select >> 24) & 0x07),
+ static_cast<int>((select >> 20) & 0x07),
+ static_cast<int>((select >> 16) & 0x07),
+ static_cast<int>((select >> 12) & 0x07),
+ static_cast<int>((select >> 8) & 0x07),
+ static_cast<int>((select >> 4) & 0x07),
+ static_cast<int>((select >> 0) & 0x07),
+ static_cast<int>((select >> 28) & 0x07),
+ static_cast<int>((select >> 24) & 0x07),
+ static_cast<int>((select >> 20) & 0x07),
+ static_cast<int>((select >> 16) & 0x07),
+ static_cast<int>((select >> 12) & 0x07),
+ static_cast<int>((select >> 8) & 0x07),
+ static_cast<int>((select >> 4) & 0x07),
+ static_cast<int>((select >> 0) & 0x07),
+ };
+
+ return As<Byte8>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
+}
+
RValue<Short4> Unpack(RValue<Byte4> x)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 }; // Real type is v16i8
return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
@@ -1430,12 +1507,14 @@
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
@@ -1606,12 +1685,14 @@
RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8
auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
@@ -1657,6 +1738,30 @@
return RValue<Byte16>(value);
}
+RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select)
+{
+ int shuffle[16] = {
+ static_cast<int>((select >> 60) & 0x0F),
+ static_cast<int>((select >> 56) & 0x0F),
+ static_cast<int>((select >> 52) & 0x0F),
+ static_cast<int>((select >> 48) & 0x0F),
+ static_cast<int>((select >> 44) & 0x0F),
+ static_cast<int>((select >> 40) & 0x0F),
+ static_cast<int>((select >> 36) & 0x0F),
+ static_cast<int>((select >> 32) & 0x0F),
+ static_cast<int>((select >> 28) & 0x0F),
+ static_cast<int>((select >> 24) & 0x0F),
+ static_cast<int>((select >> 20) & 0x0F),
+ static_cast<int>((select >> 16) & 0x0F),
+ static_cast<int>((select >> 12) & 0x0F),
+ static_cast<int>((select >> 8) & 0x0F),
+ static_cast<int>((select >> 4) & 0x0F),
+ static_cast<int>((select >> 0) & 0x0F),
+ };
+
+ return As<Byte16>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
+}
+
Short2::Short2(RValue<Short4> cast)
{
storeValue(Nucleus::createBitCast(cast.value, getType()));
@@ -1890,6 +1995,7 @@
RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 }; // Real type is v8i16
auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Int2>(Swizzle(As<Int4>(lowHigh), 0x2323));
@@ -1898,6 +2004,7 @@
RValue<Short4> Swizzle(RValue<Short4> x, uint16_t select)
{
// Real type is v8i16
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[8] = {
(select >> 12) & 0x03,
(select >> 8) & 0x03,
@@ -2214,6 +2321,22 @@
return RValue<UShort8>(Nucleus::createNot(val.value));
}
+RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select)
+{
+ int swizzle[16] = {
+ static_cast<int>((select >> 28) & 0x07),
+ static_cast<int>((select >> 24) & 0x07),
+ static_cast<int>((select >> 20) & 0x07),
+ static_cast<int>((select >> 16) & 0x07),
+ static_cast<int>((select >> 12) & 0x07),
+ static_cast<int>((select >> 8) & 0x07),
+ static_cast<int>((select >> 4) & 0x07),
+ static_cast<int>((select >> 0) & 0x07),
+ };
+
+ return RValue<UShort8>(Nucleus::createShuffleVector(x.value, x.value, swizzle));
+}
+
Int::Int(Argument<Int> argument)
{
storeValue(argument.value);
@@ -3073,12 +3196,14 @@
RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32
return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
{
+ // TODO(b/148379603): Optimize narrowing swizzle.
int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32
auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
return As<Short4>(Swizzle(lowHigh, 0x2323));
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 3ddbec9..488c0be 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -517,15 +517,19 @@
{
public:
explicit Byte4(RValue<Byte8> cast);
+ explicit Byte4(RValue<UShort4> cast);
+ explicit Byte4(RValue<Short4> cast);
+ explicit Byte4(RValue<UInt4> cast);
+ explicit Byte4(RValue<Int4> cast);
Byte4() = default;
// Byte4(int x, int y, int z, int w);
- // Byte4(RValue<Byte4> rhs);
- // Byte4(const Byte4 &rhs);
+ Byte4(RValue<Byte4> rhs);
+ Byte4(const Byte4 &rhs);
Byte4(const Reference<Byte4> &rhs);
- // RValue<Byte4> operator=(RValue<Byte4> rhs);
- // RValue<Byte4> operator=(const Byte4 &rhs);
+ RValue<Byte4> operator=(RValue<Byte4> rhs);
+ RValue<Byte4> operator=(const Byte4 &rhs);
// RValue<Byte4> operator=(const Reference<Byte4> &rhs);
static Type *getType();
@@ -656,6 +660,7 @@
RValue<Int> SignMask(RValue<Byte8> x);
// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y);
+RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select);
class SByte8 : public LValue<SByte8>
{
@@ -713,7 +718,6 @@
{
public:
Byte16() = default;
- // Byte16(int x, int y, int z, int w);
Byte16(RValue<Byte16> rhs);
Byte16(const Byte16 &rhs);
Byte16(const Reference<Byte16> &rhs);
@@ -752,6 +756,7 @@
// const Byte16 &operator++(Byte16 &val); // Pre-increment
// RValue<Byte16> operator--(Byte16 &val, int); // Post-decrement
// const Byte16 &operator--(Byte16 &val); // Pre-decrement
+RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select);
class SByte16 : public LValue<SByte16>
{
@@ -1065,7 +1070,7 @@
// RValue<Bool> operator!=(RValue<UShort8> lhs, RValue<UShort8> rhs);
// RValue<Bool> operator==(RValue<UShort8> lhs, RValue<UShort8> rhs);
-RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7);
+RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select);
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y);
class Int : public LValue<Int>
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 103643e..2ffbcdb 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -345,133 +345,199 @@
}
}
+TEST(ReactorUnitTests, Cast)
+{
+ FunctionT<void(void *)> function;
+ {
+ Pointer<Byte> out = function.Arg<0>();
+
+ Int4 c = Int4(0x01020304, 0x05060708, 0x09101112, 0x13141516);
+ *Pointer<Short4>(out + 16 * 0) = Short4(c);
+ *Pointer<Byte4>(out + 16 * 1 + 0) = Byte4(c);
+ *Pointer<Byte4>(out + 16 * 1 + 4) = Byte4(As<Byte8>(c));
+ *Pointer<Byte4>(out + 16 * 1 + 8) = Byte4(As<Short4>(c));
+ }
+
+ auto routine = function("one");
+
+ if(routine)
+ {
+ int out[2][4];
+
+ memset(&out, 0, sizeof(out));
+
+ routine(&out);
+
+ EXPECT_EQ(out[0][0], 0x07080304);
+ EXPECT_EQ(out[0][1], 0x15161112);
+
+ EXPECT_EQ(out[1][0], 0x16120804);
+ EXPECT_EQ(out[1][1], 0x01020304);
+ EXPECT_EQ(out[1][2], 0x06080204);
+ }
+}
+
+static uint16_t swizzleCode4(int i)
+{
+ auto x = (i >> 0) & 0x03;
+ auto y = (i >> 2) & 0x03;
+ auto z = (i >> 4) & 0x03;
+ auto w = (i >> 6) & 0x03;
+ return static_cast<uint16_t>((x << 12) | (y << 8) | (z << 4) | (w << 0));
+}
+
+TEST(ReactorUnitTests, Swizzle4)
+{
+ FunctionT<void(void *)> function;
+ {
+ Pointer<Byte> out = function.Arg<0>();
+
+ for(int i = 0; i < 256; i++)
+ {
+ *Pointer<Float4>(out + 16 * i) = Swizzle(Float4(1.0f, 2.0f, 3.0f, 4.0f), swizzleCode4(i));
+ }
+
+ for(int i = 0; i < 256; i++)
+ {
+ *Pointer<Float4>(out + 16 * (256 + i)) = ShuffleLowHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f), swizzleCode4(i));
+ }
+
+ *Pointer<Float4>(out + 16 * (512 + 0)) = UnpackLow(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
+ *Pointer<Float4>(out + 16 * (512 + 1)) = UnpackHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
+ *Pointer<Int2>(out + 16 * (512 + 2)) = UnpackLow(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
+ *Pointer<Int2>(out + 16 * (512 + 3)) = UnpackHigh(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
+ *Pointer<Short4>(out + 16 * (512 + 4)) = UnpackLow(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
+ *Pointer<Short4>(out + 16 * (512 + 5)) = UnpackHigh(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
+
+ for(int i = 0; i < 256; i++)
+ {
+ *Pointer<Short4>(out + 16 * (512 + 6) + (8 * i)) =
+ Swizzle(Short4(1, 2, 3, 4), swizzleCode4(i));
+ }
+
+ for(int i = 0; i < 256; i++)
+ {
+ *Pointer<Int4>(out + 16 * (512 + 6 + i) + (8 * 256)) =
+ Swizzle(Int4(1, 2, 3, 4), swizzleCode4(i));
+ }
+ }
+
+ auto routine = function("one");
+
+ if(routine)
+ {
+ struct
+ {
+ float f[256 + 256 + 2][4];
+ int i[388][4];
+ } out;
+
+ memset(&out, 0, sizeof(out));
+
+ routine(&out);
+
+ for(int i = 0; i < 256; i++)
+ {
+ EXPECT_EQ(out.f[i][0], float((i >> 0) & 0x03) + 1.0f);
+ EXPECT_EQ(out.f[i][1], float((i >> 2) & 0x03) + 1.0f);
+ EXPECT_EQ(out.f[i][2], float((i >> 4) & 0x03) + 1.0f);
+ EXPECT_EQ(out.f[i][3], float((i >> 6) & 0x03) + 1.0f);
+ }
+
+ for(int i = 0; i < 256; i++)
+ {
+ EXPECT_EQ(out.f[256 + i][0], float((i >> 0) & 0x03) + 1.0f);
+ EXPECT_EQ(out.f[256 + i][1], float((i >> 2) & 0x03) + 1.0f);
+ EXPECT_EQ(out.f[256 + i][2], float((i >> 4) & 0x03) + 5.0f);
+ EXPECT_EQ(out.f[256 + i][3], float((i >> 6) & 0x03) + 5.0f);
+ }
+
+ EXPECT_EQ(out.f[512 + 0][0], 1.0f);
+ EXPECT_EQ(out.f[512 + 0][1], 5.0f);
+ EXPECT_EQ(out.f[512 + 0][2], 2.0f);
+ EXPECT_EQ(out.f[512 + 0][3], 6.0f);
+
+ EXPECT_EQ(out.f[512 + 1][0], 3.0f);
+ EXPECT_EQ(out.f[512 + 1][1], 7.0f);
+ EXPECT_EQ(out.f[512 + 1][2], 4.0f);
+ EXPECT_EQ(out.f[512 + 1][3], 8.0f);
+
+ EXPECT_EQ(out.i[0][0], 0x00050001);
+ EXPECT_EQ(out.i[0][1], 0x00060002);
+ EXPECT_EQ(out.i[0][2], 0x00000000);
+ EXPECT_EQ(out.i[0][3], 0x00000000);
+
+ EXPECT_EQ(out.i[1][0], 0x00070003);
+ EXPECT_EQ(out.i[1][1], 0x00080004);
+ EXPECT_EQ(out.i[1][2], 0x00000000);
+ EXPECT_EQ(out.i[1][3], 0x00000000);
+
+ EXPECT_EQ(out.i[2][0], 0x0A020901);
+ EXPECT_EQ(out.i[2][1], 0x0C040B03);
+ EXPECT_EQ(out.i[2][2], 0x00000000);
+ EXPECT_EQ(out.i[2][3], 0x00000000);
+
+ EXPECT_EQ(out.i[3][0], 0x0E060D05);
+ EXPECT_EQ(out.i[3][1], 0x10080F07);
+ EXPECT_EQ(out.i[3][2], 0x00000000);
+ EXPECT_EQ(out.i[3][3], 0x00000000);
+
+ for(int i = 0; i < 256; i++)
+ {
+ EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] & 0xFFFF,
+ ((i >> 0) & 0x03) + 1);
+ EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] >> 16,
+ ((i >> 2) & 0x03) + 1);
+ EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] & 0xFFFF,
+ ((i >> 4) & 0x03) + 1);
+ EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] >> 16,
+ ((i >> 6) & 0x03) + 1);
+ }
+
+ for(int i = 0; i < 256; i++)
+ {
+ EXPECT_EQ(out.i[132 + i][0], ((i >> 0) & 0x03) + 1);
+ EXPECT_EQ(out.i[132 + i][1], ((i >> 2) & 0x03) + 1);
+ EXPECT_EQ(out.i[132 + i][2], ((i >> 4) & 0x03) + 1);
+ EXPECT_EQ(out.i[132 + i][3], ((i >> 6) & 0x03) + 1);
+ }
+ }
+}
+
TEST(ReactorUnitTests, Swizzle)
{
- auto swizzleCode = [](int i) -> uint16_t {
- auto x = (i >> 0) & 0x03;
- auto y = (i >> 2) & 0x03;
- auto z = (i >> 4) & 0x03;
- auto w = (i >> 6) & 0x03;
- return (x << 12) | (y << 8) | (z << 4) | (w << 0);
- };
-
+ FunctionT<void(void *)> function;
{
- FunctionT<int(void *)> function;
- {
- Pointer<Byte> out = function.Arg<0>();
+ Pointer<Byte> out = function.Arg<0>();
- for(int i = 0; i < 256; i++)
- {
- *Pointer<Float4>(out + 16 * i) = Swizzle(Float4(1.0f, 2.0f, 3.0f, 4.0f), swizzleCode(i));
- }
+ Int4 c = Int4(0x01020304, 0x05060708, 0x09101112, 0x13141516);
+ *Pointer<Byte16>(out + 16 * 0) = Swizzle(As<Byte16>(c), 0xFEDCBA9876543210ull);
+ *Pointer<Byte8>(out + 16 * 1) = Swizzle(As<Byte8>(c), 0x76543210u);
+ *Pointer<UShort8>(out + 16 * 2) = Swizzle(As<UShort8>(c), 0x76543210u);
+ }
- for(int i = 0; i < 256; i++)
- {
- *Pointer<Float4>(out + 16 * (256 + i)) = ShuffleLowHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f), swizzleCode(i));
- }
+ auto routine = function("one");
- *Pointer<Float4>(out + 16 * (512 + 0)) = UnpackLow(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
- *Pointer<Float4>(out + 16 * (512 + 1)) = UnpackHigh(Float4(1.0f, 2.0f, 3.0f, 4.0f), Float4(5.0f, 6.0f, 7.0f, 8.0f));
- *Pointer<Int2>(out + 16 * (512 + 2)) = UnpackLow(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
- *Pointer<Int2>(out + 16 * (512 + 3)) = UnpackHigh(Short4(1, 2, 3, 4), Short4(5, 6, 7, 8));
- *Pointer<Short4>(out + 16 * (512 + 4)) = UnpackLow(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
- *Pointer<Short4>(out + 16 * (512 + 5)) = UnpackHigh(Byte8(1, 2, 3, 4, 5, 6, 7, 8), Byte8(9, 10, 11, 12, 13, 14, 15, 16));
+ if(routine)
+ {
+ int out[3][4];
- for(int i = 0; i < 256; i++)
- {
- *Pointer<Short4>(out + 16 * (512 + 6) + (8 * i)) =
- Swizzle(Short4(1, 2, 3, 4), swizzleCode(i));
- }
+ memset(&out, 0, sizeof(out));
- for(int i = 0; i < 256; i++)
- {
- *Pointer<Int4>(out + 16 * (512 + 6 + i) + (8 * 256)) =
- Swizzle(Int4(1, 2, 3, 4), swizzleCode(i));
- }
+ routine(&out);
- Return(0);
- }
+ EXPECT_EQ(out[0][0], 0x16151413);
+ EXPECT_EQ(out[0][1], 0x12111009);
+ EXPECT_EQ(out[0][2], 0x08070605);
+ EXPECT_EQ(out[0][3], 0x04030201);
- auto routine = function("one");
+ EXPECT_EQ(out[1][0], 0x08070605);
+ EXPECT_EQ(out[1][1], 0x04030201);
- if(routine)
- {
- struct
- {
- float f[256 + 256 + 2][4];
- int i[388][4];
- } out;
-
- memset(&out, 0, sizeof(out));
-
- routine(&out);
-
- for(int i = 0; i < 256; i++)
- {
- EXPECT_EQ(out.f[i][0], float((i >> 0) & 0x03) + 1.0f);
- EXPECT_EQ(out.f[i][1], float((i >> 2) & 0x03) + 1.0f);
- EXPECT_EQ(out.f[i][2], float((i >> 4) & 0x03) + 1.0f);
- EXPECT_EQ(out.f[i][3], float((i >> 6) & 0x03) + 1.0f);
- }
-
- for(int i = 0; i < 256; i++)
- {
- EXPECT_EQ(out.f[256 + i][0], float((i >> 0) & 0x03) + 1.0f);
- EXPECT_EQ(out.f[256 + i][1], float((i >> 2) & 0x03) + 1.0f);
- EXPECT_EQ(out.f[256 + i][2], float((i >> 4) & 0x03) + 5.0f);
- EXPECT_EQ(out.f[256 + i][3], float((i >> 6) & 0x03) + 5.0f);
- }
-
- EXPECT_EQ(out.f[512 + 0][0], 1.0f);
- EXPECT_EQ(out.f[512 + 0][1], 5.0f);
- EXPECT_EQ(out.f[512 + 0][2], 2.0f);
- EXPECT_EQ(out.f[512 + 0][3], 6.0f);
-
- EXPECT_EQ(out.f[512 + 1][0], 3.0f);
- EXPECT_EQ(out.f[512 + 1][1], 7.0f);
- EXPECT_EQ(out.f[512 + 1][2], 4.0f);
- EXPECT_EQ(out.f[512 + 1][3], 8.0f);
-
- EXPECT_EQ(out.i[0][0], 0x00050001);
- EXPECT_EQ(out.i[0][1], 0x00060002);
- EXPECT_EQ(out.i[0][2], 0x00000000);
- EXPECT_EQ(out.i[0][3], 0x00000000);
-
- EXPECT_EQ(out.i[1][0], 0x00070003);
- EXPECT_EQ(out.i[1][1], 0x00080004);
- EXPECT_EQ(out.i[1][2], 0x00000000);
- EXPECT_EQ(out.i[1][3], 0x00000000);
-
- EXPECT_EQ(out.i[2][0], 0x0A020901);
- EXPECT_EQ(out.i[2][1], 0x0C040B03);
- EXPECT_EQ(out.i[2][2], 0x00000000);
- EXPECT_EQ(out.i[2][3], 0x00000000);
-
- EXPECT_EQ(out.i[3][0], 0x0E060D05);
- EXPECT_EQ(out.i[3][1], 0x10080F07);
- EXPECT_EQ(out.i[3][2], 0x00000000);
- EXPECT_EQ(out.i[3][3], 0x00000000);
-
- for(int i = 0; i < 256; i++)
- {
- EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] & 0xFFFF,
- ((i >> 0) & 0x03) + 1);
- EXPECT_EQ(out.i[4 + i / 2][0 + (i % 2) * 2] >> 16,
- ((i >> 2) & 0x03) + 1);
- EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] & 0xFFFF,
- ((i >> 4) & 0x03) + 1);
- EXPECT_EQ(out.i[4 + i / 2][1 + (i % 2) * 2] >> 16,
- ((i >> 6) & 0x03) + 1);
- }
-
- for(int i = 0; i < 256; i++)
- {
- EXPECT_EQ(out.i[132 + i][0], ((i >> 0) & 0x03) + 1);
- EXPECT_EQ(out.i[132 + i][1], ((i >> 2) & 0x03) + 1);
- EXPECT_EQ(out.i[132 + i][2], ((i >> 4) & 0x03) + 1);
- EXPECT_EQ(out.i[132 + i][3], ((i >> 6) & 0x03) + 1);
- }
- }
+ EXPECT_EQ(out[2][0], 0x15161314);
+ EXPECT_EQ(out[2][1], 0x11120910);
+ EXPECT_EQ(out[2][2], 0x07080506);
+ EXPECT_EQ(out[2][3], 0x03040102);
}
}
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index a9375c4..5d057b0 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -2682,24 +2682,12 @@
}
}
-RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
-{
- UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
- return UShort8(0);
-}
-
RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
{
UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
return UShort8(0);
}
-// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
-// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
-// {
-// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
-// }
-
Type *UShort8::getType()
{
return T(Ice::IceType_v8i16);