Implement byte swizzle operations

Add Swizzle() intrinsics for Byte16, Byte8, and Byte4, and add Byte4
constructors and assignment operators. Also move LLVM-specific
implementations to the generic Reactor.cpp source file.

On x86 these all translate to a pshufb instruction, which is very
efficient.

Bug: b/148295813
Change-Id: Icf88fe1621623f8104c4a642d560643a01b9ef55
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40549
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index b3ce607..8801f2a 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -1250,12 +1250,62 @@
 	storeValue(Nucleus::createBitCast(cast.value, getType()));
 }
 
+Byte4::Byte4(RValue<UShort4> cast)
+{
+	// Builds the Byte4 by byte-swizzling the Byte8 view of 'cast'. TODO(b/148379603): Optimize narrowing swizzle.
+	*this = As<Byte4>(Swizzle(As<Byte8>(cast), 0x0246'0246));  // nibbles decode to shuffle indices 0, 2, 4, 6, 0, 2, 4, 6
+}
+
+Byte4::Byte4(RValue<Short4> cast)
+{
+	// Builds the Byte4 by byte-swizzling the Byte8 view of 'cast'. TODO(b/148379603): Optimize narrowing swizzle.
+	*this = As<Byte4>(Swizzle(As<Byte8>(cast), 0x0246'0246));  // nibbles decode to shuffle indices 0, 2, 4, 6, 0, 2, 4, 6
+}
+
+Byte4::Byte4(RValue<UInt4> cast)
+{
+	// Builds the Byte4 by byte-swizzling the Byte16 view of 'cast'. TODO(b/148379603): Optimize narrowing swizzle.
+	*this = As<Byte4>(Swizzle(As<Byte16>(cast), 0x048C'048C'048C'048C));  // nibbles decode to shuffle indices 0, 4, 8, 12, repeated four times
+}
+
+Byte4::Byte4(RValue<Int4> cast)
+{
+	// Builds the Byte4 by byte-swizzling the Byte16 view of 'cast'. TODO(b/148379603): Optimize narrowing swizzle.
+	*this = As<Byte4>(Swizzle(As<Byte16>(cast), 0x048C'048C'048C'048C));  // nibbles decode to shuffle indices 0, 4, 8, 12, repeated four times
+}
+
+Byte4::Byte4(RValue<Byte4> rhs)  // Constructs a Byte4 variable from an r-value of the same type.
+{
+	storeValue(rhs.value);  // The r-value's Value is stored directly; nothing is loaded first.
+}
+
+Byte4::Byte4(const Byte4 &rhs)
+{
+	// Copy-construct: read rhs's current value and store it into this variable.
+	storeValue(rhs.loadValue());
+}
+
 Byte4::Byte4(const Reference<Byte4> &rhs)
 {
 	Value *value = rhs.loadValue();
 	storeValue(value);
 }
 
+RValue<Byte4> Byte4::operator=(RValue<Byte4> rhs)  // Assigns an r-value of the same type to this variable.
+{
+	storeValue(rhs.value);
+	// The assigned r-value itself is the result of the assignment expression.
+	return rhs;
+}
+
+RValue<Byte4> Byte4::operator=(const Byte4 &rhs)
+{
+	// Load rhs once; the same Value is stored here and wrapped as the result.
+	Value *const loaded = rhs.loadValue();
+	storeValue(loaded);
+	return RValue<Byte4>(loaded);
+}
+
 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
 {
 	int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
@@ -1417,8 +1467,35 @@
 	return RValue<Byte8>(Nucleus::createNot(val.value));
 }
 
+RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select)
+{
+	// Decodes 'select' into a byte shuffle of x. 'select' holds eight 4-bit
+	// fields, most significant first: bits 31:28 supply shuffle index 0 and
+	// bits 3:0 supply shuffle index 7. Each field is masked to its low three
+	// bits, so every index is in the range 0-7.
+	// For example, 0x0246'0246 decodes to 0, 2, 4, 6, 0, 2, 4, 6.
+	//
+	// Real type is v16i8, so sixteen indices are produced; indices 8-15
+	// repeat indices 0-7.
+	// TODO(b/148379603): Optimize narrowing swizzle.
+	constexpr int fieldCount = 8;      // 4-bit fields packed into 'select'
+	constexpr int realLaneCount = 16;  // entries handed to the shuffle
+
+	int shuffle[realLaneCount];
+	for(int lane = 0; lane < realLaneCount; lane++)
+	{
+		// lane % fieldCount maps lanes 8-15 back onto fields 0-7.
+		const int field = lane % fieldCount;
+		const int shift = 4 * (fieldCount - 1 - field);
+		shuffle[lane] = static_cast<int>((select >> shift) & 0x07);
+	}
+
+	return As<Byte8>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
+}
+
 RValue<Short4> Unpack(RValue<Byte4> x)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };  // Real type is v16i8
 	return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
 }
@@ -1430,12 +1507,14 @@
 
 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };  // Real type is v16i8
 	return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
 }
 
 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };  // Real type is v16i8
 	auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
 	return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
@@ -1606,12 +1685,14 @@
 
 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };  // Real type is v16i8
 	return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
 }
 
 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };  // Real type is v16i8
 	auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
 	return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323));
@@ -1657,6 +1738,30 @@
 	return RValue<Byte16>(value);
 }
 
+RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select)
+{
+	// Decodes 'select' into a byte shuffle of x. 'select' holds sixteen 4-bit
+	// fields, most significant first: bits 63:60 supply shuffle index 0 and
+	// bits 3:0 supply shuffle index 15.
+	//
+	// For example, 0x048C'048C'048C'048C decodes to 0, 4, 8, 12 repeated four
+	// times.
+	constexpr int laneCount = 16;
+
+	int shuffle[laneCount];
+	for(int lane = 0; lane < laneCount; lane++)
+	{
+		// Walk the selector from its top nibble down to its bottom one.
+		const int shift = 4 * (laneCount - 1 - lane);
+		const uint64_t nibble = (select >> shift) & 0x0F;
+		shuffle[lane] = static_cast<int>(nibble);
+	}
+
+	// x is passed as both shuffle operands; the 4-bit mask keeps every index
+	// below sixteen.
+	return As<Byte16>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
+}
+
 Short2::Short2(RValue<Short4> cast)
 {
 	storeValue(Nucleus::createBitCast(cast.value, getType()));
@@ -1890,6 +1995,7 @@
 
 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };  // Real type is v8i16
 	auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
 	return As<Int2>(Swizzle(As<Int4>(lowHigh), 0x2323));
@@ -1898,6 +2004,7 @@
 RValue<Short4> Swizzle(RValue<Short4> x, uint16_t select)
 {
 	// Real type is v8i16
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[8] = {
 		(select >> 12) & 0x03,
 		(select >> 8) & 0x03,
@@ -2214,6 +2321,22 @@
 	return RValue<UShort8>(Nucleus::createNot(val.value));
 }
 
+RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select)
+{
+	// One shuffle index per UShort8 element: 'select' holds eight 4-bit fields, most significant first, each masked to 0-7.
+	int shuffle[8] = {
+		static_cast<int>((select >> 28) & 0x07),
+		static_cast<int>((select >> 24) & 0x07),
+		static_cast<int>((select >> 20) & 0x07),
+		static_cast<int>((select >> 16) & 0x07),
+		static_cast<int>((select >> 12) & 0x07),
+		static_cast<int>((select >> 8) & 0x07),
+		static_cast<int>((select >> 4) & 0x07),
+		static_cast<int>((select >> 0) & 0x07),
+	};
+	return RValue<UShort8>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
+}
+
 Int::Int(Argument<Int> argument)
 {
 	storeValue(argument.value);
@@ -3073,12 +3196,14 @@
 
 RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[4] = { 0, 4, 1, 5 };  // Real type is v4i32
 	return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
 }
 
 RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
 {
+	// TODO(b/148379603): Optimize narrowing swizzle.
 	int shuffle[4] = { 0, 4, 1, 5 };  // Real type is v4i32
 	auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
 	return As<Short4>(Swizzle(lowHigh, 0x2323));