Implement byte swizzle operations

Add Swizzle() intrinsics for Byte16, Byte8, and Byte4, and add Byte4
constructors and assignment operators. Also move LLVM-specific
implementations to the generic Reactor.cpp source file. On x86 these
all translate to a pshufb instruction, which is very efficient.

Bug: b/148295813
Change-Id: Icf88fe1621623f8104c4a642d560643a01b9ef55
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40549
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp index b3ce607..8801f2a 100644 --- a/src/Reactor/Reactor.cpp +++ b/src/Reactor/Reactor.cpp
@@ -1250,12 +1250,62 @@ storeValue(Nucleus::createBitCast(cast.value, getType())); } +Byte4::Byte4(RValue<UShort4> cast) +{ + // TODO(b/148379603): Optimize narrowing swizzle. + *this = As<Byte4>(Swizzle(As<Byte8>(cast), 0x0246'0246)); +} + +Byte4::Byte4(RValue<Short4> cast) +{ + // TODO(b/148379603): Optimize narrowing swizzle. + *this = As<Byte4>(Swizzle(As<Byte8>(cast), 0x0246'0246)); +} + +Byte4::Byte4(RValue<UInt4> cast) +{ + // TODO(b/148379603): Optimize narrowing swizzle. + *this = As<Byte4>(Swizzle(As<Byte16>(cast), 0x048C'048C'048C'048C)); +} + +Byte4::Byte4(RValue<Int4> cast) +{ + // TODO(b/148379603): Optimize narrowing swizzle. + *this = As<Byte4>(Swizzle(As<Byte16>(cast), 0x048C'048C'048C'048C)); +} + +Byte4::Byte4(RValue<Byte4> rhs) +{ + storeValue(rhs.value); +} + +Byte4::Byte4(const Byte4 &rhs) +{ + Value *value = rhs.loadValue(); + storeValue(value); +} + Byte4::Byte4(const Reference<Byte4> &rhs) { Value *value = rhs.loadValue(); storeValue(value); } +RValue<Byte4> Byte4::operator=(RValue<Byte4> rhs) +{ + storeValue(rhs.value); + + return rhs; +} + +RValue<Byte4> Byte4::operator=(const Byte4 &rhs) +{ + Value *value = rhs.loadValue(); + storeValue(value); + + return RValue<Byte4>(value); +} + Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) { int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 }; @@ -1417,8 +1467,35 @@ return RValue<Byte8>(Nucleus::createNot(val.value)); } +RValue<Byte8> Swizzle(RValue<Byte8> x, uint32_t select) +{ + // Real type is v16i8 + // TODO(b/148379603): Optimize narrowing swizzle. 
+ int shuffle[16] = { + static_cast<int>((select >> 28) & 0x07), + static_cast<int>((select >> 24) & 0x07), + static_cast<int>((select >> 20) & 0x07), + static_cast<int>((select >> 16) & 0x07), + static_cast<int>((select >> 12) & 0x07), + static_cast<int>((select >> 8) & 0x07), + static_cast<int>((select >> 4) & 0x07), + static_cast<int>((select >> 0) & 0x07), + static_cast<int>((select >> 28) & 0x07), + static_cast<int>((select >> 24) & 0x07), + static_cast<int>((select >> 20) & 0x07), + static_cast<int>((select >> 16) & 0x07), + static_cast<int>((select >> 12) & 0x07), + static_cast<int>((select >> 8) & 0x07), + static_cast<int>((select >> 4) & 0x07), + static_cast<int>((select >> 0) & 0x07), + }; + + return As<Byte8>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); +} + RValue<Short4> Unpack(RValue<Byte4> x) { + // TODO(b/148379603): Optimize narrowing swizzle. int shuffle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 }; // Real type is v16i8 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); } @@ -1430,12 +1507,14 @@ RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) { + // TODO(b/148379603): Optimize narrowing swizzle. int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); } RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) { + // TODO(b/148379603): Optimize narrowing swizzle. int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323)); @@ -1606,12 +1685,14 @@ RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y) { + // TODO(b/148379603): Optimize narrowing swizzle. 
int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); } RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y) { + // TODO(b/148379603): Optimize narrowing swizzle. int shuffle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }; // Real type is v16i8 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); return As<Short4>(Swizzle(As<Int4>(lowHigh), 0x2323)); @@ -1657,6 +1738,30 @@ return RValue<Byte16>(value); } +RValue<Byte16> Swizzle(RValue<Byte16> x, uint64_t select) +{ + int shuffle[16] = { + static_cast<int>((select >> 60) & 0x0F), + static_cast<int>((select >> 56) & 0x0F), + static_cast<int>((select >> 52) & 0x0F), + static_cast<int>((select >> 48) & 0x0F), + static_cast<int>((select >> 44) & 0x0F), + static_cast<int>((select >> 40) & 0x0F), + static_cast<int>((select >> 36) & 0x0F), + static_cast<int>((select >> 32) & 0x0F), + static_cast<int>((select >> 28) & 0x0F), + static_cast<int>((select >> 24) & 0x0F), + static_cast<int>((select >> 20) & 0x0F), + static_cast<int>((select >> 16) & 0x0F), + static_cast<int>((select >> 12) & 0x0F), + static_cast<int>((select >> 8) & 0x0F), + static_cast<int>((select >> 4) & 0x0F), + static_cast<int>((select >> 0) & 0x0F), + }; + + return As<Byte16>(Nucleus::createShuffleVector(x.value, x.value, shuffle)); +} + Short2::Short2(RValue<Short4> cast) { storeValue(Nucleus::createBitCast(cast.value, getType())); @@ -1890,6 +1995,7 @@ RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) { + // TODO(b/148379603): Optimize narrowing swizzle. 
int shuffle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 }; // Real type is v8i16 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); return As<Int2>(Swizzle(As<Int4>(lowHigh), 0x2323)); @@ -1898,6 +2004,7 @@ RValue<Short4> Swizzle(RValue<Short4> x, uint16_t select) { // Real type is v8i16 + // TODO(b/148379603): Optimize narrowing swizzle. int shuffle[8] = { (select >> 12) & 0x03, (select >> 8) & 0x03, @@ -2214,6 +2321,22 @@ return RValue<UShort8>(Nucleus::createNot(val.value)); } +RValue<UShort8> Swizzle(RValue<UShort8> x, uint32_t select) +{ + int swizzle[16] = { + static_cast<int>((select >> 28) & 0x07), + static_cast<int>((select >> 24) & 0x07), + static_cast<int>((select >> 20) & 0x07), + static_cast<int>((select >> 16) & 0x07), + static_cast<int>((select >> 12) & 0x07), + static_cast<int>((select >> 8) & 0x07), + static_cast<int>((select >> 4) & 0x07), + static_cast<int>((select >> 0) & 0x07), + }; + + return RValue<UShort8>(Nucleus::createShuffleVector(x.value, x.value, swizzle)); +} + Int::Int(Argument<Int> argument) { storeValue(argument.value); @@ -3073,12 +3196,14 @@ RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y) { + // TODO(b/148379603): Optimize narrowing swizzle. int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); } RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y) { + // TODO(b/148379603): Optimize narrowing swizzle. int shuffle[4] = { 0, 4, 1, 5 }; // Real type is v4i32 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle)); return As<Short4>(Swizzle(lowHigh, 0x2323));