Implement 128-bit insert/extract
These operations facilitate the transition from 4-wide to arbitrary
width SIMD.
Bug: b/214583550
Change-Id: I13b72c1ff1f758556fc544f8351bf10ede1f2d5b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/66673
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index e652dfb..c5dbf12 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -4258,6 +4258,21 @@
#endif
}
+RValue<Int4> Extract128(RValue<SIMD::Int> val, int i)  // Returns 128-bit element i of val, reinterpreted as an Int4.
+{
+ ASSERT(i >= 0 && i < SIMD::Width / 4);  // val holds SIMD::Width / 4 elements of 128 bits; an out-of-range index would silently yield a poison value.
+ llvm::Value *v128 = jit->builder->CreateBitCast(V(val.value()), llvm::FixedVectorType::get(llvm::IntegerType::get(*jit->context, 128), SIMD::Width / 4));  // View val as a vector of i128.
+ return As<Int4>(V(jit->builder->CreateExtractElement(v128, i)));
+}
+
+RValue<SIMD::Int> Insert128(RValue<SIMD::Int> val, RValue<Int4> element, int i)  // Returns val with its 128-bit element i replaced by element.
+{
+ ASSERT(i >= 0 && i < SIMD::Width / 4);  // val holds SIMD::Width / 4 elements of 128 bits; an out-of-range index would silently yield a poison value.
+ llvm::Value *v128 = jit->builder->CreateBitCast(V(val.value()), llvm::FixedVectorType::get(llvm::IntegerType::get(*jit->context, 128), SIMD::Width / 4));  // View val as a vector of i128.
+ llvm::Value *a = jit->builder->CreateBitCast(V(element.value()), llvm::IntegerType::get(*jit->context, 128));  // Pack the four lanes of element into a single i128.
+ return As<SIMD::Int>(V(jit->builder->CreateInsertElement(v128, a, i)));
+}
+
Type *SIMD::Int::type()
{
return T(llvm::VectorType::get(T(scalar::Int::type()), SIMD::Width, false));
@@ -4344,6 +4359,21 @@
return (x & less) | (y & ~less);
}
+RValue<UInt4> Extract128(RValue<SIMD::UInt> val, int i)  // Returns 128-bit element i of val, reinterpreted as a UInt4.
+{
+ ASSERT(i >= 0 && i < SIMD::Width / 4);  // val holds SIMD::Width / 4 elements of 128 bits; an out-of-range index would silently yield a poison value.
+ llvm::Value *v128 = jit->builder->CreateBitCast(V(val.value()), llvm::FixedVectorType::get(llvm::IntegerType::get(*jit->context, 128), SIMD::Width / 4));  // View val as a vector of i128.
+ return As<UInt4>(V(jit->builder->CreateExtractElement(v128, i)));
+}
+
+RValue<SIMD::UInt> Insert128(RValue<SIMD::UInt> val, RValue<UInt4> element, int i)  // Returns val with its 128-bit element i replaced by element.
+{
+ ASSERT(i >= 0 && i < SIMD::Width / 4);  // val holds SIMD::Width / 4 elements of 128 bits; an out-of-range index would silently yield a poison value.
+ llvm::Value *v128 = jit->builder->CreateBitCast(V(val.value()), llvm::FixedVectorType::get(llvm::IntegerType::get(*jit->context, 128), SIMD::Width / 4));  // View val as a vector of i128.
+ llvm::Value *a = jit->builder->CreateBitCast(V(element.value()), llvm::IntegerType::get(*jit->context, 128));  // Pack the four lanes of element into a single i128.
+ return As<SIMD::UInt>(V(jit->builder->CreateInsertElement(v128, a, i)));
+}
+
Type *SIMD::UInt::type()
{
return T(llvm::VectorType::get(T(scalar::UInt::type()), SIMD::Width, false));
@@ -4508,6 +4538,21 @@
return -Floor(-x);
}
+RValue<Float4> Extract128(RValue<SIMD::Float> val, int i)  // Returns 128-bit element i of val, reinterpreted as a Float4.
+{
+ ASSERT(i >= 0 && i < SIMD::Width / 4);  // val holds SIMD::Width / 4 elements of 128 bits; an out-of-range index would silently yield a poison value.
+ llvm::Value *v128 = jit->builder->CreateBitCast(V(val.value()), llvm::FixedVectorType::get(llvm::IntegerType::get(*jit->context, 128), SIMD::Width / 4));  // View val as a vector of i128.
+ return As<Float4>(V(jit->builder->CreateExtractElement(v128, i)));
+}
+
+RValue<SIMD::Float> Insert128(RValue<SIMD::Float> val, RValue<Float4> element, int i)  // Returns val with its 128-bit element i replaced by element.
+{
+ ASSERT(i >= 0 && i < SIMD::Width / 4);  // val holds SIMD::Width / 4 elements of 128 bits; an out-of-range index would silently yield a poison value.
+ llvm::Value *v128 = jit->builder->CreateBitCast(V(val.value()), llvm::FixedVectorType::get(llvm::IntegerType::get(*jit->context, 128), SIMD::Width / 4));  // View val as a vector of i128.
+ llvm::Value *a = jit->builder->CreateBitCast(V(element.value()), llvm::IntegerType::get(*jit->context, 128));  // Pack the four lanes of element into a single i128.
+ return As<SIMD::Float>(V(jit->builder->CreateInsertElement(v128, a, i)));
+}
+
Type *SIMD::Float::type()
{
return T(llvm::VectorType::get(T(scalar::Float::type()), SIMD::Width, false));
diff --git a/src/Reactor/SIMD.hpp b/src/Reactor/SIMD.hpp
index cb15f60..de7c2c5 100644
--- a/src/Reactor/SIMD.hpp
+++ b/src/Reactor/SIMD.hpp
@@ -27,6 +27,12 @@
using Float = rr::Float;
} // namespace scalar
+namespace packed {  // Names for Reactor's 4-element vector types, alongside the scalar:: and SIMD:: namespaces.
+using Int4 = rr::Int4;
+using UInt4 = rr::UInt4;
+using Float4 = rr::Float4;
+} // namespace packed
+
namespace SIMD {
extern const int Width;
@@ -252,6 +258,8 @@
RValue<SIMD::Int> RoundIntClamped(RValue<SIMD::Float> cast);
RValue<scalar::Int> Extract(RValue<SIMD::Int> val, int i);
RValue<SIMD::Int> Insert(RValue<SIMD::Int> val, RValue<scalar::Int> element, int i);
+RValue<packed::Int4> Extract128(RValue<SIMD::Int> val, int i);  // Returns 128-bit element i of val (one group of four lanes) as an Int4.
+RValue<SIMD::Int> Insert128(RValue<SIMD::Int> val, RValue<packed::Int4> element, int i);  // Returns val with 128-bit element i replaced by element.
RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
@@ -307,6 +315,8 @@
RValue<SIMD::UInt> Min(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
RValue<scalar::UInt> Extract(RValue<SIMD::UInt> val, int i);
RValue<SIMD::UInt> Insert(RValue<SIMD::UInt> val, RValue<scalar::UInt> element, int i);
+RValue<packed::UInt4> Extract128(RValue<SIMD::UInt> val, int i);  // Returns 128-bit element i of val (one group of four lanes) as a UInt4.
+RValue<SIMD::UInt> Insert128(RValue<SIMD::UInt> val, RValue<packed::UInt4> element, int i);  // Returns val with 128-bit element i replaced by element.
// RValue<SIMD::UInt> RoundInt(RValue<SIMD::Float> cast);
RValue<SIMD::Float> operator+(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs);
@@ -336,6 +346,8 @@
RValue<SIMD::Float> Sqrt(RValue<SIMD::Float> x);
RValue<SIMD::Float> Insert(RValue<SIMD::Float> val, RValue<rr ::Float> element, int i);
RValue<rr ::Float> Extract(RValue<SIMD::Float> x, int i);
+RValue<packed::Float4> Extract128(RValue<SIMD::Float> val, int i);  // Returns 128-bit element i of val (one group of four lanes) as a Float4.
+RValue<SIMD::Float> Insert128(RValue<SIMD::Float> val, RValue<packed::Float4> element, int i);  // Returns val with 128-bit element i replaced by element.
// Ordered comparison functions
RValue<SIMD::Int> CmpEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index f0c0aac..3f9e802 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -4886,6 +4886,22 @@
}
}
+RValue<Int4> Extract128(RValue<SIMD::Int> val, int i)  // Returns 128-bit element i of val as an Int4.
+{
+ ASSERT(SIMD::Width == 4);  // This implementation only handles 4-wide SIMD vectors,
+ ASSERT(i == 0);  // which contain a single 128-bit element, so 0 is the only valid index.
+
+ return As<Int4>(val);  // The whole vector is element 0; As<> just reinterprets it as Int4.
+}
+
+RValue<SIMD::Int> Insert128(RValue<SIMD::Int> val, RValue<Int4> element, int i)  // Returns val with 128-bit element i replaced by element.
+{
+ ASSERT(SIMD::Width == 4);  // This implementation only handles 4-wide SIMD vectors,
+ ASSERT(i == 0);  // which contain a single 128-bit element, so 0 is the only valid index.
+
+ return As<SIMD::Int>(element);  // Replacing the only 128-bit element replaces the entire vector, so val's contents are not used.
+}
+
Type *SIMD::Int::type()
{
return T(Ice::IceType_v4i32);
@@ -5009,6 +5025,22 @@
return RValue<SIMD::UInt>(V(result));
}
+RValue<UInt4> Extract128(RValue<SIMD::UInt> val, int i)  // Returns 128-bit element i of val as a UInt4.
+{
+ ASSERT(SIMD::Width == 4);  // This implementation only handles 4-wide SIMD vectors,
+ ASSERT(i == 0);  // which contain a single 128-bit element, so 0 is the only valid index.
+
+ return As<UInt4>(val);  // The whole vector is element 0; As<> just reinterprets it as UInt4.
+}
+
+RValue<SIMD::UInt> Insert128(RValue<SIMD::UInt> val, RValue<UInt4> element, int i)  // Returns val with 128-bit element i replaced by element.
+{
+ ASSERT(SIMD::Width == 4);  // This implementation only handles 4-wide SIMD vectors,
+ ASSERT(i == 0);  // which contain a single 128-bit element, so 0 is the only valid index.
+
+ return As<SIMD::UInt>(element);  // Replacing the only 128-bit element replaces the entire vector, so val's contents are not used.
+}
+
Type *SIMD::UInt::type()
{
return T(Ice::IceType_v4i32);
@@ -5277,6 +5309,22 @@
}
}
+RValue<Float4> Extract128(RValue<SIMD::Float> val, int i)  // Returns 128-bit element i of val as a Float4.
+{
+ ASSERT(SIMD::Width == 4);  // This implementation only handles 4-wide SIMD vectors,
+ ASSERT(i == 0);  // which contain a single 128-bit element, so 0 is the only valid index.
+
+ return As<Float4>(val);  // The whole vector is element 0; As<> just reinterprets it as Float4.
+}
+
+RValue<SIMD::Float> Insert128(RValue<SIMD::Float> val, RValue<Float4> element, int i)  // Returns val with 128-bit element i replaced by element.
+{
+ ASSERT(SIMD::Width == 4);  // This implementation only handles 4-wide SIMD vectors,
+ ASSERT(i == 0);  // which contain a single 128-bit element, so 0 is the only valid index.
+
+ return As<SIMD::Float>(element);  // Replacing the only 128-bit element replaces the entire vector, so val's contents are not used.
+}
+
Type *SIMD::Float::type()
{
return T(Ice::IceType_v4f32);
diff --git a/tests/ReactorUnitTests/ReactorSIMD.cpp b/tests/ReactorUnitTests/ReactorSIMD.cpp
index fbc9071..de2deda 100644
--- a/tests/ReactorUnitTests/ReactorSIMD.cpp
+++ b/tests/ReactorUnitTests/ReactorSIMD.cpp
@@ -95,6 +95,45 @@
}
}
+TEST(ReactorSIMD, InsertExtract128)
+{
+ FunctionT<void(int *, int *)> function;
+ {
+ Pointer<Int> dst = Pointer<Int>(function.Arg<0>());
+ Pointer<Int> src = Pointer<Int>(function.Arg<1>());
+ SIMD::Int diff = *Pointer<SIMD::Int>(src);
+ SIMD::Int acc = *Pointer<SIMD::Int>(dst);
+ diff -= acc;
+
+ // Shift each 128-bit group of the difference left by (group index + 1) and write it into the same group of the result.
+ const int groupCount = SIMD::Width / 4;
+ for(int group = 0; group < groupCount; group++)
+ {
+ acc = Insert128(acc, Extract128(diff, group) << (group + 1), group);
+ }
+
+ *Pointer<SIMD::Int>(dst) = acc;
+ }
+
+ auto routine = function(testName().c_str());
+
+ // Host-side data: r starts as all zeros, a holds 1, 2, ..., SIMD::Width.
+ std::vector<int> r(SIMD::Width, 0);
+ std::vector<int> a(SIMD::Width);
+ for(int i = 0; i < SIMD::Width; i++)
+ {
+ a[i] = 1 + i;
+ }
+
+ routine(r.data(), a.data());
+
+ // Lane i belongs to 128-bit group i / 4, which the routine shifted left by (i / 4 + 1).
+ for(int i = 0; i < SIMD::Width; i++)
+ {
+ ASSERT_EQ(r[i], a[i] << (i / 4 + 1));
+ }
+}
+
TEST(ReactorSIMD, Intrinsics_Scatter)
{
Function<Void(Pointer<Float> base, Pointer<Float4> val, Pointer<Int4> offsets)> function;