SpirvShader: Replace hand-rolled bitreverse with LLVM intrinsic
Moved the hand-rolled implementation to Subzero.
As we've started exposing bit intrinsics, we might as well fix the TODOs.
Bug: b/126873455
Tests: dEQP-VK.glsl.builtin.function.integer.bitfieldreverse.*
Change-Id: Ifadf25045b9ee4b435d9f6750c63334b7d4cad83
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/28791
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 031bbf1..201541d 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -2772,17 +2772,7 @@
}
case spv::OpBitReverse:
{
- // TODO: Add an intrinsic to reactor. Even if there isn't a
- // single vector instruction, there may be target-dependent
- // ways to make this faster.
- // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
- SIMD::UInt v = src.UInt(i);
- v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
- v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
- v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
- v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
- v = (v >> 16) | (v << 16);
- dst.move(i, v);
+ dst.move(i, BitReverse(src.UInt(i)));
break;
}
case spv::OpBitCount:
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 418e66d..de30f2e 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -3214,6 +3214,12 @@
return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
}
+ RValue<UInt4> BitReverse(RValue<UInt4> v)
+ {
+ auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::bitreverse, { T(UInt4::getType()) } );
+ return RValue<UInt4>(V(::builder->CreateCall(func, { V(v.value) })));
+ }
+
RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
{
#if REACTOR_LLVM_VERSION < 7
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 1d2b2b0..c119d45 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2232,6 +2232,7 @@
// Bit Manipulation functions.
// TODO: Currentlhy unimplemented for Subzero.
+ RValue<UInt4> BitReverse(RValue<UInt4> x);
// Count leading zeros.
// Returns 32 when: isZeroUndef && x == 0.
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 27604a8..c6e16f0 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3359,6 +3359,18 @@
}
}
+ RValue<UInt4> BitReverse(RValue<UInt4> x)
+ {
+ // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+ UInt4 v = x;
+ v = ((v >> 1) & UInt4(0x55555555)) | ((v & UInt4(0x55555555)) << 1);
+ v = ((v >> 2) & UInt4(0x33333333)) | ((v & UInt4(0x33333333)) << 2);
+ v = ((v >> 4) & UInt4(0x0F0F0F0F)) | ((v & UInt4(0x0F0F0F0F)) << 4);
+ v = ((v >> 8) & UInt4(0x00FF00FF)) | ((v & UInt4(0x00FF00FF)) << 8);
+ v = (v >> 16) | (v << 16);
+ return v;
+ }
+
Type *Float4::getType()
{
return T(Ice::IceType_v4f32);