Remove 'partial precision' intrinsics
These are artefacts of legacy SwiftShader and not useful for Vulkan
spec compliant shader operations by themselves. The Subzero
implementations used full-precision operations so e.g. Rcp_pp()
followed by a Newton-Raphson iteration was actually slower than just
1.0f / x.
Future spec compliant optimizations should use rr::Caps to indicate
which intrinsics are available, what their precision is, and/or which
ones are "fast" (cf. the FMA() vs. MulAdd() distinction).
Bug: b/147516027
Change-Id: I5e3e91ffe82fb3bac9488142bfd5f93b6c4a806d
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/65448
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index a181f29..77b9fbc 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -2871,32 +2871,6 @@
return T(llvm::Type::getInt16Ty(*jit->context));
}
-RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
- if(exactAtPow2)
- {
- // rcpss uses a piecewise-linear approximation which minimizes the relative error
- // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
- return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
- }
- return x86::rcpss(x);
-#else
- return As<Float>(V(lowerRCP(V(x.value()))));
-#endif
-}
-
-RValue<Float> RcpSqrt_pp(RValue<Float> x)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
- return x86::rsqrtss(x);
-#else
- return As<Float>(V(lowerRSQRT(V(x.value()))));
-#endif
-}
-
bool HasRcpApprox()
{
#if defined(__i386__) || defined(__x86_64__)
@@ -3135,32 +3109,6 @@
#endif
}
-RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
- if(exactAtPow2)
- {
- // rcpps uses a piecewise-linear approximation which minimizes the relative error
- // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
- return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
- }
- return x86::rcpps(x);
-#else
- return As<Float4>(V(lowerRCP(V(x.value()))));
-#endif
-}
-
-RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
- return x86::rsqrtps(x);
-#else
- return As<Float4>(V(lowerRSQRT(V(x.value()))));
-#endif
-}
-
RValue<Float4> Sqrt(RValue<Float4> x)
{
RR_DEBUG_INFO_UPDATE_LOC();
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 0f992da..4a68b12 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1771,12 +1771,6 @@
RValue<Float> Abs(RValue<Float> x);
RValue<Float> Max(RValue<Float> x, RValue<Float> y);
RValue<Float> Min(RValue<Float> x, RValue<Float> y);
-// Deprecated: use Rcp
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float> Rcp_pp(RValue<Float> val, bool exactAtPow2 = false);
-// Deprecated: use RcpSqrt
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float> RcpSqrt_pp(RValue<Float> val);
RValue<Float> Rcp(RValue<Float> x, bool relaxedPrecision, bool exactAtPow2 = false);
RValue<Float> RcpSqrt(RValue<Float> x, bool relaxedPrecision);
RValue<Float> Sqrt(RValue<Float> x);
@@ -1947,12 +1941,6 @@
RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y);
RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y);
-// Deprecated: use Rcp
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float4> Rcp_pp(RValue<Float4> val, bool exactAtPow2 = false);
-// Deprecated: use RcpSqrt
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float4> RcpSqrt_pp(RValue<Float4> val);
RValue<Float4> Rcp(RValue<Float4> x, bool relaxedPrecision, bool exactAtPow2 = false);
RValue<Float4> RcpSqrt(RValue<Float4> x, bool relaxedPrecision);
RValue<Float4> Sqrt(RValue<Float4> x);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index c5c083a..5518086 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3831,18 +3831,6 @@
return T(Ice::IceType_i16);
}
-RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
- return 1.0f / x;
-}
-
-RValue<Float> RcpSqrt_pp(RValue<Float> x)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
- return Rcp_pp(Sqrt(x));
-}
-
RValue<Float> Sqrt(RValue<Float> x)
{
RR_DEBUG_INFO_UPDATE_LOC();
@@ -3998,18 +3986,6 @@
return RValue<Float4>(V(result));
}
-RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
- return Float4(1.0f) / x;
-}
-
-RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
-{
- RR_DEBUG_INFO_UPDATE_LOC();
- return Rcp_pp(Sqrt(x));
-}
-
bool HasRcpApprox()
{
// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
diff --git a/tests/PipelineBenchmarks/PipelineBenchmarks.cpp b/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
index 3103fb5..c200d72 100644
--- a/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
+++ b/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
@@ -157,7 +157,3 @@
BENCHMARK_CAPTURE(Transcendental1, rr_Log2, LIFT(rr::Log2))->Arg(REPS);
BENCHMARK_CAPTURE(Transcendental1, sw_Log2_highp, LIFT(sw::Log2), false /* relaxedPrecision */)->Arg(REPS);
BENCHMARK_CAPTURE(Transcendental1, sw_Log2_mediump, LIFT(sw::Log2), true /* relaxedPrecision */)->Arg(REPS);
-
-BENCHMARK_CAPTURE(Transcendental1, rr_Rcp_pp_exactAtPow2_true, LIFT(Rcp_pp), true)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental1, rr_Rcp_pp_exactAtPow2_false, LIFT(Rcp_pp), false)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental1, rr_RcpSqrt_pp, LIFT(RcpSqrt_pp))->Arg(REPS);
diff --git a/tests/ReactorBenchmarks/ReactorBenchmarks.cpp b/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
index 6f42e13..d45a736 100644
--- a/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
+++ b/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
@@ -140,7 +140,3 @@
BENCHMARK_CAPTURE(Transcedental1, rr_Log, Log);
BENCHMARK_CAPTURE(Transcedental1, rr_Exp2, LIFT(Exp2));
BENCHMARK_CAPTURE(Transcedental1, rr_Log2, LIFT(Log2));
-
-BENCHMARK_CAPTURE(Transcedental1, rr_Rcp_pp_exactAtPow2_true, LIFT(Rcp_pp), true);
-BENCHMARK_CAPTURE(Transcedental1, rr_Rcp_pp_exactAtPow2_false, LIFT(Rcp_pp), false);
-BENCHMARK_CAPTURE(Transcedental1, rr_RcpSqrt_pp, LIFT(RcpSqrt_pp));