Remove 'partial precision' intrinsics

These are artefacts of legacy SwiftShader and not useful for Vulkan
spec compliant shader operations by themselves. The Subzero
implementations used full-precision operations so e.g. Rcp_pp()
followed by a Newton- Raphson iteration was actually slower than just
1.0f / x.

Future spec compliant optimizations should use rr::Caps to indicate
which intrinsics are available, what their precision is, and/or which
ones are "fast" (cf. the FMA() vs. MulAdd() distinction).

Bug: b/147516027
Change-Id: I5e3e91ffe82fb3bac9488142bfd5f93b6c4a806d
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/65448
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index a181f29..77b9fbc 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -2871,32 +2871,6 @@
 	return T(llvm::Type::getInt16Ty(*jit->context));
 }
 
-RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
-	if(exactAtPow2)
-	{
-		// rcpss uses a piecewise-linear approximation which minimizes the relative error
-		// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
-		return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
-	}
-	return x86::rcpss(x);
-#else
-	return As<Float>(V(lowerRCP(V(x.value()))));
-#endif
-}
-
-RValue<Float> RcpSqrt_pp(RValue<Float> x)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
-	return x86::rsqrtss(x);
-#else
-	return As<Float>(V(lowerRSQRT(V(x.value()))));
-#endif
-}
-
 bool HasRcpApprox()
 {
 #if defined(__i386__) || defined(__x86_64__)
@@ -3135,32 +3109,6 @@
 #endif
 }
 
-RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
-	if(exactAtPow2)
-	{
-		// rcpps uses a piecewise-linear approximation which minimizes the relative error
-		// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
-		return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
-	}
-	return x86::rcpps(x);
-#else
-	return As<Float4>(V(lowerRCP(V(x.value()))));
-#endif
-}
-
-RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-#if defined(__i386__) || defined(__x86_64__)
-	return x86::rsqrtps(x);
-#else
-	return As<Float4>(V(lowerRSQRT(V(x.value()))));
-#endif
-}
-
 RValue<Float4> Sqrt(RValue<Float4> x)
 {
 	RR_DEBUG_INFO_UPDATE_LOC();
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 0f992da..4a68b12 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1771,12 +1771,6 @@
 RValue<Float> Abs(RValue<Float> x);
 RValue<Float> Max(RValue<Float> x, RValue<Float> y);
 RValue<Float> Min(RValue<Float> x, RValue<Float> y);
-// Deprecated: use Rcp
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float> Rcp_pp(RValue<Float> val, bool exactAtPow2 = false);
-// Deprecated: use RcpSqrt
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float> RcpSqrt_pp(RValue<Float> val);
 RValue<Float> Rcp(RValue<Float> x, bool relaxedPrecision, bool exactAtPow2 = false);
 RValue<Float> RcpSqrt(RValue<Float> x, bool relaxedPrecision);
 RValue<Float> Sqrt(RValue<Float> x);
@@ -1947,12 +1941,6 @@
 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y);
 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y);
 
-// Deprecated: use Rcp
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float4> Rcp_pp(RValue<Float4> val, bool exactAtPow2 = false);
-// Deprecated: use RcpSqrt
-// TODO(b/147516027): Remove when GLES frontend is removed
-RValue<Float4> RcpSqrt_pp(RValue<Float4> val);
 RValue<Float4> Rcp(RValue<Float4> x, bool relaxedPrecision, bool exactAtPow2 = false);
 RValue<Float4> RcpSqrt(RValue<Float4> x, bool relaxedPrecision);
 RValue<Float4> Sqrt(RValue<Float4> x);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index c5c083a..5518086 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3831,18 +3831,6 @@
 	return T(Ice::IceType_i16);
 }
 
-RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-	return 1.0f / x;
-}
-
-RValue<Float> RcpSqrt_pp(RValue<Float> x)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-	return Rcp_pp(Sqrt(x));
-}
-
 RValue<Float> Sqrt(RValue<Float> x)
 {
 	RR_DEBUG_INFO_UPDATE_LOC();
@@ -3998,18 +3986,6 @@
 	return RValue<Float4>(V(result));
 }
 
-RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-	return Float4(1.0f) / x;
-}
-
-RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
-{
-	RR_DEBUG_INFO_UPDATE_LOC();
-	return Rcp_pp(Sqrt(x));
-}
-
 bool HasRcpApprox()
 {
 	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
diff --git a/tests/PipelineBenchmarks/PipelineBenchmarks.cpp b/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
index 3103fb5..c200d72 100644
--- a/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
+++ b/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
@@ -157,7 +157,3 @@
 BENCHMARK_CAPTURE(Transcendental1, rr_Log2, LIFT(rr::Log2))->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, sw_Log2_highp, LIFT(sw::Log2), false /* relaxedPrecision */)->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, sw_Log2_mediump, LIFT(sw::Log2), true /* relaxedPrecision */)->Arg(REPS);
-
-BENCHMARK_CAPTURE(Transcendental1, rr_Rcp_pp_exactAtPow2_true, LIFT(Rcp_pp), true)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental1, rr_Rcp_pp_exactAtPow2_false, LIFT(Rcp_pp), false)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental1, rr_RcpSqrt_pp, LIFT(RcpSqrt_pp))->Arg(REPS);
diff --git a/tests/ReactorBenchmarks/ReactorBenchmarks.cpp b/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
index 6f42e13..d45a736 100644
--- a/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
+++ b/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
@@ -140,7 +140,3 @@
 BENCHMARK_CAPTURE(Transcedental1, rr_Log, Log);
 BENCHMARK_CAPTURE(Transcedental1, rr_Exp2, LIFT(Exp2));
 BENCHMARK_CAPTURE(Transcedental1, rr_Log2, LIFT(Log2));
-
-BENCHMARK_CAPTURE(Transcedental1, rr_Rcp_pp_exactAtPow2_true, LIFT(Rcp_pp), true);
-BENCHMARK_CAPTURE(Transcedental1, rr_Rcp_pp_exactAtPow2_false, LIFT(Rcp_pp), false);
-BENCHMARK_CAPTURE(Transcedental1, rr_RcpSqrt_pp, LIFT(RcpSqrt_pp));