Eliminate rr::Precision
Reactor now only has full-precision implementations of transcendental
functions, so there's no need for a precision argument.
The only exceptions are the reciprocal and reciprocal square root
functions. Those are very useful to keep as part of the Reactor API
because there are fast approximate instructions for these operations.
For now, the precision is selected using a boolean parameter. This is
intended to later be replaced by having just an approximate intrinsic,
and a query Caps::rcpIsFast() which will allow users of Reactor to
determine whether to use 1/x or Rcp(). Specifically, when Rcp() is
actually implemented as 1/x, it would be wasteful to apply the
Newton–Raphson method to obtain a more precise result (cf.
Caps::fmaIsFast(), which addresses a similar problem).
Bug: b/169755552
Change-Id: I553746aeee3ac01f6c47e06da17de1f6bfcd7df6
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/62289
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index e1edd21..c112cfa 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -163,7 +163,7 @@
WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
}
- WWWW = Rcp(WWWW, Precision::Relaxed);
+ WWWW = Rcp(WWWW, true /* relaxedPrecision */);
XXXX *= WWWW;
YYYY *= WWWW;
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index 37aa93a..510d062 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -1144,12 +1144,12 @@
uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
- anisotropy = lod * Rcp(det, Precision::Relaxed);
+ anisotropy = lod * Rcp(det, true /* relaxedPrecision */);
anisotropy = Min(anisotropy, state.maxAnisotropy);
// TODO(b/151263485): While we always need `lod` above, when there's only
// a single mipmap level the following calculations could be skipped.
- lod *= Rcp(anisotropy * anisotropy, Precision::Relaxed);
+ lod *= Rcp(anisotropy * anisotropy, true /* relaxedPrecision */);
}
lod = log2sqrt(lod); // log2(sqrt(lod))
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index b796868..b56046a 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -253,20 +253,20 @@
(As<Int4>(x) & Int4(0x80000000)));
}
-RValue<Float4> Asin(RValue<Float4> x, Precision p)
+RValue<Float4> Asin(RValue<Float4> x, bool relaxedPrecision)
{
// TODO(b/169755566): Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial
// approximation version of acos, unlike for Asin, which requires higher precision algorithms.
- if(p == Precision::Full)
+ if(!relaxedPrecision)
{
- return rr::Asin(x, p);
+ return rr::Asin(x);
}
return Asin_8_terms(x);
}
-RValue<Float4> Acos(RValue<Float4> x, Precision p)
+RValue<Float4> Acos(RValue<Float4> x, bool relaxedPrecision)
{
// pi/2 - arcsin(x)
return Float4(1.57079632e+0f) - Asin_4_terms(x);
@@ -500,7 +500,7 @@
Float4 reciprocal(RValue<Float4> x, bool pp, bool exactAtPow2)
{
- return Rcp(x, pp ? Precision::Relaxed : Precision::Full, exactAtPow2);
+ return Rcp(x, pp, exactAtPow2);
}
Float4 reciprocalSquareRoot(RValue<Float4> x, bool absolute, bool pp)
@@ -512,7 +512,7 @@
abs = Abs(abs);
}
- return Rcp(abs, pp ? Precision::Relaxed : Precision::Full);
+ return Rcp(abs, pp);
}
Float4 modulo(RValue<Float4> x, RValue<Float4> y)
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index ad7a4bc..441e08e 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp
@@ -187,8 +187,8 @@
Float4 Sin(RValue<Float4> x);
Float4 Cos(RValue<Float4> x);
Float4 Tan(RValue<Float4> x);
-RValue<Float4> Asin(RValue<Float4> x, rr::Precision p); // TODO(b/169755552): Remove rr::Precision
-RValue<Float4> Acos(RValue<Float4> x, rr::Precision p); // TODO(b/169755552): Remove rr::Precision
+RValue<Float4> Asin(RValue<Float4> x, bool relaxedPrecision);
+RValue<Float4> Acos(RValue<Float4> x, bool relaxedPrecision);
Float4 Atan(RValue<Float4> x);
Float4 Atan2(RValue<Float4> y, RValue<Float4> x);
Float4 Exp2(RValue<Float4> x);
diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp
index 85cc6ff..77f67e3 100644
--- a/src/Pipeline/SpirvShaderGLSLstd450.cpp
+++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp
@@ -624,7 +624,7 @@
ApplyDecorationsForId(&d, insn.word(5));
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Asin(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
+ dst.move(i, sw::Asin(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -635,7 +635,7 @@
ApplyDecorationsForId(&d, insn.word(5));
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Acos(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
+ dst.move(i, sw::Acos(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -775,7 +775,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, RcpSqrt(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
+ dst.move(i, RcpSqrt(val.Float(i), d.RelaxedPrecision));
}
}
break;
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 86799a6..a2781b7 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -3455,13 +3455,13 @@
return RValue<Float4>(V(out));
}
-RValue<Float4> Asin(RValue<Float4> v, Precision p)
+RValue<Float4> Asin(RValue<Float4> v)
{
RR_DEBUG_INFO_UPDATE_LOC();
return TransformFloat4PerElement(v, "asinf");
}
-RValue<Float4> Acos(RValue<Float4> v, Precision p)
+RValue<Float4> Acos(RValue<Float4> v)
{
RR_DEBUG_INFO_UPDATE_LOC();
return TransformFloat4PerElement(v, "acosf");
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 148a46e..e155b80 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -4662,10 +4662,10 @@
RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2 = false);
template<typename T>
-static RValue<T> DoRcp(RValue<T> x, Precision p, bool exactAtPow2)
+static RValue<T> DoRcp(RValue<T> x, bool relaxedPrecision, bool exactAtPow2)
{
#if defined(__i386__) || defined(__x86_64__) // On x86, 1/x is fast enough, except for lower precision
- bool approx = HasRcpApprox() && (p != Precision::Full);
+ bool approx = HasRcpApprox() && relaxedPrecision;
#else
bool approx = HasRcpApprox();
#endif
@@ -4676,7 +4676,7 @@
{
rcp = RcpApprox(x, exactAtPow2);
- if(p == Precision::Full)
+ if(!relaxedPrecision)
{
// Perform one more iteration of Newton-Rhapson division to increase precision
rcp = (rcp + rcp) - (x * rcp * rcp);
@@ -4690,16 +4690,16 @@
return rcp;
}
-RValue<Float4> Rcp(RValue<Float4> x, Precision p, bool exactAtPow2)
+RValue<Float4> Rcp(RValue<Float4> x, bool relaxedPrecision, bool exactAtPow2)
{
RR_DEBUG_INFO_UPDATE_LOC();
- return DoRcp(x, p, exactAtPow2);
+ return DoRcp(x, relaxedPrecision, exactAtPow2);
}
-RValue<Float> Rcp(RValue<Float> x, Precision p, bool exactAtPow2)
+RValue<Float> Rcp(RValue<Float> x, bool relaxedPrecision, bool exactAtPow2)
{
RR_DEBUG_INFO_UPDATE_LOC();
- return DoRcp(x, p, exactAtPow2);
+ return DoRcp(x, relaxedPrecision, exactAtPow2);
}
// Functions implemented by backends
@@ -4729,10 +4729,10 @@
}
template<typename T>
-static RValue<T> DoRcpSqrt(RValue<T> x, Precision p)
+static RValue<T> DoRcpSqrt(RValue<T> x, bool relaxedPrecision)
{
#if defined(__i386__) || defined(__x86_64__) // On x86, 1/x is fast enough, except for lower precision
- bool approx = HasRcpApprox() && (p != Precision::Full);
+ bool approx = HasRcpApprox() && relaxedPrecision;
#else
bool approx = HasRcpApprox();
#endif
@@ -4743,7 +4743,7 @@
T rsq = RcpSqrtApprox(x);
- if(p == Precision::Full)
+ if(!relaxedPrecision)
{
rsq = rsq * (T(3.0f) - rsq * rsq * x) * T(0.5f);
rsq = As<T>(CmpNEQ(As<IntType>(x), IntType(0x7F800000)) & As<IntType>(rsq));
@@ -4757,14 +4757,14 @@
}
}
-RValue<Float4> RcpSqrt(RValue<Float4> x, Precision p)
+RValue<Float4> RcpSqrt(RValue<Float4> x, bool relaxedPrecision)
{
- return DoRcpSqrt(x, p);
+ return DoRcpSqrt(x, relaxedPrecision);
}
-RValue<Float> RcpSqrt(RValue<Float> x, Precision p)
+RValue<Float> RcpSqrt(RValue<Float> x, bool relaxedPrecision)
{
- return DoRcpSqrt(x, p);
+ return DoRcpSqrt(x, relaxedPrecision);
}
} // namespace rr
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index f99c545..c982de8 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -67,16 +67,6 @@
namespace rr {
-// These generally map to the precision types as specified by the Vulkan specification.
-// See https://www.khronos.org/registry/vulkan/specs/1.2/html/chap37.html#spirvenv-precision-operation
-enum class Precision
-{
- /*Exact,*/ // 0 ULP with correct rounding (i.e. Math.h)
- Full, // Single precision, but not relaxed
- Relaxed, // Single precision, relaxed
- /*Half,*/ // Half precision
-};
-
struct Caps
{
static std::string backendName();
@@ -2181,8 +2171,8 @@
// Deprecated: use RcpSqrt
// TODO(b/147516027): Remove when GLES frontend is removed
RValue<Float> RcpSqrt_pp(RValue<Float> val);
-RValue<Float> Rcp(RValue<Float> x, Precision p = Precision::Full, bool exactAtPow2 = false);
-RValue<Float> RcpSqrt(RValue<Float> x, Precision p = Precision::Full);
+RValue<Float> Rcp(RValue<Float> x, bool relaxedPrecision, bool exactAtPow2 = false);
+RValue<Float> RcpSqrt(RValue<Float> x, bool relaxedPrecision);
RValue<Float> Sqrt(RValue<Float> x);
// RValue<Int4> IsInf(RValue<Float> x);
@@ -2357,8 +2347,8 @@
// Deprecated: use RcpSqrt
// TODO(b/147516027): Remove when GLES frontend is removed
RValue<Float4> RcpSqrt_pp(RValue<Float4> val);
-RValue<Float4> Rcp(RValue<Float4> x, Precision p = Precision::Full, bool exactAtPow2 = false);
-RValue<Float4> RcpSqrt(RValue<Float4> x, Precision p = Precision::Full);
+RValue<Float4> Rcp(RValue<Float4> x, bool relaxedPrecision, bool exactAtPow2 = false);
+RValue<Float4> RcpSqrt(RValue<Float4> x, bool relaxedPrecision);
RValue<Float4> Sqrt(RValue<Float4> x);
RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i);
RValue<Float> Extract(RValue<Float4> x, int i);
@@ -2414,8 +2404,8 @@
RValue<Float4> Sin(RValue<Float4> x);
RValue<Float4> Cos(RValue<Float4> x);
RValue<Float4> Tan(RValue<Float4> x);
-RValue<Float4> Asin(RValue<Float4> x, Precision p);
-RValue<Float4> Acos(RValue<Float4> x, Precision p);
+RValue<Float4> Asin(RValue<Float4> x);
+RValue<Float4> Acos(RValue<Float4> x);
RValue<Float4> Atan(RValue<Float4> x);
RValue<Float4> Sinh(RValue<Float4> x);
RValue<Float4> Cosh(RValue<Float4> x);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 3eaf24f..09d3ff3 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -4379,13 +4379,13 @@
return emulated::Tan(x);
}
-RValue<Float4> Asin(RValue<Float4> x, Precision p)
+RValue<Float4> Asin(RValue<Float4> x)
{
RR_DEBUG_INFO_UPDATE_LOC();
return emulated::Asin(x);
}
-RValue<Float4> Acos(RValue<Float4> x, Precision p)
+RValue<Float4> Acos(RValue<Float4> x)
{
RR_DEBUG_INFO_UPDATE_LOC();
return emulated::Acos(x);
diff --git a/tests/ReactorBenchmarks/ReactorBenchmarks.cpp b/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
index 2b30340..6f42e13 100644
--- a/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
+++ b/tests/ReactorBenchmarks/ReactorBenchmarks.cpp
@@ -32,7 +32,7 @@
BENCHMARK_DEFINE_F(Coroutines, Fibonacci)
(benchmark::State &state)
{
- if(!Caps.CoroutinesSupported)
+ if(!Caps::coroutinesSupported())
{
state.SkipWithError("Coroutines are not supported");
return;
@@ -122,10 +122,8 @@
BENCHMARK_CAPTURE(Transcedental1, rr_Cos, Cos);
BENCHMARK_CAPTURE(Transcedental1, rr_Tan, Tan);
-BENCHMARK_CAPTURE(Transcedental1, rr_Asin_fullp, Asin, Precision::Full);
-BENCHMARK_CAPTURE(Transcedental1, rr_Asin_relaxedp, Asin, Precision::Relaxed);
-BENCHMARK_CAPTURE(Transcedental1, rr_Acos_fullp, Acos, Precision::Full);
-BENCHMARK_CAPTURE(Transcedental1, rr_Acos_relaxedp, Acos, Precision::Relaxed);
+BENCHMARK_CAPTURE(Transcedental1, rr_Asin, Asin);
+BENCHMARK_CAPTURE(Transcedental1, rr_Acos, Acos);
BENCHMARK_CAPTURE(Transcedental1, rr_Atan, Atan);
BENCHMARK_CAPTURE(Transcedental1, rr_Sinh, Sinh);
diff --git a/tests/ReactorUnitTests/ReactorUnitTests.cpp b/tests/ReactorUnitTests/ReactorUnitTests.cpp
index 19056db..5ed94ca 100644
--- a/tests/ReactorUnitTests/ReactorUnitTests.cpp
+++ b/tests/ReactorUnitTests/ReactorUnitTests.cpp
@@ -2973,23 +2973,23 @@
// clang-format off
constexpr float PI = 3.141592653589793f;
INSTANTIATE_TEST_SUITE_P(IntrinsicTestParams_Float4, IntrinsicTest_Float4, testing::Values(
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Sin(v); }, sinf, {0.f, 1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Cos(v); }, cosf, {0.f, 1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Tan(v); }, tanf, {0.f, 1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Asin(v, Precision::Full); }, asinf, {0.f, 1.f, -1.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Acos(v, Precision::Full); }, acosf, {0.f, 1.f, -1.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Atan(v); }, atanf, {0.f, 1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Sinh(v); }, vulkan_sinhf, {0.f, 1.f, PI} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Cosh(v); }, vulkan_coshf, {0.f, 1.f, PI} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Tanh(v); }, tanhf, {0.f, 1.f, PI} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Asinh(v); }, asinhf, {0.f, 1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Acosh(v); }, acoshf, { 1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Atanh(v); }, atanhf, {0.f, 0.9999f, -0.9999f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Exp(v); }, expf, {0.f, 1.f, PI} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Log(v); }, logf, {1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Exp2(v); }, exp2f, {0.f, 1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Log2(v); }, log2f, {1.f, PI, 123.f} },
- IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Sqrt(v); }, sqrtf, {0.f, 1.f, PI, 123.f} }
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Sin(v); }, sinf, {0.f, 1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Cos(v); }, cosf, {0.f, 1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Tan(v); }, tanf, {0.f, 1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Asin(v); }, asinf, {0.f, 1.f, -1.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Acos(v); }, acosf, {0.f, 1.f, -1.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Atan(v); }, atanf, {0.f, 1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Sinh(v); }, vulkan_sinhf, {0.f, 1.f, PI} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Cosh(v); }, vulkan_coshf, {0.f, 1.f, PI} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Tanh(v); }, tanhf, {0.f, 1.f, PI} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Asinh(v); }, asinhf, {0.f, 1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Acosh(v); }, acoshf, { 1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Atanh(v); }, atanhf, {0.f, 0.9999f, -0.9999f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Exp(v); }, expf, {0.f, 1.f, PI} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Log(v); }, logf, {1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Exp2(v); }, exp2f, {0.f, 1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Log2(v); }, log2f, {1.f, PI, 123.f} },
+ IntrinsicTestParams_Float4{ [](RValue<Float4> v) { return rr::Sqrt(v); }, sqrtf, {0.f, 1.f, PI, 123.f} }
));
// clang-format on