Support specifying math precision through a template argument

The use of a Boolean parameter to select relaxed precision is fine for functions such as Asin(), which are only called from the SpirvShader code and pass in a verbose d.RelaxedPrecision argument. But functions like Sqrt() are also used in places like the VK_BLEND_OP_SOFTLIGHT_EXT implementation, and Pow() is used in sRGB conversion, where there is no concept of relaxed precision decorations and passing in true or false would not have good readability.

To create a reasonably elegant syntax the Highp and Mediump enums have been defined and they can be used as a template argument for these functions.

The Vulkan spec states for VK_EXT_blend_operation_advanced that the "blending precision may be limited to 16-bit floating-point", so we can use Sqrt<Mediump>(). Likewise for sRGB conversion Pow<Mediump>() suffices since this non-linear fixed-point encoding only makes sense for relatively low bit width color components.

Bug: b/222218659
Change-Id: Id3bc4fa68f38574ff23b125befbac68072d39ee1
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/63768
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>

commit: 16e26df6b41b544115be836088f255b15698719d [log] [tgz]
author: Nicolas Capens <capn@google.com> Thu Mar 03 16:50:57 2022 -0500
committer: Nicolas Capens <nicolascapens@google.com> Wed Mar 09 04:19:38 2022 +0000
tree: f4f5aef7f35df54ad5c33f352d71ff04143158cd
parent: ae51739f2fdd880fd95e4ea0e403ce6ec4f4a04a [diff]
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 5a4a9ba..24e90bd 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp

@@ -1487,7 +1487,7 @@
 Float4 Blitter::LinearToSRGB(const Float4 &c)
 {
 	Float4 lc = Min(c, 0.0031308f) * 12.92f;
-	Float4 ec = Float4(1.055f) * sw::Pow(c, (1.0f / 2.4f)) - 0.055f;
+	Float4 ec = Float4(1.055f) * Pow<Mediump>(c, (1.0f / 2.4f)) - 0.055f;  // TODO(b/149574741): Use a custom approximation.
 
 	Float4 s = c;
 	s.xyz = Max(lc, ec);
@@ -1498,7 +1498,7 @@
 Float4 Blitter::sRGBtoLinear(const Float4 &c)
 {
 	Float4 lc = c * (1.0f / 12.92f);
-	Float4 ec = sw::Pow((c + 0.055f) * (1.0f / 1.055f), 2.4f);
+	Float4 ec = Pow<Mediump>((c + 0.055f) * (1.0f / 1.055f), 2.4f);  // TODO(b/149574741): Use a custom approximation.
 
 	Int4 linear = CmpLT(c, 0.04045f);
 

diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 6788340..fb3172d 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp

@@ -2027,7 +2027,7 @@
 	return As<Float4>(
 	    (~largeSrc & As<Int4>(dst - ((1.0f - (2.0f * src)) * dst * (1.0f - dst)))) |
 	    (largeSrc & ((~largeDst & As<Int4>(dst + (((2.0f * src) - 1.0f) * dst * ((((16.0f * dst) - 12.0f) * dst) + 3.0f)))) |
-	                 (largeDst & As<Int4>(dst + (((2.0f * src) - 1.0f) * (Sqrt(dst) - dst)))))));
+	                 (largeDst & As<Int4>(dst + (((2.0f * src) - 1.0f) * (Sqrt<Mediump>(dst) - dst)))))));
 }
 
 Float4 PixelRoutine::maxRGB(Vector4f &c)

diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 01ec942..dee7a30 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp

@@ -402,7 +402,7 @@
 	return 6.93147181e-1f * sw::Log2(x);  // ln(2)
 }
 
-Float4 Pow(RValue<Float4> x, RValue<Float4> y)
+Float4 Pow(RValue<Float4> x, RValue<Float4> y, bool relaxedPrecision)
 {
 	Float4 log = sw::Log2(x);
 	log *= y;
@@ -441,6 +441,11 @@
 	return sw::Log((1.0f + x) / (1.0f - x)) * 0.5f;
 }
 
+RValue<Float4> Sqrt(RValue<Float4> x, bool relaxedPrecision)
+{
+	return rr::Sqrt(x);  // TODO(b/222218659): Optimize for relaxed precision.
+}
+
 Float4 reciprocal(RValue<Float4> x, bool pp, bool exactAtPow2)
 {
 	return Rcp(x, pp, exactAtPow2);

diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index e33c4c0..7f7251d 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp

@@ -195,13 +195,33 @@
 Float4 Log2(RValue<Float4> x);
 Float4 Exp(RValue<Float4> x);
 Float4 Log(RValue<Float4> x);
-Float4 Pow(RValue<Float4> x, RValue<Float4> y);
+Float4 Pow(RValue<Float4> x, RValue<Float4> y, bool relaxedPrecision);
 Float4 Sinh(RValue<Float4> x);
 Float4 Cosh(RValue<Float4> x);
 Float4 Tanh(RValue<Float4> x);
 Float4 Asinh(RValue<Float4> x);
 Float4 Acosh(RValue<Float4> x);
 Float4 Atanh(RValue<Float4> x);
+RValue<Float4> Sqrt(RValue<Float4> x, bool relaxedPrecision);
+
+// Math functions with uses outside of shaders can be invoked using a verbose template argument instead
+// of a Boolean argument to indicate precision. For example Sqrt<Mediump>(x) equals Sqrt(x, true).
+enum Precision
+{
+	Highp,
+	Relaxed,
+	Mediump = Relaxed,  // GLSL defines mediump and lowp as corresponding with SPIR-V's RelaxedPrecision
+};
+
+// clang-format off
+template<Precision precision> RValue<Float4> Sqrt(RValue<Float4> x);
+template<> inline RValue<Float4> Sqrt<Highp>(RValue<Float4> x) { return Sqrt(x, false); }
+template<> inline RValue<Float4> Sqrt<Mediump>(RValue<Float4> x) { return Sqrt(x, true); }
+
+template<Precision precision> RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y);
+template<> inline RValue<Float4> Pow<Highp>(RValue<Float4> x, RValue<Float4> y) { return Pow(x, y, false); }
+template<> inline RValue<Float4> Pow<Mediump>(RValue<Float4> x, RValue<Float4> y) { return Pow(x, y, true); }
+// clang-format on
 
 Float4 reciprocal(RValue<Float4> x, bool pp = false, bool exactAtPow2 = false);
 Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);

diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp
index 87205ae..a58ea03 100644
--- a/src/Pipeline/SpirvShaderGLSLstd450.cpp
+++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp

@@ -714,9 +714,11 @@
 		{
 			auto x = Operand(this, state, insn.word(5));
 			auto y = Operand(this, state, insn.word(6));
+			Decorations d = GetDecorationsForId(insn.resultId());
+
 			for(auto i = 0u; i < type.componentCount; i++)
 			{
-				dst.move(i, sw::Pow(x.Float(i), y.Float(i)));
+				dst.move(i, sw::Pow(x.Float(i), y.Float(i), d.RelaxedPrecision));
 			}
 		}
 		break;

diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index bc01ab6..76ac670 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp

@@ -23,6 +23,8 @@
 
 namespace {
 
+using namespace sw;
+
 vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
 {
 	switch(format)
@@ -74,14 +76,14 @@
 	}
 }
 
-sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
+SIMD::Float sRGBtoLinear(SIMD::Float c)
 {
-	sw::SIMD::Float lc = c * (1.0f / 12.92f);
-	sw::SIMD::Float ec = sw::Pow((c + 0.055f) * (1.0f / 1.055f), 2.4f);  // TODO(b/149574741): Use an optimized approximation.
+	SIMD::Float lc = c * (1.0f / 12.92f);
+	SIMD::Float ec = Pow<Mediump>((c + 0.055f) * (1.0f / 1.055f), 2.4f);  // TODO(b/149574741): Use a custom approximation.
 
-	sw::SIMD::Int linear = CmpLT(c, 0.04045f);
+	SIMD::Int linear = CmpLT(c, 0.04045f);
 
-	return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec)));  // TODO: IfThenElse()
+	return rr::As<SIMD::Float>((linear & rr::As<SIMD::Int>(lc)) | (~linear & rr::As<SIMD::Int>(ec)));  // TODO: IfThenElse()
 }
 
 }  // anonymous namespace

diff --git a/tests/PipelineBenchmarks/PipelineBenchmarks.cpp b/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
index 8b5803d..2d063c4 100644
--- a/tests/PipelineBenchmarks/PipelineBenchmarks.cpp
+++ b/tests/PipelineBenchmarks/PipelineBenchmarks.cpp

@@ -20,6 +20,7 @@
 #include <vector>
 
 using namespace rr;
+using namespace sw;
 
 BENCHMARK_MAIN();
 
@@ -106,11 +107,11 @@
 BENCHMARK_CAPTURE(Transcendental1, sw_Tan, sw::Tan)->Arg(REPS);
 
 BENCHMARK_CAPTURE(Transcendental1, rr_Asin, rr::Asin)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental1, sw_Asin_highpp, sw::Asin, false /* relaxedPrecision */)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental1, sw_Asin_relaxedp, sw::Asin, true /* relaxedPrecision */)->Arg(REPS);
+BENCHMARK_CAPTURE(Transcendental1, sw_Asin_highp, sw::Asin, false /* relaxedPrecision */)->Arg(REPS);
+BENCHMARK_CAPTURE(Transcendental1, sw_Asin_mediump, sw::Asin, true /* relaxedPrecision */)->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, rr_Acos, rr::Acos)->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, sw_Acos_highp, sw::Acos, false /* relaxedPrecision */)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental1, sw_Acos_relaxedp, sw::Acos, true /* relaxedPrecision */)->Arg(REPS);
+BENCHMARK_CAPTURE(Transcendental1, sw_Acos_mediump, sw::Acos, true /* relaxedPrecision */)->Arg(REPS);
 
 BENCHMARK_CAPTURE(Transcendental1, rr_Atan, rr::Atan)->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, sw_Atan, sw::Atan)->Arg(REPS);
@@ -131,7 +132,8 @@
 BENCHMARK_CAPTURE(Transcendental2, sw_Atan2, sw::Atan2)->Arg(REPS);
 
 BENCHMARK_CAPTURE(Transcendental2, rr_Pow, rr::Pow)->Arg(REPS);
-BENCHMARK_CAPTURE(Transcendental2, sw_Pow, sw::Pow)->Arg(REPS);
+BENCHMARK_CAPTURE(Transcendental2, sw_Pow_highp, sw::Pow<Highp>)->Arg(REPS);
+BENCHMARK_CAPTURE(Transcendental2, sw_Pow_mediump, sw::Pow<Mediump>)->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, rr_Exp, rr::Exp)->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, sw_Exp, sw::Exp)->Arg(REPS);
 BENCHMARK_CAPTURE(Transcendental1, rr_Log, rr::Log)->Arg(REPS);
commit	16e26df6b41b544115be836088f255b15698719d	[log] [tgz]
author	Nicolas Capens <capn@google.com>	Thu Mar 03 16:50:57 2022 -0500
committer	Nicolas Capens <nicolascapens@google.com>	Wed Mar 09 04:19:38 2022 +0000
tree	f4f5aef7f35df54ad5c33f352d71ff04143158cd
parent	ae51739f2fdd880fd95e4ea0e403ce6ec4f4a04a [diff]