Remove unused dot operations and clarify FMA

dot2/dot3/dot4 were unused and are not useful for the SPIR-V OpDot
instruction which supports a variable number of components:
https://www.khronos.org/registry/SPIR-V/specs/unified1/SPIRV.html#OpDot

Comments for FMA() have been updated to reflect that it may or may not
fuse the multiplication and addition. This corresponds with the behavior
of the Fma instruction in SPIR-V and Vulkan:
https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html
https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#spirvenv-precision-operation

Note that an FMA() intrinsic in Reactor will always represent a fused
instruction, and a MulAdd() should be used to implement SPIR-V's Fma.

Bug: b/214588983
Change-Id: If9dd76dd2f49c2d0bd9946497297177b3f445dce
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/61368
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>

commit: d0fed5ae6c63d39b4ee2097d0c0cdb8ce3aba9a7 [log] [tgz]
author: Nicolas Capens <capn@google.com> Mon Jan 10 09:51:12 2022 -0500
committer: Nicolas Capens <nicolascapens@google.com> Tue Jan 18 15:33:58 2022 +0000
tree: 354bf4db476ec3bb4678899e405d6f00b90a0b84
parent: 9b1a72a8d2266960a0f036a39c644314c52877d3 [diff]
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 55cb390..38d590d 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp

@@ -454,21 +454,6 @@
 	return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f);
 }
 
-Float4 dot2(const Vector4f &v0, const Vector4f &v1)
-{
-	return v0.x * v1.x + v0.y * v1.y;
-}
-
-Float4 dot3(const Vector4f &v0, const Vector4f &v1)
-{
-	return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
-}
-
-Float4 dot4(const Vector4f &v0, const Vector4f &v1)
-{
-	return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
-}
-
 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
 {
 	Int2 tmp0 = UnpackHigh(row0, row1);
@@ -683,12 +668,13 @@
 	return NthBit32(bitCount) - sw::SIMD::UInt(1);
 }
 
-// Performs a fused-multiply add, returning a * b + c.
+// Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result.
 rr::RValue<sw::SIMD::Float> FMA(
     rr::RValue<sw::SIMD::Float> const &a,
     rr::RValue<sw::SIMD::Float> const &b,
     rr::RValue<sw::SIMD::Float> const &c)
 {
+	// TODO(b/214591655): Use FMA when available.
 	return a * b + c;
 }
 

diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index e90c4a1..8817d06 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp

@@ -207,10 +207,6 @@
 Float4 arcsinh(RValue<Float4> x, bool pp = false);
 Float4 arctanh(RValue<Float4> x, bool pp = false);  // Limited to ]-1, 1[ range
 
-Float4 dot2(const Vector4f &v0, const Vector4f &v1);
-Float4 dot3(const Vector4f &v0, const Vector4f &v1);
-Float4 dot4(const Vector4f &v0, const Vector4f &v1);
-
 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
@@ -252,7 +248,7 @@
 // Returns bitCount number of of 1's starting from the LSB.
 rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount);
 
-// Performs a fused-multiply add, returning a * b + c.
+// Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result.
 rr::RValue<sw::SIMD::Float> FMA(
     rr::RValue<sw::SIMD::Float> const &a,
     rr::RValue<sw::SIMD::Float> const &b,
commit	d0fed5ae6c63d39b4ee2097d0c0cdb8ce3aba9a7	[log] [tgz]
author	Nicolas Capens <capn@google.com>	Mon Jan 10 09:51:12 2022 -0500
committer	Nicolas Capens <nicolascapens@google.com>	Tue Jan 18 15:33:58 2022 +0000
tree	354bf4db476ec3bb4678899e405d6f00b90a0b84
parent	9b1a72a8d2266960a0f036a39c644314c52877d3 [diff]