Remove unused dot operations and clarify FMA

dot2/dot3/dot4 were unused and are not useful for the SPIR-V OpDot
instruction which supports a variable number of components:
https://www.khronos.org/registry/SPIR-V/specs/unified1/SPIRV.html#OpDot

Comments for FMA() have been updated to reflect that it may or may not
fuse the multiplication and addition. This corresponds with the behavior
of the Fma instruction in SPIR-V and Vulkan:
https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html
https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#spirvenv-precision-operation

Note that an FMA() intrinsic in Reactor will always represent a fused
instruction, and a MulAdd() should be used to implement SPIR-V's Fma.

Bug: b/214588983
Change-Id: If9dd76dd2f49c2d0bd9946497297177b3f445dce
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/61368
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp index 55cb390..38d590d 100644 --- a/src/Pipeline/ShaderCore.cpp +++ b/src/Pipeline/ShaderCore.cpp
@@ -454,21 +454,6 @@ return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f); } -Float4 dot2(const Vector4f &v0, const Vector4f &v1) -{ - return v0.x * v1.x + v0.y * v1.y; -} - -Float4 dot3(const Vector4f &v0, const Vector4f &v1) -{ - return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; -} - -Float4 dot4(const Vector4f &v0, const Vector4f &v1) -{ - return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w; -} - void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) { Int2 tmp0 = UnpackHigh(row0, row1); @@ -683,12 +668,13 @@ return NthBit32(bitCount) - sw::SIMD::UInt(1); } -// Performs a fused-multiply add, returning a * b + c. +// Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result. rr::RValue<sw::SIMD::Float> FMA( rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c) { + // TODO(b/214591655): Use FMA when available. return a * b + c; }
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp index e90c4a1..8817d06 100644 --- a/src/Pipeline/ShaderCore.hpp +++ b/src/Pipeline/ShaderCore.hpp
@@ -207,10 +207,6 @@ Float4 arcsinh(RValue<Float4> x, bool pp = false); Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range -Float4 dot2(const Vector4f &v0, const Vector4f &v1); -Float4 dot3(const Vector4f &v0, const Vector4f &v1); -Float4 dot4(const Vector4f &v0, const Vector4f &v1); - void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); @@ -252,7 +248,7 @@ // Returns bitCount number of of 1's starting from the LSB. rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount); -// Performs a fused-multiply add, returning a * b + c. +// Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result. rr::RValue<sw::SIMD::Float> FMA( rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,