Remove unused dot operations and clarify FMA
dot2/dot3/dot4 were unused and are not useful for the SPIR-V OpDot
instruction, which supports a variable number of components:
https://www.khronos.org/registry/SPIR-V/specs/unified1/SPIRV.html#OpDot
Comments for FMA() have been updated to reflect that it may or may not
fuse the multiplication and addition. This corresponds with the behavior
of the Fma instruction in SPIR-V and Vulkan:
https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html
https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#spirvenv-precision-operation
Note that an FMA() intrinsic in Reactor will always represent a fused
instruction, and a MulAdd() should be used to implement SPIR-V's Fma.
Bug: b/214588983
Change-Id: If9dd76dd2f49c2d0bd9946497297177b3f445dce
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/61368
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 55cb390..38d590d 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -454,21 +454,6 @@
return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f);
}
-Float4 dot2(const Vector4f &v0, const Vector4f &v1)
-{
- return v0.x * v1.x + v0.y * v1.y;
-}
-
-Float4 dot3(const Vector4f &v0, const Vector4f &v1)
-{
- return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
-}
-
-Float4 dot4(const Vector4f &v0, const Vector4f &v1)
-{
- return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
-}
-
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
{
Int2 tmp0 = UnpackHigh(row0, row1);
@@ -683,12 +668,13 @@
return NthBit32(bitCount) - sw::SIMD::UInt(1);
}
-// Performs a fused-multiply add, returning a * b + c.
+// Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result.
rr::RValue<sw::SIMD::Float> FMA(
rr::RValue<sw::SIMD::Float> const &a,
rr::RValue<sw::SIMD::Float> const &b,
rr::RValue<sw::SIMD::Float> const &c)
{
+ // TODO(b/214591655): Use FMA when available.
return a * b + c;
}
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index e90c4a1..8817d06 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp
@@ -207,10 +207,6 @@
Float4 arcsinh(RValue<Float4> x, bool pp = false);
Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
-Float4 dot2(const Vector4f &v0, const Vector4f &v1);
-Float4 dot3(const Vector4f &v0, const Vector4f &v1);
-Float4 dot4(const Vector4f &v0, const Vector4f &v1);
-
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
@@ -252,7 +248,7 @@
// Returns bitCount number of of 1's starting from the LSB.
rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount);
-// Performs a fused-multiply add, returning a * b + c.
+// Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result.
rr::RValue<sw::SIMD::Float> FMA(
rr::RValue<sw::SIMD::Float> const &a,
rr::RValue<sw::SIMD::Float> const &b,