Optimize log2() polynomial evaluation using FMA
Bug: b/216472189
Bug: b/169754022
Change-Id: I514a972ae62fdebb4dd89c1bf0a067932effb71e
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/63568
Reviewed-by: Sean Risser <srisser@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index a3a9d6f..e47d6b5 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -379,11 +379,11 @@
x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
x1 = As<Float4>(As<UInt4>(x1) >> 8);
x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f)));
- x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
+ x1 = (x1 - 1.4960938f) * 256.0f;
x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f)));
- x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
- x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f);
+ x2 = MulAdd(MulAdd(9.5428179e-2f, x0, 4.7779095e-1f), x0, 1.9782813e-1f);
+ x3 = MulAdd(MulAdd(MulAdd(1.6618466e-2f, x0, 2.0350508e-1f), x0, 2.7382900e-1f), x0, 4.0496687e-2f);
x2 /= x3;
x1 += (x0 - Float4(1.0f)) * x2;