Improve the precision and performance of highp exp2()
This algorithm is based on the relaxed precision exp2() algorithm, with
a higher degree polynomial and using integer addition to correct the
exponent bias.
It eliminates a shift operation and loading a constant.
The code for the relaxed precision exp2() unit test was refactored for
readability.
Bug: b/169754022
Change-Id: Ie103484f88d7f324a4d6dbd4ba7e9893c5a0e2d7
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/64088
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 832ce0f..1ce667f 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -349,24 +349,22 @@
if(!relaxedPrecision) // highp
{
- // This implementation is based on 2^(i + f) = 2^i * 2^f,
- // where i is the integer part of x and f is the fraction.
-
- // For 2^i we can put the integer part directly in the exponent of the IEEE-754 floating-point number.
- Int4 i = Int4(xi);
- Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent.
-
- // For the fractional part use a polynomial which approximates 2^f in the 0 to 1 range.
- // To be exact at integers it uses the form f(x) * x + 1.
+ // Polynomial which approximates (2^x-x-1)/x. Multiplying with x
+ // gives us a correction term to be added to 1+x to obtain 2^x.
const Float4 a = 1.8852974e-3f;
const Float4 b = 8.9733787e-3f;
const Float4 c = 5.5835927e-2f;
const Float4 d = 2.4015281e-1f;
- const Float4 e = 6.9315247e-1f;
+ const Float4 e = -3.0684753e-1f;
- Float4 ff = MulAdd(MulAdd(MulAdd(MulAdd(MulAdd(a, f, b), f, c), f, d), f, e), f, 1.0f);
+ Float4 r = MulAdd(MulAdd(MulAdd(MulAdd(a, f, b), f, c), f, d), f, e);
- return ii * ff;
+ // bit_cast<float>(int(x * 2^23)) is a piecewise linear approximation of 2^x.
+ // See "Fast Exponential Computation on SIMD Architectures" by Malossi et al.
+ Float4 y = MulAdd(r, f, x0);
+ Int4 i = Int4(y * (1 << 23)) + (127 << 23);
+
+ return As<Float4>(i);
}
else // RelaxedPrecision / mediump
{
diff --git a/tests/MathUnitTests/unittests.cpp b/tests/MathUnitTests/unittests.cpp
index 1873d90..a4f497e 100644
--- a/tests/MathUnitTests/unittests.cpp
+++ b/tests/MathUnitTests/unittests.cpp
@@ -136,9 +136,9 @@
CPUID::setFlushToZero(false);
}
-// lolremez --float - d 2 - r "0:1" "(2^x-x-1)/x" "1/x"
+// lolremez --float -d 2 -r "0:1" "(2^x-x-1)/x" "1/x"
// ULP-16: 0.130859017
-float P(float x)
+float Pr(float x)
{
float u = 7.8145574e-2f;
u = u * x + 2.2617357e-1f;
@@ -147,18 +147,17 @@
float Exp2Relaxed(float x)
{
- float x0 = x;
- x0 = min(x0, 128.0f);
- x0 = max(x0, bit_cast<float>(int(0xC2FDFFFF))); // -126.999992
-
- const float f = x0 - floor(x0);
+ x = min(x, 128.0f);
+ x = max(x, bit_cast<float>(int(0xC2FDFFFF))); // -126.999992
// 2^f - f - 1 as P(f) * f
- float y = P(f) * f + x0;
+ // This is a correction term to be added to 1+x to obtain 2^x.
+ float f = x - floor(x);
+ float y = Pr(f) * f + x;
- int i = (int)((1 << 23) * y + (127 << 23));
-
- return bit_cast<float>(i);
+ // bit_cast<float>(int(x * 2^23)) is a piecewise linear approximation of 2^(x-127).
+ // See "Fast Exponential Computation on SIMD Architectures" by Malossi et al.
+ return bit_cast<float>(int((1 << 23) * y + (127 << 23)));
}
TEST(MathTest, Exp2RelaxedExhaustive)
@@ -316,39 +315,30 @@
return ii * ff;
}
-// lolremez --float -d 4 -r "0:1" "(2^x-1)/x" "1/x"
-// ULP_32: 2.65837669, Vulkan margin: 0.847366512
-float f_r(float x)
+// lolremez --float -d 4 -r "0:1" "(2^x-x-1)/x" "1/x"
+// ULP_32: 2.14694786, Vulkan margin: 0.686957061
+float P(float x)
{
float u = 1.8852974e-3f;
u = u * x + 8.9733787e-3f;
u = u * x + 5.5835927e-2f;
u = u * x + 2.4015281e-1f;
- return u * x + 6.9315247e-1f;
+ return u * x + -3.0684753e-1f;
}
float Exp2(float x)
{
- // This implementation is based on 2^(i + f) = 2^i * 2^f,
- // where i is the integer part of x and f is the fraction.
+ x = min(x, 128.0f);
+ x = max(x, bit_cast<float>(0xC2FDFFFF)); // -126.999992
- // For 2^i we can put the integer part directly in the exponent of
- // the IEEE-754 floating-point number. Clamp to prevent overflow
- // past the representation of infinity.
- float x0 = x;
- x0 = min(x0, bit_cast<float>(int(0x4300FFFF))); // 128.999985
- x0 = max(x0, bit_cast<float>(int(0xC2FDFFFF))); // -126.999992
+ // 2^f - f - 1 as P(f) * f
+ // This is a correction term to be added to 1+x to obtain 2^x.
+ float f = x - floor(x);
+ float y = P(f) * f + x;
- float xi = floor(x0);
- int i = int(xi);
- float ii = bit_cast<float>((i + int(127)) << 23); // Add single-precision bias, and shift into exponent.
-
- // For the fractional part use a polynomial which approximates 2^f in the 0 to 1 range.
- // To be exact at integers it uses the form f(x) * x + 1.
- float f = x0 - xi;
- float ff = f_r(f) * f + 1.0f;
-
- return ii * ff;
+ // bit_cast<float>(int(x * 2^23)) is a piecewise linear approximation of 2^(x-127).
+ // See "Fast Exponential Computation on SIMD Architectures" by Malossi et al.
+ return bit_cast<float>(int(y * (1 << 23)) + (127 << 23));
}
TEST(MathTest, Exp2Exhaustive)