Remove duplicate transcendental functions
Functions such as exponential() were considered general purpose, but in
practice we only needed implementations of transcendental and
trigonometric functions for the corresponding shader operations, so they
ended up being duplicates.
power() was still used in sRGB color space conversion, so it has been
replaced with the identical Pow() implementation. The conversion should
be optimized using relaxed precision operations or a tailor-made
approximation.
Explicit broadcast operations have been removed to improve readability.
Bug: b/169755552
Change-Id: I7b0e25b58fda70ac1ca020b9462e80387d1ba0d6
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/63569
Reviewed-by: Sean Risser <srisser@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 5b5cd5f..5a4a9ba 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -1486,8 +1486,8 @@
Float4 Blitter::LinearToSRGB(const Float4 &c)
{
- Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
- Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
+ Float4 lc = Min(c, 0.0031308f) * 12.92f;
+ Float4 ec = Float4(1.055f) * sw::Pow(c, (1.0f / 2.4f)) - 0.055f;
Float4 s = c;
s.xyz = Max(lc, ec);
@@ -1497,10 +1497,10 @@
Float4 Blitter::sRGBtoLinear(const Float4 &c)
{
- Float4 lc = c * Float4(1.0f / 12.92f);
- Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
+ Float4 lc = c * (1.0f / 12.92f);
+ Float4 ec = sw::Pow((c + 0.055f) * (1.0f / 1.055f), 2.4f);
- Int4 linear = CmpLT(c, Float4(0.04045f));
+ Int4 linear = CmpLT(c, 0.04045f);
Float4 s = c;
s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index e47d6b5..9b07317 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -224,7 +224,7 @@
const Float4 a2(0.0742610f);
const Float4 a3(-0.0187293f);
Float4 absx = Abs(x);
- return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^
+ return As<Float4>(As<Int4>(half_pi - Sqrt(1.0f - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^
(As<Int4>(x) & Int4(0x80000000)));
}
@@ -242,7 +242,7 @@
const Float4 a6(0.006700901f);
const Float4 a7(-0.0012624911f);
Float4 absx = Abs(x);
- return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * (a3 + absx * (a4 + absx * (a5 + absx * (a6 + absx * a7)))))))) ^
+ return As<Float4>(As<Int4>(half_pi - Sqrt(1.0f - absx) * (a0 + absx * (a1 + absx * (a2 + absx * (a3 + absx * (a4 + absx * (a5 + absx * (a6 + absx * a7)))))))) ^
(As<Int4>(x) & Int4(0x80000000)));
}
@@ -262,14 +262,14 @@
RValue<Float4> Acos(RValue<Float4> x, bool relaxedPrecision)
{
// pi/2 - arcsin(x)
- return Float4(1.57079632e+0f) - Asin_4_terms(x);
+ return 1.57079632e+0f - Asin_4_terms(x);
}
Float4 Atan(RValue<Float4> x)
{
Float4 absx = Abs(x);
- Int4 O = CmpNLT(absx, Float4(1.0f));
- Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select
+ Int4 O = CmpNLT(absx, 1.0f);
+ Float4 y = As<Float4>((O & As<Int4>(1.0f / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select
const Float4 half_pi(1.57079632f);
Float4 theta = Atan_01(y);
@@ -285,13 +285,13 @@
const Float4 quarter_pi(7.85398163e-1f); // pi/4
// Rotate to upper semicircle when in lower semicircle
- Int4 S = CmpLT(y, Float4(0.0f));
+ Int4 S = CmpLT(y, 0.0f);
Float4 theta = As<Float4>(S & As<Int4>(minus_pi));
Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x));
Float4 y0 = Abs(y);
// Rotate to right quadrant when in left quadrant
- Int4 Q = CmpLT(x0, Float4(0.0f));
+ Int4 Q = CmpLT(x0, 0.0f);
theta += As<Float4>(Q & As<Int4>(half_pi));
Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0))); // FIXME: Vector select
Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select
@@ -302,7 +302,7 @@
Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select
// Approximation of atan in [0..1]
- Int4 zero_x = CmpEQ(x2, Float4(0.0f));
+ Int4 zero_x = CmpEQ(x2, 0.0f);
Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
Float4 atan2_theta = Atan_01(y2 / x2);
theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) | // FIXME: Vector select
@@ -318,7 +318,7 @@
// TODO(chromium:1299047)
Float4 Exp2_legacy(RValue<Float4> x0)
{
- Int4 i = RoundInt(x0 - Float4(0.5f));
+ Int4 i = RoundInt(x0 - 0.5f);
Float4 ii = As<Float4>((i + Int4(127)) << 23);
Float4 f = x0 - Float4(i);
@@ -327,7 +327,7 @@
ff = ff * f + As<Float4>(Int4(0x3D64AA23));
ff = ff * f + As<Float4>(Int4(0x3E75EAD4));
ff = ff * f + As<Float4>(Int4(0x3F31727B));
- ff = ff * f + Float4(1.0f);
+ ff = ff * f + 1.0f;
return ii * ff;
}
@@ -386,7 +386,7 @@
x3 = MulAdd(MulAdd(MulAdd(1.6618466e-2f, x0, 2.0350508e-1f), x0, 2.7382900e-1f), x0, 4.0496687e-2f);
x2 /= x3;
- x1 += (x0 - Float4(1.0f)) * x2;
+ x1 += (x0 - 1.0f) * x2;
Int4 pos_inf_x = CmpEQ(As<Int4>(x), Int4(0x7F800000));
return As<Float4>((pos_inf_x & As<Int4>(x)) | (~pos_inf_x & As<Int4>(x1)));
@@ -411,12 +411,12 @@
Float4 Sinh(RValue<Float4> x)
{
- return (sw::Exp(x) - sw::Exp(-x)) * Float4(0.5f);
+ return (sw::Exp(x) - sw::Exp(-x)) * 0.5f;
}
Float4 Cosh(RValue<Float4> x)
{
- return (sw::Exp(x) + sw::Exp(-x)) * Float4(0.5f);
+ return (sw::Exp(x) + sw::Exp(-x)) * 0.5f;
}
Float4 Tanh(RValue<Float4> x)
@@ -428,89 +428,17 @@
Float4 Asinh(RValue<Float4> x)
{
- return sw::Log(x + Sqrt(x * x + Float4(1.0f)));
+ return sw::Log(x + Sqrt(x * x + 1.0f));
}
Float4 Acosh(RValue<Float4> x)
{
- return sw::Log(x + Sqrt(x + Float4(1.0f)) * Sqrt(x - Float4(1.0f)));
+ return sw::Log(x + Sqrt(x + 1.0f) * Sqrt(x - 1.0f));
}
Float4 Atanh(RValue<Float4> x)
{
- return sw::Log((Float4(1.0f) + x) / (Float4(1.0f) - x)) * Float4(0.5f);
-}
-
-Float4 exponential2(RValue<Float4> x, bool pp)
-{
- // This implementation is based on 2^(i + f) = 2^i * 2^f,
- // where i is the integer part of x and f is the fraction.
-
- // For 2^i we can put the integer part directly in the exponent of
- // the IEEE-754 floating-point number. Clamp to prevent overflow
- // past the representation of infinity.
- Float4 x0 = x;
- x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f
- x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f
-
- Int4 i = RoundInt(x0 - Float4(0.5f));
- Float4 ii = As<Float4>((i + Int4(127)) << 23); // Add single-precision bias, and shift into exponent.
-
- // For the fractional part use a polynomial
- // which approximates 2^f in the 0 to 1 range.
- Float4 f = x0 - Float4(i);
- Float4 ff = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f
- ff = ff * f + As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f
- ff = ff * f + As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f
- ff = ff * f + As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f
- ff = ff * f + As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f
- ff = ff * f + Float4(1.0f);
-
- return ii * ff;
-}
-
-Float4 logarithm2(RValue<Float4> x, bool pp)
-{
- Float4 x0;
- Float4 x1;
- Float4 x2;
- Float4 x3;
-
- x0 = x;
-
- x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
- x1 = As<Float4>(As<UInt4>(x1) >> 8);
- x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f)));
- x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
- x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f)));
-
- x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
- x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f);
- x2 /= x3;
-
- x1 += (x0 - Float4(1.0f)) * x2;
-
- Int4 pos_inf_x = CmpEQ(As<Int4>(x), Int4(0x7F800000));
- return As<Float4>((pos_inf_x & As<Int4>(x)) | (~pos_inf_x & As<Int4>(x1)));
-}
-
-Float4 exponential(RValue<Float4> x, bool pp)
-{
- // TODO: Propagate the constant
- return exponential2(Float4(1.44269504f) * x, pp); // 1/ln(2)
-}
-
-Float4 logarithm(RValue<Float4> x, bool pp)
-{
- // TODO: Propagate the constant
- return Float4(6.93147181e-1f) * logarithm2(x, pp); // ln(2)
-}
-
-Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp)
-{
- Float4 log = logarithm2(x, pp);
- log *= y;
- return exponential2(log, pp);
+ return sw::Log((1.0f + x) / (1.0f - x)) * 0.5f;
}
Float4 reciprocal(RValue<Float4> x, bool pp, bool exactAtPow2)
@@ -530,141 +458,6 @@
return Rcp(abs, pp);
}
-Float4 modulo(RValue<Float4> x, RValue<Float4> y)
-{
- return x - y * Floor(x / y);
-}
-
-Float4 arccos(RValue<Float4> x, bool pp)
-{
- // pi/2 - arcsin(x)
- return Float4(1.57079632e+0f) - arcsin(x);
-}
-
-Float4 arcsin(RValue<Float4> x, bool pp)
-{
- if(false) // Simpler implementation fails even lowp precision tests
- {
- // x*(pi/2-sqrt(1-x*x)*pi/5)
- return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x * x) * Float4(6.28318531e-1f));
- }
- else
- {
- // From 4.4.45, page 81 of the Handbook of Mathematical Functions, by Milton Abramowitz and Irene Stegun
- const Float4 half_pi(1.57079632f);
- const Float4 a0(1.5707288f);
- const Float4 a1(-0.2121144f);
- const Float4 a2(0.0742610f);
- const Float4 a3(-0.0187293f);
- Float4 absx = Abs(x);
- return As<Float4>(As<Int4>(half_pi - Sqrt(Float4(1.0f) - absx) * (a0 + absx * (a1 + absx * (a2 + absx * a3)))) ^
- (As<Int4>(x) & Int4(0x80000000)));
- }
-}
-
-// Approximation of atan in [0..1]
-Float4 arctan_01(Float4 x, bool pp)
-{
- if(pp)
- {
- return x * (Float4(-0.27f) * x + Float4(1.05539816f));
- }
- else
- {
- // From 4.4.49, page 81 of the Handbook of Mathematical Functions, by Milton Abramowitz and Irene Stegun
- const Float4 a2(-0.3333314528f);
- const Float4 a4(0.1999355085f);
- const Float4 a6(-0.1420889944f);
- const Float4 a8(0.1065626393f);
- const Float4 a10(-0.0752896400f);
- const Float4 a12(0.0429096138f);
- const Float4 a14(-0.0161657367f);
- const Float4 a16(0.0028662257f);
- Float4 x2 = x * x;
- return (x + x * (x2 * (a2 + x2 * (a4 + x2 * (a6 + x2 * (a8 + x2 * (a10 + x2 * (a12 + x2 * (a14 + x2 * a16)))))))));
- }
-}
-
-Float4 arctan(RValue<Float4> x, bool pp)
-{
- Float4 absx = Abs(x);
- Int4 O = CmpNLT(absx, Float4(1.0f));
- Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / absx)) | (~O & As<Int4>(absx))); // FIXME: Vector select
-
- const Float4 half_pi(1.57079632f);
- Float4 theta = arctan_01(y, pp);
- return As<Float4>(((O & As<Int4>(half_pi - theta)) | (~O & As<Int4>(theta))) ^ // FIXME: Vector select
- (As<Int4>(x) & Int4(0x80000000)));
-}
-
-Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp)
-{
- const Float4 pi(3.14159265f); // pi
- const Float4 minus_pi(-3.14159265f); // -pi
- const Float4 half_pi(1.57079632f); // pi/2
- const Float4 quarter_pi(7.85398163e-1f); // pi/4
-
- // Rotate to upper semicircle when in lower semicircle
- Int4 S = CmpLT(y, Float4(0.0f));
- Float4 theta = As<Float4>(S & As<Int4>(minus_pi));
- Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x));
- Float4 y0 = Abs(y);
-
- // Rotate to right quadrant when in left quadrant
- Int4 Q = CmpLT(x0, Float4(0.0f));
- theta += As<Float4>(Q & As<Int4>(half_pi));
- Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0))); // FIXME: Vector select
- Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select
-
- // Mirror to first octant when in second octant
- Int4 O = CmpNLT(y1, x1);
- Float4 x2 = As<Float4>((O & As<Int4>(y1)) | (~O & As<Int4>(x1))); // FIXME: Vector select
- Float4 y2 = As<Float4>((O & As<Int4>(x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select
-
- // Approximation of atan in [0..1]
- Int4 zero_x = CmpEQ(x2, Float4(0.0f));
- Int4 inf_y = IsInf(y2); // Since x2 >= y2, this means x2 == y2 == inf, so we use 45 degrees or pi/4
- Float4 atan2_theta = arctan_01(y2 / x2, pp);
- theta += As<Float4>((~zero_x & ~inf_y & ((O & As<Int4>(half_pi - atan2_theta)) | (~O & (As<Int4>(atan2_theta))))) | // FIXME: Vector select
- (inf_y & As<Int4>(quarter_pi)));
-
- // Recover loss of precision for tiny theta angles
- Int4 precision_loss = S & Q & O & ~inf_y; // This combination results in (-pi + half_pi + half_pi - atan2_theta) which is equivalent to -atan2_theta
- return As<Float4>((precision_loss & As<Int4>(-atan2_theta)) | (~precision_loss & As<Int4>(theta))); // FIXME: Vector select
-}
-
-Float4 sineh(RValue<Float4> x, bool pp)
-{
- return (exponential(x, pp) - exponential(-x, pp)) * Float4(0.5f);
-}
-
-Float4 cosineh(RValue<Float4> x, bool pp)
-{
- return (exponential(x, pp) + exponential(-x, pp)) * Float4(0.5f);
-}
-
-Float4 tangenth(RValue<Float4> x, bool pp)
-{
- Float4 e_x = exponential(x, pp);
- Float4 e_minus_x = exponential(-x, pp);
- return (e_x - e_minus_x) / (e_x + e_minus_x);
-}
-
-Float4 arccosh(RValue<Float4> x, bool pp)
-{
- return logarithm(x + Sqrt(x + Float4(1.0f)) * Sqrt(x - Float4(1.0f)), pp);
-}
-
-Float4 arcsinh(RValue<Float4> x, bool pp)
-{
- return logarithm(x + Sqrt(x * x + Float4(1.0f)), pp);
-}
-
-Float4 arctanh(RValue<Float4> x, bool pp)
-{
- return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f);
-}
-
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
{
Int2 tmp0 = UnpackHigh(row0, row1);
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index 876e729..e33c4c0 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp
@@ -203,26 +203,8 @@
Float4 Acosh(RValue<Float4> x);
Float4 Atanh(RValue<Float4> x);
-// Legacy transcendental functions
-// TODO(b/169755552): Consolidate with the functions above
-Float4 exponential2(RValue<Float4> x, bool pp = false);
-Float4 logarithm2(RValue<Float4> x, bool pp = false);
-Float4 exponential(RValue<Float4> x, bool pp = false);
-Float4 logarithm(RValue<Float4> x, bool pp = false);
-Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
Float4 reciprocal(RValue<Float4> x, bool pp = false, bool exactAtPow2 = false);
Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
-Float4 modulo(RValue<Float4> x, RValue<Float4> y);
-Float4 arccos(RValue<Float4> x, bool pp = false);
-Float4 arcsin(RValue<Float4> x, bool pp = false);
-Float4 arctan(RValue<Float4> x, bool pp = false);
-Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
-Float4 sineh(RValue<Float4> x, bool pp = false);
-Float4 cosineh(RValue<Float4> x, bool pp = false);
-Float4 tangenth(RValue<Float4> x, bool pp = false);
-Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1
-Float4 arcsinh(RValue<Float4> x, bool pp = false);
-Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index 7a9b1dd..bc01ab6 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -76,10 +76,10 @@
sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
{
- sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
- sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));
+ sw::SIMD::Float lc = c * (1.0f / 12.92f);
+ sw::SIMD::Float ec = sw::Pow((c + 0.055f) * (1.0f / 1.055f), 2.4f); // TODO(b/149574741): Use an optimized approximation.
- sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));
+ sw::SIMD::Int linear = CmpLT(c, 0.04045f);
return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // TODO: IfThenElse()
}