// blob: a6f782990f78701c46c1d5b7611b43612a1a3b0c [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2011 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
#include "Shell.hpp"

#include <cstring>
namespace sw
{
// Global precision settings for the transcendental helpers; all start at ACCURATE.
// NOTE(review): logPrecision and expPrecision are not read by any function visible
// in this section — confirm where they are consulted.
TranscendentalPrecision logPrecision = ACCURATE;
TranscendentalPrecision expPrecision = ACCURATE;
// Compared against WHQL in reciprocal() to pick a full divide over the approximation.
TranscendentalPrecision rcpPrecision = ACCURATE;
// Compared against IEEE in reciprocalSquareRoot() to pick 1/Sqrt over the approximation.
TranscendentalPrecision rsqPrecision = ACCURATE;
// NOTE(review): presumably enables perspective-correct interpolation; not read in this section.
bool perspectiveCorrection = true;
// Default constructor. Initializes x/y/z/w from the r/g/b/a channels
// (presumably reference aliases declared in the header — confirm there).
// The channels themselves are left default-constructed.
Color4i::Color4i()
    : x(r), y(g), z(b), w(a)
{
}
// Constructs a color from four scalar components; each channel is set to
// Short4(<component>).
Color4i::Color4i(unsigned short red, unsigned short green, unsigned short blue, unsigned short alpha)
    : x(r), y(g), z(b), w(a)
{
    this->r = Short4(red);
    this->g = Short4(green);
    this->b = Short4(blue);
    this->a = Short4(alpha);
}
// Copy constructor. x/y/z/w are initialized from this object's own channels
// (not rhs's); the channel values are then copied through operator=,
// which assigns r, g, b and a in that order.
Color4i::Color4i(const Color4i &rhs)
    : x(r), y(g), z(b), w(a)
{
    *this = rhs;
}
// Copy assignment: copies the four channels only; x/y/z/w are not reassigned.
Color4i &Color4i::operator=(const Color4i &rhs)
{
    this->r = rhs.r;
    this->g = rhs.g;
    this->b = rhs.b;
    this->a = rhs.a;

    return *this;
}
// Indexed component access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Any other index falls back to x.
Short4 &Color4i::operator[](int i)
{
    switch(i)
    {
    case 1:  return y;
    case 2:  return z;
    case 3:  return w;
    default: return x;   // index 0 and every out-of-range index
    }
}
// Default constructor. Initializes both alias sets, x/y/z/w and u/v/s/t, from
// the r/g/b/a channels (presumably reference aliases declared in the header —
// confirm there). The channels themselves are left default-constructed.
Color4f::Color4f()
    : x(r), y(g), z(b), w(a),
      u(r), v(g), s(b), t(a)
{
}
// Constructs a color from four scalar components; each channel is set to
// Float4(<component>).
Color4f::Color4f(float red, float green, float blue, float alpha)
    : x(r), y(g), z(b), w(a),
      u(r), v(g), s(b), t(a)
{
    this->r = Float4(red);
    this->g = Float4(green);
    this->b = Float4(blue);
    this->a = Float4(alpha);
}
// Copy constructor. The alias members are initialized from this object's own
// channels (not rhs's); the channel values are then copied through operator=,
// which assigns r, g, b and a in that order.
Color4f::Color4f(const Color4f &rhs)
    : x(r), y(g), z(b), w(a),
      u(r), v(g), s(b), t(a)
{
    *this = rhs;
}
// Copy assignment: copies the four channels only; the alias members are not reassigned.
Color4f &Color4f::operator=(const Color4f &rhs)
{
    this->r = rhs.r;
    this->g = rhs.g;
    this->b = rhs.b;
    this->a = rhs.a;

    return *this;
}
// Indexed component access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Any other index falls back to x.
Float4 &Color4f::operator[](int i)
{
    switch(i)
    {
    case 1:  return y;
    case 2:  return z;
    case 3:  return w;
    default: return x;   // index 0 and every out-of-range index
    }
}
// Approximates 2^src per component.
//
// The clamped input is split into an integer part n and a remainder f = x - n.
// 2^n is built directly as a float bit pattern ((n + 127) << 23) and 2^f is
// evaluated with a degree-5 polynomial in Horner form. The result is their product.
//
// src - input exponents
// pp  - not used by this implementation
Float4 exponential(Float4 &src, bool pp)
{
    // Clamp the input to the range the bit-pattern construction below can handle.
    Float4 x = src;
    x = Min(x, As<Float4>(Int4(0x43010000, 0x43010000, 0x43010000, 0x43010000)));   // 129.00000e+0f
    x = Max(x, As<Float4>(Int4(0xC2FDFFFF, 0xC2FDFFFF, 0xC2FDFFFF, 0xC2FDFFFF)));   // -126.99999e+0f

    // Integer part: round(x - 0.5).
    Float4 shifted = x;
    shifted -= Float4(0.5f, 0.5f, 0.5f, 0.5f);
    Int4 exponentBits = RoundInt(shifted);
    Float4 integerPart = Float4(exponentBits);

    // Bias the integer by 127 and move it into the exponent field: bit pattern of 2^n.
    exponentBits += Int4(0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F);   // 127
    exponentBits = exponentBits << 23;

    // Remainder fed to the polynomial.
    Float4 f = x;
    f -= integerPart;

    // Horner evaluation, highest-order coefficient first.
    Float4 poly = As<Float4>(Int4(0x3AF61905, 0x3AF61905, 0x3AF61905, 0x3AF61905));   // 1.8775767e-3f
    poly = poly * f + As<Float4>(Int4(0x3C134806, 0x3C134806, 0x3C134806, 0x3C134806));   // 8.9893397e-3f
    poly = poly * f + As<Float4>(Int4(0x3D64AA23, 0x3D64AA23, 0x3D64AA23, 0x3D64AA23));   // 5.5826318e-2f
    poly = poly * f + As<Float4>(Int4(0x3E75EAD4, 0x3E75EAD4, 0x3E75EAD4, 0x3E75EAD4));   // 2.4015361e-1f
    poly = poly * f + As<Float4>(Int4(0x3F31727B, 0x3F31727B, 0x3F31727B, 0x3F31727B));   // 6.9315308e-1f
    poly = poly * f + As<Float4>(Int4(0x3F7FFFFF, 0x3F7FFFFF, 0x3F7FFFFF, 0x3F7FFFFF));   // 9.9999994e-1f

    // Scale by 2^n.
    poly *= As<Float4>(exponentBits);

    return poly;
}
// Approximates the base-2 logarithm of src per component.
//
// The float is split into its exponent and mantissa fields. The unbiased exponent
// supplies the integer part of the result, and a rational polynomial in the
// mantissa m (in [1, 2)) supplies the rest: result = exponent + (m - 1) * P(m) / Q(m).
//
// Both bit masks below discard the sign bit, so the sign of src never affects the result.
// The 'absolute' and 'pp' parameters are not used by this implementation.
Float4 logarithm(Float4 &src, bool absolute, bool pp)
{
Float4 x0;
Float4 x1;
Float4 x2;
Float4 x3;
x0 = src;
// Isolate the 8-bit exponent field (bits 23..30).
x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000));
// Shift it down into the top of the mantissa field...
x1 = As<Float4>(As<UInt4>(x1) >> 8);
// ...and give it the exponent of 1.0f, yielding the float 1 + E / 256 (E = biased exponent).
x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f)));
// 1.4960938 ~= 1 + 127 / 256, so this recovers approximately E - 127, the unbiased exponent.
x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f;
// Keep the mantissa bits and force the exponent of 1.0f: m in [1, 2).
x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f)));
// Numerator P(m), quadratic in Horner form.
x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f);
// Denominator Q(m), cubic in Horner form.
x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f);
x2 /= x3;
// Add the mantissa contribution to the exponent.
x1 += (x0 - Float4(1.0f)) * x2;
return x1;
}
// Computes src0 raised to src1 as exponential(logarithm(src0) * src1).
// logarithm() is called with absolute = true; pp is forwarded to both helpers.
Float4 power(Float4 &src0, Float4 &src1, bool pp)
{
    Float4 scaledLog = logarithm(src0, true, pp) * src1;

    return exponential(scaledLog, pp);
}
// Computes 1 / src per component.
//
// src    - divisor
// pp     - partial precision: when true, the raw Rcp_pp() approximation is returned unrefined
// finite - when true, the result is clamped from above to the largest finite float
//
// With full precision requested and rcpPrecision >= WHQL, a true divide is emitted.
// Otherwise Rcp_pp() is used, refined by one Newton-Raphson step when pp is false.
Float4 reciprocal(Float4 &src, bool pp, bool finite)
{
    Float4 dst;

    if(!pp && rcpPrecision >= WHQL)
    {
        dst = Float4(1, 1, 1, 1) / src;
    }
    else
    {
        dst = Rcp_pp(src);

        if(!pp)
        {
            // Newton-Raphson refinement: r' = 2r - src * r * r
            dst = (dst + dst) - (src * dst * dst);
        }
    }

    if(finite)
    {
        // 0x7F7FFFFF is the bit pattern of the largest finite single-precision value.
        // Copy the bits with memcpy: reading an int through a float reference
        // violates strict aliasing and is undefined behaviour.
        const int maxFloatBits = 0x7F7FFFFF;
        float maxFloat;
        std::memcpy(&maxFloat, &maxFloatBits, sizeof(maxFloat));

        dst = Min(dst, Float4(maxFloat, maxFloat, maxFloat, maxFloat));
    }

    return dst;
}
// Computes 1 / sqrt(src) per component, clamped from above to the largest finite float.
//
// src      - input values
// absolute - when true, the absolute value of src is used
// pp       - partial precision: when true, the raw RcpSqrt_pp() approximation is returned unrefined
//
// With full precision requested and rsqPrecision >= IEEE, 1 / Sqrt() is emitted.
// Otherwise RcpSqrt_pp() is used, refined by one Newton-Raphson step when pp is false.
Float4 reciprocalSquareRoot(Float4 &src, bool absolute, bool pp)
{
    // Named 'magnitude' rather than 'abs' to avoid shadowing the standard function name.
    Float4 magnitude = src;

    if(absolute)
    {
        magnitude = Abs(magnitude);
    }

    Float4 rsq;

    if(!pp && rsqPrecision >= IEEE)
    {
        rsq = Float4(1.0f, 1.0f, 1.0f, 1.0f) / Sqrt(magnitude);
    }
    else
    {
        rsq = RcpSqrt_pp(magnitude);

        if(!pp)
        {
            // Newton-Raphson refinement: r' = r * (3 - r * r * a) / 2
            rsq = rsq * (Float4(3.0f) - rsq * rsq * magnitude) * Float4(0.5f);
        }
    }

    // 0x7F7FFFFF is the bit pattern of the largest finite single-precision value.
    // Copy the bits with memcpy: reading an int through a float reference
    // violates strict aliasing and is undefined behaviour.
    const int maxFloatBits = 0x7F7FFFFF;
    float maxFloat;
    std::memcpy(&maxFloat, &maxFloatBits, sizeof(maxFloat));

    rsq = Min(rsq, Float4(maxFloat, maxFloat, maxFloat, maxFloat));

    return rsq;
}
// Approximates sin(src) per component with a parabola followed by one
// refinement pass of the same shape.
// NOTE(review): the -4/pi^2 and 4/pi coefficients suggest src is expected in
// [-pi, pi]; no range reduction is done here — confirm against callers.
// The pp parameter is not used by this implementation.
Float4 sine(Float4 &src, bool pp)
{
    const Float4 parabolaQuadratic = Float4(-4.05284734e-1f);   // -4/pi^2
    const Float4 parabolaLinear = Float4(1.27323954e+0f);       // 4/pi
    const Float4 refineLinear = Float4(7.75160950e-1f);
    const Float4 refineQuadratic = Float4(2.24839049e-1f);

    // Parabola approximating sine
    Float4 estimate = src * (Abs(src) * parabolaQuadratic + parabolaLinear);

    // Improve precision from 0.06 to 0.001
    estimate = estimate * (Abs(estimate) * refineQuadratic + refineLinear);

    return estimate;
}
// Three-component dot product: x*x + y*y + z*z, accumulated left to right.
Float4 dot3(Color4f &src0, Color4f &src1)
{
    Float4 sum = src0.x * src1.x;
    sum = sum + src0.y * src1.y;
    sum = sum + src0.z * src1.z;

    return sum;
}
// Four-component dot product: x*x + y*y + z*z + w*w, accumulated left to right.
Float4 dot4(Color4f &src0, Color4f &src1)
{
    Float4 sum = src0.x * src1.x;
    sum = sum + src0.y * src1.y;
    sum = sum + src0.z * src1.z;
    sum = sum + src0.w * src1.w;

    return sum;
}
// In-place transpose of a 4x4 matrix of 16-bit elements held in four Short4 rows.
// Done in two interleave stages: first on 16-bit elements of row pairs, then on
// the resulting 32-bit pairs.
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)
{
    // Stage 1: interleave rows 0/1 and rows 2/3.
    Int2 lo01 = UnpackLow(row0, row1);
    Int2 lo23 = UnpackLow(row2, row3);
    Int2 hi01 = UnpackHigh(row0, row1);
    Int2 hi23 = UnpackHigh(row2, row3);

    // Stage 2: interleave the pairs to form the transposed rows.
    row0 = As<Short4>(UnpackLow(lo01, lo23));
    row1 = As<Short4>(UnpackHigh(lo01, lo23));
    row2 = As<Short4>(UnpackLow(hi01, hi23));
    row3 = As<Short4>(UnpackHigh(hi01, hi23));
}
// In-place transpose of a 4x4 float matrix held in four Float4 rows.
void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{
    // Interleave rows 0/1 and rows 2/3.
    Float4 lo01 = UnpackLow(row0, row1);
    Float4 lo23 = UnpackLow(row2, row3);
    Float4 hi01 = UnpackHigh(row0, row1);
    Float4 hi23 = UnpackHigh(row2, row3);

    // Stitch matching halves together to form the transposed rows.
    row0 = Float4(lo01.xy, lo23.xy);
    row1 = Float4(lo01.zw, lo23.zw);
    row2 = Float4(hi01.xy, hi23.xy);
    row3 = Float4(hi01.zw, hi23.zw);
}
// Same as the Float4 transpose4x4, but only the first three output rows are
// written; row3 is read as input and left unmodified.
void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{
    Float4 lo01 = UnpackLow(row0, row1);
    Float4 lo23 = UnpackLow(row2, row3);
    Float4 hi01 = UnpackHigh(row0, row1);
    Float4 hi23 = UnpackHigh(row2, row3);

    row0 = Float4(lo01.xy, lo23.xy);
    row1 = Float4(lo01.zw, lo23.zw);
    row2 = Float4(hi01.xy, hi23.xy);
}
// Same as the Float4 transpose4x4, but only the first two output rows are
// written; row2 and row3 are read as inputs and left unmodified.
void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{
    Float4 lo01 = UnpackLow(row0, row1);
    Float4 lo23 = UnpackLow(row2, row3);

    row0 = Float4(lo01.xy, lo23.xy);
    row1 = Float4(lo01.zw, lo23.zw);
}
// Same as the Float4 transpose4x4, but only the first output row is written;
// row1, row2 and row3 are read as inputs and left unmodified.
void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{
    Float4 lo01 = UnpackLow(row0, row1);
    Float4 lo23 = UnpackLow(row2, row3);

    row0 = Float4(lo01.xy, lo23.xy);
}
// Transposes the two input rows row0 and row1 into four output rows.
//
// Assuming UnpackLow/UnpackHigh interleave the low/high halves of their operands
// (the way transpose4x4 relies on them), the .xy lanes of the outputs hold:
//   row0.xy = (in0.x, in1.x)    row1.xy = (in0.y, in1.y)
//   row2.xy = (in0.z, in1.z)    row3.xy = (in0.w, in1.w)
// The .zw lanes of the outputs carry leftover data.
//
// Both interleaves are computed before any output is stored. row0 and row1 are
// inputs as well as outputs, so computing UnpackHigh(row0, row1) after they have
// been overwritten would build row2 and row3 from clobbered values.
void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{
    Float4 lo01 = UnpackLow(row0, row1);
    Float4 hi01 = UnpackHigh(row0, row1);

    row0 = lo01;
    row1 = Float4(lo01.zw, row1.zw);
    row2 = hi01;
    row3 = Float4(hi01.zw, row3.zw);
}
// Variant of transpose2x4 that takes row2 and row3 as the two input rows and
// spreads their interleaved elements across all four rows.
// row1 and row3 keep their own incoming .zw lanes in the upper half of the result.
void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{
    Float4 lo23 = UnpackLow(row2, row3);
    row0 = lo23;
    row1 = Float4(lo23.zw, row1.zw);

    Float4 hi23 = UnpackHigh(row2, row3);
    row2 = hi23;
    row3 = Float4(hi23.zw, row3.zw);
}
// Dispatches to transpose4x1 .. transpose4x4 according to N.
// Any N outside 1..4 leaves all rows untouched.
void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N)
{
    if(N == 1)
    {
        transpose4x1(row0, row1, row2, row3);
    }
    else if(N == 2)
    {
        transpose4x2(row0, row1, row2, row3);
    }
    else if(N == 3)
    {
        transpose4x3(row0, row1, row2, row3);
    }
    else if(N == 4)
    {
        transpose4x4(row0, row1, row2, row3);
    }
}
}