// Source blob: be9e2204fd36a61183cd8bfc780b9b2e475ada32 (repository-browser header line, kept as a comment)
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef sw_ShaderCore_hpp
#define sw_ShaderCore_hpp
#include "Shader.hpp"
#include "Reactor/Reactor.hpp"
#include "Reactor/Print.hpp"
#include "Common/Debug.hpp"
namespace sw
{
using namespace rr;
// Group of four Short4 values, stored as the public members x, y, z and w.
// Only declarations appear in this header; the member functions are defined elsewhere.
class Vector4s
{
public:
// Default constructor.
Vector4s();
// Constructs from one unsigned short per component.
// NOTE(review): presumably each scalar is replicated across the lanes of its Short4 - confirm in the definition.
Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
// Copy constructor.
Vector4s(const Vector4s &rhs);
// Returns a mutable reference to one component selected by index.
// NOTE(review): presumably 0..3 map to x..w; out-of-range handling is not visible here.
Short4 &operator[](int i);
// Copy assignment.
Vector4s &operator=(const Vector4s &rhs);
// The four components.
Short4 x;
Short4 y;
Short4 z;
Short4 w;
};
// Group of four Float4 values, stored as the public members x, y, z and w.
// Only declarations appear in this header; the member functions are defined elsewhere.
class Vector4f
{
public:
// Default constructor.
Vector4f();
// Constructs from one float per component.
// NOTE(review): presumably each scalar is replicated across the lanes of its Float4 - confirm in the definition.
Vector4f(float x, float y, float z, float w);
// Copy constructor.
Vector4f(const Vector4f &rhs);
// Returns a mutable reference to one component selected by index.
// NOTE(review): presumably 0..3 map to x..w; out-of-range handling is not visible here.
Float4 &operator[](int i);
// Copy assignment.
Vector4f &operator=(const Vector4f &rhs);
// The four components.
Float4 x;
Float4 y;
Float4 z;
Float4 w;
};
// Math helpers taking and returning Reactor Float4 values. Only the
// declarations are visible in this header; the definitions live elsewhere.
// NOTE(review): 'pp' presumably selects a partial-precision variant, and the
// 'abs' flag presumably makes the function operate on the absolute value of
// x - confirm both against the definitions.

// Exponential, logarithm, power and reciprocal helpers.
Float4 exponential2(RValue<Float4> x, bool pp = false);
Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
Float4 exponential(RValue<Float4> x, bool pp = false);
Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
// NOTE(review): the meaning of 'finite' and 'exactAtPow2' is not visible here - see the definition.
Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
Float4 modulo(RValue<Float4> x, RValue<Float4> y);
// Trigonometric helpers. The *_pi variants only accept a restricted input range.
Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
Float4 sine(RValue<Float4> x, bool pp = false);
Float4 cosine(RValue<Float4> x, bool pp = false);
Float4 tangent(RValue<Float4> x, bool pp = false);
Float4 arccos(RValue<Float4> x, bool pp = false);
Float4 arcsin(RValue<Float4> x, bool pp = false);
// One-argument and two-argument (y, x) arctangent overloads.
Float4 arctan(RValue<Float4> x, bool pp = false);
Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
// Hyperbolic helpers and their inverses; note the input restrictions on two of them.
Float4 sineh(RValue<Float4> x, bool pp = false);
Float4 cosineh(RValue<Float4> x, bool pp = false);
Float4 tangenth(RValue<Float4> x, bool pp = false);
Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1
Float4 arcsinh(RValue<Float4> x, bool pp = false);
Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
// Helpers combining two Vector4f operands into a single Float4 result.
// NOTE(review): presumably the dot product over the first 2, 3 or 4
// components (x,y / x,y,z / x,y,z,w) - the definitions are not visible here.
Float4 dot2(const Vector4f &v0, const Vector4f &v1);
Float4 dot3(const Vector4f &v0, const Vector4f &v1);
Float4 dot4(const Vector4f &v0, const Vector4f &v1);
// Transpose helpers. Every row is passed by non-const reference and nothing
// is returned, so results are written back into the arguments.
// NOTE(review): the RxC suffix presumably names the shape of the block being
// transposed; the definitions are not visible in this header.

// Short4 overloads.
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
// Float4 overloads.
void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
// Variant taking the count N as a C++ int argument.
// NOTE(review): presumably dispatches to one of the fixed-size variants above - confirm.
void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
// A four-component register view: one Reference<Float4> for each of x, y, z
// and w. RegisterFile::operator[] hands these out; assigning a Vector4f stores
// through the references, and converting to Vector4f reads through them.
class Register
{
public:
	// Wraps the four supplied component references.
	Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w)
		: x(x), y(y), z(z), w(w)
	{
	}

	Register(const Register &rhs) = default;

	// Selects a component by index. Indices 1, 2 and 3 give y, z and w;
	// every other value (0 as well as anything out of range) gives x.
	Reference<Float4> &operator[](int i)
	{
		if(i == 1)
		{
			return y;
		}

		if(i == 2)
		{
			return z;
		}

		if(i == 3)
		{
			return w;
		}

		return x;
	}

	Register &operator=(const Register &rhs) = default;

	// Assigns each component of rhs to the matching reference, in x, y, z, w order.
	Register &operator=(const Vector4f &rhs)
	{
		x = rhs.x;
		y = rhs.y;
		z = rhs.z;
		w = rhs.w;

		return *this;
	}

	// Produces a Vector4f whose components are assigned from the four
	// references, in x, y, z, w order.
	operator Vector4f()
	{
		Vector4f result;

		result.x = x;
		result.y = y;
		result.z = z;
		result.w = w;

		return result;
	}

	Reference<Float4> x;
	Reference<Float4> y;
	Reference<Float4> z;
	Reference<Float4> w;
};
// Storage for `size` four-component registers, held as four parallel sets of
// Array<Float4> (one set per component: x, y, z, w).
//
// The layout is chosen at construction:
//  - indirectAddressable == true: each component is ONE Array<Float4> with
//    `size` elements, accessed as x[0][i]. This is the only layout that
//    operator[](RValue<Int>) accepts.
//  - indirectAddressable == false: each component is a C++ array of `size`
//    default-constructed Array<Float4> objects, accessed as x[i][0].
//
// The allocations are owned through raw pointers and released in the
// destructor, so the class is non-copyable: an implicit member-wise copy
// would leave two objects deleting the same arrays.
class RegisterFile
{
public:
	// Allocates the four component arrays in the layout selected by
	// indirectAddressable (see the class comment).
	RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable)
	{
		if(indirectAddressable)
		{
			x = new Array<Float4>(size);
			y = new Array<Float4>(size);
			z = new Array<Float4>(size);
			w = new Array<Float4>(size);
		}
		else
		{
			x = new Array<Float4>[size];
			y = new Array<Float4>[size];
			z = new Array<Float4>[size];
			w = new Array<Float4>[size];
		}
	}

	// Copying would duplicate the raw owning pointers and lead to a double
	// delete in the destructors, so it is explicitly disabled.
	RegisterFile(const RegisterFile &) = delete;
	RegisterFile &operator=(const RegisterFile &) = delete;

	// Releases the arrays with the delete form matching how they were allocated.
	~RegisterFile()
	{
		if(indirectAddressable)
		{
			delete x;
			delete y;
			delete z;
			delete w;
		}
		else
		{
			delete[] x;
			delete[] y;
			delete[] z;
			delete[] w;
		}
	}

	// Returns register i, addressed with a C++ integer.
	// The index must lie in [0, size); both bounds are asserted.
	Register operator[](int i)
	{
		ASSERT(i >= 0 && i < size);

		if(indirectAddressable)
		{
			return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
		}
		else
		{
			return Register(x[i][0], y[i][0], z[i][0], w[i][0]);
		}
	}

	// Returns the register addressed by a Reactor Int value.
	// Only valid for the indirectly addressable layout (asserted).
	Register operator[](RValue<Int> i)
	{
		ASSERT(indirectAddressable);

		return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
	}

	const Vector4f operator[](RValue<Int4> i); // Gather operation (read only).

	// Per-component scatter operations; defined elsewhere.
	void scatter_x(Int4 i, RValue<Float4> r);
	void scatter_y(Int4 i, RValue<Float4> r);
	void scatter_z(Int4 i, RValue<Float4> r);
	void scatter_w(Int4 i, RValue<Float4> r);

protected:
	const int size;                  // Number of registers.
	const bool indirectAddressable;  // Selects the storage layout; fixed for the object's lifetime.

	// Owning pointers. The types are left as raw pointers so that the
	// out-of-line members declared above keep compiling unchanged.
	Array<Float4> *x;
	Array<Float4> *y;
	Array<Float4> *z;
	Array<Float4> *w;
};
// RegisterFile whose register count is the compile-time constant S.
// The template argument I only provides the default value of the
// constructor's indirectAddressable flag; a caller may still pass either value.
template<int S, bool I = false>
class RegisterArray : public RegisterFile
{
public:
	// Forwards S and the addressing flag to the RegisterFile constructor.
	RegisterArray(bool indirectAddressable = I)
		: RegisterFile(S, indirectAddressable)
	{
	}
};
// Collection of shader instruction helpers. Almost every public member takes
// a destination by non-const reference (Vector4f or Float4) followed by one or
// more const source operands; only the declarations are visible in this
// header, the definitions live elsewhere.
// NOTE(review): the section headings below are inferred from the method names
// only. 'pp' presumably selects a partial-precision variant - confirm against
// the definitions.
class ShaderCore
{
// Local shorthand for Shader::Control, used by the cmp/icmp/ucmp declarations.
typedef Shader::Control Control;
public:
// --- Moves, negation and type conversions ---
void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
void neg(Vector4f &dst, const Vector4f &src);
void ineg(Vector4f &dst, const Vector4f &src);
void f2b(Vector4f &dst, const Vector4f &src);
void b2f(Vector4f &dst, const Vector4f &src);
void f2i(Vector4f &dst, const Vector4f &src);
void i2f(Vector4f &dst, const Vector4f &src);
void f2u(Vector4f &dst, const Vector4f &src);
void u2f(Vector4f &dst, const Vector4f &src);
void i2b(Vector4f &dst, const Vector4f &src);
void b2i(Vector4f &dst, const Vector4f &src);
// --- Arithmetic (an 'i' / 'u' prefix presumably marks signed / unsigned integer variants) ---
void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
// --- Shifts ---
void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
// --- Square roots ---
void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
// --- Lengths and distances (these write a single Float4, not a Vector4f) ---
void len2(Float4 &dst, const Vector4f &src, bool pp = false);
void len3(Float4 &dst, const Vector4f &src, bool pp = false);
void len4(Float4 &dst, const Vector4f &src, bool pp = false);
void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
// --- Dot products and determinants ---
void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
// --- Minimum / maximum and threshold operations ---
void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
// --- Exponentials and logarithms ---
void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
void log(Vector4f &dst, const Vector4f &src, bool pp = false);
// --- Miscellaneous operations ---
void lit(Vector4f &dst, const Vector4f &src);
void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void isinf(Vector4f &dst, const Vector4f &src);
void isnan(Vector4f &dst, const Vector4f &src);
void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
// --- Packing / unpacking ---
void packHalf2x16(Vector4f &dst, const Vector4f &src);
void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
void packSnorm2x16(Vector4f &dst, const Vector4f &src);
void packUnorm2x16(Vector4f &dst, const Vector4f &src);
void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
// --- Rounding and fractional part ---
void frc(Vector4f &dst, const Vector4f &src);
void trunc(Vector4f &dst, const Vector4f &src);
void floor(Vector4f &dst, const Vector4f &src);
void round(Vector4f &dst, const Vector4f &src);
void roundEven(Vector4f &dst, const Vector4f &src);
void ceil(Vector4f &dst, const Vector4f &src);
// --- Powers ---
void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
// --- Geometric operations (the numeric suffix presumably gives the component count) ---
void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
// The refract variants take their third operand as a single Float4.
void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
// --- Sign, absolute value and normalization ---
void sgn(Vector4f &dst, const Vector4f &src);
void isgn(Vector4f &dst, const Vector4f &src);
void abs(Vector4f &dst, const Vector4f &src);
void iabs(Vector4f &dst, const Vector4f &src);
void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
// --- Trigonometric and hyperbolic functions ---
void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
// --- Variants that take the shader model as an extra argument ---
void expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
void logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
// --- Comparison and selection (cmp/icmp/ucmp take a Shader::Control value) ---
void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
// --- Component extract / insert and reductions ---
void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
void all(Float4 &dst, const Vector4f &src);
void any(Float4 &dst, const Vector4f &src);
// --- Bitwise operations and equality ---
void bitwise_not(Vector4f &dst, const Vector4f &src);
void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
private:
// Single-Float4 overloads with the same names as public Vector4f members above.
// NOTE(review): presumably the per-component workers behind those members - confirm.
void sgn(Float4 &dst, const Float4 &src);
void isgn(Float4 &dst, const Float4 &src);
void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
// Half <-> float bit-pattern conversion helpers.
// NOTE(review): presumably used by the packHalf2x16 / unpackHalf2x16 members - confirm.
void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
void halfToFloatBits(Float4& dst, const Float4& halfBits);
};
}
#ifdef ENABLE_RR_PRINT
namespace rr {
// PrintValue specialization for sw::Vector4f: formats the vector as
// "[x: ..., y: ..., z: ..., w: ...]" using the format of each component.
template <> struct PrintValue::Ty<sw::Vector4f>
{
	// Builds the format text by concatenating the four per-component formats.
	static std::string fmt(const sw::Vector4f& v)
	{
		std::string text = "[x: ";
		text += PrintValue::fmt(v.x);
		text += ", y: ";
		text += PrintValue::fmt(v.y);
		text += ", z: ";
		text += PrintValue::fmt(v.z);
		text += ", w: ";
		text += PrintValue::fmt(v.w);
		text += "]";

		return text;
	}

	// Collects the values of the four components, in x, y, z, w order.
	static std::vector<rr::Value*> val(const sw::Vector4f& v)
	{
		std::vector<rr::Value*> values = PrintValue::vals(v.x, v.y, v.z, v.w);

		return values;
	}
};
// PrintValue specialization for sw::Vector4s: formats the vector as
// "[x: ..., y: ..., z: ..., w: ...]" using the format of each component.
template <> struct PrintValue::Ty<sw::Vector4s>
{
	// Builds the format text by concatenating the four per-component formats.
	static std::string fmt(const sw::Vector4s& v)
	{
		std::string text = "[x: ";
		text += PrintValue::fmt(v.x);
		text += ", y: ";
		text += PrintValue::fmt(v.y);
		text += ", z: ";
		text += PrintValue::fmt(v.z);
		text += ", w: ";
		text += PrintValue::fmt(v.w);
		text += "]";

		return text;
	}

	// Collects the values of the four components, in x, y, z, w order.
	static std::vector<rr::Value*> val(const sw::Vector4s& v)
	{
		std::vector<rr::Value*> values = PrintValue::vals(v.x, v.y, v.z, v.w);

		return values;
	}
};
}
#endif // ENABLE_RR_PRINT
#endif // sw_ShaderCore_hpp