Matrix determinant and inverse implementation
The determinant is implemented directly in ShaderCore
in order to avoid having to allocate temporaries manually
in OutputASM.
For now, the implementation of the inverse matrix is very
simple: it does not attempt to reuse results from the
cofactor matrix computation to compute the determinant, nor
does it apply any other kind of optimization, but it works.
Change-Id: I0fc70133809ae2752dc567bf58b60d7af7a88009
Reviewed-on: https://swiftshader-review.googlesource.com/4000
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index df629d9..aca41bd 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -171,6 +171,9 @@
case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break;
case Shader::OPCODE_DP3: dp3(d, s0, s1); break;
case Shader::OPCODE_DP4: dp4(d, s0, s1); break;
+ case Shader::OPCODE_DET2: det2(d, s0, s1); break;
+ case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break;
+ case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break;
case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break;
case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break;
case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break;
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp
index 11c3f20..f92ee73 100644
--- a/src/Shader/Shader.cpp
+++ b/src/Shader/Shader.cpp
@@ -764,6 +764,9 @@
case OPCODE_DIST4: return "dist4";
case OPCODE_DP3: return "dp3";
case OPCODE_DP4: return "dp4";
+ case OPCODE_DET2: return "det2";
+ case OPCODE_DET3: return "det3";
+ case OPCODE_DET4: return "det4";
case OPCODE_MIN: return "min";
case OPCODE_IMIN: return "imin";
case OPCODE_UMIN: return "umin";
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp
index 339279f..094d3b7 100644
--- a/src/Shader/Shader.hpp
+++ b/src/Shader/Shader.hpp
@@ -188,6 +188,9 @@
OPCODE_B2I, // Bool to int
OPCODE_U2B, // Uint to bool
OPCODE_B2U, // Bool to uint
+ OPCODE_DET2,
+ OPCODE_DET3,
+ OPCODE_DET4,
OPCODE_ALL,
OPCODE_ANY,
OPCODE_NEG,
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index 406b038..6ad3953 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp
@@ -1139,6 +1139,34 @@
Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw);
}
+ void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1)
+ { // 2x2 determinant built from the x/y components of src0 and src1; z/w of the sources are ignored.
+ Float4 det = src0.x * src1.y - src0.y * src1.x;
+ dst.x = det; dst.y = det; dst.z = det; dst.w = det; // same value stored in every component of dst
+ }
+
+ void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2)
+ { // 3x3 determinant as src0 . (src1 x src2), assuming crs() is the cross product and dp3() the 3-component dot product (both defined outside this hunk - confirm).
+ crs(dst, src1, src2); // dst serves as scratch for the intermediate; NOTE(review): src0 is read only afterwards, so dst presumably must not alias src0
+ dp3(dst, dst, src0); // combines the intermediate in dst with src0 and writes the final result back into dst
+ }
+
+ void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3)
+ { // 4x4 determinant of the matrix formed by src0..src3 (as rows or as columns - the value is the same), expanded by cofactors along src0.
+ dst.x = src2.z * src3.w - src2.w * src3.z; // dst.xyz are scratch: three 2x2 sub-determinants over the z/w components of src1..src3
+ dst.y = src1.w * src3.z - src1.z * src3.w; // operand order carries the cofactor sign, so the terms below can simply be added
+ dst.z = src1.z * src2.w - src1.w * src2.z; // NOTE: dst is written before the sources are fully read, so dst must not alias any source
+ dst.x = src0.x * (src1.y * dst.x + src2.y * dst.y + src3.y * dst.z) - // src0.x term; the scratch dst.xyz are read before dst.x is overwritten
+ src0.y * (src1.x * dst.x + src2.x * dst.y + src3.x * dst.z) + // src0.y term (subtracted), reusing the same scratch values
+ src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) + // src0.z term: 2x2 sub-determinants over y/w, computed inline
+ src2.x * (src1.w * src3.y - src1.y * src3.w) +
+ src3.x * (src1.y * src2.w - src1.w * src2.y)) +
+ src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) + // src0.w term: 2x2 sub-determinants over y/z, sign folded into operand order
+ src2.x * (src1.y * src3.z - src1.z * src3.y) +
+ src3.x * (src1.z * src2.y - src1.y * src2.z));
+ dst.y = dst.z = dst.w = dst.x; // copy the scalar result into the remaining components
+ }
+
void ShaderCore::frc(Vector4f &dst, const Vector4f &src)
{
dst.x = Frac(src.x);
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp
index c3308aa..565d682 100644
--- a/src/Shader/ShaderCore.hpp
+++ b/src/Shader/ShaderCore.hpp
@@ -284,6 +284,9 @@
void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
+ void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
+ void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
+ void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 35581af..2040480 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -102,6 +102,7 @@
Src src0 = instruction->src[0];
Src src1 = instruction->src[1];
Src src2 = instruction->src[2];
+ Src src3 = instruction->src[3];
bool predicate = instruction->predicate;
Control control = instruction->control;
@@ -112,10 +113,12 @@
Vector4f s0;
Vector4f s1;
Vector4f s2;
+ Vector4f s3;
if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
+ if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
switch(opcode)
{
@@ -151,6 +154,9 @@
case Shader::OPCODE_DP2: dp2(d, s0, s1); break;
case Shader::OPCODE_DP3: dp3(d, s0, s1); break;
case Shader::OPCODE_DP4: dp4(d, s0, s1); break;
+ case Shader::OPCODE_DET2: det2(d, s0, s1); break;
+ case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break;
+ case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break;
case Shader::OPCODE_ATT: att(d, s0, s1); break;
case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break;
case Shader::OPCODE_EXP2: exp2(d, s0, pp); break;