Fixed some unary operators
There were a few issues in unary operators:
- Many failed to compile because the promote function had not
been updated to account for the new built-in functions
- abs and sign had not been implemented for int
- The integer version of abs uses pabsd when SSSE3 is available,
with a shift/xor fallback otherwise. Removed the second argument
of the x86::pabsd wrapper, which seemed unnecessary (abs should
have 1 input and 1 output, AFAIK).
Change-Id: If02c5040438e8c45c99fc7b3c55107448c85cf58
Reviewed-on: https://swiftshader-review.googlesource.com/4970
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/OpenGL/compiler/Intermediate.cpp b/src/OpenGL/compiler/Intermediate.cpp
index a0247b9..7692081 100644
--- a/src/OpenGL/compiler/Intermediate.cpp
+++ b/src/OpenGL/compiler/Intermediate.cpp
@@ -752,6 +752,20 @@
case EOpAny:
case EOpAll:
case EOpVectorLogicalNot:
+ case EOpAbs:
+ case EOpSign:
+ case EOpIsNan:
+ case EOpIsInf:
+ case EOpFloatBitsToInt:
+ case EOpFloatBitsToUint:
+ case EOpIntBitsToFloat:
+ case EOpUintBitsToFloat:
+ case EOpPackSnorm2x16:
+ case EOpPackUnorm2x16:
+ case EOpPackHalf2x16:
+ case EOpUnpackSnorm2x16:
+ case EOpUnpackUnorm2x16:
+ case EOpUnpackHalf2x16:
return true;
default:
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index e14d6a5..52c1b87 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -368,6 +368,24 @@
default:
return op;
}
+ case sw::Shader::OPCODE_ABS:
+ switch(baseType)
+ {
+ case EbtInt:
+ return sw::Shader::OPCODE_IABS;
+ case EbtFloat:
+ default:
+ return op;
+ }
+ case sw::Shader::OPCODE_SGN:
+ switch(baseType)
+ {
+ case EbtInt:
+ return sw::Shader::OPCODE_ISGN;
+ case EbtFloat:
+ default:
+ return op;
+ }
case sw::Shader::OPCODE_ADD:
switch(baseType)
{
@@ -984,8 +1002,8 @@
case EOpLog2: if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
case EOpSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
case EOpInverseSqrt: if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
- case EOpAbs: if(visit == PostVisit) emit(sw::Shader::OPCODE_ABS, result, arg); break;
- case EOpSign: if(visit == PostVisit) emit(sw::Shader::OPCODE_SGN, result, arg); break;
+ case EOpAbs: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
+ case EOpSign: if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
case EOpFloor: if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
case EOpTrunc: if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
case EOpRound: if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
diff --git a/src/Reactor/Nucleus.cpp b/src/Reactor/Nucleus.cpp
index ce48b52..9b1d245 100644
--- a/src/Reactor/Nucleus.cpp
+++ b/src/Reactor/Nucleus.cpp
@@ -3579,6 +3579,19 @@
return x86::pmaddwd(x, y); // FIXME: Fallback required
}
+ RValue<Int4> Abs(RValue<Int4> x)
+ {
+ if(CPUID::supportsSSSE3())
+ {
+ return x86::pabsd(x);
+ }
+ else
+ {
+ Int4 mask = (x >> 31);
+ return (mask ^ x) - mask;
+ }
+ }
+
RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
{
return x86::pmulhw(x, y); // FIXME: Fallback required
@@ -7157,12 +7170,12 @@
return cmpss(x, y, 7);
}
- RValue<Int4> pabsd(RValue<Int4> x, RValue<Int4> y)
+ RValue<Int4> pabsd(RValue<Int4> x)
{
Module *module = Nucleus::getModule();
llvm::Function *pabsd = Intrinsic::getDeclaration(module, Intrinsic::x86_ssse3_pabs_d_128);
- return RValue<Int4>(Nucleus::createCall(pabsd, x.value, y.value));
+ return RValue<Int4>(Nucleus::createCall(pabsd, x.value));
}
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 6b6cdb4..184a2bb 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -1115,6 +1115,7 @@
RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y);
RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y);
+ RValue<Int4> Abs(RValue<Int4> x);
class UShort8 : public Variable<UShort8>
{
diff --git a/src/Reactor/x86.hpp b/src/Reactor/x86.hpp
index b2c3922..ad7cf59 100644
--- a/src/Reactor/x86.hpp
+++ b/src/Reactor/x86.hpp
@@ -58,7 +58,7 @@
RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y);
RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y);
- RValue<Int4> pabsd(RValue<Int4> x, RValue<Int4> y);
+ RValue<Int4> pabsd(RValue<Int4> x);
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y);
RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y);
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index 203387c..0286bc30 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -231,6 +231,7 @@
case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break;
case Shader::OPCODE_POW: pow(d, s0, s1, pp); break;
case Shader::OPCODE_SGN: sgn(d, s0); break;
+ case Shader::OPCODE_ISGN: isgn(d, s0); break;
case Shader::OPCODE_CRS: crs(d, s0, s1); break;
case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break;
case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break;
@@ -248,6 +249,7 @@
case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break;
case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break;
case Shader::OPCODE_ABS: abs(d, s0); break;
+ case Shader::OPCODE_IABS: iabs(d, s0); break;
case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break;
case Shader::OPCODE_COS: cos(d, s0, pp); break;
case Shader::OPCODE_SIN: sin(d, s0, pp); break;
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp
index 5cc9242..7c3f9bb 100644
--- a/src/Shader/Shader.cpp
+++ b/src/Shader/Shader.cpp
@@ -821,7 +821,9 @@
case OPCODE_POWX: return "powx";
case OPCODE_CRS: return "crs";
case OPCODE_SGN: return "sgn";
+ case OPCODE_ISGN: return "isgn";
case OPCODE_ABS: return "abs";
+ case OPCODE_IABS: return "iabs";
case OPCODE_NRM2: return "nrm2";
case OPCODE_NRM3: return "nrm3";
case OPCODE_NRM4: return "nrm4";
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp
index 03b95dc..a5785c8 100644
--- a/src/Shader/Shader.hpp
+++ b/src/Shader/Shader.hpp
@@ -244,6 +244,8 @@
// Integer opcodes
OPCODE_INEG,
+ OPCODE_IABS,
+ OPCODE_ISGN,
OPCODE_IADD,
OPCODE_ISUB,
OPCODE_IMUL,
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index f177a68..d06dc9b 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp
@@ -1357,6 +1357,14 @@
sgn(dst.w, src.w);
}
+ void ShaderCore::isgn(Vector4f &dst, const Vector4f &src)
+ {
+ isgn(dst.x, src.x);
+ isgn(dst.y, src.y);
+ isgn(dst.z, src.z);
+ isgn(dst.w, src.w);
+ }
+
void ShaderCore::abs(Vector4f &dst, const Vector4f &src)
{
dst.x = Abs(src.x);
@@ -1364,7 +1372,15 @@
dst.z = Abs(src.z);
dst.w = Abs(src.w);
}
-
+
+ void ShaderCore::iabs(Vector4f &dst, const Vector4f &src)
+ {
+ dst.x = As<Float4>(Abs(As<Int4>(src.x)));
+ dst.y = As<Float4>(Abs(As<Int4>(src.y)));
+ dst.z = As<Float4>(Abs(As<Int4>(src.z)));
+ dst.w = As<Float4>(Abs(As<Int4>(src.w)));
+ }
+
void ShaderCore::nrm2(Vector4f &dst, const Vector4f &src, bool pp)
{
Float4 dot = dot2(src, src);
@@ -1595,6 +1611,13 @@
dst = As<Float4>(neg | pos);
}
+ void ShaderCore::isgn(Float4 &dst, const Float4 &src)
+ {
+ Int4 neg = CmpLT(As<Int4>(src), Int4(0)) & Int4(-1);
+ Int4 pos = CmpNLE(As<Int4>(src), Int4(0)) & Int4(1);
+ dst = As<Float4>(neg | pos);
+ }
+
void ShaderCore::cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2)
{
Int4 pos = CmpLE(Float4(0.0f), src0);
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp
index 3f4b22c..161f0f9 100644
--- a/src/Shader/ShaderCore.hpp
+++ b/src/Shader/ShaderCore.hpp
@@ -335,7 +335,9 @@
void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
void sgn(Vector4f &dst, const Vector4f &src);
+ void isgn(Vector4f &dst, const Vector4f &src);
void abs(Vector4f &dst, const Vector4f &src);
+ void iabs(Vector4f &dst, const Vector4f &src);
void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
@@ -373,6 +375,7 @@
private:
void sgn(Float4 &dst, const Float4 &src);
+ void isgn(Float4 &dst, const Float4 &src);
void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index efae39cc..dc829ef 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -157,6 +157,7 @@
case Shader::OPCODE_DEFB: break;
case Shader::OPCODE_NOP: break;
case Shader::OPCODE_ABS: abs(d, s0); break;
+ case Shader::OPCODE_IABS: iabs(d, s0); break;
case Shader::OPCODE_ADD: add(d, s0, s1); break;
case Shader::OPCODE_IADD: iadd(d, s0, s1); break;
case Shader::OPCODE_CRS: crs(d, s0, s1); break;
@@ -256,6 +257,7 @@
case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break;
case Shader::OPCODE_SGE: step(d, s1, s0); break;
case Shader::OPCODE_SGN: sgn(d, s0); break;
+ case Shader::OPCODE_ISGN: isgn(d, s0); break;
case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break;
case Shader::OPCODE_COS: cos(d, s0, pp); break;
case Shader::OPCODE_SIN: sin(d, s0, pp); break;