Refactor vector packing.
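x86 vector packing instructions always treat the input as having signed
integer components, but can perform signed or unsigned saturation on
the output. In Reactor, the Pack() intrinsic has overloads which
differentiate between the two based on the signedness of the input,
which is confusing because it does not reflect what the instructions
actually do. Replace these overloads with explicitly named PackSigned()
and PackUnsigned() intrinsics, which both take signed input and are
named after the kind of saturation applied to the output.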
Also simplify emulation of saturating add/subtract.
Bug b/37496082
Change-Id: I0625fff429ffb40f42baf9600c7760d9858b5d89
Reviewed-on: https://swiftshader-review.googlesource.com/12548
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/PixelPipeline.cpp b/src/Shader/PixelPipeline.cpp
index 66d6a09..4ed1830 100644
--- a/src/Shader/PixelPipeline.cpp
+++ b/src/Shader/PixelPipeline.cpp
@@ -1665,7 +1665,7 @@
void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
{
Short4 test = src.x | src.y | src.z;
- Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;
+ Int kill = SignMask(PackSigned(test, test)) ^ 0x0000000F;
for(unsigned int q = 0; q < state.multiSample; q++)
{
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index 48a86a1..90e2073 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -549,29 +549,29 @@
break;
case ALPHA_EQUAL:
cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
- aMask = SignMask(Pack(cmp, Short4(0x0000)));
+ aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break;
case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
- aMask = SignMask(Pack(cmp, Short4(0x0000)));
+ aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break;
case ALPHA_LESS: // a < b ~ b > a
cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
- aMask = SignMask(Pack(cmp, Short4(0x0000)));
+ aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break;
case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
cmp |= equal;
- aMask = SignMask(Pack(cmp, Short4(0x0000)));
+ aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break;
case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
- aMask = SignMask(Pack(cmp, Short4(0x0000)));
+ aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break;
case ALPHA_GREATER: // a > b
cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
- aMask = SignMask(Pack(cmp, Short4(0x0000)));
+ aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
break;
default:
ASSERT(false);
@@ -1452,8 +1452,8 @@
current.y = As<Short4>(As<UShort4>(current.y) >> 8);
current.z = As<Short4>(As<UShort4>(current.z) >> 8);
- current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
- current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
+ current.z = As<Short4>(PackUnsigned(current.z, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
@@ -1469,8 +1469,8 @@
current.z = As<Short4>(As<UShort4>(current.z) >> 8);
current.w = As<Short4>(As<UShort4>(current.w) >> 8);
- current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
- current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
+ current.z = As<Short4>(PackUnsigned(current.z, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.w));
current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
@@ -1490,8 +1490,8 @@
current.y = As<Short4>(As<UShort4>(current.y) >> 8);
current.z = As<Short4>(As<UShort4>(current.z) >> 8);
- current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
- current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
+ current.z = As<Short4>(PackUnsigned(current.x, current.z));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
@@ -1507,8 +1507,8 @@
current.z = As<Short4>(As<UShort4>(current.z) >> 8);
current.w = As<Short4>(As<UShort4>(current.w) >> 8);
- current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
- current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
+ current.z = As<Short4>(PackUnsigned(current.x, current.z));
+ current.y = As<Short4>(PackUnsigned(current.y, current.w));
current.x = current.z;
current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
@@ -1521,17 +1521,17 @@
case FORMAT_G8R8:
current.x = As<Short4>(As<UShort4>(current.x) >> 8);
current.y = As<Short4>(As<UShort4>(current.y) >> 8);
- current.x = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.x)));
- current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
+ current.x = As<Short4>(PackUnsigned(current.x, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
break;
case FORMAT_R8:
current.x = As<Short4>(As<UShort4>(current.x) >> 8);
- current.x = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.x)));
+ current.x = As<Short4>(PackUnsigned(current.x, current.x));
break;
case FORMAT_A8:
current.w = As<Short4>(As<UShort4>(current.w) >> 8);
- current.w = As<Short4>(Pack(As<UShort4>(current.w), As<UShort4>(current.w)));
+ current.w = As<Short4>(PackUnsigned(current.w, current.w));
break;
case FORMAT_G16R16:
current.z = current.x;
@@ -2367,11 +2367,11 @@
Short4 tmpCol = Short4(As<Int4>(oC.x));
if(state.targetFormat[index] == FORMAT_R8I)
{
- tmpCol = As<Short4>(Pack(tmpCol, tmpCol));
+ tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
}
else
{
- tmpCol = As<Short4>(Pack(As<UShort4>(tmpCol), As<UShort4>(tmpCol)));
+ tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
}
packedCol = Extract(As<Int2>(tmpCol), 0);
@@ -2466,11 +2466,11 @@
if(state.targetFormat[index] == FORMAT_G8R8I)
{
- packedCol = As<Int2>(Pack(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+ packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
}
else
{
- packedCol = As<Int2>(Pack(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y))));
+ packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
}
UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
@@ -2604,11 +2604,11 @@
if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
{
- packedCol = As<UInt2>(Pack(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
+ packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
}
else
{
- packedCol = As<UInt2>(Pack(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y))));
+ packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
}
value = *Pointer<UInt2>(buffer, 16);
mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
@@ -2622,11 +2622,11 @@
if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
{
- packedCol = As<UInt2>(Pack(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
+ packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
}
else
{
- packedCol = As<UInt2>(Pack(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))));
+ packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
}
value = *Pointer<UInt2>(buffer, 16);
mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 62f76fa..290f402 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -570,7 +570,7 @@
void SamplerCore::border(Short4 &mask, Float4 &coordinates)
{
Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
- mask = As<Short4>(Int2(As<Int4>(Pack(border, border))));
+ mask = As<Short4>(Int2(As<Int4>(PackSigned(border, border))));
}
void SamplerCore::border(Int4 &mask, Float4 &coordinates)
@@ -2271,7 +2271,7 @@
// Clamp
convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
- convert = As<Int4>(Pack(convert, convert));
+ convert = As<Int4>(PackSigned(convert, convert));
return As<Short4>(Int2(convert)) + Short4(0x8000u);
}