Handle VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 sampling
The previous change,
https://swiftshader-review.googlesource.com/c/SwiftShader/+/63748,
was incomplete and introduced some failures, as seen in
https://swiftshader-review.googlesource.com/c/SwiftShader/+/63628.
Updates `SamplerCore::sampleTexel()` to handle both 8-bit and 10-bit
formats by reading the 8-bit or 10-bit buffer values without 8.8
packing (e.g. packing the 8-bit raw `u` value from the buffer into
both the high 8-bits and low 8-bits of `U`) and by using more
generic range expansion (as opposed to the hardcoded constants for
8-bit).
Bug: b/219756793
Test: dEQP-VK.ycbcr.*
Change-Id: I6e1681cf2f40dbbc389652136711bc2f20b413a1
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/64268
Presubmit-Ready: Jason Macnak <natsu@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Jason Macnak <natsu@google.com>
Commit-Queue: Jason Macnak <natsu@google.com>
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index 4268810..f126b4b 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -1727,12 +1727,14 @@
if(isYcbcrFormat())
{
+ // Generates 15-bit output.
+
// Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats.
Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
- // Luminance
+ // Luminance (either 8-bit or 10-bit in bottom bits).
UShort4 Y;
{
switch(state.textureFormat)
@@ -1740,24 +1742,20 @@
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
{
- Int c0 = Int(bufferY[index[0]]);
- Int c1 = Int(bufferY[index[1]]);
- Int c2 = Int(bufferY[index[2]]);
- Int c3 = Int(bufferY[index[3]]);
- c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
- Y = As<UShort4>(Unpack(As<Byte4>(c0)));
+ Y = Insert(Y, UShort(bufferY[index[0]]), 0);
+ Y = Insert(Y, UShort(bufferY[index[1]]), 1);
+ Y = Insert(Y, UShort(bufferY[index[2]]), 2);
+ Y = Insert(Y, UShort(bufferY[index[3]]), 3);
}
break;
case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
{
- Y = Insert(Y, Pointer<UShort>(bufferY)[index[0]], 0); // TODO: Insert(UShort4, UShort)
+ Y = Insert(Y, Pointer<UShort>(bufferY)[index[0]], 0);
Y = Insert(Y, Pointer<UShort>(bufferY)[index[1]], 1);
Y = Insert(Y, Pointer<UShort>(bufferY)[index[2]], 2);
Y = Insert(Y, Pointer<UShort>(bufferY)[index[3]], 3);
// Top 10 bits of each 16 bits:
Y = (Y & UShort4(0xFFC0u)) >> 6;
- // Scale from 10 bits to 16 bits:
- Y = Y << 6;
}
break;
default:
@@ -1766,7 +1764,7 @@
}
}
- // Chroma
+ // Chroma (either 8-bit or 10-bit in bottom bits).
UShort4 Cb, Cr;
{
computeIndices(index, uuuu, vvvv, wwww, layerIndex, offset, sample, mipmap + sizeof(Mipmap));
@@ -1776,30 +1774,27 @@
{
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
{
- Int c0 = Int(bufferU[index[0]]);
- Int c1 = Int(bufferU[index[1]]);
- Int c2 = Int(bufferU[index[2]]);
- Int c3 = Int(bufferU[index[3]]);
- c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
- U = As<UShort4>(Unpack(As<Byte4>(c0)));
+ U = Insert(U, UShort(bufferU[index[0]]), 0);
+ U = Insert(U, UShort(bufferU[index[1]]), 1);
+ U = Insert(U, UShort(bufferU[index[2]]), 2);
+ U = Insert(U, UShort(bufferU[index[3]]), 3);
- c0 = Int(bufferV[index[0]]);
- c1 = Int(bufferV[index[1]]);
- c2 = Int(bufferV[index[2]]);
- c3 = Int(bufferV[index[3]]);
- c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
- V = As<UShort4>(Unpack(As<Byte4>(c0)));
+ V = Insert(V, UShort(bufferV[index[0]]), 0);
+ V = Insert(V, UShort(bufferV[index[1]]), 1);
+ V = Insert(V, UShort(bufferV[index[2]]), 2);
+ V = Insert(V, UShort(bufferV[index[3]]), 3);
}
break;
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
{
- Short4 UV;
- UV = Insert(UV, Pointer<Short>(bufferU)[index[0]], 0); // TODO: Insert(UShort4, UShort)
- UV = Insert(UV, Pointer<Short>(bufferU)[index[1]], 1);
- UV = Insert(UV, Pointer<Short>(bufferU)[index[2]], 2);
- UV = Insert(UV, Pointer<Short>(bufferU)[index[3]], 3);
- U = (UV & Short4(0x00FFu)) | (UV << 8);
- V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8);
+ UShort4 UV;
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[0]], 0);
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[1]], 1);
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[2]], 2);
+ UV = Insert(UV, Pointer<UShort>(bufferU)[index[3]], 3);
+
+ U = (UV & UShort4(0x00FFu));
+ V = (UV & UShort4(0xFF00u)) >> 8;
}
break;
case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
@@ -1809,13 +1804,10 @@
UV = Insert(UV, Pointer<UInt>(bufferU)[index[1]], 1);
UV = Insert(UV, Pointer<UInt>(bufferU)[index[2]], 2);
UV = Insert(UV, Pointer<UInt>(bufferU)[index[3]], 3);
- // Top 10 bits of first 16 bits:
- U = UShort4((UV & UInt4(0x0000FFC0u)) >> 6); // TODO: UnpackLower(UInt4)
- // Top 10 bits of second 16 bits:
- V = UShort4((UV & UInt4(0xFFC00000u)) >> 22); // TODO: UnpackUpper(UInt4)
- // Scale from 10 bits to 16 bits:
- U = U << 6;
- V = V << 6;
+ // Top 10 bits of first 16-bits:
+ U = UShort4((UV & UInt4(0x0000FFC0u)) >> 6);
+ // Top 10 bits of second 16-bits:
+ V = UShort4((UV & UInt4(0xFFC00000u)) >> 22);
}
break;
default:
@@ -1835,31 +1827,65 @@
}
}
+ uint8_t lumaBits = 8;
+ uint8_t chromaBits = 8;
+ switch(state.textureFormat)
+ {
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ lumaBits = 8;
+ chromaBits = 8;
+ break;
+ case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+ lumaBits = 10;
+ chromaBits = 10;
+ break;
+ default:
+ UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
+ break;
+ }
+
if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
{
- // YCbCr formats are treated as signed 15-bit.
- c.x = Cr >> 1;
- c.y = Y >> 1;
- c.z = Cb >> 1;
+ // Scale to the output 15-bit.
+ c.x = Cr << (15 - chromaBits);
+ c.y = Y << (15 - lumaBits);
+ c.z = Cb << (15 - chromaBits);
}
else
{
- // Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
- // Scale down by 0x0101 to normalize the 8.8 samples, and up by 0x7FFF for signed 15-bit output.
- float yOffset = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0);
- float uvOffset = static_cast<float>(128 * 0x0101);
- float yFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101);
- float uvFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 224 * 0x0101 : 255 * 0x0101);
+ const float twoPowLumaBits = static_cast<float>(0x1u << lumaBits);
+ const float twoPowLumaBitsMinus8 = static_cast<float>(0x1u << (lumaBits - 8));
+ const float twoPowChromaBits = static_cast<float>(0x1u << chromaBits);
+ const float twoPowChromaBitsMinus1 = static_cast<float>(0x1u << (chromaBits - 1));
+ const float twoPowChromaBitsMinus8 = static_cast<float>(0x1u << (chromaBits - 8));
- Float4 y = (Float4(Y) - Float4(yOffset)) * Float4(yFactor);
- Float4 u = (Float4(Cb) - Float4(uvOffset)) * Float4(uvFactor);
- Float4 v = (Float4(Cr) - Float4(uvOffset)) * Float4(uvFactor);
+ Float4 y = Float4(Y);
+ Float4 u = Float4(Cb);
+ Float4 v = Float4(Cr);
+
+ if(state.studioSwing)
+ {
+ // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_NARROW
+ y = ((y / Float4(twoPowLumaBitsMinus8)) - Float4(16.0f)) / Float4(219.0f);
+ u = ((u / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f);
+ v = ((v / Float4(twoPowChromaBitsMinus8)) - Float4(128.0f)) / Float4(224.0f);
+ }
+ else
+ {
+ // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#QUANTIZATION_FULL
+ y = y / Float4(twoPowLumaBits - 1.0f);
+ u = (u - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f);
+ v = (v - Float4(twoPowChromaBitsMinus1)) / Float4(twoPowChromaBits - 1.0f);
+ }
+
+ // Now, `y` is in [0, 1] and `u` and `v` are in [-0.5, 0.5].
if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
{
- c.x = Short4(v);
- c.y = Short4(y);
- c.z = Short4(u);
+ c.x = Short4(v * static_cast<float>(0x7FFF));
+ c.y = Short4(y * static_cast<float>(0x7FFF));
+ c.z = Short4(u * static_cast<float>(0x7FFF));
}
else
{
@@ -1900,9 +1926,9 @@
Float4 g = y + Float4(Gb) * u + Float4(Gr) * v;
Float4 b = y + Float4(Bb) * u;
- c.x = Short4(r);
- c.y = Short4(g);
- c.z = Short4(b);
+ c.x = Short4(r * static_cast<float>(0x7FFF));
+ c.y = Short4(g * static_cast<float>(0x7FFF));
+ c.z = Short4(b * static_cast<float>(0x7FFF));
}
}
}
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 41268a9..3d6459f 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -1061,6 +1061,13 @@
storeValue(integer);
}
+UShort::UShort(RValue<Byte> cast)
+{
+ Value *integer = Nucleus::createZExt(cast.value(), UShort::type());
+
+ storeValue(integer);
+}
+
UShort::UShort(unsigned short x)
{
storeValue(Nucleus::createConstantShort(x));
@@ -1331,6 +1338,11 @@
return store(rhs.load());
}
+RValue<Byte4> Insert(RValue<Byte4> val, RValue<Byte> element, int i)
+{
+ return RValue<Byte4>(Nucleus::createInsertElement(val.value(), element.value(), i));
+}
+
Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{
int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 453684e..aeaad54 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -561,6 +561,7 @@
explicit UShort(RValue<UInt> cast);
explicit UShort(RValue<Int> cast);
+ explicit UShort(RValue<Byte> cast);
UShort() = default;
UShort(unsigned short x);
@@ -632,6 +633,8 @@
static Type *type();
};
+RValue<Byte4> Insert(RValue<Byte4> val, RValue<Byte> element, int i);
+
// RValue<Byte4> operator+(RValue<Byte4> lhs, RValue<Byte4> rhs);
// RValue<Byte4> operator-(RValue<Byte4> lhs, RValue<Byte4> rhs);
// RValue<Byte4> operator*(RValue<Byte4> lhs, RValue<Byte4> rhs);