Enable B10G11R11_UFLOAT blending
B10G11R11_UFLOAT blending is required in order for SwANGLE
to expose GL_EXT_color_buffer_float.
In this CL:
- Added a minor readability improvement by storing
"*Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]))"
in a variable, "pitchB", since it's used in every case.
- Added a constant for B10G11R11 masking
- Added clamping of negative results to zero in the
floating-point blending code for unsigned floating-point formats
- Fixed the VK_FORMAT_B10G11R11_UFLOAT_PACK32 output format,
which now takes the coverage mask into account
- Included a minor follow-up cleanup in ShaderCore
Bug: b/146223877
Tests: dEQP-VK.*b10g11r11*
Change-Id: Ifb95f34a10cdbee9d185bc25feba0aeaca0d9e70
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39929
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp
index 1f98769..ab84e50 100644
--- a/src/Pipeline/Constants.cpp
+++ b/src/Pipeline/Constants.cpp
@@ -238,6 +238,7 @@
for(int i = 0; i < 8; i++)
{
mask565Q[i] = word4((i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x07E0 : 0) | (i & 0x4 ? 0xF800 : 0));
+ mask11X[i] = dword4((i & 0x1 ? 0x000007FFu : 0) | (i & 0x2 ? 0x003FF800u : 0) | (i & 0x4 ? 0xFFC00000u : 0));
}
for(int i = 0; i < 16; i++)
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp
index c9c8a1f..1680f30 100644
--- a/src/Pipeline/Constants.hpp
+++ b/src/Pipeline/Constants.hpp
@@ -70,6 +70,7 @@
word4 mask565Q[8];
dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x
+ dword4 mask11X[8]; // 3 bit writemask -> B10G11R11 bit patterns, replicated 4x
unsigned short sRGBtoLinear8_16[256];
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index be79715..82a44f1 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -927,11 +927,13 @@
Pointer<Byte> buffer = cBuffer;
Pointer<Byte> buffer2;
+ Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
switch(state.targetFormat[index])
{
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
buffer += 2 * x;
- buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer2 = buffer + pitchB;
c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
pixel.x = (c01 & Short4(0x7C00u)) << 1;
@@ -949,7 +951,7 @@
break;
case VK_FORMAT_R5G6B5_UNORM_PACK16:
buffer += 2 * x;
- buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer2 = buffer + pitchB;
c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
pixel.x = c01 & Short4(0xF800u);
@@ -969,7 +971,7 @@
case VK_FORMAT_B8G8R8A8_SRGB:
buffer += 4 * x;
c01 = *Pointer<Short4>(buffer);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
c23 = *Pointer<Short4>(buffer);
pixel.z = c01;
pixel.y = c01;
@@ -989,7 +991,7 @@
case VK_FORMAT_R8G8B8A8_SRGB:
buffer += 4 * x;
c01 = *Pointer<Short4>(buffer);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
c23 = *Pointer<Short4>(buffer);
pixel.z = c01;
pixel.y = c01;
@@ -1008,7 +1010,7 @@
case VK_FORMAT_R8_UNORM:
buffer += 1 * x;
pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
pixel.y = Short4(0x0000);
@@ -1018,7 +1020,7 @@
case VK_FORMAT_R8G8_UNORM:
buffer += 2 * x;
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
@@ -1026,17 +1028,19 @@
pixel.w = Short4(0xFFFFu);
break;
case VK_FORMAT_R16G16B16A16_UNORM:
- pixel.x = *Pointer<Short4>(buffer + 8 * x);
- pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.z = *Pointer<Short4>(buffer + 8 * x);
- pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
+ buffer += 8 * x;
+ pixel.x = *Pointer<Short4>(buffer + 0);
+ pixel.y = *Pointer<Short4>(buffer + 8);
+ buffer += pitchB;
+ pixel.z = *Pointer<Short4>(buffer + 0);
+ pixel.w = *Pointer<Short4>(buffer + 8);
transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
break;
case VK_FORMAT_R16G16_UNORM:
- pixel.x = *Pointer<Short4>(buffer + 4 * x);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.y = *Pointer<Short4>(buffer + 4 * x);
+ buffer += 4 * x;
+ pixel.x = *Pointer<Short4>(buffer);
+ buffer += pitchB;
+ pixel.y = *Pointer<Short4>(buffer);
pixel.z = pixel.x;
pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
@@ -1049,11 +1053,12 @@
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
{
Int4 v = Int4(0);
- v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0);
- v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2);
- v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3);
+ buffer += 4 * x;
+ v = Insert(v, *Pointer<Int>(buffer + 0), 0);
+ v = Insert(v, *Pointer<Int>(buffer + 4), 1);
+ buffer += pitchB;
+ v = Insert(v, *Pointer<Int>(buffer + 0), 2);
+ v = Insert(v, *Pointer<Int>(buffer + 4), 3);
a2b10g10r10Unpack(v, pixel);
}
@@ -1368,6 +1373,7 @@
}
Pointer<Byte> buffer = cBuffer;
+ Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
switch(state.targetFormat[index])
{
@@ -1386,7 +1392,7 @@
}
*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Int>(buffer);
Int c23 = Extract(As<Int2>(current.x), 1);
@@ -1413,7 +1419,7 @@
}
*Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Int>(buffer);
Int c23 = Extract(As<Int2>(current.x), 1);
@@ -1439,7 +1445,7 @@
}
*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Short4>(buffer);
Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
@@ -1466,7 +1472,7 @@
}
*Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Short4>(buffer);
Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
@@ -1483,8 +1489,7 @@
buffer += 2 * x;
Int2 value;
value = Insert(value, *Pointer<Int>(buffer), 0);
- Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
+ value = Insert(value, *Pointer<Int>(buffer + pitchB), 1);
Int2 packedCol = As<Int2>(current.x);
@@ -1499,7 +1504,7 @@
packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
- *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
+ *Pointer<UInt>(buffer + pitchB) = As<UInt>(Extract(packedCol, 1));
}
break;
case VK_FORMAT_R8_UNORM:
@@ -1508,15 +1513,14 @@
buffer += 1 * x;
Short4 value;
value = Insert(value, *Pointer<Short>(buffer), 0);
- Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
+ value = Insert(value, *Pointer<Short>(buffer + pitchB), 1);
current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
current.x |= value;
*Pointer<Short>(buffer) = Extract(current.x, 0);
- *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
+ *Pointer<Short>(buffer + pitchB) = Extract(current.x, 1);
}
break;
case VK_FORMAT_R16G16_UNORM:
@@ -1538,7 +1542,7 @@
current.x |= value;
*Pointer<Short4>(buffer) = current.x;
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Short4>(buffer);
@@ -1594,7 +1598,7 @@
*Pointer<Short4>(buffer + 8) = current.y;
}
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
{
Short4 value = *Pointer<Short4>(buffer);
@@ -1643,7 +1647,7 @@
}
*Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Int2>(buffer, 16);
mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
@@ -1803,6 +1807,7 @@
}
Pointer<Byte> buffer = cBuffer;
+ Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
// pixel holds four texel color values.
// Note: Despite the type being Vector4f, the colors may be stored as
@@ -1831,20 +1836,22 @@
case VK_FORMAT_R32_UINT:
case VK_FORMAT_R32_SFLOAT:
// FIXME: movlps
- pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
- pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += 4 * x;
+ pixel.x.x = *Pointer<Float>(buffer + 0);
+ pixel.x.y = *Pointer<Float>(buffer + 4);
+ buffer += pitchB;
// FIXME: movhps
- pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
- pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
+ pixel.x.z = *Pointer<Float>(buffer + 0);
+ pixel.x.w = *Pointer<Float>(buffer + 4);
pixel.y = pixel.z = pixel.w = one;
break;
case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32_UINT:
case VK_FORMAT_R32G32_SFLOAT:
- pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
+ buffer += 8 * x;
+ pixel.x = *Pointer<Float4>(buffer, 16);
+ buffer += pitchB;
+ pixel.y = *Pointer<Float4>(buffer, 16);
pixel.z = pixel.x;
pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202);
pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313);
@@ -1854,59 +1861,65 @@
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SINT:
case VK_FORMAT_R32G32B32A32_UINT:
- pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
- pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
- pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
+ buffer += 16 * x;
+ pixel.x = *Pointer<Float4>(buffer + 0, 16);
+ pixel.y = *Pointer<Float4>(buffer + 16, 16);
+ buffer += pitchB;
+ pixel.z = *Pointer<Float4>(buffer + 0, 16);
+ pixel.w = *Pointer<Float4>(buffer + 16, 16);
transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
break;
case VK_FORMAT_R16_SFLOAT:
- pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0));
- pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2));
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0));
- pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2));
+ buffer += 2 * x;
+ pixel.x.x = Float(*Pointer<Half>(buffer + 0));
+ pixel.x.y = Float(*Pointer<Half>(buffer + 2));
+ buffer += pitchB;
+ pixel.x.z = Float(*Pointer<Half>(buffer + 0));
+ pixel.x.w = Float(*Pointer<Half>(buffer + 2));
pixel.y = pixel.z = pixel.w = one;
break;
case VK_FORMAT_R16G16_SFLOAT:
- pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0));
- pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2));
- pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4));
- pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6));
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0));
- pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2));
- pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4));
- pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6));
+ buffer += 4 * x;
+ pixel.x.x = Float(*Pointer<Half>(buffer + 0));
+ pixel.y.x = Float(*Pointer<Half>(buffer + 2));
+ pixel.x.y = Float(*Pointer<Half>(buffer + 4));
+ pixel.y.y = Float(*Pointer<Half>(buffer + 6));
+ buffer += pitchB;
+ pixel.x.z = Float(*Pointer<Half>(buffer + 0));
+ pixel.y.z = Float(*Pointer<Half>(buffer + 2));
+ pixel.x.w = Float(*Pointer<Half>(buffer + 4));
+ pixel.y.w = Float(*Pointer<Half>(buffer + 6));
pixel.z = pixel.w = one;
break;
case VK_FORMAT_R16G16B16A16_SFLOAT:
- pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
- pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
- pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
- pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
- pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
- pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
- pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
- pixel.w.y = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0));
- pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2));
- pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4));
- pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6));
- pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8));
- pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa));
- pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc));
- pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe));
+ buffer += 8 * x;
+ pixel.x.x = Float(*Pointer<Half>(buffer + 0x0));
+ pixel.y.x = Float(*Pointer<Half>(buffer + 0x2));
+ pixel.z.x = Float(*Pointer<Half>(buffer + 0x4));
+ pixel.w.x = Float(*Pointer<Half>(buffer + 0x6));
+ pixel.x.y = Float(*Pointer<Half>(buffer + 0x8));
+ pixel.y.y = Float(*Pointer<Half>(buffer + 0xa));
+ pixel.z.y = Float(*Pointer<Half>(buffer + 0xc));
+ pixel.w.y = Float(*Pointer<Half>(buffer + 0xe));
+ buffer += pitchB;
+ pixel.x.z = Float(*Pointer<Half>(buffer + 0x0));
+ pixel.y.z = Float(*Pointer<Half>(buffer + 0x2));
+ pixel.z.z = Float(*Pointer<Half>(buffer + 0x4));
+ pixel.w.z = Float(*Pointer<Half>(buffer + 0x6));
+ pixel.x.w = Float(*Pointer<Half>(buffer + 0x8));
+ pixel.y.w = Float(*Pointer<Half>(buffer + 0xa));
+ pixel.z.w = Float(*Pointer<Half>(buffer + 0xc));
+ pixel.w.w = Float(*Pointer<Half>(buffer + 0xe));
break;
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
- pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0));
- pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4));
- transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
+ buffer += 4 * x;
+ pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 0));
+ pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4));
+ buffer += pitchB;
+ pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 0));
+ pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4));
+ transpose4x3(pixel.x, pixel.y, pixel.z, pixel.w);
+ pixel.w = one;
break;
default:
UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
@@ -2007,6 +2020,11 @@
default:
UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
}
+
+ if(format.isUnsignedComponent(0)) { oC.x = Max(oC.x, Float4(0.0f)); }
+ if(format.isUnsignedComponent(1)) { oC.y = Max(oC.y, Float4(0.0f)); }
+ if(format.isUnsignedComponent(2)) { oC.z = Max(oC.z, Float4(0.0f)); }
+ if(format.isUnsignedComponent(3)) { oC.w = Max(oC.w, Float4(0.0f)); }
}
void PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &oC, const Int &sMask, const Int &zMask, const Int &cMask)
@@ -2074,6 +2092,7 @@
auto targetFormat = state.targetFormat[index];
Pointer<Byte> buffer = cBuffer;
+ Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Float4 value;
switch(targetFormat)
@@ -2089,7 +2108,7 @@
value.x = *Pointer<Float>(buffer + 0);
value.y = *Pointer<Float>(buffer + 4);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
// FIXME: movhps
value.z = *Pointer<Float>(buffer + 0);
@@ -2103,7 +2122,7 @@
*Pointer<Float>(buffer + 0) = oC.x.z;
*Pointer<Float>(buffer + 4) = oC.x.w;
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer -= pitchB;
// FIXME: movlps
*Pointer<Float>(buffer + 0) = oC.x.x;
@@ -2118,7 +2137,7 @@
value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0);
value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2);
value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3);
@@ -2130,7 +2149,7 @@
*Pointer<Half>(buffer + 0) = Half(oC.x.z);
*Pointer<Half>(buffer + 2) = Half(oC.x.w);
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer -= pitchB;
*Pointer<Half>(buffer + 0) = Half(oC.x.x);
*Pointer<Half>(buffer + 2) = Half(oC.x.y);
@@ -2145,7 +2164,7 @@
UShort4 xyzw;
xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
value = As<Float4>(Int4(xyzw));
@@ -2161,7 +2180,7 @@
component = oC.x.w;
*Pointer<Short>(buffer + 2) = Short(As<Int>(component));
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer -= pitchB;
component = oC.x.x;
*Pointer<Short>(buffer + 0) = Short(As<Int>(component));
@@ -2175,7 +2194,7 @@
component = oC.x.w;
*Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer -= pitchB;
component = oC.x.x;
*Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
@@ -2193,7 +2212,7 @@
UInt xyzw, packedCol;
xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
Short4 tmpCol = Short4(As<Int4>(oC.x));
@@ -2211,7 +2230,7 @@
(xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
*Pointer<UShort>(buffer) = UShort(packedCol >> 16);
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer -= pitchB;
*Pointer<UShort>(buffer) = UShort(packedCol);
}
break;
@@ -2235,7 +2254,7 @@
oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
*Pointer<Float4>(buffer) = oC.x;
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Float4>(buffer);
@@ -2274,7 +2293,7 @@
}
*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0);
packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1);
@@ -2305,7 +2324,7 @@
}
*Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
packedCol = UShort4(As<Int4>(oC.y));
value = *Pointer<UShort4>(buffer);
@@ -2326,7 +2345,7 @@
Int2 xyzw, packedCol;
xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
if(targetFormat == VK_FORMAT_R8G8_SINT)
@@ -2349,7 +2368,7 @@
packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
- buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer -= pitchB;
*Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
}
break;
@@ -2392,7 +2411,7 @@
*Pointer<Float4>(buffer + 16, 16) = oC.y;
}
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
{
value = *Pointer<Float4>(buffer, 16);
@@ -2449,7 +2468,7 @@
}
*Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<UInt4>(buffer);
packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0);
@@ -2469,19 +2488,31 @@
{
buffer += 4 * x;
- unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) |
- ((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) |
- ((rgbaWriteMask & 0x4) ? 0xFFC00000 : 0);
- UInt2 mergedMask(mask, mask);
+ UInt4 packedCol;
+ packedCol = Insert(packedCol, r11g11b10Pack(oC.x), 0);
+ packedCol = Insert(packedCol, r11g11b10Pack(oC.y), 1);
+ packedCol = Insert(packedCol, r11g11b10Pack(oC.z), 2);
+ packedCol = Insert(packedCol, r11g11b10Pack(oC.w), 3);
- UInt2 value;
- value = Insert(value, r11g11b10Pack(oC.x), 0);
- value = Insert(value, r11g11b10Pack(oC.y), 1);
- *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
- value = Insert(value, r11g11b10Pack(oC.z), 0);
- value = Insert(value, r11g11b10Pack(oC.w), 1);
- *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask);
+ UInt4 value;
+ value = Insert(value, *Pointer<UInt>(buffer + 0), 0);
+ value = Insert(value, *Pointer<UInt>(buffer + 4), 1);
+ buffer += pitchB;
+ value = Insert(value, *Pointer<UInt>(buffer + 0), 2);
+ value = Insert(value, *Pointer<UInt>(buffer + 4), 3);
+
+ UInt4 mask = *Pointer<UInt4>(constants + OFFSET(Constants, maskD4X[0][0]) + xMask * 16, 16);
+ if((rgbaWriteMask & 0x7) != 0x7)
+ {
+ mask &= *Pointer<UInt4>(constants + OFFSET(Constants, mask11X[rgbaWriteMask & 0x7][0]), 16);
+ }
+ value = (packedCol & mask) | (value & ~mask);
+
+ *Pointer<UInt>(buffer + 0) = value.z;
+ *Pointer<UInt>(buffer + 4) = value.w;
+ buffer -= pitchB;
+ *Pointer<UInt>(buffer + 0) = value.x;
+ *Pointer<UInt>(buffer + 4) = value.y;
}
break;
case VK_FORMAT_R16G16B16A16_SINT:
@@ -2502,7 +2533,7 @@
}
*Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<UShort8>(buffer);
packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
@@ -2542,7 +2573,7 @@
}
*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
if(isSigned)
{
@@ -2579,7 +2610,7 @@
}
*Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);
- buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+ buffer += pitchB;
value = *Pointer<Int2>(buffer, 16);
mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 87f33c8..d8e5f02 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -594,7 +594,7 @@
return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
}
-sw::SIMD::Float r11g11b10Unpack(UInt r11g11b10bits)
+Float4 r11g11b10Unpack(UInt r11g11b10bits)
{
// 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
// Since the Half float format also has a 5 bit exponent, we can convert these formats to half by
@@ -606,7 +606,7 @@
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x003FF800u)) >> 7, 1);
halfBits = Insert(halfBits, (r11g11b10bits & UInt(0xFFC00000u)) >> 17, 2);
halfBits = Insert(halfBits, UInt(0x00003C00u), 3);
- return As<sw::SIMD::Float>(halfToFloatBits(halfBits));
+ return As<Float4>(halfToFloatBits(halfBits));
}
UInt r11g11b10Pack(sw::SIMD::Float &value)
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index d0b7142..f360a66 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -616,6 +616,7 @@
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
pFormatProperties->optimalTilingFeatures |=
VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
// Fall through
@@ -640,7 +641,6 @@
case VK_FORMAT_R32G32_SINT:
case VK_FORMAT_R32G32B32A32_UINT:
case VK_FORMAT_R32G32B32A32_SINT:
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
pFormatProperties->optimalTilingFeatures |=
VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT;