Enable B10G11R11_UFLOAT blending B10G11R11_UFLOAT blending is required in order for SwANGLE to expose GL_EXT_color_buffer_float. In this CL: - I added a minor readability improvement by storing "*Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]))" in a variable, "pitchB", since it's used in every case. - Added a constant for B10G11R11 masking - Added clamping in the floating-point blending code for unsigned floating-point formats - Fixed the VK_FORMAT_B10G11R11_UFLOAT_PACK32 output format, which now takes the coverage mask into account - Included a minor follow-up cleanup in ShaderCore Bug: b/146223877 Tests: dEQP-VK.*b10g11r11* Change-Id: Ifb95f34a10cdbee9d185bc25feba0aeaca0d9e70 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39929 Reviewed-by: Chris Forbes <chrisforbes@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com> Presubmit-Ready: Alexis Hétu <sugoi@google.com> Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp index 1f98769..ab84e50 100644 --- a/src/Pipeline/Constants.cpp +++ b/src/Pipeline/Constants.cpp
@@ -238,6 +238,7 @@ for(int i = 0; i < 8; i++) { mask565Q[i] = word4((i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x07E0 : 0) | (i & 0x4 ? 0xF800 : 0)); + mask11X[i] = dword4((i & 0x1 ? 0x000007FFu : 0) | (i & 0x2 ? 0x003FF800u : 0) | (i & 0x4 ? 0xFFC00000u : 0)); } for(int i = 0; i < 16; i++)
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp index c9c8a1f..1680f30 100644 --- a/src/Pipeline/Constants.hpp +++ b/src/Pipeline/Constants.hpp
@@ -70,6 +70,7 @@ word4 mask565Q[8]; dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x + dword4 mask11X[8]; // 3 bit writemask -> B10G11R11 bit patterns, replicated 4x unsigned short sRGBtoLinear8_16[256];
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp index be79715..82a44f1 100644 --- a/src/Pipeline/PixelRoutine.cpp +++ b/src/Pipeline/PixelRoutine.cpp
@@ -927,11 +927,13 @@ Pointer<Byte> buffer = cBuffer; Pointer<Byte> buffer2; + Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + switch(state.targetFormat[index]) { case VK_FORMAT_A1R5G5B5_UNORM_PACK16: buffer += 2 * x; - buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer2 = buffer + pitchB; c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); pixel.x = (c01 & Short4(0x7C00u)) << 1; @@ -949,7 +951,7 @@ break; case VK_FORMAT_R5G6B5_UNORM_PACK16: buffer += 2 * x; - buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer2 = buffer + pitchB; c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); pixel.x = c01 & Short4(0xF800u); @@ -969,7 +971,7 @@ case VK_FORMAT_B8G8R8A8_SRGB: buffer += 4 * x; c01 = *Pointer<Short4>(buffer); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; c23 = *Pointer<Short4>(buffer); pixel.z = c01; pixel.y = c01; @@ -989,7 +991,7 @@ case VK_FORMAT_R8G8B8A8_SRGB: buffer += 4 * x; c01 = *Pointer<Short4>(buffer); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; c23 = *Pointer<Short4>(buffer); pixel.z = c01; pixel.y = c01; @@ -1008,7 +1010,7 @@ case VK_FORMAT_R8_UNORM: buffer += 1 * x; pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1); pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); pixel.y = Short4(0x0000); @@ -1018,7 +1020,7 @@ case VK_FORMAT_R8G8_UNORM: buffer += 2 * x; c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1)); pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8); pixel.y = (c01 & Short4(0xFF00u)) | 
As<Short4>(As<UShort4>(c01) >> 8); @@ -1026,17 +1028,19 @@ pixel.w = Short4(0xFFFFu); break; case VK_FORMAT_R16G16B16A16_UNORM: - pixel.x = *Pointer<Short4>(buffer + 8 * x); - pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.z = *Pointer<Short4>(buffer + 8 * x); - pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); + buffer += 8 * x; + pixel.x = *Pointer<Short4>(buffer + 0); + pixel.y = *Pointer<Short4>(buffer + 8); + buffer += pitchB; + pixel.z = *Pointer<Short4>(buffer + 0); + pixel.w = *Pointer<Short4>(buffer + 8); transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); break; case VK_FORMAT_R16G16_UNORM: - pixel.x = *Pointer<Short4>(buffer + 4 * x); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.y = *Pointer<Short4>(buffer + 4 * x); + buffer += 4 * x; + pixel.x = *Pointer<Short4>(buffer); + buffer += pitchB; + pixel.y = *Pointer<Short4>(buffer); pixel.z = pixel.x; pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); @@ -1049,11 +1053,12 @@ case VK_FORMAT_A2B10G10R10_UNORM_PACK32: { Int4 v = Int4(0); - v = Insert(v, *Pointer<Int>(buffer + 4 * x), 0); - v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 1); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - v = Insert(v, *Pointer<Int>(buffer + 4 * x), 2); - v = Insert(v, *Pointer<Int>(buffer + 4 * x + 4), 3); + buffer += 4 * x; + v = Insert(v, *Pointer<Int>(buffer + 0), 0); + v = Insert(v, *Pointer<Int>(buffer + 4), 1); + buffer += pitchB; + v = Insert(v, *Pointer<Int>(buffer + 0), 2); + v = Insert(v, *Pointer<Int>(buffer + 4), 3); a2b10g10r10Unpack(v, pixel); } @@ -1368,6 +1373,7 @@ } Pointer<Byte> buffer = cBuffer; + Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); switch(state.targetFormat[index]) { @@ -1386,7 +1392,7 @@ } *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + 
OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Int>(buffer); Int c23 = Extract(As<Int2>(current.x), 1); @@ -1413,7 +1419,7 @@ } *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Int>(buffer); Int c23 = Extract(As<Int2>(current.x), 1); @@ -1439,7 +1445,7 @@ } *Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Short4>(buffer); Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); @@ -1466,7 +1472,7 @@ } *Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Short4>(buffer); Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); @@ -1483,8 +1489,7 @@ buffer += 2 * x; Int2 value; value = Insert(value, *Pointer<Int>(buffer), 0); - Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = Insert(value, *Pointer<Int>(buffer + pitch), 1); + value = Insert(value, *Pointer<Int>(buffer + pitchB), 1); Int2 packedCol = As<Int2>(current.x); @@ -1499,7 +1504,7 @@ packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask)); *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); - *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1)); + *Pointer<UInt>(buffer + pitchB) = As<UInt>(Extract(packedCol, 1)); } break; case VK_FORMAT_R8_UNORM: @@ -1508,15 +1513,14 @@ buffer += 1 * x; Short4 value; value = Insert(value, *Pointer<Short>(buffer), 0); - Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = Insert(value, *Pointer<Short>(buffer + pitch), 1); + value = Insert(value, *Pointer<Short>(buffer + pitchB), 1); current.x &= 
*Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); current.x |= value; *Pointer<Short>(buffer) = Extract(current.x, 0); - *Pointer<Short>(buffer + pitch) = Extract(current.x, 1); + *Pointer<Short>(buffer + pitchB) = Extract(current.x, 1); } break; case VK_FORMAT_R16G16_UNORM: @@ -1538,7 +1542,7 @@ current.x |= value; *Pointer<Short4>(buffer) = current.x; - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Short4>(buffer); @@ -1594,7 +1598,7 @@ *Pointer<Short4>(buffer + 8) = current.y; } - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; { Short4 value = *Pointer<Short4>(buffer); @@ -1643,7 +1647,7 @@ } *Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Int2>(buffer, 16); mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); @@ -1803,6 +1807,7 @@ } Pointer<Byte> buffer = cBuffer; + Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); // pixel holds four texel color values. 
// Note: Despite the type being Vector4f, the colors may be stored as @@ -1831,20 +1836,22 @@ case VK_FORMAT_R32_UINT: case VK_FORMAT_R32_SFLOAT: // FIXME: movlps - pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); - pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += 4 * x; + pixel.x.x = *Pointer<Float>(buffer + 0); + pixel.x.y = *Pointer<Float>(buffer + 4); + buffer += pitchB; // FIXME: movhps - pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); - pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); + pixel.x.z = *Pointer<Float>(buffer + 0); + pixel.x.w = *Pointer<Float>(buffer + 4); pixel.y = pixel.z = pixel.w = one; break; case VK_FORMAT_R32G32_SINT: case VK_FORMAT_R32G32_UINT: case VK_FORMAT_R32G32_SFLOAT: - pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); + buffer += 8 * x; + pixel.x = *Pointer<Float4>(buffer, 16); + buffer += pitchB; + pixel.y = *Pointer<Float4>(buffer, 16); pixel.z = pixel.x; pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202); pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313); @@ -1854,59 +1861,65 @@ case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32G32B32A32_SINT: case VK_FORMAT_R32G32B32A32_UINT: - pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); - pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); - pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); + buffer += 16 * x; + pixel.x = *Pointer<Float4>(buffer + 0, 16); + pixel.y = *Pointer<Float4>(buffer + 16, 16); + buffer += pitchB; + pixel.z = *Pointer<Float4>(buffer + 0, 16); + pixel.w = *Pointer<Float4>(buffer + 16, 16); transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); break; case VK_FORMAT_R16_SFLOAT: - pixel.x.x = Float(*Pointer<Half>(buffer + 2 * x + 0)); - 
pixel.x.y = Float(*Pointer<Half>(buffer + 2 * x + 2)); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.x.z = Float(*Pointer<Half>(buffer + 2 * x + 0)); - pixel.x.w = Float(*Pointer<Half>(buffer + 2 * x + 2)); + buffer += 2 * x; + pixel.x.x = Float(*Pointer<Half>(buffer + 0)); + pixel.x.y = Float(*Pointer<Half>(buffer + 2)); + buffer += pitchB; + pixel.x.z = Float(*Pointer<Half>(buffer + 0)); + pixel.x.w = Float(*Pointer<Half>(buffer + 2)); pixel.y = pixel.z = pixel.w = one; break; case VK_FORMAT_R16G16_SFLOAT: - pixel.x.x = Float(*Pointer<Half>(buffer + 4 * x + 0)); - pixel.y.x = Float(*Pointer<Half>(buffer + 4 * x + 2)); - pixel.x.y = Float(*Pointer<Half>(buffer + 4 * x + 4)); - pixel.y.y = Float(*Pointer<Half>(buffer + 4 * x + 6)); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.x.z = Float(*Pointer<Half>(buffer + 4 * x + 0)); - pixel.y.z = Float(*Pointer<Half>(buffer + 4 * x + 2)); - pixel.x.w = Float(*Pointer<Half>(buffer + 4 * x + 4)); - pixel.y.w = Float(*Pointer<Half>(buffer + 4 * x + 6)); + buffer += 4 * x; + pixel.x.x = Float(*Pointer<Half>(buffer + 0)); + pixel.y.x = Float(*Pointer<Half>(buffer + 2)); + pixel.x.y = Float(*Pointer<Half>(buffer + 4)); + pixel.y.y = Float(*Pointer<Half>(buffer + 6)); + buffer += pitchB; + pixel.x.z = Float(*Pointer<Half>(buffer + 0)); + pixel.y.z = Float(*Pointer<Half>(buffer + 2)); + pixel.x.w = Float(*Pointer<Half>(buffer + 4)); + pixel.y.w = Float(*Pointer<Half>(buffer + 6)); pixel.z = pixel.w = one; break; case VK_FORMAT_R16G16B16A16_SFLOAT: - pixel.x.x = Float(*Pointer<Half>(buffer + 8 * x + 0x0)); - pixel.y.x = Float(*Pointer<Half>(buffer + 8 * x + 0x2)); - pixel.z.x = Float(*Pointer<Half>(buffer + 8 * x + 0x4)); - pixel.w.x = Float(*Pointer<Half>(buffer + 8 * x + 0x6)); - pixel.x.y = Float(*Pointer<Half>(buffer + 8 * x + 0x8)); - pixel.y.y = Float(*Pointer<Half>(buffer + 8 * x + 0xa)); - pixel.z.y = Float(*Pointer<Half>(buffer + 8 * x + 0xc)); - pixel.w.y 
= Float(*Pointer<Half>(buffer + 8 * x + 0xe)); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.x.z = Float(*Pointer<Half>(buffer + 8 * x + 0x0)); - pixel.y.z = Float(*Pointer<Half>(buffer + 8 * x + 0x2)); - pixel.z.z = Float(*Pointer<Half>(buffer + 8 * x + 0x4)); - pixel.w.z = Float(*Pointer<Half>(buffer + 8 * x + 0x6)); - pixel.x.w = Float(*Pointer<Half>(buffer + 8 * x + 0x8)); - pixel.y.w = Float(*Pointer<Half>(buffer + 8 * x + 0xa)); - pixel.z.w = Float(*Pointer<Half>(buffer + 8 * x + 0xc)); - pixel.w.w = Float(*Pointer<Half>(buffer + 8 * x + 0xe)); + buffer += 8 * x; + pixel.x.x = Float(*Pointer<Half>(buffer + 0x0)); + pixel.y.x = Float(*Pointer<Half>(buffer + 0x2)); + pixel.z.x = Float(*Pointer<Half>(buffer + 0x4)); + pixel.w.x = Float(*Pointer<Half>(buffer + 0x6)); + pixel.x.y = Float(*Pointer<Half>(buffer + 0x8)); + pixel.y.y = Float(*Pointer<Half>(buffer + 0xa)); + pixel.z.y = Float(*Pointer<Half>(buffer + 0xc)); + pixel.w.y = Float(*Pointer<Half>(buffer + 0xe)); + buffer += pitchB; + pixel.x.z = Float(*Pointer<Half>(buffer + 0x0)); + pixel.y.z = Float(*Pointer<Half>(buffer + 0x2)); + pixel.z.z = Float(*Pointer<Half>(buffer + 0x4)); + pixel.w.z = Float(*Pointer<Half>(buffer + 0x6)); + pixel.x.w = Float(*Pointer<Half>(buffer + 0x8)); + pixel.y.w = Float(*Pointer<Half>(buffer + 0xa)); + pixel.z.w = Float(*Pointer<Half>(buffer + 0xc)); + pixel.w.w = Float(*Pointer<Half>(buffer + 0xe)); break; case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0)); - pixel.y = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4)); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 0)); - pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4 * x + 4)); - transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); + buffer += 4 * x; + pixel.x = r11g11b10Unpack(*Pointer<UInt>(buffer + 0)); + pixel.y = 
r11g11b10Unpack(*Pointer<UInt>(buffer + 4)); + buffer += pitchB; + pixel.z = r11g11b10Unpack(*Pointer<UInt>(buffer + 0)); + pixel.w = r11g11b10Unpack(*Pointer<UInt>(buffer + 4)); + transpose4x3(pixel.x, pixel.y, pixel.z, pixel.w); + pixel.w = one; break; default: UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index])); @@ -2007,6 +2020,11 @@ default: UNIMPLEMENTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha)); } + + if(format.isUnsignedComponent(0)) { oC.x = Max(oC.x, Float4(0.0f)); } + if(format.isUnsignedComponent(1)) { oC.y = Max(oC.y, Float4(0.0f)); } + if(format.isUnsignedComponent(2)) { oC.z = Max(oC.z, Float4(0.0f)); } + if(format.isUnsignedComponent(3)) { oC.w = Max(oC.w, Float4(0.0f)); } } void PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &oC, const Int &sMask, const Int &zMask, const Int &cMask) @@ -2074,6 +2092,7 @@ auto targetFormat = state.targetFormat[index]; Pointer<Byte> buffer = cBuffer; + Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); Float4 value; switch(targetFormat) @@ -2089,7 +2108,7 @@ value.x = *Pointer<Float>(buffer + 0); value.y = *Pointer<Float>(buffer + 4); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; // FIXME: movhps value.z = *Pointer<Float>(buffer + 0); @@ -2103,7 +2122,7 @@ *Pointer<Float>(buffer + 0) = oC.x.z; *Pointer<Float>(buffer + 4) = oC.x.w; - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer -= pitchB; // FIXME: movlps *Pointer<Float>(buffer + 0) = oC.x.x; @@ -2118,7 +2137,7 @@ value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 0); value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 1); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = Insert(value, Float(*Pointer<Half>(buffer + 0)), 2); value = Insert(value, Float(*Pointer<Half>(buffer + 2)), 3); @@ -2130,7 +2149,7 @@ *Pointer<Half>(buffer + 0) 
= Half(oC.x.z); *Pointer<Half>(buffer + 2) = Half(oC.x.w); - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer -= pitchB; *Pointer<Half>(buffer + 0) = Half(oC.x.x); *Pointer<Half>(buffer + 2) = Half(oC.x.y); @@ -2145,7 +2164,7 @@ UShort4 xyzw; xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0)); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1)); value = As<Float4>(Int4(xyzw)); @@ -2161,7 +2180,7 @@ component = oC.x.w; *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer -= pitchB; component = oC.x.x; *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); @@ -2175,7 +2194,7 @@ component = oC.x.w; *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer -= pitchB; component = oC.x.x; *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); @@ -2193,7 +2212,7 @@ UInt xyzw, packedCol; xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF; - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; xyzw |= UInt(*Pointer<UShort>(buffer)) << 16; Short4 tmpCol = Short4(As<Int4>(oC.x)); @@ -2211,7 +2230,7 @@ (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); *Pointer<UShort>(buffer) = UShort(packedCol >> 16); - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer -= pitchB; *Pointer<UShort>(buffer) = UShort(packedCol); } break; @@ -2235,7 +2254,7 @@ oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); *Pointer<Float4>(buffer) = oC.x; - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Float4>(buffer); @@ -2274,7 +2293,7 @@ } *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask); - 
buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.y))) << 16) | UInt(As<UShort>(Half(oC.y.x))), 0); packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.y.w))) << 16) | UInt(As<UShort>(Half(oC.y.z))), 1); @@ -2305,7 +2324,7 @@ } *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; packedCol = UShort4(As<Int4>(oC.y)); value = *Pointer<UShort4>(buffer); @@ -2326,7 +2345,7 @@ Int2 xyzw, packedCol; xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1); if(targetFormat == VK_FORMAT_R8G8_SINT) @@ -2349,7 +2368,7 @@ packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask)); *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1)); - buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer -= pitchB; *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); } break; @@ -2392,7 +2411,7 @@ *Pointer<Float4>(buffer + 16, 16) = oC.y; } - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; { value = *Pointer<Float4>(buffer, 16); @@ -2449,7 +2468,7 @@ } *Pointer<UInt4>(buffer) = (packedCol & mergedMask) | (As<UInt4>(value) & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<UInt4>(buffer); packedCol = Insert(packedCol, (UInt(As<UShort>(Half(oC.z.y))) << 16) | UInt(As<UShort>(Half(oC.z.x))), 0); @@ -2469,19 +2488,31 @@ { buffer += 4 * x; - unsigned int mask = ((rgbaWriteMask & 0x1) ? 0x000007FF : 0) | - ((rgbaWriteMask & 0x2) ? 0x003FF800 : 0) | - ((rgbaWriteMask & 0x4) ? 
0xFFC00000 : 0); - UInt2 mergedMask(mask, mask); + UInt4 packedCol; + packedCol = Insert(packedCol, r11g11b10Pack(oC.x), 0); + packedCol = Insert(packedCol, r11g11b10Pack(oC.y), 1); + packedCol = Insert(packedCol, r11g11b10Pack(oC.z), 2); + packedCol = Insert(packedCol, r11g11b10Pack(oC.w), 3); - UInt2 value; - value = Insert(value, r11g11b10Pack(oC.x), 0); - value = Insert(value, r11g11b10Pack(oC.y), 1); - *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); - value = Insert(value, r11g11b10Pack(oC.z), 0); - value = Insert(value, r11g11b10Pack(oC.w), 1); - *Pointer<UInt2>(buffer) = (value & mergedMask) | ((*Pointer<UInt2>(buffer)) & ~mergedMask); + UInt4 value; + value = Insert(value, *Pointer<UInt>(buffer + 0), 0); + value = Insert(value, *Pointer<UInt>(buffer + 4), 1); + buffer += pitchB; + value = Insert(value, *Pointer<UInt>(buffer + 0), 2); + value = Insert(value, *Pointer<UInt>(buffer + 4), 3); + + UInt4 mask = *Pointer<UInt4>(constants + OFFSET(Constants, maskD4X[0][0]) + xMask * 16, 16); + if((rgbaWriteMask & 0x7) != 0x7) + { + mask &= *Pointer<UInt4>(constants + OFFSET(Constants, mask11X[rgbaWriteMask & 0x7][0]), 16); + } + value = (packedCol & mask) | (value & ~mask); + + *Pointer<UInt>(buffer + 0) = value.z; + *Pointer<UInt>(buffer + 4) = value.w; + buffer -= pitchB; + *Pointer<UInt>(buffer + 0) = value.x; + *Pointer<UInt>(buffer + 4) = value.y; } break; case VK_FORMAT_R16G16B16A16_SINT: @@ -2502,7 +2533,7 @@ } *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<UShort8>(buffer); packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))); @@ -2542,7 +2573,7 @@ } *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); - buffer += *Pointer<Int>(data + 
OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; if(isSigned) { @@ -2579,7 +2610,7 @@ } *Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask); - buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); + buffer += pitchB; value = *Pointer<Int2>(buffer, 16); mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp index 87f33c8..d8e5f02 100644 --- a/src/Pipeline/ShaderCore.cpp +++ b/src/Pipeline/ShaderCore.cpp
@@ -594,7 +594,7 @@ return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16); } -sw::SIMD::Float r11g11b10Unpack(UInt r11g11b10bits) +Float4 r11g11b10Unpack(UInt r11g11b10bits) { // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa. // Since the Half float format also has a 5 bit exponent, we can convert these formats to half by @@ -606,7 +606,7 @@ halfBits = Insert(halfBits, (r11g11b10bits & UInt(0x003FF800u)) >> 7, 1); halfBits = Insert(halfBits, (r11g11b10bits & UInt(0xFFC00000u)) >> 17, 2); halfBits = Insert(halfBits, UInt(0x00003C00u), 3); - return As<sw::SIMD::Float>(halfToFloatBits(halfBits)); + return As<Float4>(halfToFloatBits(halfBits)); } UInt r11g11b10Pack(sw::SIMD::Float &value)
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp index d0b7142..f360a66 100644 --- a/src/Vulkan/VkPhysicalDevice.cpp +++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -616,6 +616,7 @@ case VK_FORMAT_R32_SFLOAT: case VK_FORMAT_R32G32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: pFormatProperties->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; // Fall through @@ -640,7 +641,6 @@ case VK_FORMAT_R32G32_SINT: case VK_FORMAT_R32G32B32A32_UINT: case VK_FORMAT_R32G32B32A32_SINT: - case VK_FORMAT_B10G11R11_UFLOAT_PACK32: pFormatProperties->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;