Implement support for VK_FORMAT_R16*_UNORM attachment formats There was some legacy code for the 2- and 4-component formats in the 16-bit code path, but this caused too much loss of precision, and isn't any faster on modern CPUs. Instead, support for these formats was added to the 32-bit code path, which previously only handled floating-point and unnormalized integer formats. Bug: b/204220035 Tests: dEQP-VK.* Change-Id: Ibd55c38fe018aaac914ff05132affd2ac39c4933 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/58669 Tested-by: Nicolas Capens <nicolascapens@google.com> Kokoro-Result: kokoro <noreply+kokoro@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp index 54e7ead..24d9499 100644 --- a/src/Pipeline/PixelProgram.cpp +++ b/src/Pipeline/PixelProgram.cpp
@@ -290,8 +290,6 @@ case VK_FORMAT_R8G8B8A8_SRGB: case VK_FORMAT_R8G8_UNORM: case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_A8B8G8R8_UNORM_PACK32: case VK_FORMAT_A8B8G8R8_SRGB_PACK32: case VK_FORMAT_A2B10G10R10_UNORM_PACK32: @@ -323,6 +321,9 @@ case VK_FORMAT_R32_UINT: case VK_FORMAT_R32G32_UINT: case VK_FORMAT_R32G32B32A32_UINT: + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16_SINT: case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16B16A16_SINT: @@ -385,6 +386,7 @@ case VK_FORMAT_R8G8B8A8_SRGB: case VK_FORMAT_R8G8_UNORM: case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R16_UNORM: case VK_FORMAT_R16G16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp index 9ea0bd6..7d36ce6 100644 --- a/src/Pipeline/PixelRoutine.cpp +++ b/src/Pipeline/PixelRoutine.cpp
@@ -1308,29 +1308,6 @@ pixel.z = Short4(0x0000u); pixel.w = Short4(0xFFFFu); break; - case VK_FORMAT_R16G16B16A16_UNORM: - buffer += 8 * x; - pixel.x = *Pointer<Short4>(buffer + 0); - pixel.y = *Pointer<Short4>(buffer + 8); - buffer += pitchB; - pixel.z = *Pointer<Short4>(buffer + 0); - pixel.w = *Pointer<Short4>(buffer + 8); - transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); - break; - case VK_FORMAT_R16G16_UNORM: - buffer += 4 * x; - pixel.x = *Pointer<Short4>(buffer); - buffer += pitchB; - pixel.y = *Pointer<Short4>(buffer); - pixel.z = pixel.x; - pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); - pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); - pixel.y = pixel.z; - pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); - pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); - pixel.z = Short4(0xFFFFu); - pixel.w = Short4(0xFFFFu); - break; case VK_FORMAT_A2B10G10R10_UNORM_PACK32: { Int4 v = Int4(0); @@ -1721,15 +1698,6 @@ current.x = As<Short4>(As<UShort4>(current.x) >> 8); current.x = As<Short4>(PackUnsigned(current.x, current.x)); break; - case VK_FORMAT_R16G16_UNORM: - current.z = current.x; - current.x = As<Short4>(UnpackLow(current.x, current.y)); - current.z = As<Short4>(UnpackHigh(current.z, current.y)); - current.y = current.z; - break; - case VK_FORMAT_R16G16B16A16_UNORM: - transpose4x4(current.x, current.y, current.z, current.w); - break; case VK_FORMAT_A2B10G10R10_UNORM_PACK32: { auto r = (Int4(current.x) >> 6) & Int4(0x3ff); @@ -2031,118 +1999,6 @@ *Pointer<Short>(buffer + pitchB) = Extract(current.x, 1); } break; - case VK_FORMAT_R16G16_UNORM: - { - buffer += 4 * x; - - Short4 value = *Pointer<Short4>(buffer); - - if((rgbaWriteMask & 0x00000003) != 0x00000003) - { - Short4 masked = value; - current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0])); - current.x |= masked; - } - - current.x &= 
*Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD01Q) + xMask * 8); - current.x |= value; - *Pointer<Short4>(buffer) = current.x; - - buffer += pitchB; - - value = *Pointer<Short4>(buffer); - - if((rgbaWriteMask & 0x00000003) != 0x00000003) - { - Short4 masked = value; - current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[rgbaWriteMask & 0x3][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants, maskW01Q[~rgbaWriteMask & 0x3][0])); - current.y |= masked; - } - - current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskD23Q) + xMask * 8); - current.y |= value; - *Pointer<Short4>(buffer) = current.y; - } - break; - case VK_FORMAT_R16G16B16A16_UNORM: - { - buffer += 8 * x; - - { - Short4 value = *Pointer<Short4>(buffer); - - if(rgbaWriteMask != 0x0000000F) - { - Short4 masked = value; - current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); - current.x |= masked; - } - - current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ0Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ0Q) + xMask * 8); - current.x |= value; - *Pointer<Short4>(buffer) = current.x; - } - - { - Short4 value = *Pointer<Short4>(buffer + 8); - - if(rgbaWriteMask != 0x0000000F) - { - Short4 masked = value; - current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); - current.y |= masked; - } - - current.y &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ1Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ1Q) + xMask * 8); - current.y |= value; - *Pointer<Short4>(buffer + 8) = 
current.y; - } - - buffer += pitchB; - - { - Short4 value = *Pointer<Short4>(buffer); - - if(rgbaWriteMask != 0x0000000F) - { - Short4 masked = value; - current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); - current.z |= masked; - } - - current.z &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ2Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ2Q) + xMask * 8); - current.z |= value; - *Pointer<Short4>(buffer) = current.z; - } - - { - Short4 value = *Pointer<Short4>(buffer + 8); - - if(rgbaWriteMask != 0x0000000F) - { - Short4 masked = value; - current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q[rgbaWriteMask][0])); - current.w |= masked; - } - - current.w &= *Pointer<Short4>(constants + OFFSET(Constants, maskQ3Q) + xMask * 8); - value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskQ3Q) + xMask * 8); - current.w |= value; - *Pointer<Short4>(buffer + 8) = current.w; - } - } - break; case VK_FORMAT_A2R10G10B10_UNORM_PACK32: rgbaWriteMask = bgraWriteMask; // [[fallthrough]] @@ -2439,6 +2295,16 @@ destColor.w = *Pointer<Float4>(buffer + 16, 16); transpose4x4(destColor.x, destColor.y, destColor.z, destColor.w); break; + case VK_FORMAT_R16_UNORM: + buffer += 2 * x; + destColor.x.x = Float(Int(*Pointer<UShort>(buffer + 0))); + destColor.x.y = Float(Int(*Pointer<UShort>(buffer + 2))); + buffer += pitchB; + destColor.x.z = Float(Int(*Pointer<UShort>(buffer + 0))); + destColor.x.w = Float(Int(*Pointer<UShort>(buffer + 2))); + destColor.x *= Float4(1.0f / 0xFFFF); + destColor.y = destColor.z = destColor.w = Float4(1.0f); + break; case VK_FORMAT_R16_SFLOAT: buffer += 2 * x; destColor.x.x = Float(*Pointer<Half>(buffer + 0)); @@ -2448,6 +2314,21 @@ destColor.x.w = Float(*Pointer<Half>(buffer + 2)); 
destColor.y = destColor.z = destColor.w = Float4(1.0f); break; + case VK_FORMAT_R16G16_UNORM: + buffer += 4 * x; + destColor.x.x = Float(Int(*Pointer<UShort>(buffer + 0))); + destColor.y.x = Float(Int(*Pointer<UShort>(buffer + 2))); + destColor.x.y = Float(Int(*Pointer<UShort>(buffer + 4))); + destColor.y.y = Float(Int(*Pointer<UShort>(buffer + 6))); + buffer += pitchB; + destColor.x.z = Float(Int(*Pointer<UShort>(buffer + 0))); + destColor.y.z = Float(Int(*Pointer<UShort>(buffer + 2))); + destColor.x.w = Float(Int(*Pointer<UShort>(buffer + 4))); + destColor.y.w = Float(Int(*Pointer<UShort>(buffer + 6))); + destColor.x *= Float4(1.0f / 0xFFFF); + destColor.y *= Float4(1.0f / 0xFFFF); + destColor.z = destColor.w = Float4(1.0f); + break; case VK_FORMAT_R16G16_SFLOAT: buffer += 4 * x; destColor.x.x = Float(*Pointer<Half>(buffer + 0)); @@ -2461,6 +2342,30 @@ destColor.y.w = Float(*Pointer<Half>(buffer + 6)); destColor.z = destColor.w = Float4(1.0f); break; + case VK_FORMAT_R16G16B16A16_UNORM: + buffer += 8 * x; + destColor.x.x = Float(Int(*Pointer<UShort>(buffer + 0x0))); + destColor.y.x = Float(Int(*Pointer<UShort>(buffer + 0x2))); + destColor.z.x = Float(Int(*Pointer<UShort>(buffer + 0x4))); + destColor.w.x = Float(Int(*Pointer<UShort>(buffer + 0x6))); + destColor.x.y = Float(Int(*Pointer<UShort>(buffer + 0x8))); + destColor.y.y = Float(Int(*Pointer<UShort>(buffer + 0xa))); + destColor.z.y = Float(Int(*Pointer<UShort>(buffer + 0xc))); + destColor.w.y = Float(Int(*Pointer<UShort>(buffer + 0xe))); + buffer += pitchB; + destColor.x.z = Float(Int(*Pointer<UShort>(buffer + 0x0))); + destColor.y.z = Float(Int(*Pointer<UShort>(buffer + 0x2))); + destColor.z.z = Float(Int(*Pointer<UShort>(buffer + 0x4))); + destColor.w.z = Float(Int(*Pointer<UShort>(buffer + 0x6))); + destColor.x.w = Float(Int(*Pointer<UShort>(buffer + 0x8))); + destColor.y.w = Float(Int(*Pointer<UShort>(buffer + 0xa))); + destColor.z.w = Float(Int(*Pointer<UShort>(buffer + 0xc))); + destColor.w.w = 
Float(Int(*Pointer<UShort>(buffer + 0xe))); + destColor.x *= Float4(1.0f / 0xFFFF); + destColor.y *= Float4(1.0f / 0xFFFF); + destColor.z *= Float4(1.0f / 0xFFFF); + destColor.w *= Float4(1.0f / 0xFFFF); + break; case VK_FORMAT_R16G16B16A16_SFLOAT: buffer += 8 * x; destColor.x.x = Float(*Pointer<Half>(buffer + 0x0)); @@ -2590,6 +2495,20 @@ vk::Format format = state.colorFormat[index]; switch(format) { + case VK_FORMAT_R16G16B16A16_UNORM: + color.w = Min(Max(color.w, Float4(0.0f)), Float4(1.0f)); // TODO(b/204560089): Omit clamp if redundant + color.w = As<Float4>(RoundInt(color.w * Float4(0xFFFF))); + color.z = Min(Max(color.z, Float4(0.0f)), Float4(1.0f)); // TODO(b/204560089): Omit clamp if redundant + color.z = As<Float4>(RoundInt(color.z * Float4(0xFFFF))); + // [[fallthrough]] + case VK_FORMAT_R16G16_UNORM: + color.y = Min(Max(color.y, Float4(0.0f)), Float4(1.0f)); // TODO(b/204560089): Omit clamp if redundant + color.y = As<Float4>(RoundInt(color.y * Float4(0xFFFF))); + //[[fallthrough]] + case VK_FORMAT_R16_UNORM: + color.x = Min(Max(color.x, Float4(0.0f)), Float4(1.0f)); // TODO(b/204560089): Omit clamp if redundant + color.x = As<Float4>(RoundInt(color.x * Float4(0xFFFF))); + break; default: // TODO(b/204560089): Omit clamp if redundant if(format.isUnsignedNormalized()) @@ -2614,6 +2533,7 @@ case VK_FORMAT_R32_SFLOAT: case VK_FORMAT_R32_SINT: case VK_FORMAT_R32_UINT: + case VK_FORMAT_R16_UNORM: case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_UINT: case VK_FORMAT_R8_SINT: @@ -2625,6 +2545,7 @@ case VK_FORMAT_R32G32_SFLOAT: case VK_FORMAT_R32G32_SINT: case VK_FORMAT_R32G32_UINT: + case VK_FORMAT_R16G16_UNORM: case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R8G8_SINT: @@ -2639,6 +2560,7 @@ case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R32G32B32A32_SINT: case VK_FORMAT_R32G32B32A32_UINT: + case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R8G8B8A8_SINT: @@ -2734,6 
+2656,7 @@ *Pointer<Half>(buffer + 2) = Half(color.x.y); } break; + case VK_FORMAT_R16_UNORM: case VK_FORMAT_R16_SINT: case VK_FORMAT_R16_UINT: if(rgbaWriteMask & 0x00000001) @@ -2868,6 +2791,7 @@ *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (As<UInt2>(value) & ~mergedMask); } break; + case VK_FORMAT_R16G16_UNORM: case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_UINT: if((rgbaWriteMask & 0x00000003) != 0x0) @@ -3077,6 +3001,7 @@ *Pointer<UInt>(buffer + 4) = value.y; } break; + case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R16G16B16A16_UINT: if((rgbaWriteMask & 0x0000000F) != 0x0)
diff --git a/src/Vulkan/VkFormat.cpp b/src/Vulkan/VkFormat.cpp index 2923f0a..968a6c0 100644 --- a/src/Vulkan/VkFormat.cpp +++ b/src/Vulkan/VkFormat.cpp
@@ -2018,18 +2018,21 @@ case VK_FORMAT_R16G16_SFLOAT: case VK_FORMAT_R16G16B16A16_SFLOAT: // Optional + case VK_FORMAT_R4G4B4A4_UNORM_PACK16: + case VK_FORMAT_B4G4R4A4_UNORM_PACK16: + case VK_FORMAT_B5G6R5_UNORM_PACK16: + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + case VK_FORMAT_B5G5R5A1_UNORM_PACK16: case VK_FORMAT_A2R10G10B10_UNORM_PACK32: + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R32_SFLOAT: case VK_FORMAT_R32G32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_B10G11R11_UFLOAT_PACK32: - case VK_FORMAT_R4G4B4A4_UNORM_PACK16: - case VK_FORMAT_B4G4R4A4_UNORM_PACK16: case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT: case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT: - case VK_FORMAT_B5G6R5_UNORM_PACK16: - case VK_FORMAT_R5G5B5A1_UNORM_PACK16: - case VK_FORMAT_B5G5R5A1_UNORM_PACK16: return true; default: return false;
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp index 8186ea1..c9c95e5 100644 --- a/src/Vulkan/VkPhysicalDevice.cpp +++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -1369,10 +1369,13 @@ case VK_FORMAT_A8B8G8R8_SINT_PACK32: case VK_FORMAT_A2B10G10R10_UINT_PACK32: case VK_FORMAT_A2R10G10B10_UINT_PACK32: + case VK_FORMAT_R16_UNORM: case VK_FORMAT_R16_UINT: case VK_FORMAT_R16_SINT: + case VK_FORMAT_R16G16_UNORM: case VK_FORMAT_R16G16_UINT: case VK_FORMAT_R16G16_SINT: + case VK_FORMAT_R16G16B16A16_UNORM: case VK_FORMAT_R16G16B16A16_UINT: case VK_FORMAT_R16G16B16A16_SINT: case VK_FORMAT_R32_UINT: