Revert "Float only writeColor"
This reverts commit 978e2469e6b98de6784c27cbba69121e054f4351.
Reason for revert: Potentially causing some failures in https://angle-gold.skia.org/diff?grouping=name%3Dhearthstone%26source_type%3Dangle&left=f9178a683907f1c8d806c0970e2468c2&right=af6167f064bfd553d11eaed8d1abb29f&changelist_id=4098332&crs=gerrit
Change-Id: Ie7363edbf4e32a886d0732269c5584248f6a508b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/70329
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Shahbaz Youssefi <syoussefi@google.com>
diff --git a/src/Device/BUILD.gn b/src/Device/BUILD.gn
index 4497e3b..c1ae1ff 100644
--- a/src/Device/BUILD.gn
+++ b/src/Device/BUILD.gn
@@ -70,6 +70,7 @@
defines = [
"SWIFTSHADER_ENABLE_ASTC", # TODO(b/150130101)
+ "SWIFTSHADER_LEGACY_PRECISION=true", # TODO(b/226657516)
]
deps = [
diff --git a/src/Pipeline/BUILD.gn b/src/Pipeline/BUILD.gn
index d97f527..9816b0a 100644
--- a/src/Pipeline/BUILD.gn
+++ b/src/Pipeline/BUILD.gn
@@ -65,6 +65,10 @@
"../../third_party/SPIRV-Tools/include",
]
+ defines = [
+ "SWIFTSHADER_LEGACY_PRECISION=true", # TODO(chromium:1299047)
+ ]
+
deps = [
"../../third_party/SPIRV-Tools:spvtools_headers",
"../../third_party/marl:Marl_headers",
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp
index 6535272..dda1f39 100644
--- a/src/Pipeline/Constants.cpp
+++ b/src/Pipeline/Constants.cpp
@@ -275,6 +275,12 @@
sRGBtoLinearFF_FF00[i] = (unsigned short)(sRGBtoLinear((float)i / 0xFF) * 0xFF00 + 0.5f);
}
+ for(int i = 0; i < 0x1000; i++)
+ {
+ linearToSRGB12_16[i] = (unsigned short)(clamp(linearToSRGB((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
+ sRGBtoLinear12_16[i] = (unsigned short)(clamp(sRGBtoLinear((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
+ }
+
for(int q = 0; q < 4; q++)
{
for(int c = 0; c < 16; c++)
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp
index ab410f6..a857747 100644
--- a/src/Pipeline/Constants.hpp
+++ b/src/Pipeline/Constants.hpp
@@ -108,6 +108,9 @@
unsigned short sRGBtoLinearFF_FF00[256];
+ unsigned short linearToSRGB12_16[4096];
+ unsigned short sRGBtoLinear12_16[4096];
+
// Centroid parameters
float4 sampleX[4][16];
float4 sampleY[4][16];
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 87a8dab..c73bb42 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -266,18 +266,91 @@
continue;
}
- for(unsigned int q : samples)
+ auto format = state.colorFormat[index];
+ switch(format)
{
- Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
+ case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_R8G8_UNORM:
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ for(unsigned int q : samples)
+ {
+ Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
- SIMD::Float4 C = alphaBlend(index, buffer, c[index], x);
- ASSERT(SIMD::Width == 4);
- Vector4f color;
- color.x = Extract128(C.x, 0);
- color.y = Extract128(C.y, 0);
- color.z = Extract128(C.z, 0);
- color.w = Extract128(C.w, 0);
- writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+ SIMD::Float4 colorf = alphaBlend(index, buffer, c[index], x);
+
+ ASSERT(SIMD::Width == 4);
+ Vector4s color;
+ color.x = UShort4(Extract128(colorf.x, 0) * 0xFFFF, true); // Saturating
+ color.y = UShort4(Extract128(colorf.y, 0) * 0xFFFF, true); // Saturating
+ color.z = UShort4(Extract128(colorf.z, 0) * 0xFFFF, true); // Saturating
+ color.w = UShort4(Extract128(colorf.w, 0) * 0xFFFF, true); // Saturating
+ writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+ }
+ break;
+ case VK_FORMAT_R16_SFLOAT:
+ case VK_FORMAT_R16G16_SFLOAT:
+ case VK_FORMAT_R16G16B16A16_SFLOAT:
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_R32G32_SFLOAT:
+ case VK_FORMAT_R32G32B32A32_SFLOAT:
+ case VK_FORMAT_R32_SINT:
+ case VK_FORMAT_R32G32_SINT:
+ case VK_FORMAT_R32G32B32A32_SINT:
+ case VK_FORMAT_R32_UINT:
+ case VK_FORMAT_R32G32_UINT:
+ case VK_FORMAT_R32G32B32A32_UINT:
+ case VK_FORMAT_R16_UNORM:
+ case VK_FORMAT_R16G16_UNORM:
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ case VK_FORMAT_R16_SINT:
+ case VK_FORMAT_R16G16_SINT:
+ case VK_FORMAT_R16G16B16A16_SINT:
+ case VK_FORMAT_R16_UINT:
+ case VK_FORMAT_R16G16_UINT:
+ case VK_FORMAT_R16G16B16A16_UINT:
+ case VK_FORMAT_R8_SINT:
+ case VK_FORMAT_R8G8_SINT:
+ case VK_FORMAT_R8G8B8A8_SINT:
+ case VK_FORMAT_R8_UINT:
+ case VK_FORMAT_R8G8_UINT:
+ case VK_FORMAT_R8G8B8A8_UINT:
+ case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+ case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+ case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+ case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+ for(unsigned int q : samples)
+ {
+ Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
+
+ SIMD::Float4 C = alphaBlend(index, buffer, c[index], x);
+ ASSERT(SIMD::Width == 4);
+ Vector4f color;
+ color.x = Extract128(C.x, 0);
+ color.y = Extract128(C.y, 0);
+ color.z = Extract128(C.z, 0);
+ color.w = Extract128(C.w, 0);
+ writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
+ }
+ break;
+ default:
+ UNSUPPORTED("VkFormat: %d", int(format));
}
}
}
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 409479c..1d46605 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -1172,6 +1172,576 @@
default:
UNSUPPORTED("VkFormat %d", int(format));
}
+
+ if(isSRGB(index))
+ {
+ sRGBtoLinear16_12_16(pixel);
+ }
+}
+
+void PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask)
+{
+ if(isSRGB(index))
+ {
+ linearToSRGB16_12_16(current);
+ }
+
+ vk::Format format = state.colorFormat[index];
+ switch(format)
+ {
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_R8G8_UNORM:
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
+ current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
+ current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
+ current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
+ break;
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 10) + Short4(0x0020);
+ current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 10) + Short4(0x0020);
+ current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 10) + Short4(0x0020);
+ current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 2) + Short4(0x2000);
+ break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
+ case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
+ current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 4) + Short4(0x0800);
+ current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 4) + Short4(0x0800);
+ current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 4) + Short4(0x0800);
+ current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 4) + Short4(0x0800);
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
+ current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 5) + Short4(0x0400);
+ current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 5) + Short4(0x0400);
+ current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 1) + Short4(0x4000);
+ break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
+ current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 6) + Short4(0x0200);
+ current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 5) + Short4(0x0400);
+ break;
+ default:
+ break;
+ }
+
+ int writeMask = state.colorWriteActive(index);
+ if(format.isBGRformat())
+ {
+ // For BGR formats, flip R and B channels in the channels mask
+ writeMask = (writeMask & 0x0000000A) | (writeMask & 0x00000001) << 2 | (writeMask & 0x00000004) >> 2;
+ }
+
+ switch(format)
+ {
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ {
+ current.x = As<UShort4>(current.x & Short4(0xF000));
+ current.y = As<UShort4>(current.y & Short4(0xF000)) >> 4;
+ current.z = As<UShort4>(current.z & Short4(0xF000)) >> 8;
+ current.w = As<UShort4>(current.w & Short4(0xF000u)) >> 12;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ {
+ current.z = As<UShort4>(current.z & Short4(0xF000));
+ current.y = As<UShort4>(current.y & Short4(0xF000)) >> 4;
+ current.x = As<UShort4>(current.x & Short4(0xF000)) >> 8;
+ current.w = As<UShort4>(current.w & Short4(0xF000u)) >> 12;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
+ {
+ current.w = As<UShort4>(current.w & Short4(0xF000));
+ current.x = As<UShort4>(current.x & Short4(0xF000)) >> 4;
+ current.y = As<UShort4>(current.y & Short4(0xF000)) >> 8;
+ current.z = As<UShort4>(current.z & Short4(0xF000u)) >> 12;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
+ {
+ current.w = As<UShort4>(current.w & Short4(0xF000));
+ current.z = As<UShort4>(current.z & Short4(0xF000)) >> 4;
+ current.y = As<UShort4>(current.y & Short4(0xF000)) >> 8;
+ current.x = As<UShort4>(current.x & Short4(0xF000u)) >> 12;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ {
+ current.x = As<UShort4>(current.x & Short4(0xF800));
+ current.y = As<UShort4>(current.y & Short4(0xF800)) >> 5;
+ current.z = As<UShort4>(current.z & Short4(0xF800)) >> 10;
+ current.w = As<UShort4>(current.w & Short4(0x8000u)) >> 15;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ {
+ current.z = As<UShort4>(current.z & Short4(0xF800));
+ current.y = As<UShort4>(current.y & Short4(0xF800)) >> 5;
+ current.x = As<UShort4>(current.x & Short4(0xF800)) >> 10;
+ current.w = As<UShort4>(current.w & Short4(0x8000u)) >> 15;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ {
+ current.w = current.w & Short4(0x8000u);
+ current.x = As<UShort4>(current.x & Short4(0xF800)) >> 1;
+ current.y = As<UShort4>(current.y & Short4(0xF800)) >> 6;
+ current.z = As<UShort4>(current.z & Short4(0xF800)) >> 11;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ {
+ current.x = current.x & Short4(0xF800u);
+ current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
+ current.z = As<UShort4>(current.z) >> 11;
+
+ current.x = current.x | current.y | current.z;
+ }
+ break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ {
+ current.z = current.z & Short4(0xF800u);
+ current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
+ current.x = As<UShort4>(current.x) >> 11;
+
+ current.x = current.x | current.y | current.z;
+ }
+ break;
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ if(writeMask == 0x7)
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.z, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ else
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+ current.w = As<Short4>(As<UShort4>(current.w) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.z, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.w));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ break;
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ if(writeMask == 0x7)
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.x, current.z));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ else
+ {
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.z = As<Short4>(As<UShort4>(current.z) >> 8);
+ current.w = As<Short4>(As<UShort4>(current.w) >> 8);
+
+ current.z = As<Short4>(PackUnsigned(current.x, current.z));
+ current.y = As<Short4>(PackUnsigned(current.y, current.w));
+
+ current.x = current.z;
+ current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
+ current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
+ current.y = current.z;
+ current.z = As<Short4>(UnpackLow(current.z, current.x));
+ current.y = As<Short4>(UnpackHigh(current.y, current.x));
+ }
+ break;
+ case VK_FORMAT_R8G8_UNORM:
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.y = As<Short4>(As<UShort4>(current.y) >> 8);
+ current.x = As<Short4>(PackUnsigned(current.x, current.x));
+ current.y = As<Short4>(PackUnsigned(current.y, current.y));
+ current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
+ break;
+ case VK_FORMAT_R8_UNORM:
+ current.x = As<Short4>(As<UShort4>(current.x) >> 8);
+ current.x = As<Short4>(PackUnsigned(current.x, current.x));
+ break;
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ {
+ auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
+ auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
+ auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
+ auto a = (Int4(current.w) >> 14) & Int4(0x3);
+ Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
+ auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
+ auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
+ current.x = UnpackLow(c02, c13);
+ current.y = UnpackHigh(c02, c13);
+ }
+ break;
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ {
+ auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
+ auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
+ auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
+ auto a = (Int4(current.w) >> 14) & Int4(0x3);
+ Int4 packed = (a << 30) | (r << 20) | (g << 10) | b;
+ auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
+ auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
+ current.x = UnpackLow(c02, c13);
+ current.y = UnpackHigh(c02, c13);
+ }
+ break;
+ default:
+ UNSUPPORTED("VkFormat: %d", int(format));
+ }
+
+ Short4 c01 = current.z;
+ Short4 c23 = current.y;
+
+ Int xMask; // Combination of all masks
+
+ if(state.depthTestActive)
+ {
+ xMask = zMask;
+ }
+ else
+ {
+ xMask = cMask;
+ }
+
+ if(state.stencilActive)
+ {
+ xMask &= sMask;
+ }
+
+ Pointer<Byte> buffer = cBuffer;
+ Int pitchB = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+ switch(format)
+ {
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
+ case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask;
+ switch(format)
+ {
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4argbQ[writeMask][0]));
+ break;
+ case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
+ case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
+ channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4rgbaQ[writeMask][0]));
+ break;
+ default:
+ UNREACHABLE("Format: %s", vk::Stringify(format).c_str());
+ }
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, maskr5g5b5a1Q[writeMask][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, maskb5g5r5a1Q[writeMask][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask5551Q[writeMask][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask565Q[writeMask & 0x7][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if((writeMask & 0x00000007) != 0x00000007)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if((writeMask & 0x00000007) != 0x00000007)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ {
+ buffer += x * 4;
+ Short4 value = *Pointer<Short4>(buffer);
+ Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[writeMask]));
+
+ Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Short4>(buffer);
+
+ Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ {
+ buffer += x * 4;
+ Short4 value = *Pointer<Short4>(buffer);
+ Short4 channelMask = *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q[writeMask]));
+
+ Short4 mask01 = *Pointer<Short4>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Short4>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Short4>(buffer);
+
+ Short4 mask23 = *Pointer<Short4>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ if(writeMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Short4>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_R8G8_UNORM:
+ if((writeMask & 0x00000003) != 0x0)
+ {
+ buffer += 2 * x;
+ Int2 value;
+ value = Insert(value, *Pointer<Int>(buffer), 0);
+ value = Insert(value, *Pointer<Int>(buffer + pitchB), 1);
+
+ Int2 packedCol = As<Int2>(current.x);
+
+ UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
+ if((writeMask & 0x3) != 0x3)
+ {
+ Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (writeMask & 0x3)]));
+ UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
+ mergedMask &= rgbaMask;
+ }
+
+ packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
+
+ *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
+ *Pointer<UInt>(buffer + pitchB) = As<UInt>(Extract(packedCol, 1));
+ }
+ break;
+ case VK_FORMAT_R8_UNORM:
+ if(writeMask & 0x00000001)
+ {
+ buffer += 1 * x;
+ Short4 value;
+ value = Insert(value, *Pointer<Short>(buffer), 0);
+ value = Insert(value, *Pointer<Short>(buffer + pitchB), 1);
+
+ current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
+ value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
+ current.x |= value;
+
+ *Pointer<Short>(buffer) = Extract(current.x, 0);
+ *Pointer<Short>(buffer + pitchB) = Extract(current.x, 1);
+ }
+ break;
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ {
+ buffer += 4 * x;
+
+ Int2 value = *Pointer<Int2>(buffer, 16);
+ Int2 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+ if(writeMask != 0xF)
+ {
+ mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[writeMask][0]));
+ }
+ *Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask);
+
+ buffer += pitchB;
+
+ value = *Pointer<Int2>(buffer, 16);
+ mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+ if(writeMask != 0xF)
+ {
+ mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[writeMask][0]));
+ }
+ *Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
+ }
+ break;
+ default:
+ UNSUPPORTED("VkFormat: %d", int(format));
+ }
}
Float PixelRoutine::blendConstant(vk::Format format, int component, BlendFactorModifier modifier)
@@ -1792,13 +2362,6 @@
texelColor.y = Float4(As<UShort4>(color.y)) * (1.0f / 0xFFFF);
texelColor.z = Float4(As<UShort4>(color.z)) * (1.0f / 0xFFFF);
texelColor.w = Float4(As<UShort4>(color.w)) * (1.0f / 0xFFFF);
-
- if(isSRGB(index))
- {
- texelColor.x = sRGBtoLinear(texelColor.x);
- texelColor.y = sRGBtoLinear(texelColor.y);
- texelColor.z = sRGBtoLinear(texelColor.z);
- }
}
break;
}
@@ -2909,4 +3472,65 @@
}
}
+void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
+{
+ Pointer<Byte> LUT = constants + OFFSET(Constants, sRGBtoLinear12_16);
+
+ c.x = AddSat(As<UShort4>(c.x), UShort4(0x0007)) >> 4;
+ c.y = AddSat(As<UShort4>(c.y), UShort4(0x0007)) >> 4;
+ c.z = AddSat(As<UShort4>(c.z), UShort4(0x0007)) >> 4;
+
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
+
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
+
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
+}
+
+void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
+{
+ c.x = AddSat(As<UShort4>(c.x), UShort4(0x0007)) >> 4;
+ c.y = AddSat(As<UShort4>(c.y), UShort4(0x0007)) >> 4;
+ c.z = AddSat(As<UShort4>(c.z), UShort4(0x0007)) >> 4;
+
+ linearToSRGB12_16(c);
+}
+
+void PixelRoutine::linearToSRGB12_16(Vector4s &c)
+{
+ Pointer<Byte> LUT = constants + OFFSET(Constants, linearToSRGB12_16);
+
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
+ c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
+
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
+ c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
+
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
+ c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
+}
+
+Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
+{
+ Float4 linear = x * x;
+ linear = linear * 0.73f + linear * x * 0.27f;
+
+ return Min(Max(linear, 0.0f), 1.0f);
+}
+
} // namespace sw
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index e1c4eb7..3b5224a 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -56,8 +56,10 @@
void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &color, const Int &sMask, const Int &zMask, const Int &cMask);
SIMD::Float4 alphaBlend(int index, const Pointer<Byte> &cBuffer, const SIMD::Float4 &sourceColor, const Int &x);
+ void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask);
bool isSRGB(int index) const;
+ void linearToSRGB12_16(Vector4s &c);
private:
bool hasStencilReplaceRef() const;
@@ -99,6 +101,10 @@
void writeDepth(Pointer<Byte> &zBuffer, const Int &x, const Int zMask[4], const SampleSet &samples);
void occlusionSampleCount(const Int zMask[4], const Int sMask[4], const SampleSet &samples);
+ void sRGBtoLinear16_12_16(Vector4s &c);
+ void linearToSRGB16_12_16(Vector4s &c);
+ Float4 sRGBtoLinear(const Float4 &x);
+
SIMD::Float readDepth32F(const Pointer<Byte> &zBuffer, int q, const Int &x) const;
SIMD::Float readDepth16(const Pointer<Byte> &zBuffer, int q, const Int &x) const;
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index 635c925..ac9f91f 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -20,6 +20,11 @@
#include <limits.h>
+// TODO(chromium:1299047)
+#ifndef SWIFTSHADER_LEGACY_PRECISION
+# define SWIFTSHADER_LEGACY_PRECISION false
+#endif
+
namespace sw {
Vector4s::Vector4s()
@@ -311,6 +316,23 @@
return As<SIMD::Float>((precision_loss & As<SIMD::Int>(-atan2_theta)) | (~precision_loss & As<SIMD::Int>(theta))); // FIXME: Vector select
}
+// TODO(chromium:1299047)
+static RValue<SIMD::Float> Exp2_legacy(RValue<SIMD::Float> x0)
+{
+ SIMD::Int i = RoundInt(x0 - 0.5f);
+ SIMD::Float ii = As<SIMD::Float>((i + SIMD::Int(127)) << 23);
+
+ SIMD::Float f = x0 - SIMD::Float(i);
+ SIMD::Float ff = As<SIMD::Float>(SIMD::Int(0x3AF61905));
+ ff = ff * f + As<SIMD::Float>(SIMD::Int(0x3C134806));
+ ff = ff * f + As<SIMD::Float>(SIMD::Int(0x3D64AA23));
+ ff = ff * f + As<SIMD::Float>(SIMD::Int(0x3E75EAD4));
+ ff = ff * f + As<SIMD::Float>(SIMD::Int(0x3F31727B));
+ ff = ff * f + 1.0f;
+
+ return ii * ff;
+}
+
RValue<SIMD::Float> Exp2(RValue<SIMD::Float> x, bool relaxedPrecision)
{
// Clamp to prevent overflow past the representation of infinity.
@@ -318,6 +340,11 @@
x0 = Min(x0, 128.0f);
x0 = Max(x0, As<SIMD::Float>(SIMD::Int(0xC2FDFFFF))); // -126.999992
+ if(SWIFTSHADER_LEGACY_PRECISION) // TODO(chromium:1299047)
+ {
+ return Exp2_legacy(x0);
+ }
+
SIMD::Float xi = Floor(x0);
SIMD::Float f = x0 - xi;
@@ -378,6 +405,11 @@
RValue<SIMD::Float> Log2(RValue<SIMD::Float> x, bool relaxedPrecision)
{
+ if(SWIFTSHADER_LEGACY_PRECISION) // TODO(chromium:1299047)
+ {
+ return Log2_legacy(x);
+ }
+
if(!relaxedPrecision) // highp
{
// Reinterpretation as an integer provides a piecewise linear
@@ -681,6 +713,11 @@
// TODO(chromium:1299047): Eliminate when Chromium tests accept both fused and unfused multiply-add.
RValue<SIMD::Float> mulAdd(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z)
{
+ if(SWIFTSHADER_LEGACY_PRECISION)
+ {
+ return x * y + z;
+ }
+
return MulAdd(x, y, z);
}
diff --git a/src/Vulkan/BUILD.gn b/src/Vulkan/BUILD.gn
index a7703f0..5556abd 100644
--- a/src/Vulkan/BUILD.gn
+++ b/src/Vulkan/BUILD.gn
@@ -63,6 +63,7 @@
defines += [
"SWIFTSHADER_ENABLE_ASTC", # TODO(b/150130101)
+ "SWIFTSHADER_LEGACY_PRECISION=true", # TODO(chromium:1299047)
"SWIFTSHADER_ZERO_INITIALIZE_DEVICE_MEMORY",
]
}
diff --git a/src/Vulkan/VkConfig.hpp b/src/Vulkan/VkConfig.hpp
index ca2f5ce..4e2c023 100644
--- a/src/Vulkan/VkConfig.hpp
+++ b/src/Vulkan/VkConfig.hpp
@@ -20,6 +20,10 @@
#include "Vulkan/VulkanPlatform.hpp"
#include "spirv-tools/libspirv.h"
+#ifndef SWIFTSHADER_LEGACY_PRECISION
+# define SWIFTSHADER_LEGACY_PRECISION false
+#endif
+
namespace vk {
// Note: Constant array initialization requires a string literal.
@@ -90,7 +94,7 @@
constexpr int MAX_SAMPLER_ALLOCATION_COUNT = 4000;
-constexpr int SUBPIXEL_PRECISION_BITS = 8;
+constexpr int SUBPIXEL_PRECISION_BITS = SWIFTSHADER_LEGACY_PRECISION ? 4 : 8;
constexpr float SUBPIXEL_PRECISION_FACTOR = static_cast<float>(1 << SUBPIXEL_PRECISION_BITS);
constexpr int SUBPIXEL_PRECISION_MASK = 0xFFFFFFFF >> (32 - SUBPIXEL_PRECISION_BITS);