Add support for VK_FORMAT_A2B10G10R10_UINT_PACK32 render targets

Bug: b/131896622
Test: dEQP-VK.*a2b10g10r10*
Change-Id: I0c50b14b007bd86dfaff39cf04938860408157f8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30529
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp
index 45e8b50..ac9d636 100644
--- a/src/Pipeline/Constants.cpp
+++ b/src/Pipeline/Constants.cpp
@@ -262,6 +262,15 @@
 			maskD01X[i][3] =  -(i >> 1 & 1);
 		}
 
+		for (int i = 0; i < 16; i++)	// Build one A2B10G10R10 bit mask per 4-bit write mask value
+		{
+			mask10Q[i][0] = mask10Q[i][1] =	// Same 32-bit mask stored in both elements (one per pixel of a pair)
+					(i & 0x1 ? 0x3FF : 0) |	// write mask bit 0 (R) -> bits 0..9
+					(i & 0x2 ? 0xFFC00 : 0) |	// write mask bit 1 (G) -> bits 10..19
+					(i & 0x4 ? 0x3FF00000 : 0) |	// write mask bit 2 (B) -> bits 20..29
+					(i & 0x8 ? 0xC0000000 : 0);	// write mask bit 3 (A) -> bits 30..31
+		}
+
 		for(int i = 0; i < 256; i++)
 		{
 			sRGBtoLinear8_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0xFF) * 0xFFFF + 0.5f);
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp
index e3e2ee8..7f6c951 100644
--- a/src/Pipeline/Constants.hpp
+++ b/src/Pipeline/Constants.hpp
@@ -66,6 +66,7 @@
 		word4 maskW01Q[4];
 		dword4 maskD01X[4];
 		word4 mask565Q[8];
+		dword2 mask10Q[16];		// 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
 
 		unsigned short sRGBtoLinear8_16[256];
 
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 7fe2ec3..6189931 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -191,6 +191,7 @@
 			case VK_FORMAT_R8G8B8A8_UINT:
 			case VK_FORMAT_A8B8G8R8_UINT_PACK32:
 			case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+			case VK_FORMAT_A2B10G10R10_UINT_PACK32:
 				for(unsigned int q = 0; q < state.multiSample; q++)
 				{
 					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
@@ -233,6 +234,7 @@
 			case VK_FORMAT_R16G16B16A16_UNORM:
 			case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
 			case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+			case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
 				oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
 				oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
 				oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
@@ -261,6 +263,7 @@
 			case VK_FORMAT_R8G8B8A8_UINT:
 			case VK_FORMAT_A8B8G8R8_UINT_PACK32:
 			case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+			case VK_FORMAT_A2B10G10R10_UINT_PACK32:
 				break;
 			default:
 				UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 62b4666..1cb8b23 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -1982,6 +1982,7 @@
 		case VK_FORMAT_R16_UINT:
 		case VK_FORMAT_R8_SINT:
 		case VK_FORMAT_R8_UINT:
+		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
 			break;
 		case VK_FORMAT_R32G32_SFLOAT:
 		case VK_FORMAT_R32G32_SINT:
@@ -2401,6 +2402,35 @@
 				*Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
 			}
 			break;
+		case VK_FORMAT_A2B10G10R10_UINT_PACK32:	// Packed 10:10:10:2 unsigned integer target, written as two pairs of pixels
+			if((rgbaWriteMask & 0x0000000F) != 0x0)	// Skip the store entirely when no channel is write-enabled
+			{
+				Int2 mergedMask, value;
+				Int4 packed = ((As<Int4>(oC.w) & Int4(0x3)) << 30) |	// A: low 2 bits -> bits 30..31
+						((As<Int4>(oC.z) & Int4(0x3ff)) << 20) |	// B: low 10 bits -> bits 20..29
+						((As<Int4>(oC.y) & Int4(0x3ff)) << 10) |	// G: low 10 bits -> bits 10..19
+						((As<Int4>(oC.x) & Int4(0x3ff)));	// R: low 10 bits -> bits 0..9
+
+				buffer = cBuffer + 4 * x;	// 4 bytes per packed pixel
+				value = *Pointer<Int2>(buffer, 16);	// Existing contents of the first pixel pair
+				mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);	// Per-pixel mask selected by xMask (presumably coverage of lanes 0 and 1)
+				if(rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));	// Restrict to the bit fields of the enabled channels
+				}
+				*Pointer<Int2>(buffer) = (As<Int2>(packed) & mergedMask) | (value & ~mergedMask);	// New bits where the mask is set, old bits elsewhere
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));	// Advance by colorPitchB (presumably to the next row)
+
+				value = *Pointer<Int2>(buffer, 16);	// Existing contents of the second pixel pair
+				mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);	// Per-pixel mask selected by xMask (presumably coverage of lanes 2 and 3)
+				if(rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));	// Restrict to the bit fields of the enabled channels
+				}
+				*Pointer<Int2>(buffer) = (As<Int2>(Int4(packed.zwww)) & mergedMask) | (value & ~mergedMask);	// zwww swizzle moves lanes 2 and 3 into the low half
+			}
+			break;
 		default:
 			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 		}