Implement a2b10g10r10_unorm_pack32.

Adds support for a2b10g10r10_unorm_pack32 render targets and input attachments.

Tests: dEQP-VK.renderpass.dedicated_allocation.formats.a2b10g10r10_unorm_pack32.*
Tests: dEQP-VK.renderpass.suballocation.formats.a2b10g10r10_unorm_pack32.*
Bug: b/131896622
Bug: b/131171141
Change-Id: I925723754858c6b3515d142d0c7abae275685d6d
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31271
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index a90a398..de2758f 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -147,7 +147,8 @@
 				continue;
 			}
 
-			switch(state.targetFormat[index])
+			auto format = state.targetFormat[index];
+			switch(format)
 			{
 			case VK_FORMAT_R5G6B5_UNORM_PACK16:
 			case VK_FORMAT_B8G8R8A8_UNORM:
@@ -160,12 +161,13 @@
 			case VK_FORMAT_R16G16B16A16_UNORM:
 			case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
 			case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+			case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
 				for(unsigned int q = 0; q < state.multiSample; q++)
 				{
 					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
 					Vector4s color;
 
-					if(state.targetFormat[index] == VK_FORMAT_R5G6B5_UNORM_PACK16)
+					if(format == VK_FORMAT_R5G6B5_UNORM_PACK16)
 					{
 						color.x = UShort4(c[index].x * Float4(0xFBFF), false);
 						color.y = UShort4(c[index].y * Float4(0xFDFF), false);
@@ -227,7 +229,7 @@
 				}
 				break;
 			default:
-				UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
+				UNIMPLEMENTED("VkFormat: %d", int(format));
 			}
 		}
 	}
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 9b9bc7e..c762243 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -1368,6 +1368,19 @@
 		case VK_FORMAT_R16G16B16A16_UNORM:
 			transpose4x4(current.x, current.y, current.z, current.w);
 			break;
+		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+		{
+			auto r = Int4(current.x) & Int4(0x3ff);
+			auto g = Int4(current.y) & Int4(0x3ff);
+			auto b = Int4(current.z) & Int4(0x3ff);
+			auto a = Int4(current.w) & Int4(0x3);
+			Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
+			auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
+			auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;
+			current.x = UnpackLow(c02, c13);
+			current.y = UnpackHigh(c02, c13);
+			break;
+		}
 		default:
 			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 		}
@@ -1662,6 +1675,30 @@
 				}
 			}
 			break;
+			case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+			{
+				Pointer<Byte> buffer = cBuffer + 4 * x;
+
+				buffer = cBuffer + 4 * x;
+				Int2 value = *Pointer<Int2>(buffer, 16);
+				Int2 mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
+				if (rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+				}
+				*Pointer<Int2>(buffer) = (As<Int2>(current.x) & mergedMask) | (value & ~mergedMask);
+
+				buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
+
+				value = *Pointer<Int2>(buffer, 16);
+				mergedMask = *Pointer<Int2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
+				if (rgbaWriteMask != 0xF)
+				{
+					mergedMask &= *Pointer<Int2>(constants + OFFSET(Constants, mask10Q[rgbaWriteMask][0]));
+				}
+				*Pointer<Int2>(buffer) = (As<Int2>(current.y) & mergedMask) | (value & ~mergedMask);
+			}
+			break;
 		default:
 			UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
 		}
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 3ff890e..d4024bb 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -5285,6 +5285,12 @@
 			dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
 			dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
 			break;
+		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+			dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+			dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+			dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
+			dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
+			break;
 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
 			dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
 			dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));