A2R10G10B10 Support

A2B10G10R10 (RGBA) was already supported by
SwiftShader, but A2R10G10B10 (BGRA) was not. Most of
this CL is trivial: it adds code paths for the new
formats that mirror the ones used for the already
supported formats, with the R and B channels swapped.

The only new piece of code is rounding for 1010102
formats at the top of the PixelRoutine::writeColor()
function. There was already rounding for 8-bit formats,
but not for 1010102 formats, which could lead to
off-by-one errors in the output. Such an error is
relatively large when it occurs on the 2-bit alpha
channel. This fixes one of the
dEQP-VK.pipeline.blend.*a2r10g10b10* tests.

Tests: dEQP-VK.*a2r10g10b10*
Bug: b/142661203

Change-Id: Ifcae17aecafab3ea7967fdc755391ddd5e651ca5
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40008
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index 63b15f9..3ca28a5 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -1701,6 +1701,16 @@
 
 		a2b10g10r10Unpack(cc, c);
 	}
+	else if(state.textureFormat == VK_FORMAT_A2R10G10B10_UNORM_PACK32)
+	{
+		Int4 cc;
+		cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
+		cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
+		cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
+		cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
+
+		a2r10g10b10Unpack(cc, c);
+	}
 	else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32)
 	{
 		Int4 cc;
@@ -1714,6 +1724,19 @@
 		c.z = Short4(((cc >> 20) & Int4(0x3FF)));
 		c.w = Short4(((cc >> 30) & Int4(0x3)));
 	}
+	else if(state.textureFormat == VK_FORMAT_A2R10G10B10_UINT_PACK32)
+	{
+		Int4 cc;
+		cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
+		cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
+		cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
+		cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
+
+		c.z = Short4(((cc)&Int4(0x3FF)));
+		c.y = Short4(((cc >> 10) & Int4(0x3FF)));
+		c.x = Short4(((cc >> 20) & Int4(0x3FF)));
+		c.w = Short4(((cc >> 30) & Int4(0x3)));
+	}
 	else
 		ASSERT(false);