Fixed writing to a2b10g10r10

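The code was packing the least significant bits of each channel instead of
the most significant bits. Added right shifts (by 6 for the 10-bit R, G and B
channels, by 14 for the 2-bit A channel) to fix the issue.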

Change-Id: I6bdfc367c6daea98b0dd8878ea9c0076574206f5
Tests: dEQP-VK.renderpass.suballocation.multisample.a2b10g10r10_unorm_pack32.samples_4
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32789
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index e510e6a..2eb9359 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -1328,10 +1328,10 @@
 			break;
 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
 		{
-			auto r = Int4(current.x) & Int4(0x3ff);
-			auto g = Int4(current.y) & Int4(0x3ff);
-			auto b = Int4(current.z) & Int4(0x3ff);
-			auto a = Int4(current.w) & Int4(0x3);
+			auto r = (Int4(current.x) >> 6) & Int4(0x3ff);
+			auto g = (Int4(current.y) >> 6) & Int4(0x3ff);
+			auto b = (Int4(current.z) >> 6) & Int4(0x3ff);
+			auto a = (Int4(current.w) >> 14) & Int4(0x3);
 			Int4 packed = (a << 30) | (b << 20) | (g << 10) | r;
 			auto c02 = As<Int2>(Int4(packed.xzzz)); // TODO: auto c02 = packed.xz;
 			auto c13 = As<Int2>(Int4(packed.ywww)); // TODO: auto c13 = packed.yw;