Fully support all 16-bit packed texture formats

Vulkan lists the following formats as 16-bit packed formats:
 * VK_FORMAT_R4G4B4A4_UNORM_PACK16
 * VK_FORMAT_B4G4R4A4_UNORM_PACK16
 * VK_FORMAT_R5G6B5_UNORM_PACK16
 * VK_FORMAT_B5G6R5_UNORM_PACK16
 * VK_FORMAT_R5G5B5A1_UNORM_PACK16
 * VK_FORMAT_B5G5R5A1_UNORM_PACK16
 * VK_FORMAT_A1R5G5B5_UNORM_PACK16

We already support VK_FORMAT_A1R5G5B5_UNORM_PACK16, so we can use that
as a cross-reference to see which tests we expect to pass.

Bug: b/139351376
Tests: dEQP-VK.*pack16*

Change-Id: Ia7b0b9efe92c7f67c803579fee7c8e0d62ef081f
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/57048
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Sean Risser <srisser@google.com>
Commit-Queue: Sean Risser <srisser@google.com>
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index d821e46..9043038 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -152,16 +152,20 @@
 			switch(state.textureFormat)
 			{
 			case VK_FORMAT_R5G6B5_UNORM_PACK16:
+			case VK_FORMAT_B5G6R5_UNORM_PACK16:
 				c.x *= Float4(1.0f / 0xF800);
 				c.y *= Float4(1.0f / 0xFC00);
 				c.z *= Float4(1.0f / 0xF800);
 				break;
+			case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
 			case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
 				c.x *= Float4(1.0f / 0xF000);
 				c.y *= Float4(1.0f / 0xF000);
 				c.z *= Float4(1.0f / 0xF000);
 				c.w *= Float4(1.0f / 0xF000);
 				break;
+			case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+			case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
 			case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
 				c.x *= Float4(1.0f / 0xF800);
 				c.y *= Float4(1.0f / 0xF800);
@@ -214,16 +218,20 @@
 		switch(state.textureFormat)
 		{
 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
+		case VK_FORMAT_B5G6R5_UNORM_PACK16:
 			c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
 			c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
 			c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
 			break;
+		case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
 			c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
 			c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
 			c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
 			c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
 			break;
+		case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+		case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
 			c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
 			c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
@@ -1526,12 +1534,35 @@
 			c.y = (c.x & Short4(0x07E0u)) << 5;
 			c.x = (c.x & Short4(0xF800u));
 			break;
+		case VK_FORMAT_B5G6R5_UNORM_PACK16:
+			c.z = (c.x & Short4(0xF800u));
+			c.y = (c.x & Short4(0x07E0u)) << 5;
+			c.x = (c.x & Short4(0x001Fu)) << 11;
+			break;
+		case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+			c.w = (c.x << 12) & Short4(0xF000u);
+			c.z = (c.x << 8) & Short4(0xF000u);
+			c.y = (c.x << 4) & Short4(0xF000u);
+			c.x = (c.x) & Short4(0xF000u);
+			break;
 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
 			c.w = (c.x << 12) & Short4(0xF000u);
 			c.z = (c.x) & Short4(0xF000u);
 			c.y = (c.x << 4) & Short4(0xF000u);
 			c.x = (c.x << 8) & Short4(0xF000u);
 			break;
+		case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+			c.w = (c.x << 15) & Short4(0x8000u);
+			c.z = (c.x << 10) & Short4(0xF800u);
+			c.y = (c.x << 5) & Short4(0xF800u);
+			c.x = (c.x) & Short4(0xF800u);
+			break;
+		case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+			c.w = (c.x << 15) & Short4(0x8000u);
+			c.z = (c.x) & Short4(0xF800u);
+			c.y = (c.x << 5) & Short4(0xF800u);
+			c.x = (c.x << 10) & Short4(0xF800u);
+			break;
 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
 			c.w = (c.x) & Short4(0x8000u);
 			c.z = (c.x << 11) & Short4(0xF800u);