Implement Y′CbCr conversion

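Implements the range expansion and model conversion applied when sampling
multi-planar Y′CbCr formats, as specified in the Khronos Data Format
Specification 1.2:
https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html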

Bug: b/132437008
Change-Id: I730a9eb74796c625d3e635a66b18661c73f7c01f
Tests: dEQP-VK.*ycbcr*
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31618
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Device/Sampler.hpp b/src/Device/Sampler.hpp
index 48ca936..e2ec5ef 100644
--- a/src/Device/Sampler.hpp
+++ b/src/Device/Sampler.hpp
@@ -153,6 +153,10 @@
 		bool unnormalizedCoordinates;
 		bool largeTexture;
 
+		VkSamplerYcbcrModelConversion ycbcrModel;
+		bool studioSwing;    // Narrow range
+		bool swappedChroma;  // Cb/Cr components in reverse order
+
 		#if PERF_PROFILE
 		bool compressedFormat;
 		#endif
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index cf11440..84a32b5 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -1603,60 +1603,7 @@
 
 		if(isYcbcrFormat())
 		{
-			// Generic YPbPr to RGB transformation
-			// R = Y                               +           2 * (1 - Kr) * Pr
-			// G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
-			// B = Y +           2 * (1 - Kb) * Pb
-
-			float Kb = 0.114f;
-			float Kr = 0.299f;
-			int studioSwing = 1;
-
-			switch(state.textureFormat)
-			{
-			case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
-				// VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601
-				Kb = 0.114f;
-				Kr = 0.299f;
-				studioSwing = 1;
-				break;
-				/*
-				// VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709
-				Kb = 0.0722f;
-				Kr = 0.2126f;
-				studioSwing = 1;
-				break;
-				// VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020
-				Kb = 0.114f;
-				Kr = 0.299f;
-				studioSwing = 0;
-				break;
-				*/
-			default:
-				ASSERT(false);
-			}
-
-			const float Kg = 1.0f - Kr - Kb;
-
-			const float Rr = 2 * (1 - Kr);
-			const float Gb = -2 * Kb * (1 - Kb) / Kg;
-			const float Gr = -2 * Kr * (1 - Kr) / Kg;
-			const float Bb = 2 * (1 - Kb);
-
-			// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
-			const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
-			const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
-			const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;
-
-			const float Rv = Vv * Rr;
-			const float Gu = Uu * Gb;
-			const float Gv = Vv * Gr;
-			const float Bu = Uu * Bb;
-
-			const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
-			const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
-			const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
-
+			// Luminance
 			Int c0 = Int(buffer[0][index[0]]);
 			Int c1 = Int(buffer[0][index[1]]);
 			Int c2 = Int(buffer[0][index[2]]);
@@ -1664,39 +1611,118 @@
 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
 
-			computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
-			c0 = Int(buffer[1][index[0]]);
-			c1 = Int(buffer[1][index[1]]);
-			c2 = Int(buffer[1][index[2]]);
-			c3 = Int(buffer[1][index[3]]);
-			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
-			UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
+			UShort4 Cb, Cr;
 
-			c0 = Int(buffer[2][index[0]]);
-			c1 = Int(buffer[2][index[1]]);
-			c2 = Int(buffer[2][index[2]]);
-			c3 = Int(buffer[2][index[3]]);
-			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
-			UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
+			// Chroma
+			{
+				computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
+				UShort4 U, V;
 
-			const UShort4 yY = UShort4(iround(Yy * 0x4000));
-			const UShort4 rV = UShort4(iround(Rv * 0x4000));
-			const UShort4 gU = UShort4(iround(-Gu * 0x4000));
-			const UShort4 gV = UShort4(iround(-Gv * 0x4000));
-			const UShort4 bU = UShort4(iround(Bu * 0x4000));
+				if(state.textureFormat == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM)
+				{
+					c0 = Int(buffer[1][index[0]]);
+					c1 = Int(buffer[1][index[1]]);
+					c2 = Int(buffer[1][index[2]]);
+					c3 = Int(buffer[1][index[3]]);
+					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
+					U = As<UShort4>(Unpack(As<Byte4>(c0)));
 
-			const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
-			const UShort4 g0 = UShort4(iround(G0 * 0x4000));
-			const UShort4 b0 = UShort4(iround(-B0 * 0x4000));
+					c0 = Int(buffer[2][index[0]]);
+					c1 = Int(buffer[2][index[1]]);
+					c2 = Int(buffer[2][index[2]]);
+					c3 = Int(buffer[2][index[3]]);
+					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
+					V = As<UShort4>(Unpack(As<Byte4>(c0)));
+				}
+				else if(state.textureFormat == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM)
+				{
+					Short4 UV;
+					UV = Insert(UV, Pointer<Short>(buffer[1])[index[0]], 0);  // TODO: Insert(UShort4, UShort)
+					UV = Insert(UV, Pointer<Short>(buffer[1])[index[1]], 1);
+					UV = Insert(UV, Pointer<Short>(buffer[1])[index[2]], 2);
+					UV = Insert(UV, Pointer<Short>(buffer[1])[index[3]], 3);
+					U = (UV & Short4(0x00FFu)) | (UV << 8);
+					V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8);
+				}
+				else UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
 
-			UShort4 y = MulHigh(Y, yY);
-			UShort4 r = SubSat(y + MulHigh(V, rV), r0);
-			UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
-			UShort4 b = SubSat(y + MulHigh(U, bU), b0);
+				if(!state.swappedChroma)
+				{
+					Cb = U;
+					Cr = V;
+				}
+				else
+				{
+					Cb = V;
+					Cr = U;
+				}
+			}
 
-			c.x = Min(r, UShort4(0x3FFF)) << 2;
-			c.y = Min(g, UShort4(0x3FFF)) << 2;
-			c.z = Min(b, UShort4(0x3FFF)) << 2;
+			if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+			{
+				// YCbCr formats are treated as signed 15-bit.
+				c.x = Cr >> 1;
+				c.y = Y  >> 1;
+				c.z = Cb >> 1;
+			}
+			else
+			{
+				// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
+				// Scale down by 0x0101 to normalize the 8.8 samples, and up by 0x7FFF for signed 15-bit output.
+				Float4 y = (Float4(Y)  - Float4(state.studioSwing ? 16 * 0x0101 : 0)) * Float4(float(0x7FFF) / (state.studioSwing ? 219 * 0x0101 : 255 * 0x0101));
+				Float4 u = (Float4(Cb) - Float4(128 * 0x0101))                        * Float4(float(0x7FFF) / (state.studioSwing ? 224 * 0x0101 : 255 * 0x0101));
+				Float4 v = (Float4(Cr) - Float4(128 * 0x0101))                        * Float4(float(0x7FFF) / (state.studioSwing ? 224 * 0x0101 : 255 * 0x0101));
+
+				if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
+				{
+					c.x = Short4(v);
+					c.y = Short4(y);
+					c.z = Short4(u);
+				}
+				else
+				{
+					// Generic YCbCr to RGB transformation:
+					// R = Y                               +           2 * (1 - Kr) * Cr
+					// G = Y - 2 * Kb * (1 - Kb) / Kg * Cb - 2 * Kr * (1 - Kr) / Kg * Cr
+					// B = Y +           2 * (1 - Kb) * Cb
+
+					float Kb = 0.114f;
+					float Kr = 0.299f;
+
+					switch(state.ycbcrModel)
+					{
+					case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
+						Kb = 0.0722f;
+						Kr = 0.2126f;
+						break;
+					case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
+						Kb = 0.114f;
+						Kr = 0.299f;
+						break;
+					case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
+						Kb = 0.0593f;
+						Kr = 0.2627f;
+						break;
+					default:
+						UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
+					}
+
+					const float Kg = 1.0f - Kr - Kb;
+
+					const float Rr = 2 * (1 - Kr);
+					const float Gb = -2 * Kb * (1 - Kb) / Kg;
+					const float Gr = -2 * Kr * (1 - Kr) / Kg;
+					const float Bb = 2 * (1 - Kb);
+
+					Float4 r = y                  + Float4(Rr) * v;
+					Float4 g = y + Float4(Gb) * u + Float4(Gr) * v;
+					Float4 b = y + Float4(Bb) * u                 ;
+
+					c.x = Short4(r);
+					c.y = Short4(g);
+					c.z = Short4(b);
+				}
+			}
 		}
 		else
 		{
@@ -1991,8 +2017,8 @@
 
 			if(isYcbcrFormat())
 			{
-				buffer[1] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer[1]));
-				buffer[2] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer[2]));
+				buffer[1] = *Pointer<Pointer<Byte>>(mipmap + sizeof(Mipmap) * 1 + OFFSET(Mipmap,buffer[0]));
+				buffer[2] = *Pointer<Pointer<Byte>>(mipmap + sizeof(Mipmap) * 2 + OFFSET(Mipmap,buffer[0]));
 			}
 		}
 		else
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 0d0e373..0c76457 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -96,6 +96,13 @@
 	                            (imageDescriptor->extent.height > SHRT_MAX) ||
 	                            (imageDescriptor->extent.depth  > SHRT_MAX);
 
+	if(sampler->ycbcrConversion)
+	{
+		samplerState.ycbcrModel = sampler->ycbcrConversion->ycbcrModel;
+		samplerState.studioSwing = (sampler->ycbcrConversion->ycbcrRange == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW);
+		samplerState.swappedChroma = (sampler->ycbcrConversion->components.r != VK_COMPONENT_SWIZZLE_R);
+	}
+
 	if(sampler->anisotropyEnable != VK_FALSE)
 	{
 		UNSUPPORTED("anisotropyEnable");
@@ -238,6 +245,11 @@
 
 sw::MipmapType SpirvShader::convertMipmapMode(const vk::Sampler *sampler)
 {
+	if(sampler->ycbcrConversion)
+	{
+		return MIPMAP_NONE;  // YCbCr images can only have one mipmap level.
+	}
+
 	switch(sampler->mipmapMode)
 	{
 	case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MIPMAP_POINT;
diff --git a/src/Vulkan/VkDescriptorSetLayout.cpp b/src/Vulkan/VkDescriptorSetLayout.cpp
index 917e4df..41c9d4b 100644
--- a/src/Vulkan/VkDescriptorSetLayout.cpp
+++ b/src/Vulkan/VkDescriptorSetLayout.cpp
@@ -364,41 +364,76 @@
 
 			auto &subresourceRange = imageView->getSubresourceRange();
 
-			for(int mipmapLevel = 0; mipmapLevel < sw::MIPMAP_LEVELS; mipmapLevel++)
+			if(format.isYcbcrFormat())
 			{
-				int level = sw::clamp(mipmapLevel, 0, (int)subresourceRange.levelCount - 1);  // Level within the image view
+				ASSERT(subresourceRange.levelCount == 1);
 
-				VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(imageView->getSubresourceRange().aspectMask);
-				sw::Mipmap &mipmap = texture->mipmap[mipmapLevel];
+				// YCbCr images can only have one level, so we can store parameters for the
+				// different planes in the descriptor's mipmap levels instead.
 
-				if(imageView->getType() == VK_IMAGE_VIEW_TYPE_CUBE)
+				const int level = 0;
+				VkOffset3D offset = {0, 0, 0};
+				texture->mipmap[0].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_0_BIT, level, 0, ImageView::SAMPLING);
+				texture->mipmap[1].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_1_BIT, level, 0, ImageView::SAMPLING);
+				if(format.getAspects() & VK_IMAGE_ASPECT_PLANE_2_BIT)
 				{
-					for(int face = 0; face < 6; face++)
-					{
-						// Obtain the pointer to the corner of the level including the border, for seamless sampling.
-						// This is taken into account in the sampling routine, which can't handle negative texel coordinates.
-						VkOffset3D offset = {-1, -1, 0};
-
-						// TODO(b/129523279): Implement as 6 consecutive layers instead of separate pointers.
-						mipmap.buffer[face] = imageView->getOffsetPointer(offset, aspect, level, face, ImageView::SAMPLING);
-					}
-				}
-				else
-				{
-					VkOffset3D offset = {0, 0, 0};
-					mipmap.buffer[0] = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
+					texture->mipmap[2].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_2_BIT, level, 0, ImageView::SAMPLING);
 				}
 
-				VkExtent3D extent = imageView->getMipLevelExtent(level);
+				VkExtent3D extent = imageView->getMipLevelExtent(0);
 
 				int width = extent.width;
 				int height = extent.height;
-				int layers = imageView->getSubresourceRange().layerCount;  // TODO(b/129523279): Untangle depth vs layers throughout the sampler
-				int depth = layers > 1 ? layers : extent.depth;
-				int pitchP = imageView->rowPitchBytes(aspect, level, ImageView::SAMPLING) / format.bytes();
-				int sliceP = (layers > 1 ? imageView->layerPitchBytes(aspect, ImageView::SAMPLING) : imageView->slicePitchBytes(aspect, level, ImageView::SAMPLING)) / format.bytes();
+				int pitchP0 = imageView->rowPitchBytes(VK_IMAGE_ASPECT_PLANE_0_BIT, level, ImageView::SAMPLING) /
+				              imageView->getFormat(VK_IMAGE_ASPECT_PLANE_0_BIT).bytes();
 
-				WriteTextureLevelInfo(texture, mipmapLevel, width, height, depth, pitchP, sliceP);
+				// Write plane 0 parameters to mipmap level 0.
+				WriteTextureLevelInfo(texture, 0, width, height, 1, pitchP0, 0);
+
+				// Plane 2, if present, has equal parameters to plane 1, so we use mipmap level 1 for both.
+				int pitchP1 = imageView->rowPitchBytes(VK_IMAGE_ASPECT_PLANE_1_BIT, level, ImageView::SAMPLING) /
+				              imageView->getFormat(VK_IMAGE_ASPECT_PLANE_1_BIT).bytes();
+
+				WriteTextureLevelInfo(texture, 1, width / 2, height / 2, 1, pitchP1, 0);
+			}
+			else
+			{
+				for(int mipmapLevel = 0; mipmapLevel < sw::MIPMAP_LEVELS; mipmapLevel++)
+				{
+					int level = sw::clamp(mipmapLevel, 0, (int)subresourceRange.levelCount - 1);  // Level within the image view
+
+					VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(imageView->getSubresourceRange().aspectMask);
+					sw::Mipmap &mipmap = texture->mipmap[mipmapLevel];
+
+					if(imageView->getType() == VK_IMAGE_VIEW_TYPE_CUBE)
+					{
+						for(int face = 0; face < 6; face++)
+						{
+							// Obtain the pointer to the corner of the level including the border, for seamless sampling.
+							// This is taken into account in the sampling routine, which can't handle negative texel coordinates.
+							VkOffset3D offset = {-1, -1, 0};
+
+							// TODO(b/129523279): Implement as 6 consecutive layers instead of separate pointers.
+							mipmap.buffer[face] = imageView->getOffsetPointer(offset, aspect, level, face, ImageView::SAMPLING);
+						}
+					}
+					else
+					{
+						VkOffset3D offset = {0, 0, 0};
+						mipmap.buffer[0] = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
+					}
+
+					VkExtent3D extent = imageView->getMipLevelExtent(level);
+
+					int width = extent.width;
+					int height = extent.height;
+					int layers = imageView->getSubresourceRange().layerCount;  // TODO(b/129523279): Untangle depth vs layers throughout the sampler
+					int depth = layers > 1 ? layers : extent.depth;
+					int pitchP = imageView->rowPitchBytes(aspect, level, ImageView::SAMPLING) / format.bytes();
+					int sliceP = (layers > 1 ? imageView->layerPitchBytes(aspect, ImageView::SAMPLING) : imageView->slicePitchBytes(aspect, level, ImageView::SAMPLING)) / format.bytes();
+
+					WriteTextureLevelInfo(texture, mipmapLevel, width, height, depth, pitchP, sliceP);
+				}
 			}
 		}
 	}