Implement sRGB texture sampling.

Previously, sRGB data was converted to linear space on upload, which
caused a loss of precision. This change performs the conversion after
texel lookup instead. Note that we had a code path for performing the
conversion after filtering, but that leads to failures in dEQP and
unacceptable darkening between texels.

Also, glTexSubImage calls can update sRGB textures using a format/type
combination with no indication of the color space, which caused an
unintentional conversion on upload. Likewise, we were missing support
for an A2B10G10R10UI implementation format.

Change-Id: Ib10845f628fb2d1849e88d7a9350868cdec32fa2
Reviewed-on: https://swiftshader-review.googlesource.com/15068
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index a50c1e8..8aebbd8 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -133,46 +133,23 @@
 
 			if(fixed12 && !hasFloatTexture())
 			{
-				if(has16bitTextureFormat())
+				if(state.textureFormat == FORMAT_R5G6B5)
 				{
-					switch(state.textureFormat)
-					{
-					case FORMAT_R5G6B5:
-						if(state.sRGB)
-						{
-							sRGBtoLinear16_5_12(c.x);
-							sRGBtoLinear16_6_12(c.y);
-							sRGBtoLinear16_5_12(c.z);
-						}
-						else
-						{
-							c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800));
-							c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00));
-							c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800));
-						}
-						break;
-					default:
-						ASSERT(false);
-					}
+					c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800));
+					c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00));
+					c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800));
 				}
 				else
 				{
 					for(int component = 0; component < textureComponentCount(); component++)
 					{
-						if(state.sRGB && isRGBComponent(component))
+						if(hasUnsignedTextureComponent(component))
 						{
-							sRGBtoLinear16_8_12(c[component]);   // FIXME: Perform linearization at surface level for read-only textures
+							c[component] = As<UShort4>(c[component]) >> 4;
 						}
 						else
 						{
-							if(hasUnsignedTextureComponent(component))
-							{
-								c[component] = As<UShort4>(c[component]) >> 4;
-							}
-							else
-							{
-								c[component] = c[component] >> 3;
-							}
+							c[component] = c[component] >> 3;
 						}
 					}
 				}
@@ -316,8 +293,8 @@
 		}
 		else
 		{
-			// FIXME: YUV and sRGB are not supported by the floating point path
-			bool forceFloatFiltering = state.highPrecisionFiltering && !state.sRGB && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
+			// FIXME: YUV is not supported by the floating point path
+			bool forceFloatFiltering = state.highPrecisionFiltering && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
 			bool seamlessCube = (state.addressingModeU == ADDRESSING_SEAMLESS);
 			if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering || seamlessCube)   // FIXME: Mostly identical to integer sampling
 			{
@@ -380,52 +357,23 @@
 			{
 				Vector4s cs = sampleTexture(texture, u, v, w, q, bias, dsx, dsy, offset, function, false);
 
-				if(has16bitTextureFormat())
+				if(state.textureFormat ==  FORMAT_R5G6B5)
 				{
-					switch(state.textureFormat)
-					{
-					case FORMAT_R5G6B5:
-						if(state.sRGB)
-						{
-							sRGBtoLinear16_5_12(cs.x);
-							sRGBtoLinear16_6_12(cs.y);
-							sRGBtoLinear16_5_12(cs.z);
-
-							convertSigned12(c.x, cs.x);
-							convertSigned12(c.y, cs.y);
-							convertSigned12(c.z, cs.z);
-						}
-						else
-						{
-							c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
-							c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
-							c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
-						}
-						break;
-					default:
-						ASSERT(false);
-					}
+					c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
+					c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
+					c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
 				}
 				else
 				{
 					for(int component = 0; component < textureComponentCount(); component++)
 					{
-						// Normalized integer formats
-						if(state.sRGB && isRGBComponent(component))
+						if(hasUnsignedTextureComponent(component))
 						{
-							sRGBtoLinear16_8_12(cs[component]);   // FIXME: Perform linearization at surface level for read-only textures
-							convertSigned12(c[component], cs[component]);
+							convertUnsigned16(c[component], cs[component]);
 						}
 						else
 						{
-							if(hasUnsignedTextureComponent(component))
-							{
-								convertUnsigned16(c[component], cs[component]);
-							}
-							else
-							{
-								convertSigned15(c[component], cs[component]);
-							}
+							convertSigned15(c[component], cs[component]);
 						}
 					}
 				}
@@ -2036,6 +1984,26 @@
 		}
 		else ASSERT(false);
 
+		if(state.sRGB)
+		{
+			if(state.textureFormat == FORMAT_R5G6B5)
+			{
+				sRGBtoLinear16_5_16(c.x);
+				sRGBtoLinear16_6_16(c.y);
+				sRGBtoLinear16_5_16(c.z);
+			}
+			else
+			{
+				for(int i = 0; i < textureComponentCount(); i++)
+				{
+					if(isRGBComponent(i))
+					{
+						sRGBtoLinear16_8_16(c[i]);
+					}
+				}
+			}
+		}
+
 		return c;
 	}
 
@@ -2238,7 +2206,7 @@
 
 			bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
 			int componentCount = textureComponentCount();
-			for(int n = 0; n < componentCount; ++n)
+			for(int n = 0; n < componentCount; n++)
 			{
 				if(hasUnsignedTextureComponent(n))
 				{
@@ -2555,11 +2523,11 @@
 		cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
 	}
 
-	void SamplerCore::sRGBtoLinear16_8_12(Short4 &c)
+	void SamplerCore::sRGBtoLinear16_8_16(Short4 &c)
 	{
 		c = As<UShort4>(c) >> 8;
 
-		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_12));
+		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16));
 
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
@@ -2567,11 +2535,11 @@
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
 	}
 
-	void SamplerCore::sRGBtoLinear16_6_12(Short4 &c)
+	void SamplerCore::sRGBtoLinear16_6_16(Short4 &c)
 	{
 		c = As<UShort4>(c) >> 10;
 
-		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_12));
+		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_16));
 
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
@@ -2579,11 +2547,11 @@
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
 	}
 
-	void SamplerCore::sRGBtoLinear16_5_12(Short4 &c)
+	void SamplerCore::sRGBtoLinear16_5_16(Short4 &c)
 	{
 		c = As<UShort4>(c) >> 11;
 
-		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_12));
+		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_16));
 
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);