SRGB implementation

The sRGB conversion code already existed, but it was not being
applied to sRGB framebuffer formats (FORMAT_SRGB8_A8 and
FORMAT_SRGB8_X8). For these formats, the conversion must always
be applied after blending.

According to the GLES 3.0 spec, section 4.1.8 - sRGB Conversion:
"[...] the R, G, and B values after blending are converted
 into the non-linear sRGB color space by computing [...]"

All related dEQP tests pass.

Change-Id: I9342d2f74aa650f28835a951bdfa8bd371bc6924
Reviewed-on: https://swiftshader-review.googlesource.com/5189
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index c12097e..6f04f45 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -578,7 +578,7 @@
 				continue;
 			}
 
-			if(!postBlendSRGB && state.writeSRGB)
+			if(!postBlendSRGB && state.writeSRGB && !isSRGB(index))
 			{
 				c[index].x = linearToSRGB(c[index].x);
 				c[index].y = linearToSRGB(c[index].y);
@@ -597,6 +597,8 @@
 			case FORMAT_X8B8G8R8:
 			case FORMAT_A8R8G8B8:
 			case FORMAT_A8B8G8R8:
+			case FORMAT_SRGB8_X8:
+			case FORMAT_SRGB8_A8:
 			case FORMAT_A8:
 			case FORMAT_G16R16:
 			case FORMAT_A16B16G16R16:
@@ -733,6 +735,8 @@
 			case FORMAT_A8B8G8R8:
 			case FORMAT_X8R8G8B8:
 			case FORMAT_X8B8G8R8:
+			case FORMAT_SRGB8_X8:
+			case FORMAT_SRGB8_A8:
 			case FORMAT_A8:
 			case FORMAT_G16R16:
 			case FORMAT_A16B16G16R16:
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index ca38074..a1a752c 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -996,6 +996,11 @@
 		}
 	}
 
+	bool PixelRoutine::isSRGB(int index) const   // True when render target 'index' has format FORMAT_SRGB8_A8 or FORMAT_SRGB8_X8
+	{
+		return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8;
+	}
+
 	void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
 	{
 		Short4 c01;
@@ -1035,6 +1040,7 @@
 			pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
 			break;
 		case FORMAT_A8B8G8R8:
+		case FORMAT_SRGB8_A8:
 			buffer = cBuffer + 4 * x;
 			c01 = *Pointer<Short4>(buffer);
 			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
@@ -1082,6 +1088,7 @@
 			pixel.w = Short4(0xFFFFu);
 			break;
 		case FORMAT_X8B8G8R8:
+		case FORMAT_SRGB8_X8:
 			buffer = cBuffer + 4 * x;
 			c01 = *Pointer<Short4>(buffer);
 			buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
@@ -1141,7 +1148,7 @@
 			ASSERT(false);
 		}
 
-		if(postBlendSRGB && state.writeSRGB)
+		if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
 		{
 			sRGBtoLinear16_12_16(pixel);
 		}
@@ -1363,7 +1370,7 @@
 
 	void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
 	{
-		if(postBlendSRGB && state.writeSRGB)
+		if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
 		{
 			linearToSRGB16_12_16(current);
 		}
@@ -1383,6 +1390,8 @@
 			case FORMAT_X8B8G8R8:
 			case FORMAT_A8R8G8B8:
 			case FORMAT_A8B8G8R8:
+			case FORMAT_SRGB8_X8:
+			case FORMAT_SRGB8_A8:
 				current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
 				current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
 				current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
@@ -1465,7 +1474,9 @@
 			break;
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8B8G8R8:
-			if(state.targetFormat[index] == FORMAT_X8B8G8R8 || rgbaWriteMask == 0x7)
+		case FORMAT_SRGB8_X8:
+		case FORMAT_SRGB8_A8:
+			if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7)
 			{
 				current.x = As<Short4>(As<UShort4>(current.x) >> 8);
 				current.y = As<Short4>(As<UShort4>(current.y) >> 8);
@@ -1655,13 +1666,17 @@
 			break;
 		case FORMAT_A8B8G8R8:
 		case FORMAT_X8B8G8R8:   // FIXME: Don't touch alpha?
+		case FORMAT_SRGB8_X8:
+		case FORMAT_SRGB8_A8:
 			{
 				Pointer<Byte> buffer = cBuffer + x * 4;
 				Short4 value = *Pointer<Short4>(buffer);
 
-				if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
-				   ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
-					(state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F)))   // FIXME: Need for masking when XBGR && Fh?
+				bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) ||
+				              (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) &&
+				               ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh?
+
+				if(masked)
 				{
 					Short4 masked = value;
 					c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
@@ -1677,9 +1692,7 @@
 				buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
 				value = *Pointer<Short4>(buffer);
 
-				if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
-				   ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
-					(state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F)))   // FIXME: Need for masking when XBGR && Fh?
+				if(masked)
 				{
 					Short4 masked = value;
 					c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
@@ -2026,7 +2039,7 @@
 			ASSERT(false);
 		}
 
-		if(postBlendSRGB && state.writeSRGB)
+		if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
 		{
 			sRGBtoLinear(pixel.x);
 			sRGBtoLinear(pixel.y);
diff --git a/src/Shader/PixelRoutine.hpp b/src/Shader/PixelRoutine.hpp
index e631b62..2cce393 100644
--- a/src/Shader/PixelRoutine.hpp
+++ b/src/Shader/PixelRoutine.hpp
@@ -58,6 +58,7 @@
 		void alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x);
 		void writeColor(int index, Pointer<Byte> &cBuffer, Int &i, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask);
 
+		bool isSRGB(int index) const;
 		UShort4 convertFixed16(Float4 &cf, bool saturate = true);
 		void linearToSRGB12_16(Vector4s &c);
 
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 1dc78bf..956f526 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -222,6 +222,8 @@
 				case FORMAT_X8B8G8R8:
 				case FORMAT_A8R8G8B8:
 				case FORMAT_A8B8G8R8:
+				case FORMAT_SRGB8_X8:
+				case FORMAT_SRGB8_A8:
 				case FORMAT_V8U8:
 				case FORMAT_Q8W8V8U8:
 				case FORMAT_X8L8V8U8:
@@ -480,6 +482,8 @@
 				case FORMAT_X8B8G8R8:
 				case FORMAT_A8R8G8B8:
 				case FORMAT_A8B8G8R8:
+				case FORMAT_SRGB8_X8:
+				case FORMAT_SRGB8_A8:
 				case FORMAT_V8U8:
 				case FORMAT_Q8W8V8U8:
 				case FORMAT_X8L8V8U8:
@@ -1738,6 +1742,7 @@
 					case FORMAT_A8B8G8R8UI:
 					case FORMAT_A8B8G8R8I_SNORM:
 					case FORMAT_Q8W8V8U8:
+					case FORMAT_SRGB8_A8:
 						c.z = c.x;
 						c.x = As<Short4>(UnpackLow(c.x, c.y));
 						c.z = As<Short4>(UnpackHigh(c.z, c.y));
@@ -1778,6 +1783,7 @@
 					case FORMAT_X8B8G8R8I:
 					case FORMAT_X8B8G8R8:
 					case FORMAT_X8L8V8U8:
+					case FORMAT_SRGB8_X8:
 						c.z = c.x;
 						c.x = As<Short4>(UnpackLow(c.x, c.y));
 						c.z = As<Short4>(UnpackHigh(c.z, c.y));
@@ -2213,6 +2219,8 @@
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8R8G8B8:
 		case FORMAT_A8B8G8R8:
+		case FORMAT_SRGB8_X8:
+		case FORMAT_SRGB8_A8:
 		case FORMAT_V8U8:
 		case FORMAT_Q8W8V8U8:
 		case FORMAT_X8L8V8U8:
@@ -2262,6 +2270,8 @@
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8R8G8B8:
 		case FORMAT_A8B8G8R8:
+		case FORMAT_SRGB8_X8:
+		case FORMAT_SRGB8_A8:
 		case FORMAT_V8U8:
 		case FORMAT_Q8W8V8U8:
 		case FORMAT_X8L8V8U8:
@@ -2354,6 +2364,8 @@
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8R8G8B8:
 		case FORMAT_A8B8G8R8:
+		case FORMAT_SRGB8_X8:
+		case FORMAT_SRGB8_A8:
 		case FORMAT_V8U8:
 		case FORMAT_Q8W8V8U8:
 		case FORMAT_X8L8V8U8:
@@ -2429,6 +2441,8 @@
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8R8G8B8:
 		case FORMAT_A8B8G8R8:
+		case FORMAT_SRGB8_X8:
+		case FORMAT_SRGB8_A8:
 		case FORMAT_V8U8:
 		case FORMAT_Q8W8V8U8:
 		case FORMAT_X8L8V8U8:
@@ -2496,6 +2510,8 @@
 		case FORMAT_X8B8G8R8:       return component < 3;
 		case FORMAT_A8R8G8B8:       return component < 3;
 		case FORMAT_A8B8G8R8:       return component < 3;
+		case FORMAT_SRGB8_X8:       return component < 3;
+		case FORMAT_SRGB8_A8:       return component < 3;
 		case FORMAT_V8U8:           return false;
 		case FORMAT_Q8W8V8U8:       return false;
 		case FORMAT_X8L8V8U8:       return false;