Perform all blend operations in floating point

To fix loss-of-precision issues, this CL performs all blending
operations in floating point. Concretely, the alphaBlend() overload
that operated on shorts is gone, and conversions have been added after
readPixel() (shorts to floats) and before writeColor() (floats to
shorts), since both of those functions still operate on shorts.

Bug: b/204322086
Change-Id: Ifad29eaf5b145d49da8611be598343a28f3005f4
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/58668
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Commit-Queue: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 24d9499..1853b62 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -297,14 +297,14 @@
 			for(unsigned int q : samples)
 			{
 				Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
+
+				Vector4f colorf = alphaBlend(index, buffer, c[index], x);
+
 				Vector4s color;
-
-				color.x = convertFixed16(c[index].x, false);
-				color.y = convertFixed16(c[index].y, false);
-				color.z = convertFixed16(c[index].z, false);
-				color.w = convertFixed16(c[index].w, false);
-
-				alphaBlend(index, buffer, color, x);
+				color.x = convertFixed16(colorf.x, true);
+				color.y = convertFixed16(colorf.y, true);
+				color.z = convertFixed16(colorf.z, true);
+				color.w = convertFixed16(colorf.w, true);
 				writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
 			}
 			break;
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 7d36ce6..8053391 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -931,137 +931,6 @@
 	}
 }
 
-void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorActive)
-{
-	switch(blendFactorActive)
-	{
-	case VK_BLEND_FACTOR_ZERO:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_ONE:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_SRC_COLOR:
-		blendFactor.x = current.x;
-		blendFactor.y = current.y;
-		blendFactor.z = current.z;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
-		blendFactor.x = Short4(0xFFFFu) - current.x;
-		blendFactor.y = Short4(0xFFFFu) - current.y;
-		blendFactor.z = Short4(0xFFFFu) - current.z;
-		break;
-	case VK_BLEND_FACTOR_DST_COLOR:
-		blendFactor.x = pixel.x;
-		blendFactor.y = pixel.y;
-		blendFactor.z = pixel.z;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
-		blendFactor.x = Short4(0xFFFFu) - pixel.x;
-		blendFactor.y = Short4(0xFFFFu) - pixel.y;
-		blendFactor.z = Short4(0xFFFFu) - pixel.z;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA:
-		blendFactor.x = current.w;
-		blendFactor.y = current.w;
-		blendFactor.z = current.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
-		blendFactor.x = Short4(0xFFFFu) - current.w;
-		blendFactor.y = Short4(0xFFFFu) - current.w;
-		blendFactor.z = Short4(0xFFFFu) - current.w;
-		break;
-	case VK_BLEND_FACTOR_DST_ALPHA:
-		blendFactor.x = pixel.w;
-		blendFactor.y = pixel.w;
-		blendFactor.z = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
-		blendFactor.x = Short4(0xFFFFu) - pixel.w;
-		blendFactor.y = Short4(0xFFFFu) - pixel.w;
-		blendFactor.z = Short4(0xFFFFu) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
-		blendFactor.x = Short4(0xFFFFu) - pixel.w;
-		blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
-		blendFactor.y = blendFactor.x;
-		blendFactor.z = blendFactor.x;
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_COLOR:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[0]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[1]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[2]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[0]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[1]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[2]));
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
-		blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
-		blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
-		blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
-		break;
-	default:
-		UNSUPPORTED("VkBlendFactor: %d", int(blendFactorActive));
-	}
-}
-
-void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorAlphaActive)
-{
-	switch(blendFactorAlphaActive)
-	{
-	case VK_BLEND_FACTOR_ZERO:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_ONE:
-		// Optimized
-		break;
-	case VK_BLEND_FACTOR_SRC_COLOR:
-		blendFactor.w = current.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
-		blendFactor.w = Short4(0xFFFFu) - current.w;
-		break;
-	case VK_BLEND_FACTOR_DST_COLOR:
-		blendFactor.w = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
-		blendFactor.w = Short4(0xFFFFu) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA:
-		blendFactor.w = current.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
-		blendFactor.w = Short4(0xFFFFu) - current.w;
-		break;
-	case VK_BLEND_FACTOR_DST_ALPHA:
-		blendFactor.w = pixel.w;
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
-		blendFactor.w = Short4(0xFFFFu) - pixel.w;
-		break;
-	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
-		blendFactor.w = Short4(0xFFFFu);
-		break;
-	case VK_BLEND_FACTOR_CONSTANT_COLOR:
-	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
-		blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.blendConstant4W[3]));
-		break;
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
-	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
-		blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.invBlendConstant4W[3]));
-		break;
-	default:
-		UNSUPPORTED("VkBlendFactor: %d", int(blendFactorAlphaActive));
-	}
-}
-
 bool PixelRoutine::isSRGB(int index) const
 {
 	return vk::Format(state.colorFormat[index]).isSRGBformat();
@@ -1343,127 +1212,6 @@
 	}
 }
 
-void PixelRoutine::alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x)
-{
-	if(!state.blendState[index].alphaBlendEnable)
-	{
-		return;
-	}
-
-	ASSERT(state.colorFormat[index].supportsColorAttachmentBlend());
-
-	Vector4s pixel;
-	readPixel(index, cBuffer, x, pixel);
-
-	// Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
-	Vector4s sourceFactor;
-	Vector4s destFactor;
-
-	blendFactor(sourceFactor, current, pixel, state.blendState[index].sourceBlendFactor);
-	blendFactor(destFactor, current, pixel, state.blendState[index].destBlendFactor);
-
-	if(state.blendState[index].sourceBlendFactor != VK_BLEND_FACTOR_ONE && state.blendState[index].sourceBlendFactor != VK_BLEND_FACTOR_ZERO)
-	{
-		current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
-		current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
-		current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
-	}
-
-	if(state.blendState[index].destBlendFactor != VK_BLEND_FACTOR_ONE && state.blendState[index].destBlendFactor != VK_BLEND_FACTOR_ZERO)
-	{
-		pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
-		pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
-		pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
-	}
-
-	switch(state.blendState[index].blendOperation)
-	{
-	case VK_BLEND_OP_ADD:
-		current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_SUBTRACT:
-		current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_REVERSE_SUBTRACT:
-		current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
-		current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
-		current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
-		break;
-	case VK_BLEND_OP_MIN:
-		current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_MAX:
-		current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
-		current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
-		current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
-		break;
-	case VK_BLEND_OP_SRC_EXT:
-		// No operation
-		break;
-	case VK_BLEND_OP_DST_EXT:
-		current.x = pixel.x;
-		current.y = pixel.y;
-		current.z = pixel.z;
-		break;
-	case VK_BLEND_OP_ZERO_EXT:
-		current.x = Short4(0x0000);
-		current.y = Short4(0x0000);
-		current.z = Short4(0x0000);
-		break;
-	default:
-		UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperation));
-	}
-
-	blendFactorAlpha(sourceFactor, current, pixel, state.blendState[index].sourceBlendFactorAlpha);
-	blendFactorAlpha(destFactor, current, pixel, state.blendState[index].destBlendFactorAlpha);
-
-	if(state.blendState[index].sourceBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.blendState[index].sourceBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
-	{
-		current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
-	}
-
-	if(state.blendState[index].destBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.blendState[index].destBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
-	{
-		pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
-	}
-
-	switch(state.blendState[index].blendOperationAlpha)
-	{
-	case VK_BLEND_OP_ADD:
-		current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_SUBTRACT:
-		current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_REVERSE_SUBTRACT:
-		current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
-		break;
-	case VK_BLEND_OP_MIN:
-		current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_MAX:
-		current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
-		break;
-	case VK_BLEND_OP_SRC_EXT:
-		// No operation
-		break;
-	case VK_BLEND_OP_DST_EXT:
-		current.w = pixel.w;
-		break;
-	case VK_BLEND_OP_ZERO_EXT:
-		current.w = Short4(0x0000);
-		break;
-	default:
-		UNSUPPORTED("VkBlendOp: %d", int(state.blendState[index].blendOperationAlpha));
-	}
-}
-
 void PixelRoutine::writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask)
 {
 	if(isSRGB(index))
@@ -2397,7 +2145,16 @@
 		destColor.w = Float4(1.0f);
 		break;
 	default:
-		UNSUPPORTED("VkFormat: %d", int(state.colorFormat[index]));
+		{
+			// Attempt to read an integer based format and convert it to float
+			Vector4s color;
+			readPixel(index, cBuffer, x, color);
+			destColor.x = convertFloat32(As<UShort4>(color.x));
+			destColor.y = convertFloat32(As<UShort4>(color.y));
+			destColor.z = convertFloat32(As<UShort4>(color.z));
+			destColor.w = convertFloat32(As<UShort4>(color.w));
+		}
+		break;
 	}
 
 	Vector4f sourceFactor;
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index 3b13ac1..3f410e7 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -54,7 +54,6 @@
 	void alphaTest(Int &aMask, const Short4 &alpha);
 	void alphaToCoverage(Int cMask[4], const Float4 &alpha, const SampleSet &samples);
 
-	void alphaBlend(int index, const Pointer<Byte> &cBuffer, Vector4s &current, const Int &x);
 	void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &color, const Int &sMask, const Int &zMask, const Int &cMask);
 	Vector4f alphaBlend(int index, const Pointer<Byte> &cBuffer, const Vector4f &sourceColor, const Int &x);
 	void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &current, const Int &sMask, const Int &zMask, const Int &cMask);
@@ -74,8 +73,6 @@
 	Bool depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask);
 	void depthBoundsTest(const Pointer<Byte> &zBuffer, int q, const Int &x, Int &zMask, Int &cMask);
 
-	void blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorActive);
-	void blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, VkBlendFactor blendFactorAlphaActive);
 	void readPixel(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &pixel);
 	void blendFactorRGB(Vector4f &blendFactorRGB, const Vector4f &sourceColor, const Vector4f &destColor, VkBlendFactor colorBlendFactor, vk::Format format);
 	void blendFactorAlpha(Float4 &blendFactorAlpha, const Float4 &sourceAlpha, const Float4 &destAlpha, VkBlendFactor alphaBlendFactor, vk::Format format);