Clamp unsigned floating-point formats.

GL_R11F_G11F_B10F is a renderable format (in the EXT_color_buffer_float
extension) with unsigned floating-point values, which we implement using
(signed) half- or single-precision floating-point types. So we need to
clamp values to a positive range before writing to them.

Change-Id: Ic21a5b0b33905c0aeab35299fc268158f8c679f9
Reviewed-on: https://swiftshader-review.googlesource.com/15448
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp
index 6d63b47..0a7a3fe 100644
--- a/src/OpenGL/common/Image.cpp
+++ b/src/OpenGL/common/Image.cpp
@@ -668,7 +668,7 @@
 				return sw::FORMAT_X32B32G32R32F;
 			case GL_R11F_G11F_B10F:
 			case GL_RGB9_E5:
-				return sw::FORMAT_X16B16G16R16F;
+				return sw::FORMAT_X16B16G16R16F_UNSIGNED;
 			case GL_RGBA:
 			case GL_RGBA32F:
 				return sw::FORMAT_A32B32G32R32F;
@@ -710,9 +710,10 @@
 				return sw::FORMAT_A16B16G16R16F;
 			case GL_RGB:
 			case GL_RGB16F:
+				return sw::FORMAT_X16B16G16R16F;
 			case GL_R11F_G11F_B10F:
 			case GL_RGB9_E5:
-				return sw::FORMAT_X16B16G16R16F;
+				return sw::FORMAT_X16B16G16R16F_UNSIGNED;
 			default:
 				UNREACHABLE(format);
 			}
@@ -916,7 +917,7 @@
 			}
 		case GL_UNSIGNED_INT_10F_11F_11F_REV:
 		case GL_UNSIGNED_INT_5_9_9_9_REV:   // 5 is the exponent field, not alpha.
-			return sw::FORMAT_X16B16G16R16F;
+			return sw::FORMAT_X16B16G16R16F_UNSIGNED;
 		default:
 			UNREACHABLE(type);
 		}
diff --git a/src/OpenGL/libGLESv2/utilities.cpp b/src/OpenGL/libGLESv2/utilities.cpp
index e068b6a..b43d32f 100644
--- a/src/OpenGL/libGLESv2/utilities.cpp
+++ b/src/OpenGL/libGLESv2/utilities.cpp
@@ -2204,8 +2204,8 @@
 		case GL_RGBA32UI:             return sw::FORMAT_A32B32G32R32UI;
 		case GL_R16F:                 return sw::FORMAT_R16F;
 		case GL_RG16F:                return sw::FORMAT_G16R16F;
-		case GL_R11F_G11F_B10F:
-		case GL_RGB16F:               return sw::FORMAT_B16G16R16F;
+		case GL_R11F_G11F_B10F:       return sw::FORMAT_X16B16G16R16F_UNSIGNED;
+		case GL_RGB16F:               return sw::FORMAT_X16B16G16R16F;
 		case GL_RGBA16F:              return sw::FORMAT_A16B16G16R16F;
 		case GL_R32F:                 return sw::FORMAT_R32F;
 		case GL_RG32F:                return sw::FORMAT_G32R32F;
diff --git a/src/Renderer/Blitter.cpp b/src/Renderer/Blitter.cpp
index 7045462..1a1f36b 100644
--- a/src/Renderer/Blitter.cpp
+++ b/src/Renderer/Blitter.cpp
@@ -364,6 +364,7 @@
 			c = *Pointer<Float4>(element);
 			break;
 		case FORMAT_X32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_B32G32R32F:
 			c.z = *Pointer<Float>(element + 8);
 		case FORMAT_G32R32F:
@@ -514,6 +515,7 @@
 			}
 			break;
 		case FORMAT_X32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 			if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
 		case FORMAT_B32G32R32F:
 			if(writeR) { *Pointer<Float>(element) = c.x; }
@@ -1032,6 +1034,7 @@
 		case FORMAT_A32B32G32R32UI:
 		case FORMAT_A32B32G32R32F:
 		case FORMAT_X32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_B32G32R32F:
 		case FORMAT_G32R32F:
 		case FORMAT_R32F:
@@ -1125,7 +1128,11 @@
 			value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
 		}
 
-		if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
+		if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED)
+		{
+			value = Max(value, Float4(0.0f));  // TODO: Only necessary if source is signed.
+		}
+		else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
 		{
 			value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
 
@@ -1344,10 +1351,10 @@
 						}
 					}
 
-					if(!hasConstantColorI && !hasConstantColorF) { x += w; }
+					if(!state.clearOperation) { x += w; }
 				}
 
-				if(!hasConstantColorI && !hasConstantColorF) { y += h; }
+				if(!state.clearOperation) { y += h; }
 			}
 		}
 
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 4a4d543..4e2e26b 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -304,8 +304,12 @@
 			((half*)element)[0] = (half)r;
 			((half*)element)[1] = (half)g;
 			break;
+		case FORMAT_X16B16G16R16F_UNSIGNED:
+			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
+			// Fall through to FORMAT_X16B16G16R16F.
 		case FORMAT_X16B16G16R16F:
 			((half*)element)[3] = 1.0f;
+			// Fall through to FORMAT_B16G16R16F.
 		case FORMAT_B16G16R16F:
 			((half*)element)[0] = (half)r;
 			((half*)element)[1] = (half)g;
@@ -327,8 +331,12 @@
 			((float*)element)[0] = r;
 			((float*)element)[1] = g;
 			break;
+		case FORMAT_X32B32G32R32F_UNSIGNED:
+			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
+			// Fall through to FORMAT_X32B32G32R32F.
 		case FORMAT_X32B32G32R32F:
 			((float*)element)[3] = 1.0f;
+			// Fall through to FORMAT_B32G32R32F.
 		case FORMAT_B32G32R32F:
 			((float*)element)[0] = r;
 			((float*)element)[1] = g;
@@ -965,6 +973,7 @@
 			g = ((half*)element)[1];
 			break;
 		case FORMAT_X16B16G16R16F:
+		case FORMAT_X16B16G16R16F_UNSIGNED:
 		case FORMAT_B16G16R16F:
 			r = ((half*)element)[0];
 			g = ((half*)element)[1];
@@ -987,6 +996,7 @@
 			g = ((float*)element)[1];
 			break;
 		case FORMAT_X32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_B32G32R32F:
 			r = ((float*)element)[0];
 			g = ((float*)element)[1];
@@ -1659,12 +1669,14 @@
 		case FORMAT_B16G16R16F:			return 6;
 		case FORMAT_X16B16G16R16F:		return 8;
 		case FORMAT_A16B16G16R16F:		return 8;
+		case FORMAT_X16B16G16R16F_UNSIGNED: return 8;
 		case FORMAT_A32F:				return 4;
 		case FORMAT_R32F:				return 4;
 		case FORMAT_G32R32F:			return 8;
 		case FORMAT_B32G32R32F:			return 12;
 		case FORMAT_X32B32G32R32F:		return 16;
 		case FORMAT_A32B32G32R32F:		return 16;
+		case FORMAT_X32B32G32R32F_UNSIGNED: return 16;
 		// Depth/stencil formats
 		case FORMAT_D16:				return 2;
 		case FORMAT_D32:				return 4;
@@ -2897,11 +2909,13 @@
 		case FORMAT_B16G16R16F:
 		case FORMAT_X16B16G16R16F:
 		case FORMAT_A16B16G16R16F:
+		case FORMAT_X16B16G16R16F_UNSIGNED:
 		case FORMAT_R32F:
 		case FORMAT_G32R32F:
 		case FORMAT_B32G32R32F:
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_D32F:
 		case FORMAT_D32FS8:
 		case FORMAT_D32F_COMPLEMENTARY:
@@ -2949,6 +2963,7 @@
 		case FORMAT_G32R32UI:
 		case FORMAT_X32B32G32R32UI:
 		case FORMAT_A32B32G32R32UI:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_R8UI:
 		case FORMAT_G8R8UI:
 		case FORMAT_X8B8G8R8UI:
@@ -3234,6 +3249,7 @@
 		case FORMAT_G32R32F:        return 2;
 		case FORMAT_X32B32G32R32F:  return 3;
 		case FORMAT_A32B32G32R32F:  return 4;
+		case FORMAT_X32B32G32R32F_UNSIGNED: return 3;
 		case FORMAT_D32F:           return 1;
 		case FORMAT_D32FS8:         return 1;
 		case FORMAT_D32F_LOCKABLE:  return 1;
@@ -3954,12 +3970,14 @@
 		case FORMAT_B16G16R16F:     return FORMAT_X32B32G32R32F;
 		case FORMAT_X16B16G16R16F:	return FORMAT_X32B32G32R32F;
 		case FORMAT_A16B16G16R16F:	return FORMAT_A32B32G32R32F;
+		case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
 		case FORMAT_A32F:			return FORMAT_A32B32G32R32F;
 		case FORMAT_R32F:			return FORMAT_R32F;
 		case FORMAT_G32R32F:		return FORMAT_G32R32F;
 		case FORMAT_B32G32R32F:     return FORMAT_X32B32G32R32F;
 		case FORMAT_X32B32G32R32F:  return FORMAT_X32B32G32R32F;
 		case FORMAT_A32B32G32R32F:	return FORMAT_A32B32G32R32F;
+		case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
 		// Luminance formats
 		case FORMAT_L8:				return FORMAT_L8;
 		case FORMAT_A4L4:			return FORMAT_A8L8;
@@ -5533,7 +5551,9 @@
 				else ASSERT(false);
 			}
 		}
-		else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
+		else if(internal.format == FORMAT_A32B32G32R32F ||
+		        internal.format == FORMAT_X32B32G32R32F ||
+		        internal.format == FORMAT_X32B32G32R32F_UNSIGNED)
 		{
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE())
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp
index 7075434..062b0d7 100644
--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -172,12 +172,14 @@
 		FORMAT_B16G16R16F,
 		FORMAT_X16B16G16R16F,
 		FORMAT_A16B16G16R16F,
+		FORMAT_X16B16G16R16F_UNSIGNED,
 		FORMAT_A32F,
 		FORMAT_R32F,
 		FORMAT_G32R32F,
 		FORMAT_B32G32R32F,
 		FORMAT_X32B32G32R32F,
 		FORMAT_A32B32G32R32F,
+		FORMAT_X32B32G32R32F_UNSIGNED,
 		// Bump map formats
 		FORMAT_V8U8,
 		FORMAT_L6V5U5,
diff --git a/src/Shader/PixelPipeline.cpp b/src/Shader/PixelPipeline.cpp
index 5d4df97..e623735 100644
--- a/src/Shader/PixelPipeline.cpp
+++ b/src/Shader/PixelPipeline.cpp
@@ -356,6 +356,7 @@
 		case FORMAT_G32R32F:
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+	//	case FORMAT_X32B32G32R32F_UNSIGNED:   // Not renderable in any fixed-function API.
 			convertSigned12(oC, current);
 			PixelRoutine::fogBlend(oC, fog);
 
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index b55bf34..3e9082d 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -632,6 +632,7 @@
 			case FORMAT_G32R32F:
 			case FORMAT_X32B32G32R32F:
 			case FORMAT_A32B32G32R32F:
+			case FORMAT_X32B32G32R32F_UNSIGNED:
 			case FORMAT_R32I:
 			case FORMAT_G32R32I:
 			case FORMAT_A32B32G32R32I:
@@ -771,6 +772,12 @@
 			case FORMAT_G8R8UI:
 			case FORMAT_A8B8G8R8UI:
 				break;
+			case FORMAT_X32B32G32R32F_UNSIGNED:
+				oC[index].x = Max(oC[index].x, Float4(0.0f));
+				oC[index].y = Max(oC[index].y, Float4(0.0f));
+				oC[index].z = Max(oC[index].z, Float4(0.0f));
+				oC[index].w = Max(oC[index].w, Float4(0.0f));
+				break;
 			default:
 				ASSERT(false);
 			}
@@ -1135,7 +1142,7 @@
 	{
 		dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias);
 	}
-	
+
 	void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias)
 	{
 		dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset});
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index a5b6f56..1c300b0 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -2080,6 +2080,7 @@
 			break;
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_A32B32G32R32I:
 		case FORMAT_A32B32G32R32UI:
 			buffer = cBuffer;
@@ -2089,7 +2090,8 @@
 			pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
 			pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
 			transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
-			if(state.targetFormat[index] == FORMAT_X32B32G32R32F)
+			if(state.targetFormat[index] == FORMAT_X32B32G32R32F ||
+			   state.targetFormat[index] == FORMAT_X32B32G32R32F_UNSIGNED)
 			{
 				pixel.w = Float4(1.0f);
 			}
@@ -2241,6 +2243,7 @@
 			break;
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_A32B32G32R32I:
 		case FORMAT_A32B32G32R32UI:
 		case FORMAT_A16B16G16R16I:
@@ -2494,6 +2497,7 @@
 			break;
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_A32B32G32R32I:
 		case FORMAT_A32B32G32R32UI:
 			buffer = cBuffer + 16 * x;
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 850516d..b28d6e3 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -236,6 +236,7 @@
 				case FORMAT_G32R32F:
 					c.z = Short4(defaultColorValue);
 				case FORMAT_X32B32G32R32F:
+				case FORMAT_X32B32G32R32F_UNSIGNED:
 					c.w = Short4(0x1000);
 				case FORMAT_A32B32G32R32F:
 					break;
@@ -464,6 +465,7 @@
 				case FORMAT_G32R32F:
 					c.z = Float4(defaultColorValue);
 				case FORMAT_X32B32G32R32F:
+				case FORMAT_X32B32G32R32F_UNSIGNED:
 					c.w = Float4(1.0f);
 				case FORMAT_A32B32G32R32F:
 					break;
@@ -2624,6 +2626,7 @@
 		case FORMAT_G32R32F:
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_A8:
 		case FORMAT_R8:
 		case FORMAT_L8:
@@ -2695,6 +2698,7 @@
 		case FORMAT_G32R32F:
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_D32F:
 		case FORMAT_D32FS8:
 		case FORMAT_D32F_LOCKABLE:
@@ -2773,6 +2777,7 @@
 		case FORMAT_G32R32F:
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_A8:
 		case FORMAT_R8:
 		case FORMAT_L8:
@@ -2854,6 +2859,7 @@
 		case FORMAT_G32R32F:
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_A8:
 		case FORMAT_R8:
 		case FORMAT_L8:
@@ -2927,6 +2933,7 @@
 		case FORMAT_G32R32F:
 		case FORMAT_X32B32G32R32F:
 		case FORMAT_A32B32G32R32F:
+		case FORMAT_X32B32G32R32F_UNSIGNED:
 		case FORMAT_A8:
 		case FORMAT_R8:
 		case FORMAT_L8:
@@ -2998,6 +3005,7 @@
 		case FORMAT_G32R32F:        return component < 2;
 		case FORMAT_X32B32G32R32F:  return component < 3;
 		case FORMAT_A32B32G32R32F:  return component < 3;
+		case FORMAT_X32B32G32R32F_UNSIGNED: return component < 3;
 		case FORMAT_A8:             return false;
 		case FORMAT_R8:             return component < 1;
 		case FORMAT_L8:             return component < 1;