Add unit test for 11/10-bit minifloat denormal underflow

The reference code for conversion from 32-bit floating-point to 11- and
10-bit minifloat formats produces denormals for values smaller than the
smallest value representable in normalized form. The arithmetic can
underflow to produce zero for values too small to be represented as
denormals.

This arithmetic contains a 32-bit shift operation whose shift amount can
be greater than 32. Such a shift has undefined behavior in C++, but
produces zero on x86 processors.

This change adds unit tests for the intended behavior around the cutoff
between the smallest denormal and zero, to help validate the fix for the
UB in a future change.

Bug: b/147900455
Bug: chromium:1117433
Change-Id: Ic5e495dd822231d52a5551ee12733a616728d486
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/48068
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/System/Half.hpp b/src/System/Half.hpp
index 6f73b4d..5775141 100644
--- a/src/System/Half.hpp
+++ b/src/System/Half.hpp
@@ -128,9 +128,25 @@
 
 class R11G11B10F
 {
-	unsigned int R : 11;
-	unsigned int G : 11;
-	unsigned int B : 10;
+public:
+	R11G11B10F(float rgb[3])
+	{
+		R = float32ToFloat11(rgb[0]);
+		G = float32ToFloat11(rgb[1]);
+		B = float32ToFloat10(rgb[2]);
+	}
+
+	operator unsigned int() const
+	{
+		return *reinterpret_cast<const unsigned int *>(this);
+	}
+
+	void toRGB16F(half rgb[3]) const
+	{
+		rgb[0] = float11ToFloat16(R);
+		rgb[1] = float11ToFloat16(G);
+		rgb[2] = float10ToFloat16(B);
+	}
 
 	static inline half float11ToFloat16(unsigned short fp11)
 	{
@@ -142,7 +158,7 @@
 		return shortAsHalf(fp10 << 5);  // Sign bit 0
 	}
 
-	inline unsigned short float32ToFloat11(float fp32)
+	static inline unsigned short float32ToFloat11(float fp32)
 	{
 		const unsigned int float32MantissaMask = 0x7FFFFF;
 		const unsigned int float32ExponentMask = 0x7F800000;
@@ -215,7 +231,7 @@
 		}
 	}
 
-	inline unsigned short float32ToFloat10(float fp32)
+	static inline unsigned short float32ToFloat10(float fp32)
 	{
 		const unsigned int float32MantissaMask = 0x7FFFFF;
 		const unsigned int float32ExponentMask = 0x7F800000;
@@ -249,7 +265,7 @@
 			}
 			else if(float32Sign)
 			{
-				// -INF is clamped to 0 since float11 is positive only
+				// -INF is clamped to 0 since float10 is positive only
 				return 0;
 			}
 			else
@@ -264,14 +280,14 @@
 		}
 		else if(float32Val > float32Maxfloat10)
 		{
-			// The number is too large to be represented as a float11, set to max
+			// The number is too large to be represented as a float10, set to max
 			return float10Max;
 		}
 		else
 		{
 			if(float32Val < float32Minfloat10)
 			{
-				// The number is too small to be represented as a normalized float11
+				// The number is too small to be represented as a normalized float10
 				// Convert it to a denormalized value.
 				const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
 				                           (float32Val >> float32ExponentFirstBit);
@@ -280,7 +296,7 @@
 			}
 			else
 			{
-				// Rebias the exponent to represent the value as a normalized float11
+				// Rebias the exponent to represent the value as a normalized float10
 				float32Val += 0xC8000000;
 			}
 
@@ -288,25 +304,10 @@
 		}
 	}
 
-public:
-	R11G11B10F(float rgb[3])
-	{
-		R = float32ToFloat11(rgb[0]);
-		G = float32ToFloat11(rgb[1]);
-		B = float32ToFloat10(rgb[2]);
-	}
-
-	operator unsigned int() const
-	{
-		return *reinterpret_cast<const unsigned int *>(this);
-	}
-
-	void toRGB16F(half rgb[3]) const
-	{
-		rgb[0] = float11ToFloat16(R);
-		rgb[1] = float11ToFloat16(G);
-		rgb[2] = float10ToFloat16(B);
-	}
+private:
+	unsigned int R : 11;
+	unsigned int G : 11;
+	unsigned int B : 10;
 };
 
 }  // namespace sw
diff --git a/tests/MathUnitTests/unittests.cpp b/tests/MathUnitTests/unittests.cpp
index 04f836c..1ce35c3 100644
--- a/tests/MathUnitTests/unittests.cpp
+++ b/tests/MathUnitTests/unittests.cpp
@@ -21,6 +21,18 @@
 
 using namespace sw;
 
+TEST(MathTest, UnsignedFloat11_10)
+{
+	// Test the largest value which causes underflow to 0, and the smallest value
+	// which produces a denormalized result.
+
+	EXPECT_EQ(R11G11B10F::float32ToFloat11(bit_cast<float>(0x3500007F)), 0x0000);
+	EXPECT_EQ(R11G11B10F::float32ToFloat11(bit_cast<float>(0x35000080)), 0x0001);
+
+	EXPECT_EQ(R11G11B10F::float32ToFloat10(bit_cast<float>(0x3580003F)), 0x0000);
+	EXPECT_EQ(R11G11B10F::float32ToFloat10(bit_cast<float>(0x35800040)), 0x0001);
+}
+
 // Clamps to the [0, hi] range. NaN input produces 0, hi must be non-NaN.
 float clamp0hi(float x, float hi)
 {