Improve mipmap LOD calculation precision.

Our approximation for log2() linearly interpolates between powers of
two. This is not accurate enough for some use cases. Squaring the input
effectively doubles the number of piecewise linear segments.

Bug swiftshader:71

Change-Id: Ie7a1da6b93cb5fbed018b61cf8510393964b5aa4
Reviewed-on: https://swiftshader-review.googlesource.com/10450
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 9bd977e..f40b1dd 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1406,6 +1406,16 @@
 		}
 	}
 
+	Float SamplerCore::log2sqrt(Float lod)
+	{
+		// log2(sqrt(lod))                               // Equals 0.25 * log2(lod^2).
+		lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
+		lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
+		lod *= As<Float>(Int(0x33000000));               // Scale by 0.25 * 2^-23 (mantissa length).
+
+		return lod;
+	}
+
 	void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
 	{
 		if(function != Lod && function != Fetch)
@@ -1451,10 +1461,7 @@
 				lod *= Rcp_pp(anisotropy * anisotropy);
 			}
 
-			// log2(sqrt(lod))
-			lod = Float(As<Int>(lod));
-			lod -= Float(0x3F800000);
-			lod *= As<Float>(Int(0x33800000));
+			lod = log2sqrt(lod);   // log2(sqrt(lod))
 
 			if(function == Bias)
 			{
@@ -1510,10 +1517,7 @@
 				lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
 			}
 
-			// log2(sqrt(lod))
-			lod = Float(As<Int>(lod));
-			lod -= Float(0x3F800000);
-			lod *= As<Float>(Int(0x33800000));
+			lod = log2sqrt(lod);   // log2(sqrt(lod))
 
 			if(function == Bias)
 			{
@@ -1577,10 +1581,7 @@
 
 				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
 
-				// log2(sqrt(lod))
-				lod = Float(As<Int>(lod));
-				lod -= Float(0x3F800000);
-				lod *= As<Float>(Int(0x33800000));
+				lod = log2sqrt(lod);   // log2(sqrt(lod))
 
 				if(function == Bias)
 				{
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp
index f4a9abd..f84e4f9 100644
--- a/src/Shader/SamplerCore.hpp
+++ b/src/Shader/SamplerCore.hpp
@@ -69,6 +69,7 @@
 		void sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
 		void sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
 		void sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
+		Float log2sqrt(Float lod);
 		void computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
 		void computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &x, Float4 &y, Float4 &z, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
 		void computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);