Compute cube LOD based on Manhattan distance.

We previously computed the LOD of cube maps using the 3D Euclidean
distance between the intersections of the sampling rays of a quad with
the cube. This underestimates the gradient at the edges where these
rays intersect multiple faces. Instead, use the Manhattan distance,
taking the largest of the three pairwise (two-dimensional) sums of the
absolute coordinate differences. Doing so may overestimate the
footprint dimensions, but that only leads to slight blurring instead
of aliasing.

Change-Id: I5ddbb39765462b1c55c4143b5806154cbdfe7130
Reviewed-on: https://swiftshader-review.googlesource.com/5173
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index d0e420a..4255f80 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1415,6 +1415,15 @@
 		return lod;
 	}
 
+	Float SamplerCore::log2(Float lod)
+	{
+		lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
+		lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
+		lod *= As<Float>(Int(0x33800000));               // Scale by 0.5 * 2^-23 (mantissa length).
+
+		return lod;
+	}
+
 	void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
 	{
 		if(function != Lod && function != Fetch)
@@ -1433,7 +1442,7 @@
 				duvdxy = Float4(dudxy.xz, dvdxy.xz);
 			}
 
-			// Scale by texture dimensions and LOD
+			// Scale by texture dimensions and global LOD.
 			Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD));
 
 			Float4 dUV2dxy = dUVdxy * dUVdxy;
@@ -1498,9 +1507,9 @@
 				Float4 V = v * M;
 				Float4 W = w * M;
 
-				dudxy = U - U.xxxx;
-				dvdxy = V - V.xxxx;
-				dsdxy = W - W.xxxx;
+				dudxy = Abs(U - U.xxxx);
+				dvdxy = Abs(V - V.xxxx);
+				dsdxy = Abs(W - W.xxxx);
 			}
 			else
 			{
@@ -1508,26 +1517,25 @@
 				dvdxy = Float4(dsx.y.xx, dsy.y.xx);
 				dsdxy = Float4(dsx.z.xx, dsy.z.xx);
 
-				dudxy *= Float4(M.x);
-				dvdxy *= Float4(M.x);
-				dsdxy *= Float4(M.x);
+				dudxy = Abs(dudxy * Float4(M.x));
+				dvdxy = Abs(dvdxy * Float4(M.x));
+				dsdxy = Abs(dsdxy * Float4(M.x));
 			}
 
-			// Scale by texture dimensions and LOD
-			dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
-			dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
-			dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
+			// Compute the largest Manhattan distance in two dimensions.
+			// This takes the footprint across adjacent faces into account.
+			Float4 duvdxy = dudxy + dvdxy;
+			Float4 dusdxy = dudxy + dsdxy;
+			Float4 dvsdxy = dvdxy + dsdxy;
 
-			dudxy *= dudxy;
-			dvdxy *= dvdxy;
-			dsdxy *= dsdxy;
-
-			dudxy += dvdxy;
-			dudxy += dsdxy;
+			dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);
 
 			lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);
 
-			lod = log2sqrt(lod);   // log2(sqrt(lod))
+			// Scale by texture dimension and global LOD.
+			lod *= *Pointer<Float>(texture + OFFSET(Texture,widthLOD));
+
+			lod = log2(lod);
 
 			if(function == Bias)
 			{
@@ -1577,7 +1585,7 @@
 					dsdxy = Float4(dsx.z.xx, dsy.z.xx);
 				}
 
-				// Scale by texture dimensions and LOD
+				// Scale by texture dimensions and global LOD.
 				dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
 				dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD));
 				dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD));
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp
index c31f398..6004bd7 100644
--- a/src/Shader/SamplerCore.hpp
+++ b/src/Shader/SamplerCore.hpp
@@ -71,6 +71,7 @@
 		Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
 		Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
 		Float log2sqrt(Float lod);
+		Float log2(Float lod);
 		void computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
 		void computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function);
 		void computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);