Compute implicit derivatives relative to first quad pixel.

Previously the derivatives in y were computed using the difference
between the second and fourth pixels in the quad. For consistency with
the x derivative, use the first and third pixels instead. Also, some
shuffling can be eliminated by storing the x derivative in the y
component, and the y derivative in the z component, of the derivative
vector.

Change-Id: I985dcd3c5e2c47c10caf020cf5cb79587b3b3aab
Reviewed-on: https://swiftshader-review.googlesource.com/14168
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 0c86815..d0e420a 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1421,7 +1421,7 @@
 		{
 			Float4 duvdxy;
 
-			if(function != Grad)
+			if(function != Grad)   // Implicit
 			{
 				duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
 			}
@@ -1492,15 +1492,15 @@
 		{
 			Float4 dudxy, dvdxy, dsdxy;
 
-			if(function != Grad)
+			if(function != Grad)  // Implicit
 			{
 				Float4 U = u * M;
 				Float4 V = v * M;
 				Float4 W = w * M;
 
-				dudxy = U.ywyw - U;
-				dvdxy = V.ywyw - V;
-				dsdxy = W.ywyw - W;
+				dudxy = U - U.xxxx;
+				dvdxy = V - V.xxxx;
+				dsdxy = W - W.xxxx;
 			}
 			else
 			{
@@ -1508,10 +1508,6 @@
 				dvdxy = Float4(dsx.y.xx, dsy.y.xx);
 				dsdxy = Float4(dsx.z.xx, dsy.z.xx);
 
-				dudxy = Float4(dudxy.xz, dudxy.xz);
-				dvdxy = Float4(dvdxy.xz, dvdxy.xz);
-				dsdxy = Float4(dsdxy.xz, dsdxy.xz);
-
 				dudxy *= Float4(M.x);
 				dvdxy *= Float4(M.x);
 				dsdxy *= Float4(M.x);
@@ -1529,7 +1525,7 @@
 			dudxy += dvdxy;
 			dudxy += dsdxy;
 
-			lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
+			lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);
 
 			lod = log2sqrt(lod);   // log2(sqrt(lod))
 
@@ -1568,21 +1564,17 @@
 			{
 				Float4 dudxy, dvdxy, dsdxy;
 
-				if(function != Grad)
+				if(function != Grad)   // Implicit
 				{
-					dudxy = uuuu.ywyw - uuuu;
-					dvdxy = vvvv.ywyw - vvvv;
-					dsdxy = wwww.ywyw - wwww;
+					dudxy = uuuu - uuuu.xxxx;
+					dvdxy = vvvv - vvvv.xxxx;
+					dsdxy = wwww - wwww.xxxx;
 				}
 				else
 				{
 					dudxy = Float4(dsx.x.xx, dsy.x.xx);
 					dvdxy = Float4(dsx.y.xx, dsy.y.xx);
 					dsdxy = Float4(dsx.z.xx, dsy.z.xx);
-
-					dudxy = Float4(dudxy.xz, dudxy.xz);
-					dvdxy = Float4(dvdxy.xz, dvdxy.xz);
-					dsdxy = Float4(dsdxy.xz, dsdxy.xz);
 				}
 
 				// Scale by texture dimensions and LOD
@@ -1597,7 +1589,7 @@
 				dudxy += dvdxy;
 				dudxy += dsdxy;
 
-				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
+				lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);
 
 				lod = log2sqrt(lod);   // log2(sqrt(lod))