Fixed flipX in Device::copyBuffer

When x was flipped, each row was copied in reverse byte
order, which is wrong for any format whose pixels are
larger than 1 byte. Now whole pixels are copied in reverse
order instead of individual bytes, so the bytes within
each pixel are no longer reversed.

Change-Id: Ie0e0516546a49d0f890a4abe24d44ccb3ffb2350
Reviewed-on: https://swiftshader-review.googlesource.com/7362
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/OpenGL/libGLESv2/Device.cpp b/src/OpenGL/libGLESv2/Device.cpp
index 765cc80..f6aead7 100644
--- a/src/OpenGL/libGLESv2/Device.cpp
+++ b/src/OpenGL/libGLESv2/Device.cpp
@@ -460,9 +460,6 @@
 
 	void Device::copyBuffer(sw::byte *sourceBuffer, sw::byte *destBuffer, unsigned int width, unsigned int height, unsigned int sourcePitch, unsigned int destPitch, unsigned int bytes, bool flipX, bool flipY)
 	{
-		unsigned int widthB = width * bytes;
-		unsigned int widthMaxB = widthB - 1;
-
 		if(flipX)
 		{
 			if(flipY)
@@ -470,9 +467,11 @@
 				sourceBuffer += (height - 1) * sourcePitch;
 				for(unsigned int y = 0; y < height; ++y, sourceBuffer -= sourcePitch, destBuffer += destPitch)
 				{
-					for(unsigned int x = 0; x < widthB; ++x)
+					sw::byte *srcX = sourceBuffer + (width - 1) * bytes;
+					sw::byte *dstX = destBuffer;
+					for(unsigned int x = 0; x < width; ++x, dstX += bytes, srcX -= bytes)
 					{
-						destBuffer[x] = sourceBuffer[widthMaxB - x];
+						memcpy(dstX, srcX, bytes);
 					}
 				}
 			}
@@ -480,15 +479,19 @@
 			{
 				for(unsigned int y = 0; y < height; ++y, sourceBuffer += sourcePitch, destBuffer += destPitch)
 				{
-					for(unsigned int x = 0; x < widthB; ++x)
+					sw::byte *srcX = sourceBuffer + (width - 1) * bytes;
+					sw::byte *dstX = destBuffer;
+					for(unsigned int x = 0; x < width; ++x, dstX += bytes, srcX -= bytes)
 					{
-						destBuffer[x] = sourceBuffer[widthMaxB - x];
+						memcpy(dstX, srcX, bytes);
 					}
 				}
 			}
 		}
 		else
 		{
+			unsigned int widthB = width * bytes;
+
 			if(flipY)
 			{
 				sourceBuffer += (height - 1) * sourcePitch;