Blitter clear implementation

The "clear" operation can now be done through the blitter.
The changes are:
- The blitter now supports RGBA masking
- The blitter now supports RGB565
- When in "clear" mode, the blitter does one read and multiple writes

The old clearing code has been deleted from Surface.

Change-Id: I970c3a0323f63ee5c89f02d94a2705e4bcf83866
Reviewed-on: https://swiftshader-review.googlesource.com/4291
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 74213f9..46796f8 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -3063,361 +3063,31 @@
 		}
 	}
 
-	void Surface::clearColorBuffer(float red, float green, float blue, float alpha, unsigned int rgbaMask, int x0, int y0, int width, int height)
+	bool Surface::isEntire(const SliceRect& rect) const
 	{
-		// FIXME: Also clear buffers in other formats?
+		return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
+	}
 
+	bool Surface::getClearRect(int x0, int y0, int width, int height, SliceRect& rect) const
+	{
 		// Not overlapping
-		if(x0 > internal.width) return;
-		if(y0 > internal.height) return;
-		if(x0 + width < 0) return;
-		if(y0 + height < 0) return;
+		if(x0 > internal.width) return false;
+		if(y0 > internal.height) return false;
+		if(x0 + width < 0) return false;
+		if(y0 + height < 0) return false;
 
 		// Clip against dimensions
-		if(x0 < 0) {width += x0; x0 = 0;}
+		if(x0 < 0) { width += x0; x0 = 0; }
 		if(x0 + width > internal.width) width = internal.width - x0;
-		if(y0 < 0) {height += y0; y0 = 0;}
+		if(y0 < 0) { height += y0; y0 = 0; }
 		if(y0 + height > internal.height) height = internal.height - y0;
 
-		const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
-		const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
+		rect.x0 = x0;
+		rect.x1 = x0 + width;
+		rect.y0 = y0;
+		rect.y1 = y0 + height;
 
-		int x1 = x0 + width;
-		int y1 = y0 + height;
-
-	//	if(lockable || !quadLayoutEnabled)
-		{
-			unsigned char *buffer = (unsigned char*)lockInternal(x0, y0, 0, lock, PUBLIC);
-
-			for(int z = 0; z < internal.depth; z++)
-			{
-				unsigned char *target = buffer;
-
-				for(int y = y0; y < y1; y++)
-				{
-					switch(internal.format)
-					{
-					case FORMAT_NULL:
-						break;
-					case FORMAT_X8R8G8B8:
-					case FORMAT_A8R8G8B8:
-				//	case FORMAT_X8G8R8B8Q:   // FIXME
-				//	case FORMAT_A8G8R8B8Q:   // FIXME
-						{
-							unsigned char r8 = iround(red * 0xFF);
-							unsigned char g8 = iround(green * 0xFF);
-							unsigned char b8 = iround(blue * 0xFF);
-							unsigned char a8 = iround(alpha * 0xFF);
-							unsigned char a8r8g8b8[4] = {b8, g8, r8, a8};
-							unsigned int colorARGB = (unsigned int&)a8r8g8b8;
-
-							if(rgbaMask == 0xF || (internal.format == FORMAT_X8R8G8B8 && rgbaMask == 0x7))
-							{
-								memfill4(target, colorARGB, 4 * (x1 - x0));
-							}
-							else
-							{
-								unsigned int bgraMask = (rgbaMask & 0x1 ? 0x00FF0000 : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x000000FF : 0) | (rgbaMask & 0x8 ? 0xFF000000 : 0);
-								unsigned int invMask = ~bgraMask;
-								unsigned int maskedColor = colorARGB & bgraMask;
-								unsigned int *target32 = (unsigned int*)target;
-
-								for(int x = 0; x < width; x++)
-								{
-									target32[x] = maskedColor | (target32[x] & invMask);
-								}
-							}
-						}
-						break;
-					case FORMAT_X8B8G8R8:
-					case FORMAT_A8B8G8R8:
-						{
-							unsigned char r8 = iround(red * 0xFF);
-							unsigned char g8 = iround(green * 0xFF);
-							unsigned char b8 = iround(blue * 0xFF);
-							unsigned char a8 = iround(alpha * 0xFF);
-							unsigned char a8b8g8r8[4] = {r8, g8, b8, a8};
-							unsigned int colorABGR = (unsigned int&)a8b8g8r8;
-
-							if(rgbaMask == 0xF || (internal.format == FORMAT_X8B8G8R8 && rgbaMask == 0x7))
-							{
-								memfill4(target, colorABGR, 4 * (x1 - x0));
-							}
-							else
-							{
-								unsigned int rgbaMask32 = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x00FF0000 : 0) | (rgbaMask & 0x8 ? 0xFF000000 : 0);
-								unsigned int invMask = ~rgbaMask32;
-								unsigned int maskedColor = colorABGR & rgbaMask32;
- 								unsigned int *target32 = (unsigned int*)target;
-
-								for(int x = 0; x < width; x++)
-								{
-									target32[x] = maskedColor | (target32[x] & invMask);
-								}
-							}
-						}
-						break;
-					case FORMAT_G8R8:
-						{
-							unsigned char r8 = iround(red * 0xFF);
-							unsigned char g8 = iround(green * 0xFF);
-							unsigned char g8r8[4] = {r8, g8, r8, g8};
-
-							if((rgbaMask & 0x3) == 0x3)
-							{
-								memfill4(target, (int&)g8r8, 2 * (x1 - x0));
-							}
-							else
-							{
-								unsigned short rgMask = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0);
-								unsigned short invMask = ~rgMask;
-								unsigned short maskedColor = (unsigned short&)g8r8 & rgMask;
-								unsigned short *target16 = (unsigned short*)target;
-
-								for(int x = 0; x < width; x++)
-								{
-									target16[x] = maskedColor | (target16[x] & invMask);
-								}
-							}
-						}
-						break;
-					case FORMAT_G16R16:
-						{
-							unsigned char r16 = iround(red * 0xFFFF);
-							unsigned char g16 = iround(green * 0xFFFF);
-							unsigned short g16r16[2] = {r16, g16};
-
-							if((rgbaMask & 0x3) == 0x3)
-							{
-								memfill4(target, (int&)g16r16, 4 * (x1 - x0));
-							}
-							else
-							{
-								unsigned int rgMask = (rgbaMask & 0x1 ? 0x0000FFFF : 0) | (rgbaMask & 0x2 ? 0xFFFF0000 : 0);
-								unsigned int invMask = ~rgMask;
-								unsigned int maskedColor = (unsigned int&)g16r16 & rgMask;
-								unsigned int *target32 = (unsigned int*)target;
-
-								for(int x = 0; x < width; x++)
-								{
-									target32[x] = maskedColor | (target32[x] & invMask);
-								}
-							}
-						}
-						break;
-					case FORMAT_A16B16G16R16:
-						{
-							unsigned char r16 = iround(red * 0xFFFF);
-							unsigned char g16 = iround(green * 0xFFFF);
-							unsigned char b16 = iround(blue * 0xFFFF);
-							unsigned char a16 = iround(alpha * 0xFFFF);
-
-							if(rgbaMask == 0xF)
-							{
-								for(int x = 0; x < width; x++)
-								{
-									((unsigned short*)target)[4 * x + 0] = r16;
-									((unsigned short*)target)[4 * x + 1] = g16;
-									((unsigned short*)target)[4 * x + 2] = b16;
-									((unsigned short*)target)[4 * x + 3] = a16;
-								}
-							}
-							else
-							{
-								if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 0] = r16;
-								if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 1] = g16;
-								if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 2] = b16;
-								if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 3] = a16;
-							}
-						}
-						break;
-					case FORMAT_R32F:
-						if(rgbaMask & 0x1)
-						{
-							for(int x = 0; x < width; x++)
-							{
-								((float*)target)[x] = red;
-							}
-						}
-						break;
-					case FORMAT_G32R32F:
-						if((rgbaMask & 0x3) == 0x3)
-						{
-							for(int x = 0; x < width; x++)
-							{
-								((float*)target)[2 * x + 0] = red;
-								((float*)target)[2 * x + 1] = green;
-							}
-						}
-						else
-						{
-							if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 0] = red;
-							if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 1] = green;
-						}
-						break;
-					case FORMAT_A32B32G32R32F:
-						if(rgbaMask == 0xF)
-						{
-							for(int x = 0; x < width; x++)
-							{
-								((float*)target)[4 * x + 0] = red;
-								((float*)target)[4 * x + 1] = green;
-								((float*)target)[4 * x + 2] = blue;
-								((float*)target)[4 * x + 3] = alpha;
-							}
-						}
-						else
-						{
-							if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 0] = red;
-							if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 1] = green;
-							if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 2] = blue;
-							if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 3] = alpha;
-						}
-						break;
-					case FORMAT_R5G6B5:
-						{
-							unsigned int r5 = iround(red * 0x1F);
-							unsigned int g6 = iround(green * 0x3F);
-							unsigned int b5 = iround(blue * 0x1F);
-							unsigned int r5g6b5 = (r5 << 11) | (g6 << 5) | b5;
-
-							if((rgbaMask & 0x7) == 0x7)
-							{
-								unsigned int r5g6b5r5g6b5 = r5g6b5 | (r5g6b5 << 16);
-								memfill4(target, r5g6b5r5g6b5, 2 * (x1 - x0));
-							}
-							else
-							{
-								unsigned short rgbMask = (rgbaMask & 0x1 ? 0xF800 : 0) | (rgbaMask & 0x2 ? 0x07E0 : 0) | (rgbaMask & 0x3 ? 0x001F : 0);
-								unsigned short invMask = ~rgbMask;
-								unsigned short maskedColor = r5g6b5 & rgbMask;
-								unsigned short *target16 = (unsigned short*)target;
-
-								for(int x = 0; x < width; x++)
-								{
-									target16[x] = maskedColor | (target16[x] & invMask);
-								}
-							}
-						}
-						break;
-					default:
-						ASSERT(false);
-					}
-
-					target += internal.pitchB;
-				}
-
-				buffer += internal.sliceB;
-			}
-
-			unlockInternal();
-		}
-	/*	else
-		{
-			int width2 = (internal.width + 1) & ~1;
-
-		//	unsigned char *target = (unsigned char*&)buffer;
-		//
-		//	for(int y = y0; y < y1; y++)
-		//	{
-		//		for(int x = x0; x < x1; x++)
-		//		{
-		//			target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 0] =  (color & 0x000000FF) >> 0;
-		//			target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 4] =  (color & 0x00FF0000) >> 16;
-		//			target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 8] =  (color & 0x0000FF00) >> 8;
-		//			target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 12] = (color & 0xFF000000) >> 24;
-		//		}
-		//	}
-
-			unsigned char colorQ[16];
-
-			colorQ[0] =  (color & 0x000000FF) >> 0;
-			colorQ[1] =  (color & 0x000000FF) >> 0;
-			colorQ[2] =  (color & 0x000000FF) >> 0;
-			colorQ[3] =  (color & 0x000000FF) >> 0;
-			colorQ[4] =  (color & 0x00FF0000) >> 16;
-			colorQ[5] =  (color & 0x00FF0000) >> 16;
-			colorQ[6] =  (color & 0x00FF0000) >> 16;
-			colorQ[7] =  (color & 0x00FF0000) >> 16;
-			colorQ[8] =  (color & 0x0000FF00) >> 8;
-			colorQ[9] =  (color & 0x0000FF00) >> 8;
-			colorQ[10] = (color & 0x0000FF00) >> 8;
-			colorQ[11] = (color & 0x0000FF00) >> 8;
-			colorQ[12] = (color & 0xFF000000) >> 24;
-			colorQ[13] = (color & 0xFF000000) >> 24;
-			colorQ[14] = (color & 0xFF000000) >> 24;
-			colorQ[15] = (color & 0xFF000000) >> 24;
-
-			for(int y = y0; y < y1; y++)
-			{
-				unsigned char *target = (unsigned char*)lockInternal(0, 0, 0, lock) + width2 * 4 * (y & ~1) + 2 * (y & 1);   // FIXME: Unlock
-
-				if((y & 1) == 0 && y + 1 < y1)   // Fill quad line at once
-				{
-					if((x0 & 1) != 0)
-					{
-						target[8 * (x0 & ~1) + 1 + 0] =  (color & 0x000000FF) >> 0;
-						target[8 * (x0 & ~1) + 1 + 4] =  (color & 0x00FF0000) >> 16;
-						target[8 * (x0 & ~1) + 1 + 8] =  (color & 0x0000FF00) >> 8;
-						target[8 * (x0 & ~1) + 1 + 12] = (color & 0xFF000000) >> 24;
-
-						target[8 * (x0 & ~1) + 3 + 0] =  (color & 0x000000FF) >> 0;
-						target[8 * (x0 & ~1) + 3 + 4] =  (color & 0x00FF0000) >> 16;
-						target[8 * (x0 & ~1) + 3 + 8] =  (color & 0x0000FF00) >> 8;
-						target[8 * (x0 & ~1) + 3 + 12] = (color & 0xFF000000) >> 24;
-					}
-
-					__asm
-					{
-						movq mm0, colorQ+0
-						movq mm1, colorQ+8
-
-						mov eax, x0
-						add eax, 1
-						and eax, 0xFFFFFFFE
-						cmp eax, x1
-						jge qEnd
-
-						mov edi, target
-
-					qLoop:
-						movntq [edi+8*eax+0], mm0
-						movntq [edi+8*eax+8], mm1
-
-						add eax, 2
-						cmp eax, x1
-						jl qLoop
-					qEnd:
-						emms
-					}
-
-					if((x1 & 1) != 0)
-					{
-						target[8 * (x1 & ~1) + 0 + 0] =  (color & 0x000000FF) >> 0;
-						target[8 * (x1 & ~1) + 0 + 4] =  (color & 0x00FF0000) >> 16;
-						target[8 * (x1 & ~1) + 0 + 8] =  (color & 0x0000FF00) >> 8;
-						target[8 * (x1 & ~1) + 0 + 12] = (color & 0xFF000000) >> 24;
-
-						target[8 * (x1 & ~1) + 2 + 0] =  (color & 0x000000FF) >> 0;
-						target[8 * (x1 & ~1) + 2 + 4] =  (color & 0x00FF0000) >> 16;
-						target[8 * (x1 & ~1) + 2 + 8] =  (color & 0x0000FF00) >> 8;
-						target[8 * (x1 & ~1) + 2 + 12] = (color & 0xFF000000) >> 24;
-					}
-
-					y++;
-				}
-				else
-				{
-					for(int x = x0; x < x1; x++)
-					{
-						target[8 * (x & ~1) + (x & 1) + 0] =  (color & 0x000000FF) >> 0;
-						target[8 * (x & ~1) + (x & 1) + 4] =  (color & 0x00FF0000) >> 16;
-						target[8 * (x & ~1) + (x & 1) + 8] =  (color & 0x0000FF00) >> 8;
-						target[8 * (x & ~1) + (x & 1) + 12] = (color & 0xFF000000) >> 24;
-					}
-				}
-			}
-		}*/
+		return true;
 	}
 
 	void Surface::clearDepthBuffer(float depth, int x0, int y0, int width, int height)
@@ -3693,48 +3363,6 @@
 		}
 	}
 
-	Color<float> Surface::readExternal(int x, int y, int z) const
-	{
-		ASSERT(external.lock != LOCK_UNLOCKED);
-
-		return external.read(x, y, z);
-	}
-
-	Color<float> Surface::readExternal(int x, int y) const
-	{
-		ASSERT(external.lock != LOCK_UNLOCKED);
-
-		return external.read(x, y);
-	}
-
-	Color<float> Surface::sampleExternal(float x, float y, float z) const
-	{
-		ASSERT(external.lock != LOCK_UNLOCKED);
-
-		return external.sample(x, y, z);
-	}
-
-	Color<float> Surface::sampleExternal(float x, float y) const
-	{
-		ASSERT(external.lock != LOCK_UNLOCKED);
-
-		return external.sample(x, y);
-	}
-
-	void Surface::writeExternal(int x, int y, int z, const Color<float> &color)
-	{
-		ASSERT(external.lock != LOCK_UNLOCKED);
-
-		external.write(x, y, z, color);
-	}
-
-	void Surface::writeExternal(int x, int y, const Color<float> &color)
-	{
-		ASSERT(external.lock != LOCK_UNLOCKED);
-
-		external.write(x, y, color);
-	}
-
 	void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
 	{
 		ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);