Integer types support in Blitter

The blitter now supports integer types. This means:
- It supports conversions between integer types and the
  other already supported types.
- It supports integer to integer conversions without going
  to an intermediate float format.

Also added a Blitter::GetScale() function to avoid having
2 instances of the same switch statement in the code and
added the read/write utility functions. The final Blitter
code is no longer peppered with switch statements and is,
hopefully, easier to read that way.

Change-Id: I80de519aaaa768f8cedd98f97dc4414dda75bf54
Reviewed-on: https://swiftshader-review.googlesource.com/4113
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/D3D9/Direct3DVolumeTexture9.cpp b/src/D3D9/Direct3DVolumeTexture9.cpp
index 5372a2e..5633cc1 100644
--- a/src/D3D9/Direct3DVolumeTexture9.cpp
+++ b/src/D3D9/Direct3DVolumeTexture9.cpp
@@ -216,18 +216,7 @@
 
 					for(int i = 0; i < dWidth; i++)
 					{
-						sw::Color<float> color;
-
-						if(filter <= D3DTEXF_POINT)
-						{
-							color = source->readInternal((int)x, (int)y, (int)z);
-						}
-						else   // filter >= D3DTEXF_LINEAR
-						{
-							color = source->sampleInternal(x, y, z);
-						}
-
-						dest->writeInternal(i, j, k, color);
+						dest->copyInternal(source, i, j, k, x, y, z, filter > D3DTEXF_POINT);
 
 						x += w;
 					}
diff --git a/src/Renderer/Blitter.cpp b/src/Renderer/Blitter.cpp
index 72a0bd8..b55ccb2 100644
--- a/src/Renderer/Blitter.cpp
+++ b/src/Renderer/Blitter.cpp
@@ -67,18 +67,7 @@
 
 			for(int i = dRect.x0; i < dRect.x1; i++)
 			{
-				sw::Color<float> color;
-
-				if(!filter)
-				{
-					color = source->readInternal((int)x, (int)y);
-				}
-				else   // Bilinear filtering
-				{
-					color = source->sampleInternal(x, y);
-				}
-
-				dest->writeInternal(i, j, color);
+				dest->copyInternal(source, i, j, x, y, filter);
 
 				x += w;
 			}
@@ -108,7 +97,7 @@
 				float x = 0.5f * w;
 				for(int i = 0; i < dest->getWidth(); ++i)
 				{
-					dest->writeInternal(i, j, k, source->sampleInternal(x, y, z));
+					dest->copyInternal(source, i, j, k, x, y, z, true);
 					x += w;
 				}
 				y += h;
@@ -133,9 +122,50 @@
 			c.xyz = 0.0f;
 			c.w = Float(Int(*Pointer<Byte>(element)));
 			break;
+		case FORMAT_R8I:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<SByte>(element)));
+			break;
+		case FORMAT_R8UI:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<Byte>(element)));
+			break;
+		case FORMAT_R8I_SNORM:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<SByte>(element)));
+			break;
+		case FORMAT_R8:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<Byte>(element)));
+			break;
+		case FORMAT_R16I:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<Short>(element)));
+			break;
+		case FORMAT_R16UI:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<UShort>(element)));
+			break;
+		case FORMAT_R32I:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<Int>(element)));
+			break;
+		case FORMAT_R32UI:
+			c.yzw = 0.0f;
+			c.x = Float(Int(*Pointer<UInt>(element)));
+			break;
 		case FORMAT_A8R8G8B8:
 			c = Float4(*Pointer<Byte4>(element)).zyxw;
 			break;
+		case FORMAT_A8B8G8R8I:
+			c = Float4(*Pointer<SByte4>(element));
+			break;
+		case FORMAT_A8B8G8R8UI:
+			c = Float4(*Pointer<Byte4>(element));
+			break;
+		case FORMAT_A8B8G8R8I_SNORM:
+			c = Float4(*Pointer<SByte4>(element));
+			break;
 		case FORMAT_A8B8G8R8:
 			c = Float4(*Pointer<Byte4>(element));
 			break;
@@ -143,18 +173,84 @@
 			c = Float4(*Pointer<Byte4>(element)).zyxw;
 			c.w = 1.0f;
 			break;
+		case FORMAT_X8B8G8R8I:
+			c = Float4(*Pointer<SByte4>(element));
+			c.w = 1.0f;
+			break;
+		case FORMAT_X8B8G8R8UI:
+			c = Float4(*Pointer<Byte4>(element));
+			c.w = 1.0f;
+			break;
+		case FORMAT_X8B8G8R8I_SNORM:
+			c = Float4(*Pointer<SByte4>(element));
+			c.w = 1.0f;
+			break;
 		case FORMAT_X8B8G8R8:
 			c = Float4(*Pointer<Byte4>(element));
 			c.w = 1.0f;
 			break;
+		case FORMAT_A16B16G16R16I:
+			c = Float4(*Pointer<Short4>(element));
+			break;
 		case FORMAT_A16B16G16R16:
+		case FORMAT_A16B16G16R16UI:
 			c = Float4(*Pointer<UShort4>(element));
 			break;
+		case FORMAT_X16B16G16R16I:
+			c = Float4(*Pointer<Short4>(element));
+			c.w = 1.0f;
+			break;
+		case FORMAT_X16B16G16R16UI:
+			c = Float4(*Pointer<UShort4>(element));
+			c.w = 1.0f;
+			break;
+		case FORMAT_A32B32G32R32I:
+			c = Float4(*Pointer<Int4>(element));
+			break;
+		case FORMAT_A32B32G32R32UI:
+			c = Float4(*Pointer<UInt4>(element));
+			break;
+		case FORMAT_X32B32G32R32I:
+			c = Float4(*Pointer<Int4>(element));
+			c.w = 1.0f;
+			break;
+		case FORMAT_X32B32G32R32UI:
+			c = Float4(*Pointer<UInt4>(element));
+			c.w = 1.0f;
+			break;
+		case FORMAT_G8R8I:
+			c.x = Float(Int(*Pointer<SByte>(element + 0)));
+			c.y = Float(Int(*Pointer<SByte>(element + 1)));
+			break;
+		case FORMAT_G8R8UI:
+			c.x = Float(Int(*Pointer<Byte>(element + 0)));
+			c.y = Float(Int(*Pointer<Byte>(element + 1)));
+			break;
+		case FORMAT_G8R8I_SNORM:
+			c.x = Float(Int(*Pointer<SByte>(element + 0)));
+			c.y = Float(Int(*Pointer<SByte>(element + 1)));
+			break;
+		case FORMAT_G8R8:
+			c.x = Float(Int(*Pointer<Byte>(element + 0)));
+			c.y = Float(Int(*Pointer<Byte>(element + 1)));
+			break;
+		case FORMAT_G16R16I:
+			c.x = Float(Int(*Pointer<Short>(element + 0)));
+			c.y = Float(Int(*Pointer<Short>(element + 2)));
+			break;
 		case FORMAT_G16R16:
-			// FIXME: Optimize
+		case FORMAT_G16R16UI:
 			c.x = Float(Int(*Pointer<UShort>(element + 0)));
 			c.y = Float(Int(*Pointer<UShort>(element + 2)));
 			break;
+		case FORMAT_G32R32I:
+			c.x = Float(Int(*Pointer<Int>(element + 0)));
+			c.y = Float(Int(*Pointer<Int>(element + 4)));
+			break;
+		case FORMAT_G32R32UI:
+			c.x = Float(Int(*Pointer<UInt>(element + 0)));
+			c.y = Float(Int(*Pointer<UInt>(element + 4)));
+			break;
 		case FORMAT_A32B32G32R32F:
 			c = *Pointer<Float4>(element);
 			break;
@@ -172,6 +268,380 @@
 		return true;
 	}
 
+	bool Blitter::write(Float4 &c, Pointer<Byte> element, Format format)   // Stores float color 'c' at 'element' using the layout of 'format'; returns false for formats not handled here
+	{
+		switch(format)
+		{
+		case FORMAT_L8:
+			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
+			break;
+		case FORMAT_A8:
+			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.w)));
+			break;
+		case FORMAT_A8R8G8B8:
+			{
+				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));   // x and z swapped relative to the A8B8G8R8 case below
+				Byte8 c1 = Pack(c0, c0);
+				*Pointer<UInt>(element) = UInt(As<Long>(c1));
+			}
+			break;
+		case FORMAT_A8B8G8R8:
+			{
+				UShort4 c0 = As<UShort4>(RoundShort4(c));
+				Byte8 c1 = Pack(c0, c0);
+				*Pointer<UInt>(element) = UInt(As<Long>(c1));
+			}
+			break;
+		case FORMAT_X8R8G8B8:
+			{
+				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
+				Byte8 c1 = Pack(c0, c0);
+				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;   // Force the unused top byte to 0xFF
+			}
+			break;
+		case FORMAT_X8B8G8R8:
+			{
+				UShort4 c0 = As<UShort4>(RoundShort4(c));
+				Byte8 c1 = Pack(c0, c0);
+				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;   // Force the unused top byte to 0xFF
+			}
+			break;
+		case FORMAT_A32B32G32R32F:
+			*Pointer<Float4>(element) = c;
+			break;
+		case FORMAT_G32R32F:
+			*Pointer<Float2>(element) = Float2(c);
+			break;
+		case FORMAT_R32F:
+			*Pointer<Float>(element) = c.x;
+			break;
+		case FORMAT_A8B8G8R8I:   // Signed 8-bit group: each label deliberately falls through to write the lower channels too
+		case FORMAT_A8B8G8R8I_SNORM:
+			*Pointer<SByte>(element + 3) = As<SByte>(RoundInt(Float(c.w)));
+		case FORMAT_X8B8G8R8I:
+		case FORMAT_X8B8G8R8I_SNORM:
+			if(format == FORMAT_X8B8G8R8I || format == FORMAT_X8B8G8R8I_SNORM)   // Skipped when reached by fall-through from an A format
+			{
+				*Pointer<SByte>(element + 3) = SByte(0x7F);
+			}
+			*Pointer<SByte>(element + 2) = As<SByte>(RoundInt(Float(c.z)));
+		case FORMAT_G8R8I:
+		case FORMAT_G8R8I_SNORM:
+			*Pointer<SByte>(element + 1) = As<SByte>(RoundInt(Float(c.y)));
+		case FORMAT_R8I:
+		case FORMAT_R8I_SNORM:
+			*Pointer<SByte>(element) = As<SByte>(RoundInt(Float(c.x)));
+			break;
+		case FORMAT_A8B8G8R8UI:   // Unsigned 8-bit group: same fall-through scheme; adds 0.5 before the UInt conversion
+			*Pointer<Byte>(element + 3) = As<Byte>(UInt(Float(c.w) + Float(0.5f)));
+		case FORMAT_X8B8G8R8UI:
+			if(format == FORMAT_X8B8G8R8UI)   // Skipped when reached by fall-through from the A format
+			{
+				*Pointer<Byte>(element + 3) = Byte(0xFF);
+			}
+			*Pointer<Byte>(element + 2) = As<Byte>(UInt(Float(c.z) + Float(0.5f)));
+		case FORMAT_G8R8UI:
+		case FORMAT_G8R8:
+			*Pointer<Byte>(element + 1) = As<Byte>(UInt(Float(c.y) + Float(0.5f)));
+		case FORMAT_R8UI:
+		case FORMAT_R8:
+			*Pointer<Byte>(element) = As<Byte>(UInt(Float(c.x) + Float(0.5f)));
+			break;
+		case FORMAT_A16B16G16R16I:
+			*Pointer<Short4>(element) = Short4(RoundInt(c));
+			break;
+		case FORMAT_X16B16G16R16I:
+			*Pointer<Short4>(element) = Short4(RoundInt(c));
+			*Pointer<Short>(element + 6) = Short(0x7FFF);   // Unused X channel gets the 16-bit signed maximum, matching the Int4 read/write paths
+			break;
+		case FORMAT_G16R16I:
+			*Pointer<UInt>(element) = UInt(As<Long>(Short4(RoundInt(c))));   // Low 32 bits of the packed vector hold the first two channels
+			break;
+		case FORMAT_R16I:
+			*Pointer<Short>(element) = As<Short>(RoundInt(Float(c.x)));
+			break;
+		case FORMAT_A16B16G16R16UI:
+		case FORMAT_A16B16G16R16:
+			*Pointer<UShort4>(element) = UShort4(RoundInt(c));
+			break;
+		case FORMAT_X16B16G16R16UI:
+			*Pointer<UShort4>(element) = UShort4(RoundInt(c));
+			*Pointer<UShort>(element + 6) = UShort(0xFFFF);   // Unused X channel gets the 16-bit unsigned maximum, matching the Int4 read/write paths
+			break;
+		case FORMAT_G16R16UI:
+		case FORMAT_G16R16:
+			*Pointer<UInt>(element) = UInt(As<Long>(UShort4(RoundInt(c))));   // NOTE(review): no break, so the R16UI store below rewrites the first channel - confirm this is intended
+		case FORMAT_R16UI:
+			*Pointer<UShort>(element) = As<UShort>(UInt(Float(c.x) + Float(0.5f)));
+			break;
+		case FORMAT_A32B32G32R32I:
+			*Pointer<Int4>(element) = RoundInt(c);
+			break;
+		case FORMAT_X32B32G32R32I:
+			*Pointer<Int4>(element) = RoundInt(c);
+			*Pointer<Int>(element + 12) = Int(0x7FFFFFFF);   // Overwrite only the unused X channel (bytes 12..15)
+			break;
+		case FORMAT_G32R32I:
+			*Pointer<Int>(element + 4) = RoundInt(Float(c.y));   // Falls through to write the first channel
+		case FORMAT_R32I:
+			*Pointer<Int>(element) = RoundInt(Float(c.x));
+			break;
+		case FORMAT_A32B32G32R32UI:
+			*Pointer<UInt4>(element) = UInt4(c + Float4(0.5f));
+			break;
+		case FORMAT_X32B32G32R32UI:
+			*Pointer<UInt4>(element) = UInt4(c + Float4(0.5f));
+			*Pointer<UInt>(element + 12) = UInt(0xFFFFFFFF);   // Single 32-bit store: a 4-wide store at offset 12 would run 12 bytes past this pixel
+			break;
+		case FORMAT_G32R32UI:
+			*Pointer<UInt>(element + 4) = UInt(Float(c.y) + Float(0.5f));   // Falls through to write the first channel
+		case FORMAT_R32UI:
+			*Pointer<UInt>(element) = UInt(Float(c.x) + Float(0.5f));
+			break;
+		default:
+			return false;   // Format not handled by this writer
+		}
+		return true;
+	}
+
+	bool Blitter::read(Int4 &c, Pointer<Byte> element, Format format)   // Loads the integer pixel at 'element' (laid out as 'format') into 'c' without going through float; returns false for formats not handled here
+	{
+		c = Int4(0);   // Channels the format does not store stay 0, except alpha which is filled in below
+
+		switch(format)
+		{
+		case FORMAT_A8B8G8R8I:   // Each label falls through so that wider formats also load the lower channels
+			c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);   // NOTE(review): assumes Insert() returns the updated vector instead of modifying 'c' in place - confirm against the Reactor headers
+		case FORMAT_X8B8G8R8I:
+			c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
+		case FORMAT_G8R8I:
+			c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
+		case FORMAT_R8I:
+			c = Insert(c, Int(*Pointer<SByte>(element)), 0);
+			if(format != FORMAT_A8B8G8R8I)
+			{
+				c = Insert(c, Int(0x7F), 3); // Set alpha
+			}
+			break;
+		case FORMAT_A8B8G8R8UI:
+			c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
+		case FORMAT_X8B8G8R8UI:
+			c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
+		case FORMAT_G8R8UI:
+			c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
+		case FORMAT_R8UI:
+			c = Insert(c, Int(*Pointer<Byte>(element)), 0);
+			if(format != FORMAT_A8B8G8R8UI)
+			{
+				c = Insert(c, Int(0xFF), 3); // Set alpha
+			}
+			break;
+		case FORMAT_A16B16G16R16I:
+			c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);   // 'element' is a byte pointer, so 16-bit channel i lives at byte offset 2 * i
+		case FORMAT_X16B16G16R16I:
+			c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
+		case FORMAT_G16R16I:
+			c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
+		case FORMAT_R16I:
+			c = Insert(c, Int(*Pointer<Short>(element)), 0);
+			if(format != FORMAT_A16B16G16R16I)
+			{
+				c = Insert(c, Int(0x7FFF), 3); // Set alpha
+			}
+			break;
+		case FORMAT_A16B16G16R16UI:
+			c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);   // Byte offsets 6 / 4 / 2 / 0 for the four 16-bit channels
+		case FORMAT_X16B16G16R16UI:
+			c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
+		case FORMAT_G16R16UI:
+			c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
+		case FORMAT_R16UI:
+			c = Insert(c, Int(*Pointer<UShort>(element)), 0);
+			if(format != FORMAT_A16B16G16R16UI)
+			{
+				c = Insert(c, Int(0xFFFF), 3); // Set alpha
+			}
+			break;
+		case FORMAT_A32B32G32R32I:
+			c = Insert(c, Int(*Pointer<Int>(element + 12)), 3);   // 32-bit channel i lives at byte offset 4 * i
+		case FORMAT_X32B32G32R32I:
+			c = Insert(c, Int(*Pointer<Int>(element + 8)), 2);
+		case FORMAT_G32R32I:
+			c = Insert(c, Int(*Pointer<Int>(element + 4)), 1);
+		case FORMAT_R32I:
+			c = Insert(c, Int(*Pointer<Int>(element)), 0);
+			if(format != FORMAT_A32B32G32R32I)
+			{
+				c = Insert(c, Int(0x7FFFFFFF), 3); // Set alpha
+			}
+			break;
+		case FORMAT_A32B32G32R32UI:
+			c = Insert(c, Int(*Pointer<UInt>(element + 12)), 3);   // Byte offsets 12 / 8 / 4 / 0 for the four 32-bit channels
+		case FORMAT_X32B32G32R32UI:
+			c = Insert(c, Int(*Pointer<UInt>(element + 8)), 2);
+		case FORMAT_G32R32UI:
+			c = Insert(c, Int(*Pointer<UInt>(element + 4)), 1);
+		case FORMAT_R32UI:
+			c = Insert(c, Int(*Pointer<UInt>(element)), 0);
+			if(format != FORMAT_A32B32G32R32UI)
+			{
+				c = Insert(c, Int(UInt(0xFFFFFFFFU)), 3); // Set alpha
+			}
+			break;
+		default:
+			return false;   // Not one of the non-normalized integer formats handled here
+		}
+
+		return true;
+	}
+
+	bool Blitter::write(Int4 &c, Pointer<Byte> element, Format format)   // Stores integer color 'c' at 'element' using the layout of 'format', with no float round trip; returns false for formats not handled here
+	{
+		switch(format)
+		{
+		case FORMAT_A8B8G8R8I:   // Each label falls through so that wider formats also store the lower channels
+			*Pointer<SByte>(element + 3) = As<SByte>(Extract(c, 3));
+		case FORMAT_X8B8G8R8I:
+			*Pointer<SByte>(element + 2) = As<SByte>(Extract(c, 2));
+		case FORMAT_G8R8I:
+			*Pointer<SByte>(element + 1) = As<SByte>(Extract(c, 1));
+		case FORMAT_R8I:
+			*Pointer<SByte>(element) = As<SByte>(Extract(c, 0));
+			if(format == FORMAT_X8B8G8R8I)   // Only the X format has a 4th byte to fill; R and GR pixels end before offset 3
+			{
+				*Pointer<SByte>(element + 3) = SByte(0x7F);
+			}
+			break;
+		case FORMAT_A8B8G8R8UI:
+			*Pointer<Byte>(element + 3) = As<Byte>(Extract(c, 3));
+		case FORMAT_X8B8G8R8UI:
+			*Pointer<Byte>(element + 2) = As<Byte>(Extract(c, 2));
+		case FORMAT_G8R8UI:
+			*Pointer<Byte>(element + 1) = As<Byte>(Extract(c, 1));
+		case FORMAT_R8UI:
+			*Pointer<Byte>(element) = As<Byte>(Extract(c, 0));
+			if(format == FORMAT_X8B8G8R8UI)   // Only the X format has a 4th byte to fill
+			{
+				*Pointer<Byte>(element + 3) = Byte(0xFF);
+			}
+			break;
+		case FORMAT_A16B16G16R16I:
+			*Pointer<Short>(element + 6) = As<Short>(Extract(c, 3));   // 'element' is a byte pointer, so 16-bit channel i lives at byte offset 2 * i
+		case FORMAT_X16B16G16R16I:
+			*Pointer<Short>(element + 4) = As<Short>(Extract(c, 2));
+		case FORMAT_G16R16I:
+			*Pointer<Short>(element + 2) = As<Short>(Extract(c, 1));
+		case FORMAT_R16I:
+			*Pointer<Short>(element) = As<Short>(Extract(c, 0));
+			if(format == FORMAT_X16B16G16R16I)   // Only the X format has a 4th channel to fill
+			{
+				*Pointer<Short>(element + 6) = Short(0x7FFF);
+			}
+			break;
+		case FORMAT_A16B16G16R16UI:
+			*Pointer<UShort>(element + 6) = As<UShort>(Extract(c, 3));   // Byte offsets 6 / 4 / 2 / 0 for the four 16-bit channels
+		case FORMAT_X16B16G16R16UI:
+			*Pointer<UShort>(element + 4) = As<UShort>(Extract(c, 2));
+		case FORMAT_G16R16UI:
+			*Pointer<UShort>(element + 2) = As<UShort>(Extract(c, 1));
+		case FORMAT_R16UI:
+			*Pointer<UShort>(element) = As<UShort>(Extract(c, 0));
+			if(format == FORMAT_X16B16G16R16UI)   // Only the X format has a 4th channel to fill
+			{
+				*Pointer<UShort>(element + 6) = UShort(0xFFFF);
+			}
+			break;
+		case FORMAT_A32B32G32R32I:
+			*Pointer<Int4>(element) = c;
+			break;
+		case FORMAT_X32B32G32R32I:
+			*Pointer<Int4>(element) = c;
+			*Pointer<Int>(element + 12) = Int(0x7FFFFFFF);   // Unused X channel starts at byte 12 (4th 32-bit channel)
+			break;
+		case FORMAT_G32R32I:
+			*Pointer<Int2>(element) = As<Int2>(c);
+			break;
+		case FORMAT_R32I:
+			*Pointer<Int>(element) = Extract(c, 0);
+			break;
+		case FORMAT_A32B32G32R32UI:
+			*Pointer<UInt4>(element) = As<UInt4>(c);
+			break;
+		case FORMAT_X32B32G32R32UI:
+			*Pointer<UInt4>(element) = As<UInt4>(c);
+			*Pointer<UInt>(element + 12) = UInt(0xFFFFFFFF);   // Unused X channel starts at byte 12 (4th 32-bit channel)
+			break;
+		case FORMAT_G32R32UI:
+			*Pointer<UInt2>(element) = As<UInt2>(c);
+			break;
+		case FORMAT_R32UI:
+			*Pointer<UInt>(element) = As<UInt>(Extract(c, 0));
+			break;
+		default:
+			return false;   // Not one of the non-normalized integer formats handled here
+		}
+
+		return true;
+	}
+
+	bool Blitter::GetScale(float4& scale, Format format)   // Sets 'scale' to the per-channel factor generate() uses to convert colors to and from 'format'; returns false for unsupported formats
+	{
+		switch(format)
+		{
+		case FORMAT_L8:   // 8-bit unsigned normalized formats
+		case FORMAT_A8:
+		case FORMAT_A8R8G8B8:
+		case FORMAT_X8R8G8B8:
+		case FORMAT_R8:
+		case FORMAT_G8R8:
+		case FORMAT_X8B8G8R8:
+		case FORMAT_A8B8G8R8:
+			scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
+			break;
+		case FORMAT_R8I_SNORM:   // 8-bit signed normalized formats
+		case FORMAT_G8R8I_SNORM:
+		case FORMAT_X8B8G8R8I_SNORM:
+		case FORMAT_A8B8G8R8I_SNORM:
+			scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
+			break;
+		case FORMAT_G16R16: case FORMAT_A16B16G16R16:   // 16-bit unsigned normalized formats keep the 65535 factor of the switch this function replaces
+			scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
+			break;
+		case FORMAT_R8I:   // Non-normalized integer and float formats are used as-is
+		case FORMAT_R8UI:
+		case FORMAT_G8R8I:
+		case FORMAT_G8R8UI:
+		case FORMAT_X8B8G8R8I:
+		case FORMAT_X8B8G8R8UI:
+		case FORMAT_A8B8G8R8I:
+		case FORMAT_A8B8G8R8UI:
+		case FORMAT_R16I:
+		case FORMAT_R16UI:
+		case FORMAT_G16R16I:
+		case FORMAT_G16R16UI:
+		case FORMAT_X16B16G16R16I:
+		case FORMAT_X16B16G16R16UI:
+		case FORMAT_A16B16G16R16I:
+		case FORMAT_A16B16G16R16UI:
+		case FORMAT_R32I:
+		case FORMAT_R32UI:
+		case FORMAT_G32R32I:
+		case FORMAT_G32R32UI:
+		case FORMAT_X32B32G32R32I:
+		case FORMAT_X32B32G32R32UI:
+		case FORMAT_A32B32G32R32I:
+		case FORMAT_A32B32G32R32UI:
+		case FORMAT_A32B32G32R32F:
+		case FORMAT_G32R32F:
+		case FORMAT_R32F:
+			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
+			break;
+		default:
+			return false;   // Unknown format: 'scale' is left untouched
+		}
+		return true;
+	}
+
 	Routine *Blitter::generate(BlitState &state)
 	{
 		Function<Void, Pointer<Byte> > function;
@@ -205,170 +675,97 @@
 
 				For(Int i = x0d, i < x1d, i++)
 				{
-					Float4 color;
+					bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
+					bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
 
-					if(!state.filter)
+					if(intSrc && intDst) // Integer types do not support filtering
 					{
-						Int X = Int(x);
-						Int Y = Int(y);
-
-						Pointer<Byte> s = source + Y * sPitchB + X * Surface::bytes(state.sourceFormat);
-
+						Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
+						Pointer<Byte> s = source + Int(y) * sPitchB + Int(x) * Surface::bytes(state.sourceFormat);
 						if(!read(color, s, state.sourceFormat))
 						{
 							return nullptr;
 						}
-					}
-					else   // Bilinear filtering
-					{
-						Float x0 = x - 0.5f;
-						Float y0 = y - 0.5f;
 
-						Int X0 = Max(Int(x0), 0);
-						Int Y0 = Max(Int(y0), 0);
-							
-						Int X1 = IfThenElse(X0 + 1 >= sWidth, X0, X0 + 1);
-						Int Y1 = IfThenElse(Y0 + 1 >= sHeight, Y0, Y0 + 1);
-
-						Pointer<Byte> s00 = source + Y0 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
-						Pointer<Byte> s01 = source + Y0 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
-						Pointer<Byte> s10 = source + Y1 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
-						Pointer<Byte> s11 = source + Y1 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
-
-						Float4 c00; if(!read(c00, s00, state.sourceFormat)) return nullptr;
-						Float4 c01; if(!read(c01, s01, state.sourceFormat)) return nullptr;
-						Float4 c10; if(!read(c10, s10, state.sourceFormat)) return nullptr;
-						Float4 c11; if(!read(c11, s11, state.sourceFormat)) return nullptr;
-
-						Float4 fx = Float4(x0 - Float(X0));
-						Float4 fy = Float4(y0 - Float(Y0));
-
-						color = c00 * (Float4(1.0f) - fx) * (Float4(1.0f) - fy) +
-							    c01 * fx * (Float4(1.0f) - fy) +
-								c10 * (Float4(1.0f) - fx) * fy +
-								c11 * fx * fy;
-					}
-
-					float4 unscale;
-
-					switch(state.sourceFormat)
-					{
-					case FORMAT_L8:
-					case FORMAT_A8:
-					case FORMAT_A8R8G8B8:
-					case FORMAT_A8B8G8R8:
-					case FORMAT_X8R8G8B8:
-					case FORMAT_X8B8G8R8:
-						unscale = vector(255, 255, 255, 255);
-						break;
-					case FORMAT_A16B16G16R16:
-					case FORMAT_G16R16:
-						unscale = vector(65535, 65535, 65535, 65535);
-						break;
-					case FORMAT_A32B32G32R32F:
-					case FORMAT_G32R32F:
-					case FORMAT_R32F:
-						unscale = vector(1.0f, 1.0f, 1.0f, 1.0f);
-						break;
-					default:
-						return nullptr;
-					}
-
-					float4 scale;
-
-					switch(state.destFormat)
-					{
-					case FORMAT_L8:
-					case FORMAT_A8:
-					case FORMAT_A8R8G8B8:
-					case FORMAT_A8B8G8R8:
-					case FORMAT_X8R8G8B8:
-					case FORMAT_X8B8G8R8:
-						scale = vector(255, 255, 255, 255);
-						break;
-					case FORMAT_A16B16G16R16:
-					case FORMAT_G16R16:
-						scale = vector(65535, 65535, 65535, 65535);
-						break;
-					case FORMAT_A32B32G32R32F:
-					case FORMAT_G32R32F:
-					case FORMAT_R32F:
-						scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
-						break;
-					default:
-						return nullptr;
-					}
-
-					if(unscale != scale)
-					{
-						color *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
-					}
-
-					if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
-					{
-						color = Min(color, Float4(1.0f, 1.0f, 1.0f, 1.0f));
-
-						color = Max(color, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -1.0f,
-						                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -1.0f,
-						                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -1.0f,
-						                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -1.0f));
-					}
-
-					Pointer<Byte> d = destLine + i * Surface::bytes(state.destFormat);
-
-					switch(state.destFormat)
-					{
-					case FORMAT_L8:
-						*Pointer<Byte>(d) = Byte(RoundInt(Float(color.x)));
-						break;
-					case FORMAT_A8:
-						*Pointer<Byte>(d) = Byte(RoundInt(Float(color.w)));
-						break;
-					case FORMAT_A8R8G8B8:
+						Pointer<Byte> d = destLine + i * Surface::bytes(state.destFormat);
+						if(!write(color, d, state.destFormat))
 						{
-							UShort4 c0 = As<UShort4>(RoundShort4(color.zyxw));
-							Byte8 c1 = Pack(c0, c0);
-							*Pointer<UInt>(d) = UInt(As<Long>(c1));
+							return nullptr;
 						}
-						break;
-					case FORMAT_A8B8G8R8:
+					}
+					else
+					{
+						Float4 color;
+
+						if(!state.filter || intSrc)
 						{
-							UShort4 c0 = As<UShort4>(RoundShort4(color));
-							Byte8 c1 = Pack(c0, c0);
-							*Pointer<UInt>(d) = UInt(As<Long>(c1));
+							Int X = Int(x);
+							Int Y = Int(y);
+
+							Pointer<Byte> s = source + Y * sPitchB + X * Surface::bytes(state.sourceFormat);
+
+							if(!read(color, s, state.sourceFormat))
+							{
+								return nullptr;
+							}
 						}
-						break;
-					case FORMAT_X8R8G8B8:
+						else   // Bilinear filtering
 						{
-							UShort4 c0 = As<UShort4>(RoundShort4(color.zyxw));
-							Byte8 c1 = Pack(c0, c0);
-							*Pointer<UInt>(d) = UInt(As<Long>(c1)) | 0xFF000000;
+							Float x0 = x - 0.5f;
+							Float y0 = y - 0.5f;
+
+							Int X0 = Max(Int(x0), 0);
+							Int Y0 = Max(Int(y0), 0);
+
+							Int X1 = IfThenElse(X0 + 1 >= sWidth, X0, X0 + 1);
+							Int Y1 = IfThenElse(Y0 + 1 >= sHeight, Y0, Y0 + 1);
+
+							Pointer<Byte> s00 = source + Y0 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
+							Pointer<Byte> s01 = source + Y0 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
+							Pointer<Byte> s10 = source + Y1 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
+							Pointer<Byte> s11 = source + Y1 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
+
+							Float4 c00; if(!read(c00, s00, state.sourceFormat)) return nullptr;
+							Float4 c01; if(!read(c01, s01, state.sourceFormat)) return nullptr;
+							Float4 c10; if(!read(c10, s10, state.sourceFormat)) return nullptr;
+							Float4 c11; if(!read(c11, s11, state.sourceFormat)) return nullptr;
+
+							Float4 fx = Float4(x0 - Float(X0));
+							Float4 fy = Float4(y0 - Float(Y0));
+
+							color = c00 * (Float4(1.0f) - fx) * (Float4(1.0f) - fy) +
+							        c01 * fx * (Float4(1.0f) - fy) +
+							        c10 * (Float4(1.0f) - fx) * fy +
+							        c11 * fx * fy;
 						}
-						break;
-					case FORMAT_X8B8G8R8:
+
+						float4 scale, unscale;
+						if(!GetScale(unscale, state.sourceFormat) || !GetScale(scale, state.destFormat))
 						{
-							UShort4 c0 = As<UShort4>(RoundShort4(color));
-							Byte8 c1 = Pack(c0, c0);
-							*Pointer<UInt>(d) = UInt(As<Long>(c1)) | 0xFF000000;
+							return nullptr;
 						}
-						break;
-					case FORMAT_A16B16G16R16:
-						*Pointer<UShort4>(d) = UShort4(RoundInt(color));
-						break;
-					case FORMAT_G16R16:
-						*Pointer<UInt>(d) = UInt(As<Long>(UShort4(RoundInt(color))));
-						break;
-					case FORMAT_A32B32G32R32F:
-						*Pointer<Float4>(d) = color;
-						break;
-					case FORMAT_G32R32F:
-						*Pointer<Float2>(d) = Float2(color);
-						break;
-					case FORMAT_R32F:
-						*Pointer<Float>(d) = color.x;
-						break;
-					default:
-						return nullptr;
+
+						if(unscale != scale)
+						{
+							color *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
+						}
+
+						if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
+						{
+							color = Min(color, Float4(1.0f, 1.0f, 1.0f, 1.0f));
+
+							color = Max(color, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -1.0f,
+							                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -1.0f,
+							                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -1.0f,
+							                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -1.0f));
+						}
+
+						Pointer<Byte> d = destLine + i * Surface::bytes(state.destFormat);
+
+						if(!write(color, d, state.destFormat))
+						{
+							return nullptr;
+						}
 					}
 
 					x += w;
diff --git a/src/Renderer/Blitter.hpp b/src/Renderer/Blitter.hpp
index ca7ec66..3886bdf 100644
--- a/src/Renderer/Blitter.hpp
+++ b/src/Renderer/Blitter.hpp
@@ -65,6 +65,10 @@
 

 	private:

 		bool read(Float4 &color, Pointer<Byte> element, Format format);

+		bool write(Float4 &color, Pointer<Byte> element, Format format);

+		bool read(Int4 &color, Pointer<Byte> element, Format format);

+		bool write(Int4 &color, Pointer<Byte> element, Format format);

+		static bool GetScale(float4& scale, Format format);

 		bool blitReactor(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter);

 		Routine *generate(BlitState &state);

 

diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index b5f244d..61fc9bc 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -2290,11 +2290,30 @@
 		{
 		case FORMAT_R5G6B5:
 		case FORMAT_X8R8G8B8:
+		case FORMAT_X8B8G8R8I:
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8R8G8B8:
+		case FORMAT_A8B8G8R8I:
+		case FORMAT_R8UI:
+		case FORMAT_G8R8UI:
+		case FORMAT_X8B8G8R8UI:
+		case FORMAT_A8B8G8R8UI:
 		case FORMAT_A8B8G8R8:
+		case FORMAT_G8R8I:
 		case FORMAT_G8R8:
+		case FORMAT_R8I_SNORM:
+		case FORMAT_G8R8I_SNORM:
+		case FORMAT_X8B8G8R8I_SNORM:
+		case FORMAT_A8B8G8R8I_SNORM:
+		case FORMAT_R16I:
+		case FORMAT_R16UI:
+		case FORMAT_G16R16I:
+		case FORMAT_G16R16UI:
 		case FORMAT_G16R16:
+		case FORMAT_X16B16G16R16I:
+		case FORMAT_X16B16G16R16UI:
+		case FORMAT_A16B16G16R16I:
+		case FORMAT_A16B16G16R16UI:
 		case FORMAT_A16B16G16R16:
 		case FORMAT_V8U8:
 		case FORMAT_Q8W8V8U8:
@@ -2303,6 +2322,7 @@
 		case FORMAT_A16W16V16U16:
 		case FORMAT_Q16W16V16U16:
 		case FORMAT_A8:
+		case FORMAT_R8I:
 		case FORMAT_R8:
 		case FORMAT_L8:
 		case FORMAT_L16:
@@ -2310,6 +2330,14 @@
 		case FORMAT_YV12_BT601:
 		case FORMAT_YV12_BT709:
 		case FORMAT_YV12_JFIF:
+		case FORMAT_R32I:
+		case FORMAT_R32UI:
+		case FORMAT_G32R32I:
+		case FORMAT_G32R32UI:
+		case FORMAT_X32B32G32R32I:
+		case FORMAT_X32B32G32R32UI:
+		case FORMAT_A32B32G32R32I:
+		case FORMAT_A32B32G32R32UI:
 			return false;
 		case FORMAT_R32F:
 		case FORMAT_G32R32F:
@@ -2512,6 +2540,40 @@
 		}
 	}
 
+	bool Surface::isNonNormalizedInteger(Format format)   // True for the *I / *UI formats listed below, false for every other format (including the _SNORM ones)
+	{
+		switch(format)
+		{
+		case FORMAT_R8I:   // Single-channel formats
+		case FORMAT_R8UI:
+		case FORMAT_R16I:
+		case FORMAT_R16UI:
+		case FORMAT_R32I:
+		case FORMAT_R32UI:
+		case FORMAT_G8R8I:   // Two-channel formats
+		case FORMAT_G8R8UI:
+		case FORMAT_G16R16I:
+		case FORMAT_G16R16UI:
+		case FORMAT_G32R32I:
+		case FORMAT_G32R32UI:
+		case FORMAT_X8B8G8R8I:   // Three channels plus an unused X channel
+		case FORMAT_X8B8G8R8UI:
+		case FORMAT_X16B16G16R16I:
+		case FORMAT_X16B16G16R16UI:
+		case FORMAT_X32B32G32R32I:
+		case FORMAT_X32B32G32R32UI:
+		case FORMAT_A8B8G8R8I:   // Four-channel formats
+		case FORMAT_A8B8G8R8UI:
+		case FORMAT_A16B16G16R16I:
+		case FORMAT_A16B16G16R16UI:
+		case FORMAT_A32B32G32R32I:
+		case FORMAT_A32B32G32R32UI:
+			return true;
+		default:
+			return false;
+		}
+	}
+
 	int Surface::componentCount(Format format)
 	{
 		switch(format)
@@ -3300,48 +3362,42 @@
 		external.write(x, y, color);
 	}
 
-	Color<float> Surface::readInternal(int x, int y, int z) const
+	void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)   // Copies one pixel from 'source' at (srcX, srcY) into this surface's internal buffer at (x, y)
 	{
-		ASSERT(internal.lock != LOCK_UNLOCKED);
+		ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);   // Both internal buffers must already be locked and 'source' must be non-null
 
-		return internal.read(x, y, z);
-	}
+		sw::Color<float> color;
 
-	Color<float> Surface::readInternal(int x, int y) const
-	{
-		ASSERT(internal.lock != LOCK_UNLOCKED);
-
-		return internal.read(x, y);
-	}
-
-	Color<float> Surface::sampleInternal(float x, float y, float z) const
-	{
-		ASSERT(internal.lock != LOCK_UNLOCKED);
-
-		return internal.sample(x, y, z);
-	}
-
-	Color<float> Surface::sampleInternal(float x, float y) const
-	{
-		ASSERT(internal.lock != LOCK_UNLOCKED);
-
-		return internal.sample(x, y);
-	}
-
-	void Surface::writeInternal(int x, int y, int z, const Color<float> &color)
-	{
-		ASSERT(internal.lock != LOCK_UNLOCKED);
-
-		internal.write(x, y, z, color);
-	}
-
-	void Surface::writeInternal(int x, int y, const Color<float> &color)
-	{
-		ASSERT(internal.lock != LOCK_UNLOCKED);
+		if(!filter)   // Unfiltered: read the source pixel at the truncated coordinates
+		{
+			color = source->internal.read((int)srcX, (int)srcY);
+		}
+		else   // Bilinear filtering
+		{
+			color = source->internal.sample(srcX, srcY);
+		}
 
 		internal.write(x, y, color);
 	}
 
+	void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)   // Copies one pixel from 'source' at (srcX, srcY, srcZ) into this surface's internal buffer at (x, y, z)
+	{
+		ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);   // Both internal buffers must already be locked and 'source' must be non-null
+
+		sw::Color<float> texel;
+
+		if(filter)   // Filtered: sample the source at the fractional coordinates
+		{
+			texel = source->internal.sample(srcX, srcY, srcZ);
+		}
+		else   // Unfiltered: read the source pixel at the truncated coordinates
+		{
+			texel = source->internal.read((int)srcX, (int)srcY, int(srcZ));
+		}
+
+		internal.write(x, y, z, texel);
+	}
+
 	bool Surface::hasStencil() const
 	{
 		return isStencil(external.format);
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp
index 47a023f..eb5af43 100644
--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -294,12 +294,8 @@
 		void writeExternal(int x, int y, int z, const Color<float> &color);

 		void writeExternal(int x, int y, const Color<float> &color);

 		

-		Color<float> readInternal(int x, int y, int z) const;

-		Color<float> readInternal(int x, int y) const;

-		Color<float> sampleInternal(float x, float y, float z) const;

-		Color<float> sampleInternal(float x, float y) const;

-		void writeInternal(int x, int y, int z, const Color<float> &color);

-		void writeInternal(int x, int y, const Color<float> &color);

+		void copyInternal(const Surface* src, int x, int y, float srcX, float srcY, bool filter);

+		void copyInternal(const Surface* src, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter);

 

 		bool hasStencil() const;

 		bool hasDepth() const;

@@ -327,6 +323,7 @@
 		static bool isSRGBreadable(Format format);

 		static bool isSRGBwritable(Format format);

 		static bool isCompressed(Format format);

+		static bool isNonNormalizedInteger(Format format);

 		static int componentCount(Format format);

 

 		static void setTexturePalette(unsigned int *palette);