Add support for borders around textures.

Borders are required to support seamless cubemap sampling. Subsequent
patches will fill the borders with pixels from adjacent cube faces.

The border is expressed in pixels and is added on all edges,
resulting in an image of dimensions
(width + 2 * border) x (height + 2 * border).
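The surface still exposes dimensions of width x height through the API,
and locking at (x, y) still returns a pointer to the same pixel as
before: lock coordinates are relative to the image inside the border.
Only the internal buffer carries the border; the external and stencil
buffers keep a border of 0.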

Change-Id: I06d5121267ce1a2c50e628490d8690de71bfeb08
Reviewed-on: https://swiftshader-review.googlesource.com/8208
Tested-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/D3D8/Direct3DSurface8.cpp b/src/D3D8/Direct3DSurface8.cpp
index 6ad0e6b..c76b982 100644
--- a/src/D3D8/Direct3DSurface8.cpp
+++ b/src/D3D8/Direct3DSurface8.cpp
@@ -227,7 +227,7 @@
 		desc->Type = D3DRTYPE_SURFACE;
 		desc->Height = height;
 		desc->Width = width;
-		desc->Size = size(getWidth(), getHeight(), getDepth(), getExternalFormat());
+		desc->Size = size(getWidth(), getHeight(), getDepth(), 0, getExternalFormat());
 		desc->MultiSampleType = multiSample;
 		desc->Usage = usage;
 
@@ -285,6 +285,6 @@
 
 	unsigned int Direct3DSurface8::memoryUsage(int width, int height, D3DFORMAT format)
 	{
-		return Surface::size(width, height, 1, translateFormat(format));
+		return Surface::size(width, height, 1, 0, translateFormat(format));
 	}
 }
diff --git a/src/D3D8/Direct3DVolume8.cpp b/src/D3D8/Direct3DVolume8.cpp
index aa33ea1..0191a66 100644
--- a/src/D3D8/Direct3DVolume8.cpp
+++ b/src/D3D8/Direct3DVolume8.cpp
@@ -144,8 +144,8 @@
 			return INVALIDCALL();
 		}
 
-		lockedVolume->RowPitch = pitchB(getWidth(), getExternalFormat(), false);
-		lockedVolume->SlicePitch = sliceB(getWidth(), getHeight(), getExternalFormat(), false);
+		lockedVolume->RowPitch = pitchB(getWidth(), 0, getExternalFormat(), false);
+		lockedVolume->SlicePitch = sliceB(getWidth(), getHeight(), 0, getExternalFormat(), false);
 
 		sw::Lock lock = sw::LOCK_READWRITE;
 
@@ -194,6 +194,6 @@
 
 	unsigned int Direct3DVolume8::memoryUsage(int width, int height, int depth, D3DFORMAT format)
 	{
-		return Surface::size(width, height, depth, translateFormat(format));
+		return Surface::size(width, height, depth, 0, translateFormat(format));
 	}
 }
diff --git a/src/D3D9/Direct3DDevice9.cpp b/src/D3D9/Direct3DDevice9.cpp
index bc58fdb..edeff23 100644
--- a/src/D3D9/Direct3DDevice9.cpp
+++ b/src/D3D9/Direct3DDevice9.cpp
@@ -2660,7 +2660,7 @@
 		void *bitmap = cursorSurface->lockExternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
 
 		delete cursor;
-		cursor = sw::Surface::create(0, width, height, 1, sw::FORMAT_A8R8G8B8, false, false);
+		cursor = sw::Surface::create(nullptr, width, height, 1, 0, sw::FORMAT_A8R8G8B8, false, false);
 
 		void *buffer = cursor->lockExternal(0, 0, 0, sw::LOCK_DISCARD, sw::PUBLIC);
 		memcpy(buffer, bitmap, width * height * sizeof(unsigned int));
@@ -2674,7 +2674,7 @@
 		}
 		else
 		{
-			sw::FrameBuffer::setCursorImage(0);
+			sw::FrameBuffer::setCursorImage(nullptr);
 		}
 
 		sw::FrameBuffer::setCursorOrigin(x0, y0);
diff --git a/src/D3D9/Direct3DSurface9.cpp b/src/D3D9/Direct3DSurface9.cpp
index fdbc099..f2001b7 100644
--- a/src/D3D9/Direct3DSurface9.cpp
+++ b/src/D3D9/Direct3DSurface9.cpp
@@ -76,7 +76,8 @@
 		return (pool != D3DPOOL_DEFAULT) || (usage & D3DUSAGE_DYNAMIC) || lockableOverride;
 	}
 
-	Direct3DSurface9::Direct3DSurface9(Direct3DDevice9 *device, Unknown *container, int width, int height, D3DFORMAT format, D3DPOOL pool, D3DMULTISAMPLE_TYPE multiSample, unsigned int quality, bool lockableOverride, unsigned long usage) : Direct3DResource9(device, D3DRTYPE_SURFACE, pool, memoryUsage(width, height, format)), Surface(getParentResource(container), width, height, sampleCount(multiSample, quality), translateFormat(format), isLockable(pool, usage, lockableOverride), (usage & D3DUSAGE_RENDERTARGET) || (usage & D3DUSAGE_DEPTHSTENCIL)), container(container), width(width), height(height), format(format), pool(pool), multiSample(multiSample), quality(quality), lockable(isLockable(pool, usage, lockableOverride)), usage(usage)
+	Direct3DSurface9::Direct3DSurface9(Direct3DDevice9 *device, Unknown *container, int width, int height, D3DFORMAT format, D3DPOOL pool, D3DMULTISAMPLE_TYPE multiSample, unsigned int quality, bool lockableOverride, unsigned long usage)
+		: Direct3DResource9(device, D3DRTYPE_SURFACE, pool, memoryUsage(width, height, format)), Surface(getParentResource(container), width, height, sampleCount(multiSample, quality), 0, translateFormat(format), isLockable(pool, usage, lockableOverride), (usage & D3DUSAGE_RENDERTARGET) || (usage & D3DUSAGE_DEPTHSTENCIL)), container(container), width(width), height(height), format(format), pool(pool), multiSample(multiSample), quality(quality), lockable(isLockable(pool, usage, lockableOverride)), usage(usage)
 	{
 		parentTexture = dynamic_cast<Direct3DBaseTexture9*>(container);
 	}
@@ -412,6 +413,6 @@
 
 	unsigned int Direct3DSurface9::memoryUsage(int width, int height, D3DFORMAT format)
 	{
-		return Surface::size(width, height, 1, translateFormat(format));
+		return Surface::size(width, height, 1, 0, translateFormat(format));
 	}
 }
diff --git a/src/D3D9/Direct3DVolume9.cpp b/src/D3D9/Direct3DVolume9.cpp
index 4bf498b..2118b5e 100644
--- a/src/D3D9/Direct3DVolume9.cpp
+++ b/src/D3D9/Direct3DVolume9.cpp
@@ -30,7 +30,8 @@
 		return (pool != D3DPOOL_DEFAULT) || (usage & D3DUSAGE_DYNAMIC);
 	}
 
-	Direct3DVolume9::Direct3DVolume9(Direct3DDevice9 *device, Direct3DVolumeTexture9 *container, int width, int height, int depth, D3DFORMAT format, D3DPOOL pool, unsigned long usage) : device(device), Surface(container->getResource(), width, height, depth, translateFormat(format), isLockable(pool, usage), false), container(container), width(width), height(height), depth(depth), format(format), pool(pool), lockable(isLockable(pool, usage)), usage(usage)
+	Direct3DVolume9::Direct3DVolume9(Direct3DDevice9 *device, Direct3DVolumeTexture9 *container, int width, int height, int depth, D3DFORMAT format, D3DPOOL pool, unsigned long usage)
+		: device(device), Surface(container->getResource(), width, height, depth, 0, translateFormat(format), isLockable(pool, usage), false), container(container), width(width), height(height), depth(depth), format(format), pool(pool), lockable(isLockable(pool, usage)), usage(usage)
 	{
 		resource = new Direct3DResource9(device, D3DRTYPE_VOLUME, pool, memoryUsage(width, height, depth, format));
 		resource->bind();
@@ -229,6 +230,6 @@
 
 	unsigned int Direct3DVolume9::memoryUsage(int width, int height, int depth, D3DFORMAT format)
 	{
-		return Surface::size(width, height, depth, translateFormat(format));
+		return Surface::size(width, height, depth, 0, translateFormat(format));
 	}
 }
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp
index 72f6c05..fe57b93 100644
--- a/src/OpenGL/common/Image.cpp
+++ b/src/OpenGL/common/Image.cpp
@@ -1232,8 +1232,8 @@
 	public:
 		ImageImplementation(Texture *parentTexture, GLsizei width, GLsizei height, GLenum format, GLenum type)
 			: Image(parentTexture, width, height, format, type) {}
-		ImageImplementation(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type)
-			: Image(parentTexture, width, height, depth, format, type) {}
+		ImageImplementation(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, int border, GLenum format, GLenum type)
+			: Image(parentTexture, width, height, depth, border, format, type) {}
 		ImageImplementation(GLsizei width, GLsizei height, GLenum format, GLenum type, int pitchP)
 			: Image(width, height, format, type, pitchP) {}
 		ImageImplementation(GLsizei width, GLsizei height, sw::Format internalFormat, int multiSampleDepth, bool lockable)
@@ -1265,9 +1265,9 @@
 		return new ImageImplementation(parentTexture, width, height, format, type);
 	}
 
-	Image *Image::create(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type)
+	Image *Image::create(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, int border, GLenum format, GLenum type)
 	{
-		return new ImageImplementation(parentTexture, width, height, depth, format, type);
+		return new ImageImplementation(parentTexture, width, height, depth, border, format, type);
 	}
 
 	Image *Image::create(GLsizei width, GLsizei height, GLenum format, GLenum type, int pitchP)
diff --git a/src/OpenGL/common/Image.hpp b/src/OpenGL/common/Image.hpp
index 41fa391..a672f9b 100644
--- a/src/OpenGL/common/Image.hpp
+++ b/src/OpenGL/common/Image.hpp
@@ -52,7 +52,7 @@
 protected:
 	// 2D texture image
 	Image(Texture *parentTexture, GLsizei width, GLsizei height, GLenum format, GLenum type)
-		: sw::Surface(parentTexture->getResource(), width, height, 1, SelectInternalFormat(format, type), true, true),
+		: sw::Surface(parentTexture->getResource(), width, height, 1, 0, SelectInternalFormat(format, type), true, true),
 		  width(width), height(height), format(format), type(type), internalFormat(SelectInternalFormat(format, type)), depth(1),
 		  parentTexture(parentTexture)
 	{
@@ -61,9 +61,9 @@
 		parentTexture->addRef();
 	}
 
-	// 3D texture image
-	Image(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type)
-		: sw::Surface(parentTexture->getResource(), width, height, depth, SelectInternalFormat(format, type), true, true),
+	// 3D/Cube texture image
+	Image(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, int border, GLenum format, GLenum type)
+		: sw::Surface(parentTexture->getResource(), width, height, depth, border, SelectInternalFormat(format, type), true, true),
 		  width(width), height(height), format(format), type(type), internalFormat(SelectInternalFormat(format, type)), depth(depth),
 		  parentTexture(parentTexture)
 	{
@@ -74,7 +74,7 @@
 
 	// Native EGL image
 	Image(GLsizei width, GLsizei height, GLenum format, GLenum type, int pitchP)
-		: sw::Surface(nullptr, width, height, 1, SelectInternalFormat(format, type), true, true, pitchP),
+		: sw::Surface(nullptr, width, height, 1, 0, SelectInternalFormat(format, type), true, true, pitchP),
 		  width(width), height(height), format(format), type(type), internalFormat(SelectInternalFormat(format, type)), depth(1),
 		  parentTexture(nullptr)
 	{
@@ -84,7 +84,7 @@
 
 	// Render target
 	Image(GLsizei width, GLsizei height, sw::Format internalFormat, int multiSampleDepth, bool lockable)
-		: sw::Surface(nullptr, width, height, multiSampleDepth, internalFormat, lockable, true),
+		: sw::Surface(nullptr, width, height, multiSampleDepth, 0, internalFormat, lockable, true),
 		  width(width), height(height), format(0 /*GL_NONE*/), type(0 /*GL_NONE*/), internalFormat(internalFormat), depth(multiSampleDepth),
 		  parentTexture(nullptr)
 	{
@@ -96,8 +96,8 @@
 	// 2D texture image
 	static Image *create(Texture *parentTexture, GLsizei width, GLsizei height, GLenum format, GLenum type);
 
-	// 3D texture image
-	static Image *create(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type);
+	// 3D/Cube texture image
+	static Image *create(Texture *parentTexture, GLsizei width, GLsizei height, GLsizei depth, int border, GLenum format, GLenum type);
 
 	// Native EGL image
 	static Image *create(GLsizei width, GLsizei height, GLenum format, GLenum type, int pitchP);
diff --git a/src/OpenGL/libGL/Image.cpp b/src/OpenGL/libGL/Image.cpp
index 4547d92..30a8691 100644
--- a/src/OpenGL/libGL/Image.cpp
+++ b/src/OpenGL/libGL/Image.cpp
@@ -39,14 +39,14 @@
 	Image::Image(Texture *parentTexture, GLsizei width, GLsizei height, GLenum format, GLenum type)
 		: parentTexture(parentTexture), width(width), height(height), format(format), type(type)
 		, internalFormat(selectInternalFormat(format, type)), multiSampleDepth(1)
-		, sw::Surface(getParentResource(parentTexture), width, height, 1, selectInternalFormat(format, type), true, true)
+		, sw::Surface(getParentResource(parentTexture), width, height, 1, 0, selectInternalFormat(format, type), true, true)
 	{
 		referenceCount = 1;
 	}
 
 	Image::Image(Texture *parentTexture, GLsizei width, GLsizei height, sw::Format internalFormat, int multiSampleDepth, bool lockable, bool renderTarget)
 		: parentTexture(parentTexture), width(width), height(height), internalFormat(internalFormat), format(0 /*GL_NONE*/), type(0 /*GL_NONE*/), multiSampleDepth(multiSampleDepth)
-		, sw::Surface(getParentResource(parentTexture), width, height, multiSampleDepth, internalFormat, lockable, renderTarget)
+		, sw::Surface(getParentResource(parentTexture), width, height, multiSampleDepth, 0, internalFormat, lockable, renderTarget)
 	{
 		referenceCount = 1;
 	}
diff --git a/src/OpenGL/libGLESv2/Texture.cpp b/src/OpenGL/libGLESv2/Texture.cpp
index 0b09e11..784d3fd 100644
--- a/src/OpenGL/libGLESv2/Texture.cpp
+++ b/src/OpenGL/libGLESv2/Texture.cpp
@@ -1128,7 +1128,8 @@
 	}
 
 	GLenum sizedInternalFormat = GetSizedInternalFormat(format, GL_UNSIGNED_BYTE);
-	image[face][level] = egl::Image::create(this, width, height, sizedInternalFormat, GL_UNSIGNED_BYTE);
+	int border = (egl::getClientVersion() >= 3) ? 1 : 0;
+	image[face][level] = egl::Image::create(this, width, height, 1, border, sizedInternalFormat, GL_UNSIGNED_BYTE);
 
 	if(!image[face][level])
 	{
@@ -1269,7 +1270,8 @@
 		image[face][level]->release();
 	}
 
-	image[face][level] = egl::Image::create(this, width, height, format, type);
+	int border = (egl::getClientVersion() >= 3) ? 1 : 0;
+	image[face][level] = egl::Image::create(this, width, height, 1, border, format, type);
 
 	if(!image[face][level])
 	{
@@ -1297,7 +1299,8 @@
 	}
 
 	GLenum sizedInternalFormat = GetSizedInternalFormat(format, GL_UNSIGNED_BYTE);
-	image[face][level] = egl::Image::create(this, width, height, sizedInternalFormat, GL_UNSIGNED_BYTE);
+	int border = (egl::getClientVersion() >= 3) ? 1 : 0;
+	image[face][level] = egl::Image::create(this, width, height, 1, border, sizedInternalFormat, GL_UNSIGNED_BYTE);
 
 	if(!image[face][level])
 	{
@@ -1391,7 +1394,8 @@
 				image[f][i]->release();
 			}
 
-			image[f][i] = egl::Image::create(this, std::max(image[0][0]->getWidth() >> i, 1), std::max(image[0][0]->getHeight() >> i, 1), image[0][0]->getFormat(), image[0][0]->getType());
+			int border = (egl::getClientVersion() >= 3) ? 1 : 0;
+			image[f][i] = egl::Image::create(this, std::max(image[0][0]->getWidth() >> i, 1), std::max(image[0][0]->getHeight() >> i, 1), 1, border, image[0][0]->getFormat(), image[0][0]->getType());
 
 			if(!image[f][i])
 			{
@@ -1592,7 +1596,7 @@
 		image[level]->release();
 	}
 
-	image[level] = egl::Image::create(this, width, height, depth, format, type);
+	image[level] = egl::Image::create(this, width, height, depth, 0, format, type);
 
 	if(!image[level])
 	{
@@ -1615,7 +1619,7 @@
 	}
 
 	GLenum sizedInternalFormat = GetSizedInternalFormat(format, GL_UNSIGNED_BYTE);
-	image[level] = egl::Image::create(this, width, height, depth, sizedInternalFormat, GL_UNSIGNED_BYTE);
+	image[level] = egl::Image::create(this, width, height, depth, 0, sizedInternalFormat, GL_UNSIGNED_BYTE);
 
 	if(!image[level])
 	{
@@ -1651,7 +1655,7 @@
 	}
 
 	GLenum sizedInternalFormat = GetSizedInternalFormat(format, GL_UNSIGNED_BYTE);
-	image[level] = egl::Image::create(this, width, height, depth, sizedInternalFormat, GL_UNSIGNED_BYTE);
+	image[level] = egl::Image::create(this, width, height, depth, 0, sizedInternalFormat, GL_UNSIGNED_BYTE);
 
 	if(!image[level])
 	{
@@ -1829,7 +1833,7 @@
 			image[i]->release();
 		}
 
-		image[i] = egl::Image::create(this, std::max(image[0]->getWidth() >> i, 1), std::max(image[0]->getHeight() >> i, 1), std::max(image[0]->getDepth() >> i, 1), image[0]->getFormat(), image[0]->getType());
+		image[i] = egl::Image::create(this, std::max(image[0]->getWidth() >> i, 1), std::max(image[0]->getHeight() >> i, 1), std::max(image[0]->getDepth() >> i, 1), 0, image[0]->getFormat(), image[0]->getType());
 
 		if(!image[i])
 		{
@@ -1928,7 +1932,7 @@
 
 		GLsizei w = std::max(image[0]->getWidth() >> i, 1);
 		GLsizei h = std::max(image[0]->getHeight() >> i, 1);
-		image[i] = egl::Image::create(this, w, h, depth, image[0]->getFormat(), image[0]->getType());
+		image[i] = egl::Image::create(this, w, h, depth, 0, image[0]->getFormat(), image[0]->getType());
 
 		if(!image[i])
 		{
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 3147177..2ec5183 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -52,14 +52,14 @@
 
 	void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
 	{
-		void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
+		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * sliceB;
 
 		write(element, color);
 	}
 
 	void Surface::Buffer::write(int x, int y, const Color<float> &color)
 	{
-		void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
+		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
 
 		write(element, color);
 	}
@@ -370,14 +370,14 @@
 
 	Color<float> Surface::Buffer::read(int x, int y, int z) const
 	{
-		void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
+		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * sliceB;
 
 		return read(element);
 	}
 
 	Color<float> Surface::Buffer::read(int x, int y) const
 	{
-		void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
+		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
 
 		return read(element);
 	}
@@ -1088,6 +1088,9 @@
 
 		if(buffer)
 		{
+			x += border;
+			y += border;
+
 			switch(format)
 			{
 			#if S3TC_SUPPORT
@@ -1172,8 +1175,8 @@
 	public:
 		SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
 			: Surface(width, height, depth, format, pixels, pitch, slice) {}
-		SurfaceImplementation(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchP = 0)
-			: Surface(texture, width, height, depth, format, lockable, renderTarget, pitchP) {}
+		SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchP = 0)
+			: Surface(texture, width, height, depth, border, format, lockable, renderTarget, pitchP) {}
 		~SurfaceImplementation() override {};
 
 		void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
@@ -1192,9 +1195,9 @@
 		return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
 	}
 
-	Surface *Surface::create(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided)
+	Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchPprovided)
 	{
-		return new SurfaceImplementation(texture, width, height, depth, format, lockable, renderTarget, pitchPprovided);
+		return new SurfaceImplementation(texture, width, height, depth, border, format, lockable, renderTarget, pitchPprovided);
 	}
 
 	Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
@@ -1214,6 +1217,7 @@
 		external.pitchP = external.bytes ? pitch / external.bytes : 0;
 		external.sliceB = slice;
 		external.sliceP = external.bytes ? slice / external.bytes : 0;
+		external.border = 0;
 		external.lock = LOCK_UNLOCKED;
 		external.dirty = true;
 
@@ -1223,10 +1227,11 @@
 		internal.depth = depth;
 		internal.format = selectInternalFormat(format);
 		internal.bytes = bytes(internal.format);
-		internal.pitchB = pitchB(internal.width, internal.format, false);
-		internal.pitchP = pitchP(internal.width, internal.format, false);
-		internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
-		internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
+		internal.pitchB = pitchB(internal.width, 0, internal.format, false);
+		internal.pitchP = pitchP(internal.width, 0, internal.format, false);
+		internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
+		internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
+		internal.border = 0;
 		internal.lock = LOCK_UNLOCKED;
 		internal.dirty = false;
 
@@ -1236,10 +1241,11 @@
 		stencil.depth = depth;
 		stencil.format = FORMAT_S8;
 		stencil.bytes = bytes(stencil.format);
-		stencil.pitchB = pitchB(stencil.width, stencil.format, false);
-		stencil.pitchP = pitchP(stencil.width, stencil.format, false);
-		stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
-		stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
+		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
+		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
+		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
+		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
+		stencil.border = 0;
 		stencil.lock = LOCK_UNLOCKED;
 		stencil.dirty = false;
 
@@ -1247,7 +1253,7 @@
 		paletteUsed = 0;
 	}
 
-	Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
+	Surface::Surface(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
 	{
 		resource = texture ? texture : new Resource(0);
 		hasParent = texture != 0;
@@ -1260,10 +1266,11 @@
 		external.depth = depth;
 		external.format = format;
 		external.bytes = bytes(external.format);
-		external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
-		external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
-		external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
-		external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
+		external.pitchB = pitchB(external.width, 0, external.format, renderTarget && !texture);
+		external.pitchP = pitchP(external.width, 0, external.format, renderTarget && !texture);
+		external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
+		external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
+		external.border = 0;
 		external.lock = LOCK_UNLOCKED;
 		external.dirty = false;
 
@@ -1273,10 +1280,11 @@
 		internal.depth = depth;
 		internal.format = selectInternalFormat(format);
 		internal.bytes = bytes(internal.format);
-		internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes;
-		internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided;
-		internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
-		internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
+		internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
+		internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
+		internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
+		internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
+		internal.border = border;
 		internal.lock = LOCK_UNLOCKED;
 		internal.dirty = false;
 
@@ -1286,10 +1294,11 @@
 		stencil.depth = depth;
 		stencil.format = FORMAT_S8;
 		stencil.bytes = bytes(stencil.format);
-		stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
-		stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
-		stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
-		stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
+		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
+		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
+		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
+		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
+		stencil.border = 0;
 		stencil.lock = LOCK_UNLOCKED;
 		stencil.dirty = false;
 
@@ -1337,7 +1346,7 @@
 			}
 			else
 			{
-				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
+				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.format);
 			}
 		}
 
@@ -1389,7 +1398,7 @@
 			}
 			else
 			{
-				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
+				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.format);
 			}
 		}
 
@@ -1464,7 +1473,7 @@
 
 		if(!stencil.buffer)
 		{
-			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
+			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.format);
 		}
 
 		return stencil.lockRect(x, y, front, LOCK_READWRITE);   // FIXME
@@ -1640,8 +1649,10 @@
 		return 0;
 	}
 
-	int Surface::pitchB(int width, Format format, bool target)
+	int Surface::pitchB(int width, int border, Format format, bool target)
 	{
+		width += 2 * border;
+
 		if(target || isDepth(format) || isStencil(format))
 		{
 			width = align(width, 2);
@@ -1716,15 +1727,17 @@
 		}
 	}
 
-	int Surface::pitchP(int width, Format format, bool target)
+	int Surface::pitchP(int width, int border, Format format, bool target)
 	{
 		int B = bytes(format);
 
-		return B > 0 ? pitchB(width, format, target) / B : 0;
+		return B > 0 ? pitchB(width, border, format, target) / B : 0;
 	}
 
-	int Surface::sliceB(int width, int height, Format format, bool target)
+	int Surface::sliceB(int width, int height, int border, Format format, bool target)
 	{
+		height += 2 * border;
+
 		if(target || isDepth(format) || isStencil(format))
 		{
 			height = ((height + 1) & ~1);
@@ -1752,7 +1765,7 @@
 		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
 		case FORMAT_RGBA_ASTC_5x4_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
-			return pitchB(width, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
+			return pitchB(width, border, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
 		case FORMAT_RGBA_ASTC_5x5_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
 		case FORMAT_RGBA_ASTC_6x5_KHR:
@@ -1761,39 +1774,39 @@
 		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
 		case FORMAT_RGBA_ASTC_10x5_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
-			return pitchB(width, format, target) * ((height + 4) / 5);   // Pitch computed per 5 rows
+			return pitchB(width, border, format, target) * ((height + 4) / 5);   // Pitch computed per 5 rows
 		case FORMAT_RGBA_ASTC_6x6_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
 		case FORMAT_RGBA_ASTC_8x6_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
 		case FORMAT_RGBA_ASTC_10x6_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
-			return pitchB(width, format, target) * ((height + 5) / 6);   // Pitch computed per 6 rows
+			return pitchB(width, border, format, target) * ((height + 5) / 6);   // Pitch computed per 6 rows
 		case FORMAT_RGBA_ASTC_8x8_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
 		case FORMAT_RGBA_ASTC_10x8_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
-			return pitchB(width, format, target) * ((height + 7) / 8);   // Pitch computed per 8 rows
+			return pitchB(width, border, format, target) * ((height + 7) / 8);   // Pitch computed per 8 rows
 		case FORMAT_RGBA_ASTC_10x10_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
 		case FORMAT_RGBA_ASTC_12x10_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
-			return pitchB(width, format, target) * ((height + 9) / 10);   // Pitch computed per 10 rows
+			return pitchB(width, border, format, target) * ((height + 9) / 10);   // Pitch computed per 10 rows
 		case FORMAT_RGBA_ASTC_12x12_KHR:
 		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
-			return pitchB(width, format, target) * ((height + 11) / 12);   // Pitch computed per 12 rows
+			return pitchB(width, border, format, target) * ((height + 11) / 12);   // Pitch computed per 12 rows
 		case FORMAT_ATI1:
 		case FORMAT_ATI2:
 		default:
-			return pitchB(width, format, target) * height;   // Pitch computed per row
+			return pitchB(width, border, format, target) * height;   // Pitch computed per row
 		}
 	}
 
-	int Surface::sliceP(int width, int height, Format format, bool target)
+	int Surface::sliceP(int width, int height, int border, Format format, bool target)
 	{
 		int B = bytes(format);
 
-		return B > 0 ? sliceB(width, height, format, target) / B : 0;
+		return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
 	}
 
 	void Surface::update(Buffer &destination, Buffer &source)
@@ -1866,8 +1879,8 @@
 
 	void Surface::genericUpdate(Buffer &destination, Buffer &source)
 	{
-		unsigned char *sourceSlice = (unsigned char*)source.buffer;
-		unsigned char *destinationSlice = (unsigned char*)destination.buffer;
+		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
+		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
 		int depth = min(destination.depth, source.depth);
 		int height = min(destination.height, source.height);
@@ -1907,12 +1920,15 @@
 			sourceSlice += source.sliceB;
 			destinationSlice += destination.sliceB;
 		}
+
+		source.unlockRect();
+		destination.unlockRect();
 	}
 
-	void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
+	void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
 	{
-		unsigned char *sourceSlice = (unsigned char*)source.buffer;
-		unsigned char *destinationSlice = (unsigned char*)destination.buffer;
+		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
+		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
 		for(int z = 0; z < destination.depth && z < source.depth; z++)
 		{
@@ -1943,12 +1959,15 @@
 			sourceSlice += source.sliceB;
 			destinationSlice += destination.sliceB;
 		}
+
+		source.unlockRect();
+		destination.unlockRect();
 	}
 
-	void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
+	void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
 	{
-		unsigned char *sourceSlice = (unsigned char*)source.buffer;
-		unsigned char *destinationSlice = (unsigned char*)destination.buffer;
+		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
+		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
 		for(int z = 0; z < destination.depth && z < source.depth; z++)
 		{
@@ -1981,12 +2000,15 @@
 			sourceSlice += source.sliceB;
 			destinationSlice += destination.sliceB;
 		}
+
+		source.unlockRect();
+		destination.unlockRect();
 	}
 
-	void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
+	void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
 	{
-		unsigned char *sourceSlice = (unsigned char*)source.buffer;
-		unsigned char *destinationSlice = (unsigned char*)destination.buffer;
+		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
+		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
 		for(int z = 0; z < destination.depth && z < source.depth; z++)
 		{
@@ -2020,12 +2042,15 @@
 			sourceSlice += source.sliceB;
 			destinationSlice += destination.sliceB;
 		}
+
+		source.unlockRect();
+		destination.unlockRect();
 	}
 
-	void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
+	void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
 	{
-		unsigned char *sourceSlice = (unsigned char*)source.buffer;
-		unsigned char *destinationSlice = (unsigned char*)destination.buffer;
+		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
+		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
 		for(int z = 0; z < destination.depth && z < source.depth; z++)
 		{
@@ -2058,12 +2083,15 @@
 			sourceSlice += source.sliceB;
 			destinationSlice += destination.sliceB;
 		}
+
+		source.unlockRect();
+		destination.unlockRect();
 	}
 
-	void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
+	void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
 	{
-		unsigned char *sourceSlice = (unsigned char*)source.buffer;
-		unsigned char *destinationSlice = (unsigned char*)destination.buffer;
+		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
+		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
 		for(int z = 0; z < destination.depth && z < source.depth; z++)
 		{
@@ -2097,12 +2125,15 @@
 			sourceSlice += source.sliceB;
 			destinationSlice += destination.sliceB;
 		}
+
+		source.unlockRect();
+		destination.unlockRect();
 	}
 
-	void Surface::decodeP8(Buffer &destination, const Buffer &source)
+	void Surface::decodeP8(Buffer &destination, Buffer &source)
 	{
-		unsigned char *sourceSlice = (unsigned char*)source.buffer;
-		unsigned char *destinationSlice = (unsigned char*)destination.buffer;
+		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
+		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
 		for(int z = 0; z < destination.depth && z < source.depth; z++)
 		{
@@ -2136,13 +2167,16 @@
 			sourceSlice += source.sliceB;
 			destinationSlice += destination.sliceB;
 		}
+
+		source.unlockRect();
+		destination.unlockRect();
 	}
 
 #if S3TC_SUPPORT
-	void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
+	void Surface::decodeDXT1(Buffer &internal, Buffer &external)
 	{
-		unsigned int *destSlice = (unsigned int*)internal.buffer;
-		const DXT1 *source = (const DXT1*)external.buffer;
+		unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
+		const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);
 
 		for(int z = 0; z < external.depth; z++)
 		{
@@ -2199,12 +2233,15 @@
 
 			(byte*&)destSlice += internal.sliceB;
 		}
+
+		external.unlockRect();
+		internal.unlockRect();
 	}
 
-	void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
+	void Surface::decodeDXT3(Buffer &internal, Buffer &external)
 	{
-		unsigned int *destSlice = (unsigned int*)internal.buffer;
-		const DXT3 *source = (const DXT3*)external.buffer;
+		unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
+		const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY);
 
 		for(int z = 0; z < external.depth; z++)
 		{
@@ -2246,12 +2283,15 @@
 
 			(byte*&)destSlice += internal.sliceB;
 		}
+
+		external.unlockRect();
+		internal.unlockRect();
 	}
 
-	void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
+	void Surface::decodeDXT5(Buffer &internal, Buffer &external)
 	{
-		unsigned int *destSlice = (unsigned int*)internal.buffer;
-		const DXT5 *source = (const DXT5*)external.buffer;
+		unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
+		const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY);
 
 		for(int z = 0; z < external.depth; z++)
 		{
@@ -2317,13 +2357,16 @@
 
 			(byte*&)destSlice += internal.sliceB;
 		}
+
+		external.unlockRect();
+		internal.unlockRect();
 	}
 #endif
 
-	void Surface::decodeATI1(Buffer &internal, const Buffer &external)
+	void Surface::decodeATI1(Buffer &internal, Buffer &external)
 	{
-		byte *destSlice = (byte*)internal.buffer;
-		const ATI1 *source = (const ATI1*)external.buffer;
+		byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
+		const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY);
 
 		for(int z = 0; z < external.depth; z++)
 		{
@@ -2371,12 +2414,15 @@
 
 			destSlice += internal.sliceB;
 		}
+
+		external.unlockRect();
+		internal.unlockRect();
 	}
 
-	void Surface::decodeATI2(Buffer &internal, const Buffer &external)
+	void Surface::decodeATI2(Buffer &internal, Buffer &external)
 	{
-		word *destSlice = (word*)internal.buffer;
-		const ATI2 *source = (const ATI2*)external.buffer;
+		word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
+		const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY);
 
 		for(int z = 0; z < external.depth; z++)
 		{
@@ -2451,12 +2497,17 @@
 
 			(byte*&)destSlice += internal.sliceB;
 		}
+
+		external.unlockRect();
+		internal.unlockRect();
 	}
 
-	void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
+	void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB)
 	{
-		ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
+		ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
 		                    (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
+		external.unlockRect();
+		internal.unlockRect();
 
 		if(isSRGB)
 		{
@@ -2472,34 +2523,37 @@
 			}
 
 			// Perform sRGB conversion in place after decoding
-			byte* src = (byte*)internal.buffer;
+			byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
 			for(int y = 0; y < internal.height; y++)
 			{
-				byte* srcRow = src + y * internal.pitchB;
+				byte *srcRow = src + y * internal.pitchB;
 				for(int x = 0; x <  internal.width; x++)
 				{
-					byte* srcPix = srcRow + x * internal.bytes;
+					byte *srcPix = srcRow + x * internal.bytes;
 					for(int i = 0; i < 3; i++)
 					{
 						srcPix[i] = sRGBtoLinearTable[srcPix[i]];
 					}
 				}
 			}
+			internal.unlockRect();
 		}
 	}
 
-	void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
+	void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
 	{
 		ASSERT(nbChannels == 1 || nbChannels == 2);
 
-		ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
+		ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
 		                    (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
+		external.unlockRect();
+		internal.unlockRect();
 
 		// FIXME: We convert signed data to float, until signed integer internal formats are supported
 		//        This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
 		if(isSigned)
 		{
-			sbyte* src = (sbyte*)internal.buffer;
+			sbyte *src = (sbyte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
 
 			for(int y = 0; y < internal.height; y++)
 			{
@@ -2517,15 +2571,20 @@
 					}
 				}
 			}
+
+			internal.unlockRect();
 		}
 	}
 
-	void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
+	void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
 	{
 	}
 
-	unsigned int Surface::size(int width, int height, int depth, Format format)
+	unsigned int Surface::size(int width, int height, int depth, int border, Format format)
 	{
+		width += 2 * border;
+		height += 2 * border;
+
 		// Dimensions rounded up to multiples of 4, used for compressed formats
 		int width4 = align(width, 4);
 		int height4 = align(height, 4);
@@ -3107,7 +3166,7 @@
 		return 1;
 	}
 
-	void *Surface::allocateBuffer(int width, int height, int depth, Format format)
+	void *Surface::allocateBuffer(int width, int height, int depth, int border, Format format)
 	{
 		// Render targets require 2x2 quads
 		int width2 = (width + 1) & ~1;
@@ -3116,7 +3175,7 @@
 		// FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
 		// and stencil operations also read 8 bytes per four 8-bit stencil values,
 		// so we have to allocate 4 extra bytes to avoid buffer overruns.
-		return allocate(size(width2, height2, depth, format) + 4);
+		return allocate(size(width2, height2, depth, border, format) + 4);
 	}
 
 	void Surface::memfill4(void *buffer, int pattern, int bytes)
@@ -3537,7 +3596,8 @@
 		       external.height == internal.height &&
 		       external.depth  == internal.depth &&
 		       external.pitchB == internal.pitchB &&
-		       external.sliceB == internal.sliceB;
+		       external.sliceB == internal.sliceB &&
+		       external.border == internal.border;
 	}
 
 	Format Surface::selectInternalFormat(Format format) const
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp
index 8b3b8c5..3a6e59e 100644
--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -244,6 +244,7 @@
 			int pitchP;
 			int sliceB;
 			int sliceP;
+			int border;
 			Format format;
 			AtomicInt lock;
 
@@ -252,11 +253,11 @@
 
 	protected:
 		Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice);
-		Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchP = 0);
+		Surface(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchP = 0);
 
 	public:
 		static Surface *create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice);
-		static Surface *create(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchP = 0);
+		static Surface *create(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchP = 0);
 
 		virtual ~Surface() = 0;
 
@@ -265,6 +266,7 @@
 		inline int getWidth() const;
 		inline int getHeight() const;
 		inline int getDepth() const;
+		inline int getBorder() const;
 		inline Format getFormat(bool internal = false) const;
 		inline int getPitchB(bool internal = false) const;
 		inline int getPitchP(bool internal = false) const;
@@ -326,11 +328,11 @@
 		Resource *getResource();
 
 		static int bytes(Format format);
-		static int pitchB(int width, Format format, bool target);
-		static int pitchP(int width, Format format, bool target);
-		static int sliceB(int width, int height, Format format, bool target);
-		static int sliceP(int width, int height, Format format, bool target);
-		static unsigned int size(int width, int height, int depth, Format format);   // FIXME: slice * depth
+		static int pitchB(int width, int border, Format format, bool target);
+		static int pitchP(int width, int border, Format format, bool target);
+		static int sliceB(int width, int height, int border, Format format, bool target);
+		static int sliceP(int width, int height, int border, Format format, bool target);
+		static unsigned int size(int width, int height, int depth, int border, Format format);   // FIXME: slice * depth
 
 		static bool isStencil(Format format);
 		static bool isDepth(Format format);
@@ -433,27 +435,27 @@
 			};
 		};
 
-		static void decodeR8G8B8(Buffer &destination, const Buffer &source);
-		static void decodeX1R5G5B5(Buffer &destination, const Buffer &source);
-		static void decodeA1R5G5B5(Buffer &destination, const Buffer &source);
-		static void decodeX4R4G4B4(Buffer &destination, const Buffer &source);
-		static void decodeA4R4G4B4(Buffer &destination, const Buffer &source);
-		static void decodeP8(Buffer &destination, const Buffer &source);
+		static void decodeR8G8B8(Buffer &destination, Buffer &source);
+		static void decodeX1R5G5B5(Buffer &destination, Buffer &source);
+		static void decodeA1R5G5B5(Buffer &destination, Buffer &source);
+		static void decodeX4R4G4B4(Buffer &destination, Buffer &source);
+		static void decodeA4R4G4B4(Buffer &destination, Buffer &source);
+		static void decodeP8(Buffer &destination, Buffer &source);
 
 		#if S3TC_SUPPORT
-		static void decodeDXT1(Buffer &internal, const Buffer &external);
-		static void decodeDXT3(Buffer &internal, const Buffer &external);
-		static void decodeDXT5(Buffer &internal, const Buffer &external);
+		static void decodeDXT1(Buffer &internal, Buffer &external);
+		static void decodeDXT3(Buffer &internal, Buffer &external);
+		static void decodeDXT5(Buffer &internal, Buffer &external);
 		#endif
-		static void decodeATI1(Buffer &internal, const Buffer &external);
-		static void decodeATI2(Buffer &internal, const Buffer &external);
-		static void decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned);
-		static void decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB);
-		static void decodeASTC(Buffer &internal, const Buffer &external, int xSize, int ySize, int zSize, bool isSRGB);
+		static void decodeATI1(Buffer &internal, Buffer &external);
+		static void decodeATI2(Buffer &internal, Buffer &external);
+		static void decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned);
+		static void decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB);
+		static void decodeASTC(Buffer &internal, Buffer &external, int xSize, int ySize, int zSize, bool isSRGB);
 
 		static void update(Buffer &destination, Buffer &source);
 		static void genericUpdate(Buffer &destination, Buffer &source);
-		static void *allocateBuffer(int width, int height, int depth, Format format);
+		static void *allocateBuffer(int width, int height, int depth, int border, Format format);
 		static void memfill4(void *buffer, int pattern, int bytes);
 
 		bool identicalFormats() const;
@@ -509,6 +511,11 @@
 		return external.depth;
 	}
 
+	int Surface::getBorder() const
+	{
+		return internal.border;
+	}
+
 	Format Surface::getFormat(bool internal) const
 	{
 		return internal ? getInternalFormat() : getExternalFormat();