Exclude padding bytes from image size limit

Surface::size() adds 4 padding bytes because byte4 data is read as 8 bytes
for efficient unpacking to short4 (which was less efficient prior to the
SSE 4.1 pmovzxbw instruction). This padding is never addressed separately,
so the offset can't overflow a signed 32-bit integer as long as we keep
the total size minus the padding below 2 GiB.

On the OpenGL side we currently impose a further limit of 1 GiB, but
this should also take the padding into account to allow effective image
data up to this limit (e.g. an 8192 * 8192 image with four 32-bit
components per pixel, which is exactly 1 GiB). Note that our scanline
limit is also 8192, so allowing more than 1 GiB would run into that
limit instead.

Bug: chromium:835299
Bug: b/145229887
Change-Id: I4280fca88584235c0da6282ca622ee15b31bc34b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39690
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp
index 138f371..2052041 100644
--- a/src/OpenGL/common/Image.cpp
+++ b/src/OpenGL/common/Image.cpp
@@ -635,9 +635,12 @@
 
 namespace egl
 {
-	// We assume the data can be indexed with a signed 32-bit offset, including any padding,
-	// so we must keep the image size reasonable. 1 GiB ought to be enough for anybody.
-	enum { IMPLEMENTATION_MAX_IMAGE_SIZE_BYTES = 0x40000000 };
+	// We assume the image data can be indexed with a signed 32-bit offset,
+	// so we must keep the size reasonable. 1 GiB ought to be enough for anybody.
+	// 4 extra bytes account for the padding added in Surface::size().
+	// They are not addressed separately, so can't cause overflow.
+	// TODO(b/145229887): Eliminate or don't hard-code the padding bytes.
+	enum : uint64_t { IMPLEMENTATION_MAX_IMAGE_SIZE_BYTES = 0x40000000u + 4 };
 
 	enum TransferType
 	{
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 3de3cfe..529cf09 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -2670,14 +2670,18 @@
 			{
 				uint64_t size = (uint64_t)sliceB(width, height, border, format, true) * depth * samples;
 
-				// FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
+				// We can only sample buffers smaller than 2 GiB, due to signed 32-bit offset calculations.
+				// Force an out-of-memory if larger, or let the caller report an error.
+				if(size >= 0x80000000u)
+				{
+					return std::numeric_limits<size_t>::max();
+				}
+
+				// Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
 				// and stencil operations also read 8 bytes per four 8-bit stencil values,
 				// so we have to allocate 4 extra bytes to avoid buffer overruns.
-				size += 4;
-
-				// We can only sample buffers smaller than 2 GiB.
-				// Force an out-of-memory if larger, or let the caller report an error.
-				return size < 0x80000000u ? (size_t)size : std::numeric_limits<size_t>::max();
+				// TODO(b/145229887): Eliminate if possible, or don't hard-code.
+				return size + 4;
 			}
 		case FORMAT_YV12_BT601:
 		case FORMAT_YV12_BT709:
diff --git a/tests/GLESUnitTests/unittests.cpp b/tests/GLESUnitTests/unittests.cpp
index 2b01528..6edcb0e 100644
--- a/tests/GLESUnitTests/unittests.cpp
+++ b/tests/GLESUnitTests/unittests.cpp
@@ -1331,6 +1331,12 @@
 		glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA32F, width, height, depth, 0, GL_RGBA, GL_FLOAT, nullptr);
 		EXPECT_GLENUM_EQ(GL_OUT_OF_MEMORY, glGetError());
 
+		// b/145229887: Allocating an image of exactly 1 GiB should succeed.
+		const int width8k = 8192;
+		const int height8k = 8192;
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, width8k, height8k, 0, GL_RGBA, GL_FLOAT, nullptr);
+		EXPECT_NO_GL_ERROR();
+
 		// The spec states that the GL is in an undefined state when GL_OUT_OF_MEMORY
 		// is returned, and the context must be recreated before attempting more rendering.
 		Uninitialize();