CubeMap fixes

The following changes are made in this CL:
- Added multilayer support in Blitter::blit()
- Fixed Image::texelOffsetBytesInStorage() for cube maps: each face is
  stored with a 1 pixel border, so texel offset (0,0,0) lies one extra
  row (1 pixel offset in Y) plus one extra texel (1 pixel offset in X)
  past the beginning of the buffer (see the sketch after this list)
- Added a number of out-of-bounds memory access asserts, which
  helped debug this issue.
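
For reference, a minimal sketch of the corrected cube offset math (this
mirrors the Image::texelOffsetBytesInStorage() change below; the
standalone helper and its explicit pitch parameters are only for
illustration):

  // Cube faces are stored with a 1 pixel border, so both X and Y are
  // shifted by one texel before applying the pitches.
  VkDeviceSize texelOffsetBytes(const VkOffset3D& offset, bool isCube,
                                int slicePitchBytes, int rowPitchBytes,
                                int bytesPerTexel)
  {
      int border = isCube ? 1 : 0;
      return offset.z * slicePitchBytes +
             (offset.y + border) * rowPitchBytes +
             (offset.x + border) * bytesPerTexel;
  }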

Fixed failures in:
dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.generate_mipmaps.*
Note: The only remaining failures in that category are related to filtering
sRGB images, where the filtering is most likely performed in the wrong
color space (see the sketch below).
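
A minimal sketch of filtering in the correct (linear) space, using the
standard sRGB transfer function; the helper names are hypothetical and
this snippet is not part of this change:

  #include <cmath>

  // Decode sRGB-encoded channels to linear before interpolating, then
  // re-encode the filtered result.
  float srgbToLinear(float c)
  {
      return (c <= 0.04045f) ? c / 12.92f : powf((c + 0.055f) / 1.055f, 2.4f);
  }

  float linearToSrgb(float c)
  {
      return (c <= 0.0031308f) ? c * 12.92f : 1.055f * powf(c, 1.0f / 2.4f) - 0.055f;
  }

  // Linear blend of two sRGB-encoded channels, performed in linear space.
  float filterSrgb(float a, float b, float t)
  {
      float linear = srgbToLinear(a) * (1.0f - t) + srgbToLinear(b) * t;
      return linearToSrgb(linear);
  }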

Tests: dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.generate_mipmaps.*

Bug b/119620767

Change-Id: I6f18271cb23070eb8f2de3b1d5b35b7021c0ca8b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/27229
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 7648075..0f678dd 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -1515,10 +1515,7 @@
 			return;
 		}
 
-		if((region.srcSubresource.baseArrayLayer != 0) ||
-		   (region.dstSubresource.baseArrayLayer != 0) ||
-		   (region.srcSubresource.layerCount != 1) ||
-		   (region.dstSubresource.layerCount != 1) ||
+		if((region.srcSubresource.layerCount != region.dstSubresource.layerCount) ||
 		   (region.srcSubresource.aspectMask != region.dstSubresource.aspectMask))
 		{
 			UNIMPLEMENTED();
@@ -1545,7 +1542,7 @@
 		VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
 
 		State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), dst->getSampleCountFlagBits(),
-		            { filter != VK_FILTER_NEAREST, region.srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT, false });
+		            { filter != VK_FILTER_NEAREST, srcAspect == VK_IMAGE_ASPECT_STENCIL_BIT, false });
 		state.clampToEdge = (region.srcOffsets[0].x < 0) ||
 		                    (region.srcOffsets[0].y < 0) ||
 		                    (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
@@ -1583,13 +1580,50 @@
 		VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z };
 		VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z };
 
-		for(int i = 0; i < numSlices; i++)
+		VkImageSubresourceLayers srcSubresLayers =
 		{
-			data.source = src->getTexelPointer(srcOffset, region.srcSubresource);
-			data.dest = dst->getTexelPointer(dstOffset, region.dstSubresource);
-			blitFunction(&data);
-			srcOffset.z++;
-			dstOffset.z++;
+			region.srcSubresource.aspectMask,
+			region.srcSubresource.mipLevel,
+			region.srcSubresource.baseArrayLayer,
+			1
+		};
+
+		VkImageSubresourceLayers dstSubresLayers =
+		{
+			region.dstSubresource.aspectMask,
+			region.dstSubresource.mipLevel,
+			region.dstSubresource.baseArrayLayer,
+			1
+		};
+
+		VkImageSubresourceRange srcSubresRange =
+		{
+			region.srcSubresource.aspectMask,
+			region.srcSubresource.mipLevel,
+			1,
+			region.srcSubresource.baseArrayLayer,
+			region.srcSubresource.layerCount
+		};
+
+		uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
+
+		for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
+		{
+			srcOffset.z = region.srcOffsets[0].z;
+			dstOffset.z = region.dstOffsets[0].z;
+
+			for(int i = 0; i < numSlices; i++)
+			{
+				data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
+				data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
+
+				ASSERT(data.source < src->end());
+				ASSERT(data.dest < dst->end());
+
+				blitFunction(&data);
+				srcOffset.z++;
+				dstOffset.z++;
+			}
 		}
 	}
 }
diff --git a/src/Vulkan/VkBuffer.cpp b/src/Vulkan/VkBuffer.cpp
index 421a4a0..7f21ce4 100644
--- a/src/Vulkan/VkBuffer.cpp
+++ b/src/Vulkan/VkBuffer.cpp
@@ -107,7 +107,12 @@
 
 void* Buffer::getOffsetPointer(VkDeviceSize offset) const
 {
-	return reinterpret_cast<char*>(memory) + offset;
+	return reinterpret_cast<uint8_t*>(memory) + offset;
+}
+
+uint8_t* Buffer::end() const
+{
+	return reinterpret_cast<uint8_t*>(getOffsetPointer(size + 1));
 }
 
 } // namespace vk
diff --git a/src/Vulkan/VkBuffer.hpp b/src/Vulkan/VkBuffer.hpp
index 255b0f1..1338854 100644
--- a/src/Vulkan/VkBuffer.hpp
+++ b/src/Vulkan/VkBuffer.hpp
@@ -37,6 +37,7 @@
 	void fill(VkDeviceSize dstOffset, VkDeviceSize fillSize, uint32_t data);
 	void update(VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData);
 	void* getOffsetPointer(VkDeviceSize offset) const;
+	uint8_t* end() const;
 
 	// DataOffset is the offset in bytes from the Buffer to the pointer to the
 	// buffer's data memory.
diff --git a/src/Vulkan/VkImage.cpp b/src/Vulkan/VkImage.cpp
index 90581e4..7c83f03 100644
--- a/src/Vulkan/VkImage.cpp
+++ b/src/Vulkan/VkImage.cpp
@@ -129,8 +129,8 @@
 	int srcBytesPerTexel = bytesPerTexel(srcAspect);
 	ASSERT(srcBytesPerTexel == dst->bytesPerTexel(dstAspect));
 
-	const char* srcMem = static_cast<const char*>(getTexelPointer(pRegion.srcOffset, pRegion.srcSubresource));
-	char* dstMem = static_cast<char*>(dst->getTexelPointer(pRegion.dstOffset, pRegion.dstSubresource));
+	const uint8_t* srcMem = static_cast<const uint8_t*>(getTexelPointer(pRegion.srcOffset, pRegion.srcSubresource));
+	uint8_t* dstMem = static_cast<uint8_t*>(dst->getTexelPointer(pRegion.dstOffset, pRegion.dstSubresource));
 
 	int srcRowPitchBytes = rowPitchBytes(srcAspect, pRegion.srcSubresource.mipLevel);
 	int srcSlicePitchBytes = slicePitchBytes(srcAspect, pRegion.srcSubresource.mipLevel);
@@ -158,36 +158,53 @@
 
 	if(isSingleLine) // Copy one line
 	{
-		memcpy(dstMem, srcMem, pRegion.extent.width * srcBytesPerTexel);
+		size_t copySize = pRegion.extent.width * srcBytesPerTexel;
+		ASSERT((srcMem + copySize) < end());
+		ASSERT((dstMem + copySize) < dst->end());
+		memcpy(dstMem, srcMem, copySize);
 	}
 	else if(isEntireLine && isSinglePlane) // Copy one plane
 	{
-		memcpy(dstMem, srcMem, pRegion.extent.height * srcRowPitchBytes);
+		size_t copySize = pRegion.extent.height * srcRowPitchBytes;
+		ASSERT((srcMem + copySize) < end());
+		ASSERT((dstMem + copySize) < dst->end());
+		memcpy(dstMem, srcMem, copySize);
 	}
 	else if(isEntirePlane) // Copy multiple planes
 	{
-		memcpy(dstMem, srcMem, pRegion.extent.depth * srcSlicePitchBytes);
+		size_t copySize = pRegion.extent.depth * srcSlicePitchBytes;
+		ASSERT((srcMem + copySize) < end());
+		ASSERT((dstMem + copySize) < dst->end());
+		memcpy(dstMem, srcMem, copySize);
 	}
 	else if(isEntireLine) // Copy plane by plane
 	{
+		size_t copySize = pRegion.extent.height * srcRowPitchBytes;
+
 		for(uint32_t z = 0; z < pRegion.extent.depth; z++, dstMem += dstSlicePitchBytes, srcMem += srcSlicePitchBytes)
 		{
-			memcpy(dstMem, srcMem, pRegion.extent.height * srcRowPitchBytes);
+			ASSERT((srcMem + copySize) < end());
+			ASSERT((dstMem + copySize) < dst->end());
+			memcpy(dstMem, srcMem, copySize);
 		}
 	}
 	else // Copy line by line
 	{
+		size_t copySize = pRegion.extent.width * srcBytesPerTexel;
+
 		for(uint32_t z = 0; z < pRegion.extent.depth; z++)
 		{
 			for(uint32_t y = 0; y < pRegion.extent.height; y++, dstMem += dstRowPitchBytes, srcMem += srcRowPitchBytes)
 			{
-				memcpy(dstMem, srcMem, pRegion.extent.width * srcBytesPerTexel);
+				ASSERT((srcMem + copySize) < end());
+				ASSERT((dstMem + copySize) < dst->end());
+				memcpy(dstMem, srcMem, copySize);
 			}
 		}
 	}
 }
 
-void Image::copy(VkBuffer buffer, const VkBufferImageCopy& region, bool bufferIsSource)
+void Image::copy(VkBuffer buf, const VkBufferImageCopy& region, bool bufferIsSource)
 {
 	if(!((region.imageSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) ||
 	     (region.imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) ||
@@ -220,67 +237,91 @@
 	bool isEntirePlane = isEntireLine && (region.imageExtent.height == mipLevelExtent.height) &&
 	                     (imageSlicePitchBytes == bufferSlicePitchBytes);
 
-	VkDeviceSize layerSize = getLayerSize(aspect);
-	char* bufferMemory = static_cast<char*>(Cast(buffer)->getOffsetPointer(region.bufferOffset));
-	char* imageMemory = static_cast<char*>(deviceMemory->getOffsetPointer(
-	                    getMemoryOffset(aspect, region.imageSubresource.mipLevel,
-	                                    region.imageSubresource.baseArrayLayer) +
-	                    texelOffsetBytesInStorage(region.imageOffset, region.imageSubresource)));
-	char* srcMemory = bufferIsSource ? bufferMemory : imageMemory;
-	char* dstMemory = bufferIsSource ? imageMemory : bufferMemory;
+	Buffer* buffer = Cast(buf);
+	uint8_t* bufferMemory = static_cast<uint8_t*>(buffer->getOffsetPointer(region.bufferOffset));
+	uint8_t* imageMemory = static_cast<uint8_t*>(deviceMemory->getOffsetPointer(
+	                       getMemoryOffset(aspect, region.imageSubresource.mipLevel,
+	                                       region.imageSubresource.baseArrayLayer) +
+	                       texelOffsetBytesInStorage(region.imageOffset, region.imageSubresource)));
+	uint8_t* srcMemory = bufferIsSource ? bufferMemory : imageMemory;
+	uint8_t* dstMemory = bufferIsSource ? imageMemory : bufferMemory;
 
 	VkDeviceSize copySize = 0;
+	VkDeviceSize bufferLayerSize = 0;
 	if(isSingleLine)
 	{
 		copySize = region.imageExtent.width * imageBytesPerTexel;
+		bufferLayerSize = copySize;
 	}
 	else if(isEntireLine && isSinglePlane)
 	{
 		copySize = region.imageExtent.height * imageRowPitchBytes;
+		bufferLayerSize = copySize;
 	}
 	else if(isEntirePlane)
 	{
 		copySize = region.imageExtent.depth * imageSlicePitchBytes; // Copy multiple planes
+		bufferLayerSize = copySize;
 	}
 	else if(isEntireLine) // Copy plane by plane
 	{
 		copySize = region.imageExtent.height * imageRowPitchBytes;
+		bufferLayerSize = copySize * region.imageExtent.depth;
 	}
 	else // Copy line by line
 	{
 		copySize = region.imageExtent.width * imageBytesPerTexel;
+		bufferLayerSize = copySize * region.imageExtent.depth * region.imageExtent.height;
 	}
 
+	VkDeviceSize imageLayerSize = getLayerSize(aspect);
+	VkDeviceSize srcLayerSize = bufferIsSource ? bufferLayerSize : imageLayerSize;
+	VkDeviceSize dstLayerSize = bufferIsSource ? imageLayerSize : bufferLayerSize;
+
 	for(uint32_t i = 0; i < region.imageSubresource.layerCount; i++)
 	{
 		if(isSingleLine || (isEntireLine && isSinglePlane) || isEntirePlane)
 		{
+			ASSERT(((bufferIsSource ? dstMemory : srcMemory) + copySize) < end());
+			ASSERT(((bufferIsSource ? srcMemory : dstMemory) + copySize) < buffer->end());
 			memcpy(dstMemory, srcMemory, copySize);
 		}
 		else if(isEntireLine) // Copy plane by plane
 		{
+			uint8_t* srcPlaneMemory = srcMemory;
+			uint8_t* dstPlaneMemory = dstMemory;
 			for(uint32_t z = 0; z < region.imageExtent.depth; z++)
 			{
-				memcpy(dstMemory, srcMemory, copySize);
-				srcMemory += srcSlicePitchBytes;
-				dstMemory += dstSlicePitchBytes;
+				ASSERT(((bufferIsSource ? dstPlaneMemory : srcPlaneMemory) + copySize) < end());
+				ASSERT(((bufferIsSource ? srcPlaneMemory : dstPlaneMemory) + copySize) < buffer->end());
+				memcpy(dstPlaneMemory, srcPlaneMemory, copySize);
+				srcPlaneMemory += srcSlicePitchBytes;
+				dstPlaneMemory += dstSlicePitchBytes;
 			}
 		}
 		else // Copy line by line
 		{
+			uint8_t* srcLayerMemory = srcMemory;
+			uint8_t* dstLayerMemory = dstMemory;
 			for(uint32_t z = 0; z < region.imageExtent.depth; z++)
 			{
+				uint8_t* srcPlaneMemory = srcLayerMemory;
+				uint8_t* dstPlaneMemory = dstLayerMemory;
 				for(uint32_t y = 0; y < region.imageExtent.height; y++)
 				{
-					memcpy(dstMemory, srcMemory, copySize);
-					srcMemory += srcRowPitchBytes;
-					dstMemory += dstRowPitchBytes;
+					ASSERT(((bufferIsSource ? dstPlaneMemory : srcPlaneMemory) + copySize) < end());
+					ASSERT(((bufferIsSource ? srcPlaneMemory : dstPlaneMemory) + copySize) < buffer->end());
+					memcpy(dstPlaneMemory, srcPlaneMemory, copySize);
+					srcPlaneMemory += srcRowPitchBytes;
+					dstPlaneMemory += dstRowPitchBytes;
 				}
+				srcLayerMemory += srcSlicePitchBytes;
+				dstLayerMemory += dstSlicePitchBytes;
 			}
 		}
 
-		srcMemory += layerSize;
-		dstMemory += layerSize;
+		srcMemory += srcLayerSize;
+		dstMemory += dstLayerSize;
 	}
 }
 
@@ -305,8 +346,8 @@
 {
 	VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
 	return offset.z * slicePitchBytes(aspect, subresource.mipLevel) +
-	       offset.y * rowPitchBytes(aspect, subresource.mipLevel) +
-	       offset.x * bytesPerTexel(aspect);
+	       (offset.y + (isCube() ? 1 : 0)) * rowPitchBytes(aspect, subresource.mipLevel) +
+	       (offset.x + (isCube() ? 1 : 0)) * bytesPerTexel(aspect);
 }
 
 VkExtent3D Image::getMipLevelExtent(uint32_t mipLevel) const
@@ -396,6 +437,11 @@
 	return (flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && (imageType == VK_IMAGE_TYPE_2D);
 }
 
+uint8_t* Image::end() const
+{
+	return reinterpret_cast<uint8_t*>(deviceMemory->getOffsetPointer(deviceMemory->getCommittedMemoryInBytes() + 1));
+}
+
 VkDeviceSize Image::getMemoryOffset(VkImageAspectFlagBits aspect) const
 {
 	switch(format)
diff --git a/src/Vulkan/VkImage.hpp b/src/Vulkan/VkImage.hpp
index 01bc517..fa6f180 100644
--- a/src/Vulkan/VkImage.hpp
+++ b/src/Vulkan/VkImage.hpp
@@ -64,6 +64,7 @@
 	int                      slicePitchBytes(VkImageAspectFlagBits aspect, uint32_t mipLevel) const;
 	void*                    getTexelPointer(const VkOffset3D& offset, const VkImageSubresourceLayers& subresource) const;
 	bool                     isCube() const;
+	uint8_t*                 end() const;
 
 private:
 	void copy(VkBuffer buffer, const VkBufferImageCopy& region, bool bufferIsSource);