Optimize copying image data for presentation

The highly generic Blitter::blitToBuffer() method was used for copying
pixels for presentation, even though that copy requires no format
conversion or scaling.

This change removes Blitter::blitToBuffer() and the unused
blitFromBuffer() methods and implements an Image::copyTo() method,
backed by Blitter::copy(), which does a straightforward row-by-row
memcpy() of the data.

Also consistently add 'To' and 'From' to the names of the Image class's
copy, blit, and resolve methods to avoid any confusion about the
direction in which the transfer happens.

Benchmark results:

Run on (48 X 2594 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x24)
  L1 Instruction 32 KiB (x24)
  L2 Unified 256 KiB (x24)
  L3 Unified 30720 KiB (x2)
---------------------------------------------------------
Benchmark               Time             CPU   Iterations
---------------------------------------------------------
(LLVM, before)
Triangle/Hello       8.69 ms         7.39 ms          112

(LLVM, after)
Triangle/Hello      0.878 ms        0.454 ms         2133

(Subzero, before)
Triangle/Hello       26.2 ms         24.8 ms           41

(Subzero, after)
Triangle/Hello       1.11 ms        0.432 ms         1339

Bug: b/147967959
Change-Id: I76a103bbb6e582f987ef1c3cc39d17779ae4ac99
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/45650
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 4b20339..1f820e9 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -1724,128 +1724,22 @@
 	return cornerUpdateRoutine;
 }
 
-void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
+void Blitter::copy(const vk::Image *src, uint8_t *dst, unsigned int dstPitch)
 {
-	auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
-	auto format = src->getFormat(aspect);
-	State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
+	VkExtent3D extent = src->getMipLevelExtent(VK_IMAGE_ASPECT_COLOR_BIT, 0);
+	size_t rowBytes = src->getFormat(VK_IMAGE_ASPECT_COLOR_BIT).bytes() * extent.width;
+	unsigned int srcPitch = src->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
+	ASSERT(dstPitch >= rowBytes && srcPitch >= rowBytes && src->getMipLevelExtent(VK_IMAGE_ASPECT_COLOR_BIT, 0).height >= extent.height);
 
-	auto blitRoutine = getBlitRoutine(state);
-	if(!blitRoutine)
+	const uint8_t *s = (uint8_t *)src->getTexelPointer({ 0, 0, 0 }, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 });
+	uint8_t *d = dst;
+
+	for(uint32_t y = 0; y < extent.height; y++)
 	{
-		return;
-	}
+		memcpy(d, s, rowBytes);
 
-	BlitData data = {
-		nullptr,                                             // source
-		dst,                                                 // dest
-		src->rowPitchBytes(aspect, subresource.mipLevel),    // sPitchB
-		bufferRowPitch,                                      // dPitchB
-		src->slicePitchBytes(aspect, subresource.mipLevel),  // sSliceB
-		bufferSlicePitch,                                    // dSliceB
-
-		0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f,
-
-		0,                                // x0d
-		static_cast<int>(extent.width),   // x1d
-		0,                                // y0d
-		static_cast<int>(extent.height),  // y1d
-		0,                                // z0d
-		static_cast<int>(extent.depth),   // z1d
-
-		static_cast<int>(extent.width),   // sWidth
-		static_cast<int>(extent.height),  // sHeight
-		static_cast<int>(extent.depth),   // sDepth
-
-		false,  // filter3D
-	};
-
-	VkImageSubresource srcSubres = {
-		subresource.aspectMask,
-		subresource.mipLevel,
-		subresource.baseArrayLayer
-	};
-
-	VkImageSubresourceRange srcSubresRange = {
-		subresource.aspectMask,
-		subresource.mipLevel,
-		1,
-		subresource.baseArrayLayer,
-		subresource.layerCount
-	};
-
-	uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
-
-	for(; srcSubres.arrayLayer <= lastLayer; srcSubres.arrayLayer++)
-	{
-		data.source = src->getTexelPointer({ 0, 0, 0 }, srcSubres);
-		ASSERT(data.source < src->end());
-		blitRoutine(&data);
-	}
-}
-
-void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
-{
-	auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
-	auto format = dst->getFormat(aspect);
-	State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
-
-	auto blitRoutine = getBlitRoutine(state);
-	if(!blitRoutine)
-	{
-		return;
-	}
-
-	BlitData data = {
-		src,                                                 // source
-		nullptr,                                             // dest
-		bufferRowPitch,                                      // sPitchB
-		dst->rowPitchBytes(aspect, subresource.mipLevel),    // dPitchB
-		bufferSlicePitch,                                    // sSliceB
-		dst->slicePitchBytes(aspect, subresource.mipLevel),  // dSliceB
-
-		static_cast<float>(-offset.x),  // x0
-		static_cast<float>(-offset.y),  // y0
-		static_cast<float>(-offset.z),  // z0
-		1.0f,                           // w
-		1.0f,                           // h
-		1.0f,                           // d
-
-		offset.x,                                    // x0d
-		static_cast<int>(offset.x + extent.width),   // x1d
-		offset.y,                                    // y0d
-		static_cast<int>(offset.y + extent.height),  // y1d
-		offset.z,                                    // z0d
-		static_cast<int>(offset.z + extent.depth),   // z1d
-
-		static_cast<int>(extent.width),   // sWidth
-		static_cast<int>(extent.height),  // sHeight;
-		static_cast<int>(extent.depth),   // sDepth;
-
-		false,  // filter3D
-	};
-
-	VkImageSubresource dstSubres = {
-		subresource.aspectMask,
-		subresource.mipLevel,
-		subresource.baseArrayLayer
-	};
-
-	VkImageSubresourceRange dstSubresRange = {
-		subresource.aspectMask,
-		subresource.mipLevel,
-		1,
-		subresource.baseArrayLayer,
-		subresource.layerCount
-	};
-
-	uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
-
-	for(; dstSubres.arrayLayer <= lastLayer; dstSubres.arrayLayer++)
-	{
-		data.dest = dst->getTexelPointer({ 0, 0, 0 }, dstSubres);
-		ASSERT(data.dest < dst->end());
-		blitRoutine(&data);
+		s += srcPitch;
+		d += dstPitch;
 	}
 }
 
diff --git a/src/Device/Blitter.hpp b/src/Device/Blitter.hpp
index 316d7ec..2205c2b 100644
--- a/src/Device/Blitter.hpp
+++ b/src/Device/Blitter.hpp
@@ -144,8 +144,7 @@
 	void clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea = nullptr);
 
 	void blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter);
-	void blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch);
-	void blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch);
+	void copy(const vk::Image *src, uint8_t *dst, unsigned int dstPitch);
 
 	void updateBorders(vk::Image *image, const VkImageSubresource &subresource);
 
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index f6075b0..e99d6f0 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -970,7 +970,7 @@
 
 	void play(vk::CommandBuffer::ExecutionState &executionState) override
 	{
-		srcImage->blit(dstImage, region, filter);
+		srcImage->blitTo(dstImage, region, filter);
 	}
 
 	std::string description() override { return "vkCmdBlitImage()"; }
@@ -994,7 +994,7 @@
 
 	void play(vk::CommandBuffer::ExecutionState &executionState) override
 	{
-		srcImage->resolve(dstImage, region);
+		srcImage->resolveTo(dstImage, region);
 	}
 
 	std::string description() override { return "vkCmdBlitImage()"; }
diff --git a/src/Vulkan/VkImage.cpp b/src/Vulkan/VkImage.cpp
index 1995d78..11c474e 100644
--- a/src/Vulkan/VkImage.cpp
+++ b/src/Vulkan/VkImage.cpp
@@ -886,9 +886,10 @@
 	return (decompressedImage && isImageViewCompressed) ? decompressedImage : this;
 }
 
-void Image::blit(Image *dstImage, const VkImageBlit &region, VkFilter filter) const
+void Image::blitTo(Image *dstImage, const VkImageBlit &region, VkFilter filter) const
 {
 	device->getBlitter()->blit(this, dstImage, region, filter);
+
 	VkImageSubresourceRange subresourceRange = {
 		region.dstSubresource.aspectMask,
 		region.dstSubresource.mipLevel,
@@ -899,12 +900,12 @@
 	dstImage->prepareForSampling(subresourceRange);
 }
 
-void Image::blitToBuffer(VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch) const
+void Image::copyTo(uint8_t *dst, unsigned int dstPitch) const
 {
-	device->getBlitter()->blitToBuffer(this, subresource, offset, extent, dst, bufferRowPitch, bufferSlicePitch);
+	device->getBlitter()->copy(this, dst, dstPitch);
 }
 
-void Image::resolve(Image *dstImage, const VkImageResolve &region) const
+void Image::resolveTo(Image *dstImage, const VkImageResolve &region) const
 {
 	VkImageBlit blitRegion;
 
diff --git a/src/Vulkan/VkImage.hpp b/src/Vulkan/VkImage.hpp
index e29efb0..2ae171b 100644
--- a/src/Vulkan/VkImage.hpp
+++ b/src/Vulkan/VkImage.hpp
@@ -58,9 +58,9 @@
 	void copyTo(Buffer *dstBuffer, const VkBufferImageCopy &region);
 	void copyFrom(Buffer *srcBuffer, const VkBufferImageCopy &region);
 
-	void blit(Image *dstImage, const VkImageBlit &region, VkFilter filter) const;
-	void blitToBuffer(VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch) const;
-	void resolve(Image *dstImage, const VkImageResolve &region) const;
+	void blitTo(Image *dstImage, const VkImageBlit &region, VkFilter filter) const;
+	void copyTo(uint8_t *dst, unsigned int dstPitch) const;
+	void resolveTo(Image *dstImage, const VkImageResolve &region) const;
 	void clear(const VkClearValue &clearValue, const vk::Format &viewFormat, const VkRect2D &renderArea, const VkImageSubresourceRange &subresourceRange);
 	void clear(const VkClearColorValue &color, const VkImageSubresourceRange &subresourceRange);
 	void clear(const VkClearDepthStencilValue &color, const VkImageSubresourceRange &subresourceRange);
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index bfbe1ba..eeb2dc6 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -40,16 +40,16 @@
 {
 	spvtools::Optimizer opt{ SPV_ENV_VULKAN_1_1 };
 
-	opt.SetMessageConsumer([](spv_message_level_t level, const char *, const spv_position_t &p, const char *m) {
+	opt.SetMessageConsumer([](spv_message_level_t level, const char *source, const spv_position_t &position, const char *message) {
 		switch(level)
 		{
-			case SPV_MSG_FATAL: sw::warn("SPIR-V FATAL: %d:%d %s\n", int(p.line), int(p.column), m);
-			case SPV_MSG_INTERNAL_ERROR: sw::warn("SPIR-V INTERNAL_ERROR: %d:%d %s\n", int(p.line), int(p.column), m);
-			case SPV_MSG_ERROR: sw::warn("SPIR-V ERROR: %d:%d %s\n", int(p.line), int(p.column), m);
-			case SPV_MSG_WARNING: sw::warn("SPIR-V WARNING: %d:%d %s\n", int(p.line), int(p.column), m);
-			case SPV_MSG_INFO: sw::trace("SPIR-V INFO: %d:%d %s\n", int(p.line), int(p.column), m);
-			case SPV_MSG_DEBUG: sw::trace("SPIR-V DEBUG: %d:%d %s\n", int(p.line), int(p.column), m);
-			default: sw::trace("SPIR-V MESSAGE: %d:%d %s\n", int(p.line), int(p.column), m);
+			case SPV_MSG_FATAL: sw::warn("SPIR-V FATAL: %d:%d %s\n", int(position.line), int(position.column), message);
+			case SPV_MSG_INTERNAL_ERROR: sw::warn("SPIR-V INTERNAL_ERROR: %d:%d %s\n", int(position.line), int(position.column), message);
+			case SPV_MSG_ERROR: sw::warn("SPIR-V ERROR: %d:%d %s\n", int(position.line), int(position.column), message);
+			case SPV_MSG_WARNING: sw::warn("SPIR-V WARNING: %d:%d %s\n", int(position.line), int(position.column), message);
+			case SPV_MSG_INFO: sw::trace("SPIR-V INFO: %d:%d %s\n", int(position.line), int(position.column), message);
+			case SPV_MSG_DEBUG: sw::trace("SPIR-V DEBUG: %d:%d %s\n", int(position.line), int(position.column), message);
+			default: sw::trace("SPIR-V MESSAGE: %d:%d %s\n", int(position.line), int(position.column), message);
 		}
 	});
 
diff --git a/src/WSI/Win32SurfaceKHR.cpp b/src/WSI/Win32SurfaceKHR.cpp
index c4c156b..6e067b3 100644
--- a/src/WSI/Win32SurfaceKHR.cpp
+++ b/src/WSI/Win32SurfaceKHR.cpp
@@ -95,11 +95,7 @@
 		return VK_ERROR_OUT_OF_DATE_KHR;
 	}
 
-	VkImageSubresourceLayers subresourceLayers{};
-	subresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-	subresourceLayers.layerCount = 1;
-
-	image->getImage()->blitToBuffer(subresourceLayers, VkOffset3D{}, extent, reinterpret_cast<uint8_t *>(framebuffer), bitmapRowPitch, 0);
+	image->getImage()->copyTo(reinterpret_cast<uint8_t *>(framebuffer), bitmapRowPitch);
 
 	StretchBlt(windowContext, 0, 0, extent.width, extent.height, bitmapContext, 0, 0, extent.width, extent.height, SRCCOPY);
 
diff --git a/src/WSI/Win32SurfaceKHR.hpp b/src/WSI/Win32SurfaceKHR.hpp
index 0fca5dd..0c27465 100644
--- a/src/WSI/Win32SurfaceKHR.hpp
+++ b/src/WSI/Win32SurfaceKHR.hpp
@@ -55,7 +55,7 @@
 	VkExtent2D windowExtent = {};
 
 	HBITMAP bitmap = {};
-	int bitmapRowPitch = 0;
+	unsigned int bitmapRowPitch = 0;
 	void *framebuffer = nullptr;
 };