Image dirtiness

Instead of updating images every time we write to them,
this CL adds a dirtiness mechanism: writes mark an Image
object's affected subresource regions as dirty, and the
required work (cube border updates, decompression, ...)
is only performed once the image memory is accessed for
reading.
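
For context, a minimal sketch of the pattern (illustrative
only: std::mutex and a plain struct stand in for marl::mutex
and the CL's Subresource wrapper, and process() stands in for
the real decompression / cube border work):

#include <cstddef>
#include <cstdint>
#include <mutex>
#include <unordered_set>

// Illustrative stand-in for the per-image subresource regions being tracked.
struct Subresource
{
	uint32_t mipLevel;
	uint32_t arrayLayer;

	bool operator==(const Subresource &other) const
	{
		return mipLevel == other.mipLevel &&
		       arrayLayer == other.arrayLayer;
	}
};

struct SubresourceHash
{
	size_t operator()(const Subresource &s) const
	{
		return (static_cast<size_t>(s.mipLevel) << 16) ^ s.arrayLayer;
	}
};

class Image
{
public:
	// Writes only record which subresources became stale.
	void contentsChanged(const Subresource &subresource)
	{
		std::lock_guard<std::mutex> lock(mutex);
		dirtySubresources.insert(subresource);
	}

	// Reads perform the deferred work once, then mark the subresource clean.
	void prepareForSampling(const Subresource &subresource)
	{
		std::lock_guard<std::mutex> lock(mutex);
		auto it = dirtySubresources.find(subresource);
		if(it != dirtySubresources.end())
		{
			process(*it);  // decompression, cube border update, ...
			dirtySubresources.erase(it);
		}
	}

private:
	void process(const Subresource &) {}

	std::mutex mutex;
	std::unordered_set<Subresource, SubresourceHash> dirtySubresources;
};

With this, repeated writes to the same subresource cost one
set insertion each instead of one full update each.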

Bug: b/159045555
Change-Id: I791a69529e4c2e78aa6562251020aa0bf978bd01
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/45188
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index fd8e26f..97b91f6 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -152,6 +152,7 @@
 			}
 		}
 	}
+	dest->contentsChanged(subresourceRange);
 }
 
 bool Blitter::fastClear(void *clearValue, vk::Format clearFormat, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
@@ -306,6 +307,7 @@
 			}
 		}
 	}
+	dest->contentsChanged(subresourceRange);
 
 	return true;
 }
@@ -1889,6 +1891,8 @@
 
 		blitRoutine(&data);
 	}
+
+	dst->contentsChanged(dstSubresRange);
 }
 
 void Blitter::computeCubeCorner(Pointer<Byte> &layer, Int &x0, Int &x1, Int &y0, Int &y1, Int &pitchB, const State &state)
diff --git a/src/Device/Context.hpp b/src/Device/Context.hpp
index a54ec2d..1c31201 100644
--- a/src/Device/Context.hpp
+++ b/src/Device/Context.hpp
@@ -107,6 +107,7 @@
 	VkFormat renderTargetInternalFormat(int index) const;
 	int colorWriteActive(int index) const;
 
+	vk::DescriptorSet::Array descriptorSetObjects = {};
 	vk::DescriptorSet::Bindings descriptorSets = {};
 	vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets = {};
 	Stream input[MAX_INTERFACE_COMPONENTS / 4];
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index 22f10f8..825c09f 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -27,9 +27,11 @@
 #include "System/Memory.hpp"
 #include "System/Timer.hpp"
 #include "Vulkan/VkConfig.hpp"
+#include "Vulkan/VkDescriptorSet.hpp"
 #include "Vulkan/VkDevice.hpp"
 #include "Vulkan/VkFence.hpp"
 #include "Vulkan/VkImageView.hpp"
+#include "Vulkan/VkPipelineLayout.hpp"
 #include "Vulkan/VkQueryPool.hpp"
 
 #include "marl/containers.h"
@@ -206,6 +208,9 @@
 		pixelRoutine = pixelProcessor.routine(pixelState, context->pipelineLayout, context->pixelShader, context->descriptorSets);
 	}
 
+	draw->containsImageWrite = (context->vertexShader && context->vertexShader->containsImageWrite()) ||
+	                           (context->pixelShader && context->pixelShader->containsImageWrite());
+
 	DrawCall::SetupFunction setupPrimitives = nullptr;
 	int ms = context->sampleCount;
 	unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
@@ -249,6 +254,8 @@
 	draw->provokingVertexMode = context->provokingVertexMode;
 	draw->indexType = indexType;
 	draw->lineRasterizationMode = context->lineRasterizationMode;
+	draw->descriptorSetObjects = context->descriptorSetObjects;
+	draw->pipelineLayout = context->pipelineLayout;
 
 	draw->vertexRoutine = vertexRoutine;
 	draw->setupRoutine = setupRoutine;
@@ -382,6 +389,8 @@
 
 	draw->events = events;
 
+	vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->pipelineLayout);
+
 	DrawCall::run(draw, &drawTickets, clusterQueues);
 }
 
@@ -418,6 +427,19 @@
 	vertexRoutine = {};
 	setupRoutine = {};
 	pixelRoutine = {};
+
+	for(auto *rt : renderTarget)
+	{
+		if(rt)
+		{
+			rt->contentsChanged();
+		}
+	}
+
+	if(containsImageWrite)
+	{
+		vk::DescriptorSet::ContentsChanged(descriptorSetObjects, pipelineLayout);
+	}
 }
 
 void DrawCall::run(const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index f3ccec5..367959f 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -37,6 +37,7 @@
 class DescriptorSet;
 class Device;
 class Query;
+class PipelineLayout;
 
 }  // namespace vk
 
@@ -157,6 +158,7 @@
 	VertexProcessor::RoutineType vertexRoutine;
 	SetupProcessor::RoutineType setupRoutine;
 	PixelProcessor::RoutineType pixelRoutine;
+	bool containsImageWrite;
 
 	SetupFunction setupPrimitives;
 	SetupProcessor::State setupState;
@@ -164,6 +166,8 @@
 	vk::ImageView *renderTarget[RENDERTARGETS];
 	vk::ImageView *depthBuffer;
 	vk::ImageView *stencilBuffer;
+	vk::DescriptorSet::Array descriptorSetObjects;
+	const vk::PipelineLayout *pipelineLayout;
 	TaskEvents *events;
 
 	vk::Query *occlusionQuery;
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index 2f58f0e..6d8e890 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -205,6 +205,7 @@
 }
 
 void ComputeProgram::run(
+    vk::DescriptorSet::Array const &descriptorSetObjects,
     vk::DescriptorSet::Bindings const &descriptorSets,
     vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
     PushConstantStorage const &pushConstants,
@@ -297,6 +298,11 @@
 	}
 
 	wg.wait();
+
+	if(shader->containsImageWrite())
+	{
+		vk::DescriptorSet::ContentsChanged(descriptorSetObjects, pipelineLayout);
+	}
 }
 
 }  // namespace sw
diff --git a/src/Pipeline/ComputeProgram.hpp b/src/Pipeline/ComputeProgram.hpp
index 9b93f4e..eae0d88 100644
--- a/src/Pipeline/ComputeProgram.hpp
+++ b/src/Pipeline/ComputeProgram.hpp
@@ -54,6 +54,7 @@
 
 	// run executes the compute shader routine for all workgroups.
 	void run(
+	    vk::DescriptorSet::Array const &descriptorSetObjects,
 	    vk::DescriptorSet::Bindings const &descriptorSetBindings,
 	    vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
 	    PushConstantStorage const &pushConstants,
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 793a1ef..8a88646 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -782,6 +782,8 @@
 	void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
 	void emitEpilog(SpirvRoutine *routine) const;
 
+	bool containsImageWrite() const { return imageWriteEmitted; }
+
 	using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
 	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
@@ -798,6 +800,7 @@
 	HandleMap<Extension> extensionsByID;
 	std::unordered_set<Extension::Name> extensionsImported;
 	Function::ID entryPoint;
+	mutable bool imageWriteEmitted = false;
 
 	const bool robustBufferAccess = true;
 	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index f86a296..daa2a4b 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -988,6 +988,8 @@
 
 SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState *state) const
 {
+	imageWriteEmitted = true;
+
 	auto imageId = Object::ID(insn.word(1));
 	auto &image = getObject(imageId);
 	auto &imageType = getType(image);
diff --git a/src/Vulkan/BUILD.gn b/src/Vulkan/BUILD.gn
index d73f2d8..ec6cd2b 100644
--- a/src/Vulkan/BUILD.gn
+++ b/src/Vulkan/BUILD.gn
@@ -112,6 +112,7 @@
     "VkCommandBuffer.cpp",
     "VkCommandPool.cpp",
     "VkDescriptorPool.cpp",
+    "VkDescriptorSet.cpp",
     "VkDescriptorSetLayout.cpp",
     "VkDescriptorUpdateTemplate.cpp",
     "VkDevice.cpp",
diff --git a/src/Vulkan/CMakeLists.txt b/src/Vulkan/CMakeLists.txt
index 0117533..9944fa3 100644
--- a/src/Vulkan/CMakeLists.txt
+++ b/src/Vulkan/CMakeLists.txt
@@ -33,6 +33,7 @@
     VkConfig.hpp
     VkDescriptorPool.cpp
     VkDescriptorPool.hpp
+    VkDescriptorSet.cpp
     VkDescriptorSet.hpp
     VkDescriptorSetLayout.cpp
     VkDescriptorSetLayout.hpp
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index e99d6f0..a01e5a2 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -186,6 +186,7 @@
 		vk::ComputePipeline *pipeline = static_cast<vk::ComputePipeline *>(pipelineState.pipeline);
 		pipeline->run(baseGroupX, baseGroupY, baseGroupZ,
 		              groupCountX, groupCountY, groupCountZ,
+		              pipelineState.descriptorSetObjects,
 		              pipelineState.descriptorSets,
 		              pipelineState.descriptorDynamicOffsets,
 		              executionState.pushConstants);
@@ -219,6 +220,7 @@
 
 		auto pipeline = static_cast<vk::ComputePipeline *>(pipelineState.pipeline);
 		pipeline->run(0, 0, 0, cmd->x, cmd->y, cmd->z,
+		              pipelineState.descriptorSetObjects,
 		              pipelineState.descriptorSets,
 		              pipelineState.descriptorDynamicOffsets,
 		              executionState.pushConstants);
@@ -534,6 +536,7 @@
 
 		executionState.bindVertexInputs(context, firstInstance);
 
+		context.descriptorSetObjects = pipelineState.descriptorSetObjects;
 		context.descriptorSets = pipelineState.descriptorSets;
 		context.descriptorDynamicOffsets = pipelineState.descriptorDynamicOffsets;
 
@@ -1100,6 +1103,8 @@
 	{
 		for(uint32_t i = 0; i < descriptorSetCount; i++)
 		{
+			// We need both a descriptor set object for updates and a descriptor set data pointer for routines
+			descriptorSetObjects[firstSet + i] = vk::Cast(pDescriptorSets[i]);
 			descriptorSets[firstSet + i] = vk::Cast(pDescriptorSets[i])->data;
 		}
 
@@ -1119,6 +1124,7 @@
 
 		for(uint32_t i = firstSet; i < firstSet + descriptorSetCount; i++)
 		{
+			pipelineState.descriptorSetObjects[i] = descriptorSetObjects[i];
 			pipelineState.descriptorSets[i] = descriptorSets[i];
 		}
 
@@ -1137,6 +1143,7 @@
 	const uint32_t firstDynamicOffset;
 	const uint32_t dynamicOffsetCount;
 
+	vk::DescriptorSet::Array descriptorSetObjects;
 	vk::DescriptorSet::Bindings descriptorSets;
 	vk::DescriptorSet::DynamicOffsets dynamicOffsets;
 };
diff --git a/src/Vulkan/VkCommandBuffer.hpp b/src/Vulkan/VkCommandBuffer.hpp
index f209102..10e2b06 100644
--- a/src/Vulkan/VkCommandBuffer.hpp
+++ b/src/Vulkan/VkCommandBuffer.hpp
@@ -139,6 +139,7 @@
 		struct PipelineState
 		{
 			Pipeline *pipeline = nullptr;
+			vk::DescriptorSet::Array descriptorSetObjects = {};
 			vk::DescriptorSet::Bindings descriptorSets = {};
 			vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets = {};
 		};
diff --git a/src/Vulkan/VkDescriptorPool.cpp b/src/Vulkan/VkDescriptorPool.cpp
index 02754e7c..b0ee5db 100644
--- a/src/Vulkan/VkDescriptorPool.cpp
+++ b/src/Vulkan/VkDescriptorPool.cpp
@@ -22,11 +22,6 @@
 
 namespace {
 
-inline VkDescriptorSet asDescriptorSet(uint8_t *memory)
-{
-	return vk::TtoVkT<vk::DescriptorSet, VkDescriptorSet>(reinterpret_cast<vk::DescriptorSet *>(memory));
-}
-
 inline uint8_t *asMemory(VkDescriptorSet descriptorSet)
 {
 	return reinterpret_cast<uint8_t *>(vk::Cast(descriptorSet));
@@ -143,7 +138,7 @@
 		{
 			for(uint32_t i = 0; i < numAllocs; i++)
 			{
-				pDescriptorSets[i] = asDescriptorSet(memory);
+				pDescriptorSets[i] = *(new(memory) DescriptorSet());
 				nodes.insert(Node(memory, sizes[i]));
 				memory += sizes[i];
 			}
@@ -158,7 +153,7 @@
 		uint8_t *memory = findAvailableMemory(sizes[i]);
 		if(memory)
 		{
-			pDescriptorSets[i] = asDescriptorSet(memory);
+			pDescriptorSets[i] = *(new(memory) DescriptorSet());
 		}
 		else
 		{
diff --git a/src/Vulkan/VkDescriptorSet.cpp b/src/Vulkan/VkDescriptorSet.cpp
new file mode 100644
index 0000000..5cce513
--- /dev/null
+++ b/src/Vulkan/VkDescriptorSet.cpp
@@ -0,0 +1,89 @@
+// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "VkDescriptorSet.hpp"
+#include "VkImageView.hpp"
+#include "VkPipelineLayout.hpp"
+
+namespace vk {
+
+void DescriptorSet::ParseDescriptors(const Array &descriptorSets, const PipelineLayout *layout, NotificationType notificationType)
+{
+	if(layout)
+	{
+		uint32_t descriptorSetCount = layout->getDescriptorSetCount();
+		ASSERT(descriptorSetCount <= MAX_BOUND_DESCRIPTOR_SETS);
+
+		for(uint32_t i = 0; i < descriptorSetCount; ++i)
+		{
+			DescriptorSet *descriptorSet = descriptorSets[i];
+			if(!descriptorSet)
+			{
+				continue;
+			}
+
+			marl::lock lock(descriptorSet->header.mutex);
+			uint32_t bindingCount = layout->getBindingCount(i);
+			for(uint32_t j = 0; j < bindingCount; ++j)
+			{
+				VkDescriptorType type = layout->getDescriptorType(i, j);
+				uint32_t descriptorCount = layout->getDescriptorCount(i, j);
+				uint32_t descriptorSize = layout->getDescriptorSize(i, j);
+				uint8_t *descriptorMemory = descriptorSet->data + layout->getBindingOffset(i, j);
+
+				for(uint32_t k = 0; k < descriptorCount; k++)
+				{
+					ImageView *memoryOwner = nullptr;
+					switch(type)
+					{
+						case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+						case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+							memoryOwner = reinterpret_cast<SampledImageDescriptor *>(descriptorMemory)->memoryOwner;
+							break;
+						case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+						case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+							memoryOwner = reinterpret_cast<StorageImageDescriptor *>(descriptorMemory)->memoryOwner;
+							break;
+						default:
+							break;
+					}
+					if(memoryOwner)
+					{
+						if(notificationType == PREPARE_FOR_SAMPLING)
+						{
+							memoryOwner->prepareForSampling();
+						}
+						else if((notificationType == CONTENTS_CHANGED) && (type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE))
+						{
+							memoryOwner->contentsChanged();
+						}
+					}
+					descriptorMemory += descriptorSize;
+				}
+			}
+		}
+	}
+}
+
+void DescriptorSet::ContentsChanged(const Array &descriptorSets, const PipelineLayout *layout)
+{
+	ParseDescriptors(descriptorSets, layout, CONTENTS_CHANGED);
+}
+
+void DescriptorSet::PrepareForSampling(const Array &descriptorSets, const PipelineLayout *layout)
+{
+	ParseDescriptors(descriptorSets, layout, PREPARE_FOR_SAMPLING);
+}
+
+}  // namespace vk
\ No newline at end of file
diff --git a/src/Vulkan/VkDescriptorSet.hpp b/src/Vulkan/VkDescriptorSet.hpp
index d0564ef..3ef058b 100644
--- a/src/Vulkan/VkDescriptorSet.hpp
+++ b/src/Vulkan/VkDescriptorSet.hpp
@@ -16,6 +16,9 @@
 #define VK_DESCRIPTOR_SET_HPP_
 
 // Intentionally not including VkObject.hpp here due to b/127920555
+#include "VkConfig.hpp"
+
+#include "marl/mutex.h"
 
 #include <array>
 #include <cstdint>
@@ -24,10 +27,12 @@
 namespace vk {
 
 class DescriptorSetLayout;
+class PipelineLayout;
 
 struct alignas(16) DescriptorSetHeader
 {
 	DescriptorSetLayout *layout;
+	marl::mutex mutex;
 };
 
 class alignas(16) DescriptorSet
@@ -38,11 +43,28 @@
 		return static_cast<DescriptorSet *>(static_cast<void *>(object));
 	}
 
+	operator VkDescriptorSet()
+	{
+		return { static_cast<uint64_t>(reinterpret_cast<uintptr_t>(this)) };
+	}
+
+	using Array = std::array<DescriptorSet *, vk::MAX_BOUND_DESCRIPTOR_SETS>;
 	using Bindings = std::array<uint8_t *, vk::MAX_BOUND_DESCRIPTOR_SETS>;
 	using DynamicOffsets = std::array<uint32_t, vk::MAX_DESCRIPTOR_SET_COMBINED_BUFFERS_DYNAMIC>;
 
+	static void ContentsChanged(const Array &descriptorSets, const PipelineLayout *layout);
+	static void PrepareForSampling(const Array &descriptorSets, const PipelineLayout *layout);
+
 	DescriptorSetHeader header;
 	alignas(16) uint8_t data[1];
+
+private:
+	enum NotificationType
+	{
+		CONTENTS_CHANGED,
+		PREPARE_FOR_SAMPLING
+	};
+	static void ParseDescriptors(const Array &descriptorSets, const PipelineLayout *layout, NotificationType notificationType);
 };
 
 inline DescriptorSet *Cast(VkDescriptorSet object)
diff --git a/src/Vulkan/VkDescriptorSetLayout.cpp b/src/Vulkan/VkDescriptorSetLayout.cpp
index 12bf32b..b24781a 100644
--- a/src/Vulkan/VkDescriptorSetLayout.cpp
+++ b/src/Vulkan/VkDescriptorSetLayout.cpp
@@ -20,6 +20,8 @@
 #include "VkImageView.hpp"
 #include "VkSampler.hpp"
 
+#include "Reactor/Reactor.hpp"
+
 #include <algorithm>
 #include <cstddef>
 #include <cstring>
@@ -128,7 +130,7 @@
 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
 			return static_cast<uint32_t>(sizeof(BufferDescriptor));
 		default:
-			UNSUPPORTED("Unsupported Descriptor Type");
+			UNSUPPORTED("Unsupported Descriptor Type: %d", int(type));
 			return 0;
 	}
 }
@@ -142,7 +144,7 @@
 size_t DescriptorSetLayout::getDescriptorSetAllocationSize() const
 {
 	// vk::DescriptorSet has a header with a pointer to the layout.
-	return sw::align<alignof(DescriptorSet)>(offsetof(DescriptorSet, data) + getDescriptorSetDataSize());
+	return sw::align<alignof(DescriptorSet)>(OFFSET(DescriptorSet, data) + getDescriptorSetDataSize());
 }
 
 size_t DescriptorSetLayout::getDescriptorSetDataSize() const
@@ -158,6 +160,8 @@
 
 void DescriptorSetLayout::initialize(DescriptorSet *descriptorSet)
 {
+	ASSERT(descriptorSet->header.layout == nullptr);
+
 	// Use a pointer to this descriptor set layout as the descriptor set's header
 	descriptorSet->header.layout = this;
 	uint8_t *mem = descriptorSet->data;
@@ -343,6 +347,7 @@
 			imageSampler[i].swizzle = imageView->getComponentMapping();
 			imageSampler[i].format = format;
 			imageSampler[i].device = device;
+			imageSampler[i].memoryOwner = imageView;
 
 			auto &subresourceRange = imageView->getSubresourceRange();
 
@@ -436,6 +441,7 @@
 			descriptor[i].arrayLayers = imageView->getSubresourceRange().layerCount;
 			descriptor[i].sampleCount = imageView->getSampleCount();
 			descriptor[i].sizeInBytes = static_cast<int>(imageView->getSizeInBytes());
+			descriptor[i].memoryOwner = imageView;
 
 			if(imageView->getFormat().isStencil())
 			{
diff --git a/src/Vulkan/VkDescriptorSetLayout.hpp b/src/Vulkan/VkDescriptorSetLayout.hpp
index b82490a..7ab3439 100644
--- a/src/Vulkan/VkDescriptorSetLayout.hpp
+++ b/src/Vulkan/VkDescriptorSetLayout.hpp
@@ -48,6 +48,8 @@
 	int arrayLayers;
 	int mipLevels;
 	int sampleCount;
+
+	ImageView *memoryOwner;  // Pointer to the view which owns the memory used by the descriptor set
 };
 
 struct alignas(16) StorageImageDescriptor
@@ -67,6 +69,8 @@
 	int stencilRowPitchBytes;
 	int stencilSlicePitchBytes;
 	int stencilSamplePitchBytes;
+
+	ImageView *memoryOwner;  // Pointer to the view which owns the memory used by the descriptor set
 };
 
 struct alignas(16) BufferDescriptor
diff --git a/src/Vulkan/VkImage.cpp b/src/Vulkan/VkImage.cpp
index 874196d..68f4d76 100644
--- a/src/Vulkan/VkImage.cpp
+++ b/src/Vulkan/VkImage.cpp
@@ -458,8 +458,8 @@
 		dstLayer += dstLayerPitch;
 	}
 
-	dstImage->prepareForSampling({ region.dstSubresource.aspectMask, region.dstSubresource.mipLevel, 1,
-	                               region.dstSubresource.baseArrayLayer, region.dstSubresource.layerCount });
+	dstImage->contentsChanged({ region.dstSubresource.aspectMask, region.dstSubresource.mipLevel, 1,
+	                            region.dstSubresource.baseArrayLayer, region.dstSubresource.layerCount });
 }
 
 void Image::copy(Buffer *buffer, const VkBufferImageCopy &region, bool bufferIsSource)
@@ -588,8 +588,8 @@
 
 	if(bufferIsSource)
 	{
-		prepareForSampling({ region.imageSubresource.aspectMask, region.imageSubresource.mipLevel, 1,
-		                     region.imageSubresource.baseArrayLayer, region.imageSubresource.layerCount });
+		contentsChanged({ region.imageSubresource.aspectMask, region.imageSubresource.mipLevel, 1,
+		                  region.imageSubresource.baseArrayLayer, region.imageSubresource.layerCount });
 	}
 }
 
@@ -918,15 +918,6 @@
 void Image::blitTo(Image *dstImage, const VkImageBlit &region, VkFilter filter) const
 {
 	device->getBlitter()->blit(this, dstImage, region, filter);
-
-	VkImageSubresourceRange subresourceRange = {
-		region.dstSubresource.aspectMask,
-		region.dstSubresource.mipLevel,
-		1,
-		region.dstSubresource.baseArrayLayer,
-		region.dstSubresource.layerCount
-	};
-	dstImage->prepareForSampling(subresourceRange);
 }
 
 void Image::copyTo(uint8_t *dst, unsigned int dstPitch) const
@@ -1040,8 +1031,29 @@
 	}
 }
 
-void Image::prepareForSampling(const VkImageSubresourceRange &subresourceRange)
+bool Image::requiresPreprocessing() const
 {
+	return (isCube() && (arrayLayers >= 6)) || decompressedImage;
+}
+
+void Image::contentsChanged(const VkImageSubresourceRange &subresourceRange, ContentsChangedContext contentsChangedContext)
+{
+	// If this function is called after (possibly) writing to this image from a shader,
+	// the image must have VK_IMAGE_USAGE_STORAGE_BIT set for the write operation to be
+	// valid. Otherwise, we can't have legally written to this image, so we know we can
+	// skip updating dirtySubresources.
+	if((contentsChangedContext == USING_STORAGE) && !(usage & VK_IMAGE_USAGE_STORAGE_BIT))
+	{
+		return;
+	}
+
+	// If this isn't a cube or a compressed image, it will never need preprocessing,
+	// so we can skip updating dirtySubresources.
+	if(!requiresPreprocessing())
+	{
+		return;
+	}
+
 	uint32_t lastLayer = getLastLayerIndex(subresourceRange);
 	uint32_t lastMipLevel = getLastMipLevel(subresourceRange);
 
@@ -1051,6 +1063,44 @@
 		subresourceRange.baseArrayLayer
 	};
 
+	marl::lock lock(mutex);
+	for(subresource.arrayLayer = subresourceRange.baseArrayLayer;
+	    subresource.arrayLayer <= lastLayer;
+	    subresource.arrayLayer++)
+	{
+		for(subresource.mipLevel = subresourceRange.baseMipLevel;
+		    subresource.mipLevel <= lastMipLevel;
+		    subresource.mipLevel++)
+		{
+			dirtySubresources.insert(subresource);
+		}
+	}
+}
+
+void Image::prepareForSampling(const VkImageSubresourceRange &subresourceRange)
+{
+	// If this isn't a cube or a compressed image, there's nothing to do
+	if(!requiresPreprocessing())
+	{
+		return;
+	}
+
+	uint32_t lastLayer = getLastLayerIndex(subresourceRange);
+	uint32_t lastMipLevel = getLastMipLevel(subresourceRange);
+
+	VkImageSubresource subresource = {
+		subresourceRange.aspectMask,
+		subresourceRange.baseMipLevel,
+		subresourceRange.baseArrayLayer
+	};
+
+	marl::lock lock(mutex);
+
+	if(dirtySubresources.empty())
+	{
+		return;
+	}
+
 	// First, decompress all relevant dirty subregions
 	for(subresource.arrayLayer = subresourceRange.baseArrayLayer;
 	    subresource.arrayLayer <= lastLayer;
@@ -1060,17 +1110,62 @@
 		    subresource.mipLevel <= lastMipLevel;
 		    subresource.mipLevel++)
 		{
-			decompress(subresource);
+			auto it = dirtySubresources.find(subresource);
+			if(it != dirtySubresources.end())
+			{
+				decompress(subresource);
+			}
 		}
 	}
 
 	// Second, update cubemap borders
-	subresource.arrayLayer = subresourceRange.baseArrayLayer;
-	for(subresource.mipLevel = subresourceRange.baseMipLevel;
-	    subresource.mipLevel <= lastMipLevel;
-	    subresource.mipLevel++)
+	for(subresource.arrayLayer = subresourceRange.baseArrayLayer;
+	    subresource.arrayLayer <= lastLayer;
+	    subresource.arrayLayer++)
 	{
-		updateCube(subresource);
+		for(subresource.mipLevel = subresourceRange.baseMipLevel;
+		    subresource.mipLevel <= lastMipLevel;
+		    subresource.mipLevel++)
+		{
+			auto it = dirtySubresources.find(subresource);
+			if(it != dirtySubresources.end())
+			{
+				if(updateCube(subresource))
+				{
+					// updateCube() updates all layers of all cubemaps at once, so remove entries to avoid duplicating effort
+					VkImageSubresource cleanSubresource = subresource;
+					for(cleanSubresource.arrayLayer = 0; cleanSubresource.arrayLayer < arrayLayers - 5;)
+					{
+						// Delete one cube's worth of dirty subregions
+						for(uint32_t i = 0; i < 6; i++, cleanSubresource.arrayLayer++)
+						{
+							auto it = dirtySubresources.find(cleanSubresource);
+							if(it != dirtySubresources.end())
+							{
+								dirtySubresources.erase(it);
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	// Finally, mark all updated subregions clean
+	for(subresource.arrayLayer = subresourceRange.baseArrayLayer;
+	    subresource.arrayLayer <= lastLayer;
+	    subresource.arrayLayer++)
+	{
+		for(subresource.mipLevel = subresourceRange.baseMipLevel;
+		    subresource.mipLevel <= lastMipLevel;
+		    subresource.mipLevel++)
+		{
+			auto it = dirtySubresources.find(subresource);
+			if(it != dirtySubresources.end())
+			{
+				dirtySubresources.erase(it);
+			}
+		}
 	}
 }
 
@@ -1146,7 +1241,7 @@
 	}
 }
 
-void Image::updateCube(const VkImageSubresource &subres)
+bool Image::updateCube(const VkImageSubresource &subres)
 {
 	if(isCube() && (arrayLayers >= 6))
 	{
@@ -1158,7 +1253,11 @@
 		{
 			device->getBlitter()->updateBorders(decompressedImage ? decompressedImage : this, subresource);
 		}
+
+		return true;
 	}
+
+	return false;
 }
 
 void Image::decodeETC2(const VkImageSubresource &subresource)
diff --git a/src/Vulkan/VkImage.hpp b/src/Vulkan/VkImage.hpp
index d133fe5..d19b467 100644
--- a/src/Vulkan/VkImage.hpp
+++ b/src/Vulkan/VkImage.hpp
@@ -18,10 +18,14 @@
 #include "VkFormat.hpp"
 #include "VkObject.hpp"
 
+#include "marl/mutex.h"
+
 #ifdef __ANDROID__
 #	include <vulkan/vk_android_native_buffer.h>  // For VkSwapchainImageUsageFlagsANDROID and buffer_handle_t
 #endif
 
+#include <unordered_set>
+
 namespace vk {
 
 class Buffer;
@@ -90,6 +94,12 @@
 	bool canBindToMemory(DeviceMemory *pDeviceMemory) const;
 
 	void prepareForSampling(const VkImageSubresourceRange &subresourceRange);
+	enum ContentsChangedContext
+	{
+		DIRECT_MEMORY_ACCESS = 0,
+		USING_STORAGE = 1
+	};
+	void contentsChanged(const VkImageSubresourceRange &subresourceRange, ContentsChangedContext contentsChangedContext = DIRECT_MEMORY_ACCESS);
 	const Image *getSampledImage(const vk::Format &imageViewFormat) const;
 
 #ifdef __ANDROID__
@@ -116,8 +126,9 @@
 	VkFormat getClearFormat() const;
 	void clear(void *pixelData, VkFormat pixelFormat, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D &renderArea);
 	int borderSize() const;
+	bool requiresPreprocessing() const;
 	void decompress(const VkImageSubresource &subresource);
-	void updateCube(const VkImageSubresource &subresource);
+	bool updateCube(const VkImageSubresource &subresource);
 	void decodeETC2(const VkImageSubresource &subresource);
 	void decodeBC(const VkImageSubresource &subresource);
 	void decodeASTC(const VkImageSubresource &subresource);
@@ -140,6 +151,39 @@
 #endif
 
 	VkExternalMemoryHandleTypeFlags supportedExternalMemoryHandleTypes = (VkExternalMemoryHandleTypeFlags)0;
+
+	// VkImageSubresource wrapper for use in unordered_set
+	class Subresource
+	{
+	public:
+		Subresource()
+		    : subresource{ (VkImageAspectFlags)0, 0, 0 }
+		{}
+		Subresource(const VkImageSubresource &subres)
+		    : subresource(subres)
+		{}
+		inline operator VkImageSubresource() const { return subresource; }
+
+		bool operator==(const Subresource &other) const
+		{
+			return (subresource.aspectMask == other.subresource.aspectMask) &&
+			       (subresource.mipLevel == other.subresource.mipLevel) &&
+			       (subresource.arrayLayer == other.subresource.arrayLayer);
+		}
+
+		size_t operator()(const Subresource &other) const
+		{
+			return static_cast<size_t>(other.subresource.aspectMask) ^
+			       static_cast<size_t>(other.subresource.mipLevel) ^
+			       static_cast<size_t>(other.subresource.arrayLayer);
+		}
+
+	private:
+		VkImageSubresource subresource;
+	};
+
+	marl::mutex mutex;
+	std::unordered_set<Subresource, Subresource> dirtySubresources GUARDED_BY(mutex);
 };
 
 static inline Image *Cast(VkImage object)
diff --git a/src/Vulkan/VkImageView.hpp b/src/Vulkan/VkImageView.hpp
index 7ab4bb3..297ffd1 100644
--- a/src/Vulkan/VkImageView.hpp
+++ b/src/Vulkan/VkImageView.hpp
@@ -105,7 +105,11 @@
 	bool hasDepthAspect() const { return (subresourceRange.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0; }
 	bool hasStencilAspect() const { return (subresourceRange.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0; }
 
-	void prepareForSampling() const { image->prepareForSampling(subresourceRange); }
+	// This function is only called from the renderer, so use the USING_STORAGE flag,
+	// since VK_IMAGE_USAGE_STORAGE_BIT is required to write to an image from a shader.
+	void contentsChanged() { image->contentsChanged(subresourceRange, Image::USING_STORAGE); }
+
+	void prepareForSampling() { image->prepareForSampling(subresourceRange); }
 
 	const VkComponentMapping &getComponentMapping() const { return components; }
 	const VkImageSubresourceRange &getSubresourceRange() const { return subresourceRange; }
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index eeb2dc6..f8bb462 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -613,13 +613,14 @@
 
 void ComputePipeline::run(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
                           uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
+                          vk::DescriptorSet::Array const &descriptorSetObjects,
                           vk::DescriptorSet::Bindings const &descriptorSets,
                           vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
                           sw::PushConstantStorage const &pushConstants)
 {
 	ASSERT_OR_RETURN(program != nullptr);
 	program->run(
-	    descriptorSets, descriptorDynamicOffsets, pushConstants,
+	    descriptorSetObjects, descriptorSets, descriptorDynamicOffsets, pushConstants,
 	    baseGroupX, baseGroupY, baseGroupZ,
 	    groupCountX, groupCountY, groupCountZ);
 }
diff --git a/src/Vulkan/VkPipeline.hpp b/src/Vulkan/VkPipeline.hpp
index ca2ecf2..611eb84 100644
--- a/src/Vulkan/VkPipeline.hpp
+++ b/src/Vulkan/VkPipeline.hpp
@@ -138,6 +138,7 @@
 
 	void run(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
 	         uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ,
+	         vk::DescriptorSet::Array const &descriptorSetObjects,
 	         vk::DescriptorSet::Bindings const &descriptorSets,
 	         vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
 	         sw::PushConstantStorage const &pushConstants);
diff --git a/src/Vulkan/VkPipelineLayout.cpp b/src/Vulkan/VkPipelineLayout.cpp
index da5a921..4d12cc5 100644
--- a/src/Vulkan/VkPipelineLayout.cpp
+++ b/src/Vulkan/VkPipelineLayout.cpp
@@ -92,6 +92,11 @@
 	return descriptorSetCount;
 }
 
+uint32_t PipelineLayout::getBindingCount(uint32_t setNumber) const
+{
+	return descriptorSets[setNumber].bindingCount;
+}
+
 uint32_t PipelineLayout::getDynamicOffsetIndex(uint32_t setNumber, uint32_t bindingNumber) const
 {
 	ASSERT(setNumber < descriptorSetCount && bindingNumber < descriptorSets[setNumber].bindingCount);
diff --git a/src/Vulkan/VkPipelineLayout.hpp b/src/Vulkan/VkPipelineLayout.hpp
index d48d937..15015fa 100644
--- a/src/Vulkan/VkPipelineLayout.hpp
+++ b/src/Vulkan/VkPipelineLayout.hpp
@@ -30,6 +30,7 @@
 	static size_t ComputeRequiredAllocationSize(const VkPipelineLayoutCreateInfo *pCreateInfo);
 
 	size_t getDescriptorSetCount() const;
+	uint32_t getBindingCount(uint32_t setNumber) const;
 
 	// Returns the index into the pipeline's dynamic offsets array for
 	// the given descriptor set and binding number.