Memory leak fix for SamplingRoutine cache

- The SamplingRoutine cache is now owned by the Device and
  has the same lifetime as the device, which means it gets
  released when the device gets released.
- Added a utility class, SamplingRoutineCache, to contain
  the cache and release all routine entries upon destruction.
- Added a pointer to the current device in the sampler object
  in order to retrieve it during sampling, in order to access
  the routine cache
- The cache is now internally an LRUCache, so it can't cache
  a large quantity of routines and take up a lot of memory.

Bug b/129523279 b/137649247 b/137524292 chromium:971325

Change-Id: If4ff1fbc87962355d0a281b9d0acace4316a02b8
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32688
Tested-by: Alexis Hétu <sugoi@google.com>
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index a0c07be..d97befb 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1196,7 +1196,7 @@
 		std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;
 
 		static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler);
-		static ImageSampler *emitSamplerFunction(ImageInstruction instruction, const Sampler &samplerState);
+		static rr::Routine *emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);
 
 		// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
 		static sw::TextureType convertTextureType(VkImageViewType imageViewType);
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 6d067e1..ba6f2ac 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -16,13 +16,11 @@
 
 #include "SamplerCore.hpp" // TODO: Figure out what's needed.
 #include "System/Math.hpp"
-#include "Vulkan/VkBuffer.hpp"
 #include "Vulkan/VkDebug.hpp"
-#include "Vulkan/VkDescriptorSet.hpp"
-#include "Vulkan/VkPipelineLayout.hpp"
+#include "Vulkan/VkDescriptorSetLayout.hpp"
+#include "Vulkan/VkDevice.hpp"
 #include "Vulkan/VkImageView.hpp"
 #include "Vulkan/VkSampler.hpp"
-#include "Vulkan/VkDescriptorSetLayout.hpp"
 #include "Device/Config.hpp"
 
 #include <spirv/unified1/spirv.hpp>
@@ -31,31 +29,6 @@
 #include <climits>
 #include <mutex>
 
-namespace
-{
-
-struct SamplingRoutineKey
-{
-	uint32_t instruction;
-	uint32_t sampler;
-	uint32_t imageView;
-
-	bool operator==(const SamplingRoutineKey &rhs) const
-	{
-		return instruction == rhs.instruction && sampler == rhs.sampler && imageView == rhs.imageView;
-	}
-
-	struct Hash
-	{
-		std::size_t operator()(const SamplingRoutineKey &key) const noexcept
-		{
-			return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView;
-		}
-	};
-};
-
-}
-
 namespace sw {
 
 SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler)
@@ -63,15 +36,18 @@
 	ImageInstruction instruction(inst);
 	ASSERT(imageDescriptor->imageViewId != 0 && (sampler->id != 0 || instruction.samplerMethod == Fetch));
 
-	// TODO(b/129523279): Move somewhere sensible.
-	static std::unordered_map<SamplingRoutineKey, ImageSampler*, SamplingRoutineKey::Hash> cache;
-	static std::mutex mutex;
+	vk::Device::SamplingRoutineCache::Key key = {inst, imageDescriptor->imageViewId, sampler->id};
 
-	SamplingRoutineKey key = {inst, imageDescriptor->imageViewId, sampler->id};
+	ASSERT(imageDescriptor->device);
 
-	std::unique_lock<std::mutex> lock(mutex);
-	auto it = cache.find(key);
-	if (it != cache.end()) { return it->second; }
+	std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex());
+	vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache();
+
+	rr::Routine* routine = cache->query(key);
+	if(routine)
+	{
+		return (ImageSampler*)(routine->getEntry());
+	}
 
 	auto type = imageDescriptor->type;
 
@@ -108,13 +84,13 @@
 		UNSUPPORTED("anisotropyEnable");
 	}
 
-	auto fptr = emitSamplerFunction(instruction, samplerState);
+	routine = emitSamplerRoutine(instruction, samplerState);
 
-	cache.emplace(key, fptr);
-	return fptr;
+	cache->add(key, routine);
+	return (ImageSampler*)(routine->getEntry());
 }
 
-SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction instruction, const Sampler &samplerState)
+rr::Routine *SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState)
 {
 	// TODO(b/129523279): Hold a separate mutex lock for the sampler being built.
 	rr::Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<SIMD::Float>, Pointer<SIMD::Float>, Pointer<Byte>)> function;
@@ -231,7 +207,7 @@
 		}
 	}
 
-	return (ImageSampler*)function("sampler")->getEntry();
+	return function("sampler");
 }
 
 sw::TextureType SpirvShader::convertTextureType(VkImageViewType imageViewType)
diff --git a/src/Vulkan/VkDescriptorSetLayout.cpp b/src/Vulkan/VkDescriptorSetLayout.cpp
index 65e625e..e4d87ce 100644
--- a/src/Vulkan/VkDescriptorSetLayout.cpp
+++ b/src/Vulkan/VkDescriptorSetLayout.cpp
@@ -269,7 +269,7 @@
 	}
 }
 
-void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src)
+void DescriptorSetLayout::WriteDescriptorSet(Device* device, DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src)
 {
 	DescriptorSetLayout* dstLayout = dstSet->header.layout;
 	auto &binding = dstLayout->bindings[dstLayout->getBindingIndex(entry.dstBinding)];
@@ -294,6 +294,7 @@
 			{
 				imageSampler[i].updateSampler(vk::Cast(update->sampler));
 			}
+			imageSampler[i].device = device;
 		}
 	}
 	else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
@@ -319,6 +320,7 @@
 			imageSampler[i].texture.width = sw::replicate(static_cast<float>(numElements));
 			imageSampler[i].texture.height = sw::replicate(1);
 			imageSampler[i].texture.depth = sw::replicate(1);
+			imageSampler[i].device = device;
 
 			sw::Mipmap &mipmap = imageSampler[i].texture.mipmap[0];
 			mipmap.buffer = bufferView->getPointer();
@@ -360,6 +362,7 @@
 			imageSampler[i].type = imageView->getType();
 			imageSampler[i].swizzle = imageView->getComponentMapping();
 			imageSampler[i].format = format;
+			imageSampler[i].device = device;
 
 			auto &subresourceRange = imageView->getSubresourceRange();
 
@@ -572,7 +575,7 @@
 	mipmap.sliceP[3] = sliceP;
 }
 
-void DescriptorSetLayout::WriteDescriptorSet(const VkWriteDescriptorSet& writeDescriptorSet)
+void DescriptorSetLayout::WriteDescriptorSet(Device* device, const VkWriteDescriptorSet& writeDescriptorSet)
 {
 	DescriptorSet* dstSet = vk::Cast(writeDescriptorSet.dstSet);
 	VkDescriptorUpdateTemplateEntry e;
@@ -611,7 +614,7 @@
 		UNIMPLEMENTED("descriptor type %u", writeDescriptorSet.descriptorType);
 	}
 
-	WriteDescriptorSet(dstSet, e, reinterpret_cast<char const *>(ptr));
+	WriteDescriptorSet(device, dstSet, e, reinterpret_cast<char const *>(ptr));
 }
 
 void DescriptorSetLayout::CopyDescriptorSet(const VkCopyDescriptorSet& descriptorCopies)
diff --git a/src/Vulkan/VkDescriptorSetLayout.hpp b/src/Vulkan/VkDescriptorSetLayout.hpp
index 44ac8f2..73535e8 100644
--- a/src/Vulkan/VkDescriptorSetLayout.hpp
+++ b/src/Vulkan/VkDescriptorSetLayout.hpp
@@ -25,6 +25,7 @@
 {
 
 class DescriptorSet;
+class Device;
 
 // TODO(b/129523279): Move to the Device or Pipeline layer.
 struct alignas(16) SampledImageDescriptor
@@ -35,6 +36,7 @@
 
 	// TODO(b/129523279): Minimize to the data actually needed.
 	vk::Sampler sampler;
+	vk::Device* device;
 
 	uint32_t imageViewId;
 	VkImageViewType type;
@@ -84,10 +86,10 @@
 	static size_t ComputeRequiredAllocationSize(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
 
 	static size_t GetDescriptorSize(VkDescriptorType type);
-	static void WriteDescriptorSet(const VkWriteDescriptorSet& descriptorWrites);
+	static void WriteDescriptorSet(Device* device, const VkWriteDescriptorSet& descriptorWrites);
 	static void CopyDescriptorSet(const VkCopyDescriptorSet& descriptorCopies);
 
-	static void WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src);
+	static void WriteDescriptorSet(Device* device, DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src);
 	static void WriteTextureLevelInfo(sw::Texture *texture, int level, int width, int height, int depth, int pitchP, int sliceP);
 
 	void initialize(DescriptorSet* descriptorSet);
diff --git a/src/Vulkan/VkDescriptorUpdateTemplate.cpp b/src/Vulkan/VkDescriptorUpdateTemplate.cpp
index 76acbe7..e70ad73 100644
--- a/src/Vulkan/VkDescriptorUpdateTemplate.cpp
+++ b/src/Vulkan/VkDescriptorUpdateTemplate.cpp
@@ -35,14 +35,14 @@
 		return info->descriptorUpdateEntryCount * sizeof(VkDescriptorUpdateTemplateEntry);
 	}
 
-	void DescriptorUpdateTemplate::updateDescriptorSet(VkDescriptorSet vkDescriptorSet, const void* pData)
+	void DescriptorUpdateTemplate::updateDescriptorSet(Device* device, VkDescriptorSet vkDescriptorSet, const void* pData)
 	{
 
 		DescriptorSet* descriptorSet = vk::Cast(vkDescriptorSet);
 
 		for(uint32_t i = 0; i < descriptorUpdateEntryCount; i++)
 		{
-			DescriptorSetLayout::WriteDescriptorSet(descriptorSet, descriptorUpdateEntries[i],
+			DescriptorSetLayout::WriteDescriptorSet(device, descriptorSet, descriptorUpdateEntries[i],
 													reinterpret_cast<char const *>(pData));
 		}
 	}
diff --git a/src/Vulkan/VkDescriptorUpdateTemplate.hpp b/src/Vulkan/VkDescriptorUpdateTemplate.hpp
index 7f0e5be..90a8b96 100644
--- a/src/Vulkan/VkDescriptorUpdateTemplate.hpp
+++ b/src/Vulkan/VkDescriptorUpdateTemplate.hpp
@@ -20,6 +20,7 @@
 namespace vk

 {

 	class DescriptorSetLayout;

+	class Device;

 

 	class DescriptorUpdateTemplate : public Object<DescriptorUpdateTemplate, VkDescriptorUpdateTemplate>

 	{

@@ -28,7 +29,7 @@
 

 		static size_t ComputeRequiredAllocationSize(const VkDescriptorUpdateTemplateCreateInfo* info);

 

-		void updateDescriptorSet(VkDescriptorSet descriptorSet, const void* pData);

+		void updateDescriptorSet(Device* device, VkDescriptorSet descriptorSet, const void* pData);

 

 	private:

 		uint32_t                              descriptorUpdateEntryCount = 0;

diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index 6b918cd..435985c 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -36,6 +36,22 @@
 namespace vk
 {
 
+rr::Routine* Device::SamplingRoutineCache::query(const vk::Device::SamplingRoutineCache::Key& key) const
+{
+	return cache.query(hash(key));
+}
+
+void Device::SamplingRoutineCache::add(const vk::Device::SamplingRoutineCache::Key& key, rr::Routine* routine)
+{
+	ASSERT(routine);
+	cache.add(hash(key), routine);
+}
+
+std::size_t Device::SamplingRoutineCache::hash(const vk::Device::SamplingRoutineCache::Key &key) const
+{
+	return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView;
+}
+
 Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures)
 	: physicalDevice(physicalDevice),
 	  queues(reinterpret_cast<Queue*>(mem)),
@@ -72,7 +88,7 @@
 	}
 
 	// FIXME (b/119409619): use an allocator here so we can control all memory allocations
-	blitter = new sw::Blitter();
+	blitter.reset(new sw::Blitter());
 }
 
 void Device::destroy(const VkAllocationCallbacks* pAllocator)
@@ -83,8 +99,6 @@
 	}
 
 	vk::deallocate(queues, pAllocator);
-
-	delete blitter;
 }
 
 size_t Device::ComputeRequiredAllocationSize(const VkDeviceCreateInfo* pCreateInfo)
@@ -212,7 +226,7 @@
 {
 	for(uint32_t i = 0; i < descriptorWriteCount; i++)
 	{
-		DescriptorSetLayout::WriteDescriptorSet(pDescriptorWrites[i]);
+		DescriptorSetLayout::WriteDescriptorSet(this, pDescriptorWrites[i]);
 	}
 
 	for(uint32_t i = 0; i < descriptorCopyCount; i++)
@@ -221,4 +235,18 @@
 	}
 }
 
+Device::SamplingRoutineCache* Device::getSamplingRoutineCache()
+{
+	if(!samplingRoutineCache.get())
+	{
+		samplingRoutineCache.reset(new SamplingRoutineCache());
+	}
+	return samplingRoutineCache.get();
+}
+
+std::mutex& Device::getSamplingRoutineCacheMutex()
+{
+	return samplingRoutineCacheMutex;
+}
+
 } // namespace vk
diff --git a/src/Vulkan/VkDevice.hpp b/src/Vulkan/VkDevice.hpp
index 3e262d3..52212f3 100644
--- a/src/Vulkan/VkDevice.hpp
+++ b/src/Vulkan/VkDevice.hpp
@@ -16,10 +16,15 @@
 #define VK_DEVICE_HPP_
 
 #include "VkObject.hpp"
+#include "Device/LRUCache.hpp"
+#include "Reactor/Routine.hpp"
+#include <memory>
+#include <mutex>
 
 namespace sw
 {
 	class Blitter;
+	class SamplingRoutineCache;
 }
 
 namespace vk
@@ -48,19 +53,43 @@
 	void updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites,
 	                          uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies);
 	const VkPhysicalDeviceFeatures &getEnabledFeatures() const { return enabledFeatures; }
-	sw::Blitter* getBlitter() const { return blitter; }
+	sw::Blitter* getBlitter() const { return blitter.get(); }
+
+	class SamplingRoutineCache
+	{
+	public:
+		SamplingRoutineCache() : cache(1024) {}
+		~SamplingRoutineCache() {}
+
+		struct Key
+		{
+			uint32_t instruction;
+			uint32_t sampler;
+			uint32_t imageView;
+		};
+
+		rr::Routine* query(const Key& key) const;
+		void add(const Key& key, rr::Routine* routine);
+
+	private:
+		std::size_t hash(const Key &key) const;
+		sw::LRUCache<std::size_t, rr::Routine> cache;
+	};
+
+	SamplingRoutineCache* getSamplingRoutineCache();
+	std::mutex& getSamplingRoutineCacheMutex();
 
 private:
 	PhysicalDevice *const physicalDevice = nullptr;
 	Queue *const queues = nullptr;
 	uint32_t queueCount = 0;
-
-	const uint32_t enabledExtensionCount = 0;
+	std::unique_ptr<sw::Blitter> blitter;
+	std::unique_ptr<SamplingRoutineCache> samplingRoutineCache;
+	std::mutex samplingRoutineCacheMutex;
+	uint32_t enabledExtensionCount = 0;
 	typedef char ExtensionName[VK_MAX_EXTENSION_NAME_SIZE];
 	ExtensionName* extensions = nullptr;
 	const VkPhysicalDeviceFeatures enabledFeatures = {};
-
-	sw::Blitter* blitter = nullptr;
 };
 
 using DispatchableDevice = DispatchableObject<Device, VkDevice>;
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index cac17f5..2450b54 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -2558,7 +2558,7 @@
 	TRACE("(VkDevice device = %p, VkDescriptorSet descriptorSet = %p, VkDescriptorUpdateTemplate descriptorUpdateTemplate = %p, const void* pData = %p)",
 	      device, static_cast<void*>(descriptorSet), static_cast<void*>(descriptorUpdateTemplate), pData);
 
-	vk::Cast(descriptorUpdateTemplate)->updateDescriptorSet(descriptorSet, pData);
+	vk::Cast(descriptorUpdateTemplate)->updateDescriptorSet(vk::Cast(device), descriptorSet, pData);
 }
 
 VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, VkExternalBufferProperties* pExternalBufferProperties)