Reactor: Use std::shared_ptr to ref-count rr::Routines

The intrusive reference counting had no smart pointer, making it difficult to correctly manage reference counts. Most of the codebase did not call bind() and unbind() explicitly, and relied on some other mechanism for keeping the routine alive.

By switching to std::shared_ptr, we automatically get guarantees that routine cache eviction will not destroy the routine while it is in use.

Bug: b/137524292
Bug: b/137649247
Change-Id: I38b5e8ba3ee084572a427a1de20f4f017ceaae5a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/34168
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 166049e..87668b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1535,7 +1535,6 @@
         ${SOURCE_DIR}/Reactor/Reactor.cpp
         ${SOURCE_DIR}/Reactor/Reactor.hpp
         ${SOURCE_DIR}/Reactor/SubzeroReactor.cpp
-        ${SOURCE_DIR}/Reactor/Routine.cpp
         ${SOURCE_DIR}/Reactor/Optimizer.cpp
         ${SOURCE_DIR}/Reactor/Nucleus.hpp
         ${SOURCE_DIR}/Reactor/Routine.hpp
@@ -1670,7 +1669,6 @@
     ${SOURCE_DIR}/Reactor/LLVMReactorDebugInfo.cpp
     ${SOURCE_DIR}/Reactor/LLVMReactorDebugInfo.hpp
     ${SOURCE_DIR}/Reactor/Nucleus.hpp
-    ${SOURCE_DIR}/Reactor/Routine.cpp
     ${SOURCE_DIR}/Reactor/Routine.hpp
     ${SOURCE_DIR}/Reactor/CPUID.cpp
     ${SOURCE_DIR}/Reactor/CPUID.hpp
diff --git a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj
index 956f1fa..458f73c 100644
--- a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj
+++ b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj
@@ -127,7 +127,6 @@
     <ClCompile Include="$(SolutionDir)src\Reactor\LLVMReactorDebugInfo.cpp" />

     <ClInclude Include="$(SolutionDir)src\Reactor\LLVMReactorDebugInfo.hpp" />

     <ClInclude Include="$(SolutionDir)src\Reactor\Nucleus.hpp" />

-    <ClCompile Include="$(SolutionDir)src\Reactor\Routine.cpp" />

     <ClInclude Include="$(SolutionDir)src\Reactor\Routine.hpp" />

     <ClCompile Include="$(SolutionDir)src\Reactor\CPUID.cpp" />

     <ClInclude Include="$(SolutionDir)src\Reactor\CPUID.hpp" />

diff --git a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters
index df9efd5..e31e252 100644
--- a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters
+++ b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters
@@ -10,9 +10,6 @@
     <ClCompile Include="$(SolutionDir)src\Reactor\LLVMReactorDebugInfo.cpp">

       <Filter>src\Reactor</Filter>

     </ClCompile>

-    <ClCompile Include="$(SolutionDir)src\Reactor\Routine.cpp">

-      <Filter>src\Reactor</Filter>

-    </ClCompile>

     <ClCompile Include="$(SolutionDir)src\Reactor\CPUID.cpp">

       <Filter>src\Reactor</Filter>

     </ClCompile>

diff --git a/docs/Reactor.md b/docs/Reactor.md
index 7085cc8..df016d7 100644
--- a/docs/Reactor.md
+++ b/docs/Reactor.md
@@ -61,7 +61,7 @@
 The Routine is obtained and materialized by "calling" the ```Function<>``` object to give it a name:

 

 ```C++

-Routine *routine = function("one");

+auto routine = function("one");

 ```

 

 Finally, we can obtain the function pointer to the entry point of the routine, and call it:

@@ -84,9 +84,9 @@
 {

     Int x = function.Arg<0>();

     Int y = function.Arg<1>();

-   

+

     Int sum = x + y;

-   

+

     Return(sum);

 }

 ```

@@ -119,9 +119,9 @@
 Function<Int(Float)> function;

 {

     Float x = function.Arg<0>();

-   

+

     Int cast = Int(x);

-   

+

     Return(cast);

 }

 ```

@@ -132,9 +132,9 @@
 Function<Int(Float)> function;

 {

     Float x = function.Arg<0>();

-   

+

     Int reinterpret = As<Int>(x);

-   

+

     Return(reinterpret);

 }

 ```

@@ -185,7 +185,7 @@
 Function<Float(Float)> function;

 {

     Pointer<Float> x = function.Arg<0>();

-   

+

     If(x > 0.0f)

     {

         Return(1.0f);

diff --git a/src/Android.bp b/src/Android.bp
index 554a533..8f709bc 100644
--- a/src/Android.bp
+++ b/src/Android.bp
@@ -176,7 +176,6 @@
     srcs: [
         "Reactor/Reactor.cpp",
         "Reactor/LLVMReactor.cpp",
-        "Reactor/Routine.cpp",
         "Reactor/Debug.cpp",
     ],
 
@@ -196,7 +195,6 @@
     srcs: [
         "Reactor/Reactor.cpp",
         "Reactor/LLVMReactor.cpp",
-        "Reactor/Routine.cpp",
         "Reactor/Debug.cpp",
     ],
 
@@ -223,7 +221,6 @@
     srcs: [
         "Reactor/Reactor.cpp",
         "Reactor/SubzeroReactor.cpp",
-        "Reactor/Routine.cpp",
         "Reactor/Optimizer.cpp",
         "Reactor/Debug.cpp",
     ],
diff --git a/src/Android.mk b/src/Android.mk
index 4bcd422..4777003 100644
--- a/src/Android.mk
+++ b/src/Android.mk
@@ -57,7 +57,6 @@
 
 COMMON_SRC_FILES += \
 	Reactor/Reactor.cpp \
-	Reactor/Routine.cpp \
 	Reactor/Debug.cpp \
 	Reactor/DebugAndroid.cpp \
 	Reactor/ExecutableMemory.cpp
diff --git a/src/D3D9/Direct3DDevice9.cpp b/src/D3D9/Direct3DDevice9.cpp
index 71f5f53..2f69420 100644
--- a/src/D3D9/Direct3DDevice9.cpp
+++ b/src/D3D9/Direct3DDevice9.cpp
@@ -1830,7 +1830,7 @@
 			void *destBuffer = dest->lockExternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
 
 			static void (__cdecl *blitFunction)(void *dst, void *src);
-			static sw::Routine *blitRoutine;
+			static std::shared_ptr<sw::Routine> blitRoutine;
 			static sw::BlitState blitState = {};
 
 			sw::BlitState update;
@@ -1846,8 +1846,6 @@
 			if(memcmp(&blitState, &update, sizeof(sw::BlitState)) != 0)
 			{
 				blitState = update;
-				delete blitRoutine;
-
 				blitRoutine = sw::FrameBuffer::copyRoutine(blitState);
 				blitFunction = (void(__cdecl*)(void*, void*))blitRoutine->getEntry();
 			}
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index fb4885e..f6d714f 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -52,7 +52,7 @@
 		}
 
 		State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), { 0xF });
-		Routine *blitRoutine = getBlitRoutine(state);
+		auto blitRoutine = getBlitRoutine(state);
 		if(!blitRoutine)
 		{
 			return;
@@ -1304,7 +1304,7 @@
 		return s;
 	}
 
-	Routine *Blitter::generate(const State &state)
+	std::shared_ptr<Routine> Blitter::generate(const State &state)
 	{
 		Function<Void(Pointer<Byte>)> function;
 		{
@@ -1538,10 +1538,10 @@
 		return function("BlitRoutine");
 	}
 
-	Routine *Blitter::getBlitRoutine(const State &state)
+	std::shared_ptr<Routine> Blitter::getBlitRoutine(const State &state)
 	{
 		std::unique_lock<std::mutex> lock(blitMutex);
-		Routine *blitRoutine = blitCache.query(state);
+		auto blitRoutine = blitCache.query(state);
 
 		if(!blitRoutine)
 		{
@@ -1559,10 +1559,10 @@
 		return blitRoutine;
 	}
 
-	Routine *Blitter::getCornerUpdateRoutine(const State &state)
+	std::shared_ptr<Routine> Blitter::getCornerUpdateRoutine(const State &state)
 	{
 		std::unique_lock<std::mutex> lock(cornerUpdateMutex);
-		Routine *cornerUpdateRoutine = cornerUpdateCache.query(state);
+		auto cornerUpdateRoutine = cornerUpdateCache.query(state);
 
 		if(!cornerUpdateRoutine)
 		{
@@ -1587,7 +1587,7 @@
 		State state(format, format.getNonQuadLayoutFormat(), VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
 					{false, false});
 
-		Routine *blitRoutine = getBlitRoutine(state);
+		auto blitRoutine = getBlitRoutine(state);
 		if(!blitRoutine)
 		{
 			return;
@@ -1653,7 +1653,7 @@
 		State state(format.getNonQuadLayoutFormat(), format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
 					{false, false});
 
-		Routine *blitRoutine = getBlitRoutine(state);
+		auto blitRoutine = getBlitRoutine(state);
 		if(!blitRoutine)
 		{
 			return;
@@ -1760,7 +1760,7 @@
 		                    (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
 		                    (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
 
-		Routine *blitRoutine = getBlitRoutine(state);
+		auto blitRoutine = getBlitRoutine(state);
 		if(!blitRoutine)
 		{
 			return;
@@ -1857,7 +1857,7 @@
 		write(c0, layer + ComputeOffset(x0, y0, pitchB, bytes, quadLayout), state);
 	}
 
-	Routine *Blitter::generateCornerUpdate(const State& state)
+	std::shared_ptr<Routine> Blitter::generateCornerUpdate(const State& state)
 	{
 		// Reading and writing from/to the same image
 		ASSERT(state.sourceFormat == state.destFormat);
@@ -1958,7 +1958,7 @@
 			UNIMPLEMENTED("Multi-sampled cube: %d samples", static_cast<int>(samples));
 		}
 
-		Routine *cornerUpdateRoutine = getCornerUpdateRoutine(state);
+		auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
 		if(!cornerUpdateRoutine)
 		{
 			return;
diff --git a/src/Device/Blitter.hpp b/src/Device/Blitter.hpp
index b95f14c..c8cddf3 100644
--- a/src/Device/Blitter.hpp
+++ b/src/Device/Blitter.hpp
@@ -134,10 +134,10 @@
 		static Int ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout);
 		static Float4 LinearToSRGB(Float4 &color);
 		static Float4 sRGBtoLinear(Float4 &color);
-		Routine *getBlitRoutine(const State &state);
-		Routine *generate(const State &state);
-		Routine *getCornerUpdateRoutine(const State &state);
-		Routine *generateCornerUpdate(const State& state);
+		std::shared_ptr<Routine> getBlitRoutine(const State &state);
+		std::shared_ptr<Routine> generate(const State &state);
+		std::shared_ptr<Routine> getCornerUpdateRoutine(const State &state);
+		std::shared_ptr<Routine> generateCornerUpdate(const State& state);
 		void computeCubeCorner(Pointer<Byte>& layer, Int& x0, Int& x1, Int& y0, Int& y1, Int& pitchB, const State& state);
 
 		void copyCubeEdge(vk::Image* image,
diff --git a/src/Device/LRUCache.hpp b/src/Device/LRUCache.hpp
index 1e3b2d9..9f6c120 100644
--- a/src/Device/LRUCache.hpp
+++ b/src/Device/LRUCache.hpp
@@ -31,8 +31,8 @@
 
 		virtual ~LRUCache();
 
-		Data *query(const Key &key) const;
-		virtual Data *add(const Key &key, Data *data);
+		Data query(const Key &key) const;
+		virtual Data add(const Key &key, const Data &data);
 
 		int getSize() {return size;}
 		Key &getKey(int i) {return key[i];}
@@ -45,7 +45,7 @@
 
 		Key *key;
 		Key **ref;
-		Data **data;
+		Data *data;
 	};
 
 	template<class Key, class Data>
@@ -56,19 +56,19 @@
 		LRUConstCache(int n) : LRUBase(n) {}
 		~LRUConstCache() { clearConstCache(); }
 
-		Data *add(const Key &key, Data *data) override
+		Data add(const Key &key, const Data& data) override
 		{
 			constCacheNeedsUpdate = true;
 			return LRUBase::add(key, data);
 		}
 
 		void updateConstCache();
-		Data *queryConstCache(const Key &key) const;
+		Data queryConstCache(const Key &key) const;
 
 	private:
 		void clearConstCache();
 		bool constCacheNeedsUpdate = false;
-		std::unordered_map<Key, Data*> constCache;
+		std::unordered_map<Key, Data> constCache;
 	};
 
 	// Helper class for clearing the memory of objects at construction.
@@ -124,12 +124,10 @@
 
 		key = new Key[size];
 		ref = new Key*[size];
-		data = new Data*[size];
+		data = new Data[size];
 
 		for(int i = 0; i < size; i++)
 		{
-			data[i] = nullptr;
-
 			ref[i] = &key[i];
 		}
 	}
@@ -143,21 +141,12 @@
 		delete[] ref;
 		ref = nullptr;
 
-		for(int i = 0; i < size; i++)
-		{
-			if(data[i])
-			{
-				data[i]->unbind();
-				data[i] = nullptr;
-			}
-		}
-
 		delete[] data;
 		data = nullptr;
 	}
 
 	template<class Key, class Data>
-	Data *LRUCache<Key, Data>::query(const Key &key) const
+	Data LRUCache<Key, Data>::query(const Key &key) const
 	{
 		for(int i = top; i > top - fill; i--)
 		{
@@ -165,14 +154,14 @@
 
 			if(key == *ref[j])
 			{
-				Data *hit = data[j];
+				Data hit = data[j];
 
 				if(i != top)
 				{
 					// Move one up
 					int k = (j + 1) & mask;
 
-					Data *swapD = data[k];
+					Data swapD = data[k];
 					data[k] = data[j];
 					data[j] = swapD;
 
@@ -189,20 +178,12 @@
 	}
 
 	template<class Key, class Data>
-	Data *LRUCache<Key, Data>::add(const Key &key, Data *data)
+	Data LRUCache<Key, Data>::add(const Key &key, const Data &data)
 	{
 		top = (top + 1) & mask;
 		fill = fill + 1 < size ? fill + 1 : size;
 
 		*ref[top] = key;
-
-		data->bind();
-
-		if(this->data[top])
-		{
-			this->data[top]->unbind();
-		}
-
 		this->data[top] = data;
 
 		return data;
@@ -211,12 +192,6 @@
 	template<class Key, class Data>
 	void LRUConstCache<Key, Data>::clearConstCache()
 	{
-		auto it = constCache.begin();
-		auto itEnd = constCache.end();
-		for(; it != itEnd; ++it)
-		{
-			it->second->unbind();
-		}
 		constCache.clear();
 	}
 
@@ -231,7 +206,6 @@
 			{
 				if(LRUBase::data[i])
 				{
-					LRUBase::data[i]->bind();
 					constCache[*LRUBase::ref[i]] = LRUBase::data[i];
 				}
 			}
@@ -241,7 +215,7 @@
 	}
 
 	template<class Key, class Data>
-	Data *LRUConstCache<Key, Data>::queryConstCache(const Key &key) const
+	Data LRUConstCache<Key, Data>::queryConstCache(const Key &key) const
 	{
 		auto it = constCache.find(key);
 		return (it != constCache.end()) ? it->second : nullptr;
diff --git a/src/Device/PixelProcessor.cpp b/src/Device/PixelProcessor.cpp
index 898da83..d6593b2 100644
--- a/src/Device/PixelProcessor.cpp
+++ b/src/Device/PixelProcessor.cpp
@@ -227,12 +227,12 @@
 		return state;
 	}
 
-	Routine *PixelProcessor::routine(const State &state,
+	std::shared_ptr<Routine> PixelProcessor::routine(const State &state,
 		vk::PipelineLayout const *pipelineLayout,
 		SpirvShader const *pixelShader,
 		const vk::DescriptorSet::Bindings &descriptorSets)
 	{
-		Routine *routine = routineCache->query(state);
+		auto routine = routineCache->query(state);
 
 		if(!routine)
 		{
diff --git a/src/Device/PixelProcessor.hpp b/src/Device/PixelProcessor.hpp
index e48f677..17cda85 100644
--- a/src/Device/PixelProcessor.hpp
+++ b/src/Device/PixelProcessor.hpp
@@ -129,8 +129,8 @@
 
 	protected:
 		const State update(const Context* context) const;
-		Routine *routine(const State &state, vk::PipelineLayout const *pipelineLayout,
-		                 SpirvShader const *pixelShader, const vk::DescriptorSet::Bindings &descriptorSets);
+		std::shared_ptr<Routine> routine(const State &state, vk::PipelineLayout const *pipelineLayout,
+		                                 SpirvShader const *pixelShader, const vk::DescriptorSet::Bindings &descriptorSets);
 		void setRoutineCacheSize(int routineCacheSize);
 
 		// Other semi-constants
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index ec6c43b..f37e261 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -340,10 +340,6 @@
 		draw->indexType = indexType;
 		draw->batchSize = batch;
 
-		vertexRoutine->bind();
-		setupRoutine->bind();
-		pixelRoutine->bind();
-
 		draw->vertexRoutine = vertexRoutine;
 		draw->setupRoutine = setupRoutine;
 		draw->pixelRoutine = pixelRoutine;
@@ -787,9 +783,9 @@
 					draw.queries = nullptr;
 				}
 
-				draw.vertexRoutine->unbind();
-				draw.setupRoutine->unbind();
-				draw.pixelRoutine->unbind();
+				draw.vertexRoutine.reset();
+				draw.setupRoutine.reset();
+				draw.pixelRoutine.reset();
 
 				if(draw.events)
 				{
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index c19010b..2ae2ca5 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -252,9 +252,9 @@
 		SetupProcessor::State setupState;
 		PixelProcessor::State pixelState;
 
-		Routine *vertexRoutine;
-		Routine *setupRoutine;
-		Routine *pixelRoutine;
+		std::shared_ptr<Routine> vertexRoutine;
+		std::shared_ptr<Routine> setupRoutine;
+		std::shared_ptr<Routine> pixelRoutine;
 
 		vk::Device* device;
 	};
@@ -269,9 +269,9 @@
 		std::atomic<int> indexType;
 		std::atomic<int> batchSize;
 
-		Routine *vertexRoutine;
-		Routine *setupRoutine;
-		Routine *pixelRoutine;
+		std::shared_ptr<Routine> vertexRoutine;
+		std::shared_ptr<Routine> setupRoutine;
+		std::shared_ptr<Routine> pixelRoutine;
 
 		VertexProcessor::RoutinePointer vertexPointer;
 		SetupProcessor::RoutinePointer setupPointer;
diff --git a/src/Device/RoutineCache.hpp b/src/Device/RoutineCache.hpp
index 8420468..61f635a 100644
--- a/src/Device/RoutineCache.hpp
+++ b/src/Device/RoutineCache.hpp
@@ -24,7 +24,7 @@
 	using namespace rr;
 
 	template<class State>
-	using RoutineCache = LRUCache<State, Routine>;
+	using RoutineCache = LRUCache<State, std::shared_ptr<Routine>>;
 }
 
 #endif   // sw_RoutineCache_hpp
diff --git a/src/Device/SetupProcessor.cpp b/src/Device/SetupProcessor.cpp
index 8294514..abf4c36 100644
--- a/src/Device/SetupProcessor.cpp
+++ b/src/Device/SetupProcessor.cpp
@@ -94,9 +94,9 @@
 		return state;
 	}
 
-	Routine *SetupProcessor::routine(const State &state)
+	std::shared_ptr<Routine> SetupProcessor::routine(const State &state)
 	{
-		Routine *routine = routineCache->query(state);
+		auto routine = routineCache->query(state);
 
 		if(!routine)
 		{
diff --git a/src/Device/SetupProcessor.hpp b/src/Device/SetupProcessor.hpp
index c82b8b5..a84f818 100644
--- a/src/Device/SetupProcessor.hpp
+++ b/src/Device/SetupProcessor.hpp
@@ -67,7 +67,7 @@
 
 	protected:
 		State update(const sw::Context* context) const;
-		Routine *routine(const State &state);
+		std::shared_ptr<Routine> routine(const State &state);
 
 		void setRoutineCacheSize(int cacheSize);
 
diff --git a/src/Device/VertexProcessor.cpp b/src/Device/VertexProcessor.cpp
index f6cf89f..c6e5c13 100644
--- a/src/Device/VertexProcessor.cpp
+++ b/src/Device/VertexProcessor.cpp
@@ -94,12 +94,12 @@
 		return state;
 	}
 
-	Routine *VertexProcessor::routine(const State &state,
-	                                  vk::PipelineLayout const *pipelineLayout,
-	                                  SpirvShader const *vertexShader,
-	                                  const vk::DescriptorSet::Bindings &descriptorSets)
+	std::shared_ptr<Routine> VertexProcessor::routine(const State &state,
+	                                                  vk::PipelineLayout const *pipelineLayout,
+	                                                  SpirvShader const *vertexShader,
+	                                                  const vk::DescriptorSet::Bindings &descriptorSets)
 	{
-		Routine *routine = routineCache->query(state);
+		auto routine = routineCache->query(state);
 
 		if(!routine)   // Create one
 		{
diff --git a/src/Device/VertexProcessor.hpp b/src/Device/VertexProcessor.hpp
index 20bd0c8..24a9b3f 100644
--- a/src/Device/VertexProcessor.hpp
+++ b/src/Device/VertexProcessor.hpp
@@ -88,8 +88,8 @@
 
 	protected:
 		const State update(const sw::Context* context);
-		Routine *routine(const State &state, vk::PipelineLayout const *pipelineLayout,
-		                 SpirvShader const *vertexShader, const vk::DescriptorSet::Bindings &descriptorSets);
+		std::shared_ptr<Routine> routine(const State &state, vk::PipelineLayout const *pipelineLayout,
+		                                 SpirvShader const *vertexShader, const vk::DescriptorSet::Bindings &descriptorSets);
 
 		void setRoutineCacheSize(int cacheSize);
 
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index 7d2e6a0..5e4f6c3 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -66,8 +66,6 @@
 			blitThread->join();
 			delete blitThread;
 		}
-
-		delete blitRoutine;
 	}
 
 	void FrameBuffer::setCursorImage(sw::Surface *cursorImage)
@@ -154,8 +152,6 @@
 		if(memcmp(&blitState, &updateState, sizeof(BlitState)) != 0)
 		{
 			blitState = updateState;
-			delete blitRoutine;
-
 			blitRoutine = copyRoutine(blitState);
 			blitFunction = (void(*)(void*, void*, Cursor*))blitRoutine->getEntry();
 		}
@@ -163,7 +159,7 @@
 		blitFunction(framebuffer, renderbuffer, &cursor);
 	}
 
-	Routine *FrameBuffer::copyRoutine(const BlitState &state)
+	std::shared_ptr<Routine> FrameBuffer::copyRoutine(const BlitState &state)
 	{
 		const int width = state.width;
 		const int height = state.height;
diff --git a/src/Main/FrameBuffer.hpp b/src/Main/FrameBuffer.hpp
index f07feb3..94a1890 100644
--- a/src/Main/FrameBuffer.hpp
+++ b/src/Main/FrameBuffer.hpp
@@ -54,7 +54,7 @@
 		static void setCursorOrigin(int x0, int y0);
 		static void setCursorPosition(int x, int y);
 
-		static Routine *copyRoutine(const BlitState &state);
+		static std::shared_ptr<Routine> copyRoutine(const BlitState &state);
 
 	protected:
 		void copy(sw::Surface *source);
@@ -90,7 +90,7 @@
 		static Cursor cursor;
 
 		void (*blitFunction)(void *dst, void *src, Cursor *cursor);
-		Routine *blitRoutine;
+		std::shared_ptr<Routine> blitRoutine;
 		BlitState blitState;     // State of the current blitRoutine.
 		BlitState updateState;   // State of the routine to be generated.
 
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp
index a87aac6..80ac5df 100644
--- a/src/Pipeline/SetupRoutine.cpp
+++ b/src/Pipeline/SetupRoutine.cpp
@@ -601,7 +601,7 @@
 		#endif
 	}
 
-	Routine *SetupRoutine::getRoutine()
+	std::shared_ptr<Routine> SetupRoutine::getRoutine()
 	{
 		return routine;
 	}
diff --git a/src/Pipeline/SetupRoutine.hpp b/src/Pipeline/SetupRoutine.hpp
index 0ea0c71..469b4de 100644
--- a/src/Pipeline/SetupRoutine.hpp
+++ b/src/Pipeline/SetupRoutine.hpp
@@ -30,7 +30,7 @@
 		virtual ~SetupRoutine();
 
 		void generate();
-		Routine *getRoutine();
+		std::shared_ptr<Routine> getRoutine();
 
 	private:
 		void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool perspective, int component);
@@ -40,7 +40,7 @@
 
 		const SetupProcessor::State &state;
 
-		Routine *routine;
+		std::shared_ptr<Routine> routine;
 	};
 }
 
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 8dd0430..7c850ba 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1232,7 +1232,7 @@
 		std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;
 
 		static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler);
-		static rr::Routine *emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);
+		static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);
 
 		// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
 		static sw::TextureType convertTextureType(VkImageViewType imageViewType);
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 400bb55..5e56977 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -40,7 +40,7 @@
 
 	ASSERT(imageDescriptor->device);
 
-	rr::Routine* routine = imageDescriptor->device->findInConstCache(key);
+	auto routine = imageDescriptor->device->findInConstCache(key);
 	if(routine)
 	{
 		return (ImageSampler*)(routine->getEntry());
@@ -96,7 +96,7 @@
 	return (ImageSampler*)(routine->getEntry());
 }
 
-rr::Routine *SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState)
+std::shared_ptr<rr::Routine> SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState)
 {
 	// TODO(b/129523279): Hold a separate mutex lock for the sampler being built.
 	rr::Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<SIMD::Float>, Pointer<SIMD::Float>, Pointer<Byte>)> function;
diff --git a/src/Reactor/BUILD.gn b/src/Reactor/BUILD.gn
index 94a66da..ccde7db 100644
--- a/src/Reactor/BUILD.gn
+++ b/src/Reactor/BUILD.gn
@@ -46,7 +46,6 @@
     "Debug.cpp",
     "ExecutableMemory.cpp",
     "Reactor.cpp",
-    "Routine.cpp",
   ]
 }
 
diff --git a/src/Reactor/Coroutine.hpp b/src/Reactor/Coroutine.hpp
index 3eadec2..aaf08ae 100644
--- a/src/Reactor/Coroutine.hpp
+++ b/src/Reactor/Coroutine.hpp
@@ -168,7 +168,7 @@
 	{
 		if(core != nullptr)
 		{
-			routine.reset(core->acquireCoroutine("coroutine", cfg));
+			routine = core->acquireCoroutine("coroutine", cfg);
 			core.reset(nullptr);
 		}
 	}
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 9b3dc30..c5e4e0f 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -452,10 +452,10 @@
 			passManager->run(*module);
 		}
 
-		rr::Routine *acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
+		std::shared_ptr<rr::Routine> acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
 		{
 			ASSERT(module);
-			return new JITRoutine(std::move(module), funcs, count, cfg);
+			return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
 		}
 
 		const rr::Config config;
@@ -1229,7 +1229,7 @@
 		return ::defaultConfig();
 	}
 
-	Routine *Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
+	std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
 	{
 		auto cfg = cfgEdit.apply(jit->config);
 
@@ -4787,7 +4787,7 @@
 	jit->builder->SetInsertPoint(resumeBlock);
 }
 
-Routine* Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
+std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
 {
 	bool isCoroutine = jit->coroutine.id != nullptr;
 	if (isCoroutine)
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 0ed1217..cc20e27 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -15,11 +15,12 @@
 #ifndef rr_Nucleus_hpp
 #define rr_Nucleus_hpp
 
+#include <atomic>
 #include <cassert>
 #include <cstdarg>
 #include <cstdint>
+#include <memory>
 #include <vector>
-#include <atomic>
 
 #ifdef None
 #undef None  // b/127920555
@@ -128,7 +129,7 @@
 		static void adjustDefaultConfig(const Config::Edit &cfgEdit);
 		static Config getDefaultConfig();
 
-		Routine *acquireRoutine(const char *name, const Config::Edit &cfgEdit = Config::Edit::None);
+		std::shared_ptr<Routine> acquireRoutine(const char *name, const Config::Edit &cfgEdit = Config::Edit::None);
 
 		static Value *allocateStackVariable(Type *type, int arraySize = 0);
 		static BasicBlock *createBasicBlock();
@@ -155,7 +156,7 @@
 		};
 
 		static void createCoroutine(Type *ReturnType, std::vector<Type*> &Params);
-		Routine *acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None);
+		std::shared_ptr<Routine> acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None);
 		static void yield(Value*);
 
 		// Terminators
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 4c82780..5add9cd 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2464,8 +2464,8 @@
 			return Argument<typename std::tuple_element<index, std::tuple<Arguments...>>::type>(arg);
 		}
 
-		Routine *operator()(const char *name, ...);
-		Routine *operator()(const Config::Edit &cfg, const char *name, ...);
+		std::shared_ptr<Routine> operator()(const char *name, ...);
+		std::shared_ptr<Routine> operator()(const Config::Edit &cfg, const char *name, ...);
 
 	protected:
 		Nucleus *core;
@@ -3031,7 +3031,7 @@
 	}
 
 	template<typename Return, typename... Arguments>
-	Routine *Function<Return(Arguments...)>::operator()(const char *name, ...)
+	std::shared_ptr<Routine> Function<Return(Arguments...)>::operator()(const char *name, ...)
 	{
 		char fullName[1024 + 1];
 
@@ -3044,7 +3044,7 @@
 	}
 
 	template<typename Return, typename... Arguments>
-	Routine *Function<Return(Arguments...)>::operator()(const Config::Edit &cfg, const char *name, ...)
+	std::shared_ptr<Routine> Function<Return(Arguments...)>::operator()(const Config::Edit &cfg, const char *name, ...)
 	{
 		char fullName[1024 + 1];
 
diff --git a/src/Reactor/Reactor.vcxproj b/src/Reactor/Reactor.vcxproj
index 5326d2c..a885861 100644
--- a/src/Reactor/Reactor.vcxproj
+++ b/src/Reactor/Reactor.vcxproj
@@ -290,7 +290,6 @@
     <ClCompile Include="LLVMReactorDebugInfo.cpp" />

     <ClCompile Include="ExecutableMemory.cpp" />

     <ClCompile Include="Reactor.cpp" />

-    <ClCompile Include="Routine.cpp" />

     <ClCompile Include="Thread.cpp" />

   </ItemGroup>

   <ItemGroup>

diff --git a/src/Reactor/Reactor.vcxproj.filters b/src/Reactor/Reactor.vcxproj.filters
index f66a728..c7efa0b 100644
--- a/src/Reactor/Reactor.vcxproj.filters
+++ b/src/Reactor/Reactor.vcxproj.filters
@@ -15,9 +15,6 @@
     </Filter>

   </ItemGroup>

   <ItemGroup>

-    <ClCompile Include="Routine.cpp">

-      <Filter>Source Files</Filter>

-    </ClCompile>

     <ClCompile Include="LLVMReactor.cpp">

       <Filter>Source Files</Filter>

     </ClCompile>

diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 490433e..9bc1227c 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -38,7 +38,7 @@
 
 TEST(ReactorUnitTests, Sample)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Int>, Int)> function;
@@ -73,12 +73,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Uninitialized)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int()> function;
@@ -110,12 +109,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Unreachable)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Int)> function;
@@ -141,12 +139,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, VariableAddress)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Int)> function;
@@ -169,12 +166,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, SubVectorLoadStore)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>, Pointer<Byte>)> function;
@@ -229,12 +225,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, VectorConstant)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -278,12 +273,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Concatenate)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -321,12 +315,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Swizzle)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -448,12 +441,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Branching)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Void)> function;
@@ -513,12 +505,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, MinMax)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -604,12 +595,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, NotNeg)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -689,12 +679,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, VectorCompare)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -751,12 +740,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, SaturatedAddAndSubtract)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -864,12 +852,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Unpack)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>,Pointer<Byte>)> function;
@@ -911,12 +898,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Pack)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -973,12 +959,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, MulHigh)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -1050,12 +1035,11 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, MulAdd)
 {
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function<Int(Pointer<Byte>)> function;
@@ -1086,7 +1070,6 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Call)
@@ -1097,7 +1080,7 @@
 		return;
 	}
 
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	struct Class
 	{
@@ -1137,7 +1120,6 @@
 		}
 	}
 
-	delete routine;
 }
 
 // Check that a complex generated function which utilizes all 8 or 16 XMM
@@ -1148,7 +1130,7 @@
 // It's necessary to inspect the registers in a debugger to actually verify.)
 TEST(ReactorUnitTests, PreserveXMMRegisters)
 {
-    Routine *routine = nullptr;
+    std::shared_ptr<Routine> routine;
 
     {
         Function<Void(Pointer<Byte>, Pointer<Byte>)> function;
@@ -1225,7 +1207,6 @@
         EXPECT_EQ(result[3], 0.0f);
     }
 
-    delete routine;
 }
 
 template <typename T>
@@ -1255,7 +1236,7 @@
 	using CType = typename TestFixture::CType;
 	using ReactorType = typename TestFixture::ReactorType;
 
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function< Int(ReactorType) > function;
@@ -1277,7 +1258,6 @@
 		}
 	}
 
-	delete routine;
 }
 
 template <typename T>
@@ -1327,7 +1307,7 @@
 	using CType = typename TestFixture::CType;
 	using ReactorType = typename TestFixture::ReactorType;
 
-	Routine *routine = nullptr;
+	std::shared_ptr<Routine> routine;
 
 	{
 		Function< Pointer<ReactorType>(Pointer<ReactorType>, Int) > function;
@@ -1367,7 +1347,6 @@
 		}
 	}
 
-	delete routine;
 }
 
 TEST(ReactorUnitTests, Coroutines_Fibonacci)
diff --git a/src/Reactor/Routine.cpp b/src/Reactor/Routine.cpp
deleted file mode 100644
index 23cf929..0000000
--- a/src/Reactor/Routine.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "Routine.hpp"
-
-#include "Thread.hpp"
-
-#include <cassert>
-
-namespace rr
-{
-	Routine::Routine()
-	{
-		bindCount = 0;
-	}
-
-	void Routine::bind()
-	{
-		atomicIncrement(&bindCount);
-	}
-
-	void Routine::unbind()
-	{
-		long count = atomicDecrement(&bindCount);
-
-		if(count == 0)
-		{
-			delete this;
-		}
-	}
-
-	Routine::~Routine()
-	{
-		assert(bindCount == 0);
-	}
-}
diff --git a/src/Reactor/Routine.hpp b/src/Reactor/Routine.hpp
index 0158bcc..67560e8 100644
--- a/src/Reactor/Routine.hpp
+++ b/src/Reactor/Routine.hpp
@@ -20,18 +20,10 @@
 	class Routine
 	{
 	public:
-		Routine();
-
-		virtual ~Routine();
+		Routine() = default;
+		virtual ~Routine() = default;
 
 		virtual const void *getEntry(int index = 0) = 0;
-
-		// Reference counting
-		void bind();
-		void unbind();
-
-	private:
-		volatile int bindCount;
 	};
 }
 
diff --git a/src/Reactor/Subzero.vcxproj b/src/Reactor/Subzero.vcxproj
index e9fecf4..6b6490d 100644
--- a/src/Reactor/Subzero.vcxproj
+++ b/src/Reactor/Subzero.vcxproj
@@ -230,7 +230,6 @@
     <ClCompile Include="ExecutableMemory.cpp" />

     <ClCompile Include="Optimizer.cpp" />

     <ClCompile Include="Reactor.cpp" />

-    <ClCompile Include="Routine.cpp" />

     <ClCompile Include="SubzeroReactor.cpp" />

   </ItemGroup>

   <ItemGroup>

diff --git a/src/Reactor/Subzero.vcxproj.filters b/src/Reactor/Subzero.vcxproj.filters
index 7229339..5b9bb2d 100644
--- a/src/Reactor/Subzero.vcxproj.filters
+++ b/src/Reactor/Subzero.vcxproj.filters
@@ -102,9 +102,6 @@
     <ClCompile Include="SubzeroReactor.cpp">

       <Filter>Source Files</Filter>

     </ClCompile>

-    <ClCompile Include="Routine.cpp">

-      <Filter>Source Files</Filter>

-    </ClCompile>

     <ClCompile Include="$(SolutionDir)third_party\subzero\src\IceInstX8632.cpp">

       <Filter>Source Files</Filter>

     </ClCompile>

diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 178c075..0cf2370 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -627,7 +627,7 @@
 		return ::defaultConfig();
 	}
 
-	Routine *Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
+	std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
 	{
 		if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
 		{
@@ -663,7 +663,7 @@
 		Routine *handoffRoutine = ::routine;
 		::routine = nullptr;
 
-		return handoffRoutine;
+		return std::shared_ptr<Routine>(handoffRoutine);
 	}
 
 	Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
@@ -3543,7 +3543,7 @@
 	void FlushDebug() {}
 
 	void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params) { UNIMPLEMENTED("createCoroutine"); }
-	Routine* Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; }
+	std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; }
 	void Nucleus::yield(Value* val) { UNIMPLEMENTED("Yield"); }
 
 }
diff --git a/src/Renderer/Blitter.cpp b/src/Renderer/Blitter.cpp
index 30ef1e9..d4edbfa 100644
--- a/src/Renderer/Blitter.cpp
+++ b/src/Renderer/Blitter.cpp
@@ -1179,7 +1179,7 @@
 		return s;
 	}
 
-	Routine *Blitter::generate(const State &state)
+	std::shared_ptr<Routine> Blitter::generate(const State &state)
 	{
 		Function<Void(Pointer<Byte>)> function;
 		{
@@ -1420,7 +1420,7 @@
 		state.destSamples = dest->getSamples();
 
 		criticalSection.lock();
-		Routine *blitRoutine = blitCache->query(state);
+		auto blitRoutine = blitCache->query(state);
 
 		if(!blitRoutine)
 		{
diff --git a/src/Renderer/Blitter.hpp b/src/Renderer/Blitter.hpp
index e3db745..9c6b4c0 100644
--- a/src/Renderer/Blitter.hpp
+++ b/src/Renderer/Blitter.hpp
@@ -111,7 +111,7 @@
 		static Float4 LinearToSRGB(Float4 &color);
 		static Float4 sRGBtoLinear(Float4 &color);
 		bool blitReactor(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, const Options &options);
-		Routine *generate(const State &state);
+		std::shared_ptr<Routine> generate(const State &state);
 
 		RoutineCache<State> *blitCache;
 		MutexLock criticalSection;
diff --git a/src/Renderer/LRUCache.hpp b/src/Renderer/LRUCache.hpp
index 1a1a302..bdd0950 100644
--- a/src/Renderer/LRUCache.hpp
+++ b/src/Renderer/LRUCache.hpp
@@ -27,9 +27,9 @@
 
 		~LRUCache();
 
-		Data *query(const Key &key) const;
-		Data *add(const Key &key, Data *data);
-	
+		Data query(const Key &key) const;
+		Data add(const Key &key, const Data &data);
+
 		int getSize() {return size;}
 		Key &getKey(int i) {return key[i];}
 
@@ -41,7 +41,7 @@
 
 		Key *key;
 		Key **ref;
-		Data **data;
+		Data *data;
 	};
 }
 
@@ -57,12 +57,10 @@
 
 		key = new Key[size];
 		ref = new Key*[size];
-		data = new Data*[size];
+		data = new Data[size];
 
 		for(int i = 0; i < size; i++)
 		{
-			data[i] = nullptr;
-
 			ref[i] = &key[i];
 		}
 	}
@@ -76,21 +74,12 @@
 		delete[] ref;
 		ref = nullptr;
 
-		for(int i = 0; i < size; i++)
-		{
-			if(data[i])
-			{
-				data[i]->unbind();
-				data[i] = nullptr;
-			}
-		}
-
 		delete[] data;
 		data = nullptr;
 	}
 
 	template<class Key, class Data>
-	Data *LRUCache<Key, Data>::query(const Key &key) const
+	Data LRUCache<Key, Data>::query(const Key &key) const
 	{
 		for(int i = top; i > top - fill; i--)
 		{
@@ -98,14 +87,14 @@
 
 			if(key == *ref[j])
 			{
-				Data *hit = data[j];
+				Data hit = data[j];
 
 				if(i != top)
 				{
 					// Move one up
 					int k = (j + 1) & mask;
 
-					Data *swapD = data[k];
+					Data swapD = data[k];
 					data[k] = data[j];
 					data[j] = swapD;
 
@@ -122,20 +111,12 @@
 	}
 
 	template<class Key, class Data>
-	Data *LRUCache<Key, Data>::add(const Key &key, Data *data)
+	Data LRUCache<Key, Data>::add(const Key &key, const Data &data)
 	{
 		top = (top + 1) & mask;
 		fill = fill + 1 < size ? fill + 1 : size;
 
 		*ref[top] = key;
-
-		data->bind();
-
-		if(this->data[top])
-		{
-			this->data[top]->unbind();
-		}
-
 		this->data[top] = data;
 
 		return data;
diff --git a/src/Renderer/PixelProcessor.cpp b/src/Renderer/PixelProcessor.cpp
index 133f90c..0b80727 100644
--- a/src/Renderer/PixelProcessor.cpp
+++ b/src/Renderer/PixelProcessor.cpp
@@ -1182,9 +1182,9 @@
 		return state;
 	}
 
-	Routine *PixelProcessor::routine(const State &state)
+	std::shared_ptr<Routine> PixelProcessor::routine(const State &state)
 	{
-		Routine *routine = routineCache->query(state);
+		auto routine = routineCache->query(state);
 
 		if(!routine)
 		{
diff --git a/src/Renderer/PixelProcessor.hpp b/src/Renderer/PixelProcessor.hpp
index 98300de..4fa627c 100644
--- a/src/Renderer/PixelProcessor.hpp
+++ b/src/Renderer/PixelProcessor.hpp
@@ -306,7 +306,7 @@
 
 	protected:
 		const State update() const;
-		Routine *routine(const State &state);
+		std::shared_ptr<Routine> routine(const State &state);
 		void setRoutineCacheSize(int routineCacheSize);
 
 		// Shader constants
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index a2c2035..c3c2260 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -360,10 +360,6 @@
 			draw->drawType = drawType;
 			draw->batchSize = batch;
 
-			vertexRoutine->bind();
-			setupRoutine->bind();
-			pixelRoutine->bind();
-
 			draw->vertexRoutine = vertexRoutine;
 			draw->setupRoutine = setupRoutine;
 			draw->pixelRoutine = pixelRoutine;
@@ -1105,9 +1101,9 @@
 					}
 				}
 
-				draw.vertexRoutine->unbind();
-				draw.setupRoutine->unbind();
-				draw.pixelRoutine->unbind();
+				draw.vertexRoutine.reset();
+				draw.setupRoutine.reset();
+				draw.pixelRoutine.reset();
 
 				sync->unlock();
 
diff --git a/src/Renderer/Renderer.hpp b/src/Renderer/Renderer.hpp
index 1118c59..4ed11f6 100644
--- a/src/Renderer/Renderer.hpp
+++ b/src/Renderer/Renderer.hpp
@@ -458,9 +458,9 @@
 		SetupProcessor::State setupState;
 		PixelProcessor::State pixelState;
 
-		Routine *vertexRoutine;
-		Routine *setupRoutine;
-		Routine *pixelRoutine;
+		std::shared_ptr<Routine> vertexRoutine;
+		std::shared_ptr<Routine> setupRoutine;
+		std::shared_ptr<Routine> pixelRoutine;
 	};
 
 	struct DrawCall
@@ -472,9 +472,9 @@
 		AtomicInt drawType;
 		AtomicInt batchSize;
 
-		Routine *vertexRoutine;
-		Routine *setupRoutine;
-		Routine *pixelRoutine;
+		std::shared_ptr<Routine> vertexRoutine;
+		std::shared_ptr<Routine> setupRoutine;
+		std::shared_ptr<Routine> pixelRoutine;
 
 		VertexProcessor::RoutinePointer vertexPointer;
 		SetupProcessor::RoutinePointer setupPointer;
diff --git a/src/Renderer/RoutineCache.hpp b/src/Renderer/RoutineCache.hpp
index 8420468..61f635a 100644
--- a/src/Renderer/RoutineCache.hpp
+++ b/src/Renderer/RoutineCache.hpp
@@ -24,7 +24,7 @@
 	using namespace rr;
 
 	template<class State>
-	using RoutineCache = LRUCache<State, Routine>;
+	using RoutineCache = LRUCache<State, std::shared_ptr<Routine>>;
 }
 
 #endif   // sw_RoutineCache_hpp
diff --git a/src/Renderer/SetupProcessor.cpp b/src/Renderer/SetupProcessor.cpp
index 7211406..d8b9b91 100644
--- a/src/Renderer/SetupProcessor.cpp
+++ b/src/Renderer/SetupProcessor.cpp
@@ -223,9 +223,9 @@
 		return state;
 	}
 
-	Routine *SetupProcessor::routine(const State &state)
+	std::shared_ptr<Routine> SetupProcessor::routine(const State &state)
 	{
-		Routine *routine = routineCache->query(state);
+		auto routine = routineCache->query(state);
 
 		if(!routine)
 		{
diff --git a/src/Renderer/SetupProcessor.hpp b/src/Renderer/SetupProcessor.hpp
index be0adc7..de12afd 100644
--- a/src/Renderer/SetupProcessor.hpp
+++ b/src/Renderer/SetupProcessor.hpp
@@ -91,7 +91,7 @@
 
 	protected:
 		State update() const;
-		Routine *routine(const State &state);
+		std::shared_ptr<Routine> routine(const State &state);
 
 		void setRoutineCacheSize(int cacheSize);
 
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index 463393a..9bd786e 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -1088,9 +1088,9 @@
 		return state;
 	}
 
-	Routine *VertexProcessor::routine(const State &state)
+	std::shared_ptr<Routine> VertexProcessor::routine(const State &state)
 	{
-		Routine *routine = routineCache->query(state);
+		auto routine = routineCache->query(state);
 
 		if(!routine)   // Create one
 		{
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index b53263b..329bdac 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -284,7 +284,7 @@
 		const Matrix &getViewTransform();
 
 		const State update(DrawType drawType);
-		Routine *routine(const State &state);
+		std::shared_ptr<Routine> routine(const State &state);
 
 		bool isFixedFunction();
 		void setRoutineCacheSize(int cacheSize);
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp
index 6024869..4f2955c 100644
--- a/src/Shader/SetupRoutine.cpp
+++ b/src/Shader/SetupRoutine.cpp
@@ -665,7 +665,7 @@
 		#endif
 	}
 
-	Routine *SetupRoutine::getRoutine()
+	std::shared_ptr<Routine> SetupRoutine::getRoutine()
 	{
 		return routine;
 	}
diff --git a/src/Shader/SetupRoutine.hpp b/src/Shader/SetupRoutine.hpp
index c1c3205..0f34249 100644
--- a/src/Shader/SetupRoutine.hpp
+++ b/src/Shader/SetupRoutine.hpp
@@ -30,7 +30,7 @@
 		virtual ~SetupRoutine();
 
 		void generate();
-		Routine *getRoutine();
+		std::shared_ptr<Routine> getRoutine();
 
 	private:
 		void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool sprite, bool perspective, bool wrap, int component);
@@ -40,7 +40,7 @@
 
 		const SetupProcessor::State &state;
 
-		Routine *routine;
+		std::shared_ptr<Routine> routine;
 	};
 }
 
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index a260882..230d2d0 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -36,18 +36,18 @@
 namespace vk
 {
 
-rr::Routine* Device::SamplingRoutineCache::query(const vk::Device::SamplingRoutineCache::Key& key) const
+std::shared_ptr<rr::Routine> Device::SamplingRoutineCache::query(const vk::Device::SamplingRoutineCache::Key& key) const
 {
 	return cache.query(hash(key));
 }
 
-void Device::SamplingRoutineCache::add(const vk::Device::SamplingRoutineCache::Key& key, rr::Routine* routine)
+void Device::SamplingRoutineCache::add(const vk::Device::SamplingRoutineCache::Key& key, const std::shared_ptr<rr::Routine>& routine)
 {
 	ASSERT(routine);
 	cache.add(hash(key), routine);
 }
 
-rr::Routine* Device::SamplingRoutineCache::queryConst(const vk::Device::SamplingRoutineCache::Key& key) const
+std::shared_ptr<rr::Routine> Device::SamplingRoutineCache::queryConst(const vk::Device::SamplingRoutineCache::Key& key) const
 {
 	return cache.queryConstCache(hash(key));
 }
@@ -251,7 +251,7 @@
 	return samplingRoutineCache.get();
 }
 
-rr::Routine* Device::findInConstCache(const SamplingRoutineCache::Key& key) const
+std::shared_ptr<rr::Routine> Device::findInConstCache(const SamplingRoutineCache::Key& key) const
 {
 	return samplingRoutineCache->queryConst(key);
 }
diff --git a/src/Vulkan/VkDevice.hpp b/src/Vulkan/VkDevice.hpp
index 9bba54e..721dda2 100644
--- a/src/Vulkan/VkDevice.hpp
+++ b/src/Vulkan/VkDevice.hpp
@@ -67,21 +67,21 @@
 			uint32_t imageView;
 		};
 
-		rr::Routine* query(const Key& key) const;
-		void add(const Key& key, rr::Routine* routine);
+		std::shared_ptr<rr::Routine> query(const Key& key) const;
+		void add(const Key& key, const std::shared_ptr<rr::Routine>& routine);
 
-		rr::Routine* queryConst(const Key& key) const;
+		std::shared_ptr<rr::Routine> queryConst(const Key& key) const;
 		void updateConstCache();
 
 		static std::size_t hash(const Key &key);
 
 	private:
-		sw::LRUConstCache<std::size_t, rr::Routine> cache;
+		sw::LRUConstCache<std::size_t, std::shared_ptr<rr::Routine>> cache;
 	};
 
 	SamplingRoutineCache* getSamplingRoutineCache() const;
 	std::mutex& getSamplingRoutineCacheMutex();
-	rr::Routine* findInConstCache(const SamplingRoutineCache::Key& key) const;
+	std::shared_ptr<rr::Routine> findInConstCache(const SamplingRoutineCache::Key& key) const;
 	void updateSamplingRoutineConstCache();
 
 private:
diff --git a/tests/fuzzers/VertexRoutineFuzzer.cpp b/tests/fuzzers/VertexRoutineFuzzer.cpp
index d63e293..d3d477f 100644
--- a/tests/fuzzers/VertexRoutineFuzzer.cpp
+++ b/tests/fuzzers/VertexRoutineFuzzer.cpp
@@ -203,7 +203,7 @@
 	sw::VertexProgram program(state, bytecodeShader.get());
 	program.generate();
 
-	sw::Routine *routine = program("VertexRoutine");
+	auto routine = program("VertexRoutine");
 	assert(routine);
 	const void *entry = routine->getEntry();
 	assert(entry); (void)entry;