Revert "Migrate from yarn to marl."

This reverts commit df6cc08ab401e49118746d91c8a63d03f1883b72.

Reason for revert: Utterly breaks chrome autorollers

Change-Id: I5bc05c538f30d689a078f47766c480be543d2a37
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36108
Reviewed-by: Ben Clayton <bclayton@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9fc93a5..370ba96 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -106,8 +106,6 @@
 option(REACTOR_EMIT_PRINT_LOCATION "Emit printing of location info for JIT functions" 0)
 option(REACTOR_VERIFY_LLVM_IR "Check reactor-generated LLVM IR is valid even in release builds" 0)
 
-set(BUILD_MARL ${BUILD_VULKAN})
-
 set(DEFAULT_REACTOR_BACKEND "LLVM")
 
 set(REACTOR_BACKEND ${DEFAULT_REACTOR_BACKEND} CACHE STRING "JIT compiler back-end used by Reactor")
@@ -267,6 +265,7 @@
 ###########################################################
 
 set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
+set(YARN_DIR ${SOURCE_DIR}/Yarn)
 set(OPENGL_DIR ${SOURCE_DIR}/OpenGL)
 set(OPENGL_COMPILER_DIR ${OPENGL_DIR}/compiler)
 set(VULKAN_DIR ${SOURCE_DIR}/Vulkan)
@@ -275,7 +274,6 @@
 set(LIBBACKTRACE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/libbacktrace/src)
 set(LIBBACKTRACE_CONFIG_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/libbacktrace/config)
 set(LIBBACKTRACE_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/libbacktrace/src)
-set(MARL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/marl/include)
 set(SUBZERO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/subzero)
 set(SUBZERO_LLVM_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/llvm-subzero)
 set(TESTS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tests)
@@ -1599,7 +1597,6 @@
 
 set(VULKAN_INCLUDE_DIR
     ${COMMON_INCLUDE_DIR}
-    ${MARL_INCLUDE_DIR}
 )
 
 ###########################################################
@@ -1763,6 +1760,22 @@
     ${CMAKE_CURRENT_SOURCE_DIR}/include/vulkan/*.h}
 )
 
+file(GLOB YARN_FULL_LIST
+    ${YARN_DIR}/*.cpp
+    ${YARN_DIR}/*.hpp
+    ${YARN_DIR}/*.c
+)
+
+if (NOT MSVC)
+    file(GLOB YARN_ASSEMBLY_LIST ${YARN_DIR}/*.S)
+    list(APPEND YARN_FULL_LIST ${YARN_ASSEMBLY_LIST})
+endif(NOT MSVC)
+
+set(YARN_LIST ${YARN_FULL_LIST})
+set(YARN_TEST_LIST ${YARN_FULL_LIST})
+list(FILTER YARN_LIST EXCLUDE REGEX ".*_test\\..*")
+list(FILTER YARN_TEST_LIST INCLUDE REGEX ".*_test\\..*")
+
 ###########################################################
 # Append OS specific files to lists
 ###########################################################
@@ -1861,6 +1874,15 @@
 # SwiftShader Targets
 ###########################################################
 
+add_library(Yarn STATIC ${YARN_LIST})
+set_target_properties(Yarn PROPERTIES
+    POSITION_INDEPENDENT_CODE 1
+    FOLDER "Core"
+    COMPILE_OPTIONS "${SWIFTSHADER_COMPILE_OPTIONS}"
+    COMPILE_DEFINITIONS "NO_SANITIZE_FUNCTION=;"
+)
+target_link_libraries(Yarn ${OS_LIBS})
+
 add_library(SwiftShader STATIC ${SWIFTSHADER_LIST})
 set_target_properties(SwiftShader PROPERTIES
     INCLUDE_DIRECTORIES "${COMMON_INCLUDE_DIR}"
@@ -2008,11 +2030,6 @@
     )
 endif()
 
-if(BUILD_MARL)
-    set(MARL_BUILD_TESTS OFF)
-    add_subdirectory(third_party/marl)
-endif(BUILD_MARL)
-
 if(BUILD_VULKAN)
     if (NOT TARGET SPIRV-Tools)
         # This variable is also used by SPIRV-Tools to locate SPIRV-Headers
@@ -2049,7 +2066,7 @@
     endif()
 
     set_shared_library_export_map(vk_swiftshader ${SOURCE_DIR}/Vulkan)
-    target_link_libraries(vk_swiftshader ${Reactor} marl ${OS_LIBS} SPIRV-Tools SPIRV-Tools-opt)
+    target_link_libraries(vk_swiftshader ${Reactor} Yarn ${OS_LIBS} SPIRV-Tools SPIRV-Tools-opt)
     add_custom_command(
         TARGET vk_swiftshader
         POST_BUILD
@@ -2170,6 +2187,29 @@
 endif(BUILD_TESTS)
 
 if(BUILD_TESTS)
+    # Yarn unit tests
+    file(GLOB YARN_TEST_LIST
+        ${YARN_DIR}/*_test.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/third_party/googletest/googletest/src/gtest-all.cc
+    )
+
+    set(YARN_TEST_INCLUDE_DIR
+        ${CMAKE_CURRENT_SOURCE_DIR}/third_party/googletest/googletest/include/
+        ${CMAKE_CURRENT_SOURCE_DIR}/third_party/googletest/googlemock/include/
+        ${CMAKE_CURRENT_SOURCE_DIR}/third_party/googletest/googletest/
+        ${CMAKE_CURRENT_SOURCE_DIR}/src
+    )
+
+    add_executable(yarn-unittests ${YARN_TEST_LIST})
+    set_target_properties(yarn-unittests PROPERTIES
+        INCLUDE_DIRECTORIES "${YARN_TEST_INCLUDE_DIR}"
+        FOLDER "Tests"
+        COMPILE_OPTIONS "${SWIFTSHADER_COMPILE_OPTIONS}"
+        COMPILE_DEFINITIONS "STANDALONE"
+    )
+
+    target_link_libraries(yarn-unittests Yarn ${OS_LIBS})
+
     # Math unit tests
     set(MATH_UNITTESTS_LIST
         ${CMAKE_CURRENT_SOURCE_DIR}/tests/MathUnitTests/main.cpp
diff --git a/src/Android.bp b/src/Android.bp
index b74589b..88d2a6f 100644
--- a/src/Android.bp
+++ b/src/Android.bp
@@ -492,6 +492,31 @@
     static_libs: [ "libswiftshader_compiler_debug" ],
 }
 
+// Yarn, fibers + scheduler library
+
+cc_library_static {
+    name: "libyarn",
+    vendor: true,
+
+    srcs: [
+        "Yarn/Debug.cpp",
+        "Yarn/Scheduler.cpp",
+        "Yarn/Thread.cpp",
+        "Yarn/Trace.cpp",
+    ],
+
+    arch: {
+        arm: { srcs: ["Yarn/OSFiber_arm.c", "Yarn/OSFiber_asm_arm.S"], },
+        arm64: { srcs: ["Yarn/OSFiber_aarch64.c", "Yarn/OSFiber_asm_aarch64.S"], },
+        x86: { srcs: ["Yarn/OSFiber_x86.c", "Yarn/OSFiber_asm_x86.S"], },
+        x86_64: { srcs: ["Yarn/OSFiber_x64.c", "Yarn/OSFiber_asm_x64.S"], },
+    },
+
+    cflags: [
+        "-Wno-unused-parameter",
+    ],
+}
+
 
 // Vulkan
 
@@ -567,7 +592,6 @@
     ],
 
     static_libs: [
-        "swiftshader_marl",
         "swiftshader_spirv-tools",
     ],
 
@@ -583,6 +607,7 @@
     static_libs: [
         "libswiftshadervk_llvm_debug",
         "libLLVM7_swiftshader",
+        "libyarn",
     ],
 
     defaults: [
@@ -597,6 +622,7 @@
     static_libs: [
         "libswiftshadervk_llvm_debug",
         "libLLVM7_swiftshader",
+        "libyarn",
     ],
 
     cflags: [
diff --git a/src/Device/BUILD.gn b/src/Device/BUILD.gn
index c412c71..9d99aac 100644
--- a/src/Device/BUILD.gn
+++ b/src/Device/BUILD.gn
@@ -61,10 +61,10 @@
   ]
 
   deps = [
-    "../../third_party/marl:Marl_headers",
     "../Pipeline:Pipeline_headers",
     "../System:System_headers",
     "../Vulkan:swiftshader_libvulkan_headers",
+    "../Yarn:Yarn_headers",
   ]
 
   public_deps = [
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index a7e5042..51b03f1 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -32,9 +32,9 @@
 #include "Pipeline/SpirvShader.hpp"
 #include "Vertex.hpp"
 
-#include "marl/containers.h"
-#include "marl/defer.h"
-#include "marl/trace.h"
+#include "Yarn/Containers.hpp"
+#include "Yarn/Defer.hpp"
+#include "Yarn/Trace.hpp"
 
 #undef max
 
@@ -166,7 +166,7 @@
 		if(count == 0) { return; }
 
 		auto id = nextDrawID++;
-		MARL_SCOPED_EVENT("draw %d", id);
+		YARN_SCOPED_EVENT("draw %d", id);
 
 		#ifndef NDEBUG
 		{
@@ -186,16 +186,16 @@
 			return;
 		}
 
-		marl::Pool<sw::DrawCall>::Loan draw;
+		yarn::Pool<sw::DrawCall>::Loan draw;
 		{
-			MARL_SCOPED_EVENT("drawCallPool.borrow()");
+			YARN_SCOPED_EVENT("drawCallPool.borrow()");
 			draw = drawCallPool.borrow();
 		}
 		draw->id = id;
 
 		if(update)
 		{
-			MARL_SCOPED_EVENT("update");
+			YARN_SCOPED_EVENT("update");
 			vertexState = VertexProcessor::update(context);
 			setupState = SetupProcessor::update(context);
 			pixelState = PixelProcessor::update(context);
@@ -416,7 +416,7 @@
 		pixelRoutine.reset();
 	}
 
-	void DrawCall::run(const marl::Loan<DrawCall>& draw, marl::Ticket::Queue* tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
+	void DrawCall::run(const yarn::Loan<DrawCall>& draw, yarn::Ticket::Queue* tickets, yarn::Ticket::Queue clusterQueues[MaxClusterCount])
 	{
 		draw->setup();
 
@@ -425,8 +425,8 @@
 		auto const numBatches = draw->numBatches;
 
 		auto ticket = tickets->take();
-		auto finally = marl::make_shared_finally([draw, ticket] {
-			MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
+		auto finally = yarn::make_shared_finally([draw, ticket] {
+			YARN_SCOPED_EVENT("FINISH draw %d", draw->id);
 			draw->teardown();
 			ticket.done();
 		});
@@ -443,7 +443,7 @@
 				batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
 			}
 
-			marl::schedule([draw, batch, finally] {
+			yarn::schedule([draw, batch, finally] {
 
 				processVertices(draw.get(), batch.get());
 
@@ -468,11 +468,11 @@
 
 	void DrawCall::processVertices(DrawCall* draw, BatchData* batch)
 	{
-		MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
+		YARN_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
 
 		unsigned int triangleIndices[MaxBatchSize + 1][3];  // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
 		{
-			MARL_SCOPED_EVENT("processPrimitiveVertices");
+			YARN_SCOPED_EVENT("processPrimitiveVertices");
 			processPrimitiveVertices(
 				triangleIndices,
 				draw->data->indices,
@@ -496,21 +496,21 @@
 
 	void DrawCall::processPrimitives(DrawCall* draw, BatchData* batch)
 	{
-		MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
+		YARN_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
 		auto triangles = &batch->triangles[0];
 		auto primitives = &batch->primitives[0];
 		batch->numVisible = draw->setupPrimitives(triangles, primitives, draw, batch->numPrimitives);
 	}
 
-	void DrawCall::processPixels(const marl::Loan<DrawCall>& draw, const marl::Loan<BatchData>& batch, const std::shared_ptr<marl::Finally>& finally)
+	void DrawCall::processPixels(const yarn::Loan<DrawCall>& draw, const yarn::Loan<BatchData>& batch, const std::shared_ptr<yarn::Finally>& finally)
 	{
 		struct Data
 		{
-			Data(const marl::Loan<DrawCall>& draw, const marl::Loan<BatchData>& batch, const std::shared_ptr<marl::Finally>& finally)
+			Data(const yarn::Loan<DrawCall>& draw, const yarn::Loan<BatchData>& batch, const std::shared_ptr<yarn::Finally>& finally)
 				: draw(draw), batch(batch), finally(finally) {}
-			marl::Loan<DrawCall> draw;
-			marl::Loan<BatchData> batch;
-			std::shared_ptr<marl::Finally> finally;
+			yarn::Loan<DrawCall> draw;
+			yarn::Loan<BatchData> batch;
+			std::shared_ptr<yarn::Finally> finally;
 		};
 		auto data = std::make_shared<Data>(draw, batch, finally);
 		for (int cluster = 0; cluster < MaxClusterCount; cluster++)
@@ -519,7 +519,7 @@
 			{
 				auto& draw = data->draw;
 				auto& batch = data->batch;
-				MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
+				YARN_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
 				draw->pixelPointer(&batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
 				batch->clusterTickets[cluster].done();
 			});
@@ -528,7 +528,7 @@
 
 	void Renderer::synchronize()
 	{
-		MARL_SCOPED_EVENT("synchronize");
+		YARN_SCOPED_EVENT("synchronize");
 		auto ticket = drawTickets.take();
 		ticket.wait();
 		device->updateSamplingRoutineConstCache();
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index 042f932..4b96471 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -24,9 +24,9 @@
 #include "Device/Config.hpp"
 #include "Vulkan/VkDescriptorSet.hpp"
 
-#include "marl/pool.h"
-#include "marl/finally.h"
-#include "marl/ticket.h"
+#include "Yarn/Pool.hpp"
+#include "Yarn/Finally.hpp"
+#include "Yarn/Ticket.hpp"
 
 #include <atomic>
 #include <list>
@@ -116,7 +116,7 @@
 	{
 		struct BatchData
 		{
-			using Pool = marl::BoundedPool<BatchData, MaxBatchCount, marl::PoolPolicy::Preserve>;
+			using Pool = yarn::BoundedPool<BatchData, MaxBatchCount, yarn::PoolPolicy::Preserve>;
 
 			TriangleBatch triangles;
 			PrimitiveBatch primitives;
@@ -125,19 +125,19 @@
 			unsigned int firstPrimitive;
 			unsigned int numPrimitives;
 			int numVisible;
-			marl::Ticket clusterTickets[MaxClusterCount];
+			yarn::Ticket clusterTickets[MaxClusterCount];
 		};
 
-		using Pool = marl::BoundedPool<DrawCall, MaxDrawCount, marl::PoolPolicy::Preserve>;
+		using Pool = yarn::BoundedPool<DrawCall, MaxDrawCount, yarn::PoolPolicy::Preserve>;
 		using SetupFunction = int(*)(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count);
 
 		DrawCall();
 		~DrawCall();
 
-		static void run(const marl::Loan<DrawCall>& draw, marl::Ticket::Queue* tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount]);
+		static void run(const yarn::Loan<DrawCall>& draw, yarn::Ticket::Queue* tickets, yarn::Ticket::Queue clusterQueues[MaxClusterCount]);
 		static void processVertices(DrawCall* draw, BatchData* batch);
 		static void processPrimitives(DrawCall* draw, BatchData* batch);
-		static void processPixels(const marl::Loan<DrawCall>& draw, const marl::Loan<BatchData>& batch, const std::shared_ptr<marl::Finally>& finally);
+		static void processPixels(const yarn::Loan<DrawCall>& draw, const yarn::Loan<BatchData>& batch, const std::shared_ptr<yarn::Finally>& finally);
 		void setup();
 		void teardown();
 
@@ -223,8 +223,8 @@
 		std::atomic<int> nextDrawID = {0};
 
 		vk::Query *occlusionQuery = nullptr;
-		marl::Ticket::Queue drawTickets;
-		marl::Ticket::Queue clusterQueues[MaxClusterCount];
+		yarn::Ticket::Queue drawTickets;
+		yarn::Ticket::Queue clusterQueues[MaxClusterCount];
 
 		VertexProcessor::State vertexState;
 		SetupProcessor::State setupState;
diff --git a/src/Pipeline/BUILD.gn b/src/Pipeline/BUILD.gn
index e17084a..72c6a3d 100644
--- a/src/Pipeline/BUILD.gn
+++ b/src/Pipeline/BUILD.gn
@@ -52,10 +52,10 @@
   ]
 
   deps = [
-    "../../third_party/marl:Marl_headers",
     "../Device:Device_headers",
     "../System:System_headers",
     "../Vulkan:swiftshader_libvulkan_headers",
+    "../Yarn:Yarn_headers",
   ]
 
   public_deps = [
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index d0f57be..2a86cc4 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -18,9 +18,9 @@
 #include "Vulkan/VkDebug.hpp"
 #include "Vulkan/VkPipelineLayout.hpp"
 
-#include "marl/defer.h"
-#include "marl/trace.h"
-#include "marl/waitgroup.h"
+#include "Yarn/Defer.hpp"
+#include "Yarn/Trace.hpp"
+#include "Yarn/WaitGroup.hpp"
 
 #include <queue>
 
@@ -44,7 +44,7 @@
 
 	void ComputeProgram::generate()
 	{
-		MARL_SCOPED_EVENT("ComputeProgram::generate");
+		YARN_SCOPED_EVENT("ComputeProgram::generate");
 
 		SpirvRoutine routine(pipelineLayout);
 		shader->emitProlog(&routine);
@@ -222,7 +222,7 @@
 		data.pushConstants = pushConstants;
 		data.constants = &sw::constants;
 
-		marl::WaitGroup wg;
+		yarn::WaitGroup wg;
 		const uint32_t batchCount = 16;
 
 		auto groupCount = groupCountX * groupCountY * groupCountZ;
@@ -230,7 +230,7 @@
 		for (uint32_t batchID = 0; batchID < batchCount && batchID < groupCount; batchID++)
 		{
 			wg.add(1);
-			marl::schedule([=, &data]
+			yarn::schedule([=, &data]
 			{
 				defer(wg.done());
 				std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
@@ -247,7 +247,7 @@
 					auto groupZ = baseGroupZ + groupOffsetZ;
 					auto groupY = baseGroupY + groupOffsetY;
 					auto groupX = baseGroupX + groupOffsetX;
-					MARL_SCOPED_EVENT("groupX: %d, groupY: %d, groupZ: %d", groupX, groupY, groupZ);
+					YARN_SCOPED_EVENT("groupX: %d, groupY: %d, groupZ: %d", groupX, groupY, groupZ);
 
 					using Coroutine = std::unique_ptr<rr::Stream<SpirvShader::YieldResult>>;
 					std::queue<Coroutine> coroutines;
diff --git a/src/Vulkan/BUILD.gn b/src/Vulkan/BUILD.gn
index 3e33cd7..673bf8d 100644
--- a/src/Vulkan/BUILD.gn
+++ b/src/Vulkan/BUILD.gn
@@ -148,13 +148,13 @@
   }
 
   deps = [
-    "../../third_party/marl:Marl",
     "${swiftshader_spirv_tools_dir}:SPIRV-Tools",
     "../Device",
     "../Pipeline",
     "../Reactor:swiftshader_llvm_reactor",
     "../System",
     "../WSI",
+    "../Yarn",
   ]
 
   include_dirs = [
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index c6cbcbc..34e17b0 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -1108,7 +1108,7 @@
 			// Everything else is deferred to the Renderer; we will treat those stages all as if they were
 			// `bottom of pipe`.
 			//
-			// FIXME(chrisforbes): once Marl is integrated, do this in a task so we don't have to stall here.
+			// FIXME(chrisforbes): once Yarn is integrated, do this in a task so we don't have to stall here.
 			executionState.renderer->synchronize();
 		}
 
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index b817b31..4ef1ee5 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -57,7 +57,7 @@
 	cache.updateConstCache();
 }
 
-Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, marl::Scheduler *scheduler)
+Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, yarn::Scheduler *scheduler)
 	: physicalDevice(physicalDevice),
 	  queues(reinterpret_cast<Queue*>(mem)),
 	  enabledExtensionCount(pCreateInfo->enabledExtensionCount),
diff --git a/src/Vulkan/VkDevice.hpp b/src/Vulkan/VkDevice.hpp
index 83ce1e9..24ed2c2 100644
--- a/src/Vulkan/VkDevice.hpp
+++ b/src/Vulkan/VkDevice.hpp
@@ -21,7 +21,7 @@
 #include <memory>
 #include <mutex>
 
-namespace marl
+namespace yarn
 {
 	class Scheduler;
 }
@@ -42,7 +42,7 @@
 public:
 	static constexpr VkSystemAllocationScope GetAllocationScope() { return VK_SYSTEM_ALLOCATION_SCOPE_DEVICE; }
 
-	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, marl::Scheduler *scheduler);
+	Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, yarn::Scheduler *scheduler);
 	void destroy(const VkAllocationCallbacks* pAllocator);
 
 	static size_t ComputeRequiredAllocationSize(const VkDeviceCreateInfo* pCreateInfo);
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index 86ea342..c5b72de 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -22,7 +22,7 @@
 #include "Pipeline/ComputeProgram.hpp"
 #include "Pipeline/SpirvShader.hpp"
 
-#include "marl/trace.h"
+#include "Yarn/Trace.hpp"
 
 #include "spirv-tools/optimizer.hpp"
 
@@ -259,7 +259,7 @@
 
 std::shared_ptr<sw::ComputeProgram> createProgram(const vk::PipelineCache::ComputeProgramKey& key)
 {
-	MARL_SCOPED_EVENT("createProgram");
+	YARN_SCOPED_EVENT("createProgram");
 
 	vk::DescriptorSet::Bindings descriptorSets;  // FIXME(b/129523279): Delay code generation until invoke time.
 	// TODO(b/119409619): use allocator.
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index 8e41bbb..329847d 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -19,10 +19,10 @@
 #include "WSI/VkSwapchainKHR.hpp"
 #include "Device/Renderer.hpp"
 
-#include "marl/defer.h"
-#include "marl/scheduler.h"
-#include "marl/thread.h"
-#include "marl/trace.h"
+#include "Yarn/Defer.hpp"
+#include "Yarn/Scheduler.hpp"
+#include "Yarn/Thread.hpp"
+#include "Yarn/Trace.hpp"
 
 #include <cstring>
 
@@ -79,7 +79,7 @@
 namespace vk
 {
 
-Queue::Queue(Device* device, marl::Scheduler *scheduler) : device(device)
+Queue::Queue(Device* device, yarn::Scheduler *scheduler) : device(device)
 {
 	queueThread = std::thread(&Queue::taskLoop, this, scheduler);
 }
@@ -160,9 +160,9 @@
 	}
 }
 
-void Queue::taskLoop(marl::Scheduler* scheduler)
+void Queue::taskLoop(yarn::Scheduler* scheduler)
 {
-	marl::Thread::setName("Queue<%p>", this);
+	yarn::Thread::setName("Queue<%p>", this);
 	scheduler->bind();
 	defer(scheduler->unbind());
 
diff --git a/src/Vulkan/VkQueue.hpp b/src/Vulkan/VkQueue.hpp
index 0bf3d00..c137d15 100644
--- a/src/Vulkan/VkQueue.hpp
+++ b/src/Vulkan/VkQueue.hpp
@@ -22,7 +22,7 @@
 
 #include "System/Synchronization.hpp"
 
-namespace marl
+namespace yarn
 {
 	class Scheduler;
 }
@@ -44,7 +44,7 @@
 	VK_LOADER_DATA loaderData = { ICD_LOADER_MAGIC };
 
 public:
-	Queue(Device* device, marl::Scheduler *scheduler);
+	Queue(Device* device, yarn::Scheduler *scheduler);
 	~Queue();
 
 	operator VkQueue()
@@ -69,7 +69,7 @@
 		Type type = SUBMIT_QUEUE;
 	};
 
-	void taskLoop(marl::Scheduler* scheduler);
+	void taskLoop(yarn::Scheduler* scheduler);
 	void garbageCollect();
 	void submitQueue(const Task& task);
 
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index c300382..aa4e68c 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -68,8 +68,8 @@
 
 #include "Reactor/Nucleus.hpp"
 
-#include "marl/scheduler.h"
-#include "marl/thread.h"
+#include "Yarn/Scheduler.hpp"
+#include "Yarn/Thread.hpp"
 
 #include "System/CPUID.hpp"
 
@@ -120,14 +120,14 @@
 	sw::CPUID::setEnableSSE(true);
 }
 
-marl::Scheduler* getOrCreateScheduler()
+yarn::Scheduler* getOrCreateScheduler()
 {
-	static auto scheduler = std::unique_ptr<marl::Scheduler>(new marl::Scheduler());
+	static auto scheduler = std::unique_ptr<yarn::Scheduler>(new yarn::Scheduler());
 	scheduler->setThreadInitializer([] {
 		sw::CPUID::setFlushToZero(true);
 		sw::CPUID::setDenormalsAreZero(true);
 	});
-	scheduler->setWorkerThreadCount(std::min<size_t>(marl::Thread::numLogicalCPUs(), 16));
+	scheduler->setWorkerThreadCount(std::min<size_t>(yarn::Thread::numLogicalCPUs(), 16));
 	return scheduler.get();
 }
 
diff --git a/src/WSI/BUILD.gn b/src/WSI/BUILD.gn
index 2ea3ad2..c13463e 100644
--- a/src/WSI/BUILD.gn
+++ b/src/WSI/BUILD.gn
@@ -45,7 +45,6 @@
   ]
 
   deps = [
-    "../../third_party/marl:Marl_headers",
     "../Vulkan:swiftshader_libvulkan_headers",
   ]
 }
diff --git a/src/Yarn/BUILD.gn b/src/Yarn/BUILD.gn
new file mode 100644
index 0000000..2718ad1
--- /dev/null
+++ b/src/Yarn/BUILD.gn
@@ -0,0 +1,110 @@
+# Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import("../swiftshader.gni")
+
+import("//testing/test.gni")
+
+config("yarn_config") {
+  include_dirs = [
+    "..",
+  ]
+}
+
+swiftshader_source_set("Yarn_headers") {
+  sources = [
+    "ConditionVariable.hpp",
+    "Containers.hpp",
+    "Debug.hpp",
+    "Defer.hpp",
+    "Finally.hpp",
+    "OSFiber.hpp",
+    "Pool.hpp",
+    "SAL.hpp",
+    "Scheduler.hpp",
+    "Thread.hpp",
+    "Ticket.hpp",
+    "Trace.hpp",
+    "Utils.hpp",
+    "WaitGroup.hpp",
+  ]
+
+  if (is_win) {
+    sources += [ "OSFiber_windows.hpp" ]
+  } else {
+    sources += [
+      "OSFiber_asm.hpp",
+      "OSFiber_asm_aarch64.h",
+      "OSFiber_asm_arm.h",
+      "OSFiber_asm_x64.h",
+      "OSFiber_asm_x86.h",
+    ]
+  }
+
+  public_configs = [
+    ":yarn_config",
+  ]
+}
+
+swiftshader_source_set("Yarn") {
+  sources = [
+    "Debug.cpp",
+    "Scheduler.cpp",
+    "Thread.cpp",
+    "Trace.cpp",
+  ]
+
+  if (!is_win) {
+    sources += [
+      "OSFiber_aarch64.c",
+      "OSFiber_arm.c",
+      "OSFiber_asm_aarch64.S",
+      "OSFiber_asm_arm.S",
+      "OSFiber_asm_x64.S",
+      "OSFiber_asm_x86.S",
+      "OSFiber_x64.c",
+      "OSFiber_x86.c",
+    ]
+  }
+
+  public_deps = [
+    ":Yarn_headers",
+  ]
+}
+
+test("yarn_unittests") {
+  sources = [
+    "ConditionVariable_test.cpp",
+    "Containers_test.cpp",
+    "Defer_test.cpp",
+    "OSFiber_test.cpp",
+    "Pool_test.cpp",
+    "Scheduler_test.cpp",
+    "Ticket_test.cpp",
+    "WaitGroup_test.cpp",
+    "Yarn_test.cpp",
+  ]
+  deps = [
+    ":Yarn",
+    "//testing/gtest",
+    "//testing/gmock",
+  ]
+
+  if (!swiftshader_standalone) {
+    deps += [
+      "//third_party/googletest:gmock",
+      "//third_party/googletest:gtest",
+    ]
+  }
+}
diff --git a/src/Yarn/BlockingCall.hpp b/src/Yarn/BlockingCall.hpp
new file mode 100644
index 0000000..161bba9
--- /dev/null
+++ b/src/Yarn/BlockingCall.hpp
@@ -0,0 +1,87 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "WaitGroup.hpp"
+
+#include <thread>
+#include <type_traits>
+
+namespace yarn {
+namespace detail {
+
+template <typename RETURN_TYPE>
+class OnNewThread
+{
+public:
+    template <typename F, typename ... Args>
+    inline static RETURN_TYPE call(F&& f, Args&& ... args)
+    {
+        RETURN_TYPE result;
+        WaitGroup wg(1);
+        auto thread = std::thread([&]
+        {
+            defer(wg.done());
+            result = f(args...);
+        });
+        wg.wait();
+        thread.join();
+        return result;
+    }
+};
+
+template <>
+class OnNewThread<void>
+{
+public:
+    template <typename F, typename ... Args>
+    inline static void call(F&& f, Args&& ... args)
+    {
+        WaitGroup wg(1);
+        auto thread = std::thread([&]
+        {
+            defer(wg.done());
+            f(args...);
+        });
+        wg.wait();
+        thread.join();
+    }
+};
+
+} // namespace detail
+
+// blocking_call() calls the function F on a new thread, yielding this fiber
+// to execute other tasks until F has returned.
+//
+// Example:
+//
+//  void runABlockingFunctionOnATask()
+//  {
+//      // Schedule a task that calls a blocking, non-yielding function.
+//      yarn::schedule([=] {
+//          // call_blocking_function() may block indefinitely.
+//          // Ensure this call does not block other tasks from running.
+//          auto result = yarn::blocking_call(call_blocking_function);
+//          // call_blocking_function() has now returned.
+//          // result holds the return value of the blocking function call.
+//      });
+//  }
+template <typename F, typename ... Args>
+auto inline blocking_call(F&& f, Args&& ... args) -> decltype(f(args...))
+{
+    return detail::OnNewThread<decltype(f(args...))>::call(std::forward<F>(f), std::forward<Args>(args)...);
+}
+
+} // namespace yarn
diff --git a/src/Yarn/ConditionVariable.hpp b/src/Yarn/ConditionVariable.hpp
new file mode 100644
index 0000000..7676c42
--- /dev/null
+++ b/src/Yarn/ConditionVariable.hpp
@@ -0,0 +1,120 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_condition_variable_hpp
+#define yarn_condition_variable_hpp
+
+#include "Containers.hpp"
+#include "Debug.hpp"
+#include "Scheduler.hpp"
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+
+namespace yarn {
+
+// ConditionVariable is a synchronization primitive that can be used to block
+// one or more fibers or threads, until another fiber or thread modifies a
+// shared variable (the condition) and notifies the ConditionVariable.
+//
+// If the ConditionVariable is blocked on a thread with a Scheduler bound, the
+// thread will work on other tasks until the ConditionVariable is unblocked.
+class ConditionVariable
+{
+public:
+    // Notifies and potentially unblocks one waiting fiber or thread.
+    inline void notify_one();
+
+    // Notifies and potentially unblocks all waiting fibers and/or threads.
+    inline void notify_all();
+
+    // Blocks the current fiber or thread until the predicate is satisfied
+    // and the ConditionVariable is notified.
+    template <typename Predicate>
+    inline void wait(std::unique_lock<std::mutex>& lock, Predicate pred);
+
+private:
+    std::mutex mutex;
+    containers::vector<Scheduler::Fiber*, 4> waiting;
+    std::condition_variable condition;
+    std::atomic<int> numWaiting = { 0 };
+    std::atomic<int> numWaitingOnCondition = { 0 };
+};
+
+void ConditionVariable::notify_one()
+{
+    if (numWaiting == 0) { return; }
+    std::unique_lock<std::mutex> lock(mutex);
+    if (waiting.size() > 0)
+    {
+        auto fiber = waiting.back();
+        waiting.pop_back();
+        fiber->schedule();
+    }
+    lock.unlock();
+    if (numWaitingOnCondition > 0) { condition.notify_one(); }
+}
+
+void ConditionVariable::notify_all()
+{
+    if (numWaiting == 0) { return; }
+    std::unique_lock<std::mutex> lock(mutex);
+    while (waiting.size() > 0)
+    {
+        auto fiber = waiting.back();
+        waiting.pop_back();
+        fiber->schedule();
+    }
+    lock.unlock();
+    if (numWaitingOnCondition > 0) { condition.notify_all(); }
+}
+
+template <typename Predicate>
+void ConditionVariable::wait(std::unique_lock<std::mutex>& dataLock, Predicate pred)
+{
+    if (pred())
+    {
+        return;
+    }
+    numWaiting++;
+    if (auto fiber = Scheduler::Fiber::current())
+    {
+        // Currently executing on a scheduler fiber.
+        // Yield to let other tasks run that can unblock this fiber.
+        while (!pred())
+        {
+            mutex.lock();
+            waiting.push_back(fiber);
+            mutex.unlock();
+
+            dataLock.unlock();
+            fiber->yield();
+            dataLock.lock();
+        }
+    }
+    else
+    {
+        // Currently running outside of the scheduler.
+        // Delegate to the std::condition_variable.
+        numWaitingOnCondition++;
+        condition.wait(dataLock, pred);
+        numWaitingOnCondition--;
+    }
+    numWaiting--;
+}
+
+} // namespace yarn
+
+#endif // yarn_condition_variable_hpp
diff --git a/src/Yarn/ConditionVariable_test.cpp b/src/Yarn/ConditionVariable_test.cpp
new file mode 100644
index 0000000..a7bc327
--- /dev/null
+++ b/src/Yarn/ConditionVariable_test.cpp
@@ -0,0 +1,96 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ConditionVariable.hpp"
+
+#include "Yarn_test.hpp"
+
+TEST(WithoutBoundScheduler, ConditionVariable)
+{
+    bool trigger[3] = {false, false, false};
+    bool signal[3] = {false, false, false};
+    std::mutex mutex;
+    yarn::ConditionVariable cv;
+
+    std::thread thread([&]
+    {
+        for (int i = 0; i < 3; i++)
+        {
+            std::unique_lock<std::mutex> lock(mutex);
+            cv.wait(lock, [&] { return trigger[i]; });
+            signal[i] = true;
+            cv.notify_one();
+        }
+    });
+
+    ASSERT_FALSE(signal[0]);
+    ASSERT_FALSE(signal[1]);
+    ASSERT_FALSE(signal[2]);
+
+    for (int i = 0; i < 3; i++)
+    {
+        {
+            std::unique_lock<std::mutex> lock(mutex);
+            trigger[i] = true;
+            cv.notify_one();
+            cv.wait(lock, [&] { return signal[i]; });
+        }
+
+        ASSERT_EQ(signal[0], 0 <= i);
+        ASSERT_EQ(signal[1], 1 <= i);
+        ASSERT_EQ(signal[2], 2 <= i);
+    }
+
+    thread.join();
+}
+
+
+TEST_P(WithBoundScheduler, ConditionVariable)
+{
+    bool trigger[3] = {false, false, false};
+    bool signal[3] = {false, false, false};
+    std::mutex mutex;
+    yarn::ConditionVariable cv;
+
+    std::thread thread([&]
+    {
+        for (int i = 0; i < 3; i++)
+        {
+            std::unique_lock<std::mutex> lock(mutex);
+            cv.wait(lock, [&] { return trigger[i]; });
+            signal[i] = true;
+            cv.notify_one();
+        }
+    });
+
+    ASSERT_FALSE(signal[0]);
+    ASSERT_FALSE(signal[1]);
+    ASSERT_FALSE(signal[2]);
+
+    for (int i = 0; i < 3; i++)
+    {
+        {
+            std::unique_lock<std::mutex> lock(mutex);
+            trigger[i] = true;
+            cv.notify_one();
+            cv.wait(lock, [&] { return signal[i]; });
+        }
+
+        ASSERT_EQ(signal[0], 0 <= i);
+        ASSERT_EQ(signal[1], 1 <= i);
+        ASSERT_EQ(signal[2], 2 <= i);
+    }
+
+    thread.join();
+}
diff --git a/src/Yarn/Containers.hpp b/src/Yarn/Containers.hpp
new file mode 100644
index 0000000..b8b66a8
--- /dev/null
+++ b/src/Yarn/Containers.hpp
@@ -0,0 +1,255 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The yarn::containers namespace holds STL-like container implementations that
+// are optimized for avoiding heap allocations.
+// Note that unlike many other types in yarn, these containers offer no
+// protection for concurrent access and are considered 'thread-unsafe'.
+
+#ifndef yarn_containers_hpp
+#define yarn_containers_hpp
+
+#include "Debug.hpp"
+
+#include <algorithm> // std::max
+#include <type_traits> // std::aligned_storage
+#include <utility> // std::move
+
+#include <stddef.h> // size_t
+
+namespace yarn {
+namespace containers {
+
+////////////////////////////////////////////////////////////////////////////////
+// vector<T, BASE_CAPACITY>
+////////////////////////////////////////////////////////////////////////////////
+
+// vector is a container of contiguously stored elements.
+// Unlike std::vector, yarn::containers::vector keeps the first BASE_CAPACITY
+// elements internally, which will avoid dynamic heap allocations.
+// Once the vector exceeds BASE_CAPACITY elements, vector will allocate storage
+// from the heap.
+template <typename T, int BASE_CAPACITY>
+class vector
+{
+public:
+    inline vector() = default;
+
+    template <int BASE_CAPACITY_2>
+    inline vector(const vector<T, BASE_CAPACITY_2>& other);
+
+    template <int BASE_CAPACITY_2>
+    inline vector(vector<T, BASE_CAPACITY_2>&& other);
+
+    inline ~vector();
+
+    template <int BASE_CAPACITY_2>
+    inline vector<T, BASE_CAPACITY>& operator = (const vector<T, BASE_CAPACITY_2>&);
+
+    template <int BASE_CAPACITY_2>
+    inline vector<T, BASE_CAPACITY>& operator = (vector<T, BASE_CAPACITY_2>&&);
+
+    inline void push_back(const T& el);
+    inline void emplace_back(T&& el);
+    inline void pop_back();
+    inline T& front();
+    inline T& back();
+    inline T* begin();
+    inline T* end();
+    inline T& operator[] (size_t i);
+    inline const T& operator[] (size_t i) const;
+    inline size_t size() const;
+    inline size_t cap() const;
+    inline void resize(size_t n);
+    inline void reserve(size_t n);
+
+private:
+    using TStorage = typename std::aligned_storage<sizeof(T), alignof(T)>::type;
+
+    inline void free();
+
+    size_t count = 0;
+    size_t capacity = BASE_CAPACITY;
+    TStorage buffer[BASE_CAPACITY];
+    TStorage* elements = buffer;
+};
+
+template <typename T, int BASE_CAPACITY>
+template <int BASE_CAPACITY_2>
+vector<T, BASE_CAPACITY>::vector(const vector<T, BASE_CAPACITY_2>& other)
+{
+    *this = other;
+}
+
+template <typename T, int BASE_CAPACITY>
+template <int BASE_CAPACITY_2>
+vector<T, BASE_CAPACITY>::vector(vector<T, BASE_CAPACITY_2>&& other)
+{
+    *this = std::move(other);
+}
+
+template <typename T, int BASE_CAPACITY>
+vector<T, BASE_CAPACITY>::~vector()
+{
+    free();
+}
+
+template <typename T, int BASE_CAPACITY>
+template <int BASE_CAPACITY_2>
+vector<T, BASE_CAPACITY>& vector<T, BASE_CAPACITY>::operator = (const vector<T, BASE_CAPACITY_2>& other)
+{
+    free();
+    reserve(other.size());
+    count = other.size();
+    for (size_t i = 0; i < count; i++)
+    {
+        new (&reinterpret_cast<T*>(elements)[i]) T(other[i]);
+    }
+    return *this;
+}
+
+template <typename T, int BASE_CAPACITY>
+template <int BASE_CAPACITY_2>
+vector<T, BASE_CAPACITY>& vector<T, BASE_CAPACITY>::operator = (vector<T, BASE_CAPACITY_2>&& other)
+{
+    free();
+    reserve(other.size());
+    count = other.size();
+    for (size_t i = 0; i < count; i++)
+    {
+        new (&reinterpret_cast<T*>(elements)[i]) T(std::move(other[i]));
+    }
+    other.resize(0);
+    return *this;
+}
+
+template <typename T, int BASE_CAPACITY>
+void vector<T, BASE_CAPACITY>::push_back(const T& el)
+{
+    reserve(count + 1);
+    new (&reinterpret_cast<T*>(elements)[count]) T(el);
+    count++;
+}
+
+template <typename T, int BASE_CAPACITY>
+void vector<T, BASE_CAPACITY>::emplace_back(T&& el)
+{
+    reserve(count + 1);
+    new (&reinterpret_cast<T*>(elements)[count]) T(std::move(el));
+    count++;
+}
+
+template <typename T, int BASE_CAPACITY>
+void vector<T, BASE_CAPACITY>::pop_back()
+{
+    YARN_ASSERT(count > 0, "pop_back() called on empty vector");
+    count--;
+    reinterpret_cast<T*>(elements)[count].~T();
+}
+
+template <typename T, int BASE_CAPACITY>
+T& vector<T, BASE_CAPACITY>::front()
+{
+    YARN_ASSERT(count > 0, "front() called on empty vector");
+    return reinterpret_cast<T*>(elements)[0];
+}
+
+template <typename T, int BASE_CAPACITY>
+T& vector<T, BASE_CAPACITY>::back()
+{
+    YARN_ASSERT(count > 0, "back() called on empty vector");
+    return reinterpret_cast<T*>(elements)[count - 1];
+}
+
+template <typename T, int BASE_CAPACITY>
+T* vector<T, BASE_CAPACITY>::begin()
+{
+    return reinterpret_cast<T*>(elements);
+}
+
+template <typename T, int BASE_CAPACITY>
+T* vector<T, BASE_CAPACITY>::end()
+{
+    return reinterpret_cast<T*>(elements) + count;
+}
+
+template <typename T, int BASE_CAPACITY>
+T& vector<T, BASE_CAPACITY>::operator[] (size_t i)
+{
+    YARN_ASSERT(i < count, "index %d exceeds vector size %d", int(i), int(count));
+    return reinterpret_cast<T*>(elements)[i];
+}
+
+template <typename T, int BASE_CAPACITY>
+const T& vector<T, BASE_CAPACITY>::operator[] (size_t i) const
+{
+    YARN_ASSERT(i < count, "index %d exceeds vector size %d", int(i), int(count));
+    return reinterpret_cast<T*>(elements)[i];
+}
+
+template <typename T, int BASE_CAPACITY>
+size_t vector<T, BASE_CAPACITY>::size() const
+{
+    return count;
+}
+
+template <typename T, int BASE_CAPACITY>
+void vector<T, BASE_CAPACITY>::resize(size_t n)
+{
+    reserve(n);
+    while (count < n)
+    {
+        new (&reinterpret_cast<T*>(elements)[count++]) T();
+    }
+    while (n < count)
+    {
+        reinterpret_cast<T*>(elements)[--count].~T();
+    }
+}
+
+template <typename T, int BASE_CAPACITY>
+void vector<T, BASE_CAPACITY>::reserve(size_t n)
+{
+    if (n > capacity)
+    {
+        capacity = std::max<size_t>(n * 2, 8);
+        auto grown = new TStorage[capacity];
+        for (size_t i = 0; i < count; i++)
+        {
+            new (&reinterpret_cast<T*>(grown)[i]) T(std::move(reinterpret_cast<T*>(elements)[i]));
+        }
+        free();
+        elements = grown;
+    }
+}
+
+template <typename T, int BASE_CAPACITY>
+void vector<T, BASE_CAPACITY>::free()
+{
+    for (size_t i = 0; i < count; i++)
+    {
+        reinterpret_cast<T*>(elements)[i].~T();
+    }
+
+    if (elements != buffer)
+    {
+        delete []elements;
+        elements = nullptr;
+    }
+}
+
+} // namespace containers
+} // namespace yarn
+
+#endif  // yarn_containers_hpp
diff --git a/src/Yarn/Containers_test.cpp b/src/Yarn/Containers_test.cpp
new file mode 100644
index 0000000..d677606
--- /dev/null
+++ b/src/Yarn/Containers_test.cpp
@@ -0,0 +1,201 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Containers.hpp"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <string>
+
+class ContainersVectorTest : public testing::Test {};
+
+TEST(ContainersVectorTest, Empty)
+{
+    yarn::containers::vector<std::string, 4> vector;
+    ASSERT_EQ(vector.size(), size_t(0));
+}
+
+TEST(ContainersVectorTest, WithinFixedCapIndex)
+{
+    yarn::containers::vector<std::string, 4> vector;
+    vector.resize(4);
+    vector[0] = "A";
+    vector[1] = "B";
+    vector[2] = "C";
+    vector[3] = "D";
+
+    ASSERT_EQ(vector[0], "A");
+    ASSERT_EQ(vector[1], "B");
+    ASSERT_EQ(vector[2], "C");
+    ASSERT_EQ(vector[3], "D");
+}
+
+TEST(ContainersVectorTest, BeyondFixedCapIndex)
+{
+    yarn::containers::vector<std::string, 1> vector;
+    vector.resize(4);
+    vector[0] = "A";
+    vector[1] = "B";
+    vector[2] = "C";
+    vector[3] = "D";
+
+    ASSERT_EQ(vector[0], "A");
+    ASSERT_EQ(vector[1], "B");
+    ASSERT_EQ(vector[2], "C");
+    ASSERT_EQ(vector[3], "D");
+}
+
+TEST(ContainersVectorTest, WithinFixedCapPushPop)
+{
+    yarn::containers::vector<std::string, 4> vector;
+    vector.push_back("A");
+    vector.push_back("B");
+    vector.push_back("C");
+    vector.push_back("D");
+
+    ASSERT_EQ(vector.size(), size_t(4));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(4));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "D");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(3));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(3));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "C");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(2));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(2));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "B");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(1));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(1));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "A");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(0));
+}
+
+TEST(ContainersVectorTest, BeyondFixedCapPushPop)
+{
+    yarn::containers::vector<std::string, 2> vector;
+    vector.push_back("A");
+    vector.push_back("B");
+    vector.push_back("C");
+    vector.push_back("D");
+
+    ASSERT_EQ(vector.size(), size_t(4));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(4));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "D");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(3));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(3));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "C");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(2));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(2));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "B");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(1));
+    ASSERT_EQ(vector.end() - vector.begin(), ptrdiff_t(1));
+
+    ASSERT_EQ(vector.front(), "A");
+    ASSERT_EQ(vector.back(), "A");
+    vector.pop_back();
+    ASSERT_EQ(vector.size(), size_t(0));
+}
+
+TEST(ContainersVectorTest, CopyConstruct)
+{
+    yarn::containers::vector<std::string, 4> vectorA;
+
+    vectorA.resize(3);
+    vectorA[0] = "A";
+    vectorA[1] = "B";
+    vectorA[2] = "C";
+
+    yarn::containers::vector<std::string, 2> vectorB(vectorA);
+    ASSERT_EQ(vectorB.size(), size_t(3));
+    ASSERT_EQ(vectorB[0], "A");
+    ASSERT_EQ(vectorB[1], "B");
+    ASSERT_EQ(vectorB[2], "C");
+}
+
+TEST(ContainersVectorTest, MoveConstruct)
+{
+    yarn::containers::vector<std::string, 4> vectorA;
+
+    vectorA.resize(3);
+    vectorA[0] = "A";
+    vectorA[1] = "B";
+    vectorA[2] = "C";
+
+    yarn::containers::vector<std::string, 2> vectorB(std::move(vectorA));
+    ASSERT_EQ(vectorB.size(), size_t(3));
+    ASSERT_EQ(vectorB[0], "A");
+    ASSERT_EQ(vectorB[1], "B");
+    ASSERT_EQ(vectorB[2], "C");
+}
+
+TEST(ContainersVectorTest, Copy)
+{
+    yarn::containers::vector<std::string, 4> vectorA;
+    yarn::containers::vector<std::string, 2> vectorB;
+
+    vectorA.resize(3);
+    vectorA[0] = "A";
+    vectorA[1] = "B";
+    vectorA[2] = "C";
+
+    vectorB.resize(1);
+    vectorB[0] = "Z";
+
+    vectorB = vectorA;
+    ASSERT_EQ(vectorB.size(), size_t(3));
+    ASSERT_EQ(vectorB[0], "A");
+    ASSERT_EQ(vectorB[1], "B");
+    ASSERT_EQ(vectorB[2], "C");
+}
+
+TEST(ContainersVectorTest, Move)
+{
+    yarn::containers::vector<std::string, 4> vectorA;
+    yarn::containers::vector<std::string, 2> vectorB;
+
+    vectorA.resize(3);
+    vectorA[0] = "A";
+    vectorA[1] = "B";
+    vectorA[2] = "C";
+
+    vectorB.resize(1);
+    vectorB[0] = "Z";
+
+    vectorB = std::move(vectorA);
+    ASSERT_EQ(vectorA.size(), size_t(0));
+    ASSERT_EQ(vectorB.size(), size_t(3));
+    ASSERT_EQ(vectorB[0], "A");
+    ASSERT_EQ(vectorB[1], "B");
+    ASSERT_EQ(vectorB[2], "C");
+}
diff --git a/src/Yarn/Debug.cpp b/src/Yarn/Debug.cpp
new file mode 100644
index 0000000..ead7c16
--- /dev/null
+++ b/src/Yarn/Debug.cpp
@@ -0,0 +1,41 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Debug.hpp"
+
+#include "Scheduler.hpp"
+
+#include <cstdlib>
+
+#include <stdarg.h>
+#include <stdio.h>
+
+namespace yarn
+{
+
+void fatal(const char* msg, ...)
+{
+    va_list vararg;
+    va_start(vararg, msg);
+    vfprintf(stderr, msg, vararg);
+    va_end(vararg);
+    abort();
+}
+
+void assert_has_bound_scheduler(const char* feature)
+{
+    YARN_ASSERT(Scheduler::get() != nullptr, "%s requires a yarn::Scheduler to be bound", feature);
+}
+
+}  // namespace yarn
diff --git a/src/Yarn/Debug.hpp b/src/Yarn/Debug.hpp
new file mode 100644
index 0000000..76945b0
--- /dev/null
+++ b/src/Yarn/Debug.hpp
@@ -0,0 +1,45 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_debug_hpp
+#define yarn_debug_hpp
+
+#if !defined(YARN_DEBUG_ENABLED)
+#    if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
+#        define YARN_DEBUG_ENABLED 1
+#    else
+#        define YARN_DEBUG_ENABLED 0
+#    endif
+#endif
+
+namespace yarn {
+
+void fatal(const char* msg, ...);
+void assert_has_bound_scheduler(const char* feature);
+
+#if YARN_DEBUG_ENABLED
+#define YARN_FATAL(msg, ...) yarn::fatal(msg "\n", ##__VA_ARGS__);
+#define YARN_ASSERT(cond, msg, ...) do { if (!(cond)) { YARN_FATAL("ASSERT: " msg, ##__VA_ARGS__); } } while (false);
+#define YARN_ASSERT_HAS_BOUND_SCHEDULER(feature) assert_has_bound_scheduler(feature);
+#define YARN_UNREACHABLE() YARN_FATAL("UNREACHABLE");
+#else
+#define YARN_FATAL(msg, ...)
+#define YARN_ASSERT(cond, msg, ...)
+#define YARN_ASSERT_HAS_BOUND_SCHEDULER(feature)
+#define YARN_UNREACHABLE()
+#endif
+
+} // namespace yarn
+
+#endif  // yarn_debug_hpp
diff --git a/src/Yarn/Defer.hpp b/src/Yarn/Defer.hpp
new file mode 100644
index 0000000..565dfa1
--- /dev/null
+++ b/src/Yarn/Defer.hpp
@@ -0,0 +1,44 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_defer_hpp
+#define yarn_defer_hpp
+
+#include "Finally.hpp"
+
+namespace yarn {
+
+#define YARN_CONCAT_(a, b) a ## b
+#define YARN_CONCAT(a, b) YARN_CONCAT_(a,b)
+
+// defer() is a macro to defer execution of a statement until the surrounding
+// scope is closed and is typically used to perform cleanup logic once a
+// function returns.
+//
+// Note: Unlike golang's defer(), the defer statement is executed when the
+// surrounding *scope* is closed, not necessarily the function.
+//
+// Example usage:
+//
+//  void sayHelloWorld()
+//  {
+//      defer(printf("world\n"));
+//      printf("hello ");
+//  }
+//
+#define defer(x) auto YARN_CONCAT(defer_, __LINE__) = yarn::make_finally([&]{ x; })
+
+} // namespace yarn
+
+#endif  // yarn_defer_hpp
diff --git a/src/Yarn/Defer_test.cpp b/src/Yarn/Defer_test.cpp
new file mode 100644
index 0000000..479022f
--- /dev/null
+++ b/src/Yarn/Defer_test.cpp
@@ -0,0 +1,40 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Defer.hpp"
+
+#include "Yarn_test.hpp"
+
+TEST(WithoutBoundScheduler, Defer)
+{
+    bool deferCalled = false;
+    {
+        defer(deferCalled = true);
+    }
+    ASSERT_TRUE(deferCalled);
+}
+
+TEST(WithoutBoundScheduler, DeferOrder)
+{
+    int counter = 0;
+    int a = 0, b = 0, c = 0;
+    {
+        defer(a = ++counter);
+        defer(b = ++counter);
+        defer(c = ++counter);
+    }
+    ASSERT_EQ(a, 3);
+    ASSERT_EQ(b, 2);
+    ASSERT_EQ(c, 1);
+}
\ No newline at end of file
diff --git a/src/Yarn/Finally.hpp b/src/Yarn/Finally.hpp
new file mode 100644
index 0000000..702c87b
--- /dev/null
+++ b/src/Yarn/Finally.hpp
@@ -0,0 +1,89 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Finally can be used to execute a lambda or function when the final reference
+// to the Finally is dropped.
+//
+// The purpose of a finally is to perform cleanup or termination logic and is
+// especially useful when there are multiple early returns within a function.
+//
+// A moveable Finally can be constructed with yarn::make_finally().
+// A sharable Finally can be constructed with yarn::make_shared_finally().
+
+#ifndef yarn_finally_hpp
+#define yarn_finally_hpp
+
+#include <functional>
+#include <memory>
+
+namespace yarn {
+
+// Finally is a pure virtual base class, implemented by the templated
+// FinallyImpl.
+class Finally
+{
+public:
+    virtual ~Finally() = default;
+};
+
+// FinallyImpl implements a Finally.
+// The template parameter F is the function type to be called when the finally
+// is destructed. F must have the signature void().
+template <typename F>
+class FinallyImpl : public Finally
+{
+public:
+    inline FinallyImpl(const F& func);
+    inline FinallyImpl(F&& func);
+    inline FinallyImpl(FinallyImpl<F>&& other);
+    inline ~FinallyImpl();
+
+private:
+    FinallyImpl(const FinallyImpl<F>& other) = delete;
+    FinallyImpl<F>& operator = (const FinallyImpl<F>& other) = delete;
+    FinallyImpl<F>& operator = (FinallyImpl<F>&&) = delete;
+    F func;
+    bool valid = true;
+};
+
+template<typename F>
+FinallyImpl<F>::FinallyImpl(const F& func) : func(func) {}
+
+template<typename F>
+FinallyImpl<F>::FinallyImpl(F&& func) : func(std::move(func)) {}
+
+template<typename F>
+FinallyImpl<F>::FinallyImpl(FinallyImpl<F>&& other) : func(std::move(other.func))
+{
+    other.valid = false;
+}
+
+template<typename F>
+FinallyImpl<F>::~FinallyImpl()
+{
+    if (valid)
+    {
+        func();
+    }
+}
+
+template<typename F>
+inline FinallyImpl<F> make_finally(F&& f) { return FinallyImpl<F>(std::move(f)); }
+
+template<typename F>
+inline std::shared_ptr<Finally> make_shared_finally(F&& f) { return std::make_shared<FinallyImpl<F>>(std::move(f)); }
+
+} // namespace yarn
+
+#endif  // yarn_finally_hpp
diff --git a/src/Yarn/OSFiber.hpp b/src/Yarn/OSFiber.hpp
new file mode 100644
index 0000000..d52de3e
--- /dev/null
+++ b/src/Yarn/OSFiber.hpp
@@ -0,0 +1,21 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(_WIN32)
+#include "OSFiber_windows.hpp"
+#elif defined(YARN_FIBERS_USE_UCONTEXT)
+#include "OSFiber_ucontext.hpp"
+#else
+#include "OSFiber_asm.hpp"
+#endif
diff --git a/src/Yarn/OSFiber_aarch64.c b/src/Yarn/OSFiber_aarch64.c
new file mode 100644
index 0000000..7b09c58
--- /dev/null
+++ b/src/Yarn/OSFiber_aarch64.c
@@ -0,0 +1,33 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__aarch64__)
+
+#include "OSFiber_asm_aarch64.h"
+
+void yarn_fiber_trampoline(void(*target)(void*), void* arg)
+{
+    target(arg);
+}
+
+void yarn_fiber_set_target(struct yarn_fiber_context* ctx, void* stack, uint32_t stack_size, void(*target)(void*), void* arg)
+{
+    uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size);
+    ctx->LR = (uintptr_t)&yarn_fiber_trampoline;
+    ctx->r0 = (uintptr_t)target;
+    ctx->r1 = (uintptr_t)arg;
+    ctx->SP = ((uintptr_t)stack_top) & ~(uintptr_t)15;
+}
+
+#endif // defined(__aarch64__)
diff --git a/src/Yarn/OSFiber_arm.c b/src/Yarn/OSFiber_arm.c
new file mode 100644
index 0000000..4a6b78e
--- /dev/null
+++ b/src/Yarn/OSFiber_arm.c
@@ -0,0 +1,33 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__arm__)
+
+#include "OSFiber_asm_arm.h"
+
+void yarn_fiber_trampoline(void(*target)(void*), void* arg)
+{
+    target(arg);
+}
+
+void yarn_fiber_set_target(struct yarn_fiber_context* ctx, void* stack, uint32_t stack_size, void(*target)(void*), void* arg)
+{
+    uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size);
+    ctx->LR = (uintptr_t)&yarn_fiber_trampoline;
+    ctx->r0 = (uintptr_t)target;
+    ctx->r1 = (uintptr_t)arg;
+    ctx->SP = ((uintptr_t)stack_top) & ~(uintptr_t)15;
+}
+
+#endif // defined(__arm__)
diff --git a/src/Yarn/OSFiber_asm.hpp b/src/Yarn/OSFiber_asm.hpp
new file mode 100644
index 0000000..e0fa508
--- /dev/null
+++ b/src/Yarn/OSFiber_asm.hpp
@@ -0,0 +1,103 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Minimal assembly implementations of fiber context switching for Unix-based
+// platforms.
+//
+// Note: Unlike makecontext, swapcontext or the Windows fiber APIs, these
+// assembly implementations *do not* save or restore signal masks,
+// floating-point control or status registers, FS and GS segment registers,
+// thread-local storage state nor any SIMD registers. This should not be a
+// problem as the yarn scheduler requires fibers to be executed on a single
+// thread.
+
+#if defined(__x86_64__)
+#include "OSFiber_asm_x64.h"
+#elif defined(__i386__)
+#include "OSFiber_asm_x86.h"
+#elif defined(__aarch64__)
+#include "OSFiber_asm_aarch64.h"
+#elif defined(__arm__)
+#include "OSFiber_asm_arm.h"
+#else
+#error "Unsupported target"
+#endif
+
+#include <functional>
+#include <memory>
+
+extern "C"
+{
+
+extern void yarn_fiber_set_target(yarn_fiber_context*, void* stack, uint32_t stack_size, void(*target)(void*), void* arg);
+extern void yarn_fiber_swap(yarn_fiber_context* from, const yarn_fiber_context* to);
+
+} // extern "C"
+
+namespace yarn {
+
+class OSFiber
+{
+public:
+    // createFiberFromCurrentThread() returns a fiber created from the current
+    // thread.
+    static inline OSFiber* createFiberFromCurrentThread();
+
+    // createFiber() returns a new fiber with the given stack size that will
+    // call func when switched to. func() must end by switching back to another
+    // fiber, and must not return.
+    static inline OSFiber* createFiber(size_t stackSize, const std::function<void()>& func);
+
+    // switchTo() immediately switches execution to the given fiber.
+    // switchTo() must be called on the currently executing fiber.
+    inline void switchTo(OSFiber*);
+
+private:
+    static inline void run(OSFiber* self);
+
+    yarn_fiber_context context;
+    std::function<void()> target;
+    std::unique_ptr<uint8_t[]> stack;
+};
+
+OSFiber* OSFiber::createFiberFromCurrentThread()
+{
+    auto out = new OSFiber();
+    out->context = {};
+    return out;
+}
+
+OSFiber* OSFiber::createFiber(size_t stackSize, const std::function<void()>& func)
+{
+    auto out = new OSFiber();
+    out->context = {};
+    out->target = func;
+    out->stack = std::unique_ptr<uint8_t[]>(new uint8_t[stackSize]);
+    yarn_fiber_set_target(&out->context, out->stack.get(), stackSize, reinterpret_cast<void (*)(void*)>(&OSFiber::run), out);
+    return out;
+}
+
+void OSFiber::run(OSFiber* self)
+{
+    std::function<void()> func;
+    std::swap(func, self->target);
+    func();
+}
+
+void OSFiber::switchTo(OSFiber* fiber)
+{
+    yarn_fiber_swap(&context, &fiber->context);
+}
+
+}  // namespace yarn
diff --git a/src/Yarn/OSFiber_asm_aarch64.S b/src/Yarn/OSFiber_asm_aarch64.S
new file mode 100644
index 0000000..6a15cc7
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_aarch64.S
@@ -0,0 +1,102 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__aarch64__)
+
+#define YARN_BUILD_ASM 1
+#include "OSFiber_asm_aarch64.h"
+
+// void yarn_fiber_swap(yarn_fiber_context* from, const yarn_fiber_context* to)
+// x0: from
+// x1: to
+.text
+.global YARN_ASM_SYMBOL(yarn_fiber_swap)
+.align 4
+YARN_ASM_SYMBOL(yarn_fiber_swap):
+
+    // Save context 'from'
+    // TODO: pairs of str can be combined with stp.
+
+    // Store special purpose registers
+    str x16, [x0, #YARN_REG_r16]
+    str x17, [x0, #YARN_REG_r17]
+    str x18, [x0, #YARN_REG_r18]
+
+    // Store callee-preserved registers
+    str x19, [x0, #YARN_REG_r19]
+    str x20, [x0, #YARN_REG_r20]
+    str x21, [x0, #YARN_REG_r21]
+    str x22, [x0, #YARN_REG_r22]
+    str x23, [x0, #YARN_REG_r23]
+    str x24, [x0, #YARN_REG_r24]
+    str x25, [x0, #YARN_REG_r25]
+    str x26, [x0, #YARN_REG_r26]
+    str x27, [x0, #YARN_REG_r27]
+    str x28, [x0, #YARN_REG_r28]
+
+    str d8,  [x0, #YARN_REG_v8]
+    str d9,  [x0, #YARN_REG_v9]
+    str d10, [x0, #YARN_REG_v10]
+    str d11, [x0, #YARN_REG_v11]
+    str d12, [x0, #YARN_REG_v12]
+    str d13, [x0, #YARN_REG_v13]
+    str d14, [x0, #YARN_REG_v14]
+    str d15, [x0, #YARN_REG_v15]
+
+    // Store sp and lr
+    mov x2, sp
+    str x2,  [x0, #YARN_REG_SP]
+    str x30, [x0, #YARN_REG_LR]
+
+    // Load context 'to'
+    mov x7, x1
+
+    // Load special purpose registers
+    ldr x16, [x7, #YARN_REG_r16]
+    ldr x17, [x7, #YARN_REG_r17]
+    ldr x18, [x7, #YARN_REG_r18]
+
+    // Load callee-preserved registers
+    ldr x19, [x7, #YARN_REG_r19]
+    ldr x20, [x7, #YARN_REG_r20]
+    ldr x21, [x7, #YARN_REG_r21]
+    ldr x22, [x7, #YARN_REG_r22]
+    ldr x23, [x7, #YARN_REG_r23]
+    ldr x24, [x7, #YARN_REG_r24]
+    ldr x25, [x7, #YARN_REG_r25]
+    ldr x26, [x7, #YARN_REG_r26]
+    ldr x27, [x7, #YARN_REG_r27]
+    ldr x28, [x7, #YARN_REG_r28]
+
+    ldr d8,  [x7, #YARN_REG_v8]
+    ldr d9,  [x7, #YARN_REG_v9]
+    ldr d10, [x7, #YARN_REG_v10]
+    ldr d11, [x7, #YARN_REG_v11]
+    ldr d12, [x7, #YARN_REG_v12]
+    ldr d13, [x7, #YARN_REG_v13]
+    ldr d14, [x7, #YARN_REG_v14]
+    ldr d15, [x7, #YARN_REG_v15]
+
+    // Load parameter registers
+    ldr x0, [x7, #YARN_REG_r0]
+    ldr x1, [x7, #YARN_REG_r1]
+
+    // Load sp and lr
+    ldr x30, [x7, #YARN_REG_LR]
+    ldr x2,  [x7, #YARN_REG_SP]
+    mov sp, x2
+
+    ret
+
+#endif // defined(__aarch64__)
diff --git a/src/Yarn/OSFiber_asm_aarch64.h b/src/Yarn/OSFiber_asm_aarch64.h
new file mode 100644
index 0000000..fa3456c
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_aarch64.h
@@ -0,0 +1,118 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define YARN_REG_r0  0x00
+#define YARN_REG_r1  0x08
+#define YARN_REG_r16 0x10
+#define YARN_REG_r17 0x18
+#define YARN_REG_r18 0x20
+#define YARN_REG_r19 0x28
+#define YARN_REG_r20 0x30
+#define YARN_REG_r21 0x38
+#define YARN_REG_r22 0x40
+#define YARN_REG_r23 0x48
+#define YARN_REG_r24 0x50
+#define YARN_REG_r25 0x58
+#define YARN_REG_r26 0x60
+#define YARN_REG_r27 0x68
+#define YARN_REG_r28 0x70
+#define YARN_REG_v8  0x78
+#define YARN_REG_v9  0x80
+#define YARN_REG_v10 0x88
+#define YARN_REG_v11 0x90
+#define YARN_REG_v12 0x98
+#define YARN_REG_v13 0xa0
+#define YARN_REG_v14 0xa8
+#define YARN_REG_v15 0xb0
+#define YARN_REG_SP  0xb8
+#define YARN_REG_LR  0xc0
+
+#if defined(__APPLE__)
+#define YARN_ASM_SYMBOL(x) _##x
+#else
+#define YARN_ASM_SYMBOL(x) x
+#endif
+
+#ifndef YARN_BUILD_ASM
+
+#include <stdint.h>
+
+// Procedure Call Standard for the ARM 64-bit Architecture
+// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+struct yarn_fiber_context
+{
+    // parameter registers
+    uintptr_t r0;
+    uintptr_t r1;
+
+    // special purpose registers
+    uintptr_t r16;
+    uintptr_t r17;
+    uintptr_t r18; // platform specific (maybe inter-procedural state)
+
+    // callee-saved registers
+    uintptr_t r19;
+    uintptr_t r20;
+    uintptr_t r21;
+    uintptr_t r22;
+    uintptr_t r23;
+    uintptr_t r24;
+    uintptr_t r25;
+    uintptr_t r26;
+    uintptr_t r27;
+    uintptr_t r28;
+
+    uintptr_t v8;
+    uintptr_t v9;
+    uintptr_t v10;
+    uintptr_t v11;
+    uintptr_t v12;
+    uintptr_t v13;
+    uintptr_t v14;
+    uintptr_t v15;
+
+    uintptr_t SP; // stack pointer
+    uintptr_t LR; // link register (R30)
+};
+
+#ifdef __cplusplus
+#include <cstddef>
+static_assert(offsetof(yarn_fiber_context, r0)  == YARN_REG_r0,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r1)  == YARN_REG_r1,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r16) == YARN_REG_r16, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r17) == YARN_REG_r17, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r18) == YARN_REG_r18, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r19) == YARN_REG_r19, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r20) == YARN_REG_r20, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r21) == YARN_REG_r21, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r22) == YARN_REG_r22, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r23) == YARN_REG_r23, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r24) == YARN_REG_r24, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r25) == YARN_REG_r25, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r26) == YARN_REG_r26, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r27) == YARN_REG_r27, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r28) == YARN_REG_r28, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v8)  == YARN_REG_v8,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v9)  == YARN_REG_v9,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v10) == YARN_REG_v10, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v11) == YARN_REG_v11, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v12) == YARN_REG_v12, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v13) == YARN_REG_v13, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v14) == YARN_REG_v14, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v15) == YARN_REG_v15, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, SP)  == YARN_REG_SP,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, LR)  == YARN_REG_LR,  "Bad register offset");
+#endif // __cplusplus
+
+#endif // YARN_BUILD_ASM
diff --git a/src/Yarn/OSFiber_asm_arm.S b/src/Yarn/OSFiber_asm_arm.S
new file mode 100644
index 0000000..a810b33
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_arm.S
@@ -0,0 +1,74 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__arm__)
+
+#define YARN_BUILD_ASM 1
+#include "OSFiber_asm_arm.h"
+
+// void yarn_fiber_swap(yarn_fiber_context* from, const yarn_fiber_context* to)
+// r0: from
+// r1: to
+.text
+.global yarn_fiber_swap
+.align 4
+yarn_fiber_swap:
+
+    // Save context 'from'
+    // TODO: multiple registers can be stored in a single instruction with: stm rA, {rB-rC}
+
+    // Store special purpose registers
+    str r12, [r0, #YARN_REG_r12]
+
+    // Store callee-preserved registers
+    str r4, [r0, #YARN_REG_r4]
+    str r5, [r0, #YARN_REG_r5]
+    str r6, [r0, #YARN_REG_r6]
+    str r7, [r0, #YARN_REG_r7]
+    str r8, [r0, #YARN_REG_r8]
+    str r9, [r0, #YARN_REG_r9]
+    str r10, [r0, #YARN_REG_r10]
+    str r11, [r0, #YARN_REG_r11]
+
+    // Store sp, lr and pc
+    str sp, [r0, #YARN_REG_SP]
+    str lr, [r0, #YARN_REG_LR]
+
+    // Load context 'to'
+    // TODO: multiple registers can be loaded in a single instruction with: ldm rA, {rB-rC}
+    mov r3, r1
+
+    // Load special purpose registers
+    ldr r12, [r3, #YARN_REG_r12]
+
+    // Load callee-preserved registers
+    ldr r4, [r3, #YARN_REG_r4]
+    ldr r5, [r3, #YARN_REG_r5]
+    ldr r6, [r3, #YARN_REG_r6]
+    ldr r7, [r3, #YARN_REG_r7]
+    ldr r8, [r3, #YARN_REG_r8]
+    ldr r9, [r3, #YARN_REG_r9]
+    ldr r10, [r3, #YARN_REG_r10]
+    ldr r11, [r3, #YARN_REG_r11]
+
+    // Load parameter registers
+    ldr r0, [r3, #YARN_REG_r0]
+    ldr r1, [r3, #YARN_REG_r1]
+
+    // Load sp, lr and pc
+    ldr sp, [r3, #YARN_REG_SP]
+    ldr lr, [r3, #YARN_REG_LR]
+    mov pc, lr
+
+#endif // defined(__arm__)
diff --git a/src/Yarn/OSFiber_asm_arm.h b/src/Yarn/OSFiber_asm_arm.h
new file mode 100644
index 0000000..641d614
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_arm.h
@@ -0,0 +1,99 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define YARN_REG_r0  0x00
+#define YARN_REG_r1  0x04
+#define YARN_REG_r12 0x08
+#define YARN_REG_r4  0x0c
+#define YARN_REG_r5  0x10
+#define YARN_REG_r6  0x14
+#define YARN_REG_r7  0x18
+#define YARN_REG_r8  0x1c
+#define YARN_REG_r9  0x20
+#define YARN_REG_r10 0x24
+#define YARN_REG_r11 0x28
+#define YARN_REG_v8  0x2c
+#define YARN_REG_v9  0x30
+#define YARN_REG_v10 0x34
+#define YARN_REG_v11 0x38
+#define YARN_REG_v12 0x3c
+#define YARN_REG_v13 0x40
+#define YARN_REG_v14 0x44
+#define YARN_REG_v15 0x48
+#define YARN_REG_SP  0x4c
+#define YARN_REG_LR  0x50
+
+#ifndef YARN_BUILD_ASM
+#include <stdint.h>
+
+// Procedure Call Standard for the ARM 64-bit Architecture
+// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+struct yarn_fiber_context
+{
+    // parameter registers
+    uintptr_t r0;
+    uintptr_t r1;
+
+    // special purpose registers
+    uintptr_t r12; // Intra-Procedure-call
+
+    // callee-saved registers
+    uintptr_t r4;
+    uintptr_t r5;
+    uintptr_t r6;
+    uintptr_t r7;
+    uintptr_t r8;
+    uintptr_t r9;
+    uintptr_t r10;
+    uintptr_t r11;
+
+    uintptr_t v8;
+    uintptr_t v9;
+    uintptr_t v10;
+    uintptr_t v11;
+    uintptr_t v12;
+    uintptr_t v13;
+    uintptr_t v14;
+    uintptr_t v15;
+
+    uintptr_t SP; // stack pointer (r13)
+    uintptr_t LR; // link register (r14)
+};
+
+#ifdef __cplusplus
+#include <cstddef>
+static_assert(offsetof(yarn_fiber_context, r0)  == YARN_REG_r0,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r1)  == YARN_REG_r1,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r12) == YARN_REG_r12, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r4)  == YARN_REG_r4,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r5)  == YARN_REG_r5,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r6)  == YARN_REG_r6,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r7)  == YARN_REG_r7,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r8)  == YARN_REG_r8,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r9)  == YARN_REG_r9,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r10) == YARN_REG_r10, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, r11) == YARN_REG_r11, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v8)  == YARN_REG_v8,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v9)  == YARN_REG_v9,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v10) == YARN_REG_v10, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v11) == YARN_REG_v11, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v12) == YARN_REG_v12, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v13) == YARN_REG_v13, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v14) == YARN_REG_v14, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, v15) == YARN_REG_v15, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, SP)  == YARN_REG_SP,  "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, LR)  == YARN_REG_LR,  "Bad register offset");
+#endif // __cplusplus
+
+#endif // YARN_BUILD_ASM
diff --git a/src/Yarn/OSFiber_asm_x64.S b/src/Yarn/OSFiber_asm_x64.S
new file mode 100644
index 0000000..3232b71
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_x64.S
@@ -0,0 +1,65 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__x86_64__)
+
+#define YARN_BUILD_ASM 1
+#include "OSFiber_asm_x64.h"
+
+// void yarn_fiber_swap(yarn_fiber_context* from, const yarn_fiber_context* to)
+// rdi: from
+// rsi: to
+.text
+.global YARN_ASM_SYMBOL(yarn_fiber_swap)
+.align 4
+YARN_ASM_SYMBOL(yarn_fiber_swap):
+
+    // Save context 'from'
+
+    // Store callee-preserved registers
+    movq        %rbx, YARN_REG_RBX(%rdi)
+    movq        %rbp, YARN_REG_RBP(%rdi)
+    movq        %r12, YARN_REG_R12(%rdi)
+    movq        %r13, YARN_REG_R13(%rdi)
+    movq        %r14, YARN_REG_R14(%rdi)
+    movq        %r15, YARN_REG_R15(%rdi)
+
+    movq        (%rsp), %rcx             /* call stores the return address on the stack before jumping */
+    movq        %rcx, YARN_REG_RIP(%rdi)
+    leaq        8(%rsp), %rcx            /* skip the pushed return address */
+    movq        %rcx, YARN_REG_RSP(%rdi)
+
+    // Load context 'to'
+    movq        %rsi, %r8
+
+    // Load callee-preserved registers
+    movq        YARN_REG_RBX(%r8), %rbx
+    movq        YARN_REG_RBP(%r8), %rbp
+    movq        YARN_REG_R12(%r8), %r12
+    movq        YARN_REG_R13(%r8), %r13
+    movq        YARN_REG_R14(%r8), %r14
+    movq        YARN_REG_R15(%r8), %r15
+
+    // Load first two call parameters
+    movq        YARN_REG_RDI(%r8), %rdi
+    movq        YARN_REG_RSI(%r8), %rsi
+
+    // Load stack pointer
+    movq        YARN_REG_RSP(%r8), %rsp
+
+    // Load instruction pointer, and jump
+    movq        YARN_REG_RIP(%r8), %rcx
+    jmp         *%rcx
+
+#endif // defined(__x86_64__)
diff --git a/src/Yarn/OSFiber_asm_x64.h b/src/Yarn/OSFiber_asm_x64.h
new file mode 100644
index 0000000..506351c
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_x64.h
@@ -0,0 +1,69 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define YARN_REG_RBX 0x00
+#define YARN_REG_RBP 0x08
+#define YARN_REG_R12 0x10
+#define YARN_REG_R13 0x18
+#define YARN_REG_R14 0x20
+#define YARN_REG_R15 0x28
+#define YARN_REG_RDI 0x30
+#define YARN_REG_RSI 0x38
+#define YARN_REG_RSP 0x40
+#define YARN_REG_RIP 0x48
+
+#if defined(__APPLE__)
+#define YARN_ASM_SYMBOL(x) _##x
+#else
+#define YARN_ASM_SYMBOL(x) x
+#endif
+
+#ifndef YARN_BUILD_ASM
+
+#include <stdint.h>
+
+struct yarn_fiber_context
+{
+    // callee-saved registers
+    uintptr_t RBX;
+    uintptr_t RBP;
+    uintptr_t R12;
+    uintptr_t R13;
+    uintptr_t R14;
+    uintptr_t R15;
+
+    // parameter registers
+    uintptr_t RDI;
+    uintptr_t RSI;
+
+    // stack and instruction registers
+    uintptr_t RSP;
+    uintptr_t RIP;
+};
+
+#ifdef __cplusplus
+#include <cstddef>
+static_assert(offsetof(yarn_fiber_context, RBX) == YARN_REG_RBX, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, RBP) == YARN_REG_RBP, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, R12) == YARN_REG_R12, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, R13) == YARN_REG_R13, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, R14) == YARN_REG_R14, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, R15) == YARN_REG_R15, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, RDI) == YARN_REG_RDI, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, RSI) == YARN_REG_RSI, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, RSP) == YARN_REG_RSP, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, RIP) == YARN_REG_RIP, "Bad register offset");
+#endif // __cplusplus
+
+#endif // YARN_BUILD_ASM
diff --git a/src/Yarn/OSFiber_asm_x86.S b/src/Yarn/OSFiber_asm_x86.S
new file mode 100644
index 0000000..e1bb1f3
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_x86.S
@@ -0,0 +1,57 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__i386__)
+
+#define YARN_BUILD_ASM 1
+#include "OSFiber_asm_x86.h"
+
+// void yarn_fiber_swap(yarn_fiber_context* from, const yarn_fiber_context* to)
+// esp+4: from
+// esp+8: to
+.text
+.global yarn_fiber_swap
+.align 4
+yarn_fiber_swap:
+    // Save context 'from'
+    movl        4(%esp), %eax
+
+    // Store callee-preserved registers
+    movl        %ebx, YARN_REG_EBX(%eax)
+    movl        %ebp, YARN_REG_EBP(%eax)
+    movl        %esi, YARN_REG_ESI(%eax)
+    movl        %edi, YARN_REG_EDI(%eax)
+
+    movl        (%esp), %ecx             /* call stores the return address on the stack before jumping */
+    movl        %ecx, YARN_REG_EIP(%eax)
+    lea         4(%esp), %ecx            /* skip the pushed return address */
+    movl        %ecx, YARN_REG_ESP(%eax)
+
+    // Load context 'to'
+    movl        8(%esp), %ecx
+
+    // Load callee-preserved registers
+    movl        YARN_REG_EBX(%ecx), %ebx
+    movl        YARN_REG_EBP(%ecx), %ebp
+    movl        YARN_REG_ESI(%ecx), %esi
+    movl        YARN_REG_EDI(%ecx), %edi
+
+    // Load stack pointer
+    movl        YARN_REG_ESP(%ecx), %esp
+
+    // Load instruction pointer, and jump
+    movl        YARN_REG_EIP(%ecx), %ecx
+    jmp         *%ecx
+
+#endif // defined(__i386__)
diff --git a/src/Yarn/OSFiber_asm_x86.h b/src/Yarn/OSFiber_asm_x86.h
new file mode 100644
index 0000000..42f355f
--- /dev/null
+++ b/src/Yarn/OSFiber_asm_x86.h
@@ -0,0 +1,50 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define YARN_REG_EBX 0x00
+#define YARN_REG_EBP 0x04
+#define YARN_REG_ESI 0x08
+#define YARN_REG_EDI 0x0c
+#define YARN_REG_ESP 0x10
+#define YARN_REG_EIP 0x14
+
+#ifndef YARN_BUILD_ASM
+#include <stdint.h>
+
+// Assumes cdecl calling convention.
+// Registers EAX, ECX, and EDX are caller-saved, and the rest are callee-saved.
+struct yarn_fiber_context
+{
+    // callee-saved registers
+    uintptr_t EBX;
+    uintptr_t EBP;
+    uintptr_t ESI;
+    uintptr_t EDI;
+
+    // stack and instruction registers
+    uintptr_t ESP;
+    uintptr_t EIP;
+};
+
+#ifdef __cplusplus
+#include <cstddef>
+static_assert(offsetof(yarn_fiber_context, EBX) == YARN_REG_EBX, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, EBP) == YARN_REG_EBP, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, ESI) == YARN_REG_ESI, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, EDI) == YARN_REG_EDI, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, ESP) == YARN_REG_ESP, "Bad register offset");
+static_assert(offsetof(yarn_fiber_context, EIP) == YARN_REG_EIP, "Bad register offset");
+#endif // __cplusplus
+
+#endif // YARN_BUILD_ASM
diff --git a/src/Yarn/OSFiber_test.cpp b/src/Yarn/OSFiber_test.cpp
new file mode 100644
index 0000000..a4afb6b
--- /dev/null
+++ b/src/Yarn/OSFiber_test.cpp
@@ -0,0 +1,44 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "OSFiber.hpp"
+
+#include "Yarn_test.hpp"
+
+TEST(WithoutBoundScheduler, OSFiber)
+{
+    std::string str;
+    auto constexpr fiberStackSize = 8 * 1024;
+    auto main = std::unique_ptr<yarn::OSFiber>(yarn::OSFiber::createFiberFromCurrentThread());
+    std::unique_ptr<yarn::OSFiber> fiberA, fiberB, fiberC;
+    fiberC = std::unique_ptr<yarn::OSFiber>(yarn::OSFiber::createFiber(fiberStackSize, [&]
+    {
+        str += "C";
+        fiberC->switchTo(fiberB.get());
+    }));
+    fiberB = std::unique_ptr<yarn::OSFiber>(yarn::OSFiber::createFiber(fiberStackSize, [&]
+    {
+        str += "B";
+        fiberB->switchTo(fiberA.get());
+    }));
+    fiberA = std::unique_ptr<yarn::OSFiber>(yarn::OSFiber::createFiber(fiberStackSize, [&]
+    {
+        str += "A";
+        fiberA->switchTo(main.get());
+    }));
+
+    main->switchTo(fiberC.get());
+
+    ASSERT_EQ(str, "CBA");
+}
diff --git a/src/Yarn/OSFiber_ucontext.hpp b/src/Yarn/OSFiber_ucontext.hpp
new file mode 100644
index 0000000..6ab1698
--- /dev/null
+++ b/src/Yarn/OSFiber_ucontext.hpp
@@ -0,0 +1,114 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if !defined(_XOPEN_SOURCE)
+// This must come before other #includes, otherwise we'll end up with ucontext_t
+// definition mismatches, leading to memory corruption hilarity.
+#define _XOPEN_SOURCE
+#endif //  !defined(_XOPEN_SOURCE)
+
+#include "Debug.hpp"
+
+#include <functional>
+#include <memory>
+
+#include <ucontext.h>
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#endif // defined(__clang__)
+
+namespace yarn {
+
+class OSFiber
+{
+public:
+    // createFiberFromCurrentThread() returns a fiber created from the current
+    // thread.
+    static inline OSFiber* createFiberFromCurrentThread();
+
+    // createFiber() returns a new fiber with the given stack size that will
+    // call func when switched to. func() must end by switching back to another
+    // fiber, and must not return.
+    static inline OSFiber* createFiber(size_t stackSize, const std::function<void()>& func);
+
+    // switchTo() immediately switches execution to the given fiber.
+    // switchTo() must be called on the currently executing fiber.
+    inline void switchTo(OSFiber*);
+
+private:
+	std::unique_ptr<uint8_t[]> stack;
+	ucontext_t context;
+	std::function<void()> target;
+};
+
+OSFiber* OSFiber::createFiberFromCurrentThread()
+{
+	auto out = new OSFiber();
+	out->context = {};
+	getcontext(&out->context);
+	return out;
+}
+
+OSFiber* OSFiber::createFiber(size_t stackSize, const std::function<void()>& func)
+{
+	union Args
+	{
+		OSFiber* self;
+		struct { int a; int b; };
+	};
+
+	struct Target
+	{
+		static void Main(int a, int b)
+		{
+			Args u;
+			u.a = a; u.b = b;
+			std::function<void()> func;
+			std::swap(func, u.self->target);
+			func();
+		}
+	};
+
+	auto out = new OSFiber();
+	out->context = {};
+	out->stack = std::unique_ptr<uint8_t[]>(new uint8_t[stackSize]);
+	out->target = func;
+
+	auto alignmentOffset = 15 - (reinterpret_cast<uintptr_t>(out->stack.get() + 15) & 15);
+	auto res = getcontext(&out->context);
+	YARN_ASSERT(res == 0, "getcontext() returned %d", int(res));
+	out->context.uc_stack.ss_sp = out->stack.get() + alignmentOffset;
+	out->context.uc_stack.ss_size = stackSize - alignmentOffset;
+	out->context.uc_link = nullptr;
+
+	Args args;
+	args.self = out;
+	makecontext(&out->context, reinterpret_cast<void(*)()>(&Target::Main), 2, args.a, args.b);
+
+	return out;
+}
+
+void OSFiber::switchTo(OSFiber* fiber)
+{
+	auto res = swapcontext(&context, &fiber->context);
+	YARN_ASSERT(res == 0, "swapcontext() returned %d", int(res));
+}
+
+}  // namespace yarn
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif // defined(__clang__)
diff --git a/src/Yarn/OSFiber_windows.hpp b/src/Yarn/OSFiber_windows.hpp
new file mode 100644
index 0000000..af07649
--- /dev/null
+++ b/src/Yarn/OSFiber_windows.hpp
@@ -0,0 +1,90 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <functional>
+#include <memory>
+
+#include <Windows.h>
+
+namespace yarn {
+
+class OSFiber
+{
+public:
+	inline ~OSFiber();
+
+    // createFiberFromCurrentThread() returns a fiber created from the current
+    // thread.
+    static inline OSFiber* createFiberFromCurrentThread();
+
+    // createFiber() returns a new fiber with the given stack size that will
+    // call func when switched to. func() must end by switching back to another
+    // fiber, and must not return.
+    static inline OSFiber* createFiber(size_t stackSize, const std::function<void()>& func);
+
+    // switchTo() immediately switches execution to the given fiber.
+    // switchTo() must be called on the currently executing fiber.
+    inline void switchTo(OSFiber*);
+
+private:
+	static inline void WINAPI run(void* self);
+	LPVOID fiber = nullptr;
+	bool isFiberFromThread = false;
+	std::function<void()> target;
+};
+
+OSFiber::~OSFiber()
+{
+	if (fiber != nullptr)
+	{
+		if (isFiberFromThread)
+		{
+			ConvertFiberToThread();
+		}
+		else
+		{
+			DeleteFiber(fiber);
+		}
+	}
+}
+
+OSFiber* OSFiber::createFiberFromCurrentThread()
+{
+	auto out = new OSFiber();
+	out->fiber = ConvertThreadToFiber(nullptr);
+	out->isFiberFromThread = true;
+	return out;
+}
+
+OSFiber* OSFiber::createFiber(size_t stackSize, const std::function<void()>& func)
+{
+	auto out = new OSFiber();
+	out->fiber = CreateFiber(stackSize, &OSFiber::run, out);
+	out->target = func;
+	return out;
+}
+
+void OSFiber::switchTo(OSFiber* fiber)
+{
+	SwitchToFiber(fiber->fiber);
+}
+
+void WINAPI OSFiber::run(void* self)
+{
+	std::function<void()> func;
+	std::swap(func, reinterpret_cast<OSFiber*>(self)->target);
+	func();
+}
+
+}  // namespace yarn
diff --git a/src/Yarn/OSFiber_x64.c b/src/Yarn/OSFiber_x64.c
new file mode 100644
index 0000000..0675053
--- /dev/null
+++ b/src/Yarn/OSFiber_x64.c
@@ -0,0 +1,34 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__x86_64__)
+
+#include "OSFiber_asm_x64.h"
+
+void yarn_fiber_trampoline(void(*target)(void*), void* arg)
+{
+    target(arg);
+}
+
+void yarn_fiber_set_target(struct yarn_fiber_context* ctx, void* stack, uint32_t stack_size, void(*target)(void*), void* arg)
+{
+    uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size);
+    ctx->RIP = (uintptr_t)&yarn_fiber_trampoline;
+    ctx->RDI = (uintptr_t)target;
+    ctx->RSI = (uintptr_t)arg;
+    ctx->RSP = (uintptr_t)&stack_top[-3];
+    stack_top[-2] = 0; // No return target.
+}
+
+#endif // defined(__x86_64__)
diff --git a/src/Yarn/OSFiber_x86.c b/src/Yarn/OSFiber_x86.c
new file mode 100644
index 0000000..2bfe082
--- /dev/null
+++ b/src/Yarn/OSFiber_x86.c
@@ -0,0 +1,34 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__i386__)
+
+#include "OSFiber_asm_x86.h"
+
+void yarn_fiber_trampoline(void(*target)(void*), void* arg)
+{
+    target(arg);
+}
+
+void yarn_fiber_set_target(struct yarn_fiber_context* ctx, void* stack, uint32_t stack_size, void(*target)(void*), void* arg)
+{
+    uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size);
+    ctx->EIP = (uintptr_t)&yarn_fiber_trampoline;
+    ctx->ESP = (uintptr_t)&stack_top[-3];
+    stack_top[-1] = (uintptr_t)arg;
+    stack_top[-2] = (uintptr_t)target;
+    stack_top[-3] = 0; // No return target.
+}
+
+#endif // defined(__i386__)
diff --git a/src/Yarn/Pool.hpp b/src/Yarn/Pool.hpp
new file mode 100644
index 0000000..79d05b5
--- /dev/null
+++ b/src/Yarn/Pool.hpp
@@ -0,0 +1,453 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_pool_hpp
+#define yarn_pool_hpp
+
+#include "ConditionVariable.hpp"
+
+#include <atomic>
+#include <mutex>
+
+namespace yarn {
+
+// PoolPolicy controls whether pool items are constructed and destructed each
+// time they are borrowed from and returned to a pool, or whether they persist
+// constructed for the lifetime of the pool.
+enum class PoolPolicy
+{
+    // Call the Pool items constructor on borrow(), and destruct the item
+    // when the item is returned.
+    Reconstruct,
+
+    // Construct and destruct all items once for the lifetime of the Pool.
+    // Items will keep their state between loans.
+    Preserve,
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Pool<T>
+////////////////////////////////////////////////////////////////////////////////
+
+// Pool is the abstract base class for BoundedPool<> and UnboundedPool<>.
+template <typename T>
+class Pool
+{
+protected:
+    struct Item;
+    class Storage;
+
+public:
+    // A Loan is returned by the pool's borrow() function.
+    // Loans track the number of references to the loaned item, and return the
+    // item to the pool when the final Loan reference is dropped.
+    class Loan
+    {
+    public:
+        inline Loan() = default;
+        inline Loan(Item*, const std::shared_ptr<Storage>&);
+        inline Loan(const Loan&);
+        inline Loan(Loan&&);
+        inline ~Loan();
+        inline Loan& operator = (const Loan&);
+        inline Loan& operator = (Loan&&);
+        inline T& operator * ();
+        inline T* operator -> () const;
+        inline T* get() const;
+        void reset();
+
+    private:
+        Item *item = nullptr;
+        std::shared_ptr<Storage> storage;
+    };
+
+protected:
+    Pool() = default;
+
+    // The shared storage between the pool and all loans.
+    class Storage
+    {
+    public:
+        virtual ~Storage() = default;
+        virtual void return_(Item*) = 0;
+    };
+
+    // The backing data of a single item in the pool.
+    struct Item
+    {
+        // get() returns a pointer to the item's data.
+        inline T* get();
+
+        // construct() calls the constructor on the item's data.
+        inline void construct();
+
+        // destruct() calls the destructor on the item's data.
+        inline void destruct();
+
+        using Data = typename std::aligned_storage<sizeof(T), alignof(T)>::type;
+        Data data;
+        std::atomic<int> refcount = {0};
+        Item *next = nullptr; // pointer to the next free item in the pool.
+    };
+};
+
+// Loan<T> is an alias to Pool<T>::Loan.
+template <typename T>
+using Loan = typename Pool<T>::Loan;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pool<T>::Item
+////////////////////////////////////////////////////////////////////////////////
+template <typename T>
+T* Pool<T>::Item::get()
+{
+    return reinterpret_cast<T*>(&data);
+}
+
+template <typename T>
+void Pool<T>::Item::construct()
+{
+    new (&data) T;
+}
+
+template <typename T>
+void Pool<T>::Item::destruct()
+{
+    get()->~T();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Pool<T>::Loan
+////////////////////////////////////////////////////////////////////////////////
+template <typename T>
+Pool<T>::Loan::Loan(Item* item, const std::shared_ptr<Storage>& storage) : item(item), storage(storage)
+{
+    item->refcount++;
+}
+
+template <typename T>
+Pool<T>::Loan::Loan(const Loan& other) : item(other.item), storage(other.storage)
+{
+    if (item != nullptr)
+    {
+        item->refcount++;
+    }
+}
+
+template <typename T>
+Pool<T>::Loan::Loan(Loan&& other) : item(other.item), storage(other.storage)
+{
+    other.item = nullptr;
+    other.storage = nullptr;
+}
+
+template <typename T>
+Pool<T>::Loan::~Loan()
+{
+    reset();
+}
+
+template <typename T>
+void Pool<T>::Loan::reset()
+{
+    if (item != nullptr)
+    {
+        auto refs = --item->refcount;
+        YARN_ASSERT(refs >= 0, "reset() called on zero-ref pool item");
+        if (refs == 0)
+        {
+            storage->return_(item);
+        }
+        item = nullptr;
+        storage = nullptr;
+    }
+}
+
+template <typename T>
+typename Pool<T>::Loan& Pool<T>::Loan::operator = (const Pool<T>::Loan& rhs)
+{
+    reset();
+    if (rhs.item != nullptr)
+    {
+        item = rhs.item;
+        storage = rhs.storage;
+        rhs.item->refcount++;
+    }
+    return *this;
+}
+
+template <typename T>
+typename Pool<T>::Loan& Pool<T>::Loan::operator = (Pool<T>::Loan&& rhs)
+{
+    reset();
+    std::swap(item, rhs.item);
+    std::swap(storage, rhs.storage);
+    return *this;
+}
+
+template <typename T>
+T& Pool<T>::Loan::operator * () { return *item->get(); }
+
+template <typename T>
+T* Pool<T>::Loan::operator -> () const { return item->get(); }
+
+template <typename T>
+T* Pool<T>::Loan::get() const { return item->get(); }
+
+////////////////////////////////////////////////////////////////////////////////
+// BoundedPool<T, N, POLICY>
+////////////////////////////////////////////////////////////////////////////////
+
+// BoundedPool<T, N, POLICY> is a pool of items of type T, with a maximum
+// capacity of N items.
+// BoundedPool<> is initially populated with N default-constructed items.
+// POLICY controls whether pool items are constructed and destructed each
+// time they are borrowed from and returned to the pool.
+template <typename T, int N, PoolPolicy POLICY = PoolPolicy::Reconstruct>
+class BoundedPool : public Pool<T>
+{
+public:
+    using Item = typename Pool<T>::Item;
+    using Loan = typename Pool<T>::Loan;
+
+    // borrow() borrows a single item from the pool, blocking until an item is
+    // returned if the pool is empty.
+    inline Loan borrow() const;
+
+    // borrow() borrows count items from the pool, blocking until there are at
+    // least count items in the pool. The function f() is called with each
+    // borrowed item.
+    // F must be a function with the signature: void(T&&)
+    template <typename F>
+    inline void borrow(size_t count, const F& f) const;
+
+    // tryBorrow() attempts to borrow a single item from the pool without
+    // blocking.
+    // The boolean of the returned pair is true on success, or false if the pool
+    // is empty.
+    inline std::pair<Loan, bool> tryBorrow() const;
+
+private:
+    class Storage : public Pool<T>::Storage
+    {
+    public:
+        inline Storage();
+        inline ~Storage();
+        inline void return_(Item*) override;
+
+        std::mutex mutex;
+        ConditionVariable returned;
+        Item items[N];
+        Item *free = nullptr;
+    };
+    std::shared_ptr<Storage> storage = std::make_shared<Storage>();
+};
+
+template <typename T, int N, PoolPolicy POLICY>
+BoundedPool<T, N, POLICY>::Storage::Storage()
+{
+    for (int i = 0; i < N; i++)
+    {
+        if (POLICY == PoolPolicy::Preserve)
+        {
+            items[i].construct();
+        }
+        items[i].next = this->free;
+        this->free = &items[i];
+    }
+}
+
+template <typename T, int N, PoolPolicy POLICY>
+BoundedPool<T, N, POLICY>::Storage::~Storage()
+{
+    if (POLICY == PoolPolicy::Preserve)
+    {
+        for (int i = 0; i < N; i++)
+        {
+            items[i].destruct();
+        }
+    }
+}
+
+template <typename T, int N, PoolPolicy POLICY>
+typename BoundedPool<T, N, POLICY>::Loan BoundedPool<T, N, POLICY>::borrow() const
+{
+    Loan out;
+    borrow(1, [&](Loan&& loan) { out = std::move(loan); });
+    return out;
+}
+
+template <typename T, int N, PoolPolicy POLICY>
+template <typename F>
+void BoundedPool<T, N, POLICY>::borrow(size_t n, const F& f) const
+{
+    std::unique_lock<std::mutex> lock(storage->mutex);
+    for (size_t i = 0; i < n; i++)
+    {
+        storage->returned.wait(lock, [&] { return storage->free != nullptr; });
+        auto item = storage->free;
+        storage->free = storage->free->next;
+        if (POLICY == PoolPolicy::Reconstruct)
+        {
+            item->construct();
+        }
+        f(std::move(Loan(item, storage)));
+    }
+}
+
+template <typename T, int N, PoolPolicy POLICY>
+std::pair<typename BoundedPool<T, N, POLICY>::Loan, bool> BoundedPool<T, N, POLICY>::tryBorrow() const
+{
+    std::unique_lock<std::mutex> lock(storage->mutex);
+    if (storage->free == nullptr)
+    {
+        return std::make_pair(Loan(), false);
+    }
+    auto item = storage->free;
+    storage->free = storage->free->next;
+    item->pool = this;
+    lock.unlock();
+    if (POLICY == PoolPolicy::Reconstruct)
+    {
+        item->construct();
+    }
+    return std::make_pair(Loan(item, storage), true);
+}
+
+template <typename T, int N, PoolPolicy POLICY>
+void BoundedPool<T, N, POLICY>::Storage::return_(Item* item)
+{
+    if (POLICY == PoolPolicy::Reconstruct)
+    {
+        item->destruct();
+    }
+    std::unique_lock<std::mutex> lock(mutex);
+    item->next = free;
+    free = item;
+    lock.unlock();
+    returned.notify_one();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// UnboundedPool
+////////////////////////////////////////////////////////////////////////////////
+
+// UnboundedPool<T, POLICY> is a pool of items of type T.
+// UnboundedPool<> will automatically allocate more items if the pool becomes
+// empty.
+// POLICY controls whether pool items are constructed and destructed each
+// time they are borrowed from and returned to the pool.
+template <typename T, PoolPolicy POLICY = PoolPolicy::Reconstruct>
+class UnboundedPool : public Pool<T>
+{
+public:
+    using Item = typename Pool<T>::Item;
+    using Loan = typename Pool<T>::Loan;
+
+    // borrow() borrows a single item from the pool, automatically allocating
+    // more items if the pool is empty.
+    // This function does not block.
+    inline Loan borrow() const;
+
+    // borrow() borrows count items from the pool, calling the function f() with
+    // each borrowed item.
+    // F must be a function with the signature: void(T&&)
+    // This function does not block.
+    template <typename F>
+    inline void borrow(size_t n, const F& f) const;
+
+private:
+    class Storage : public Pool<T>::Storage
+    {
+    public:
+        inline ~Storage();
+        inline void return_(Item*) override;
+
+        std::mutex mutex;
+        std::vector<Item*> items;
+        Item *free = nullptr;
+    };
+    std::shared_ptr<Storage> storage = std::make_shared<Storage>();
+};
+
+template <typename T, PoolPolicy POLICY>
+UnboundedPool<T, POLICY>::Storage::~Storage()
+{
+    for (auto item : items)
+    {
+        if (POLICY == PoolPolicy::Preserve)
+        {
+            item->destruct();
+        }
+        delete item;
+    }
+}
+
+template <typename T, PoolPolicy POLICY>
+Loan<T> UnboundedPool<T, POLICY>::borrow() const
+{
+    Loan out;
+    borrow(1, [&] (Loan&& loan) { out = std::move(loan); });
+    return out;
+}
+
+template <typename T, PoolPolicy POLICY>
+template <typename F>
+inline void UnboundedPool<T, POLICY>::borrow(size_t n, const F& f) const
+{
+    std::unique_lock<std::mutex> lock(storage->mutex);
+    for (size_t i = 0; i < n; i++)
+    {
+        if (storage->free == nullptr)
+        {
+            auto count = std::max<size_t>(storage->items.size(), 32);
+            for (size_t i = 0; i < count; i++)
+            {
+                auto item = new Item();
+                if (POLICY == PoolPolicy::Preserve)
+                {
+                    item->construct();
+                }
+                storage->items.push_back(item);
+                item->next = storage->free;
+                storage->free = item;
+            }
+        }
+
+        auto item = storage->free;
+        storage->free = storage->free->next;
+        if (POLICY == PoolPolicy::Reconstruct)
+        {
+            item->construct();
+        }
+        f(std::move(Loan(item, storage)));
+    }
+}
+
+template <typename T, PoolPolicy POLICY>
+void UnboundedPool<T, POLICY>::Storage::return_(Item* item)
+{
+    if (POLICY == PoolPolicy::Reconstruct)
+    {
+        item->destruct();
+    }
+    std::unique_lock<std::mutex> lock(mutex);
+    item->next = free;
+    free = item;
+    lock.unlock();
+}
+
+} // namespace yarn
+
+#endif  // yarn_pool_hpp
diff --git a/src/Yarn/Pool_test.cpp b/src/Yarn/Pool_test.cpp
new file mode 100644
index 0000000..b6face5
--- /dev/null
+++ b/src/Yarn/Pool_test.cpp
@@ -0,0 +1,178 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Yarn_test.hpp"
+
+#include "Yarn/Pool.hpp"
+#include "Yarn/WaitGroup.hpp"
+
+TEST_P(WithBoundScheduler, UnboundedPool_ConstructDestruct)
+{
+    yarn::UnboundedPool<int> pool;
+}
+
+TEST_P(WithBoundScheduler, BoundedPool_ConstructDestruct)
+{
+    yarn::BoundedPool<int, 10> pool;
+}
+
+TEST_P(WithBoundScheduler, UnboundedPool_Borrow)
+{
+    yarn::UnboundedPool<int> pool;
+    for (int i = 0; i < 100; i++)
+    {
+        pool.borrow();
+    }
+}
+
+TEST_P(WithBoundScheduler, UnboundedPool_ConcurrentBorrow)
+{
+    yarn::UnboundedPool<int> pool;
+    constexpr int iterations = 10000;
+    yarn::WaitGroup wg(iterations);
+    for (int i = 0; i < iterations; i++) {
+        yarn::schedule([=] { pool.borrow(); wg.done(); });
+    }
+    wg.wait();
+}
+
+TEST_P(WithBoundScheduler, BoundedPool_Borrow)
+{
+    yarn::BoundedPool<int, 100> pool;
+    for (int i = 0; i < 100; i++)
+    {
+        pool.borrow();
+    }
+}
+
+TEST_P(WithBoundScheduler, BoundedPool_ConcurrentBorrow)
+{
+    yarn::BoundedPool<int, 10> pool;
+    constexpr int iterations = 10000;
+    yarn::WaitGroup wg(iterations);
+    for (int i = 0; i < iterations; i++) {
+        yarn::schedule([=]
+        {
+            pool.borrow();
+            wg.done();
+        });
+    }
+    wg.wait();
+}
+
+struct CtorDtorCounter
+{
+    CtorDtorCounter() { ctor_count++; }
+    ~CtorDtorCounter() { dtor_count++; }
+    static void reset() { ctor_count = 0; dtor_count = 0; }
+    static int ctor_count;
+    static int dtor_count;
+};
+
+int CtorDtorCounter::ctor_count = -1;
+int CtorDtorCounter::dtor_count = -1;
+
+TEST_P(WithBoundScheduler, UnboundedPool_PolicyReconstruct)
+{
+    CtorDtorCounter::reset();
+    yarn::UnboundedPool<CtorDtorCounter, yarn::PoolPolicy::Reconstruct> pool;
+    ASSERT_EQ(CtorDtorCounter::ctor_count, 0);
+    ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+    {
+        auto loan = pool.borrow();
+        ASSERT_EQ(CtorDtorCounter::ctor_count, 1);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+    }
+    ASSERT_EQ(CtorDtorCounter::ctor_count, 1);
+    ASSERT_EQ(CtorDtorCounter::dtor_count, 1);
+    {
+        auto loan = pool.borrow();
+        ASSERT_EQ(CtorDtorCounter::ctor_count, 2);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 1);
+    }
+    ASSERT_EQ(CtorDtorCounter::ctor_count, 2);
+    ASSERT_EQ(CtorDtorCounter::dtor_count, 2);
+}
+
+TEST_P(WithBoundScheduler, BoundedPool_PolicyReconstruct)
+{
+    CtorDtorCounter::reset();
+    yarn::BoundedPool<CtorDtorCounter, 10, yarn::PoolPolicy::Reconstruct> pool;
+    ASSERT_EQ(CtorDtorCounter::ctor_count, 0);
+    ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+    {
+        auto loan = pool.borrow();
+        ASSERT_EQ(CtorDtorCounter::ctor_count, 1);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+    }
+    ASSERT_EQ(CtorDtorCounter::ctor_count, 1);
+    ASSERT_EQ(CtorDtorCounter::dtor_count, 1);
+    {
+        auto loan = pool.borrow();
+        ASSERT_EQ(CtorDtorCounter::ctor_count, 2);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 1);
+    }
+    ASSERT_EQ(CtorDtorCounter::ctor_count, 2);
+    ASSERT_EQ(CtorDtorCounter::dtor_count, 2);
+}
+
+TEST_P(WithBoundScheduler, UnboundedPool_PolicyPreserve)
+{
+    CtorDtorCounter::reset();
+    {
+        yarn::UnboundedPool<CtorDtorCounter, yarn::PoolPolicy::Preserve> pool;
+        int ctor_count;
+        {
+            auto loan = pool.borrow();
+            ASSERT_NE(CtorDtorCounter::ctor_count, 0);
+            ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+            ctor_count = CtorDtorCounter::ctor_count;
+        }
+        ASSERT_EQ(CtorDtorCounter::ctor_count, ctor_count);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+        {
+            auto loan = pool.borrow();
+            ASSERT_EQ(CtorDtorCounter::ctor_count, ctor_count);
+            ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+        }
+        ASSERT_EQ(CtorDtorCounter::ctor_count, ctor_count);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+    }
+    ASSERT_EQ(CtorDtorCounter::ctor_count, CtorDtorCounter::dtor_count);
+}
+
+TEST_P(WithBoundScheduler, BoundedPool_PolicyPreserve)
+{
+    CtorDtorCounter::reset();
+    {
+        yarn::BoundedPool<CtorDtorCounter, 10, yarn::PoolPolicy::Preserve> pool;
+        int ctor_count;
+        {
+            auto loan = pool.borrow();
+            ASSERT_NE(CtorDtorCounter::ctor_count, 0);
+            ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+            ctor_count = CtorDtorCounter::ctor_count;
+        }
+        ASSERT_EQ(CtorDtorCounter::ctor_count, ctor_count);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+        {
+            auto loan = pool.borrow();
+            ASSERT_EQ(CtorDtorCounter::ctor_count, ctor_count);
+            ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+        }
+        ASSERT_EQ(CtorDtorCounter::ctor_count, ctor_count);
+        ASSERT_EQ(CtorDtorCounter::dtor_count, 0);
+    }
+    ASSERT_EQ(CtorDtorCounter::ctor_count, CtorDtorCounter::dtor_count);
+}
diff --git a/src/Yarn/SAL.hpp b/src/Yarn/SAL.hpp
new file mode 100644
index 0000000..acd03c7
--- /dev/null
+++ b/src/Yarn/SAL.hpp
@@ -0,0 +1,29 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Stubs SAL annotation macros for platforms that do not support them.
+// See https://docs.microsoft.com/en-us/visualstudio/code-quality/annotating-locking-behavior?view=vs-2019
+
+#ifndef yarn_sal_hpp
+#define yarn_sal_hpp
+
+#ifndef _Requires_lock_held_
+#define _Requires_lock_held_(x)
+#endif
+
+#ifndef _Requires_lock_not_held_
+#define _Requires_lock_not_held_(x)
+#endif
+
+#endif // yarn_sal_hpp
diff --git a/src/Yarn/Scheduler.cpp b/src/Yarn/Scheduler.cpp
new file mode 100644
index 0000000..25b432e
--- /dev/null
+++ b/src/Yarn/Scheduler.cpp
@@ -0,0 +1,540 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "OSFiber.hpp" // Must come first. See OSFiber_ucontext.hpp.
+
+#include "Scheduler.hpp"
+
+#include "Debug.hpp"
+#include "Defer.hpp"
+#include "Thread.hpp"
+#include "Trace.hpp"
+
+#if defined(_WIN32)
+#include <intrin.h> // __nop()
+#endif
+
+// Enable to trace scheduler events.
+#define ENABLE_TRACE_EVENTS 0
+
+#if ENABLE_TRACE_EVENTS
+#define TRACE(...) YARN_SCOPED_EVENT(__VA_ARGS__)
+#else
+#define TRACE(...)
+#endif
+
+namespace
+{
+
+template <typename T>
+inline T take(std::queue<T>& queue)
+{
+    auto out = std::move(queue.front());
+    queue.pop();
+    return out;
+}
+
+inline void nop()
+{
+#if defined(_WIN32)
+    __nop();
+#else
+    __asm__ __volatile__ ("nop");
+#endif
+}
+
+} // anonymous namespace
+
+namespace yarn {
+
+////////////////////////////////////////////////////////////////////////////////
+// Scheduler
+////////////////////////////////////////////////////////////////////////////////
+thread_local Scheduler* Scheduler::bound = nullptr;
+
+Scheduler* Scheduler::get()
+{
+    return bound;
+}
+
+void Scheduler::bind()
+{
+    YARN_ASSERT(bound == nullptr, "Scheduler already bound");
+    bound = this;
+    {
+        std::unique_lock<std::mutex> lock(singleThreadedWorkerMutex);
+        auto worker = std::unique_ptr<Worker>(new Worker(this, Worker::Mode::SingleThreaded, 0));
+        worker->start();
+        auto tid = std::this_thread::get_id();
+        singleThreadedWorkers.emplace(tid, std::move(worker));
+    }
+}
+
+void Scheduler::unbind()
+{
+    YARN_ASSERT(bound != nullptr, "No scheduler bound");
+    std::unique_ptr<Worker> worker;
+    {
+        std::unique_lock<std::mutex> lock(bound->singleThreadedWorkerMutex);
+        auto tid = std::this_thread::get_id();
+        auto it = bound->singleThreadedWorkers.find(tid);
+        YARN_ASSERT(it != bound->singleThreadedWorkers.end(), "singleThreadedWorker not found");
+        worker = std::move(it->second);
+        bound->singleThreadedWorkers.erase(tid);
+    }
+    worker->flush();
+    worker->stop();
+    bound = nullptr;
+}
+
+Scheduler::Scheduler()
+{
+    for (size_t i = 0; i < spinningWorkers.size(); i++)
+    {
+        spinningWorkers[i] = -1;
+    }
+}
+
+Scheduler::~Scheduler()
+{
+    {
+        std::unique_lock<std::mutex> lock(singleThreadedWorkerMutex);
+        YARN_ASSERT(singleThreadedWorkers.size() == 0, "Scheduler still bound on %d threads", int(singleThreadedWorkers.size()));
+    }
+    setWorkerThreadCount(0);
+}
+
+void Scheduler::setThreadInitializer(const std::function<void()>& func)
+{
+    std::unique_lock<std::mutex> lock(threadInitFuncMutex);
+    threadInitFunc = func;
+}
+
+const std::function<void()>& Scheduler::getThreadInitializer()
+{
+    std::unique_lock<std::mutex> lock(threadInitFuncMutex);
+    return threadInitFunc;
+}
+
+void Scheduler::setWorkerThreadCount(int newCount)
+{
+    YARN_ASSERT(newCount >= 0, "count must be positive");
+    auto oldCount = numWorkerThreads;
+    for (int idx = oldCount - 1; idx >= newCount; idx--)
+    {
+        workerThreads[idx]->stop();
+    }
+    for (int idx = oldCount - 1; idx >= newCount; idx--)
+    {
+        delete workerThreads[idx];
+    }
+    for (int idx = oldCount; idx < newCount; idx++)
+    {
+        workerThreads[idx] = new Worker(this, Worker::Mode::MultiThreaded, idx);
+    }
+    numWorkerThreads = newCount;
+    for (int idx = oldCount; idx < newCount; idx++)
+    {
+        workerThreads[idx]->start();
+    }
+}
+
+int Scheduler::getWorkerThreadCount()
+{
+    return numWorkerThreads;
+}
+
+void Scheduler::enqueue(Task&& task)
+{
+    if (numWorkerThreads > 0)
+    {
+        while (true)
+        {
+            // Prioritize workers that have recently started spinning.
+            auto i = --nextSpinningWorkerIdx % spinningWorkers.size();
+            auto idx = spinningWorkers[i].exchange(-1);
+            if (idx < 0)
+            {
+                // If a spinning worker couldn't be found, round-robin the
+                // workers.
+                idx = nextEnqueueIndex++ % numWorkerThreads;
+            }
+
+            auto worker = workerThreads[idx];
+            if (worker->tryLock())
+            {
+                worker->enqueueAndUnlock(std::move(task));
+                return;
+            }
+        }
+    }
+    else
+    {
+        auto tid = std::this_thread::get_id();
+        std::unique_lock<std::mutex> lock(singleThreadedWorkerMutex);
+        auto it = singleThreadedWorkers.find(tid);
+        YARN_ASSERT(it != singleThreadedWorkers.end(), "singleThreadedWorker not found");
+        it->second->enqueue(std::move(task));
+    }
+}
+
+bool Scheduler::stealWork(Worker* thief, uint64_t from, Task& out)
+{
+    if (numWorkerThreads > 0)
+    {
+        auto thread = workerThreads[from % numWorkerThreads];
+        if (thread != thief)
+        {
+            if (thread->dequeue(out))
+            {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void Scheduler::onBeginSpinning(int workerId)
+{
+    auto idx = nextSpinningWorkerIdx++ % spinningWorkers.size();
+    spinningWorkers[idx] = workerId;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Fiber
+////////////////////////////////////////////////////////////////////////////////
+Scheduler::Fiber::Fiber(OSFiber* impl, uint32_t id) :
+    id(id), impl(impl), worker(Scheduler::Worker::getCurrent())
+{
+    YARN_ASSERT(worker != nullptr, "No Scheduler::Worker bound");
+}
+
+Scheduler::Fiber::~Fiber()
+{
+    delete impl;
+}
+
+Scheduler::Fiber* Scheduler::Fiber::current()
+{
+    auto worker = Scheduler::Worker::getCurrent();
+    return worker != nullptr ? worker->getCurrentFiber() : nullptr;
+}
+
+void Scheduler::Fiber::schedule()
+{
+    worker->enqueue(this);
+}
+
+void Scheduler::Fiber::yield()
+{
+    YARN_SCOPED_EVENT("YIELD");
+    worker->yield(this);
+}
+
+void Scheduler::Fiber::switchTo(Fiber* to)
+{
+    if (to != this)
+    {
+        impl->switchTo(to->impl);
+    }
+}
+
+Scheduler::Fiber* Scheduler::Fiber::create(uint32_t id, size_t stackSize, const std::function<void()>& func)
+{
+    return new Fiber(OSFiber::createFiber(stackSize, func), id);
+}
+
+Scheduler::Fiber* Scheduler::Fiber::createFromCurrentThread(uint32_t id)
+{
+    return new Fiber(OSFiber::createFiberFromCurrentThread(), id);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Scheduler::Worker
+////////////////////////////////////////////////////////////////////////////////
+thread_local Scheduler::Worker* Scheduler::Worker::current = nullptr;
+
+Scheduler::Worker::Worker(Scheduler *scheduler, Mode mode, uint32_t id) : id(id), mode(mode), scheduler(scheduler) {}
+
+void Scheduler::Worker::start()
+{
+    switch (mode)
+    {
+    case Mode::MultiThreaded:
+        thread = std::thread([=]
+        {
+            Thread::setName("Thread<%.2d>", int(id));
+
+            if (auto const &initFunc = scheduler->getThreadInitializer())
+            {
+                initFunc();
+            }
+
+            Scheduler::bound = scheduler;
+            Worker::current = this;
+            mainFiber.reset(Fiber::createFromCurrentThread(0));
+            currentFiber = mainFiber.get();
+            run();
+            mainFiber.reset();
+            Worker::current = nullptr;
+        });
+        break;
+
+    case Mode::SingleThreaded:
+        Worker::current = this;
+        mainFiber.reset(Fiber::createFromCurrentThread(0));
+        currentFiber = mainFiber.get();
+        break;
+
+    default:
+        YARN_ASSERT(false, "Unknown mode: %d", int(mode));
+    }
+}
+
+void Scheduler::Worker::stop()
+{
+    switch (mode)
+    {
+    case Mode::MultiThreaded:
+        shutdown = true;
+        enqueue([]{}); // Ensure the worker is woken up to notice the shutdown.
+        thread.join();
+        break;
+
+    case Mode::SingleThreaded:
+        Worker::current = nullptr;
+        break;
+
+    default:
+        YARN_ASSERT(false, "Unknown mode: %d", int(mode));
+    }
+}
+
+void Scheduler::Worker::yield(Fiber *from)
+{
+    YARN_ASSERT(currentFiber == from, "Attempting to call yield from a non-current fiber");
+
+    // Current fiber is yielding as it is blocked.
+
+    // First wait until there's something else this worker can do.
+    std::unique_lock<std::mutex> lock(work.mutex);
+    waitForWork(lock);
+
+
+    if (work.fibers.size() > 0)
+    {
+        // There's another fiber that has become unblocked, resume that.
+        work.num--;
+        auto to = take(work.fibers);
+        lock.unlock();
+        switchToFiber(to);
+    }
+    else if (idleFibers.size() > 0)
+    {
+        // There's an old fiber we can reuse, resume that.
+        auto to = take(idleFibers);
+        lock.unlock();
+        switchToFiber(to);
+    }
+    else
+    {
+        // Tasks to process and no existing fibers to resume. Spawn a new fiber.
+        lock.unlock();
+        switchToFiber(createWorkerFiber());
+    }
+}
+
+bool Scheduler::Worker::tryLock()
+{
+    return work.mutex.try_lock();
+}
+
+void Scheduler::Worker::enqueue(Fiber* fiber)
+{
+    std::unique_lock<std::mutex> lock(work.mutex);
+    auto wasIdle = work.num == 0;
+    work.fibers.push(std::move(fiber));
+    work.num++;
+    lock.unlock();
+    if (wasIdle) { work.added.notify_one(); }
+}
+
+void Scheduler::Worker::enqueue(Task&& task)
+{
+    work.mutex.lock();
+    enqueueAndUnlock(std::move(task));
+}
+
+void Scheduler::Worker::enqueueAndUnlock(Task&& task)
+{
+    auto wasIdle = work.num == 0;
+    work.tasks.push(std::move(task));
+    work.num++;
+    work.mutex.unlock();
+    if (wasIdle) { work.added.notify_one(); }
+}
+
+bool Scheduler::Worker::dequeue(Task& out)
+{
+    if (work.num.load() == 0) { return false; }
+    if (!work.mutex.try_lock()) { return false; }
+    defer(work.mutex.unlock());
+    if (work.tasks.size() == 0) { return false; }
+    work.num--;
+    out = take(work.tasks);
+    return true;
+}
+
+void Scheduler::Worker::flush()
+{
+    YARN_ASSERT(mode == Mode::SingleThreaded, "flush() can only be used on a single-threaded worker");
+    std::unique_lock<std::mutex> lock(work.mutex);
+    runUntilIdle(lock);
+}
+
+void Scheduler::Worker::run()
+{
+    switch (mode)
+    {
+    case Mode::MultiThreaded:
+    {
+        YARN_NAME_THREAD("Thread<%.2d> Fiber<%.2d>", int(id), Fiber::current()->id);
+        {
+            std::unique_lock<std::mutex> lock(work.mutex);
+            work.added.wait(lock, [this] { return work.num > 0 || shutdown; });
+            while (!shutdown)
+            {
+                waitForWork(lock);
+                runUntilIdle(lock);
+            }
+            Worker::current = nullptr;
+        }
+        switchToFiber(mainFiber.get());
+        break;
+    }
+    case Mode::SingleThreaded:
+        while (!shutdown)
+        {
+            flush();
+            idleFibers.emplace(currentFiber);
+            switchToFiber(mainFiber.get());
+        }
+        break;
+
+    default:
+        YARN_ASSERT(false, "Unknown mode: %d", int(mode));
+    }
+}
+
+_Requires_lock_held_(lock)
+void Scheduler::Worker::waitForWork(std::unique_lock<std::mutex> &lock)
+{
+    YARN_ASSERT(work.num == work.fibers.size() + work.tasks.size(), "work.num out of sync");
+    if (work.num == 0)
+    {
+        scheduler->onBeginSpinning(id);
+        lock.unlock();
+        spinForWork();
+        lock.lock();
+    }
+    work.added.wait(lock, [this] { return work.num > 0 || shutdown; });
+}
+
+void Scheduler::Worker::spinForWork()
+{
+    TRACE("SPIN");
+    Task stolen;
+
+    constexpr auto duration = std::chrono::milliseconds(1);
+    auto start = std::chrono::high_resolution_clock::now();
+    while (std::chrono::high_resolution_clock::now() - start < duration)
+    {
+        for (int i = 0; i < 256; i++) // Empirically picked magic number!
+        {
+            nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
+            nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
+            nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
+            nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
+            if (work.num > 0)
+            {
+                return;
+            }
+        }
+
+        if (scheduler->stealWork(this, rng(), stolen))
+        {
+            std::unique_lock<std::mutex> lock(work.mutex);
+            work.tasks.emplace(std::move(stolen));
+            work.num++;
+            return;
+        }
+
+        std::this_thread::yield();
+    }
+}
+
+_Requires_lock_held_(lock)
+void Scheduler::Worker::runUntilIdle(std::unique_lock<std::mutex> &lock)
+{
+    YARN_ASSERT(work.num == work.fibers.size() + work.tasks.size(), "work.num out of sync");
+    while (work.fibers.size() > 0 || work.tasks.size() > 0)
+    {
+        // Note: we cannot take and store on the stack more than a single fiber
+        // or task at a time, as the Fiber may yield and these items may get
+        // held on suspended fiber stack.
+
+        while (work.fibers.size() > 0)
+        {
+            work.num--;
+            auto fiber = take(work.fibers);
+            lock.unlock();
+            idleFibers.push(currentFiber);
+            switchToFiber(fiber);
+            lock.lock();
+        }
+
+        if (work.tasks.size() > 0)
+        {
+            work.num--;
+            auto task = take(work.tasks);
+            lock.unlock();
+
+            // Run the task.
+            task();
+
+            // std::function<> can carry arguments with complex destructors.
+            // Ensure these are destructed outside of the lock.
+            task = Task();
+
+            lock.lock();
+        }
+    }
+}
+
+Scheduler::Fiber* Scheduler::Worker::createWorkerFiber()
+{
+    auto id = workerFibers.size() + 1;
+    auto fiber = Fiber::create(id, FiberStackSize, [&] { run(); });
+    workerFibers.push_back(std::unique_ptr<Fiber>(fiber));
+    return fiber;
+}
+
+void Scheduler::Worker::switchToFiber(Fiber* to)
+{
+    auto from = currentFiber;
+    currentFiber = to;
+    from->switchTo(to);
+}
+
+} // namespace yarn
diff --git a/src/Yarn/Scheduler.hpp b/src/Yarn/Scheduler.hpp
new file mode 100644
index 0000000..7451698
--- /dev/null
+++ b/src/Yarn/Scheduler.hpp
@@ -0,0 +1,337 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_scheduler_hpp
+#define yarn_scheduler_hpp
+
+#include "Debug.hpp"
+#include "SAL.hpp"
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <functional>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <unordered_map>
+
+namespace yarn {
+
+class OSFiber;
+
+// Task is a unit of work for the scheduler.
+using Task = std::function<void()>;
+
+// Scheduler asynchronously processes Tasks.
+// A scheduler can be bound to one or more threads using the bind() method.
+// Once bound to a thread, that thread can call yarn::schedule() to enqueue
+// work tasks to be executed asynchronously.
+// Scheduler are initially constructed in single-threaded mode.
+// Call setWorkerThreadCount() to spawn dedicated worker threads.
+class Scheduler
+{
+    class Worker;
+
+public:
+    Scheduler();
+    ~Scheduler();
+
+    // get() returns the scheduler bound to the current thread.
+    static Scheduler *get();
+
+    // bind() binds this scheduler to the current thread.
+    // There must be no existing scheduler bound to the thread prior to calling.
+    void bind();
+
+    // unbind() unbinds the scheduler currently bound to the current thread.
+    // There must be a existing scheduler bound to the thread prior to calling.
+    static void unbind();
+
+    // enqueue() queues the task for asynchronous execution.
+    void enqueue(Task&& task);
+
+    // setThreadInitializer() sets the worker thread initializer function which
+    // will be called for each new worker thread spawned.
+    // The initializer will only be called on newly created threads (call
+    // setThreadInitializer() before setWorkerThreadCount()).
+    void setThreadInitializer(const std::function<void()>& init);
+
+    // getThreadInitializer() returns the thread initializer function set by
+    // setThreadInitializer().
+    const std::function<void()>& getThreadInitializer();
+
+    // setWorkerThreadCount() adjusts the number of dedicated worker threads.
+    // A count of 0 puts the scheduler into single-threaded mode.
+    // Note: Currently the number of threads cannot be adjusted once tasks
+    // have been enqueued. This restriction may be lifted at a later time.
+    void setWorkerThreadCount(int count);
+
+    // getWorkerThreadCount() returns the number of worker threads.
+    int getWorkerThreadCount();
+
+    // Fibers expose methods to perform cooperative multitasking and are
+    // automatically created by the Scheduler.
+    //
+    // The currently executing Fiber can be obtained by calling Fiber::current().
+    //
+    // When execution becomes blocked, yield() can be called to suspend execution of
+    // the fiber and start executing other pending work.
+    // Once the block has been lifted, schedule() can be called to reschedule the
+    // Fiber on the same thread that previously executed it.
+    class Fiber
+    {
+    public:
+        ~Fiber();
+
+        // current() returns the currently executing fiber, or nullptr if called
+        // without a bound scheduler.
+        static Fiber* current();
+
+        // yield() suspends execution of this Fiber, allowing the thread to work
+        // on other tasks.
+        // yield() must only be called on the currently executing fiber.
+        void yield();
+
+        // schedule() reschedules the suspended Fiber for execution.
+        void schedule();
+
+        // id is the thread-unique identifier of the Fiber.
+        uint32_t const id;
+
+    private:
+        friend class Scheduler;
+
+        Fiber(OSFiber*, uint32_t id);
+
+        // switchTo() switches execution to the given fiber.
+        // switchTo() must only be called on the currently executing fiber.
+        void switchTo(Fiber*);
+
+        // create() constructs and returns a new fiber with the given identifier,
+        // stack size that will executed func when switched to.
+        static Fiber* create(uint32_t id, size_t stackSize, const std::function<void()>& func);
+
+        // createFromCurrentThread() constructs and returns a new fiber with the
+        // given identifier for the current thread.
+        static Fiber* createFromCurrentThread(uint32_t id);
+
+        OSFiber* const impl;
+        Worker* const worker;
+    };
+
+private:
+    // Stack size in bytes of a new fiber.
+    // TODO: Make configurable so the default size can be reduced.
+    static constexpr size_t FiberStackSize = 1024 * 1024;
+
+    // Maximum number of worker threads.
+    static constexpr size_t MaxWorkerThreads = 64;
+
+    // TODO: Implement a queue that recycles elements to reduce number of
+    // heap allocations.
+    using TaskQueue = std::queue<Task>;
+    using FiberQueue = std::queue<Fiber*>;
+
+    // Workers executes Tasks on a single thread.
+    // Once a task is started, it may yield to other tasks on the same Worker.
+    // Tasks are always resumed by the same Worker.
+    class Worker
+    {
+    public:
+        enum class Mode
+        {
+            // Worker will spawn a background thread to process tasks.
+            MultiThreaded,
+
+            // Worker will execute tasks whenever it yields.
+            SingleThreaded,
+        };
+
+        Worker(Scheduler *scheduler, Mode mode, uint32_t id);
+
+        // start() begins execution of the worker.
+        void start();
+
+        // stop() ceases execution of the worker, blocking until all pending
+        // tasks have fully finished.
+        void stop();
+
+        // yield() suspends execution of the current task, and looks for other
+        // tasks to start or continue execution.
+        void yield(Fiber* fiber);
+
+        // enqueue(Fiber*) enqueues resuming of a suspended fiber.
+        void enqueue(Fiber* fiber);
+
+        // enqueue(Task&&) enqueues a new, unstarted task.
+        void enqueue(Task&& task);
+
+        // tryLock() attempts to lock the worker for task enqueing.
+        // If the lock was successful then true is returned, and the caller must
+        // call enqueueAndUnlock().
+        bool tryLock();
+
+        // enqueueAndUnlock() enqueues the task and unlocks the worker.
+        // Must only be called after a call to tryLock() which returned true.
+        void enqueueAndUnlock(Task&& task);
+
+        // flush() processes all pending tasks before returning.
+        void flush();
+
+        // dequeue() attempts to take a Task from the worker. Returns true if
+        // a task was taken and assigned to out, otherwise false.
+        bool dequeue(Task& out);
+
+        // getCurrent() returns the Worker currently bound to the current
+        // thread.
+        static inline Worker* getCurrent();
+
+        // getCurrentFiber() returns the Fiber currently being executed.
+        inline Fiber* getCurrentFiber() const;
+
+        // Unique identifier of the Worker.
+        const uint32_t id;
+
+    private:
+        // run() is the task processing function for the worker.
+        // If the worker was constructed in Mode::MultiThreaded, run() will
+        // continue to process tasks until stop() is called.
+        // If the worker was constructed in Mode::SingleThreaded, run() call
+        // flush() and return.
+        void run();
+
+        // createWorkerFiber() creates a new fiber that when executed calls
+        // run().
+        Fiber* createWorkerFiber();
+
+        // switchToFiber() switches execution to the given fiber. The fiber
+        // must belong to this worker.
+        void switchToFiber(Fiber*);
+
+        // runUntilIdle() executes all pending tasks and then returns.
+        _Requires_lock_held_(lock)
+        void runUntilIdle(std::unique_lock<std::mutex> &lock);
+
+        // waitForWork() blocks until new work is available, potentially calling
+        // spinForWork().
+        _Requires_lock_held_(lock)
+        void waitForWork(std::unique_lock<std::mutex> &lock);
+
+        // spinForWork() attempts to steal work from another Worker, and keeps
+        // the thread awake for a short duration. This reduces overheads of
+        // frequently putting the thread to sleep and re-waking.
+        void spinForWork();
+
+        // Work holds tasks and fibers that are enqueued on the Worker.
+        struct Work
+        {
+            std::atomic<uint64_t> num = { 0 }; // tasks.size() + fibers.size()
+            TaskQueue tasks; // guarded by mutex
+            FiberQueue fibers; // guarded by mutex
+            std::condition_variable added;
+            std::mutex mutex;
+        };
+
+        // https://en.wikipedia.org/wiki/Xorshift
+        class FastRnd
+        {
+        public:
+            inline uint64_t operator ()()
+            {
+                x ^= x << 13;
+                x ^= x >> 7;
+                x ^= x << 17;
+                return x;
+            }
+        private:
+            uint64_t x = std::chrono::system_clock::now().time_since_epoch().count();
+        };
+
+        // The current worker bound to the current thread.
+        static thread_local Worker* current;
+
+        Mode const mode;
+        Scheduler* const scheduler;
+        std::unique_ptr<Fiber> mainFiber;
+        Fiber* currentFiber = nullptr;
+        std::thread thread;
+        Work work;
+        FiberQueue idleFibers; // Fibers that have completed which can be reused.
+        std::vector<std::unique_ptr<Fiber>> workerFibers; // All fibers created by this worker.
+        FastRnd rng;
+        std::atomic<bool> shutdown = { false };
+    };
+
+    // stealWork() attempts to steal a task from the worker with the given id.
+    // Returns true if a task was stolen and assigned to out, otherwise false.
+    bool stealWork(Worker* thief, uint64_t from, Task& out);
+
+    // onBeginSpinning() is called when a Worker calls spinForWork().
+    // The scheduler will prioritize this worker for new tasks to try to prevent
+    // it going to sleep.
+    void onBeginSpinning(int workerId);
+
+    // The scheduler currently bound to the current thread.
+    static thread_local Scheduler* bound;
+
+    std::function<void()> threadInitFunc;
+    std::mutex threadInitFuncMutex;
+
+    std::array<std::atomic<int>, 8> spinningWorkers;
+    std::atomic<unsigned int> nextSpinningWorkerIdx = { 0x8000000 };
+
+    // TODO: Make this lot thread-safe so setWorkerThreadCount() can be called
+    // during execution of tasks.
+    std::atomic<unsigned int> nextEnqueueIndex = { 0 };
+    unsigned int numWorkerThreads = 0;
+    std::array<Worker*, MaxWorkerThreads> workerThreads;
+
+    std::mutex singleThreadedWorkerMutex;
+    std::unordered_map<std::thread::id, std::unique_ptr<Worker>> singleThreadedWorkers;
+};
+
+Scheduler::Worker* Scheduler::Worker::getCurrent()
+{
+    return Worker::current;
+}
+
+Scheduler::Fiber* Scheduler::Worker::getCurrentFiber() const
+{
+    return currentFiber;
+}
+
+// schedule() schedules the function f to be asynchronously called with the
+// given arguments using the currently bound scheduler.
+template<typename Function, typename ... Args>
+inline void schedule(Function&& f, Args&& ... args)
+{
+    YARN_ASSERT_HAS_BOUND_SCHEDULER("yarn::schedule");
+    auto scheduler = Scheduler::get();
+    scheduler->enqueue(std::bind(std::forward<Function>(f), std::forward<Args>(args)...));
+}
+
+// schedule() schedules the function f to be asynchronously called using the
+// currently bound scheduler.
+template<typename Function>
+inline void schedule(Function&& f)
+{
+    YARN_ASSERT_HAS_BOUND_SCHEDULER("yarn::schedule");
+    auto scheduler = Scheduler::get();
+    scheduler->enqueue(std::forward<Function>(f));
+}
+
+} // namespace yarn
+
+#endif // yarn_scheduler_hpp
diff --git a/src/Yarn/Scheduler_test.cpp b/src/Yarn/Scheduler_test.cpp
new file mode 100644
index 0000000..1b48301
--- /dev/null
+++ b/src/Yarn/Scheduler_test.cpp
@@ -0,0 +1,114 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Yarn_test.hpp"
+
+#include "Yarn/WaitGroup.hpp"
+
+TEST(WithoutBoundScheduler, SchedulerConstructAndDestruct)
+{
+    auto scheduler = new yarn::Scheduler();
+    delete scheduler;
+}
+
+TEST(WithoutBoundScheduler, SchedulerBindGetUnbind)
+{
+    auto scheduler = new yarn::Scheduler();
+    scheduler->bind();
+    auto got = yarn::Scheduler::get();
+    ASSERT_EQ(scheduler, got);
+    scheduler->unbind();
+    got = yarn::Scheduler::get();
+    ASSERT_EQ(got, nullptr);
+    delete scheduler;
+}
+
+TEST_P(WithBoundScheduler, SetAndGetWorkerThreadCount)
+{
+    ASSERT_EQ(yarn::Scheduler::get()->getWorkerThreadCount(), GetParam().numWorkerThreads);
+}
+
+TEST_P(WithBoundScheduler, DestructWithPendingTasks)
+{
+    for (int i = 0; i < 10000; i++)
+    {
+        yarn::schedule([] {});
+    }
+}
+
+TEST_P(WithBoundScheduler, DestructWithPendingFibers)
+{
+    yarn::WaitGroup wg(1);
+    for (int i = 0; i < 10000; i++)
+    {
+        yarn::schedule([=] { wg.wait(); });
+    }
+    wg.done();
+
+    auto scheduler = yarn::Scheduler::get();
+    scheduler->unbind();
+    delete scheduler;
+
+    // Rebind a new scheduler so WithBoundScheduler::TearDown() is happy.
+    (new yarn::Scheduler())->bind();
+}
+
+TEST_P(WithBoundScheduler, FibersResumeOnSameYarnThread)
+{
+    yarn::WaitGroup fence(1);
+    yarn::WaitGroup wg(1000);
+    for (int i = 0; i < 1000; i++)
+    {
+        yarn::schedule([=] {
+            auto threadID = std::this_thread::get_id();
+            fence.wait();
+            ASSERT_EQ(threadID, std::this_thread::get_id());
+            wg.done();
+        });
+    }
+    std::this_thread::sleep_for(std::chrono::milliseconds(10)); // just to try and get some tasks to yield.
+    fence.done();
+    wg.wait();
+}
+
+TEST_P(WithBoundScheduler, FibersResumeOnSameStdThread)
+{
+    auto scheduler = yarn::Scheduler::get();
+
+    yarn::WaitGroup fence(1);
+    yarn::WaitGroup wg(1000);
+
+    std::vector<std::thread> threads;
+    for (int i = 0; i < 1000; i++)
+    {
+        threads.push_back(std::thread([=] {
+            scheduler->bind();
+
+            auto threadID = std::this_thread::get_id();
+            fence.wait();
+            ASSERT_EQ(threadID, std::this_thread::get_id());
+            wg.done();
+
+            scheduler->unbind();
+        }));
+    }
+    std::this_thread::sleep_for(std::chrono::milliseconds(10)); // just to try and get some tasks to yield.
+    fence.done();
+    wg.wait();
+
+    for (auto& thread : threads)
+    {
+        thread.join();
+    }
+}
\ No newline at end of file
diff --git a/src/Yarn/Thread.cpp b/src/Yarn/Thread.cpp
new file mode 100644
index 0000000..68dcf47
--- /dev/null
+++ b/src/Yarn/Thread.cpp
@@ -0,0 +1,105 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Thread.hpp"
+
+#include "Trace.hpp"
+
+#include <cstdarg>
+
+#if defined(_WIN32)
+#   ifndef WIN32_LEAN_AND_MEAN
+#       define WIN32_LEAN_AND_MEAN
+#   endif
+#   include <windows.h>
+#elif defined(__APPLE__)
+#   include <pthread.h>
+#   include <mach/thread_act.h>
+#   include <unistd.h>
+#else
+#   include <pthread.h>
+#   include <unistd.h>
+#endif
+
+namespace yarn {
+
+#if defined(_WIN32)
+
+void Thread::setName(const char* fmt, ...)
+{
+    static auto setThreadDescription = reinterpret_cast<HRESULT(WINAPI*)(HANDLE, PCWSTR)>(GetProcAddress(GetModuleHandle("kernelbase.dll"), "SetThreadDescription"));
+    if (setThreadDescription == nullptr)
+    {
+        return;
+    }
+
+    char name[1024];
+    va_list vararg;
+    va_start(vararg, fmt);
+    vsnprintf(name, sizeof(name), fmt, vararg);
+    va_end(vararg);
+
+    wchar_t wname[1024];
+    mbstowcs(wname, name, 1024);
+    setThreadDescription(GetCurrentThread(), wname);
+    YARN_NAME_THREAD("%s", name);
+}
+
+unsigned int Thread::numLogicalCPUs()
+{
+    DWORD_PTR processAffinityMask = 1;
+    DWORD_PTR systemAffinityMask = 1;
+
+    GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask);
+
+    auto count = 0;
+    while (processAffinityMask > 0)
+    {
+        if (processAffinityMask & 1)
+        {
+            count++;
+        }
+
+        processAffinityMask >>= 1;
+    }
+    return count;
+}
+
+#else
+
+void Thread::setName(const char* fmt, ...)
+{
+    char name[1024];
+    va_list vararg;
+    va_start(vararg, fmt);
+    vsnprintf(name, sizeof(name), fmt, vararg);
+    va_end(vararg);
+
+#if defined(__APPLE__)
+    pthread_setname_np(name);
+#elif !defined(__Fuchsia__)
+    pthread_setname_np(pthread_self(), name);
+#endif
+
+    YARN_NAME_THREAD("%s", name);
+}
+
+unsigned int Thread::numLogicalCPUs()
+{
+    return sysconf(_SC_NPROCESSORS_ONLN);
+}
+
+#endif
+
+} // namespace yarn
diff --git a/src/Yarn/Thread.hpp b/src/Yarn/Thread.hpp
new file mode 100644
index 0000000..878cb8e
--- /dev/null
+++ b/src/Yarn/Thread.hpp
@@ -0,0 +1,38 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_thread_hpp
+#define yarn_thread_hpp
+
+#include <bitset>
+
+namespace yarn {
+
+// Thread contains static methods that abstract OS-specific thread / cpu
+// queries and control.
+class Thread
+{
+public:
+    // setName() sets the name of the currently executing thread for displaying
+    // in a debugger.
+    static void setName(const char* fmt, ...);
+
+    // numLogicalCPUs() returns the number of available logical CPU cores for
+    // the system.
+    static unsigned int numLogicalCPUs();
+};
+
+} // namespace yarn
+
+#endif  // yarn_thread_hpp
diff --git a/src/Yarn/Ticket.hpp b/src/Yarn/Ticket.hpp
new file mode 100644
index 0000000..ebe4d3b
--- /dev/null
+++ b/src/Yarn/Ticket.hpp
@@ -0,0 +1,267 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_ticket_hpp
+#define yarn_ticket_hpp
+
+#include "Pool.hpp"
+#include "ConditionVariable.hpp"
+#include "Scheduler.hpp"
+
+namespace yarn {
+
+// Ticket is a synchronization primitive used to serially order execution.
+//
+// Tickets exist in 3 mutually exclusive states: Waiting, Called and Finished.
+//
+// Tickets are obtained from a Ticket::Queue, using the Ticket::Queue::take()
+// methods. The order in which tickets are taken from the queue dictates the
+// order in which they are called.
+//
+// The first ticket to be taken from a queue will be in the 'called' state,
+// others will be in the 'waiting' state until the previous ticket has finished.
+//
+// Ticket::wait() will block until the ticket is called.
+// Ticket::done() sets the ticket into the 'finished' state and calls the next
+// taken ticket from the queue.
+//
+// If a ticket is taken from a queue and does not have done() called before
+// its last reference is dropped, it will implicitly call done(), calling the
+// next ticket.
+//
+// Example:
+//
+//  void runTasksConcurrentThenSerially(int numConcurrentTasks)
+//  {
+//      yarn::Ticket::Queue queue;
+//      for (int i = 0; i < numConcurrentTasks; i++)
+//      {
+//          auto ticket = queue.take();
+//          yarn::schedule([=] {
+//              doConcurrentWork(); // <- function may be called concurrently
+//              ticket.wait(); // <- serialize tasks
+//              doSerialWork(); // <- function will not be called concurrently
+//              ticket.done(); // <- optional, as done() is called implicitly on dropping of last reference
+//          });
+//      }
+//  }
+class Ticket
+{
+    struct Shared;
+    struct Record;
+public:
+
+    // Queue hands out Tickets.
+    class Queue
+    {
+    public:
+        // take() returns a single ticket from the queue.
+        inline Ticket take();
+
+        // take() retrieves count tickets from the queue, calling f() with each
+        // retrieved ticket.
+        // F must be a function of the signature: void(Ticket&&)
+        template <typename F>
+        inline void take(size_t count, const F& f);
+
+    private:
+        std::shared_ptr<Shared> shared = std::make_shared<Shared>();
+        UnboundedPool<Record> pool;
+    };
+
+    inline Ticket() = default;
+    inline Ticket(const Ticket& other) = default;
+    inline Ticket(Ticket&& other) = default;
+    inline Ticket& operator = (const Ticket& other) = default;
+
+    // wait() blocks until the ticket is called.
+    inline void wait() const;
+
+    // done() marks the ticket as finished and calls the next ticket.
+    inline void done() const;
+
+    // onCall() registers the function f to be invoked when this ticket is
+    // called. If the ticket is already called prior to calling onCall(), then
+    // f() will be executed immediately.
+    // F must be a function of the signature: void F()
+    template<typename F>
+    inline void onCall(F&& f) const;
+
+private:
+    // Internal doubly-linked-list data structure. One per ticket instance.
+    struct Record
+    {
+        inline ~Record();
+
+        inline void done();
+        inline void callAndUnlock(std::unique_lock<std::mutex> &lock);
+
+        ConditionVariable isCalledCondVar;
+
+        std::shared_ptr<Shared> shared;
+        Record *next = nullptr; // guarded by shared->mutex
+        Record *prev = nullptr; // guarded by shared->mutex
+        inline void unlink(); // guarded by shared->mutex
+        Task onCall; // guarded by shared->mutex
+        bool isCalled = false; // guarded by shared->mutex
+        std::atomic<bool> isDone = { false };
+    };
+
+    // Data shared between all tickets and the queue.
+    struct Shared
+    {
+        std::mutex mutex;
+        Record tail;
+    };
+
+    inline Ticket(Loan<Record>&& record);
+
+    Loan<Record> record;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Ticket
+////////////////////////////////////////////////////////////////////////////////
+
+Ticket::Ticket(Loan<Record>&& record) : record(std::move(record)) {}
+
+void Ticket::wait() const
+{
+    std::unique_lock<std::mutex> lock(record->shared->mutex);
+    record->isCalledCondVar.wait(lock, [this] { return record->isCalled; });
+}
+
+void Ticket::done() const
+{
+    record->done();
+}
+
+template<typename Function>
+void Ticket::onCall(Function&& f) const
+{
+    std::unique_lock<std::mutex> lock(record->shared->mutex);
+    if (record->isCalled)
+    {
+        yarn::schedule(std::move(f));
+        return;
+    }
+    if (record->onCall)
+    {
+        struct Joined
+        {
+            void operator() () const { a(); b(); }
+            Task a, b;
+        };
+        record->onCall = std::move(Joined{ std::move(record->onCall), std::move(f) });
+    }
+    else
+    {
+        record->onCall = std::move(f);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Ticket::Queue
+////////////////////////////////////////////////////////////////////////////////
+
+Ticket Ticket::Queue::take()
+{
+    Ticket out;
+    take(1, [&](Ticket&& ticket) { out = std::move(ticket); });
+    return out;
+}
+
+template <typename F>
+void Ticket::Queue::take(size_t n, const F& f)
+{
+    Loan<Record> first, last;
+    pool.borrow(n, [&] (Loan<Record>&& record) {
+        Loan<Record> rec = std::move(record);
+        rec->shared = shared;
+        if (first.get() == nullptr)
+        {
+            first = rec;
+        }
+        if (last.get() != nullptr)
+        {
+            last->next = rec.get();
+            rec->prev = last.get();
+        }
+        last = rec;
+        f(std::move(Ticket(std::move(rec))));
+    });
+    last->next = &shared->tail;
+    std::unique_lock<std::mutex> lock(shared->mutex);
+    first->prev = shared->tail.prev;
+    shared->tail.prev = last.get();
+    if (first->prev == nullptr)
+    {
+        first->callAndUnlock(lock);
+    }
+    else
+    {
+        first->prev->next = first.get();
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Ticket::Record
+////////////////////////////////////////////////////////////////////////////////
+
+Ticket::Record::~Record()
+{
+    if (shared != nullptr)
+    {
+        done();
+    }
+}
+
+void Ticket::Record::done()
+{
+    if (isDone.exchange(true)) { return; }
+    std::unique_lock<std::mutex> lock(shared->mutex);
+    auto callNext = (prev == nullptr && next != nullptr) ? next : nullptr;
+    unlink();
+    if (callNext != nullptr) // lock needs to be held otherwise callNext might be destructed.
+    {
+        callNext->callAndUnlock(lock);
+    }
+}
+
+void Ticket::Record::callAndUnlock(std::unique_lock<std::mutex> &lock)
+{
+    if (isCalled) { return; }
+    isCalled = true;
+    Task task;
+    std::swap(task, onCall);
+    isCalledCondVar.notify_all();
+    lock.unlock();
+
+    if (task)
+    {
+        yarn::schedule(std::move(task));
+    }
+}
+
+void Ticket::Record::unlink()
+{
+    if (prev != nullptr) { prev->next = next; }
+    if (next != nullptr) { next->prev = prev; }
+    prev = nullptr;
+    next = nullptr;
+}
+
+} // namespace yarn
+
+#endif  // yarn_ticket_hpp
diff --git a/src/Yarn/Ticket_test.cpp b/src/Yarn/Ticket_test.cpp
new file mode 100644
index 0000000..3bb4b3b
--- /dev/null
+++ b/src/Yarn/Ticket_test.cpp
@@ -0,0 +1,43 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Yarn_test.hpp"
+
+#include "Yarn/Ticket.hpp"
+
+TEST_P(WithBoundScheduler, Ticket)
+{
+    yarn::Ticket::Queue queue;
+
+    constexpr int count = 1000;
+    std::atomic<int> next = { 0 };
+    int result[count] = {};
+
+    for (int i = 0; i < count; i++)
+    {
+        auto ticket = queue.take();
+        yarn::schedule([ticket, i, &result, &next] {
+            ticket.wait();
+            result[next++] = i;
+            ticket.done();
+        });
+    }
+
+    queue.take().wait();
+
+    for (int i = 0; i < count; i++)
+    {
+        ASSERT_EQ(result[i], i);
+    }
+}
diff --git a/src/Yarn/Trace.cpp b/src/Yarn/Trace.cpp
new file mode 100644
index 0000000..0b00e14
--- /dev/null
+++ b/src/Yarn/Trace.cpp
@@ -0,0 +1,248 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The Trace API produces a trace event file that can be consumed with Chrome's
+// about:tracing viewer.
+// Documentation can be found at:
+//   https://www.chromium.org/developers/how-tos/trace-event-profiling-tool
+//   https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit
+
+#include "Trace.hpp"
+
+#include "Defer.hpp"
+#include "Scheduler.hpp"
+#include "Thread.hpp"
+
+#if YARN_TRACE_ENABLED
+
+#include <atomic>
+#include <unordered_set>
+#include <fstream>
+
+namespace
+{
+
+// Chrome traces can choke or error on very large trace files.
+// Limit the number of events created to this number.
+static constexpr int MaxEvents = 100000;
+
+uint64_t threadFiberID(uint32_t threadID, uint32_t fiberID)
+{
+    return static_cast<uint64_t>(threadID) * 31 + static_cast<uint64_t>(fiberID);
+}
+
+} // anonymous namespace
+
+namespace yarn
+{
+
+Trace* Trace::get()
+{
+    static Trace trace;
+    return &trace;
+}
+
+Trace::Trace()
+{
+    nameThread("main");
+    thread = std::thread([&] {
+        Thread::setName("Trace worker");
+
+        auto out = std::fstream("chrome.trace", std::ios_base::out);
+
+        out << "[" << std::endl;
+        defer(out << std::endl << "]" << std::endl);
+
+        auto first = true;
+        for (int i = 0; i < MaxEvents; i++)
+        {
+            auto event = take();
+            if (event->type() == Event::Type::Shutdown)
+            {
+                return;
+            }
+            if (!first) { out << "," << std::endl; };
+            first = false;
+            out << "{" << std::endl;
+            event->write(out);
+            out << "}";
+        }
+
+        stopped = true;
+
+        while(take()->type() != Event::Type::Shutdown) {}
+    });
+}
+
+Trace::~Trace()
+{
+    put(new Shutdown());
+    thread.join();
+}
+
+void Trace::nameThread(const char* fmt, ...)
+{
+    if (stopped) { return; }
+    auto event = new NameThreadEvent();
+
+    va_list vararg;
+    va_start(vararg, fmt);
+    vsnprintf(event->name, Trace::MaxEventNameLength, fmt, vararg);
+    va_end(vararg);
+
+    put(event);
+}
+
+void Trace::beginEvent(const char* fmt, ...)
+{
+    if (stopped) { return; }
+    auto event = new BeginEvent();
+
+    va_list vararg;
+    va_start(vararg, fmt);
+    vsnprintf(event->name, Trace::MaxEventNameLength, fmt, vararg);
+    va_end(vararg);
+
+    event->timestamp = timestamp();
+    put(event);
+}
+
+void Trace::endEvent()
+{
+    if (stopped) { return; }
+    auto event = new EndEvent();
+    event->timestamp = timestamp();
+    put(event);
+}
+
+void Trace::beginAsyncEvent(uint32_t id, const char* fmt, ...)
+{
+    if (stopped) { return; }
+    auto event = new AsyncStartEvent();
+
+    va_list vararg;
+    va_start(vararg, fmt);
+    vsnprintf(event->name, Trace::MaxEventNameLength, fmt, vararg);
+    va_end(vararg);
+
+    event->timestamp = timestamp();
+    event->id = id;
+    put(event);
+}
+
+void Trace::endAsyncEvent(uint32_t id, const char* fmt, ...)
+{
+    if (stopped) { return; }
+    auto event = new AsyncEndEvent();
+
+    va_list vararg;
+    va_start(vararg, fmt);
+    vsnprintf(event->name, Trace::MaxEventNameLength, fmt, vararg);
+    va_end(vararg);
+
+    event->timestamp = timestamp();
+    event->id = id;
+    put(event);
+}
+
+uint64_t Trace::timestamp()
+{
+    auto now = std::chrono::high_resolution_clock::now();
+    auto diff = std::chrono::duration_cast<std::chrono::microseconds>(now - createdAt);
+    return static_cast<uint64_t>(diff.count());
+}
+
+void Trace::put(Event* event)
+{
+    auto idx = eventQueueWriteIdx++ % eventQueues.size();
+    auto &queue = eventQueues[idx];
+    std::unique_lock<std::mutex> lock(queue.mutex);
+    auto notify = queue.data.size() == 0;
+    queue.data.push(std::unique_ptr<Event>(event));
+    lock.unlock();
+    if (notify) { queue.condition.notify_one(); }
+}
+
+std::unique_ptr<Trace::Event> Trace::take()
+{
+    auto idx = eventQueueReadIdx++ % eventQueues.size();
+    auto &queue = eventQueues[idx];
+    std::unique_lock<std::mutex> lock(queue.mutex);
+    queue.condition.wait(lock, [&queue] { return queue.data.size() > 0; });
+    auto event = std::move(queue.data.front());
+    queue.data.pop();
+    return event;
+}
+
+#define QUOTE(x) "\"" << x << "\""
+#define INDENT "  "
+
+Trace::Event::Event() :
+    threadID(std::hash<std::thread::id>()(std::this_thread::get_id()))
+{
+    if (auto fiber = Scheduler::Fiber::current())
+    {
+        fiberID = fiber->id;
+    }
+}
+
+void Trace::Event::write(std::ostream &out) const
+{
+    out << INDENT << QUOTE("name") << ": " << QUOTE(name) << "," << std::endl;
+    if (categories != nullptr)
+    {
+        out << INDENT << QUOTE("cat") << ": " << "\"";
+        auto first = true;
+        for (auto category = *categories; category != nullptr; category++)
+        {
+            if (!first) { out << ","; }
+            out << category;
+        }
+        out << "\"," << std::endl;
+    }
+    if (fiberID != 0)
+    {
+        out << INDENT << QUOTE("args") << ": " << "{" << std::endl
+            << INDENT << INDENT << QUOTE("fiber") << ": " << fiberID << std::endl
+            << INDENT << "}," << std::endl;
+    }
+    if (threadID != 0)
+    {
+        out << INDENT << QUOTE("tid") << ": " << threadFiberID(threadID, fiberID) << "," << std::endl;
+    }
+    out << INDENT << QUOTE("ph") << ": " << QUOTE(static_cast<char>(type())) << "," << std::endl
+        << INDENT << QUOTE("pid") << ": " << processID << "," << std::endl
+        << INDENT << QUOTE("ts") << ": " << timestamp << std::endl;
+}
+
+void Trace::NameThreadEvent::write(std::ostream &out) const
+{
+    out << INDENT << QUOTE("name") << ": " << QUOTE("thread_name") << "," << std::endl
+        << INDENT << QUOTE("ph") << ": " << QUOTE("M") << "," << std::endl
+        << INDENT << QUOTE("pid") << ": " << processID << "," << std::endl
+        << INDENT << QUOTE("tid") << ": " << threadFiberID(threadID, fiberID) << "," << std::endl
+        << INDENT << QUOTE("args") << ": {"  << QUOTE("name") << ": " << QUOTE(name) << "}" << std::endl;
+}
+
+void Trace::AsyncEvent::write(std::ostream &out) const
+{
+    out << INDENT << QUOTE("id") << ": " << QUOTE(id) << "," << std::endl
+        << INDENT << QUOTE("cat") << ": " << QUOTE("async") << "," << std::endl;
+    Event::write(out);
+}
+
+
+}  // namespace yarn
+
+#endif // YARN_TRACE_ENABLED
\ No newline at end of file
diff --git a/src/Yarn/Trace.hpp b/src/Yarn/Trace.hpp
new file mode 100644
index 0000000..3034ccf
--- /dev/null
+++ b/src/Yarn/Trace.hpp
@@ -0,0 +1,226 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The Trace API produces a trace event file that can be consumed with Chrome's
+// chrome://tracing viewer.
+// Documentation can be found at:
+//   https://www.chromium.org/developers/how-tos/trace-event-profiling-tool
+//   https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit
+
+#define YARN_TRACE_ENABLED 0
+
+#if YARN_TRACE_ENABLED
+
+#include <array>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <cstdarg>
+#include <cstring>
+#include <mutex>
+#include <ostream>
+#include <queue>
+#include <thread>
+
+namespace yarn
+{
+
+// Trace writes a trace event file into the current working directory that can
+// be consumed with Chrome's chrome://tracing viewer.
+// Use the YARN_* macros below instead of using this class directly.
+class Trace
+{
+public:
+    static constexpr size_t MaxEventNameLength = 64;
+
+    static Trace* get();
+
+    void nameThread(const char* fmt, ...);
+    void beginEvent(const char* fmt, ...);
+    void endEvent();
+    void beginAsyncEvent(uint32_t id, const char* fmt, ...);
+    void endAsyncEvent(uint32_t id, const char* fmt, ...);
+
+    class ScopedEvent
+    {
+    public:
+        inline ScopedEvent(const char* fmt, ...);
+        inline ~ScopedEvent();
+    private:
+        Trace * const trace;
+    };
+
+    class ScopedAsyncEvent
+    {
+    public:
+        inline ScopedAsyncEvent(uint32_t id, const char* fmt, ...);
+        inline ~ScopedAsyncEvent();
+    private:
+        Trace * const trace;
+        const uint32_t id;
+        std::string name;
+    };
+
+private:
+    Trace();
+    ~Trace();
+    Trace(const Trace&) = delete;
+    Trace& operator = (const Trace&) = delete;
+
+    struct Event
+    {
+        enum class Type : uint8_t
+        {
+            Begin = 'B',
+            End = 'E',
+            Complete = 'X',
+            Instant = 'i',
+            Counter = 'C',
+            AsyncStart = 'b',
+            AsyncInstant = 'n',
+            AsyncEnd = 'e',
+            FlowStart = 's',
+            FlowStep = 't',
+            FlowEnd = 'f',
+            Sample = 'P',
+            ObjectCreated = 'N',
+            ObjectSnapshot = 'O',
+            ObjectDestroyed = 'D',
+            Metadata = 'M',
+            GlobalMemoryDump = 'V',
+            ProcessMemoryDump = 'v',
+            Mark = 'R',
+            ClockSync = 'c',
+            ContextEnter = '(',
+            ContextLeave = ')',
+
+            // Internal types
+            Shutdown = 'S',
+        };
+
+        Event();
+        virtual ~Event() = default;
+        virtual Type type() const = 0;
+        virtual void write(std::ostream &out) const;
+
+        char name[MaxEventNameLength] = {};
+        const char **categories = nullptr;
+        uint64_t timestamp = 0; // in microseconds
+        uint32_t processID = 0;
+        uint32_t threadID;
+        uint32_t fiberID;
+    };
+
+    struct BeginEvent      : public Event { Type type() const override { return Type::Begin; } };
+    struct EndEvent        : public Event { Type type() const override { return Type::End; } };
+    struct MetadataEvent   : public Event { Type type() const override { return Type::Metadata; } };
+    struct Shutdown        : public Event { Type type() const override { return Type::Shutdown; } };
+
+    struct AsyncEvent : public Event
+    {
+        void write(std::ostream &out) const override;
+        uint32_t id;
+    };
+
+    struct AsyncStartEvent : public AsyncEvent { Type type() const override { return Type::AsyncStart; } };
+    struct AsyncEndEvent   : public AsyncEvent { Type type() const override { return Type::AsyncEnd; } };
+
+    struct NameThreadEvent : public MetadataEvent
+    {
+        void write(std::ostream &out) const override;
+    };
+
+    uint64_t timestamp(); // in microseconds
+
+    void put(Event*);
+    std::unique_ptr<Event> take();
+
+    struct EventQueue
+    {
+        std::queue< std::unique_ptr<Event> > data; // guarded by mutex
+        std::condition_variable condition;
+        std::mutex mutex;
+    };
+    std::array<EventQueue, 1> eventQueues; // TODO: Increasing this from 1 can cause events to go out of order. Investigate, fix.
+    std::atomic<unsigned int> eventQueueWriteIdx = { 0 };
+    unsigned int eventQueueReadIdx = 0;
+    std::chrono::time_point<std::chrono::high_resolution_clock> createdAt = std::chrono::high_resolution_clock::now();
+    std::thread thread;
+    std::atomic<bool> stopped = { false };
+};
+
+Trace::ScopedEvent::ScopedEvent(const char* fmt, ...) : trace(Trace::get())
+{
+    if (trace != nullptr)
+    {
+        char name[Trace::MaxEventNameLength];
+        va_list vararg;
+        va_start(vararg, fmt);
+        vsnprintf(name, Trace::MaxEventNameLength, fmt, vararg);
+        va_end(vararg);
+
+        trace->beginEvent(name);
+    }
+}
+
+Trace::ScopedEvent::~ScopedEvent()
+{
+    if (trace != nullptr)
+    {
+        trace->endEvent();
+    }
+}
+
+Trace::ScopedAsyncEvent::ScopedAsyncEvent(uint32_t id, const char* fmt, ...) : trace(Trace::get()), id(id)
+{
+    if (trace != nullptr)
+    {
+        char buf[Trace::MaxEventNameLength];
+        va_list vararg;
+        va_start(vararg, fmt);
+        vsnprintf(buf, Trace::MaxEventNameLength, fmt, vararg);
+        va_end(vararg);
+        name = buf;
+
+        trace->beginAsyncEvent(id, "%s", buf);
+    }
+}
+
+Trace::ScopedAsyncEvent::~ScopedAsyncEvent()
+{
+    if (trace != nullptr)
+    {
+        trace->endAsyncEvent(id, "%s", name.c_str());
+    }
+}
+
+}  // namespace yarn
+
+#define YARN_CONCAT_(a, b) a ## b
+#define YARN_CONCAT(a, b) YARN_CONCAT_(a,b)
+#define YARN_SCOPED_EVENT(...) yarn::Trace::ScopedEvent YARN_CONCAT(scoped_event, __LINE__)(__VA_ARGS__);
+#define YARN_BEGIN_ASYNC_EVENT(id, ...) do { if (auto t = yarn::Trace::get()) { t->beginAsyncEvent(id, __VA_ARGS__); } } while(false);
+#define YARN_END_ASYNC_EVENT(id, ...) do { if (auto t = yarn::Trace::get()) { t->endAsyncEvent(id, __VA_ARGS__); } } while(false);
+#define YARN_SCOPED_ASYNC_EVENT(id, ...) yarn::Trace::ScopedAsyncEvent YARN_CONCAT(defer_, __LINE__)(id, __VA_ARGS__);
+#define YARN_NAME_THREAD(...) do { if (auto t = yarn::Trace::get()) { t->nameThread(__VA_ARGS__); } } while(false);
+
+#else // YARN_TRACE_ENABLED
+
+#define YARN_SCOPED_EVENT(...)
+#define YARN_BEGIN_ASYNC_EVENT(id, ...)
+#define YARN_END_ASYNC_EVENT(id, ...)
+#define YARN_SCOPED_ASYNC_EVENT(id, ...)
+#define YARN_NAME_THREAD(...)
+
+#endif // YARN_TRACE_ENABLED
diff --git a/src/Yarn/Utils.hpp b/src/Yarn/Utils.hpp
new file mode 100644
index 0000000..87744d5
--- /dev/null
+++ b/src/Yarn/Utils.hpp
@@ -0,0 +1,54 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_util_hpp
+#define yarn_util_hpp
+
+#include "Scheduler.hpp"
+#include "WaitGroup.hpp"
+
+namespace yarn {
+
+// parallelize() is used to split a number of work items into N smaller batches
+// which can be processed in parallel with the function f().
+// numTotal is the total number of work items to process.
+// numPerTask is the maximum number of work items to process per call to f().
+// There will always be at least one call to f().
+// F must be a function with the signature:
+//    void(COUNTER taskIndex, COUNTER first, COUNTER count)
+// COUNTER is any integer type.
+template <typename F, typename COUNTER>
+inline void parallelize(COUNTER numTotal, COUNTER numPerTask, const F& f)
+{
+    auto numTasks = (numTotal + numPerTask - 1) / numPerTask;
+    WaitGroup wg(numTasks - 1);
+    for (unsigned int task = 1; task < numTasks; task++)
+    {
+        schedule([=] {
+            auto first = task * numPerTask;
+            auto count = std::min(first + numPerTask, numTotal) - first;
+            f(task, first, count);
+            wg.done();
+        });
+    }
+
+    // Run the first chunk on this fiber to reduce the amount of time spent
+    // waiting.
+    f(0, 0, std::min(numPerTask, numTotal));
+    wg.wait();
+}
+
+} // namespace yarn
+
+#endif // yarn_util_hpp
diff --git a/src/Yarn/WaitGroup.hpp b/src/Yarn/WaitGroup.hpp
new file mode 100644
index 0000000..a8c6b14
--- /dev/null
+++ b/src/Yarn/WaitGroup.hpp
@@ -0,0 +1,108 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef yarn_waitgroup_hpp
+#define yarn_waitgroup_hpp
+
+#include "ConditionVariable.hpp"
+#include "Debug.hpp"
+
+#include <atomic>
+#include <mutex>
+
+namespace yarn {
+
+// WaitGroup is a synchronization primitive that holds an internal counter that
+// can be incremented, decremented and waited on until it reaches 0.
+// WaitGroups can be used as a simple mechanism for waiting for a number of
+// concurrently executed tasks to complete.
+//
+// Example:
+//
+//  void runTasksConcurrently(int numConcurrentTasks)
+//  {
+//      // Construct the WaitGroup with an initial count of numConcurrentTasks.
+//      yarn::WaitGroup wg(numConcurrentTasks);
+//      for (int i = 0; i < numConcurrentTasks; i++)
+//      {
+//          // Schedule a task to be run asynchronously.
+//          // These may all be run concurrently.
+//          yarn::schedule([=] {
+//              // Once the task has finished, decrement the waitgroup counter
+//              // to signal that this has completed.
+//              defer(wg.done());
+//              doSomeWork();
+//          });
+//      }
+//      // Block until all tasks have completed.
+//      wg.wait();
+//  }
+class WaitGroup
+{
+public:
+    // Constructs the WaitGroup with the specified initial count.
+    inline WaitGroup(unsigned int initialCount = 0);
+
+    // add() increments the internal counter by count.
+    inline void add(unsigned int count = 1) const;
+
+    // done() decrements the internal counter by one.
+    // Returns true if the internal count has reached zero.
+    inline bool done() const;
+
+    // wait() blocks until the WaitGroup counter reaches zero.
+    inline void wait() const;
+
+private:
+    struct Data
+    {
+        std::atomic<unsigned int> count = { 0 };
+        ConditionVariable condition;
+        std::mutex mutex;
+    };
+    const std::shared_ptr<Data> data = std::make_shared<Data>();
+};
+
+inline WaitGroup::WaitGroup(unsigned int initialCount /* = 0 */)
+{
+    data->count = initialCount;
+}
+
+void WaitGroup::add(unsigned int count /* = 1 */) const
+{
+    data->count += count;
+}
+
+bool WaitGroup::done() const
+{
+    YARN_ASSERT(data->count > 0, "yarn::WaitGroup::done() called too many times");
+    auto count = --data->count;
+    if (count == 0)
+    {
+        std::unique_lock<std::mutex> lock(data->mutex);
+        data->condition.notify_all();
+        return true;
+    }
+    return false;
+}
+
+void WaitGroup::wait() const
+{
+    std::unique_lock<std::mutex> lock(data->mutex);
+    data->condition.wait(lock, [this]{ return data->count == 0; });
+}
+
+} // namespace yarn
+
+#endif  // yarn_waitgroup_hpp
diff --git a/src/Yarn/WaitGroup_test.cpp b/src/Yarn/WaitGroup_test.cpp
new file mode 100644
index 0000000..d2048ff
--- /dev/null
+++ b/src/Yarn/WaitGroup_test.cpp
@@ -0,0 +1,61 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Yarn_test.hpp"
+
+#include "Yarn/WaitGroup.hpp"
+
+TEST(WithoutBoundScheduler, WaitGroupDone)
+{
+    yarn::WaitGroup wg(2); // Should not require a scheduler.
+    wg.done();
+    wg.done();
+}
+
+#if YARN_DEBUG_ENABLED
+TEST(WithoutBoundScheduler, WaitGroupDoneTooMany)
+{
+    yarn::WaitGroup wg(2); // Should not require a scheduler.
+    wg.done();
+    wg.done();
+    EXPECT_DEATH(wg.done(), "done\\(\\) called too many times");
+}
+#endif // YARN_DEBUG_ENABLED
+
+TEST_P(WithBoundScheduler, WaitGroup_OneTask)
+{
+    yarn::WaitGroup wg(1);
+    std::atomic<int> counter = {0};
+    yarn::schedule([&counter, wg] {
+        counter++;
+        wg.done();
+    });
+    wg.wait();
+    ASSERT_EQ(counter.load(), 1);
+}
+
+TEST_P(WithBoundScheduler, WaitGroup_10Tasks)
+{
+    yarn::WaitGroup wg(10);
+    std::atomic<int> counter = {0};
+    for (int i = 0; i < 10; i++)
+    {
+        yarn::schedule([&counter, wg] {
+            counter++;
+            wg.done();
+        });
+    }
+    wg.wait();
+    ASSERT_EQ(counter.load(), 10);
+}
diff --git a/src/Yarn/Yarn_test.cpp b/src/Yarn/Yarn_test.cpp
new file mode 100644
index 0000000..a3ace60
--- /dev/null
+++ b/src/Yarn/Yarn_test.cpp
@@ -0,0 +1,30 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "Yarn_test.hpp"
+
+INSTANTIATE_TEST_SUITE_P(SchedulerParams, WithBoundScheduler, testing::Values(
+    SchedulerParams{0}, // Single-threaded mode test
+    SchedulerParams{1}, // Single worker thread
+    SchedulerParams{2}, // 2 worker threads...
+    SchedulerParams{4},
+    SchedulerParams{8},
+    SchedulerParams{64}
+));
+
+int main(int argc, char **argv)
+{
+	::testing::InitGoogleTest(&argc, argv);
+	return RUN_ALL_TESTS();
+}
diff --git a/src/Yarn/Yarn_test.hpp b/src/Yarn/Yarn_test.hpp
new file mode 100644
index 0000000..218b22f
--- /dev/null
+++ b/src/Yarn/Yarn_test.hpp
@@ -0,0 +1,55 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "Yarn/Scheduler.hpp"
+
+// SchedulerParams holds Scheduler construction parameters for testing.
+struct SchedulerParams
+{
+    int numWorkerThreads;
+
+    friend std::ostream& operator<<(std::ostream& os, const SchedulerParams& params) {
+        return os << "SchedulerParams{" <<
+            "numWorkerThreads: " << params.numWorkerThreads <<
+            "}";
+    }
+};
+
+// WithoutBoundScheduler is a test fixture that does not bind a scheduler.
+class WithoutBoundScheduler : public testing::Test {};
+
+// WithBoundScheduler is a parameterized test fixture that performs tests with
+// a bound scheduler using a number of different configurations.
+class WithBoundScheduler : public testing::TestWithParam<SchedulerParams>
+{
+public:
+    void SetUp() override
+    {
+        auto &params = GetParam();
+
+        auto scheduler = new yarn::Scheduler();
+        scheduler->bind();
+        scheduler->setWorkerThreadCount(params.numWorkerThreads);
+    }
+
+    void TearDown() override
+    {
+        auto scheduler = yarn::Scheduler::get();
+        scheduler->unbind();
+        delete scheduler;
+    }
+};
diff --git a/tests/kokoro/gcp_ubuntu/continuous.sh b/tests/kokoro/gcp_ubuntu/continuous.sh
index d7ee2ed..0021c2f 100644
--- a/tests/kokoro/gcp_ubuntu/continuous.sh
+++ b/tests/kokoro/gcp_ubuntu/continuous.sh
@@ -23,6 +23,7 @@
 cd .. # Some tests must be run from project root
 
 build/ReactorUnitTests
+build/yarn-unittests
 build/gles-unittests
 
 if [ "${REACTOR_BACKEND}" != "Subzero" ]; then
diff --git a/tests/kokoro/gcp_windows/continuous.bat b/tests/kokoro/gcp_windows/continuous.bat
index f874e23..0bd0196 100644
--- a/tests/kokoro/gcp_windows/continuous.bat
+++ b/tests/kokoro/gcp_windows/continuous.bat
@@ -28,6 +28,9 @@
 if !ERRORLEVEL! neq 0 exit /b !ERRORLEVEL!
 SET SWIFTSHADER_DISABLE_DEBUGGER_WAIT_DIALOG=1
 
+build\Debug\yarn-unittests.exe
+if !ERRORLEVEL! neq 0 exit /b !ERRORLEVEL!
+
 build\Debug\gles-unittests.exe
 if !ERRORLEVEL! neq 0 exit /b !ERRORLEVEL!
 
diff --git a/tests/kokoro/macos/continuous.sh b/tests/kokoro/macos/continuous.sh
index a5675c0..ca1ae26 100755
--- a/tests/kokoro/macos/continuous.sh
+++ b/tests/kokoro/macos/continuous.sh
@@ -23,6 +23,7 @@
 cd .. # Some tests must be run from project root
 
 build/ReactorUnitTests
+build/yarn-unittests
 build/gles-unittests
 
 if [ "${REACTOR_BACKEND}" != "Subzero" ]; then
diff --git a/third_party/marl/Android.bp b/third_party/marl/Android.bp
deleted file mode 100644
index 52e1b4f..0000000
--- a/third_party/marl/Android.bp
+++ /dev/null
@@ -1,46 +0,0 @@
-//
-// Copyright (C) 2019 The Android Open Source Project
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-cc_library_static {
-    name: "swiftshader_marl",
-    vendor_available: true,
-    sdk_version: "27",
-    srcs: [
-        "src/debug.cpp",
-        "src/scheduler.cpp",
-        "src/thread.cpp",
-        "src/trace.cpp",
-    ],
-    local_include_dirs: [
-        "src",
-    ],
-    export_include_dirs: [
-        "include",
-    ],
-    include_dirs: [
-        "external/swiftshader/third_party/marl/include"
-    ],
-    arch: {
-        arm: { srcs: ["src/osfiber_arm.c", "src/osfiber_asm_arm.S"], },
-        arm64: { srcs: ["src/osfiber_aarch64.c", "src/osfiber_asm_aarch64.S"], },
-        x86: { srcs: ["src/osfiber_x86.c", "src/osfiber_asm_x86.S"], },
-        x86_64: { srcs: ["src/osfiber_x64.c", "src/osfiber_asm_x64.S"], },
-    },
-    cflags: [
-        "-Wno-unused-parameter",
-    ],
-    stl: "libc++_static",
-}
diff --git a/third_party/marl/BUILD.gn b/third_party/marl/BUILD.gn
deleted file mode 100644
index 7205597..0000000
--- a/third_party/marl/BUILD.gn
+++ /dev/null
@@ -1,103 +0,0 @@
-# Copyright 2019 The SwiftShader Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import("../../src/swiftshader.gni")
-
-import("//testing/test.gni")
-
-config("marl_config") {
-  include_dirs = [ "include" ]
-}
-
-swiftshader_source_set("Marl_headers") {
-  sources = [
-    "include/marl/conditionvariable.h",
-    "include/marl/containers.h",
-    "include/marl/debug.h",
-    "include/marl/defer.h",
-    "include/marl/finally.h",
-    "include/marl/osfiber.h",
-    "include/marl/pool.h",
-    "include/marl/sal.h",
-    "include/marl/scheduler.h",
-    "include/marl/thread.h",
-    "include/marl/ticket.h",
-    "include/marl/trace.h",
-    "include/marl/utils.h",
-    "include/marl/waitgroup.h",
-  ]
-
-  if (is_win) {
-    sources += [ "include/marl/osfiber_windows.h" ]
-  } else {
-    sources += [
-      "OSFiber_asm.h",
-      "OSFiber_asm_aarch64.h",
-      "OSFiber_asm_arm.h",
-      "OSFiber_asm_ppc64.h",
-      "OSFiber_asm_x64.h",
-      "OSFiber_asm_x86.h",
-    ]
-  }
-
-  public_configs = [
-    ":marl_config",
-  ]
-}
-
-swiftshader_source_set("Marl") {
-  sources = [
-    "src/debug.cpp",
-    "src/scheduler.cpp",
-    "src/thread.cpp",
-    "src/trace.cpp",
-  ]
-
-  if (!is_win) {
-    sources += [
-      "src/osfiber_aarch64.c",
-      "src/osfiber_arm.c",
-      "src/osfiber_asm_aarch64.S",
-      "src/osfiber_asm_arm.S",
-      "src/osfiber_asm_ppc64.S",
-      "src/osfiber_asm_x64.S",
-      "src/osfiber_asm_x86.S",
-      "src/osfiber_x64.c",
-      "src/osfiber_x86.c",
-    ]
-  }
-
-  public_deps = [
-    ":Marl_headers",
-  ]
-}
-
-test("marl_unittests") {
-  sources = [
-    "src/conditionvariable_test.cpp",
-    "src/containers_test.cpp",
-    "src/defer_test.cpp",
-    "src/osfiber_test.cpp",
-    "src/pool_test.cpp",
-    "src/scheduler_test.cpp",
-    "src/ticket_test.cpp",
-    "src/waitgroup_test.cpp",
-    "src/marl_test.cpp",
-  ]
-  deps = [
-    ":Marl",
-    "//testing/gtest",
-    "//testing/gmock",
-  ]
-}