Implement VK_KHR_timeline_semaphore

Timeline semaphores track a monotonically increasing uint64_t that
serves as a signal to any number of threads on the device or the host.
Device threads and the host can wait for the payload to reach a specific
value, at which point they unblock. They can also signal the semaphore to
update its value, which unblocks all threads that are waiting on that
value.

Clang's Thread Safety Analysis is not used for TimelineSemaphore because
the analysis has a bug that prevents it from properly analyzing lambdas that
access guarded resources.

Change-Id: Iafd95b8eac11beea21c00df87ca7ca1e51c77c64
Bug: b/147738281
Test: dEQP-VK.api.info.vulkan1p2_limits_validation.timeline_semaphore
Test: dEQP-VK.synchronization.basic.timeline_semaphore.*
Test: dEQP-VK.synchronization.timeline_semaphore.*
Test: dEQP-VK.synchronization.op.single_queue.timeline_semaphore.*
Test: dEQP-VK.synchronization.cross_instance.*
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/52148
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Sean Risser <srisser@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Commit-Queue: Sean Risser <srisser@google.com>
diff --git a/src/Vulkan/BUILD.gn b/src/Vulkan/BUILD.gn
index 8ba0b9b..1612e1a 100644
--- a/src/Vulkan/BUILD.gn
+++ b/src/Vulkan/BUILD.gn
@@ -94,6 +94,7 @@
     "VkShaderModule.hpp",
     "VkSpecializationInfo.hpp",
     "VkStringify.hpp",
+    "VkTimelineSemaphore.hpp",
     "VulkanPlatform.hpp",
   ]
   if (is_linux || is_chromeos || is_android) {
@@ -143,6 +144,7 @@
     "VkShaderModule.cpp",
     "VkSpecializationInfo.cpp",
     "VkStringify.cpp",
+    "VkTimelineSemaphore.cpp",
     "libVulkan.cpp",
     "main.cpp",
     "resource.h",
diff --git a/src/Vulkan/CMakeLists.txt b/src/Vulkan/CMakeLists.txt
index 1eb2fd4..2c1ce24 100644
--- a/src/Vulkan/CMakeLists.txt
+++ b/src/Vulkan/CMakeLists.txt
@@ -91,6 +91,8 @@
     VkShaderModule.hpp
     VkStringify.cpp
     VkStringify.hpp
+    VkTimelineSemaphore.cpp
+    VkTimelineSemaphore.hpp
     VulkanPlatform.hpp
 )
 
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index 7b65016..e420921 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -18,6 +18,8 @@
 #include "VkDescriptorSetLayout.hpp"
 #include "VkFence.hpp"
 #include "VkQueue.hpp"
+#include "VkSemaphore.hpp"
+#include "VkTimelineSemaphore.hpp"
 #include "Debug/Context.hpp"
 #include "Debug/Server.hpp"
 #include "Device/Blitter.hpp"
@@ -29,11 +31,21 @@
 
 namespace {
 
-std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> now()
+using time_point = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
+
+time_point now()
 {
 	return std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now());
 }
 
+const time_point getEndTimePoint(uint64_t timeout, bool &infiniteTimeout)
+{
+	const time_point startTime = now();  // The relative timeout is measured from this instant.
+	const uint64_t representableNs = (LLONG_MAX - startTime.time_since_epoch().count());  // Largest offset that still fits the tick count.
+	infiniteTimeout = (representableNs < timeout);  // A deadline that cannot be represented is reported as infinite.
+	return startTime + std::chrono::nanoseconds(std::min(timeout, representableNs));
+}
+
 }  // anonymous namespace
 
 namespace vk {
@@ -206,11 +218,8 @@
 
 VkResult Device::waitForFences(uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll, uint64_t timeout)
 {
-	using time_point = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
-	const time_point start = now();
-	const uint64_t max_timeout = (LLONG_MAX - start.time_since_epoch().count());
-	bool infiniteTimeout = (timeout > max_timeout);
-	const time_point end_ns = start + std::chrono::nanoseconds(std::min(max_timeout, timeout));
+	bool infiniteTimeout = false;
+	const time_point end_ns = getEndTimePoint(timeout, infiniteTimeout);
 
 	if(waitAll != VK_FALSE)  // All fences must be signaled
 	{
@@ -267,6 +276,63 @@
 	}
 }
 
+// Blocks until the timeline semaphores listed in pWaitInfo have reached their wait
+// values, or until 'timeout' nanoseconds have elapsed. With VK_SEMAPHORE_WAIT_ANY_BIT
+// one satisfied semaphore is enough; otherwise every semaphore must be satisfied.
+// Returns VK_SUCCESS when the wait is satisfied and VK_TIMEOUT otherwise.
+VkResult Device::waitForSemaphores(const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
+{
+	bool infiniteTimeout = false;
+	const time_point end_ns = getEndTimePoint(timeout, infiniteTimeout);
+
+	if(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT)
+	{
+		// Proxy semaphore: its payload is set to 1 when one of its dependencies signals it.
+		TimelineSemaphore any = TimelineSemaphore();
+
+		for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++)
+		{
+			TimelineSemaphore *semaphore = DynamicCast<TimelineSemaphore>(pWaitInfo->pSemaphores[i]);
+			uint64_t waitValue = pWaitInfo->pValues[i];
+
+			// A wait is satisfied once the payload has reached or passed the wait value.
+			if(semaphore->getCounterValue() >= waitValue)
+			{
+				return VK_SUCCESS;
+			}
+
+			// NOTE(review): a signal landing between the check above and this registration looks unobserved by 'any' - confirm.
+			semaphore->addDependent(any, waitValue);
+		}
+
+		if(infiniteTimeout)
+		{
+			any.wait(1ull);
+			return VK_SUCCESS;
+		}
+
+		return (any.wait(1, end_ns) == VK_SUCCESS) ? VK_SUCCESS : VK_TIMEOUT;
+	}
+	else
+	{
+		ASSERT(pWaitInfo->flags == 0);
+		for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++)
+		{
+			TimelineSemaphore *semaphore = DynamicCast<TimelineSemaphore>(pWaitInfo->pSemaphores[i]);
+			uint64_t value = pWaitInfo->pValues[i];
+			if(infiniteTimeout)
+			{
+				semaphore->wait(value);
+			}
+			else if(semaphore->wait(value, end_ns) != VK_SUCCESS)
+			{
+				return VK_TIMEOUT;
+			}
+		}
+		return VK_SUCCESS;
+	}
+}
+
 VkResult Device::waitIdle()
 {
 	for(uint32_t i = 0; i < queueCount; i++)
diff --git a/src/Vulkan/VkDevice.hpp b/src/Vulkan/VkDevice.hpp
index 0129657..fdcce63 100644
--- a/src/Vulkan/VkDevice.hpp
+++ b/src/Vulkan/VkDevice.hpp
@@ -58,6 +58,7 @@
 	bool hasExtension(const char *extensionName) const;
 	VkQueue getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) const;
 	VkResult waitForFences(uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll, uint64_t timeout);
+	VkResult waitForSemaphores(const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout);
 	VkResult waitIdle();
 	void getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
 	                                   VkDescriptorSetLayoutSupport *pSupport) const;
diff --git a/src/Vulkan/VkGetProcAddress.cpp b/src/Vulkan/VkGetProcAddress.cpp
index 0edcfcf..1ed42cb 100644
--- a/src/Vulkan/VkGetProcAddress.cpp
+++ b/src/Vulkan/VkGetProcAddress.cpp
@@ -315,6 +315,10 @@
 	MAKE_VULKAN_DEVICE_ENTRY(vkCmdEndRenderPass2),
 	MAKE_VULKAN_DEVICE_ENTRY(vkCmdNextSubpass2),
 	MAKE_VULKAN_DEVICE_ENTRY(vkResetQueryPool),
+	// VK_KHR_timeline_semaphore
+	MAKE_VULKAN_DEVICE_ENTRY(vkGetSemaphoreCounterValue),
+	MAKE_VULKAN_DEVICE_ENTRY(vkSignalSemaphore),
+	MAKE_VULKAN_DEVICE_ENTRY(vkWaitSemaphores),
 };
 
 static const std::vector<std::pair<const char *, std::unordered_map<std::string, PFN_vkVoidFunction>>> deviceExtensionFunctionPointers = {
@@ -377,6 +381,14 @@
 	        MAKE_VULKAN_DEVICE_ENTRY(vkCmdNextSubpass2KHR),
 	        MAKE_VULKAN_DEVICE_ENTRY(vkCmdEndRenderPass2KHR),
 	    } },
+	// VK_KHR_timeline_semaphore
+	{
+	    VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
+	    {
+	        MAKE_VULKAN_DEVICE_ENTRY(vkGetSemaphoreCounterValueKHR),
+	        MAKE_VULKAN_DEVICE_ENTRY(vkSignalSemaphoreKHR),
+	        MAKE_VULKAN_DEVICE_ENTRY(vkWaitSemaphoresKHR),
+	    } },
 	// VK_EXT_line_rasterization
 	{
 	    VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME,
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 581157a..b9e3e54 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -276,6 +276,12 @@
 }
 
 template<typename T>
+static void getPhysicalDeviceTimelineSemaphoreFeatures(T *features)
+{
+	features->timelineSemaphore = VK_TRUE;  // Report the timelineSemaphore feature as supported.
+}
+
+template<typename T>
 static void getPhysicalDeviceVulkan12Features(T *features)
 {
 	features->samplerMirrorClampToEdge = VK_FALSE;
@@ -294,7 +300,7 @@
 	getPhysicalDeviceShaderSubgroupExtendedTypesFeatures(features);
 	getPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR(features);
 	getPhysicalDeviceHostQueryResetFeatures(features);
-	features->timelineSemaphore = VK_FALSE;
+	getPhysicalDeviceTimelineSemaphoreFeatures(features);
 	features->bufferDeviceAddress = VK_FALSE;
 	features->bufferDeviceAddressCaptureReplay = VK_FALSE;
 	features->bufferDeviceAddressMultiDevice = VK_FALSE;
@@ -375,6 +381,9 @@
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES:
 				getPhysicalDeviceVulkanMemoryModelFeatures(reinterpret_cast<VkPhysicalDeviceVulkanMemoryModelFeatures *>(curExtension));
 				break;
+			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES:
+				getPhysicalDeviceTimelineSemaphoreFeatures(reinterpret_cast<VkPhysicalDeviceTimelineSemaphoreFeatures *>(curExtension));
+				break;
 			default:
 				LOG_TRAP("curExtension->pNext->sType = %s", vk::Stringify(curExtension->sType).c_str());
 				break;
@@ -787,6 +796,30 @@
 
 void PhysicalDevice::getProperties(const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, VkExternalSemaphoreProperties *pExternalSemaphoreProperties) const
 {
+	for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(pExternalSemaphoreInfo->pNext);
+	    nextInfo != nullptr; nextInfo = nextInfo->pNext)
+	{
+		switch(nextInfo->sType)
+		{
+			case VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO:
+			{
+				const auto *tlsInfo = reinterpret_cast<const VkSemaphoreTypeCreateInfo *>(nextInfo);
+				// Timeline Semaphore does not support external semaphore
+				if(tlsInfo->semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE)
+				{
+					pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+					pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+					pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+					return;
+				}
+			}
+			break;
+			default:
+				WARN("nextInfo->sType = %s", vk::Stringify(nextInfo->sType).c_str());
+				break;
+		}
+	}
+
 #if SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
 	if(pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT)
 	{
@@ -939,7 +972,8 @@
 template<typename T>
 static void getTimelineSemaphoreProperties(T *properties)
 {
-	properties->maxTimelineSemaphoreValueDifference = 0x7FFFFFFFull;
+	// Our implementation of Timeline Semaphores allows the timeline to advance to any value from any value.
+	properties->maxTimelineSemaphoreValueDifference = (uint64_t)-1;
 }
 
 void PhysicalDevice::getProperties(VkPhysicalDeviceTimelineSemaphoreProperties *properties) const
diff --git a/src/Vulkan/VkPromotedExtensions.cpp b/src/Vulkan/VkPromotedExtensions.cpp
index d406a80..f150221 100644
--- a/src/Vulkan/VkPromotedExtensions.cpp
+++ b/src/Vulkan/VkPromotedExtensions.cpp
@@ -215,4 +215,20 @@
 {
 	vkResetQueryPool(device, queryPool, firstQuery, queryCount);
 }
+
+// VK_KHR_timeline_semaphore
+VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreCounterValueKHR(VkDevice device, VkSemaphore semaphore, uint64_t *pValue)
+{
+	return vkGetSemaphoreCounterValue(device, semaphore, pValue);  // KHR alias: forwards unchanged to the core entry point.
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL vkSignalSemaphoreKHR(VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo)
+{
+	return vkSignalSemaphore(device, pSignalInfo);  // KHR alias: forwards unchanged to the core entry point.
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL vkWaitSemaphoresKHR(VkDevice device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
+{
+	return vkWaitSemaphores(device, pWaitInfo, timeout);  // KHR alias: forwards unchanged to the core entry point.
+}
 }
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index fd8bc6d..7995eff 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -16,6 +16,8 @@
 #include "VkCommandBuffer.hpp"
 #include "VkFence.hpp"
 #include "VkSemaphore.hpp"
+#include "VkStringify.hpp"
+#include "VkTimelineSemaphore.hpp"
 #include "Device/Renderer.hpp"
 #include "WSI/VkSwapchainKHR.hpp"
 
@@ -38,6 +40,25 @@
 		totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
 		totalSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
 		totalSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
+
+		for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
+		    extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
+		{
+			switch(extension->sType)
+			{
+				case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
+				{
+					const auto *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
+					totalSize += sizeof(VkTimelineSemaphoreSubmitInfo);
+					totalSize += tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
+					totalSize += tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
+				}
+				break;
+				default:
+					WARN("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str());
+					break;
+			}
+		}
 	}
 
 	uint8_t *mem = static_cast<uint8_t *>(
@@ -68,6 +89,41 @@
 		submits[i].pCommandBuffers = reinterpret_cast<const VkCommandBuffer *>(mem);
 		memcpy(mem, pSubmits[i].pCommandBuffers, size);
 		mem += size;
+
+		for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
+		    extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
+		{
+			switch(extension->sType)
+			{
+				case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
+				{
+					const VkTimelineSemaphoreSubmitInfo *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
+
+					size = sizeof(VkTimelineSemaphoreSubmitInfo);
+					VkTimelineSemaphoreSubmitInfo *tlsSubmitInfoCopy = reinterpret_cast<VkTimelineSemaphoreSubmitInfo *>(mem);
+					memcpy(mem, extension, size);
+					// Don't copy the pNext pointer at all.
+					tlsSubmitInfoCopy->pNext = nullptr;
+					mem += size;
+
+					size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
+					tlsSubmitInfoCopy->pWaitSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
+					memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size);
+					mem += size;
+
+					size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
+					tlsSubmitInfoCopy->pSignalSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
+					memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size);
+					mem += size;
+
+					submits[i].pNext = tlsSubmitInfoCopy;
+				}
+				break;
+				default:
+					WARN("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str());
+					break;
+			}
+		}
 	}
 
 	return submits;
@@ -122,10 +178,38 @@
 
 	for(uint32_t i = 0; i < task.submitCount; i++)
 	{
-		auto &submitInfo = task.pSubmits[i];
+		VkSubmitInfo &submitInfo = task.pSubmits[i];
+		const VkTimelineSemaphoreSubmitInfo *timelineInfo = nullptr;  // Stays null when no timeline info is chained.
+		for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(submitInfo.pNext);
+		    nextInfo != nullptr; nextInfo = nextInfo->pNext)
+		{
+			switch(nextInfo->sType)
+			{
+				case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
+					timelineInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(nextInfo);  // Cast the matched node, not the chain head.
+					break;
+				default:
+					WARN("submitInfo.pNext->sType = %s", vk::Stringify(nextInfo->sType).c_str());
+					break;
+			}
+		}
+
 		for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++)
 		{
-			vk::Cast(submitInfo.pWaitSemaphores[j])->wait(submitInfo.pWaitDstStageMask[j]);
+			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pWaitSemaphores[j]))
+			{
+				ASSERT_MSG(timelineInfo != nullptr,
+				           "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
+				sem->wait(timelineInfo->pWaitSemaphoreValues[j]);
+			}
+			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pWaitSemaphores[j]))
+			{
+				sem->wait(submitInfo.pWaitDstStageMask[j]);
+			}
+			else
+			{
+				UNSUPPORTED("Unknown semaphore type");
+			}
 		}
 
 		{
@@ -134,13 +218,26 @@
 			executionState.events = task.events.get();
 			for(uint32_t j = 0; j < submitInfo.commandBufferCount; j++)
 			{
-				vk::Cast(submitInfo.pCommandBuffers[j])->submit(executionState);
+				Cast(submitInfo.pCommandBuffers[j])->submit(executionState);
 			}
 		}
 
 		for(uint32_t j = 0; j < submitInfo.signalSemaphoreCount; j++)
 		{
-			vk::Cast(submitInfo.pSignalSemaphores[j])->signal();
+			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pSignalSemaphores[j]))
+			{
+				ASSERT_MSG(timelineInfo != nullptr,
+				           "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
+				sem->signal(timelineInfo->pSignalSemaphoreValues[j]);
+			}
+			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pSignalSemaphores[j]))
+			{
+				sem->signal();
+			}
+			else
+			{
+				UNSUPPORTED("Unknown semaphore type");
+			}
 		}
 	}
 
@@ -220,7 +317,7 @@
 
 	for(uint32_t i = 0; i < presentInfo->waitSemaphoreCount; i++)
 	{
-		vk::Cast(presentInfo->pWaitSemaphores[i])->wait();
+		vk::DynamicCast<BinarySemaphore>(presentInfo->pWaitSemaphores[i])->wait();
 	}
 
 	VkResult commandResult = VK_SUCCESS;
diff --git a/src/Vulkan/VkSemaphore.cpp b/src/Vulkan/VkSemaphore.cpp
index aa9a7fc..c82543a 100644
--- a/src/Vulkan/VkSemaphore.cpp
+++ b/src/Vulkan/VkSemaphore.cpp
@@ -16,10 +16,13 @@
 
 #include "VkConfig.hpp"
 #include "VkStringify.hpp"
+#include "VkTimelineSemaphore.hpp"
 
 #include "marl/blockingcall.h"
 #include "marl/conditionvariable.h"
 
+#include <chrono>
+#include <climits>
 #include <functional>
 #include <memory>
 #include <utility>
@@ -28,7 +31,7 @@
 
 // This is a base abstract class for all external semaphore implementations
 // used in this source file.
-class Semaphore::External
+class BinarySemaphore::External
 {
 public:
 	virtual ~External() = default;
@@ -90,45 +93,53 @@
 #endif
     0;
 
-namespace {
-
-struct SemaphoreCreateInfo
+// Create a new instance. The external instance will be allocated only if
+// the pCreateInfo->pNext chain indicates it needs to be exported.
+SemaphoreCreateInfo::SemaphoreCreateInfo(const VkSemaphoreCreateInfo *pCreateInfo)
 {
-	bool exportSemaphore = false;
-	VkExternalSemaphoreHandleTypeFlags exportHandleTypes = 0;
-
-	// Create a new instance. The external instance will be allocated only
-	// the pCreateInfo->pNext chain indicates it needs to be exported.
-	SemaphoreCreateInfo(const VkSemaphoreCreateInfo *pCreateInfo)
+	for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(pCreateInfo->pNext);
+	    nextInfo != nullptr; nextInfo = nextInfo->pNext)
 	{
-		for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(pCreateInfo->pNext);
-		    nextInfo != nullptr; nextInfo = nextInfo->pNext)
+		switch(nextInfo->sType)
 		{
-			switch(nextInfo->sType)
+			case VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO:
 			{
-				case VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO:
+				const auto *exportInfo = reinterpret_cast<const VkExportSemaphoreCreateInfo *>(nextInfo);
+				exportSemaphore = true;
+				exportHandleTypes = exportInfo->handleTypes;
+				if((exportHandleTypes & ~kSupportedTypes) != 0)
 				{
-					const auto *exportInfo = reinterpret_cast<const VkExportSemaphoreCreateInfo *>(nextInfo);
-					exportSemaphore = true;
-					exportHandleTypes = exportInfo->handleTypes;
-					if((exportHandleTypes & ~kSupportedTypes) != 0)
-					{
-						UNSUPPORTED("exportInfo->handleTypes 0x%X (supports 0x%X)",
-						            int(exportHandleTypes),
-						            int(kSupportedTypes));
-					}
+					UNSUPPORTED("exportInfo->handleTypes 0x%X (supports 0x%X)",
+					            int(exportHandleTypes),
+					            int(kSupportedTypes));
 				}
-				break;
-				default:
-					WARN("nextInfo->sType = %s", vk::Stringify(nextInfo->sType).c_str());
 			}
+			break;
+			case VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO:
+			{
+				const auto *tlsInfo = reinterpret_cast<const VkSemaphoreTypeCreateInfo *>(nextInfo);
+				semaphoreType = tlsInfo->semaphoreType;
+				initialPayload = tlsInfo->initialValue;
+			}
+			break;
+			default:
+				WARN("nextInfo->sType = %s", vk::Stringify(nextInfo->sType).c_str());
+				break;
 		}
 	}
-};
+}
 
-}  // namespace
+Semaphore::Semaphore(VkSemaphoreType type)
+    : type(type)  // Stored so that getSemaphoreType() can report it later.
+{
+}
 
-void Semaphore::wait()
+VkSemaphoreType Semaphore::getSemaphoreType() const
+{
+	return type;  // The value recorded at construction time.
+}
+
+void BinarySemaphore::wait()
 {
 	marl::lock lock(mutex);
 	External *ext = tempExternal ? tempExternal : external;
@@ -149,7 +160,7 @@
 		}
 
 		// If the import was temporary, reset the semaphore to its previous state.
-		// See "6.4.5. Importing Semaphore Payloads" in Vulkan 1.1 spec.
+		// See "6.4.5. Importing Semaphore Payloads" in Vulkan 1.1 spec.
 		if(ext == tempExternal)
 		{
 			tempExternal = ext->previous;
@@ -162,8 +173,9 @@
 	}
 }
 
-void Semaphore::signal()
+void BinarySemaphore::signal()
 {
+	ASSERT(type == VK_SEMAPHORE_TYPE_BINARY);
 	marl::lock lock(mutex);
 	External *ext = tempExternal ? tempExternal : external;
 	if(ext)
@@ -178,14 +190,17 @@
 	}
 }
 
-Semaphore::Semaphore(const VkSemaphoreCreateInfo *pCreateInfo, void *mem, const VkAllocationCallbacks *pAllocator)
-    : allocator(pAllocator)
+BinarySemaphore::BinarySemaphore(const VkSemaphoreCreateInfo *pCreateInfo, void *mem, const VkAllocationCallbacks *pAllocator)
+    : Semaphore(VK_SEMAPHORE_TYPE_BINARY)
+    , allocator(pAllocator)
 {
 	SemaphoreCreateInfo info(pCreateInfo);
 	exportableHandleTypes = info.exportHandleTypes;
+	ASSERT(info.semaphoreType == VK_SEMAPHORE_TYPE_BINARY);
+	// 'type' stays VK_SEMAPHORE_TYPE_BINARY as set by the base constructor; the create info must never override it.
 }
 
-void Semaphore::destroy(const VkAllocationCallbacks *pAllocator)
+void BinarySemaphore::destroy(const VkAllocationCallbacks *pAllocator)
 {
 	marl::lock lock(mutex);
 	while(tempExternal)
@@ -201,31 +216,31 @@
 	}
 }
 
-size_t Semaphore::ComputeRequiredAllocationSize(const VkSemaphoreCreateInfo *pCreateInfo)
+size_t BinarySemaphore::ComputeRequiredAllocationSize(const VkSemaphoreCreateInfo *pCreateInfo)
 {
 	// Semaphore::External instance is created and destroyed on demand so return 0 here.
 	return 0;
 }
 
 template<class EXTERNAL>
-Semaphore::External *Semaphore::allocateExternal()
+BinarySemaphore::External *BinarySemaphore::allocateExternal()
 {
-	auto *ext = reinterpret_cast<Semaphore::External *>(
+	auto *ext = reinterpret_cast<BinarySemaphore::External *>(
 	    vk::allocate(sizeof(EXTERNAL), alignof(EXTERNAL), allocator));
 	new(ext) EXTERNAL();
 	return ext;
 }
 
-void Semaphore::deallocateExternal(Semaphore::External *ext)
+void BinarySemaphore::deallocateExternal(BinarySemaphore::External *ext)
 {
 	ext->~External();
 	vk::deallocate(ext, allocator);
 }
 
 template<typename ALLOC_FUNC, typename IMPORT_FUNC>
-VkResult Semaphore::importPayload(bool temporaryImport,
-                                  ALLOC_FUNC alloc_func,
-                                  IMPORT_FUNC import_func)
+VkResult BinarySemaphore::importPayload(bool temporaryImport,
+                                        ALLOC_FUNC alloc_func,
+                                        IMPORT_FUNC import_func)
 {
 	marl::lock lock(mutex);
 
@@ -258,7 +273,7 @@
 }
 
 template<typename ALLOC_FUNC, typename EXPORT_FUNC>
-VkResult Semaphore::exportPayload(ALLOC_FUNC alloc_func, EXPORT_FUNC export_func)
+VkResult BinarySemaphore::exportPayload(ALLOC_FUNC alloc_func, EXPORT_FUNC export_func)
 {
 	marl::lock lock(mutex);
 	// Sanity check, do not try to export a semaphore that has a temporary import.
@@ -283,7 +298,7 @@
 }
 
 #if SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
-VkResult Semaphore::importFd(int fd, bool temporaryImport)
+VkResult BinarySemaphore::importFd(int fd, bool temporaryImport)
 {
 	return importPayload(
 	    temporaryImport,
@@ -295,7 +310,7 @@
 	    });
 }
 
-VkResult Semaphore::exportFd(int *pFd)
+VkResult BinarySemaphore::exportFd(int *pFd)
 {
 	if((exportableHandleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) == 0)
 	{
@@ -314,7 +329,7 @@
 #endif  // SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
 
 #if VK_USE_PLATFORM_FUCHSIA
-VkResult Semaphore::importHandle(zx_handle_t handle, bool temporaryImport)
+VkResult BinarySemaphore::importHandle(zx_handle_t handle, bool temporaryImport)
 {
 	return importPayload(
 	    temporaryImport,
@@ -326,7 +341,7 @@
 	    });
 }
 
-VkResult Semaphore::exportHandle(zx_handle_t *pHandle)
+VkResult BinarySemaphore::exportHandle(zx_handle_t *pHandle)
 {
 	if((exportableHandleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TEMP_ZIRCON_EVENT_BIT_FUCHSIA) == 0)
 	{
diff --git a/src/Vulkan/VkSemaphore.hpp b/src/Vulkan/VkSemaphore.hpp
index 3f32111..c188a93 100644
--- a/src/Vulkan/VkSemaphore.hpp
+++ b/src/Vulkan/VkSemaphore.hpp
@@ -22,16 +22,45 @@
 #include "marl/mutex.h"
 #include "marl/tsa.h"
 
+#include "System/Synchronization.hpp"
+
 #if VK_USE_PLATFORM_FUCHSIA
 #	include <zircon/types.h>
 #endif
 
 namespace vk {
 
-class Semaphore : public Object<Semaphore, VkSemaphore>
+class BinarySemaphore;
+class TimelineSemaphore;
+
+class Semaphore
 {
 public:
-	Semaphore(const VkSemaphoreCreateInfo *pCreateInfo, void *mem, const VkAllocationCallbacks *pAllocator);
+	explicit Semaphore(VkSemaphoreType type);  // 'type' is what getSemaphoreType() will report.
+
+	virtual ~Semaphore() = default;
+
+	static inline Semaphore *Cast(VkSemaphore semaphore)  // Reinterprets the handle as a Semaphore pointer.
+	{
+		return static_cast<Semaphore *>(static_cast<void *>(semaphore));
+	}
+
+	virtual void destroy(const VkAllocationCallbacks *pAllocator)  // Base implementation does nothing.
+	{
+	}
+
+	VkSemaphoreType getSemaphoreType() const;
+	// Allocation sizes are reported by the concrete semaphore classes, not by this base.
+
+protected:
+	VkSemaphoreType type;  // Read through getSemaphoreType(), e.g. by DynamicCast<T>().
+	marl::mutex mutex;
+};
+
+class BinarySemaphore : public Semaphore, public Object<BinarySemaphore, VkSemaphore>
+{
+public:
+	BinarySemaphore(const VkSemaphoreCreateInfo *pCreateInfo, void *mem, const VkAllocationCallbacks *pAllocator);
 	void destroy(const VkAllocationCallbacks *pAllocator);
 
 	static size_t ComputeRequiredAllocationSize(const VkSemaphoreCreateInfo *pCreateInfo);
@@ -149,7 +178,6 @@
 	const VkAllocationCallbacks *allocator = nullptr;
 	VkExternalSemaphoreHandleTypeFlags exportableHandleTypes = (VkExternalSemaphoreHandleTypeFlags)0;
 	marl::Event internal;
-	marl::mutex mutex;
 	External *external GUARDED_BY(mutex) = nullptr;
 	External *tempExternal GUARDED_BY(mutex) = nullptr;
 };
@@ -159,6 +187,48 @@
 	return Semaphore::Cast(object);
 }
 
+// Type-checked conversion from a VkSemaphore handle to a concrete semaphore class.
+// T must be BinarySemaphore or TimelineSemaphore. Yields nullptr when vk::Cast()
+// yields nullptr or when the semaphore's recorded type does not correspond to T.
+template<typename T>
+static inline T *DynamicCast(VkSemaphore object)
+{
+	constexpr bool wantsBinary = std::is_same_v<T, BinarySemaphore>;
+	constexpr bool wantsTimeline = std::is_same_v<T, TimelineSemaphore>;
+	static_assert(wantsBinary || wantsTimeline);
+
+	// The semaphore type that must be recorded on the object for the cast to be valid.
+	constexpr VkSemaphoreType expectedType = wantsBinary ? VK_SEMAPHORE_TYPE_BINARY : VK_SEMAPHORE_TYPE_TIMELINE;
+
+	Semaphore *base = vk::Cast(object);
+	if(base == nullptr)
+	{
+		return nullptr;
+	}
+
+	if(base->getSemaphoreType() != expectedType)
+	{
+		return nullptr;
+	}
+
+	return static_cast<T *>(base);
+}
+
+// This struct helps parse VkSemaphoreCreateInfo. It also looks at the pNext
+// structures and stores their data flatly in a single struct. The default
+// values of each data member are what the absence of a pNext struct implies
+// for those values.
+struct SemaphoreCreateInfo
+{
+	bool exportSemaphore = false;  // Set when a VkExportSemaphoreCreateInfo is chained.
+	VkExternalSemaphoreHandleTypeFlags exportHandleTypes = 0;  // Handle types requested by that struct.
+
+	VkSemaphoreType semaphoreType = VK_SEMAPHORE_TYPE_BINARY;  // Overridden by a chained VkSemaphoreTypeCreateInfo.
+	uint64_t initialPayload = 0;  // Initial value from a chained VkSemaphoreTypeCreateInfo.
+
+	SemaphoreCreateInfo(const VkSemaphoreCreateInfo *pCreateInfo);  // Walks pCreateInfo->pNext to fill the fields above.
+};
+
 }  // namespace vk
 
 #endif  // VK_SEMAPHORE_HPP_
diff --git a/src/Vulkan/VkSemaphoreExternalFuchsia.hpp b/src/Vulkan/VkSemaphoreExternalFuchsia.hpp
index 1e37e68..d204476 100644
--- a/src/Vulkan/VkSemaphoreExternalFuchsia.hpp
+++ b/src/Vulkan/VkSemaphoreExternalFuchsia.hpp
@@ -26,7 +26,7 @@
 
 namespace vk {
 
-class ZirconEventExternalSemaphore : public Semaphore::External
+class ZirconEventExternalSemaphore : public BinarySemaphore::External
 {
 public:
 	~ZirconEventExternalSemaphore()
diff --git a/src/Vulkan/VkSemaphoreExternalLinux.hpp b/src/Vulkan/VkSemaphoreExternalLinux.hpp
index d4e4cd2..3689ad9 100644
--- a/src/Vulkan/VkSemaphoreExternalLinux.hpp
+++ b/src/Vulkan/VkSemaphoreExternalLinux.hpp
@@ -130,7 +130,7 @@
 
 namespace vk {
 
-class OpaqueFdExternalSemaphore : public Semaphore::External
+class OpaqueFdExternalSemaphore : public BinarySemaphore::External
 {
 public:
 	~OpaqueFdExternalSemaphore() { unmapRegion(); }
diff --git a/src/Vulkan/VkTimelineSemaphore.cpp b/src/Vulkan/VkTimelineSemaphore.cpp
new file mode 100644
index 0000000..7aa85db
--- /dev/null
+++ b/src/Vulkan/VkTimelineSemaphore.cpp
@@ -0,0 +1,145 @@
+// Copyright 2021 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "VkTimelineSemaphore.hpp"
+#include "VkSemaphore.hpp"
+
+#include "marl/blockingcall.h"
+#include "marl/conditionvariable.h"
+
+#include <vector>
+
+namespace vk {
+
+TimelineSemaphore::TimelineSemaphore(const VkSemaphoreCreateInfo *pCreateInfo, void *mem, const VkAllocationCallbacks *pAllocator)
+    : Semaphore(VK_SEMAPHORE_TYPE_TIMELINE)
+{
+	SemaphoreCreateInfo info(pCreateInfo);
+	ASSERT(info.semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE);
+	// 'type' stays VK_SEMAPHORE_TYPE_TIMELINE as set by the base constructor; the create info must never override it.
+	shared = marl::Allocator::Default->make_shared<TimelineSemaphore::Shared>(marl::Allocator::Default, info.initialPayload);
+}
+
+TimelineSemaphore::TimelineSemaphore()
+    : Semaphore(VK_SEMAPHORE_TYPE_TIMELINE)
+{
+	// The base constructor has already recorded VK_SEMAPHORE_TYPE_TIMELINE; the payload starts at 0.
+	shared = marl::Allocator::Default->make_shared<TimelineSemaphore::Shared>(marl::Allocator::Default, 0);
+}
+
+size_t TimelineSemaphore::ComputeRequiredAllocationSize(const VkSemaphoreCreateInfo *pCreateInfo)
+{
+	return 0;  // No extra allocation is requested; pCreateInfo is not consulted.
+}
+
+void TimelineSemaphore::destroy(const VkAllocationCallbacks *pAllocator)
+{  // Nothing is released here; pAllocator is unused.
+}
+
+void TimelineSemaphore::signal(uint64_t value)
+{
+	shared->signal(value);  // The payload lives in the shared state; delegate to it.
+}
+
+void TimelineSemaphore::Shared::signal(uint64_t value)
+{
+	marl::lock lock(mutex);
+	if(counter < value)  // Only a strictly larger value advances the payload; other signals are ignored.
+	{
+		counter = value;
+		cv.notify_all();  // Wake waiters so they can re-check their wait value.
+		for(const auto &dep : deps)  // Bind by reference to avoid copying each shared pointer.
+		{
+			dep->signal(id, counter);
+		}
+	}
+}
+
+void TimelineSemaphore::wait(uint64_t value)
+{
+	shared->wait(value);  // Blocking happens on the shared state's condition variable.
+}
+
+void TimelineSemaphore::Shared::wait(uint64_t value)
+{
+	marl::lock lock(mutex);
+	cv.wait(lock, [&]() { return counter >= value; });  // Satisfied once the payload reaches or passes 'value'.
+}
+
+uint64_t TimelineSemaphore::getCounterValue()
+{
+	return shared->getCounterValue();  // Delegates to the shared state, which reads under its mutex.
+}
+
+uint64_t TimelineSemaphore::Shared::getCounterValue()
+{
+	marl::lock lock(mutex);  // Take the mutex so the read does not overlap a signal().
+	return counter;
+}
+
+std::atomic<int> TimelineSemaphore::Shared::nextId;  // Source of the ids handed out by the Shared constructor.
+
+TimelineSemaphore::Shared::Shared(marl::Allocator *allocator, uint64_t initialState)
+    : cv(allocator)
+    , counter(initialState)  // The payload starts at the caller-supplied value.
+    , id(nextId++)  // Each instance takes the next value of the static counter.
+{
+}
+
+void TimelineSemaphore::Shared::signal(int parentId, uint64_t value)
+{
+	marl::lock lock(mutex);
+	auto it = waitMap.find(parentId);
+	// Ignore the signal if we have already fired (counter != 0), if parentId is not a
+	// semaphore we depend on, or if the parent has not yet reached the value we wait for.
+	if(counter == 0 && it != waitMap.end() && value >= it->second)
+	{
+		// Stop waiting on all parents once we find a signal
+		waitMap.clear();
+		counter = 1;
+		cv.notify_all();
+		for(const auto &dep : deps)
+		{
+			dep->signal(id, counter);
+		}
+	}
+}
+
+void TimelineSemaphore::addDependent(TimelineSemaphore &other, uint64_t waitValue)
+{
+	shared->addDependent(other);  // 'other' will be forwarded every signal that advances this semaphore.
+	other.addDependency(shared->id, waitValue);  // 'other' records which value of ours it is waiting for.
+}
+
+void TimelineSemaphore::Shared::addDependent(TimelineSemaphore &other)
+{
+	marl::lock lock(mutex);
+	deps.push_back(other.shared);  // Stores other's shared state in the list that signal() notifies.
+}
+
+void TimelineSemaphore::addDependency(int id, uint64_t waitValue)
+{
+	shared->addDependency(id, waitValue);  // Bookkeeping lives in the shared state.
+}
+
+void TimelineSemaphore::Shared::addDependency(int id, uint64_t waitValue)
+{
+	marl::lock lock(mutex);
+	auto existing = waitMap.find(id);  // A parent id is expected to be registered at most once.
+	ASSERT(existing == waitMap.end());
+	// Record the value of parent 'id' that this semaphore is waiting for.
+	waitMap.insert(existing, std::make_pair(id, waitValue));
+}
+
+}  // namespace vk
diff --git a/src/Vulkan/VkTimelineSemaphore.hpp b/src/Vulkan/VkTimelineSemaphore.hpp
new file mode 100644
index 0000000..55c3f45
--- /dev/null
+++ b/src/Vulkan/VkTimelineSemaphore.hpp
@@ -0,0 +1,166 @@
+// Copyright 2021 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef VK_TIMELINE_SEMAPHORE_HPP_
+#define VK_TIMELINE_SEMAPHORE_HPP_
+
+#include "VkConfig.hpp"
+#include "VkObject.hpp"
+#include "VkSemaphore.hpp"
+
+#include "marl/conditionvariable.h"
+#include "marl/mutex.h"
+
+#include "System/Synchronization.hpp"
+
+#include <chrono>
+
+namespace vk {
+
+struct Shared;
+
+// Timeline Semaphores track a 64-bit payload instead of a binary payload.
+//
+// A timeline does not have a "signaled" and "unsignalled" state. Threads instead wait
+// for the payload to become a certain value. When a thread signals the timeline, it provides
+// a new payload that is greater than the current payload.
+//
+// There is no way to reset a timeline or to decrease the payload's value. A user must instead
+// create a new timeline with a new initial payload if they desire this behavior.
+class TimelineSemaphore : public Semaphore, public Object<TimelineSemaphore, VkSemaphore>
+{
+public:
+	TimelineSemaphore(const VkSemaphoreCreateInfo *pCreateInfo, void *mem, const VkAllocationCallbacks *pAllocator);
+	TimelineSemaphore();
+
+	static size_t ComputeRequiredAllocationSize(const VkSemaphoreCreateInfo *pCreateInfo);
+
+	// Block until this semaphore is signaled with the specified value.
+	void wait(uint64_t value);
+
+	// Like wait(value), but gives up at time point end_ns. Returns VK_SUCCESS if signaled, VK_TIMEOUT otherwise.
+	template<class CLOCK, class DURATION>
+	VkResult wait(uint64_t value, const std::chrono::time_point<CLOCK, DURATION> end_ns);
+
+	// Set the payload to the specified value and signal all waiting threads.
+	void signal(uint64_t value);
+
+	// Retrieve the current payload. This should not be used to make thread execution decisions
+	// as there's no guarantee that the value returned here matches the actual payload's value.
+	uint64_t getCounterValue();
+
+	// Dependent timeline semaphores allow an 'any' semaphore to be created that can wait on the
+	// state of multiple other timeline semaphores and be signaled like a binary semaphore
+	// if any of its parent semaphores are signaled with a certain value.
+	//
+	// Since a timeline semaphore can be signaled with nearly any value, but threads waiting
+	// on a timeline semaphore only unblock when a specific value is signaled, dependents can't
+	// naively become signaled whenever their parent semaphores are signaled with a new value.
+	// Instead, the dependent semaphore needs to wait for its parent semaphore to be signaled
+	// with a specific value as well. This specific value may differ for each parent semaphore.
+	//
+	// So this function adds other as a dependent semaphore, and tells it to only become signaled
+	// by this semaphore when this semaphore is signaled with waitValue.
+	void addDependent(TimelineSemaphore &other, uint64_t waitValue);
+	void addDependency(int id, uint64_t waitValue);  // Records that the parent with this id is awaited at waitValue.
+
+	// Tells this semaphore to become signaled as part of a dependency chain when the parent semaphore
+	// with the specified id is signaled with the specified waitValue.
+	void addToWaitMap(int parentId, uint64_t waitValue);
+
+	// Clean up any allocated resources.
+	void destroy(const VkAllocationCallbacks *pAllocator);
+
+private:
+	// Tracks the 64-bit payload. Each timeline semaphore holds a shared_ptr<Shared>
+	// that it can hand to other timeline semaphores to create dependency chains.
+	struct Shared
+	{
+	private:
+		// Guards access to all the resources that may be accessed by other threads.
+		// No clang Thread Safety Analysis is used on variables guarded by mutex
+		// as there is an issue with TSA. Despite instrumenting everything properly,
+		// compilation will fail when a lambda function uses a guarded resource.
+		marl::mutex mutex;
+
+		static std::atomic<int> nextId;  // Source of the per-instance id values.
+
+	public:
+		Shared(marl::Allocator *allocator, uint64_t initialState);
+
+		// Block until this semaphore is signaled with the specified value.
+		void wait(uint64_t value);
+		// Like wait(value), but gives up at time point end_ns. Returns VK_SUCCESS if signaled, VK_TIMEOUT otherwise.
+		template<class CLOCK, class DURATION>
+		VkResult wait(uint64_t value, const std::chrono::time_point<CLOCK, DURATION> end_ns);
+
+		// Called on a dependent by its parent (identified by parentId) to pass the parent's signal down.
+		void signal(int parentId, uint64_t value);
+		// Set the payload to the specified value and signal all waiting threads.
+		void signal(uint64_t value);
+
+		// Retrieve the current payload. This should not be used to make thread execution decisions
+		// as there's no guarantee that the value returned here matches the actual payload's value.
+		uint64_t getCounterValue();
+
+		// Add the other semaphore's Shared to deps.
+		void addDependent(TimelineSemaphore &other);
+
+		// Add {id, waitValue} as a key-value pair to waitMap.
+		void addDependency(int id, uint64_t waitValue);
+
+		// Entry point to the marl threading library that handles blocking and unblocking.
+		marl::ConditionVariable cv;
+
+		// TODO(b/181683382) -- Add Thread Safety Analysis instrumentation when it can properly
+		// analyze lambdas.
+		// The 64-bit payload.
+		uint64_t counter;
+
+		// A list of this semaphore's dependents.
+		marl::containers::vector<std::shared_ptr<Shared>, 1> deps;
+
+		// A map of {parentId: waitValue} pairs that tracks when this semaphore should unblock if it's
+		// signaled as a dependent by another semaphore.
+		std::map<int, uint64_t> waitMap;
+
+		// An ID that's unique for each instance of Shared.
+		const int id;
+	};
+
+	std::shared_ptr<Shared> shared;
+};
+
+template<typename Clock, typename Duration>
+VkResult TimelineSemaphore::wait(uint64_t value,
+                                 const std::chrono::time_point<Clock, Duration> timeout)
+{
+	return shared->wait(value, timeout);  // The timed wait and its VkResult come from the shared payload state.
+}
+
+template<typename Clock, typename Duration>
+VkResult TimelineSemaphore::Shared::wait(uint64_t value,
+                                         const std::chrono::time_point<Clock, Duration> timeout)
+{
+	marl::lock lock(mutex);
+	// The payload only grows, so the wait is satisfied once counter reaches or passes value;
+	// testing for equality would miss a signal that jumps beyond the waited value.
+	// A false result from wait_until() is reported to the caller as VK_TIMEOUT.
+	const bool reached = cv.wait_until(lock, timeout, [&]() { return counter >= value; });
+	return reached ? VK_SUCCESS : VK_TIMEOUT;
+}
+
+}  // namespace vk
+
+#endif  // VK_TIMELINE_SEMAPHORE_HPP_
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index 482e178..a32c3ff 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -42,6 +42,7 @@
 #include "VkSemaphore.hpp"
 #include "VkShaderModule.hpp"
 #include "VkStringify.hpp"
+#include "VkTimelineSemaphore.hpp"
 
 #include "System/Debug.hpp"
 
@@ -421,6 +422,7 @@
 	{ { VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_EXTENSION_NAME, VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_SPEC_VERSION } },
 	{ { VK_KHR_SPIRV_1_4_EXTENSION_NAME, VK_KHR_SPIRV_1_4_SPEC_VERSION } },
 	{ { VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_SPEC_VERSION } },
+	{ { VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_SPEC_VERSION } },
 };
 
 static uint32_t numSupportedExtensions(const ExtensionProperties *extensionProperties, uint32_t extensionPropertiesCount)
@@ -1428,7 +1430,35 @@
 		UNSUPPORTED("pCreateInfo->flags %d", int(pCreateInfo->flags));
 	}
 
-	return vk::Semaphore::Create(pAllocator, pCreateInfo, pSemaphore, pAllocator);
+	VkSemaphoreType type = VK_SEMAPHORE_TYPE_BINARY;
+	for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(pCreateInfo->pNext);
+	    nextInfo != nullptr; nextInfo = nextInfo->pNext)
+	{
+		switch(nextInfo->sType)
+		{
+			case VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO:
+				// Let the semaphore constructor handle this
+				break;
+			case VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO:
+			{
+				const VkSemaphoreTypeCreateInfo *info = reinterpret_cast<const VkSemaphoreTypeCreateInfo *>(nextInfo);
+				type = info->semaphoreType;
+			}
+			break;
+			default:
+				WARN("nextInfo->sType = %s", vk::Stringify(nextInfo->sType).c_str());
+				break;
+		}
+	}
+
+	if(type == VK_SEMAPHORE_TYPE_BINARY)
+	{
+		return vk::BinarySemaphore::Create(pAllocator, pCreateInfo, pSemaphore, pAllocator);
+	}
+	else
+	{
+		return vk::TimelineSemaphore::Create(pAllocator, pCreateInfo, pSemaphore, pAllocator);
+	}
 }
 
 VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore(VkDevice device, VkSemaphore semaphore, const VkAllocationCallbacks *pAllocator)
@@ -1450,7 +1480,9 @@
 		UNSUPPORTED("pGetFdInfo->handleType %d", int(pGetFdInfo->handleType));
 	}
 
-	return vk::Cast(pGetFdInfo->semaphore)->exportFd(pFd);
+	auto *sem = vk::DynamicCast<vk::BinarySemaphore>(pGetFdInfo->semaphore);
+	ASSERT(sem != nullptr);
+	return sem->exportFd(pFd);
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL vkImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR *pImportSemaphoreInfo)
@@ -1464,7 +1496,9 @@
 	}
 	bool temporaryImport = (pImportSemaphoreInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) != 0;
 
-	return vk::Cast(pImportSemaphoreInfo->semaphore)->importFd(pImportSemaphoreInfo->fd, temporaryImport);
+	auto *sem = vk::DynamicCast<vk::BinarySemaphore>(pImportSemaphoreInfo->semaphore);
+	ASSERT(sem != nullptr);
+	return sem->importFd(pImportSemaphoreInfo->fd, temporaryImport);
 }
 #endif  // SWIFTSHADER_EXTERNAL_SEMAPHORE_OPAQUE_FD
 
@@ -1481,8 +1515,9 @@
 		UNSUPPORTED("pImportSemaphoreZirconHandleInfo->handleType %d", int(pImportSemaphoreZirconHandleInfo->handleType));
 	}
 	bool temporaryImport = (pImportSemaphoreZirconHandleInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) != 0;
-
-	return vk::Cast(pImportSemaphoreZirconHandleInfo->semaphore)->importHandle(pImportSemaphoreZirconHandleInfo->handle, temporaryImport);
+	auto *sem = vk::DynamicCast<vk::BinarySemaphore>(pImportSemaphoreZirconHandleInfo->semaphore);
+	ASSERT(sem != nullptr);
+	return sem->importHandle(pImportSemaphoreZirconHandleInfo->handle, temporaryImport);
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreZirconHandleFUCHSIA(
@@ -1498,10 +1533,35 @@
 		UNSUPPORTED("pGetZirconHandleInfo->handleType %d", int(pGetZirconHandleInfo->handleType));
 	}
 
-	return vk::Cast(pGetZirconHandleInfo->semaphore)->exportHandle(pZirconHandle);
+	auto *sem = vk::DynamicCast<vk::BinarySemaphore>(pGetZirconHandleInfo->semaphore);
+	ASSERT(sem != nullptr);
+	return sem->exportHandle(pZirconHandle);
 }
 #endif  // VK_USE_PLATFORM_FUCHSIA
 
+VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreCounterValue(VkDevice device, VkSemaphore semaphore, uint64_t *pValue)
+{
+	TRACE("(VkDevice device = %p, VkSemaphore semaphore = %p, uint64_t* pValue = %p)", device, static_cast<void *>(semaphore), pValue);
+	auto *timeline = vk::DynamicCast<vk::TimelineSemaphore>(semaphore);  // Used unchecked: the handle must be a timeline semaphore.
+	*pValue = timeline->getCounterValue();
+	return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL vkSignalSemaphore(VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo)
+{
+	TRACE("(VkDevice device = %p, const VkSemaphoreSignalInfo *pSignalInfo = %p)", device, pSignalInfo);
+	auto *timeline = vk::DynamicCast<vk::TimelineSemaphore>(pSignalInfo->semaphore);  // Used unchecked: the handle must be a timeline semaphore.
+	timeline->signal(pSignalInfo->value);
+	return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL vkWaitSemaphores(VkDevice device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
+{
+	TRACE("(VkDevice device = %p, const VkSemaphoreWaitInfo *pWaitInfo = %p, uint64_t timeout = %" PRIu64 ")", device, pWaitInfo, timeout);
+	auto *dev = vk::Cast(device);  // The device object carries out the wait and supplies the result.
+	return dev->waitForSemaphores(pWaitInfo, timeout);
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent(VkDevice device, const VkEventCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
 {
 	TRACE("(VkDevice device = %p, const VkEventCreateInfo* pCreateInfo = %p, const VkAllocationCallbacks* pAllocator = %p, VkEvent* pEvent = %p)",
@@ -3949,7 +4009,7 @@
 	TRACE("(VkDevice device = %p, VkSwapchainKHR swapchain = %p, uint64_t timeout = %" PRIu64 ", VkSemaphore semaphore = %p, VkFence fence = %p, uint32_t* pImageIndex = %p)",
 	      device, static_cast<void *>(swapchain), timeout, static_cast<void *>(semaphore), static_cast<void *>(fence), pImageIndex);
 
-	return vk::Cast(swapchain)->getNextImage(timeout, vk::Cast(semaphore), vk::Cast(fence), pImageIndex);
+	return vk::Cast(swapchain)->getNextImage(timeout, vk::DynamicCast<vk::BinarySemaphore>(semaphore), vk::Cast(fence), pImageIndex);
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo)
@@ -3965,7 +4025,7 @@
 	TRACE("(VkDevice device = %p, const VkAcquireNextImageInfoKHR *pAcquireInfo = %p, uint32_t *pImageIndex = %p",
 	      device, pAcquireInfo, pImageIndex);
 
-	return vk::Cast(pAcquireInfo->swapchain)->getNextImage(pAcquireInfo->timeout, vk::Cast(pAcquireInfo->semaphore), vk::Cast(pAcquireInfo->fence), pImageIndex);
+	return vk::Cast(pAcquireInfo->swapchain)->getNextImage(pAcquireInfo->timeout, vk::DynamicCast<vk::BinarySemaphore>(pAcquireInfo->semaphore), vk::Cast(pAcquireInfo->fence), pImageIndex);
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupPresentCapabilitiesKHR(VkDevice device, VkDeviceGroupPresentCapabilitiesKHR *pDeviceGroupPresentCapabilities)
diff --git a/src/Vulkan/vk_swiftshader.def b/src/Vulkan/vk_swiftshader.def
index 77fb8c6..f0c2c9c 100644
--- a/src/Vulkan/vk_swiftshader.def
+++ b/src/Vulkan/vk_swiftshader.def
@@ -226,6 +226,13 @@
 	vkGetPhysicalDeviceSurfaceCapabilitiesKHR

 	vkGetPhysicalDeviceSurfaceFormatsKHR

 	vkGetPhysicalDeviceSurfacePresentModesKHR

+	; VK_KHR_timeline_semaphore

+	vkGetSemaphoreCounterValue

+	vkSignalSemaphore

+	vkWaitSemaphores

+	vkGetSemaphoreCounterValueKHR

+	vkSignalSemaphoreKHR

+	vkWaitSemaphoresKHR

 	; VK_KHR_win32_surface

 	vkCreateWin32SurfaceKHR

 	vkGetPhysicalDeviceWin32PresentationSupportKHR

diff --git a/src/Vulkan/vk_swiftshader.lds b/src/Vulkan/vk_swiftshader.lds
index 768114e..0268f98 100644
--- a/src/Vulkan/vk_swiftshader.lds
+++ b/src/Vulkan/vk_swiftshader.lds
@@ -230,7 +230,13 @@
 	vkResetQueryPool;
 	# VK_EXT_headless_surface
 	vkCreateHeadlessSurfaceEXT;
-
+	# VK_KHR_timeline_semaphore
+	vkGetSemaphoreCounterValue;
+	vkSignalSemaphore;
+	vkWaitSemaphores;
+	vkGetSemaphoreCounterValueKHR;
+	vkSignalSemaphoreKHR;
+	vkWaitSemaphoresKHR;
 	# Android HAL module info object
 	HMI;
 
diff --git a/src/WSI/VkSwapchainKHR.cpp b/src/WSI/VkSwapchainKHR.cpp
index aed8c03..57d9cdd 100644
--- a/src/WSI/VkSwapchainKHR.cpp
+++ b/src/WSI/VkSwapchainKHR.cpp
@@ -170,7 +170,7 @@
 	return VK_SUCCESS;
 }
 
-VkResult SwapchainKHR::getNextImage(uint64_t timeout, Semaphore *semaphore, Fence *fence, uint32_t *pImageIndex)
+VkResult SwapchainKHR::getNextImage(uint64_t timeout, BinarySemaphore *semaphore, Fence *fence, uint32_t *pImageIndex)
 {
 	for(uint32_t i = 0; i < imageCount; i++)
 	{
diff --git a/src/WSI/VkSwapchainKHR.hpp b/src/WSI/VkSwapchainKHR.hpp
index e5aca6e..b4dcbad 100644
--- a/src/WSI/VkSwapchainKHR.hpp
+++ b/src/WSI/VkSwapchainKHR.hpp
@@ -24,7 +24,7 @@
 namespace vk {
 
 class Fence;
-class Semaphore;
+class BinarySemaphore;
 
 class SwapchainKHR : public Object<SwapchainKHR, VkSwapchainKHR>
 {
@@ -42,7 +42,7 @@
 	uint32_t getImageCount() const;
 	VkResult getImages(uint32_t *pSwapchainImageCount, VkImage *pSwapchainImages) const;
 
-	VkResult getNextImage(uint64_t timeout, Semaphore *semaphore, Fence *fence, uint32_t *pImageIndex);
+	VkResult getNextImage(uint64_t timeout, BinarySemaphore *semaphore, Fence *fence, uint32_t *pImageIndex);
 
 	VkResult present(uint32_t index);
 	PresentImage const &getImage(uint32_t imageIndex) { return images[imageIndex]; }