VK_EXT_inline_uniform_block implementation

This extension provides a simpler way of specifying uniform block
memory. Rather than having to allocate a buffer to contain it, the
uniform block memory can be allocated dynamically, which reduces
the number of API calls required to use uniform blocks.

Tests: dEQP-VK.api.info.vulkan1p2_limits_validation.*
Tests: dEQP-VK.binding_model.descriptor_copy.*.inline_uniform_block*
Bug: b/204502117
Change-Id: I46c36158f0c66ea05cf1a762d3a2f720436533ac
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/62809
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Commit-Queue: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 107a2df..726e457 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1608,9 +1608,25 @@
 	dbgDeclareResult(insn, resultId);
 }
 
-OutOfBoundsBehavior SpirvShader::EmitState::getOutOfBoundsBehavior(spv::StorageClass storageClass) const
+OutOfBoundsBehavior SpirvShader::getOutOfBoundsBehavior(Object::ID pointerId, EmitState const *state) const
 {
-	switch(storageClass)
+	auto it = descriptorDecorations.find(pointerId);
+	if(it != descriptorDecorations.end())
+	{
+		const auto &d = it->second;
+		if((d.DescriptorSet >= 0) && (d.Binding >= 0))
+		{
+			auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
+			if(descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+			{
+				return OutOfBoundsBehavior::UndefinedBehavior;
+			}
+		}
+	}
+
+	auto &pointer = getObject(pointerId);
+	auto &pointerTy = getType(pointer);
+	switch(pointerTy.storageClass)
 	{
 	case spv::StorageClassUniform:
 	case spv::StorageClassStorageBuffer:
@@ -1702,7 +1718,7 @@
 
 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, unsigned int multiSampleCount) const
 {
-	EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, multiSampleCount, executionModel);
+	EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, multiSampleCount);
 
 	dbgBeginEmit(&state);
 	defer(dbgEndEmit(&state));
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 80bc88b..4be1d34 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1018,19 +1018,14 @@
 		          RValue<SIMD::Int> activeLaneMask,
 		          RValue<SIMD::Int> storesAndAtomicsMask,
 		          const vk::DescriptorSet::Bindings &descriptorSets,
-		          bool robustBufferAccess,
-		          unsigned int multiSampleCount,
-		          spv::ExecutionModel executionModel)
+		          unsigned int multiSampleCount)
 		    : routine(routine)
 		    , function(function)
 		    , activeLaneMaskValue(activeLaneMask.value())
 		    , storesAndAtomicsMaskValue(storesAndAtomicsMask.value())
 		    , descriptorSets(descriptorSets)
-		    , robustBufferAccess(robustBufferAccess)
 		    , multiSampleCount(multiSampleCount)
-		    , executionModel(executionModel)
 		{
-			ASSERT(executionModel != spv::ExecutionModelMax);  // Must parse OpEntryPoint before emitting.
 		}
 
 		// Returns the mask describing the active lanes as updated by dynamic
@@ -1087,8 +1082,6 @@
 
 		const vk::DescriptorSet::Bindings &descriptorSets;
 
-		OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
-
 		unsigned int getMultiSampleCount() const { return multiSampleCount; }
 
 		Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
@@ -1124,9 +1117,7 @@
 		std::unordered_map<Object::ID, Intermediate> intermediates;
 		std::unordered_map<Object::ID, SIMD::Pointer> pointers;
 
-		const bool robustBufferAccess;  // Emit robustBufferAccess safe code.
 		const unsigned int multiSampleCount;
-		const spv::ExecutionModel executionModel;
 	};
 
 	// EmitResult is an enumerator of result values from the Emit functions.
@@ -1248,6 +1239,8 @@
 	// Calling GetPointerToData with objects of any other kind will assert.
 	SIMD::Pointer GetPointerToData(Object::ID id, Int arrayIndex, EmitState const *state) const;
 
+	OutOfBoundsBehavior getOutOfBoundsBehavior(Object::ID pointerId, EmitState const *state) const;
+
 	SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;
 	SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;
 
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp
index 8439011..f7337c6 100644
--- a/src/Pipeline/SpirvShaderMemory.cpp
+++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -56,7 +56,7 @@
 	auto ptr = GetPointerToData(pointerId, 0, state);
 	bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
 	auto &dst = state->createIntermediate(resultId, resultTy.componentCount);
-	auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
+	auto robustness = getOutOfBoundsBehavior(pointerId, state);
 
 	VisitMemoryObject(pointerId, [&](const MemoryElement &el) {
 		auto p = ptr + el.offset;
@@ -100,7 +100,7 @@
 
 	auto ptr = GetPointerToData(pointerId, 0, state);
 	bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
-	auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
+	auto robustness = getOutOfBoundsBehavior(pointerId, state);
 
 	SIMD::Int mask = state->activeLaneMask();
 	if(!StoresInHelperInvocation(pointerTy.storageClass))
@@ -395,23 +395,35 @@
 
 			auto set = state->getPointer(id);
 			Assert(set.base != Pointer<Byte>(nullptr));
-			Pointer<Byte> descriptor = set.base + descriptorOffset;                                        // BufferDescriptor*
-			Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr));  // void*
-			Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
+			Pointer<Byte> descriptor = set.base + descriptorOffset;  // BufferDescriptor* or inline uniform block
 
-			if(routine->pipelineLayout->isDescriptorDynamic(d.DescriptorSet, d.Binding))
+			auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
+			if(descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
 			{
-				Int dynamicOffsetIndex =
-				    routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) +
-				    arrayIndex;
-				Int offset = routine->descriptorDynamicOffsets[dynamicOffsetIndex];
-				Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
-
-				return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
+				// Note: there is no bounds checking for inline uniform blocks.
+				// MAX_INLINE_UNIFORM_BLOCK_SIZE represents the maximum size of
+				// an inline uniform block, but this value should remain unused.
+				return SIMD::Pointer(descriptor, vk::MAX_INLINE_UNIFORM_BLOCK_SIZE);
 			}
 			else
 			{
-				return SIMD::Pointer(data, size);
+				Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr));  // void*
+				Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
+
+				if(routine->pipelineLayout->isDescriptorDynamic(d.DescriptorSet, d.Binding))
+				{
+					Int dynamicOffsetIndex =
+					    routine->pipelineLayout->getDynamicOffsetIndex(d.DescriptorSet, d.Binding) +
+					    arrayIndex;
+					Int offset = routine->descriptorDynamicOffsets[dynamicOffsetIndex];
+					Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
+
+					return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
+				}
+				else
+				{
+					return SIMD::Pointer(data, size);
+				}
 			}
 		}
 
diff --git a/src/Vulkan/VkConfig.hpp b/src/Vulkan/VkConfig.hpp
index 95adbe0..f0816b8 100644
--- a/src/Vulkan/VkConfig.hpp
+++ b/src/Vulkan/VkConfig.hpp
@@ -68,6 +68,8 @@
 
 constexpr uint32_t MAX_COMPUTE_WORKGROUP_INVOCATIONS = 256;
 
+constexpr size_t MAX_INLINE_UNIFORM_BLOCK_SIZE = 256;
+
 constexpr float MAX_POINT_SIZE = 1023.0;
 
 constexpr int MAX_SAMPLER_ALLOCATION_COUNT = 4000;
diff --git a/src/Vulkan/VkDescriptorPool.cpp b/src/Vulkan/VkDescriptorPool.cpp
index 5e7e699..8b98d4b 100644
--- a/src/Vulkan/VkDescriptorPool.cpp
+++ b/src/Vulkan/VkDescriptorPool.cpp
@@ -48,8 +48,17 @@
 
 	for(uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++)
 	{
-		size += pCreateInfo->pPoolSizes[i].descriptorCount *
-		        sw::align(DescriptorSetLayout::GetDescriptorSize(pCreateInfo->pPoolSizes[i].type), 16);
+		uint32_t descriptorSize = DescriptorSetLayout::GetDescriptorSize(pCreateInfo->pPoolSizes[i].type);
+		if(pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+		{
+			// If type is VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK then descriptorCount
+			// is the number of bytes to allocate for descriptors of this type.
+			size += sw::align(pCreateInfo->pPoolSizes[i].descriptorCount, 16);
+		}
+		else
+		{
+			size += pCreateInfo->pPoolSizes[i].descriptorCount * sw::align(descriptorSize, 16);
+		}
 	}
 
 	return size;
diff --git a/src/Vulkan/VkDescriptorSetLayout.cpp b/src/Vulkan/VkDescriptorSetLayout.cpp
index c8d5f25..06e2555 100644
--- a/src/Vulkan/VkDescriptorSetLayout.cpp
+++ b/src/Vulkan/VkDescriptorSetLayout.cpp
@@ -129,6 +129,8 @@
 	case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
 	case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
 		return static_cast<uint32_t>(sizeof(BufferDescriptor));
+	case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
+		return 1;
 	default:
 		UNSUPPORTED("Unsupported Descriptor Type: %d", int(type));
 		return 0;
@@ -211,6 +213,9 @@
 			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
 				mem += bindings[i].descriptorCount * descriptorSize;
 				break;
+			case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
+				mem += bindings[i].descriptorCount;
+				break;
 			default:
 				UNSUPPORTED("Unsupported Descriptor Type: %d", int(bindings[i].descriptorType));
 			}
@@ -562,6 +567,11 @@
 			bufferDescriptor[i].robustnessSize = static_cast<int>(buffer->getSize() - update->offset);
 		}
 	}
+	else if(entry.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
+	{
+		auto update = reinterpret_cast<const VkWriteDescriptorSetInlineUniformBlock *>(src);
+		memcpy(memToWrite, static_cast<const uint8_t *>(update->pData), update->dataSize);
+	}
 }
 
 void DescriptorSetLayout::WriteDescriptorSet(Device *device, const VkWriteDescriptorSet &writeDescriptorSet)
@@ -600,6 +610,32 @@
 		e.stride = sizeof(VkDescriptorBufferInfo);
 		break;
 
+	case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
+		{
+			auto extInfo = reinterpret_cast<VkBaseInStructure const *>(writeDescriptorSet.pNext);
+			while(extInfo)
+			{
+				if(extInfo->sType == VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK)
+				{
+					// "The descriptorCount of VkDescriptorSetLayoutBinding thus provides the total
+					//  number of bytes a particular binding with an inline uniform block descriptor
+					//  type can hold, while the srcArrayElement, dstArrayElement, and descriptorCount
+					//  members of VkWriteDescriptorSet, VkCopyDescriptorSet, and
+					//  VkDescriptorUpdateTemplateEntry (where applicable) specify the byte offset and
+					//  number of bytes to write/copy to the binding's backing store. Additionally,
+					//  the stride member of VkDescriptorUpdateTemplateEntry is ignored for inline
+					//  uniform blocks and a default value of one is used, meaning that the data to
+					//  update inline uniform block bindings with must be contiguous in memory."
+					ptr = extInfo;
+					e.descriptorCount = 1;
+					e.stride = 1;
+					break;
+				}
+				extInfo = extInfo->pNext;
+			}
+		}
+		break;
+
 	default:
 		UNSUPPORTED("descriptor type %u", writeDescriptorSet.descriptorType);
 	}
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 9410b88..b737bd0 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -354,6 +354,12 @@
 	features->computeFullSubgroups = VK_TRUE;
 }
 
+static void getPhysicalDeviceInlineUniformBlockFeatures(VkPhysicalDeviceInlineUniformBlockFeatures *features)
+{
+	features->inlineUniformBlock = VK_TRUE;
+	features->descriptorBindingInlineUniformBlockUpdateAfterBind = VK_TRUE;
+}
+
 static void getPhysicalDevice4444FormatsFeaturesExt(VkPhysicalDevice4444FormatsFeaturesEXT *features)
 {
 	features->formatA4R4G4B4 = VK_TRUE;
@@ -468,6 +474,9 @@
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES:
 			getPhysicalDeviceSubgroupSizeControlFeatures(reinterpret_cast<VkPhysicalDeviceSubgroupSizeControlFeatures *>(curExtension));
 			break;
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES:
+			getPhysicalDeviceInlineUniformBlockFeatures(reinterpret_cast<VkPhysicalDeviceInlineUniformBlockFeatures *>(curExtension));
+			break;
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT:
 			getPhysicalDevice4444FormatsFeaturesExt(reinterpret_cast<struct VkPhysicalDevice4444FormatsFeaturesEXT *>(curExtension));
 			break;
@@ -1090,6 +1099,15 @@
 	properties->requiredSubgroupSizeStages = subgroupProperties.supportedStages;
 }
 
+void PhysicalDevice::getProperties(VkPhysicalDeviceInlineUniformBlockProperties *properties) const
+{
+	properties->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
+	properties->maxPerStageDescriptorInlineUniformBlocks = 4;
+	properties->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 4;
+	properties->maxDescriptorSetInlineUniformBlocks = 4;
+	properties->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 4;
+}
+
 void PhysicalDevice::getProperties(VkPhysicalDeviceTexelBufferAlignmentProperties *properties) const
 {
 	properties->storageTexelBufferOffsetAlignmentBytes = vk::MIN_TEXEL_BUFFER_OFFSET_ALIGNMENT;
@@ -1272,6 +1290,14 @@
 	return CheckFeature(requested, supported, advancedBlendCoherentOperations);
 }
 
+bool PhysicalDevice::hasExtendedFeatures(const VkPhysicalDeviceInlineUniformBlockFeatures *requested) const
+{
+	auto supported = getSupportedFeatures(requested);
+
+	return CheckFeature(requested, supported, inlineUniformBlock) &&
+	       CheckFeature(requested, supported, descriptorBindingInlineUniformBlockUpdateAfterBind);
+}
+
 bool PhysicalDevice::hasExtendedFeatures(const VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *requested) const
 {
 	auto supported = getSupportedFeatures(requested);
diff --git a/src/Vulkan/VkPhysicalDevice.hpp b/src/Vulkan/VkPhysicalDevice.hpp
index 82b238c..a32edf1 100644
--- a/src/Vulkan/VkPhysicalDevice.hpp
+++ b/src/Vulkan/VkPhysicalDevice.hpp
@@ -46,6 +46,7 @@
 	bool hasExtendedFeatures(const VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT *features) const;
 	bool hasExtendedFeatures(const VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features) const;
 	bool hasExtendedFeatures(const VkPhysicalDeviceSubgroupSizeControlFeatures *requested) const;
+	bool hasExtendedFeatures(const VkPhysicalDeviceInlineUniformBlockFeatures *features) const;
 
 	const VkPhysicalDeviceProperties &getProperties() const;
 	void getProperties(VkPhysicalDeviceIDProperties *properties) const;
@@ -77,6 +78,7 @@
 	void getProperties(VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties) const;
 	void getProperties(VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT *properties) const;
 	void getProperties(VkPhysicalDeviceSubgroupSizeControlProperties *properties) const;
+	void getProperties(VkPhysicalDeviceInlineUniformBlockProperties *properties) const;
 	void getProperties(VkPhysicalDeviceTexelBufferAlignmentProperties *properties) const;
 	void getProperties(VkPhysicalDeviceVulkan11Properties *properties) const;
 
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index c307f45..ed86d2a 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -417,6 +417,7 @@
 	{ { VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_SPEC_VERSION } },
 	// Vulkan 1.3 promoted extensions
 	{ { VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, VK_EXT_EXTENDED_DYNAMIC_STATE_SPEC_VERSION } },
+	{ { VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME, VK_EXT_INLINE_UNIFORM_BLOCK_SPEC_VERSION } },
 	{ { VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_SPEC_VERSION } },
 	{ { VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME, VK_EXT_PIPELINE_CREATION_FEEDBACK_SPEC_VERSION } },
 	{ { VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, VK_EXT_SUBGROUP_SIZE_CONTROL_SPEC_VERSION } },
@@ -955,6 +956,16 @@
 				}
 			}
 			break;
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES:
+			{
+				const auto *uniformBlockFeatures = reinterpret_cast<const VkPhysicalDeviceInlineUniformBlockFeatures *>(extensionCreateInfo);
+				bool hasFeatures = vk::Cast(physicalDevice)->hasExtendedFeatures(uniformBlockFeatures);
+				if(!hasFeatures)
+				{
+					return VK_ERROR_FEATURE_NOT_PRESENT;
+				}
+			}
+			break;
 		// These structs are supported, but no behavior changes based on their feature bools
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES:
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES:
@@ -2300,7 +2311,14 @@
 	auto extInfo = reinterpret_cast<VkBaseInStructure const *>(pCreateInfo->pNext);
 	while(extInfo)
 	{
-		UNSUPPORTED("pCreateInfo->pNext sType = %s", vk::Stringify(extInfo->sType).c_str());
+		switch(extInfo->sType)
+		{
+		case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO:
+			break;
+		default:
+			UNSUPPORTED("pCreateInfo->pNext sType = %s", vk::Stringify(extInfo->sType).c_str());
+			break;
+		}
 		extInfo = extInfo->pNext;
 	}
 
@@ -3524,6 +3542,12 @@
 				vk::Cast(physicalDevice)->getProperties(properties);
 			}
 			break;
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES:
+			{
+				auto properties = reinterpret_cast<VkPhysicalDeviceInlineUniformBlockProperties *>(extensionProperties);
+				vk::Cast(physicalDevice)->getProperties(properties);
+			}
+			break;
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES:
 			{
 				auto properties = reinterpret_cast<VkPhysicalDeviceTexelBufferAlignmentProperties *>(extensionProperties);