Make all descriptors 16-byte aligned.

Some SampledImageDescriptor members are assumed to be 16-byte aligned,
such as the sw::Mipmap::pitchP member accessed by sampleFloat2D(). To
guarantee this while descriptor sets can contain multiple types of
descriptor, all descriptors are made a multiple of 16 bytes in size.

Also, each descriptor set starts with a pointer to its layout, followed
by the actual descriptor content. To ensure the latter is 16-byte
aligned, the layout pointer was placed in a header structure which
itself is 16-byte aligned and thus includes the necessary padding.

The placement of the descriptor set itself is guaranteed to be 16-byte
aligned by vk::REQUIRED_MEMORY_ALIGNMENT = 16 being used for descriptor
pool allocation.

Bug: b/129523279
Bug: b/123244275
Change-Id: I72fb92d9ff5a8d100d05f5b9e38170ade557d434
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29970
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Vulkan/VkConfig.h b/src/Vulkan/VkConfig.h
index 3aa5de8..27ef7de 100644
--- a/src/Vulkan/VkConfig.h
+++ b/src/Vulkan/VkConfig.h
@@ -37,10 +37,13 @@
 
 enum
 {
-	REQUIRED_MEMORY_ALIGNMENT = 16, // ARM64 will want 8 bytes for 64b formats; x86 wants 16 bytes for 128b formats
+	// Alignment of all Vulkan objects, pools, device memory, images, buffers, descriptors.
+	REQUIRED_MEMORY_ALIGNMENT = 16,  // 16 bytes for 128-bit vector types.
+	
 	MIN_TEXEL_BUFFER_OFFSET_ALIGNMENT = 256,
 	MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT = 256,
 	MIN_STORAGE_BUFFER_OFFSET_ALIGNMENT = 256,
+
 	MEMORY_TYPE_GENERIC_BIT = 0x1, // Generic system memory.
 };
 
diff --git a/src/Vulkan/VkDescriptorPool.cpp b/src/Vulkan/VkDescriptorPool.cpp
index 28907bb..da5bb39 100644
--- a/src/Vulkan/VkDescriptorPool.cpp
+++ b/src/Vulkan/VkDescriptorPool.cpp
@@ -13,7 +13,10 @@
 // limitations under the License.
 
 #include "VkDescriptorPool.hpp"
+
+#include "VkDescriptorSet.hpp"
 #include "VkDescriptorSetLayout.hpp"
+
 #include <algorithm>
 #include <memory>
 
@@ -38,7 +41,7 @@
 	for(uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++)
 	{
 		size += pCreateInfo->pPoolSizes[i].descriptorCount *
-		        (sizeof(DescriptorSetLayout*) +
+		        (sizeof(DescriptorSetHeader) +
 		         DescriptorSetLayout::GetDescriptorSize(pCreateInfo->pPoolSizes[i].type));
 	}
 
diff --git a/src/Vulkan/VkDescriptorSet.hpp b/src/Vulkan/VkDescriptorSet.hpp
index dee38b5..2ce3d69 100644
--- a/src/Vulkan/VkDescriptorSet.hpp
+++ b/src/Vulkan/VkDescriptorSet.hpp
@@ -24,14 +24,19 @@
 {
 	class DescriptorSetLayout;
 
+	struct alignas(16) DescriptorSetHeader
+	{
+		DescriptorSetLayout* layout;
+	};
+
 	class DescriptorSet
 	{
 	public:
 		using Bindings = std::array<vk::DescriptorSet*, vk::MAX_BOUND_DESCRIPTOR_SETS>;
 		using DynamicOffsets = std::array<uint32_t, vk::MAX_DESCRIPTOR_SET_COMBINED_BUFFERS_DYNAMIC>;
 
-		DescriptorSetLayout* layout;
-		uint8_t data[1];
+		DescriptorSetHeader header;
+		alignas(16) uint8_t data[1];
 	};
 
 	inline DescriptorSet* Cast(VkDescriptorSet object)
diff --git a/src/Vulkan/VkDescriptorSetLayout.cpp b/src/Vulkan/VkDescriptorSetLayout.cpp
index 47f37da..5442e45 100644
--- a/src/Vulkan/VkDescriptorSetLayout.cpp
+++ b/src/Vulkan/VkDescriptorSetLayout.cpp
@@ -89,35 +89,44 @@
 
 size_t DescriptorSetLayout::GetDescriptorSize(VkDescriptorType type)
 {
+	size_t size = 0;
+
 	switch(type)
 	{
 	case VK_DESCRIPTOR_TYPE_SAMPLER:
 	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
 	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-		return sizeof(SampledImageDescriptor);
+		size = sizeof(SampledImageDescriptor);
+		break;
 	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
 	case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-		return sizeof(StorageImageDescriptor);
+		size = sizeof(StorageImageDescriptor);
+		break;
 	case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-		return sizeof(VkDescriptorImageInfo);
+		size = sizeof(VkDescriptorImageInfo);
+		break;
 	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-		return sizeof(VkBufferView);
+		size = sizeof(VkBufferView);
+		break;
 	case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
 	case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
 	case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
 	case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-		return sizeof(VkDescriptorBufferInfo);
+		size = sizeof(VkDescriptorBufferInfo);
+		break;
 	default:
 		UNIMPLEMENTED("Unsupported Descriptor Type");
+		return 0;
 	}
 
-	return 0;
+	// Aligning each descriptor to 16 bytes allows for more efficient vector accesses in the shaders.
+	return sw::align<16>(size);  // TODO(b/123244275): Eliminate by using a custom alignas(16) struct for each desctriptor.
 }
 
 size_t DescriptorSetLayout::getDescriptorSetAllocationSize() const
 {
 	// vk::DescriptorSet has a layout member field.
-	return sizeof(vk::DescriptorSetLayout*) + getDescriptorSetDataSize();
+	return sizeof(vk::DescriptorSetHeader) + getDescriptorSetDataSize();
 }
 
 size_t DescriptorSetLayout::getDescriptorSetDataSize() const
@@ -149,7 +158,7 @@
 {
 	// Use a pointer to this descriptor set layout as the descriptor set's header
 	DescriptorSet* descriptorSet = vk::Cast(vkDescriptorSet);
-	descriptorSet->layout = this;
+	descriptorSet->header.layout = this;
 	uint8_t* mem = descriptorSet->data;
 
 	for(uint32_t i = 0; i < bindingCount; i++)
@@ -255,7 +264,7 @@
 
 void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src)
 {
-	DescriptorSetLayout* dstLayout = dstSet->layout;
+	DescriptorSetLayout* dstLayout = dstSet->header.layout;
 	auto &binding = dstLayout->bindings[dstLayout->getBindingIndex(entry.dstBinding)];
 	ASSERT(dstLayout);
 	ASSERT(binding.descriptorType == entry.descriptorType);
@@ -263,6 +272,8 @@
 	size_t typeSize = 0;
 	uint8_t* memToWrite = dstLayout->getOffsetPointer(dstSet, entry.dstBinding, entry.dstArrayElement, entry.descriptorCount, &typeSize);
 
+	ASSERT(reinterpret_cast<intptr_t>(memToWrite) % 16 == 0);  // Each descriptor must be 16-byte aligned.
+
 	if(entry.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
 	{
 		SampledImageDescriptor *imageSampler = reinterpret_cast<SampledImageDescriptor*>(memToWrite);
@@ -517,11 +528,11 @@
 void DescriptorSetLayout::CopyDescriptorSet(const VkCopyDescriptorSet& descriptorCopies)
 {
 	DescriptorSet* srcSet = vk::Cast(descriptorCopies.srcSet);
-	DescriptorSetLayout* srcLayout = srcSet->layout;
+	DescriptorSetLayout* srcLayout = srcSet->header.layout;
 	ASSERT(srcLayout);
 
 	DescriptorSet* dstSet = vk::Cast(descriptorCopies.dstSet);
-	DescriptorSetLayout* dstLayout = dstSet->layout;
+	DescriptorSetLayout* dstLayout = dstSet->header.layout;
 	ASSERT(dstLayout);
 
 	size_t srcTypeSize = 0;
diff --git a/src/Vulkan/VkDescriptorSetLayout.hpp b/src/Vulkan/VkDescriptorSetLayout.hpp
index d7660b9..9ee4b3b 100644
--- a/src/Vulkan/VkDescriptorSetLayout.hpp
+++ b/src/Vulkan/VkDescriptorSetLayout.hpp
@@ -27,7 +27,7 @@
 class DescriptorSet;
 
 // TODO(b/129523279): Move to the Device or Pipeline layer.
-struct SampledImageDescriptor
+struct alignas(16) SampledImageDescriptor
 {
 	void updateSampler(const vk::Sampler *sampler);
 
@@ -35,10 +35,10 @@
 	const vk::Sampler *sampler;
 	const vk::ImageView *imageView;
 
-	sw::Texture texture;
+	alignas(16) sw::Texture texture;
 };
 
-struct StorageImageDescriptor
+struct alignas(16) StorageImageDescriptor
 {
 	void *ptr;
 	VkExtent3D extent;