Renderer alignment fix

The Renderer needs to be aligned when allocated, otherwise
it leads to all sorts of hard to track issues in the 32 bit
version of the library.

In this instance, Angle's end to end tests on SwiftShader
Vulkan had spurious failures due to having unaligned members
in PixelProcessor, which wasn't aligned because it's used as
a base class of the Renderer class, which wasn't aligned when
allocated. Using an allocator to align the memory fixes the
issue.

I also did a small cleanup of PixelProcessor::setBlendConstant,
as it was a bit hard to debug and changing it this way made it
easier to track what was going on.

Bug b/140888415

Change-Id: Ie5559642456ee4dcb73f9d07c777349f558879ba
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/36248
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Device/PixelProcessor.cpp b/src/Device/PixelProcessor.cpp
index 6976cce..e20cecd 100644
--- a/src/Device/PixelProcessor.cpp
+++ b/src/Device/PixelProcessor.cpp
@@ -62,96 +62,85 @@
 
 	void PixelProcessor::setBlendConstant(const Color<float> &blendConstant)
 	{
-		// FIXME: Compact into generic function   // FIXME: Clamp
-		short blendConstantR = static_cast<short>(iround(65535 * blendConstant.r));
-		short blendConstantG = static_cast<short>(iround(65535 * blendConstant.g));
-		short blendConstantB = static_cast<short>(iround(65535 * blendConstant.b));
-		short blendConstantA = static_cast<short>(iround(65535 * blendConstant.a));
+		// TODO(b/140935644): Compact into generic function, cheack if clamp is required
+		factor.blendConstant4W[0][0] =
+		factor.blendConstant4W[0][1] =
+		factor.blendConstant4W[0][2] =
+		factor.blendConstant4W[0][3] = static_cast<uint16_t>(iround(65535.0f * blendConstant.r));
 
-		factor.blendConstant4W[0][0] = blendConstantR;
-		factor.blendConstant4W[0][1] = blendConstantR;
-		factor.blendConstant4W[0][2] = blendConstantR;
-		factor.blendConstant4W[0][3] = blendConstantR;
+		factor.blendConstant4W[1][0] =
+		factor.blendConstant4W[1][1] =
+		factor.blendConstant4W[1][2] =
+		factor.blendConstant4W[1][3] = static_cast<uint16_t>(iround(65535.0f * blendConstant.g));
 
-		factor.blendConstant4W[1][0] = blendConstantG;
-		factor.blendConstant4W[1][1] = blendConstantG;
-		factor.blendConstant4W[1][2] = blendConstantG;
-		factor.blendConstant4W[1][3] = blendConstantG;
+		factor.blendConstant4W[2][0] =
+		factor.blendConstant4W[2][1] =
+		factor.blendConstant4W[2][2] =
+		factor.blendConstant4W[2][3] = static_cast<uint16_t>(iround(65535.0f * blendConstant.b));
 
-		factor.blendConstant4W[2][0] = blendConstantB;
-		factor.blendConstant4W[2][1] = blendConstantB;
-		factor.blendConstant4W[2][2] = blendConstantB;
-		factor.blendConstant4W[2][3] = blendConstantB;
+		factor.blendConstant4W[3][0] =
+		factor.blendConstant4W[3][1] =
+		factor.blendConstant4W[3][2] =
+		factor.blendConstant4W[3][3] = static_cast<uint16_t>(iround(65535.0f * blendConstant.a));
 
-		factor.blendConstant4W[3][0] = blendConstantA;
-		factor.blendConstant4W[3][1] = blendConstantA;
-		factor.blendConstant4W[3][2] = blendConstantA;
-		factor.blendConstant4W[3][3] = blendConstantA;
+		factor.invBlendConstant4W[0][0] =
+		factor.invBlendConstant4W[0][1] =
+		factor.invBlendConstant4W[0][2] =
+		factor.invBlendConstant4W[0][3] = 0xFFFFu - factor.blendConstant4W[0][0];
 
-		// FIXME: Compact into generic function   // FIXME: Clamp
-		short invBlendConstantR = static_cast<short>(iround(65535 * (1 - blendConstant.r)));
-		short invBlendConstantG = static_cast<short>(iround(65535 * (1 - blendConstant.g)));
-		short invBlendConstantB = static_cast<short>(iround(65535 * (1 - blendConstant.b)));
-		short invBlendConstantA = static_cast<short>(iround(65535 * (1 - blendConstant.a)));
+		factor.invBlendConstant4W[1][0] =
+		factor.invBlendConstant4W[1][1] =
+		factor.invBlendConstant4W[1][2] =
+		factor.invBlendConstant4W[1][3] = 0xFFFFu - factor.blendConstant4W[1][0];
 
-		factor.invBlendConstant4W[0][0] = invBlendConstantR;
-		factor.invBlendConstant4W[0][1] = invBlendConstantR;
-		factor.invBlendConstant4W[0][2] = invBlendConstantR;
-		factor.invBlendConstant4W[0][3] = invBlendConstantR;
+		factor.invBlendConstant4W[2][0] =
+		factor.invBlendConstant4W[2][1] =
+		factor.invBlendConstant4W[2][2] =
+		factor.invBlendConstant4W[2][3] = 0xFFFFu - factor.blendConstant4W[2][0];
 
-		factor.invBlendConstant4W[1][0] = invBlendConstantG;
-		factor.invBlendConstant4W[1][1] = invBlendConstantG;
-		factor.invBlendConstant4W[1][2] = invBlendConstantG;
-		factor.invBlendConstant4W[1][3] = invBlendConstantG;
+		factor.invBlendConstant4W[3][0] =
+		factor.invBlendConstant4W[3][1] =
+		factor.invBlendConstant4W[3][2] =
+		factor.invBlendConstant4W[3][3] = 0xFFFFu - factor.blendConstant4W[3][0];
 
-		factor.invBlendConstant4W[2][0] = invBlendConstantB;
-		factor.invBlendConstant4W[2][1] = invBlendConstantB;
-		factor.invBlendConstant4W[2][2] = invBlendConstantB;
-		factor.invBlendConstant4W[2][3] = invBlendConstantB;
-
-		factor.invBlendConstant4W[3][0] = invBlendConstantA;
-		factor.invBlendConstant4W[3][1] = invBlendConstantA;
-		factor.invBlendConstant4W[3][2] = invBlendConstantA;
-		factor.invBlendConstant4W[3][3] = invBlendConstantA;
-
-		factor.blendConstant4F[0][0] = blendConstant.r;
-		factor.blendConstant4F[0][1] = blendConstant.r;
-		factor.blendConstant4F[0][2] = blendConstant.r;
+		factor.blendConstant4F[0][0] =
+		factor.blendConstant4F[0][1] =
+		factor.blendConstant4F[0][2] =
 		factor.blendConstant4F[0][3] = blendConstant.r;
 
-		factor.blendConstant4F[1][0] = blendConstant.g;
-		factor.blendConstant4F[1][1] = blendConstant.g;
-		factor.blendConstant4F[1][2] = blendConstant.g;
+		factor.blendConstant4F[1][0] =
+		factor.blendConstant4F[1][1] =
+		factor.blendConstant4F[1][2] =
 		factor.blendConstant4F[1][3] = blendConstant.g;
 
-		factor.blendConstant4F[2][0] = blendConstant.b;
-		factor.blendConstant4F[2][1] = blendConstant.b;
-		factor.blendConstant4F[2][2] = blendConstant.b;
+		factor.blendConstant4F[2][0] =
+		factor.blendConstant4F[2][1] =
+		factor.blendConstant4F[2][2] =
 		factor.blendConstant4F[2][3] = blendConstant.b;
 
-		factor.blendConstant4F[3][0] = blendConstant.a;
-		factor.blendConstant4F[3][1] = blendConstant.a;
-		factor.blendConstant4F[3][2] = blendConstant.a;
+		factor.blendConstant4F[3][0] =
+		factor.blendConstant4F[3][1] =
+		factor.blendConstant4F[3][2] =
 		factor.blendConstant4F[3][3] = blendConstant.a;
 
-		factor.invBlendConstant4F[0][0] = 1 - blendConstant.r;
-		factor.invBlendConstant4F[0][1] = 1 - blendConstant.r;
-		factor.invBlendConstant4F[0][2] = 1 - blendConstant.r;
+		factor.invBlendConstant4F[0][0] =
+		factor.invBlendConstant4F[0][1] =
+		factor.invBlendConstant4F[0][2] =
 		factor.invBlendConstant4F[0][3] = 1 - blendConstant.r;
 
-		factor.invBlendConstant4F[1][0] = 1 - blendConstant.g;
-		factor.invBlendConstant4F[1][1] = 1 - blendConstant.g;
-		factor.invBlendConstant4F[1][2] = 1 - blendConstant.g;
+		factor.invBlendConstant4F[1][0] =
+		factor.invBlendConstant4F[1][1] =
+		factor.invBlendConstant4F[1][2] =
 		factor.invBlendConstant4F[1][3] = 1 - blendConstant.g;
 
-		factor.invBlendConstant4F[2][0] = 1 - blendConstant.b;
-		factor.invBlendConstant4F[2][1] = 1 - blendConstant.b;
-		factor.invBlendConstant4F[2][2] = 1 - blendConstant.b;
+		factor.invBlendConstant4F[2][0] =
+		factor.invBlendConstant4F[2][1] =
+		factor.invBlendConstant4F[2][2] =
 		factor.invBlendConstant4F[2][3] = 1 - blendConstant.b;
 
-		factor.invBlendConstant4F[3][0] = 1 - blendConstant.a;
-		factor.invBlendConstant4F[3][1] = 1 - blendConstant.a;
-		factor.invBlendConstant4F[3][2] = 1 - blendConstant.a;
+		factor.invBlendConstant4F[3][0] =
+		factor.invBlendConstant4F[3][1] =
+		factor.invBlendConstant4F[3][2] =
 		factor.invBlendConstant4F[3][3] = 1 - blendConstant.a;
 	}
 
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index a7e5042..854c935 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -159,6 +159,18 @@
 		drawTickets.take().wait();
 	}
 
+	// Renderer objects have to be mem aligned to the alignment provided in the class declaration
+	void* Renderer::operator new(size_t size)
+	{
+		ASSERT(size == sizeof(Renderer));  // This operator can't be called from a derived class
+		return vk::allocate(sizeof(Renderer), alignof(Renderer), vk::DEVICE_MEMORY, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+	}
+
+	void Renderer::operator delete(void* mem)
+	{
+		vk::deallocate(mem, vk::DEVICE_MEMORY);
+	}
+
 	void Renderer::draw(const sw::Context* context, VkIndexType indexType, unsigned int count, int baseVertex,
 			TaskEvents *events, int instanceID, int viewID, void *indexBuffer,
 			PushConstantStorage const & pushConstants, bool update)
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index 042f932..63b4587 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -189,13 +189,16 @@
 		static bool setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw);
 	};
 
-	class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor
+	class alignas(16) Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor
 	{
 	public:
 		Renderer(vk::Device* device);
 
 		virtual ~Renderer();
 
+		void* operator new(size_t size);
+		void operator delete(void* mem);
+
 		bool hasOcclusionQuery() const { return occlusionQuery != nullptr; }
 
 		void draw(const sw::Context* context, VkIndexType indexType, unsigned int count, int baseVertex,
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index c6cbcbc..e3a9ae7 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -526,12 +526,12 @@
 
 		// Apply either pipeline state or dynamic state
 		executionState.renderer->setScissor(pipeline->hasDynamicState(VK_DYNAMIC_STATE_SCISSOR) ?
-											executionState.dynamicState.scissor : pipeline->getScissor());
+		                                    executionState.dynamicState.scissor : pipeline->getScissor());
 		executionState.renderer->setViewport(pipeline->hasDynamicState(VK_DYNAMIC_STATE_VIEWPORT) ?
-											 executionState.dynamicState.viewport : pipeline->getViewport());
+		                                     executionState.dynamicState.viewport : pipeline->getViewport());
 		executionState.renderer->setBlendConstant(pipeline->hasDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS) ?
-												  executionState.dynamicState.blendConstants
-																											  : pipeline->getBlendConstants());
+		                                          executionState.dynamicState.blendConstants : pipeline->getBlendConstants());
+
 		if (pipeline->hasDynamicState(VK_DYNAMIC_STATE_DEPTH_BIAS))
 		{
 			// If the depth bias clamping feature is not enabled, depthBiasClamp must be 0.0